# The following code builds a neural network whose goal is to classify a wine
# sample's quality from features such as pH, density, acidity, sulfur dioxide,
# and several others. The output is a 'bad', 'okay', or 'great' wine.
# The network has two hidden layers plus one input and one output layer. A report
# on the classification results is also printed.
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import keras
from keras import regularizers
import pandas as pd
# DATASET: https://archive.ics.uci.edu/dataset/186/wine+quality
# Extract the data from the file and split it into features and labels.
# For pre-processing, standardize the features and encode the labels, since the
# features mix discrete and continuous values on very different scales
# (roughly 0 to 2000).
# Load the dataset
data = pd.read_excel("winequality-white.xlsx",header = 0)
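# Note: UCI distributes this dataset as winequality-white.csv with ';' separators;
# this script assumes the file was converted to .xlsx beforehand. To read the raw
# file directly, pd.read_csv("winequality-white.csv", sep=";") also works.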
features = data.iloc[:, 0:-1]
labels = data.iloc[:,-1]
# Bin the 0-10 quality scores into three coarse classes for better classification results
labels = pd.Series(["bad" if elm <= 4 else "okay" if 4 < elm <= 8 else "great" for elm in labels])
scaler = StandardScaler()
features = scaler.fit_transform(features)
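# Note: fitting the scaler on the full dataset lets test-set statistics leak into
# training; a stricter pipeline would call scaler.fit_transform on the training
# split only and scaler.transform on the test split, after the split below.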
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)
labelsOnehot = keras.utils.to_categorical(labels,num_classes=3)
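# LabelEncoder sorts classes alphabetically, so encoder.classes_ is
# ['bad', 'great', 'okay'] and column 1 of the one-hot matrix means 'great'.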
# Split the data 70/30; train_test_split shuffles by default, which matters here
# because the file is organized by label
trainFeatures, testFeatures, trainLabels, testLabels =\
train_test_split(features, labelsOnehot, train_size=0.7, random_state=42)
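# The quality scores skew heavily toward the middle bin, so 'bad' and 'great' are
# rare; passing stratify=labels to train_test_split would keep the class ratios
# consistent across both splits.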
# Build the neural network. The input layer has one node per feature (11 for this
# dataset). Use two hidden layers with the relu activation function, each followed
# by a dropout layer, and include L2 regularizers to mitigate overfitting, which
# neural networks are prone to.
neural_network = keras.Sequential([
# Input layer
keras.Input(shape=(features.shape[1],)),
# Hidden layers
keras.layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
keras.layers.Dropout(0.2),
keras.layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
keras.layers.Dropout(0.2),
# Output Layer
keras.layers.Dense(3, activation='softmax')
])
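# Print a summary of layer output shapes and parameter counts as a sanity check
neural_network.summary()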
# Use the adam optimizer with categorical cross-entropy loss and accuracy as the metric
neural_network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Final hyperparameters: 100 epochs, batch size 32, and a 20% validation split, otherwise default values
history = neural_network.fit(trainFeatures, trainLabels, epochs=100, batch_size=32, validation_split=0.2, verbose=1)
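# Optional: an EarlyStopping callback can stop training once val_loss stops
# improving and restore the best weights, e.g.
# stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# and then pass callbacks=[stop] to fit() above.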
loss, modelAccuracy = neural_network.evaluate(testFeatures, testLabels)
print(f"\nTest accuracy: {modelAccuracy:.4f}")
# Make predictions on the test set, then convert the predicted probabilities and
# the one-hot test labels back into integer class indices for the report.
predictions = neural_network.predict(testFeatures)
predictedLabels = predictions.argmax(axis=1)
trueLabels = testLabels.argmax(axis=1)
report = classification_report(trueLabels, predictedLabels, labels=[0, 1, 2], target_names=encoder.classes_)
print(report)
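# A confusion matrix shows where the minority classes ('bad' and 'great') end up,
# which the aggregate accuracy number hides.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(trueLabels, predictedLabels, labels=[0, 1, 2]))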
# Plot training & validation accuracy and loss
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'])
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'])
plt.tight_layout()
plt.show()