
Binary classification from 2 features with Keras (TensorFlow 2.0)

Classification using Keras, performing a task similar to the TensorFlow implementation.

Based on the same data model as in the binary classification workbook.

The classification is based on a single boundary defined by a continuous function with added white noise.

The initial model is a logistic regression implemented with Keras. A multi-layer model is then developed to better fit the non-linear boundary of the data.

In [1]:
from tensorflow import keras # TF 2.0+ required
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
import matplotlib.colors as pltcolors
from sklearn import metrics as skMetrics
import pandas
import seaborn as sns
In [2]:
usingTensorBoard = False 

Model

A polynomial (4th degree) function as the boundary between positive and negative values

Adding some unknown in the form of Gaussian noise

The values of X are uniformly distributed and independent
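
Written out (a restatement of the generateBatch code in the next cell, using its constants b = 0.1 and std = 0.1), the label rule is:

label = 1 if 2*(x0^4 + (x0 - 0.3)^3 + b) + eps > x1, with eps ~ N(0, std^2), else 0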

In [3]:
# Two features, Gaussian noise
nFeatures = 2

def generateBatch(N):
    #
    xMin = 0
    xMax = 1
    b = 0.1
    std = 0.1
    #
    x = random.uniform(xMin, xMax, (N, 2))
    # 4th degree relation to shape the boundary
    boundary = 2*(x[:,0]**4 + (x[:,0]-0.3)**3 + b)
    # Adding some gaussian noise
    labels = boundary + random.normal(0, std, N) > x[:,1]
    return (x, labels)

def plotHeatMap(X, classes, title=None, fmt='.2g', ax=None, xlabel=None, ylabel=None):
    """ Fix heatmap plot from Seaborn with pyplot 3.1.0, 3.1.1
        https://stackoverflow.com/questions/56942670/matplotlib-seaborn-first-and-last-row-cut-in-half-of-heatmap-plot
    """
    ax = sns.heatmap(X, xticklabels=classes, yticklabels=classes, annot=True, fmt=fmt, cmap=plt.cm.Blues, ax=ax) # notation: "annot" not "annote"
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)
    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

def plotConfusionMatrix(yTrue, yEst, classes, title=None, fmt='.2g', ax=None):
    plotHeatMap(skMetrics.confusion_matrix(yTrue, yEst), classes, title, fmt, ax,
                xlabel='Estimations', ylabel='True values');

Training data

In [4]:
N = 2000
# x has 2 dims in R^2, label has 1 dim in B
xTrain, labelTrain = generateBatch(N)

colors = ['blue','red']
fig = plt.figure(figsize=(12,4))
plt.subplot(1,3,1)
plt.scatter(xTrain[:,0], xTrain[:,1], c=labelTrain, cmap=pltcolors.ListedColormap(colors), marker=',', alpha=0.2)
plt.xlabel('x0')
plt.ylabel('x1')
plt.title('Generated train data')
plt.grid()
cb = plt.colorbar()
loc = np.arange(0,1,1./len(colors))
cb.set_ticks(loc)
cb.set_ticklabels([0,1])
plt.subplot(1,3,2)
plt.scatter(xTrain[:,0], labelTrain, marker=',', alpha=0.01)
plt.xlabel('x0')
plt.ylabel('label')
plt.grid()
plt.subplot(1,3,3)
plt.scatter(xTrain[:,1], labelTrain, marker=',', alpha=0.01)
plt.xlabel('x1')
plt.ylabel('label')
plt.grid()
In [5]:
count, bins, ignored = plt.hist(labelTrain*1.0, 10, density=True, alpha=0.5)
p = np.mean(labelTrain)
print('Bernoulli parameter of the distribution:', p)
Bernoulli parameter of the distribution: 0.5085

Test data for verification of the model

In [6]:
xTest, labelTest = generateBatch(N)
testColors = ['navy', 'orangered']

Helpers

In [7]:
def plotTestResult(xTest, labelTest, yEst, labelEst):
    plt.figure(figsize=(12,4))
    plt.subplot(1,3,1)
    plt.scatter(xTest[:,0], xTest[:,1], c=labelEst, cmap=pltcolors.ListedColormap(testColors), marker='x', alpha=0.2);
    plt.xlabel('x0')
    plt.ylabel('x1')
    plt.grid()
    plt.title('Estimated')
    cb = plt.colorbar()
    loc = np.arange(0,1,1./len(testColors))
    cb.set_ticks(loc)
    cb.set_ticklabels([0,1]);
    plt.subplot(1,3,2)
    plt.hist(yEst, 10, density=True, alpha=0.5)
    plt.title('Bernoulli parameter =' + str(np.mean(labelEst)))
    plt.subplot(1,3,3)
    plt.scatter(xTest[:,0], xTest[:,1], c=labelTest, cmap=pltcolors.ListedColormap(colors), marker='x', alpha=0.2);
    plt.xlabel('x0')
    plt.ylabel('x1')
    plt.grid()
    plt.title('Generator')
    cb = plt.colorbar()
    loc = np.arange(0,1,1./len(colors))
    cb.set_ticks(loc)
    cb.set_ticklabels([0,1]);

def plotLossAccuracy(loss, accuracy):
    plt.figure(figsize=(15,4))
    plt.subplot(1,3,1)
    plt.plot(loss)
    plt.grid()
    plt.title('Loss')
    plt.subplot(1,3,2)
    plt.plot(accuracy)
    plt.grid()
    plt.title('Accuracy');
In [8]:
# Number of epochs
nEpoch = 200
nBatch = 32 # 32 is default
In [9]:
model = keras.models.Sequential([
    keras.layers.Dense(1, activation=keras.activations.sigmoid, input_shape=[nFeatures])
])
model.compile(optimizer='adam',
              loss='binary_crossentropy', #'sparse_categorical_crossentropy',
              metrics=['accuracy'])

callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs/", histogram_freq=1, write_graph=True,
                                     write_grads=True, batch_size=1)
    callbacks = [ks]

hist = model.fit(xTrain, labelTrain, epochs=nEpoch, batch_size=nBatch, verbose=0, callbacks=callbacks)
In [10]:
weights, biases = model.get_weights()
print('Est W=', weights.reshape(-1), ', b=', biases[0])
Est W= [ 6.613447 -5.2263746] , b= -0.55753434 
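
As a quick sanity check (not part of the original notebook), the single sigmoid unit defines a straight decision boundary where w0*x0 + w1*x1 + b = 0, i.e. x1 = -(w0*x0 + b)/w1. A minimal sketch to overlay this line on the training data, reusing the weights and biases extracted above:

# Sketch: overlay the learned linear decision boundary on the training data
x0Line = np.linspace(0, 1, 100)
x1Line = -(weights[0, 0] * x0Line + biases[0]) / weights[1, 0]
plt.scatter(xTrain[:,0], xTrain[:,1], c=labelTrain, cmap=pltcolors.ListedColormap(colors), marker=',', alpha=0.1)
plt.plot(x0Line, x1Line, 'k-', label='sigmoid(w.x + b) = 0.5')
plt.xlabel('x0')
plt.ylabel('x1')
plt.legend()
plt.grid()
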
In [11]:
plotLossAccuracy(hist.history['loss'], hist.history['accuracy']) 

Testing the model

In [12]:
#model.evaluate(xTest, labelTest)
yEst = model.predict(xTest).reshape(-1)
labelEst = yEst > 0.5
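
The commented-out model.evaluate call would report the same metrics directly from Keras; a minimal sketch of that alternative:

# Optional: let Keras compute loss and accuracy on the test set
testLoss, testAccuracy = model.evaluate(xTest, labelTest, verbose=0)
print('Test accuracy:', testAccuracy)
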
In [13]:
plotTestResult(xTest, labelTest, yEst, labelEst) 
In [14]:
plotConfusionMatrix(labelTest, labelEst, np.array(['Blue', 'Red'])); 
In [15]:
print(skMetrics.classification_report(labelTest, labelEst)) 
              precision    recall  f1-score   support

       False       0.87      0.88      0.88       959
        True       0.89      0.88      0.89      1041

    accuracy                           0.88      2000
   macro avg       0.88      0.88      0.88      2000
weighted avg       0.88      0.88      0.88      2000
In [16]:
# Visualization now done using TensorBoard
#from IPython.display import SVG
#from keras.utils.vis_utils import model_to_dot
#SVG(model_to_dot(model).create(prog='dot', format='svg'))
#from keras.utils import plot_model
#plot_model(model, to_file='model.png')
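
With TF 2.x the same diagrams can be produced without the standalone keras imports; a sketch, assuming pydot and Graphviz are installed for plot_model:

# Text summary of the layers and parameter counts
model.summary()
# Graphical view of the model (requires pydot and Graphviz)
keras.utils.plot_model(model, to_file='model.png', show_shapes=True)
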

Adding a regularizer

We have seen in ClassificationContinuous2Features-TensorFlow that the solution is not unique. Let's add a constraint through a regularizer.

Reference: https://machinelearningmastery.com/how-to-reduce-generalization-error-in-deep-neural-networks-with-activity-regularization-in-keras/
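
For reference, l1(0.01) on both the kernel and the bias adds 0.01 times the sum of the absolute weight values to the binary cross-entropy during training. An illustration (my own, not from the notebook) of the size of that penalty for the unregularized weights obtained above:

# Sketch: the extra term that the L1 regularizers add to the loss
l1Penalty = 0.01 * (np.sum(np.abs(weights)) + np.sum(np.abs(biases)))
print('L1 penalty these weights would incur:', l1Penalty)
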

In [17]:
model2 = keras.models.Sequential([
    keras.layers.Dense(1, activation='linear', input_shape=[nFeatures],
                       bias_regularizer=keras.regularizers.l1(0.01),    # <---
                       kernel_regularizer=keras.regularizers.l1(0.01)), # <----
    keras.layers.Activation(keras.activations.sigmoid)                  # <----
])
model2.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs2/", histogram_freq=1, write_graph=True,
                                     write_grads=True, batch_size=1)
    callbacks = [ks]

hist2 = model2.fit(xTrain, labelTrain, epochs=nEpoch, batch_size=nBatch, verbose=0, callbacks=callbacks)
In [18]:
plotLossAccuracy(hist2.history['loss'], hist2.history['accuracy']) 
In [19]:
weights2, biases2 = model2.get_weights()
print('With regularizer W=', weights2.reshape(-1), ', b=', biases2[0])
With regularizer W= [ 5.662139 -4.575111] , b= -0.3705385 

With a kernel regularizer, convergence seems faster; it depends, however, on the initialization values.

The weights and bias are not very different from the unregularized model.

Testing the model with regularizer

In [20]:
yEst2 = model2.predict(xTest).reshape(-1)
labelEst2 = yEst2 > 0.5
In [21]:
plotTestResult(xTest, labelTest, yEst2, labelEst2) 
In [22]:
plotConfusionMatrix(labelTest, labelEst2, np.array(['Blue', 'Red'])) 
In [23]:
print(skMetrics.classification_report(labelTest, labelEst2)) 
              precision    recall  f1-score   support

       False       0.88      0.87      0.88       959
        True       0.88      0.89      0.89      1041

    accuracy                           0.88      2000
   macro avg       0.88      0.88      0.88      2000
weighted avg       0.88      0.88      0.88      2000

Adding capacity to the model

The above model is not able to match the actual boundary, as its capacity is limited to a simple linear (1st degree) separation of the plane.

Let's add more neurons or more layers to our model.

Two layers (4 -> 1)
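
For reference (a restatement of the layers defined in the next cell, in assumed matrix notation), this network computes:

h = relu(x @ W1 + b1), with W1 of shape (2, 4)
y = sigmoid(h @ W2 + b2), with W2 of shape (4, 1)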

In [24]:
nEpoch3 = 512
model3 = keras.models.Sequential([
    keras.layers.Dense(4, activation=keras.activations.relu, input_shape=[nFeatures], # <---
                       bias_regularizer=keras.regularizers.l1(0.01),
                       kernel_regularizer=keras.regularizers.l1(0.01)),
    keras.layers.Dense(1, activation=keras.activations.sigmoid) # <---
])
model3.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

callbacks = []
if usingTensorBoard:
    ks = keras.callbacks.TensorBoard(log_dir="./logs3/", histogram_freq=1, write_graph=True,
                                     write_grads=True, batch_size=1)
    callbacks = [ks]

hist3 = model3.fit(xTrain, labelTrain, epochs=nEpoch3, batch_size=nBatch, verbose=0, callbacks=callbacks)
In [25]:
plotLossAccuracy(hist3.history['loss'], hist3.history['accuracy']) 
In [26]:
weights3_1, biases3_1, weights3_2, biases3_2 = model3.get_weights()
print('2 Layers')
print('W1 =', weights3_1.reshape(-1))
print('b1 =', biases3_1)
print('W2 =', weights3_2.reshape(-1))
print('b2 =', biases3_2)
2 Layers
W1 = [ 5.9890741e-01  1.6685007e+00  2.5536180e-03 -4.2117099e-04 -7.7986467e-01
  5.6196753e-02  1.1522832e+00 -9.0610338e-06]
b1 = [ 3.4219748e-01 -8.2417828e-01  1.3022455e-03 -1.0507734e-05]
W2 = [ 8.008138  13.226465  -5.250704   0.3851403]
b2 = [-1.3699114]
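
As a consistency check (not in the original notebook), the extracted weights can reproduce the network's predictions in plain NumPy; a minimal sketch:

# Reproduce model3's forward pass with NumPy from the extracted weights
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

hidden = np.maximum(0, xTest @ weights3_1 + biases3_1)          # ReLU hidden layer, shape (N, 4)
yManual = sigmoid(hidden @ weights3_2 + biases3_2).reshape(-1)  # sigmoid output, shape (N,)
print('Max difference vs model3.predict:', np.max(np.abs(yManual - model3.predict(xTest).reshape(-1))))
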

Testing the model with two layers

In [27]:
yEst3 = model3.predict(xTest).reshape(-1)
labelEst3 = yEst3 > 0.5
In [28]:
plotTestResult(xTest, labelTest, yEst3, labelEst3) 
In [29]:
plotConfusionMatrix(labelTest, labelEst3, np.array(['Blue', 'Red'])); 
In [30]:
print(skMetrics.classification_report(labelTest, labelEst3)) 
              precision    recall  f1-score   support

       False       0.93      0.94      0.93       959
        True       0.94      0.93      0.94      1041

    accuracy                           0.94      2000
   macro avg       0.94      0.94      0.94      2000
weighted avg       0.94      0.94      0.94      2000

Conclusion

The network is able to match the generating function much better, leading to an improvement in accuracy from 90% to ~94%, given Gaussian noise with std=0.1.

On the accuracy plot, we see that the optimization happens in two phases: an initial convergence to 90% accuracy (as with the simpler network), followed by a slower optimization from 90% to 94% accuracy.

However, the experiments have shown that the optimizer is much less stable with more layers, sometimes even failing to converge. With this network, the regularizer seems mandatory; otherwise the second phase of optimization is never reached.

Where to go from here?

Other linear implementations and simple neural nets using "raw" Python or SciKit Learn (Notebook), using TensorFlow (Notebook), or the K Nearest Neighbors classifier (Notebook)

More complex multi-class models on the Czech and Norwegian flags using Keras (Notebook), showing one of the main motivations for neural networks.

Compare with the two-feature linear regression using simple algorithms (Notebook), or using Keras (Notebook).