Import various modules that we need for this notebook.
%pylab inline
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist, cifar10
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils
from keras.regularizers import l2
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
Load the MNIST dataset, reshape the images to add a single-channel dimension, one-hot encode the class labels, and scale the pixel values.
# Load the MNIST digits; each split comes back as an (images, integer labels)
# pair.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Give every image an explicit single-channel axis -> (n_samples, 1, 28, 28),
# convert to float32, and divide by 255 to rescale the pixel values.
X_train = X_train.reshape(-1, 1, 28, 28).astype('float32') / 255
X_test = X_test.reshape(-1, 1, 28, 28).astype('float32') / 255

# One-hot encode the integer labels over the 10 digit classes.
Y_train, Y_test = [np_utils.to_categorical(labels, 10)
                   for labels in (y_train, y_test)]
Build and compile a basic model.
# Single-convolution baseline: conv -> ReLU -> 2x2 max-pool -> dropout,
# then flatten into a 10-unit softmax output.
model = Sequential()
for layer in (
    Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 28, 28)),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(10),
    Activation('softmax'),
):
    model.add(layer)

# RMSprop with its default settings; the optimizer is bound to `rms` because
# later cells pass the same name to compile() again.
rms = RMSprop()
model.compile(optimizer=rms, loss='categorical_crossentropy')
Fit the model over 10 epochs. The predictiveness is impressive for such a small model!
# Train for 10 epochs in mini-batches of 32, holding out 10% of the training
# data (validation_split) and reporting accuracy alongside the loss.
model.fit(
    X_train,
    Y_train,
    nb_epoch=10,
    batch_size=32,
    validation_split=0.1,
    show_accuracy=True,
    verbose=1
)
For reference, here are the dimensions of the weights in each layer:
# Layer 0 is the Convolution2D layer and layer 5 is the Dense layer; for each,
# print the shape of the first array returned by get_weights().
conv_weights = model.layers[0].get_weights()[0]
print(conv_weights.shape)  # Convolution2D
dense_weights = model.layers[5].get_weights()[0]
print(dense_weights.shape)  # Dense
Evaluate model on the test set
# evaluate() is asked to report accuracy as well; element [1] of its result is
# the value printed as the classification rate.
test_scores = model.evaluate(X_test, Y_test, show_accuracy=True)
print("Test classification rate %0.05f" % test_scores[1])
Predict classes on the test set.
# Predicted class for every test image, followed by a cross-tabulation of
# predicted (rows) versus true (columns) labels.
y_hat = model.predict_classes(X_test)
pd.crosstab(y_hat, y_test)

# Collect an (image, predicted, actual) triple for each misclassified image.
test_wrong = [
    (image, predicted, actual)
    for image, predicted, actual in zip(X_test, y_hat, y_test)
    if predicted != actual
]
print(len(test_wrong))

# Show the misclassified digits on a 15x15 grid. The grid only has 225 cells,
# so the list is capped there: plt.subplot raises as soon as the panel index
# exceeds rows * cols, which would happen whenever the model makes more than
# 225 mistakes.
grid_side = 15
plt.figure(figsize=(15, 15))
# Margins are a figure-level setting, so set them once instead of per image.
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
for ind, val in enumerate(test_wrong[:grid_side * grid_side]):
    plt.subplot(grid_side, grid_side, ind + 1)
    im = 1 - val[0].reshape((28, 28))  # invert the scaled pixel values
    # Use plt.axis explicitly; the bare axis() name only exists when %pylab
    # has populated the namespace.
    plt.axis("off")
    plt.imshow(im, cmap='gray')
Stacking two convolution layers immediately after one another tends to produce very predictive models. Here, we also follow the convolution layers with a dense hidden layer. Note that this model takes significantly longer to train than dense (fully connected) models. As such, I trained it on only the first 1000 training samples. Using all of them should yield a classification rate near 99.5% on the entire test set.
# Two stacked 3x3 convolutions (each followed by ReLU), then 2x2 max-pooling
# and dropout; the flattened features feed a 128-unit ReLU hidden layer with
# dropout and a 10-unit softmax output.
model = Sequential()
for layer in (
    Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 28, 28)),
    Activation("relu"),
    Convolution2D(32, 3, 3, border_mode='same'),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128),
    Activation("relu"),
    Dropout(0.5),
    Dense(10),
    Activation('softmax'),
):
    model.add(layer)

# Fresh RMSprop optimizer with default settings, bound to `rms` because later
# cells reuse that name when recompiling truncated copies of the model.
rms = RMSprop()
model.compile(optimizer=rms, loss='categorical_crossentropy')
# Fit on only the first 1000 training examples (10 epochs, batches of 32,
# 10% of those examples held out for validation).
model.fit(
    X_train[:1000],
    Y_train[:1000],
    nb_epoch=10,
    batch_size=32,
    validation_split=0.1,
    show_accuracy=True,
    verbose=1
)

# Score on the full test set; element [1] of the result is printed as the
# classification rate.
test_scores = model.evaluate(X_test, Y_test, show_accuracy=True)
print("Test classification rate %0.05f" % test_scores[1])
# Predicted class for every test image, followed by a cross-tabulation of
# predicted (rows) versus true (columns) labels.
y_hat = model.predict_classes(X_test)
pd.crosstab(y_hat, y_test)

# Collect an (image, predicted, actual) triple for each misclassified image.
test_wrong = [
    (image, predicted, actual)
    for image, predicted, actual in zip(X_test, y_hat, y_test)
    if predicted != actual
]

# Show up to the first 100 mistakes on a 10x10 grid.
plt.figure(figsize=(10, 10))
# Margins are a figure-level setting, so set them once instead of per image.
plt.subplots_adjust(left=0, right=1, bottom=0, top=1)
for ind, val in enumerate(test_wrong[:100]):
    plt.subplot(10, 10, ind + 1)
    im = 1 - val[0].reshape((28, 28))  # invert the scaled pixel values
    plt.axis("off")
    plt.text(0, 0, val[2], fontsize=14, color='blue')  # true label
    plt.text(8, 0, val[1], fontsize=14, color='red')  # predicted label
    plt.imshow(im, cmap='gray')
Now, we want to visualize the convolution layers here.
# Inspect the network: first the list of layer objects, then the shape of the
# first weight array held by the first layer.
print(model.layers)  # list of the layers
first_layer = model.layers[0]
print(first_layer.get_weights()[0].shape)  # the weights
The first convolution layer has 32 filters. Because the input has a single channel, each filter can be displayed as a single 3x3 image.
# First weight array of the first convolution layer; iterating over it yields
# one filter at a time.
W1 = model.layers[0].get_weights()[0]

# Draw each filter as a 3x3 grayscale tile on a 6x6 grid of panels.
plt.figure(figsize=(10, 10), frameon=False)
for panel, kernel in enumerate(W1, 1):
    plt.subplot(6, 6, panel)
    tile = kernel.reshape((3, 3))
    plt.axis("off")
    plt.imshow(tile, cmap='gray', interpolation='nearest')
The second layer of weights will be given as a set of 32 filters, each with dimensions of 3x3x32. To display them, each filter is reshaped into a 32x9 image.
# First weight array of the second convolution layer (index 2 in the layer
# list); iterating over it yields one filter at a time.
W2 = model.layers[2].get_weights()[0]

# Draw each filter as a 32x9 grayscale tile on a 6x6 grid of panels.
plt.figure(figsize=(10, 10), frameon=False)
for panel, kernel in enumerate(W2, 1):
    plt.subplot(6, 6, panel)
    tile = kernel.reshape((32, 9))
    plt.axis("off")
    plt.imshow(tile, cmap='gray', interpolation='nearest')
Look at activations of several examples after the first layer:
# Shallow-copy the trained model and keep only its first two layers
# (convolution + ReLU), so that predict() returns the activations after the
# first convolution layer.
model2 = copy.copy(model)
model2.layers = model2.layers[:2]
model2.compile(loss='categorical_crossentropy', optimizer=rms)  # don't forget this step!

# Pick 3 distinct indices among the first 1000 test images. np.random is
# referenced explicitly: the bare `random` name is only numpy.random when
# %pylab has populated the namespace, and is undefined otherwise.
these = np.random.choice(range(1000), 3, replace=False)
x_rep = model2.predict(X_test[these])

# One figure per selected example, one panel per output map of the
# truncated model.
for this_x_rep in x_rep:
    plt.figure(figsize=(10, 10), frameon=False)
    for ind, val in enumerate(this_x_rep):
        plt.subplot(6, 6, ind + 1)
        plt.axis("off")
        plt.imshow(val, cmap='gray', interpolation='nearest')
Activations after the second layer:
# Shallow-copy the trained model and keep its first four layers
# (conv, ReLU, conv, ReLU) to read out the second convolution's activations.
model2 = copy.copy(model)
model2.layers = model2.layers[:4]
model2.compile(optimizer=rms, loss='categorical_crossentropy')  # don't forget this step!

# Reuse the test indices chosen earlier (`these`) so the same examples are shown.
x_rep = model2.predict(X_test[these])

# One figure per example, one panel per output map.
for example_maps in x_rep:
    plt.figure(figsize=(10, 10), frameon=False)
    for panel, feature_map in enumerate(example_maps, 1):
        plt.subplot(6, 6, panel)
        plt.axis("off")
        plt.imshow(feature_map, cmap='gray', interpolation='nearest')
Finally, activations after max pooling:
# Shallow-copy the trained model and keep its first five layers
# (conv, ReLU, conv, ReLU, max-pool) to read out the pooled activations.
model2 = copy.copy(model)
model2.layers = model2.layers[:5]
model2.compile(optimizer=rms, loss='categorical_crossentropy')  # don't forget this step!

# Reuse the test indices chosen earlier (`these`) so the same examples are shown.
x_rep = model2.predict(X_test[these])

# One figure per example, one panel per output map.
for example_maps in x_rep:
    plt.figure(figsize=(10, 10), frameon=False)
    for panel, feature_map in enumerate(example_maps, 1):
        plt.subplot(6, 6, panel)
        plt.axis("off")
        plt.imshow(feature_map, cmap='gray', interpolation='nearest')
Just to show off a few more tweaks, we'll run a simple MLP model. Here we use an explicit weight initialization, an alternative to vanilla stochastic gradient descent (RMSprop), and early stopping.
# Plain multilayer perceptron: flatten the (1, 28, 28) input, one 128-unit
# ReLU hidden layer initialised with "glorot_normal", dropout, and a
# 10-unit softmax output.
model = Sequential()
for layer in (
    Flatten(input_shape=(1, 28, 28)),
    Dense(128, init="glorot_normal"),
    Activation("relu"),
    Dropout(0.5),
    Dense(10),
    Activation('softmax'),
):
    model.add(layer)

# RMSprop with default settings as the optimizer.
rms = RMSprop()
model.compile(optimizer=rms, loss='categorical_crossentropy')
# Train for up to 25 epochs with 10% of the data held out for validation; the
# EarlyStopping callback monitors 'val_loss' with patience=0.
early_stop = EarlyStopping(monitor='val_loss', patience=0)
model.fit(
    X_train,
    Y_train,
    nb_epoch=25,
    batch_size=32,
    validation_split=0.1,
    show_accuracy=True,
    verbose=1,
    callbacks=[early_stop]
)