The data consists of 48x48 pixel grayscale images of faces. The faces have been automatically registered so that the face is more or less centered and occupies about the same amount of space in each image. The task is to categorize each face based on the emotion shown in the facial expression into one of seven categories (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral).
train.csv contains two columns, "emotion" and "pixels". The "emotion" column contains a numeric code ranging from 0 to 6, inclusive, for the emotion present in the image. The "pixels" column contains, for each image, a quoted string of space-separated pixel values in row-major order. test.csv contains only the "pixels" column, and the task is to predict the emotion for each image.
The training set consists of 28,709 examples. The public test set used for the leaderboard consists of 3,589 examples. The final test set, which was used to determine the winner of the competition, consists of another 3,589 examples.
This dataset was prepared by Pierre-Luc Carrier and Aaron Courville, as part of an ongoing research project. They have graciously provided the workshop organizers with a preliminary version of their dataset to use for this contest.
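This notebook works from images that were extracted ahead of time into Training/, PublicTest/ and PrivateTest/ folders, so the CSV files are never parsed here. For reference, a minimal sketch of decoding one row of train.csv into a 48x48 array (using only the csv module and numpy; not part of the pipeline below) could look like this:
import csv
import numpy as np
with open('train.csv', 'r') as f:
    reader = csv.DictReader(f)                 # columns: "emotion", "pixels"
    row = next(reader)                         # first training example
    emotion = int(row['emotion'])              # numeric code, 0-6
    # "pixels" is a space-separated string of 48*48 values in row-major order
    pixels = np.fromstring(row['pixels'], dtype=np.uint8, sep=' ')
    img = pixels.reshape(48, 48)
print emotion, img.shape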
%matplotlib inline
import graphlab as gl
import matplotlib.pyplot as plt
import numpy as np
from timeit import default_timer as timer
import os
trainfile = 'train.txt'
testfile = 'test.txt'
pri_testfile = 'pri_test.txt'
# Read training data into lists
trainlabels = []
traindata = []
# Each line of the label file holds an image filename and its label
with open(trainfile, 'r') as f:
    for line in f:
        currfile, currlabel = line.split()
        traindata.append(currfile)
        trainlabels.append(currlabel)
# Read public test data into lists
testlabels = []
testdata = []
with open(testfile, 'r') as f:
    for line in f:
        currfile, currlabel = line.split()
        testdata.append(currfile)
        testlabels.append(currlabel)
# Read private test data into lists
pri_testlabels = []
pri_testdata = []
with open(pri_testfile, 'r') as f:
    for line in f:
        currfile, currlabel = line.split()
        pri_testdata.append(currfile)
        pri_testlabels.append(currlabel)
# Verify dataset sizes
print len(traindata), len(trainlabels), len(testdata), len(testlabels), len(pri_testdata), len(pri_testlabels)
# Load the images
trainimgs = gl.image_analysis.load_images('Training/', random_order=False)
testimgs = gl.image_analysis.load_images('PublicTest/', random_order=False)
pri_testimgs = gl.image_analysis.load_images('PrivateTest/', random_order=False)
# Verify data shapes
print trainimgs.shape, testimgs.shape, pri_testimgs.shape
# Create columns for the data labels
trainimgs['label'] = trainlabels
testimgs['label'] = testlabels
pri_testimgs['label'] = pri_testlabels
# Randomly permute rows of the data
np.random.seed(0)
train_idx = np.random.permutation(len(trainimgs))
test_idx = np.random.permutation(len(testimgs))
pri_test_idx = np.random.permutation(len(pri_testimgs))
# Create a column for the random permutation
trainimgs['idx'] = train_idx
testimgs['idx'] = test_idx
pri_testimgs['idx'] = pri_test_idx
train_idx
pri_testimgs.print_rows(2)
# Sort the datasets based on the random permutation
trainimgs_rand = trainimgs.sort('idx')
testimgs_rand = testimgs.sort('idx')
pri_testimgs_rand = pri_testimgs.sort('idx')
pri_testimgs_rand.print_rows(5)
# Remove the columns that are not required
trainimgs_rand.remove_columns(['idx', 'path'])
testimgs_rand.remove_columns(['idx', 'path'])
pri_testimgs_rand.remove_columns(['idx', 'path'])
# Save the random permutations - for future use
np.save('GraphLabOutput/train_idx', train_idx)
np.save('GraphLabOutput/test_idx', test_idx)
np.save('GraphLabOutput/pri_test_idx', pri_test_idx)
training_data = trainimgs_rand
test_data = testimgs_rand
pri_test_data = pri_testimgs_rand
print training_data.shape, test_data.shape, pri_test_data.shape#, validation_data.shape
# Resize the images in the datasets
training_data['image'] = gl.image_analysis.resize(training_data['image'], 48, 48, 1, decode=True)
test_data['image'] = gl.image_analysis.resize(test_data['image'], 48, 48, 1, decode=True)
pri_test_data['image'] = gl.image_analysis.resize(pri_test_data['image'], 48, 48, 1, decode=True)
# Convert the labels to int
training_data['label'] = training_data['label'].astype(int)
#validation_data['label'] = validation_data['label'].astype(int)
test_data['label'] = test_data['label'].astype(int)
pri_test_data['label'] = pri_test_data['label'].astype(int)
# Verify that there are 7 unique labels
print np.unique(training_data['label'])
#print np.unique(validation_data['label'])
print np.unique(test_data['label'])
print np.unique(pri_test_data['label'])
from graphlab import mxnet as mx
# Load training data mean
training_im_mean = np.load("Training_Img_Mean.npy")
print training_im_mean.shape
print "Mean min max", np.min(training_im_mean), np.max(training_im_mean)
training_im_mean = np.reshape(training_im_mean, (48, 48, 1)) # Reshape mean
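# Training_Img_Mean.npy was computed offline; a minimal sketch of how such a
# per-pixel mean could be rebuilt from the resized training images (a guess at
# the preprocessing, not the original script; assumes gl.Image.pixel_data):
recomputed_mean = np.mean([img.pixel_data for img in training_data['image']], axis=0)
print recomputed_mean.shape  # likely (48, 48) for the grayscale images
# np.save('Training_Img_Mean.npy', recomputed_mean)  # would regenerate the saved file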
# Store the public test data in an iterator
testdataiter = mx.io.SFrameImageIter(test_data, data_field=['image'],
                                     label_field='label',
                                     data_name='data',
                                     label_name='sm_label',
                                     mean_nd=training_im_mean)
# Store the private test data in an iterator
pri_testdataiter = mx.io.SFrameImageIter(pri_test_data, data_field=['image'],
                                         label_field='label',
                                         data_name='data',
                                         label_name='sm_label',
                                         mean_nd=training_im_mean)
# Define the network symbols
# Conv layers
data = mx.symbol.Variable('data')
conv1= mx.symbol.Convolution(data = data, name='conv1', num_filter=64, kernel=(3,3), stride=(2,2), pad=(2,2))
bn1 = mx.symbol.BatchNorm(data = conv1, name="bn1")
act1 = mx.symbol.Activation(data = bn1, name='relu1', act_type="relu")
#mp1 = mx.symbol.Pooling(data = act1, name = 'mp1', kernel=(2,2), stride=(2,2), pool_type='max')
mp1 = act1
conv2= mx.symbol.Convolution(data = mp1, name='conv2', num_filter=64, kernel=(3,3), stride=(2,2), pad=(2,2))
bn2 = mx.symbol.BatchNorm(data = conv2, name="bn2")
act2 = mx.symbol.Activation(data = bn2, name='relu2', act_type="relu")
#mp2 = mx.symbol.Pooling(data = act2, name = 'mp2', kernel=(2,2), stride=(2,2), pool_type='max')
mp2 = act2
conv3= mx.symbol.Convolution(data = mp2, name='conv3', num_filter=64, kernel=(3,3), stride=(2,2), pad=(2,2))
bn3 = mx.symbol.BatchNorm(data = conv3, name="bn3")
act3 = mx.symbol.Activation(data = bn3, name='relu3', act_type="relu")
mp3 = mx.symbol.Pooling(data = act3, name = 'mp3', kernel=(2,2), stride=(2,2), pool_type='max')
#mp3 = act3
mp3 = mx.symbol.Dropout(data = mp3, p = 0.5)
conv4 = mx.symbol.Convolution(data = mp3, name='conv4', num_filter=128, kernel=(3,3), stride=(2,2), pad=(2,2))
bn4 = mx.symbol.BatchNorm(data = conv4, name="bn4")
act4 = mx.symbol.Activation(data = bn4, name='relu4', act_type="relu")
#mp4 = mx.symbol.Pooling(data = act4, name = 'mp4', kernel=(2,2), stride=(2,2), pool_type='max')
mp4 = act4
conv5 = mx.symbol.Convolution(data = mp4, name='conv5', num_filter=128, kernel=(3,3), stride=(2,2), pad=(2,2))
bn5 = mx.symbol.BatchNorm(data = conv5, name="bn5")
act5 = mx.symbol.Activation(data = bn5, name='relu5', act_type="relu")
mp5 = mx.symbol.Pooling(data = act5, name = 'mp5', kernel=(2,2), stride=(2,2), pool_type='max')
# Fully connected layers
fl = mx.symbol.Flatten(data = mp5, name="flatten")
fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=1024)
fc2 = mx.symbol.Dropout(data = fc2, p = 0.7)
fc3 = mx.symbol.FullyConnected(data = fc2, name='fc3', num_hidden=512)
fc3 = mx.symbol.Dropout(data = fc3, p = 0.7)
fc4 = mx.symbol.FullyConnected(data = fc3, name='fc4', num_hidden=7)
softmax = mx.symbol.SoftmaxOutput(data = fc4, name = 'sm')
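# Sanity check (not in the original notebook): the symbol API can report the
# shapes flowing through the network for a given input size, confirming that
# the five strided convolutions plus the two pooling layers reduce a 48x48
# input to a 1x1 feature map before the fully connected layers.
arg_shapes, out_shapes, aux_shapes = softmax.infer_shape(data=(128, 1, 48, 48))
print zip(softmax.list_arguments(), arg_shapes)
print out_shapes  # expected: [(128, 7)] -- one score per emotion class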
batch_size = 128
num_epoch = 15
start = timer()
# Prepare the training data iterator from SFrame
# `data_name` must match the first layer's name of the network.
# `label_name` must match the last layer's name plus "_label".
dataiter = mx.io.SFrameImageIter(training_data, data_field='image',
                                 label_field='label',
                                 data_name='data',
                                 label_name='sm_label',
                                 batch_size=batch_size,
                                 mean_nd=training_im_mean)
# random_flip=True  (optional augmentation, left disabled here)
# Train the network
adam = mx.optimizer.Adam(learning_rate=0.0001)
model = mx.model.FeedForward.create(softmax, X=dataiter,
                                    num_epoch=num_epoch,
                                    learning_rate=0.0001, wd=0.0008,
                                    momentum=0.9,
                                    eval_metric=mx.metric.Accuracy(),
                                    optimizer=adam)
end = timer()
os.system('say "Your program has finished"')
print "Time elapsed = ", end-start, "seconds"
# Make predictions on the public test data
testpred = model.predict(testdataiter)
os.system('say "Your program has finished"')
# Make predictions on the private test data
pri_testpred = model.predict(pri_testdataiter)
os.system('say "Your program has finished"')
# Verify shapes of predictions
print testpred.shape, pri_testpred.shape
# Look at the mean prediction scores for each class
print np.mean(testpred, axis = 0)
print np.mean(pri_testpred, axis = 0)
print np.sum(testpred) # Should be = number of test elements
print np.sum(pri_testpred) # Should be = number of test elements
# Save the predictions
testpred_argmax = np.argmax(testpred, axis = 1)
pri_testpred_argmax = np.argmax(pri_testpred, axis = 1)
np.save('GraphLabOutput/testpred_GraphLab_CNN_48', testpred_argmax)
np.save('GraphLabOutput/pri_testpred_GraphLab_CNN_48', pri_testpred_argmax)
print "Public test accuracy = ", np.mean(np.equal(np.argmax(testpred, axis = 1), test_data['label']))
print "Private test accuracy = ", np.mean(np.equal(np.argmax(pri_testpred, axis = 1), pri_test_data['label']))
Results - trained on 80% of training data
10 epochs - 0.001 Adam, 0.0001 wd L2, softmax, momentum 0.9, 128 batch size - training 61.4% test 39.426%; 30 mins
15 epochs - 0.001 Adam, 0.0001 wd L2, softmax, momentum 0.9, 128 batch size - training 72.34% test 44.135%; 46 mins
8 epochs - 0.0001 Adam, 0.0005 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 FC (thrown away) and dropout 0.7 after 3rd conv layer - training 42.92% public test private test ; 45 mins (5 conv layers; 64 64 64 128 128)
Results - trained on 100% of training data
15 epochs - 0.001 Adam, 0.0001 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.75 (thrown away) - training 72.2% test 43.6%; 56 mins
10 epochs - 0.001 Adam, 0.0001 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0 (thrown away) - training 61.8% test 34%; 56 mins (3rd conv layer had only 64 filters)
20 epochs - 0.001 Adam, 0.0003 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.5 (thrown away) - training 76.67% test 43.99%; 56 mins
20 epochs - 0.001 Adam, 0.0003 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.6 (thrown away) - training 78.65% test 40.84%; 70 mins
15 epochs - 0.001 Adam, 0.0003 wd L2, softmax, momentum 0.9, 64 batch size, dropout 0.7 (thrown away) - training 69.71% test 44.72%; 57 mins
15 epochs - 0.001 Adam, 0.0003 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 (thrown away) - training 73.86% test 46.14%; 84 mins (4 conv layers; 64 64 64 128)
15 epochs - 0.001 Adam, 0.0005 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 (thrown away) - training 72.28% public test 46.89% private test 45.55%; 84 mins (5 conv layers; 64 64 64 128 128)
7 epochs - 0.001 Adam, 0.0005 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 (thrown away) - training 53.33% public test 47.87% private test 46.17%; 84 mins (5 conv layers; 64 64 64 128 128)
10 epochs - 0.0001 Adam, 0.0005 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 FC (thrown away) and dropout 0.5 after 3rd conv layer - training 48.86% public test 47.28% private test 47.62%; 74 mins (5 conv layers; 64 64 64 128 128)
10 epochs - 0.0001 Adam, 0.0005 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 FC (thrown away) and dropout 0.5 after 3rd conv layer - training 48.5% public test 47.23% private test 48.06%; 75 mins (5 conv layers; 64 64 64 128 128)
15 epochs - 0.0001 Adam, 0.0008 wd L2, softmax, momentum 0.9, 128 batch size, dropout 0.7 FC (thrown away) and dropout 0.5 after 3rd conv layer - training 52.4% public test 48.09% private test 48.25%; >100 mins (5 conv layers; 64 64 64 128 128)
# Training accuracies for the best model
# (hard-coded from the training log, since FeedForward.create does not return the per-epoch metrics)
accuracy = [27, 35.6, 39.7, 41.8, 43.3, 44.8, 45.8, 46.8, 47.6, 48.5]
error = [100 - a for a in accuracy]
num_epoch = 10
epochs = np.arange(1, num_epoch+1)
plt.figure(figsize=(10, 8))
plt.plot(epochs, accuracy)
plt.xlabel('Epoch')
plt.ylabel('Training accuracy (%)')
plt.title('Training accuracy vs. Number of epochs')
num_epoch = 10
epochs = np.arange(1, num_epoch+1)
plt.figure(figsize=(10, 8))
plt.plot(epochs, error)
plt.xlabel('Epoch')
plt.ylabel('Training error (%)')
plt.title('Training error vs. Number of epochs')