The data consists of 48x48 pixel grayscale images of faces. The faces have been automatically registered so that the face is more or less centered and occupies about the same amount of space in each image. The task is to categorize each face based on the emotion shown in the facial expression into one of seven categories (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral).
train.csv contains two columns, "emotion" and "pixels". The "emotion" column contains a numeric code ranging from 0 to 6, inclusive, for the emotion that is present in the image. The "pixels" column contains a string surrounded in quotes for each image. The contents of this string are space-separated pixel values in row-major order. test.csv contains only the "pixels" column and your task is to predict the emotion column.
The training set consists of 28,709 examples. The public test set used for the leaderboard consists of 3,589 examples. The final test set, which was used to determine the winner of the competition, consists of another 3,589 examples.
This dataset was prepared by Pierre-Luc Carrier and Aaron Courville, as part of an ongoing research project. They have graciously provided the workshop organizers with a preliminary version of their dataset to use for this contest.
%matplotlib inline
import sys
sys.path.append('/Users/rahulsridhar/anaconda2/lib/python2.7/site-packages') # To import cv2, which datagenerator uses
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from os import listdir
from os.path import isfile, join
import datagenerator
import cv2
import pickle
import os
np.random.seed(15)
# Create training data: write one "<image path> <label>" line to train.txt for
# every image found under Training/<label>/, for labels 0 through 6.
with open('train.txt', 'w') as f:
    for i in range(7):
        path = 'Training/' + str(i)
        # os.listdir returns entries in arbitrary order, so sort to make the
        # generated list reproducible. An explicit loop is used so that `fl`
        # stays bound at module level; the cleanup step later deletes it by name.
        onlyfiles = []
        for fl in sorted(listdir(path)):
            # Drop .DS_Store entries before counting so the printed count
            # matches the number of lines actually written.
            if isfile(join(path, fl)) and ".DS_Store" not in fl:
                onlyfiles.append(fl)
        # Per-label image count and a sample file name (blank if the folder is empty)
        print("%d %s" % (len(onlyfiles), onlyfiles[0] if onlyfiles else ''))
        for curr_file in onlyfiles:
            fname = path + '/' + curr_file + ' ' + str(i) + '\n'
            f.write(fname)
# Create public test data: write one "<image path> <label>" line to test.txt
# for every image found under PublicTest/<label>/, for labels 0 through 6.
with open('test.txt', 'w') as f:
    for i in range(7):
        path = 'PublicTest/' + str(i)
        # os.listdir returns entries in arbitrary order, so sort to make the
        # generated list reproducible. An explicit loop is used so that `fl`
        # stays bound at module level; the cleanup step later deletes it by name.
        onlyfiles = []
        for fl in sorted(listdir(path)):
            # Drop .DS_Store entries before counting so the printed count
            # matches the number of lines actually written.
            if isfile(join(path, fl)) and ".DS_Store" not in fl:
                onlyfiles.append(fl)
        # Per-label image count and a sample file name (blank if the folder is empty)
        print("%d %s" % (len(onlyfiles), onlyfiles[0] if onlyfiles else ''))
        for curr_file in onlyfiles:
            fname = path + '/' + curr_file + ' ' + str(i) + '\n'
            f.write(fname)
# Create private test data: write one "<image path> <label>" line to
# pri_test.txt for every image found under PrivateTest/<label>/, labels 0 through 6.
with open('pri_test.txt', 'w') as f:
    for i in range(7):
        path = 'PrivateTest/' + str(i)
        # os.listdir returns entries in arbitrary order, so sort to make the
        # generated list reproducible. An explicit loop is used so that `fl`
        # stays bound at module level; the cleanup step later deletes it by name.
        onlyfiles = []
        for fl in sorted(listdir(path)):
            # Drop .DS_Store entries before counting so the printed count
            # matches the number of lines actually written.
            if isfile(join(path, fl)) and ".DS_Store" not in fl:
                onlyfiles.append(fl)
        # Per-label image count and a sample file name (blank if the folder is empty)
        print("%d %s" % (len(onlyfiles), onlyfiles[0] if onlyfiles else ''))
        for curr_file in onlyfiles:
            fname = path + '/' + curr_file + ' ' + str(i) + '\n'
            f.write(fname)
# Sanity check: re-open the three generated list files and report how many
# lines each one contains (train, public test, private test - in that order).
with open("train.txt", 'r') as f1, open("test.txt", 'r') as f2, open("pri_test.txt", 'r') as f3:
    train_lines, test_lines, pri_test_lines = (
        handle.readlines() for handle in (f1, f2, f3)
    )
print("%d %d %d" % (len(train_lines), len(test_lines), len(pri_test_lines)))

# Free the names that are no longer needed: first the line buffers, then the
# leftovers from the list-building loops and the file handles.
del train_lines, test_lines, pri_test_lines
del onlyfiles, f, f1, f2, fl, i, path
# Read all training entries; each line of train.txt is "<image path> <label>".
with open('train.txt', 'r') as f:
    allfiles = f.readlines()

# Accumulate the per-pixel sum of the min-max normalized training images.
# The accumulator is 48x48, so every image read below must have that shape.
im_sum = np.zeros((48, 48))
for i, entry in enumerate(allfiles):
    # Strip only the trailing label so that paths containing spaces survive
    filename = entry.rsplit(None, 1)[0]
    currFile = plt.imread(filename)
    # Scale to [0, 1] as float32. dst=None lets OpenCV allocate the output
    # instead of receiving an alias of the source array as destination.
    norm_image = cv2.normalize(src=currFile, dst=None, alpha=0, beta=1,
                               norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    im_sum += norm_image

# Audible notification once the loop is done (uses the `say` shell command)
os.system('say "Your program has finished"')
# Sanity checks on the accumulation: last loop index reached, accumulator
# shape, and the accumulator contents themselves.
print(i)
print(im_sum.shape)
print(im_sum)
# A NaN anywhere in the accumulator would show up in these extremes
print("%s %s %s" % ("Min max", im_sum.min(), im_sum.max()))

# Per-pixel mean image over all training entries
im_mean = im_sum / len(allfiles)
print(im_mean)
print("%s %s %s" % ("Mean min max", im_mean.min(), im_mean.max()))

# Persist the mean image for later use (np.save appends the .npy extension)
np.save("Training_Img_Mean", im_mean)
# Read a sample image: the first entry of the training list
# (each entry is "<image path> <label>", so strip the trailing label).
temp = plt.imread(allfiles[0].rsplit(None, 1)[0])

# Normalize the image to [0, 1] as float32. dst=None makes OpenCV allocate a
# fresh output array, so `temp` keeps the original pixel values for comparison.
norm_image = cv2.normalize(src=temp, dst=None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

# Draw each image in its own figure; otherwise the second imshow would be
# drawn onto the same axes as the first.
plt.figure()
plt.title('Original image')
plt.imshow(temp, cmap='gray') # Original image
plt.figure()
plt.title('Normalized image')
plt.imshow(norm_image, cmap='gray') # Normalized image