Applying transfer learning to the facial expression recognition problem
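The 48x48 grayscale expression images are resized to 227x227 RGB and pushed through a pretrained AlexNet up to fc6, which acts as a frozen feature extractor; a new stack of fully connected layers (fc7 through fc10) is then trained from scratch to classify 7 expression categories.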
%matplotlib inline
import os
import cv2
import numpy as np
import tensorflow as tf
from timeit import default_timer as timer
from matplotlib import pyplot as plt
from AlexNet.alexnet import AlexNet
from datagenerator import ImageDataGenerator
# Load the per-pixel mean image of the training set (48x48 grayscale),
# then convert it to the 227x227 RGB format AlexNet expects.
train_mean = np.load('Training_Img_Mean.npy')
print(train_mean.shape)  # (48, 48)
train_mean = train_mean.astype(np.float32)  # cv2.cvtColor requires this dtype
train_mean = cv2.cvtColor(train_mean, cv2.COLOR_GRAY2RGB)
train_mean = cv2.resize(train_mean, (227, 227))
print(train_mean.shape)  # (227, 227, 3)
plt.imshow(train_mean)
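# For reference: a hypothetical sketch of how 'Training_Img_Mean.npy' could
# have been produced (the actual preprocessing script is not part of this
# notebook). Assumes `train_images` is an (N, 48, 48) array of training faces:
#
#   train_images = np.load('train_images.npy')  # assumed filename
#   np.save('Training_Img_Mean.npy', train_images.mean(axis=0))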
# Build the model: frozen AlexNet through fc6, plus a new fully connected head
n_classes = 7  # number of facial expression categories
x = tf.placeholder(tf.float32, (None, 227, 227, 3))
y = tf.placeholder(tf.float32, (None, n_classes))
fc6 = AlexNet(x, feature_extract=True)
fc6 = tf.stop_gradient(fc6)  # freeze AlexNet: no gradients flow into the pretrained weights

# fc7: 4096 -> 4096
shape = (fc6.get_shape().as_list()[-1], 4096)
fc7W = tf.Variable(tf.truncated_normal(shape, stddev=0.001))
fc7b = tf.Variable(tf.zeros(4096))
fc7 = tf.nn.relu(tf.matmul(fc6, fc7W) + fc7b)

# fc8: 4096 -> 2048
shape2 = (fc7.get_shape().as_list()[-1], 2048)
fc8W = tf.Variable(tf.truncated_normal(shape2, stddev=0.001))
fc8b = tf.Variable(tf.zeros(2048))
fc8 = tf.nn.relu(tf.matmul(fc7, fc8W) + fc8b)

# fc9: 2048 -> 512
shape3 = (fc8.get_shape().as_list()[-1], 512)
fc9W = tf.Variable(tf.truncated_normal(shape3, stddev=0.001))
fc9b = tf.Variable(tf.zeros(512))
fc9 = tf.nn.relu(tf.matmul(fc8, fc9W) + fc9b)

# fc10: 512 -> n_classes (raw logits, no activation)
shape4 = (fc9.get_shape().as_list()[-1], n_classes)
fc10W = tf.Variable(tf.truncated_normal(shape4, stddev=0.001))
fc10b = tf.Variable(tf.zeros(n_classes))
logits = tf.matmul(fc9, fc10W) + fc10b
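# Trainable parameter count of the new head (worked out from the shapes above):
#   fc7:  4096*4096 + 4096 = 16,781,312
#   fc8:  4096*2048 + 2048 =  8,390,656
#   fc9:  2048*512  +  512 =  1,049,088
#   fc10:  512*7    +    7 =      3,591
#   total                  ~ 26.2M parameters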
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss_op = tf.reduce_mean(cross_entropy)
opt = tf.train.AdamOptimizer()
# Only the new head variables are trained; the pretrained AlexNet weights stay fixed
train_op = opt.minimize(loss_op, var_list=[fc7W, fc7b, fc8W, fc8b, fc9W, fc9b, fc10W, fc10b])
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()
# Hyperparameters and data generators
start = timer()  # start timing the whole run
training_iters = 200000  # total number of training images to process (not steps)
batch_size = 128
display_step = 10
train_file = 'train.txt'
test_file = 'test.txt'
train_generator = ImageDataGenerator(train_file, horizontal_flip=False, shuffle=True,
                                     mean=train_mean, scale_size_alex=(227, 227, 3),
                                     nb_classes=n_classes, alex=True)
test_generator = ImageDataGenerator(test_file, horizontal_flip=False, shuffle=False,
                                    mean=train_mean, scale_size_alex=(227, 227, 3),
                                    nb_classes=n_classes, alex=True)
# Number of full batches per epoch for each split (the remainder is dropped)
train_batches_per_epoch = train_generator.data_size // batch_size
test_batches_per_epoch = test_generator.data_size // batch_size
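# Optional sanity check (a sketch; note it consumes one batch from the shuffled
# training queue): confirm the generator output matches the placeholder shapes.
#
#   bx, by = train_generator.next_batch(batch_size)
#   assert bx.shape == (batch_size, 227, 227, 3)
#   assert by.shape == (batch_size, n_classes)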
# Train the network
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step * batch_size < training_iters:
        batch_x, batch_y = train_generator.next_batch(batch_size)
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            loss, acc = sess.run([loss_op, accuracy], feed_dict={x: batch_x, y: batch_y})
            print("\nIter " + str(step * batch_size) + " Loss=", loss, "Accuracy=", acc)
        step += 1
    print("-----Optimization Done-----")

    # Test accuracy - average the per-batch accuracies over all full test batches
    test_acc = 0.
    test_count = 0
    print("Testing the model on batch: ")
    for _ in range(test_batches_per_epoch):
        batch_tx, batch_ty = test_generator.next_batch(batch_size)
        acc = sess.run(accuracy, feed_dict={x: batch_tx, y: batch_ty})
        test_acc += acc
        test_count += 1
        print(test_count, end=' ')
    # Every test batch holds exactly batch_size images, so the mean of the
    # per-batch accuracies equals the accuracy over all images evaluated.
    test_acc /= test_count
    print("\nTest Accuracy = {:.7f}".format(test_acc))
os.system('say "Your program has finished"')  # audible notification (macOS only)
end = timer()
print("\nTime elapsed =", end - start, "seconds")