Data dictionary:
BR_INST_EXEC.ALL_BRANCHES: Speculative and retired branches
Cycles (CPU_CLK_UNHALTED.THREAD_P): Thread cycles when thread is not in halt state
ICACHE.MISSES: Number of instruction cache, victim cache, and streaming buffer misses; uncacheable accesses included
Instructions (INST_RETIRED.ANY_P): Number of instructions retired
IPC: Instructions/Cycles
ITLB_MISSES.MISS_CAUSES_A_WALK: Misses at all ITLB levels that cause a page walk
CYCLE_ACTIVITY.CYCLES_L1D_PENDING: Cycles while L1 cache miss demand load is outstanding
L1D.REPLACEMENT: L1D data line replacements
L2_cache_misses (L2_RQSTS.MISS): All requests that miss L2 cache
L2_cache_accesses (L2_RQSTS.REFERENCES): All L2 requests
MACHINE_CLEARS.COUNT: Number of machine clears (nukes) of any type
MACHINE_CLEARS.CYCLES: Cycles where there was a nuke (thread-specific and all thread)
MEM_LOAD_UOPS_RETIRED.L1_MISS: Retired load uops with L1 cache misses as data sources
MISALIGN_MEM_REF.LOADS: Speculative cache line split load uops dispatched to L1 cache
RESOURCE_STALLS.ANY: Resource-related stall cycles
UOPS_EXECUTED.CORE: Number of uops executed on the core
DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK: Load misses in all DTLB levels that cause page walks
UOPS_EXECUTED.THREAD: Counts the number of uops to be executed per thread each cycle
UOPS_ISSUED.ANY: Uops that resource allocation table (RAT) issues to reservation station (RS)
UOPS_ISSUED.STALL_CYCLES: Cycles when RAT does not issue uops to RS for the thread
UOPS_RETIRED.ALL: Actually retired uops
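Two of the entries above are derived metrics rather than raw events. A minimal sketch of how they fall out of the raw counters (the column names mirror the counter names above; the numbers are made up purely for illustration):
import pandas as pd
# Toy frame with fabricated counter values, for illustration only
toy = pd.DataFrame({
    'INST_RETIRED.ANY_P': [2.0e9, 1.5e9],
    'CPU_CLK_UNHALTED.THREAD_P': [1.0e9, 1.0e9],
    'L2_RQSTS.MISS': [4.0e6, 9.0e6],
    'L2_RQSTS.REFERENCES': [2.0e7, 3.0e7],
})
# IPC = Instructions / Cycles
toy['IPC'] = toy['INST_RETIRED.ANY_P'] / toy['CPU_CLK_UNHALTED.THREAD_P']
# L2 miss ratio = L2 misses / all L2 requests
toy['L2_miss_ratio'] = toy['L2_RQSTS.MISS'] / toy['L2_RQSTS.REFERENCES']
print(toy[['IPC', 'L2_miss_ratio']])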
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import minmax_scale
#data_path = '../Data/Instruments_counters/'
data_path = '../Data/Instruments_counters/Iteration 2/'
num_runs = 5
X = {}
for i in range(num_runs):
    fname = data_path + 'Run' + str(i+1) + '_four_ctrs.xlsx'
    X[i+1] = pd.read_excel(fname)
for i in range(num_runs):
    print(X[i+1].shape)
with open('func_names.txt', 'r') as f:
    func_names = []
    for line in f:
        func_names.append(line.strip('\n'))
print(func_names[:3])
for i in range(num_runs):
    bool_idx = []
    for row in range(len(X[i+1])):
        bool_idx.append(X[i+1].loc[row, 'Symbol Name'].strip() in func_names)
    print("Pre removal shape:", X[i+1].shape, end=' ')
    X[i+1] = X[i+1][bool_idx]
    print("; Post removal shape:", X[i+1].shape)
for i in range(num_runs):
    print(X[i+1].columns)
# Drop unnecessary columns
cols_drop = ['Total Samples', 'Running Time', 'Self (ms)']
for i in range(num_runs):
    X[i+1] = X[i+1].drop(cols_drop, axis=1)
    print(X[i+1].shape)
# Strip whitespace in the symbol column and merge the runs on it
X[1]['Symbol Name'] = X[1]['Symbol Name'].str.strip()
X_final = X[1].copy()
for i in range(2, num_runs+1):
    X[i]['Symbol Name'] = X[i]['Symbol Name'].str.strip()
    X_final = X_final.merge(X[i], on="Symbol Name")
print(X_final.shape)
X_final.head()
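# Sanity checks on the merge (a sketch, not in the original notebook): an inner
# merge on 'Symbol Name' only keeps symbols present in every run, so the merged
# frame should have no duplicate symbols and at most as many rows as the
# smallest run.
print(X_final['Symbol Name'].is_unique)
print(len(X_final), [len(X[i+1]) for i in range(num_runs)])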
# Move "Symbol Name" to the front (an extra column drop is left commented out)
cols = X_final.columns.values.tolist()
cols.remove("Symbol Name")
cols.insert(0, "Symbol Name")
X_final = X_final[cols]
#X_final = X_final.drop('TX_MEM.ABORT_CAPACITY_WRITE', axis=1)
print(X_final.shape)
X_final.head()
X_final.describe()
for col in X_final:
    if X_final[col].dtype != 'object':
        X_final[col] = minmax_scale(X_final[col])
X_final.describe()
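# minmax_scale maps each numeric column to [0, 1] via (x - min) / (max - min).
# A quick check of the result (a sketch, not in the original); note that a
# constant column would collapse to all zeros rather than spanning [0, 1].
for col in X_final:
    if X_final[col].dtype != 'object':
        print(col, X_final[col].min(), X_final[col].max())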
# Save the data
pd.to_pickle(X_final, "Intermediate/X_all_runs")
# Look at the correlation matrix
plt.figure(figsize=(10, 8))
plt.matshow(X_final.select_dtypes(exclude='object').corr(), fignum=1)
plt.colorbar()
plt.show()
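# A complement to the heatmap (a sketch, not in the original): list the most
# strongly correlated counter pairs, which are candidates for dropping as
# redundant features.
corr_abs = X_final.select_dtypes(exclude='object').corr().abs()
upper = corr_abs.where(np.triu(np.ones(corr_abs.shape, dtype=bool), k=1))
print(upper.stack().sort_values(ascending=False).head(10))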
# Read the C program
with open("../Data/TSVC_force_vec/tsc.c", 'r') as f:
c_prog = f.readlines()
print len(c_prog)
# Read the vectorization report
# with open("../Data/TSVC_force_vec/reportgcc.lst.txt", 'r') as f:
with open("../Data/TSVC_force_vec_2/reportgcc.lst.txt", 'r') as f:
vec_report = f.readlines()
print len(vec_report)
# Get the first and last lines of the function definitions for each function
func_first_line = {}
func_last_line = {}
for func in func_names:
    # First line: first source line containing " <func>("
    idx = [i for i, st in enumerate(c_prog) if " " + func + "(" in st][0]
    func_first_line[func] = idx
    # Last line: scan forward to the function's return statement
    while True:
        idx += 1
        if "return" in c_prog[idx]:
            func_last_line[func] = idx
            break
    print(func, func_first_line[func], func_last_line[func])
# Compare the line numbers against the vectorization report
loop_vectorized = {}  # 1 for yes, 0 for no
for func in func_names:
    print(func, end=' ')
    for num in range(func_first_line[func], func_last_line[func]+1):
        try:
            to_find = ":" + str(num) + ":"
            # get the first index of occurrence
            idx = [i for i, st in enumerate(vec_report) if to_find in st][0]
            if "vectorized loop" in vec_report[idx]:
                loop_vectorized[func] = 1
                print("Yes")
                break
            # The original `elif "loop not vectorized" or "not beneficial" in ...`
            # was always true (a non-empty string literal is truthy); both
            # substrings must be tested against the report line.
            elif ("loop not vectorized" in vec_report[idx]
                  or "not beneficial" in vec_report[idx]):
                loop_vectorized[func] = 0
                print("No")
                break
        except IndexError:
            pass
print(len(np.nonzero(list(loop_vectorized.values()))[0]))
print(np.nonzero(list(loop_vectorized.values()))[0])
vec_labels = pd.DataFrame(loop_vectorized, index=[0]).T
vec_labels['Symbol Name'] = vec_labels.index
vec_labels.columns = ['Vectorizable', 'Symbol Name']
vec_labels.columns
vec_labels.head()
print(vec_labels.shape)  # s151 never gets matched in the report for some reason; add it manually
vec_labels = pd.concat([vec_labels, pd.DataFrame([{'Vectorizable': 0, 'Symbol Name': 's151'}])],
                       ignore_index=True)
print(vec_labels.shape)
Data_final = X_final.merge(vec_labels, on='Symbol Name')
print(Data_final.shape)
Data_final.to_pickle('Intermediate/Data_final')
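# A quick check of the saved dataset (a sketch, not in the original): reload it
# and look at the label balance before any modeling.
check = pd.read_pickle('Intermediate/Data_final')
print(check.shape)
print(check['Vectorizable'].value_counts())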