From 3d3cebb9560e0f3973a6dbecda7e2ab4a15f64d4 Mon Sep 17 00:00:00 2001
From: Sabrina Hoppe
Date: Sat, 5 May 2018 22:22:21 +0200
Subject: [PATCH] evaluation code

---
 05_plot_weights.py               | 234 +++++++++++++++++++++++++++++++
 06_baselines.py                  | 101 +++++++++++++
 07_evaluation_across_contexts.py | 155 ++++++++++++++++++++
 08_descriptive.py                |  99 +++++++++++++
 09_plot_ws_hist.py               |  37 +++++
 README.md                        |  47 +++++--
 6 files changed, 660 insertions(+), 13 deletions(-)
 create mode 100644 05_plot_weights.py
 create mode 100644 06_baselines.py
 create mode 100644 07_evaluation_across_contexts.py
 create mode 100644 08_descriptive.py
 create mode 100644 09_plot_ws_hist.py

diff --git a/05_plot_weights.py b/05_plot_weights.py
new file mode 100644
index 0000000..2e635dd
--- /dev/null
+++ b/05_plot_weights.py
@@ -0,0 +1,234 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from config import conf
+import os, sys
+import pandas as pns
+from config import names as gs
+import getopt
+import matplotlib.gridspec as gridspec
+from sklearn.metrics import f1_score
+
+import seaborn as sns
+sns.set(style='whitegrid', color_codes=True)
+sns.set_context('poster')
+
+dark_color = sns.xkcd_rgb['charcoal grey']
+light_color = sns.xkcd_rgb['cloudy blue']
+
+def plot_weights():
+    # for each personality trait, compute the list of median feature importances
+    # across all cross validation folds and iterations
+    medianlist = []
+    for t in xrange(0, conf.n_traits):
+        medianlist.append(
+            list(imp_df.loc[imp_df['T'] == t].groupby(by='feat_num')['feature importance'].median()))
+
+    # find the 15 highest feature importances for each trait and write them into a .tex table - see Table 2, SI
+    n = 15
+    most_important_features = []
+    most_important_features_lists = []
+    for ml in medianlist:
+        locallist = []
+        for i in xrange(1, (n+1)):
+            fn = gs.full_long_label_list[int(np.argsort(np.array(ml))[-i])]
+            locallist.append(fn)
+            if fn not in most_important_features:
+                most_important_features.append(fn)
+        most_important_features_lists.append(locallist)
+    most_important_features.sort()
+
+    # write the full list of feature importances into a .tex table - shown in Table 2, SI
+    filename = conf.figure_folder + '/table2.tex'
+    with open(filename, 'w') as f:
+        f.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI')
+        f.write('\\\\\n\hline\n')
+        for fi in xrange(0, len(most_important_features)):
+            f.write(most_important_features[fi])
+            for t in xrange(0, conf.n_traits):
+                m = imp_df[(imp_df['T'] == t) & (imp_df.feature == most_important_features[fi])]['feature importance'].median()
+                if most_important_features[fi] in most_important_features_lists[t]:
+                    f.write('& \\textbf{' + '%.3f}' % m)
+                else:
+                    f.write('&' + '%.3f' % m)
+            f.write('\\\\\n')
+    print filename, 'written.'
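+
+    # imp_df (created or loaded in __main__ below) holds one row per iteration,
+    # outer CV fold, trait and feature, with columns 'feature importance',
+    # 'T' (trait index), 'feature' (feature label) and 'feat_num' (feature index).
+    # For illustration, the aggregation above is equivalent to, for trait 0:
+    #   imp_df[imp_df['T'] == 0].groupby('feat_num')['feature importance'].median()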
+
+    # create Figure 2
+    # first collect the set of individual top TOP_N features per trait:
+    TOP_N = 10
+    featlabels = []
+    for trait in xrange(0, conf.n_traits):
+        basedata = imp_df.loc[imp_df['T'] == trait]
+        gp = basedata.groupby(by='feature')['feature importance'].median()
+        order = gp.sort_values(ascending=False)
+        featlabels.extend(order[:TOP_N].keys())
+    super_feats = np.unique(np.array(featlabels))
+
+    # collect the sum of feature importances for these labels, to sort the features by their median
+    super_feats_importance_sum = np.zeros((len(super_feats)))
+    for i in xrange(0, len(super_feats)):
+        super_feats_importance_sum[i] = imp_df[imp_df.feature == super_feats[i]].groupby(by=['T'])['feature importance'].median().sum()
+    super_feats_sort_indices = np.argsort(super_feats_importance_sum)[::-1]
+
+    # add some interesting features from related work to the list of features whose importance will be shown
+    must_have_feats = [
+        'inter quartile range x', 'range x', 'maximum x', 'std x', '1st quartile x', 'range pupil diameter', 'median y',
+        'mean difference of subsequent x', 'mean fixation duration', '3rd quartile y',
+        'fixation rate', 'mean saccade amplitude', 'dwelling time'
+        ]
+    # but only add them if they are not in the list yet
+    additional_feats = np.array([a for a in must_have_feats if a not in super_feats], dtype=object)
+
+    # collect the sum of feature importances for these labels as well, so they can be sorted by their median importance in the plot
+    additional_feats_importance_sum = np.zeros((len(additional_feats)))
+    for trait in xrange(0, conf.n_traits):
+        basedata = imp_df.loc[imp_df['T'] == trait]
+        for i in xrange(0, len(additional_feats)):
+            logi = basedata.feature == additional_feats[i]
+            additional_feats_importance_sum[i] += float(basedata[logi]['feature importance'].median())
+    additional_feats_sort_indices = np.argsort(additional_feats_importance_sum)[::-1]
+
+    # create the figure
+    plt.figure(figsize=(20, 12))
+    grs = gridspec.GridSpec(len(super_feats) + len(additional_feats) + 1, conf.n_traits)
+
+    for trait in xrange(0, conf.n_traits):
+        # upper part of the figure, i.e. important features
+        ax = plt.subplot(grs[:len(super_feats), trait])
+        basedata = imp_df.loc[imp_df['T'] == trait]
+        feat_importances = []
+        for i in xrange(0, len(super_feats)):
+            logi = basedata.feature == super_feats[super_feats_sort_indices][i]
+            feat_importances.append(list(basedata[logi]['feature importance']))
+        bp = plt.boxplot(x=feat_importances,  # notch=True, labels=super_feats[super_feats_sort_indices],
+                         patch_artist=True, sym='', vert=False, whis='range', positions=np.arange(0, len(feat_importances)))
+
+        # aesthetics
+        for i in xrange(0, len(super_feats)):
+            bp['boxes'][i].set(color=dark_color)
+            bp['boxes'][i].set(facecolor=light_color)
+            bp['whiskers'][2 * i].set(color=dark_color, linestyle='-')
+            bp['whiskers'][2 * i + 1].set(color=dark_color, linestyle='-')
+            bp['caps'][2 * i].set(color=dark_color)
+            bp['caps'][2 * i + 1].set(color=dark_color)
+            bp['medians'][i].set(color=dark_color)
+
+        if not trait == 0:
+            plt.ylabel('')
+            plt.setp(ax.get_yticklabels(), visible=False)
+        else:
+            ax.set_yticklabels(super_feats[super_feats_sort_indices])
+
+        xlimmax = 0.47
+        xticks = [0.15, 0.35]
+        plt.xlim((0, xlimmax))
+        plt.xticks(xticks)
+        plt.setp(ax.get_xticklabels(), visible=False)
+
+        # lower part of the figure, i.e. features from related work
+        ax = plt.subplot(grs[(-len(additional_feats)):, trait])
+        basedata = imp_df.loc[imp_df['T'] == trait]
+        feat_importances = []
+        for i in xrange(0, len(additional_feats)):
+            logi = basedata.feature == additional_feats[additional_feats_sort_indices][i]
+            feat_importances.append(basedata[logi]['feature importance'])
+        bp = plt.boxplot(x=feat_importances, patch_artist=True, sym='', vert=False, whis='range',
+                         positions=np.arange(0, len(feat_importances)))
+
+        # aesthetics
+        for i in xrange(0, len(additional_feats)):
+            bp['boxes'][i].set(color=dark_color)
+            bp['boxes'][i].set(facecolor=light_color)  # , alpha=0.5)
+            bp['whiskers'][2 * i].set(color=dark_color, linestyle='-')
+            bp['whiskers'][2 * i + 1].set(color=dark_color, linestyle='-')
+            bp['caps'][2 * i].set(color=dark_color)
+            bp['caps'][2 * i + 1].set(color=dark_color)
+            bp['medians'][i].set(color=dark_color)  # , linewidth=.1)
+
+        if not trait == 0:
+            plt.ylabel('')
+            plt.setp(ax.get_yticklabels(), visible=False)
+        else:
+            ax.set_yticklabels(additional_feats[additional_feats_sort_indices])
+        plt.xlim((0, xlimmax))
+        plt.xticks(xticks)
+        if trait == 3:
+            plt.xlabel(conf.medium_traitlabels[trait] + '\n\nFeature Importance')
+        else:
+            plt.xlabel(conf.medium_traitlabels[trait])
+
+    filename = conf.figure_folder + '/figure2.pdf'
+    plt.savefig(filename, bbox_inches='tight')
+    print filename.split('/')[-1], 'written.'
+    plt.close()
+
+
+if __name__ == "__main__":
+    # target file names - save the table of F1 scores, feature importances and majority predictions there
+    datapathI = conf.get_result_folder(conf.annotation_all) + '/f1s.csv'  # F1 scores from each iteration
+    datapathII = conf.get_result_folder(conf.annotation_all) + '/feature_importance.csv'  # feature importance from each iteration
+    datapathIII = conf.get_result_folder(conf.annotation_all) + '/majority_predictions.csv'  # majority voting result for each participant over all iterations
+
+    if not os.path.exists(conf.figure_folder):
+        os.mkdir(conf.figure_folder)
+
+    # if the target files do not exist yet, create them
+    if (not os.path.exists(datapathI)) or (not os.path.exists(datapathII)) or (not os.path.exists(datapathIII)):
+        f1s = []
+        feature_importances = []
+        majority_predictions = []
+        for trait in xrange(0, conf.n_traits):
+            predictions = np.zeros((conf.n_participants, conf.max_n_iter), dtype=int) - 1
+            ground_truth = np.loadtxt(conf.binned_personality_file, delimiter=',', skiprows=1, usecols=(trait+1,))
+            for si in xrange(0, conf.max_n_iter):
+                filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True)
+                if os.path.exists(filename):
+                    data = np.load(filename)
+                    if (data['predictions'] > 0).all():
+                        assert data['f1'] == f1_score(ground_truth, data['predictions'], average='macro')
+                        f1s.append([data['f1'], conf.medium_traitlabels[trait]])
+                    else:
+                        # if there was no time window for a condition, e.g. if only shopping data is evaluated,
+                        # the prediction for each participant without a single time window is set to -1
+                        # and must not enter the mean F1 score.
+                        # Thus, the F1 score is re-computed here on the relevant participants only.
+                        pr = data['predictions']
+                        mask = pr > 0
+                        dt = ground_truth[mask]
+                        pr = pr[mask]
+
+                        f1s.append([f1_score(dt, pr, average='macro'), conf.medium_traitlabels[trait]])
+
+                    for outer_cv_i in xrange(0, 5):  # index of the outer CV fold, not a participant
+                        for fi in xrange(0, conf.max_n_feat):
+                            feature_importances.append([data['feature_importances'][outer_cv_i, fi], trait, gs.full_long_label_list[fi], fi])
+
+                    predictions[:, si] = data['predictions']
+                else:
+                    print 'did not find', filename
+
+            # compute the majority vote for each participant over all iterations
+            for p in xrange(0, conf.n_participants):
+                (values, counts) = np.unique(predictions[p, predictions[p, :] > 0], return_counts=True)
+                ind = np.argmax(counts)
+                majority_predictions.append([values[ind], p, conf.medium_traitlabels[trait]])
+
+        f1s_df = pns.DataFrame(data=f1s, columns=['F1', 'trait'])
+        f1s_df.to_csv(datapathI)
+
+        imp_df = pns.DataFrame(data=feature_importances, columns=['feature importance', 'T', 'feature', 'feat_num'])
+        imp_df.to_csv(datapathII)
+
+        majority_predictions_df = pns.DataFrame(data=majority_predictions, columns=['prediction', 'participant', 'trait'])
+        majority_predictions_df.to_csv(datapathIII)
+
+    else:
+        print 'No new results are collected as previous results were available. If you want to overwrite them, please delete the following files:'
+        print datapathI
+        print datapathII
+        print datapathIII
+
+        f1s_df = pns.read_csv(datapathI)
+        imp_df = pns.read_csv(datapathII)
+        majority_predictions_df = pns.read_csv(datapathIII)
+
+    plot_weights()  # Figure 2
diff --git a/06_baselines.py b/06_baselines.py
new file mode 100644
index 0000000..dc37c56
--- /dev/null
+++ b/06_baselines.py
@@ -0,0 +1,101 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from config import conf
+import os, sys
+import pandas as pns
+from config import names as gs
+import getopt
+import matplotlib.gridspec as gridspec
+from sklearn.metrics import f1_score, accuracy_score
+
+import seaborn as sns
+sns.set(style='whitegrid', color_codes=True)
+sns.set_context('poster')
+
+dark_color = sns.xkcd_rgb['charcoal grey']
+light_color = sns.xkcd_rgb['cloudy blue']
+
+max_n_feat = conf.max_n_feat
+m_iter = conf.max_n_iter
+
+featurelabels = gs.full_long_label_list
+participant_ids = np.arange(0, conf.n_participants)
+
+
+def plot_overview():
+    all_baselines.groupby(by=['trait', 'clf_name'])['F1'].mean().to_csv(conf.figure_folder + '/figure1.csv')
+    print 'figure1.csv written'
+
+    sns.set(font_scale=2.1)
+    plt.figure(figsize=(20, 10))
+    ax = plt.subplot(1, 1, 1)
+    sns.barplot(x='trait', y='F1', hue='clf_name', data=all_baselines, capsize=.05, errwidth=3,
+                linewidth=3, estimator=np.mean, edgecolor=dark_color,
+                palette={'our classifier': sns.xkcd_rgb['windows blue'],
+                         'most frequent class': sns.xkcd_rgb['faded green'],
+                         'random guess': sns.xkcd_rgb['greyish brown'],
+                         'label permutation': sns.xkcd_rgb['dusky pink']
+                         }
+                )
+    plt.plot([-0.5, 6.5], [0.33, 0.33], c=dark_color, linestyle='--', linewidth=3, label='theoretical chance level')
+    handles, labels = ax.get_legend_handles_labels()
+    ax.legend([handles[1], handles[2], handles[3], handles[4], handles[0]],
+              [labels[1], labels[2], labels[3], labels[4], labels[0]], fontsize=20)
+    plt.xlabel('')
+    plt.ylabel('F1 score', fontsize=20)
+    plt.ylim((0, 0.55))
+    filename = conf.figure_folder + '/figure1.pdf'
+    plt.savefig(filename, bbox_inches='tight')
+    plt.close()
+    print 'wrote', filename.split('/')[-1]
+
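+
+# Figure 1 compares, per trait, the macro F1 score of our classifier against
+# three baselines (most frequent class, uniform random guess and label
+# permutation); the dashed line marks the theoretical chance level of 1/3
+# for the three personality score ranges.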
+if __name__ == "__main__":
+    # collect F1 scores for our classifier on all data, from the file written by 05_plot_weights.py
+    datapath = conf.get_result_folder(conf.annotation_all) + '/f1s.csv'
+    if not os.path.exists(datapath):
+        print 'could not find', datapath
+        print 'consider (re-)running 05_plot_weights.py'
+        sys.exit(1)
+    our_classifier = pns.read_csv(datapath)
+    our_classifier['clf_name'] = 'our classifier'
+
+    # baseline 1: guess the most frequent class from each training set, as written by 03_train_baseline.py
+    datapath = conf.result_folder + '/most_frequ_class_baseline.csv'
+    if not os.path.exists(datapath):
+        print 'could not find', datapath
+        print 'consider (re-)running 03_train_baseline.py'
+        sys.exit(1)
+    most_frequent_class_df = pns.read_csv(datapath)
+    most_frequent_class_df['clf_name'] = 'most frequent class'
+
+    # compute all other baselines ad hoc
+    collection = []
+    for trait in xrange(0, conf.n_traits):
+        # baseline 2: random guess
+        truth = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=(trait+1,), delimiter=',')
+        for i in xrange(0, 100):
+            rand_guess = np.random.randint(1, 4, conf.n_participants)
+            f1 = f1_score(truth, rand_guess, average='macro')
+            collection.append([f1, conf.medium_traitlabels[trait], i, 'random guess'])
+
+        # baseline 3: label permutation test
+        # computed by 04_label_permutation_test.sh and written into the results folder, i.e. only loaded here
+        for si in xrange(0, m_iter):
+            filename_rand = conf.get_result_filename(conf.annotation_all, trait, True, si, add_suffix=True)
+            if os.path.exists(filename_rand):
+                data = np.load(filename_rand)
+                pr = data['predictions']
+                dt = truth[pr > 0]
+                pr = pr[pr > 0]
+                f1 = f1_score(dt, pr, average='macro')
+                collection.append([f1, conf.medium_traitlabels[trait], si, 'label permutation'])
+            else:
+                print 'did not find', filename_rand
+                print 'consider (re-)running 04_label_permutation_test.sh'
+                sys.exit(1)
+
+    collectiondf = pns.DataFrame(data=collection, columns=['F1', 'trait', 'iteration', 'clf_name'])
+    all_baselines = pns.concat([our_classifier, most_frequent_class_df, collectiondf])
+
+    plot_overview()  # Figure 1
diff --git a/07_evaluation_across_contexts.py b/07_evaluation_across_contexts.py
new file mode 100644
index 0000000..344ef61
--- /dev/null
+++ b/07_evaluation_across_contexts.py
@@ -0,0 +1,155 @@
+import numpy as np
+from config import conf
+import os, sys
+from config import names as gs
+import pandas as pd
+
+truth = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',')
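+
+# truth has shape (n_participants, n_traits); each entry is the binned
+# personality score range (1, 2 or 3) of one participant for one trait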
+
+# all comparisons to perform. Each has
+#  a name,
+#  two annotation values that determine whether classifiers trained on all data or on specific subsets only will be examined,
+#  and names for the two tasks to compare
+comparisons = dict({'split halves': [conf.annotation_all, conf.annotation_all, 'first half', 'second half'],
+                    'two ways': [conf.annotation_ways, conf.annotation_ways, 'way there', 'way back'],
+                    'way vs shop in general classifier': [conf.annotation_all, conf.annotation_all, 'both ways', 'shop'],
+                    'way vs shop in specialised classifier': [conf.annotation_ways, conf.annotation_shop, 'both ways', 'shop'],
+                    'way in specialised classifier vs way in general classifier': [conf.annotation_ways, conf.annotation_all, 'both ways', 'both ways'],
+                    'shop in specialised classifier vs shop in general classifier': [conf.annotation_shop, conf.annotation_all, 'shop', 'shop']
+                    })
+
+def get_majority_vote(predictions):
+    if len(predictions) == 0:
+        return -1
+    (values, counts) = np.unique(predictions, return_counts=True)
+    ind = np.argmax(counts)
+    return values[ind]
+
+def get_average_correlation(predA, predB, m_iter):
+    """
+    :param predA: predictions for task A, n_participants x m_iter
+    :param predB: predictions for task B, n_participants x m_iter (or n_participants if there is only one prediction per participant)
+    :param m_iter: number of iterations to average over
+    :return: average correlation between predA and predB across iterations (Fisher z-averaged)
+    """
+    correlations = []
+    for si in xrange(0, m_iter):
+        if predB.ndim == 1:
+            if np.sum(predA[:, si]) > 0:
+                A = predA[:, si]
+                B = predB
+                consider = (A > 0)
+                A = A[consider]
+                B = B[consider]
+            else:
+                continue
+        else:
+            if np.sum(predA[:, si]) > 0 and (np.sum(predB[:, si]) > 0):
+                A = predA[:, si]
+                B = predB[:, si]
+                consider = (A > 0) & (B > 0)
+                A = A[consider]
+                B = B[consider]
+            else:
+                continue
+
+        correlation = np.corrcoef(np.array([A, B]))[0][1]
+        correlations.append(correlation)
+
+    avg = np.tanh(np.mean(np.arctanh(np.array(correlations))))
+    return avg
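+
+# Correlation coefficients should not be averaged directly; get_average_correlation
+# therefore averages them on the Fisher z-scale (arctanh) and transforms the
+# mean back. A minimal sketch of the difference, with made-up values:
+#   np.mean([0.2, 0.8])                        # 0.50, naive mean
+#   np.tanh(np.mean(np.arctanh([0.2, 0.8])))   # ~0.57, Fisher z average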
+
+
+if __name__ == "__main__":
+    # check if the output target folder already exists and create it if not
+    if not os.path.exists(conf.figure_folder):
+        os.mkdir(conf.figure_folder)
+
+    # collect masks for each participant, annotation (all data, shop, way), window size
+    # and subset in question (e.g. first half, or way to the shop)
+    # each mask is True for samples of a particular participant and subset; False for all others
+    window_masks = []
+    for wsi in xrange(0, len(conf.all_window_sizes)):
+        x_file, y_file, id_file = conf.get_merged_feature_files(conf.all_window_sizes[wsi])
+
+        for annotation_value in conf.annotation_values:
+            ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)
+
+            if annotation_value == conf.annotation_shop:
+                ids_ws = ids_ws[ids_ws[:, 1] == conf.time_window_annotation_shop, :]
+            elif annotation_value == conf.annotation_ways:
+                ids_ws = ids_ws[(ids_ws[:, 1] == conf.time_window_annotation_wayI) | (ids_ws[:, 1] == conf.time_window_annotation_wayII), :]
+
+            for p in xrange(0, conf.n_participants):
+                ids_ws_p = ids_ws[(ids_ws[:, 0] == p), :]
+
+                window_masks.append([annotation_value, p, wsi, 'first half', ids_ws_p[:, 2] == conf.time_window_annotation_halfI])
+                window_masks.append([annotation_value, p, wsi, 'second half', ids_ws_p[:, 2] == conf.time_window_annotation_halfII])
+
+                window_masks.append([annotation_value, p, wsi, 'way there', ids_ws_p[:, 1] == conf.time_window_annotation_wayI])
+                window_masks.append([annotation_value, p, wsi, 'way back', ids_ws_p[:, 1] == conf.time_window_annotation_wayII])
+
+                window_masks.append([annotation_value, p, wsi, 'shop', ids_ws_p[:, 1] == conf.time_window_annotation_shop])
+                window_masks.append([annotation_value, p, wsi, 'both ways', np.logical_or(ids_ws_p[:, 1] == conf.time_window_annotation_wayI, ids_ws_p[:, 1] == conf.time_window_annotation_wayII)])
+
+    window_masks_df = pd.DataFrame(window_masks, columns=['annotation', 'participant', 'window size index', 'subtask', 'mask'])
+
+    # collect predictions for each participant and each setting that is relevant for one of the comparisons.
+    # Results are directly written into figures/table1-5.csv
+    with open(conf.figure_folder + '/table1-5.csv', 'w') as f:
+        f.write('comparison')
+        for trait in xrange(0, conf.n_traits):
+            f.write(',' + conf.medium_traitlabels[trait])
+        f.write('\n')
+
+        for comp_title, (annotation_value_I, annotation_value_II, subtaskI, subtaskII) in comparisons.items():
+            f.write(comp_title)
+            result_filename = conf.result_folder + '/predictions_' + comp_title.replace(' ', '_') + '.npz'
+            if not os.path.exists(result_filename):
+                print 'computing data for', comp_title
+                print 'Note that this might take a while - if the script is run again, intermediate results will be available and speed up all computations.'
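+
+                # predictions default to 0; get_majority_vote returns -1 for a
+                # participant without a single time window in a subtask, and
+                # get_average_correlation later only keeps entries > 0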
+                predictions_I = np.zeros((conf.n_participants, conf.n_traits, conf.max_n_iter), dtype=int)
+                predictions_II = np.zeros((conf.n_participants, conf.n_traits, conf.max_n_iter), dtype=int)
+
+                for trait in xrange(0, conf.n_traits):
+                    for si in xrange(0, conf.max_n_iter):
+                        filenameI = conf.get_result_filename(annotation_value_I, trait, False, si, add_suffix=True)
+                        filenameII = conf.get_result_filename(annotation_value_II, trait, False, si, add_suffix=True)
+
+                        if os.path.exists(filenameI) and os.path.exists(filenameII):
+                            dataI = np.load(filenameI)
+                            detailed_predictions_I = dataI['detailed_predictions']
+                            chosen_window_indices_I = dataI['chosen_window_indices']
+
+                            dataII = np.load(filenameII)
+                            detailed_predictions_II = dataII['detailed_predictions']
+                            chosen_window_indices_II = dataII['chosen_window_indices']
+
+                            for p, window_index_I, window_index_II, local_detailed_preds_I, local_detailed_preds_II in zip(xrange(0, conf.n_participants), chosen_window_indices_I, chosen_window_indices_II, detailed_predictions_I, detailed_predictions_II):
+                                maskI = window_masks_df[(window_masks_df.annotation == annotation_value_I) &
+                                                        (window_masks_df.participant == p) &
+                                                        (window_masks_df['window size index'] == window_index_I) &
+                                                        (window_masks_df.subtask == subtaskI)
+                                                        ].as_matrix(columns=['mask'])[0][0]
+                                maskII = window_masks_df[(window_masks_df.annotation == annotation_value_II) &
+                                                         (window_masks_df.participant == p) &
+                                                         (window_masks_df['window size index'] == window_index_II) &
+                                                         (window_masks_df.subtask == subtaskII)
+                                                         ].as_matrix(columns=['mask'])[0][0]
+
+                                predictions_I[p, trait, si] = get_majority_vote(np.array(local_detailed_preds_I)[maskI])
+                                predictions_II[p, trait, si] = get_majority_vote(np.array(local_detailed_preds_II)[maskII])
+                        else:
+                            print 'did not find', filenameI, 'or', filenameII
+                            sys.exit(1)
+                np.savez(result_filename, predictions_I=predictions_I, predictions_II=predictions_II)
+            else:
+                data = np.load(result_filename)
+                predictions_I = data['predictions_I']
+                predictions_II = data['predictions_II']
+
+            # predictions_I are predictions from one context, predictions_II from the other.
+            # compute their average correlation and write it to file
+            for t in xrange(0, conf.n_traits):
+                corrI = get_average_correlation(predictions_I[:, t, :], predictions_II[:, t, :], 100)
+                f.write(',' + '%.2f' % corrI)
+            f.write('\n')
diff --git a/08_descriptive.py b/08_descriptive.py
new file mode 100644
index 0000000..1bde7ed
--- /dev/null
+++ b/08_descriptive.py
@@ -0,0 +1,99 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from config import names as gs
+from config import conf
+import sys
+import math
+import os
+
+
+def get_stats():
+    annotation_times = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)[:, 1:]
+    shop_duration = annotation_times[:, 1] - annotation_times[:, 0]
+    print
+    print 'Time spent in the shop:'
+    print 'MEAN', np.mean(shop_duration / 60.), 'min'
+    print 'STD', np.std(shop_duration / 60.), 'min'
+
+
+def get_feature_correlations():
+    # find the window size that was most frequently chosen
+    hist_sum = np.zeros((len(conf.all_window_sizes)), dtype=int)
+    for trait in xrange(0, conf.n_traits):
+        for si in xrange(0, 100):
+            filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True)
+            if os.path.exists(filename):
+                data = np.load(filename)
+                chosen_window_indices = data['chosen_window_indices']
+                hist, _ = np.histogram(chosen_window_indices, bins=np.arange(-0.5, len(conf.all_window_sizes), 1))
+                hist_sum += hist
+            else:
+                print 'did not find', filename
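+
+    # hist_sum now counts, per window size index, how often that window size
+    # was chosen in the nested cross validation; e.g. (illustrative values)
+    #   np.histogram([0, 0, 2], bins=np.arange(-0.5, 3, 1))[0]  ->  array([2, 0, 1])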
+    ws = conf.all_window_sizes[np.argmax(hist_sum)]
+
+    # load features for the most frequently chosen time window
+    x_file, y_file, id_file = conf.get_merged_feature_files(ws)
+    x_ws = np.genfromtxt(x_file, delimiter=',', skip_header=1)
+    ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)[:, 0]
+    y = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',')
+    y_ws = np.genfromtxt(y_file, delimiter=',', skip_header=1).astype(int)
+
+    # compute the average feature per person
+    avg_x_ws = np.zeros((conf.n_participants, conf.max_n_feat))
+    for p in xrange(0, conf.n_participants):
+        avg_x_ws[p, :] = np.mean(x_ws[ids_ws == p, :], axis=0)
+
+    # correlate each average feature with each trait: the last row of the
+    # correlation matrix holds the correlation of feature fi with each trait
+    # (the trailing self-correlation of 1 is cut off)
+    feature_correlations_avg = []
+    for fi in xrange(0, conf.max_n_feat):
+        C_avg = np.corrcoef(y.transpose(), avg_x_ws[:, fi])[-1][:-1]
+        feature_correlations_avg.append(C_avg)
+
+    feature_correlations_avg = np.array(feature_correlations_avg)
+
+    # find the 15 highest correlations for each trait and write them into a .tex table - see Table 4, SI
+    n = 15
+    highest_correlated_features = []
+    highest_correlated_features_lists = []
+    highest_correlated_features_names = []
+    for t in xrange(0, conf.n_traits):
+        hcf = feature_correlations_avg[:, t].argsort()[-n:]
+        locallist = []
+        for f in hcf:
+            if f not in highest_correlated_features:
+                highest_correlated_features.append(f)
+                highest_correlated_features_names.append(gs.full_long_label_list[f].lower())
+            locallist.append(f)
+
+        highest_correlated_features_lists.append(locallist)
+
+    # sort the collected feature indices alphabetically by feature name
+    features = zip(highest_correlated_features_names, highest_correlated_features)
+    highest_correlated_features = [y for (x, y) in sorted(features)]
+
+    filename = conf.figure_folder + '/table4.tex'
+    print len(highest_correlated_features)
+    with open(filename, 'w') as f:
+        f.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI')
+        f.write('\\\\\n\hline\n')
+        for fi in highest_correlated_features:
+            f.write(gs.full_long_label_list[fi])
+            for t in xrange(0, conf.n_traits):
+                fc = feature_correlations_avg[fi, t]
+                if math.isnan(fc):
+                    f.write('&-')
+                elif fi in highest_correlated_features_lists[t]:
+                    f.write('&\\textbf{' + '%.2f}' % fc)
+                else:
+                    f.write('&' + '%.2f' % fc)
+            f.write('\\\\\n')
+    print
+    print filename, 'written'
+
+
+if __name__ == "__main__":
+    if not os.path.exists(conf.figure_folder):
+        os.makedirs(conf.figure_folder)
+    get_stats()  # prints statistics on the time participants spent inside the shop
+    get_feature_correlations()  # Table 4
diff --git a/09_plot_ws_hist.py b/09_plot_ws_hist.py
new file mode 100644
index 0000000..e69f3aa
--- /dev/null
+++ b/09_plot_ws_hist.py
@@ -0,0 +1,37 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn
+from config import conf
+import os
+
+# count how often each window size was chosen across all traits and iterations
+hist_sum = np.zeros((len(conf.all_window_sizes)), dtype=int)
+for trait in xrange(0, conf.n_traits):
+    for si in xrange(0, 100):
+        filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True)
+        if os.path.exists(filename):
+            data = np.load(filename)
+            chosen_window_indices = data['chosen_window_indices']
+            hist, _ = np.histogram(chosen_window_indices, bins=np.arange(-0.5, len(conf.all_window_sizes), 1))
+            hist_sum += hist
+        else:
+            print 'did not find', filename
+
+hist_sum_sum = np.sum(hist_sum)
+
+plt.figure()
+ax = plt.subplot(111)
+bars = ax.bar(conf.all_window_sizes, hist_sum / float(hist_sum_sum) * 100, width=8, tick_label=[str(x) for x in conf.all_window_sizes])
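+
+# annotate each bar with the absolute count; the bar heights are percentages,
+# so height / 100 * hist_sum_sum recovers the raw count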
+for rect in bars:
+    height = rect.get_height()
+    ax.text(rect.get_x() + rect.get_width()/2., 1.01*height,
+            '%d' % (height/100.*hist_sum_sum),
+            ha='center', va='bottom')
+
+# hide the right and top spines
+ax.spines['right'].set_visible(False)
+ax.spines['top'].set_visible(False)
+plt.xlabel('window size in s')
+plt.ylabel('percentage')
+plt.savefig('figures/ws_hist.pdf')
+plt.close()
diff --git a/README.md b/README.md
index 8f8ba0f..80edb65 100644
--- a/README.md
+++ b/README.md
@@ -25,27 +25,48 @@ reproducing the paper results step by step:
 1. __Extract features from raw gaze data__:
    `python 00_compute_features.py` to compute gaze features for all participants
    Once extracted, the features are stored in `features/ParticipantXX/window_features_YY.npy` where XX is the participant number and YY the length of the sliding window in seconds.
+
+
 2. __Train random forest classifiers__
-   `./01_train_classifiers.sh` to reproduce the evaluation setting described in the paper in which each classifier was trained 100 times.
-   `./02_train_specialized_classifiers.sh` to train specialized classifiers on parts of the data (specifically on data from inside the shop or on the way).
+    `./01_train_classifiers.sh` to reproduce the evaluation setting described in the paper in which each classifier was trained 100 times.
+    `./02_train_specialized_classifiers.sh` to train specialized classifiers on parts of the data (specifically on data from inside the shop or on the way).
-   If the scripts cannot be executed, you might not have the right access permissions to do so. On Linux, you can try `chmod +x 01_train_classifiers.sh`, `chmod +x 02_train_specialized_classifiers.sh` and `chmod +x 03_label_permutation_test.sh` (see below for when/how to use the last script).
+    If the scripts cannot be executed, you might not have the right access permissions to do so. On Linux, you can try `chmod +x 01_train_classifiers.sh`, `chmod +x 02_train_specialized_classifiers.sh` and `chmod +x 04_label_permutation_test.sh` (see below for when/how to use the last script).
-   In case you want to call the script differently, e.g. to speed-up the computation or try with different parameters, you can pass the following arguments to `classifiers.train_classifier`:
-   `-t` trait index between 0 and 6
-   `-l` lowest number of repetitions, e.g. 0
-   `-m` max number of repetitions, e.g. 100
-   `-a` using partial data only: 0 (all data), 1 (way data), 2(shop data)
+    In case you want to call the script differently, e.g. to speed up the computation or to try different parameters, you can pass the following arguments to `classifiers.train_classifier`:
+    `-t` trait index between 0 and 6
+    `-l` lowest number of repetitions, e.g. 0
+    `-m` max number of repetitions, e.g. 100
+    `-a` using partial data only: 0 (all data), 1 (way data), 2 (shop data)
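+    For example, assuming `classifiers` can be run as a script (check how the bash scripts above invoke it in your checkout), a single run for the first trait on all data might look like `python classifiers.py -t 0 -l 0 -m 100 -a 0`.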
-   In case of performance issues, it might be useful to check `_conf.py` and change `max_n_jobs` to restrict the number of jobs (i.e. threads) running in parallel.
+    In case of performance issues, it might be useful to check `_conf.py` and change `max_n_jobs` to restrict the number of jobs (i.e. threads) running in parallel.
-   The results will be saved in `results/A0` for all data, `results/A1` for way data only and `results/A2` for data inside a shop. Each file is named `TTT_XXX.npz`, where TTT is the abbreviation of the personality trait (`O`,`C`,`E`,`A`,`N` for the Big Five and `CEI` or `PCS` for the two curiosity measures). XXX enumerates the classifiers (remember that we always train 100 classifiers for evaluation because there is some randomness involved in the training process).
+    The results will be saved in `results/A0` for all data, `results/A1` for way data only and `results/A2` for data inside a shop. Each file is named `TTT_XXX.npz`, where TTT is the abbreviation of the personality trait (`O`,`C`,`E`,`A`,`N` for the Big Five and `CEI` or `PCS` for the two curiosity measures). XXX enumerates the classifiers (remember that we always train 100 classifiers for evaluation because there is some randomness involved in the training process).
-3. __Evaluate Baselines__
-   * To train a classifier that always predicts the most frequent personality score range from its current training set, please execute `python 03_train_baseline.py`
-   * To train classifiers on permuted labels, i.e. perform the so-called label permutation test, please execute `./04_label_permutation_test.sh`
+3. __Train baselines__
+    * To train a classifier that always predicts the most frequent personality score range from its current training set, please execute `python 03_train_baseline.py`
+    * To train classifiers on permuted labels, i.e. perform the so-called label permutation test, please execute `./04_label_permutation_test.sh`
+4. __Performance analysis__
+    * Run `python 05_plot_weights.py` to extract feature importance scores. The scores are visualized in `figures/figure2.pdf`, which corresponds to Figure 2 in the paper, and written to `figures/table2.tex`, which is shown in Table 2 in the supplementary information.
+      (This step also computes the F1 scores required for the next step, so do not skip it.)
+    * Execute `python 06_baselines.py` to read the baseline results from disk and plot them together with our classifier's performance. The figure is written to `figures/figure1.pdf` as well as `figures/figure1.csv` and corresponds to Figure 1 in the paper.
+
+
+5. __Context comparison__
+`python 07_evaluation_across_contexts.py` to compute the average correlation coefficients between predictions based on data from different contexts. The table with all coefficients will be written to `figures/table1-5.csv`; its entries are shown in Table 1 and Table 5 in the supplementary information.
+If (some) files in the results folder are missing, try re-running one of the bash (\*.sh) scripts (see above).
+
+6. __Descriptive analysis__
+`python 08_descriptive.py` to compute the correlation between each participant's average feature for the most frequently chosen time window and their personality score range. Results are written to `figures/table4.tex` and shown in Table 4 in the supplementary information.
+
+7. __Window size histogram__
+`python 09_plot_ws_hist.py` to plot a histogram of the window sizes chosen during the nested cross validation routine to `figures/ws_hist.pdf`.
+
+All these scripts write intermediate results to disk, i.e. if you start a script a second time it will be much faster - but the first run can take some time, e.g. up to 8 hours to train classifiers for one context on a 16 core machine, or 1 hour to compute correlations between contexts.
+
 ## Citation
 If you want to cite this project, please use the following Bibtex format: