import numpy as np import matplotlib.pyplot as plt from config import names as gs from config import conf import sys import math import os def get_stats(): annotation_times = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)[:, 1:] shop_duration = annotation_times[:, 1] - annotation_times[:, 0] print print 'Time spent in the shop:' print 'MEAN', np.mean(shop_duration/60.), 'min' print 'STD', np.std(shop_duration/60.), 'min' def get_feature_correlations(): # find the window size that was most frequently chosen hist_sum = np.zeros((len(conf.all_window_sizes)), dtype=int) for trait in xrange(0, conf.n_traits): for si in xrange(0, 100): filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True) if os.path.exists(filename): data = np.load(filename) chosen_window_indices = data['chosen_window_indices'] hist, _ = np.histogram(chosen_window_indices, bins=np.arange(-0.5, len(conf.all_window_sizes), 1)) hist_sum += hist else: print 'did not find', filename ws = conf.all_window_sizes[np.argmax(hist_sum)] # load features for the most frequently chosen time window x_file, y_file, id_file = conf.get_merged_feature_files(ws) x_ws = np.genfromtxt(x_file, delimiter=',', skip_header=1) ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)[:,0] y = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',') y_ws = np.genfromtxt(y_file, delimiter=',', skip_header=1).astype(int) # compute average feature per person avg_x_ws = np.zeros((conf.n_participants, conf.max_n_feat)) for p in xrange(0,conf.n_participants): avg_x_ws[p,:] = np.mean(x_ws[ids_ws == p, :], axis=0) feature_correlations_avg = [] for fi in xrange(0, conf.max_n_feat): C_avg = np.corrcoef(y.transpose(), avg_x_ws[:, fi])[-1][:-1] feature_correlations_avg.append(C_avg) feature_correlations_avg = np.array(feature_correlations_avg) # find the 5th to highest correlation for each trait and write them into a .tex table - see Table 4 in SI n = 15 highest_correlated_features = [] highest_correlated_features_lists = [] highest_correlated_features_names = [] for t in xrange(0, conf.n_traits): hcf = feature_correlations_avg[:,t].argsort()[-n:] locallist = [] for f in hcf: if f not in highest_correlated_features: highest_correlated_features.append(f) highest_correlated_features_names.append(gs.full_long_label_list[f].lower()) locallist.append(f) highest_correlated_features_lists.append(locallist) features = zip(highest_correlated_features_names, highest_correlated_features) highest_correlated_features = [y for (x,y) in sorted(features)] #highest_correlated_features.sort() filename = conf.figure_folder + '/table4.tex' print len(highest_correlated_features) with open(filename, 'w') as f: f.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI') f.write('\\\\\n\hline\n') for fi in highest_correlated_features: f.write(gs.full_long_label_list[fi]) for t in xrange(0, conf.n_traits): fc = feature_correlations_avg[fi,t] if math.isnan(fc): f.write('&-') elif fi in highest_correlated_features_lists[t]: f.write('&\\textbf{'+'%.2f}'%fc) else: f.write('&'+'%.2f'%fc) f.write('\\\\\n') print print filename, 'written' if __name__ == "__main__": import os if not os.path.exists(conf.figure_folder): os.makedirs(conf.figure_folder) get_stats() # prints statistics on the time participants spent inside the shop get_feature_correlations() # Table 4