eye_movements_personality/08_descriptive.py

100 lines
3.5 KiB
Python

import numpy as np
import matplotlib.pyplot as plt
from config import names as gs
from config import conf
import sys
import math
import os
def get_stats():
annotation_times = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)[:, 1:]
shop_duration = annotation_times[:, 1] - annotation_times[:, 0]
print
print 'Time spent in the shop:'
print 'MEAN', np.mean(shop_duration/60.), 'min'
print 'STD', np.std(shop_duration/60.), 'min'
def get_feature_correlations():
# find the window size that was most frequently chosen
hist_sum = np.zeros((len(conf.all_window_sizes)), dtype=int)
for trait in xrange(0, conf.n_traits):
for si in xrange(0, 100):
filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True)
if os.path.exists(filename):
data = np.load(filename)
chosen_window_indices = data['chosen_window_indices']
hist, _ = np.histogram(chosen_window_indices, bins=np.arange(-0.5, len(conf.all_window_sizes), 1))
hist_sum += hist
else:
print 'did not find', filename
ws = conf.all_window_sizes[np.argmax(hist_sum)]
# load features for the most frequently chosen time window
x_file, y_file, id_file = conf.get_merged_feature_files(ws)
x_ws = np.genfromtxt(x_file, delimiter=',', skip_header=1)
ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)[:,0]
y = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',')
y_ws = np.genfromtxt(y_file, delimiter=',', skip_header=1).astype(int)
# compute average feature per person
avg_x_ws = np.zeros((conf.n_participants, conf.max_n_feat))
for p in xrange(0,conf.n_participants):
avg_x_ws[p,:] = np.mean(x_ws[ids_ws == p, :], axis=0)
feature_correlations_avg = []
for fi in xrange(0, conf.max_n_feat):
C_avg = np.corrcoef(y.transpose(), avg_x_ws[:, fi])[-1][:-1]
feature_correlations_avg.append(C_avg)
feature_correlations_avg = np.array(feature_correlations_avg)
# find the 5th to highest correlation for each trait and write them into a .tex table - see Table 4 in SI
n = 15
highest_correlated_features = []
highest_correlated_features_lists = []
highest_correlated_features_names = []
for t in xrange(0, conf.n_traits):
hcf = feature_correlations_avg[:,t].argsort()[-n:]
locallist = []
for f in hcf:
if f not in highest_correlated_features:
highest_correlated_features.append(f)
highest_correlated_features_names.append(gs.full_long_label_list[f].lower())
locallist.append(f)
highest_correlated_features_lists.append(locallist)
features = zip(highest_correlated_features_names, highest_correlated_features)
highest_correlated_features = [y for (x,y) in sorted(features)]
#highest_correlated_features.sort()
filename = conf.figure_folder + '/table4.tex'
print len(highest_correlated_features)
with open(filename, 'w') as f:
f.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI')
f.write('\\\\\n\hline\n')
for fi in highest_correlated_features:
f.write(gs.full_long_label_list[fi])
for t in xrange(0, conf.n_traits):
fc = feature_correlations_avg[fi,t]
if math.isnan(fc):
f.write('&-')
elif fi in highest_correlated_features_lists[t]:
f.write('&\\textbf{'+'%.2f}'%fc)
else:
f.write('&'+'%.2f'%fc)
f.write('\\\\\n')
print
print filename, 'written'
if __name__ == "__main__":
import os
if not os.path.exists(conf.figure_folder):
os.makedirs(conf.figure_folder)
get_stats() # prints statistics on the time participants spent inside the shop
get_feature_correlations() # Table 4