eye_movements_personality/08_descriptive.py

import numpy as np
import matplotlib.pyplot as plt
from config import names as gs
from config import conf
import sys
import math
import os


def get_stats():
	"""Print mean and standard deviation of the time spent in the shop.

	Reads the CSV at ``conf.annotation_path`` (header row and first column
	are skipped) and uses the difference between the next two columns as the
	per-row duration. The result is divided by 60 and reported as minutes,
	so the columns are presumably timestamps in seconds -- TODO confirm.
	"""
	raw = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)
	times = raw[:, 1:]
	duration_min = (times[:, 1] - times[:, 0]) / 60.

	# np.std uses its default ddof=0 (population standard deviation)
	print('')
	print('Time spent in the shop:')
	print('MEAN %s min' % (np.mean(duration_min),))
	print('STD %s min' % (np.std(duration_min),))


def _most_frequent_window_size():
	"""Return the entry of ``conf.all_window_sizes`` that was chosen most often.

	Sums, over every trait and result index 0..99, a histogram of the
	``chosen_window_indices`` array stored in each existing result file.
	Missing result files are reported on stdout and skipped.
	"""
	n_sizes = len(conf.all_window_sizes)
	counts = np.zeros(n_sizes, dtype=int)
	# one bin per window index: edges at -0.5, 0.5, 1.5, ...
	bin_edges = np.arange(-0.5, n_sizes, 1)
	for trait in range(0, conf.n_traits):
		for si in range(0, 100):
			filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True)
			if not os.path.exists(filename):
				print('did not find %s' % (filename,))
				continue
			data = np.load(filename)
			hist, _ = np.histogram(data['chosen_window_indices'], bins=bin_edges)
			counts += hist
	# NOTE(review): if no result file exists at all, counts stays all-zero and
	# argmax silently selects the first window size.
	return conf.all_window_sizes[np.argmax(counts)]


def _top_n_finite(values, n):
	"""Return the indices of the ``n`` largest non-NaN entries of ``values``.

	Indices are returned as plain ints in ascending order of value. NaN
	entries are excluded first: a bare ``argsort`` places NaN at the end, so
	it would rank undefined correlations as the highest ones.
	"""
	if n <= 0:
		return []
	values = np.asarray(values, dtype=float)
	finite_idx = np.flatnonzero(~np.isnan(values))
	order = finite_idx[np.argsort(values[finite_idx])]
	return [int(i) for i in order[-n:]]


def get_feature_correlations():
	"""Correlate per-participant mean features with each trait and write table4.tex.

	Steps:
	  1. pick the window size chosen most frequently across all result files,
	  2. load the merged features for that window size and average them per
	     participant,
	  3. compute the correlation coefficient between every averaged feature
	     and every trait,
	  4. for each trait take the ``n`` features with the highest (signed)
	     correlation, and write the union of those features as rows of
	     ``<conf.figure_folder>/table4.tex`` (Table 4 in the SI). Entries that
	     belong to a trait's top ``n`` are set in bold; NaN correlations are
	     written as ``-``.

	Prints the number of table rows and the path of the written file.
	"""
	ws = _most_frequent_window_size()

	# load features for the most frequently chosen time window
	# (the y file returned alongside is not needed here)
	x_file, _unused_y_file, id_file = conf.get_merged_feature_files(ws)
	x_ws = np.genfromtxt(x_file, delimiter=',', skip_header=1)
	ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)[:, 0]
	# presumably one row per participant, ordered by participant id -- TODO confirm
	y = np.genfromtxt(conf.binned_personality_file, skip_header=1,
		usecols=list(range(1, conf.n_traits + 1)), delimiter=',')

	# compute average feature per person
	avg_x_ws = np.zeros((conf.n_participants, conf.max_n_feat))
	for p in range(0, conf.n_participants):
		avg_x_ws[p, :] = np.mean(x_ws[ids_ws == p, :], axis=0)

	# corrcoef stacks the feature as an extra variable below the traits; the
	# last row without its final (self-correlation) entry holds the
	# feature-vs-trait coefficients. Result shape: (max_n_feat, n_traits).
	per_feature = []
	for fi in range(0, conf.max_n_feat):
		per_feature.append(np.corrcoef(y.transpose(), avg_x_ws[:, fi])[-1][:-1])
	feature_correlations_avg = np.array(per_feature)

	# find the n highest correlations for each trait and collect the union of
	# the corresponding features (first-seen order, no duplicates)
	n = 15
	table_features = []
	already_listed = set()
	top_per_trait = []
	for t in range(0, conf.n_traits):
		top = _top_n_finite(feature_correlations_avg[:, t], n)
		top_per_trait.append(set(top))
		for feat in top:
			if feat not in already_listed:
				already_listed.add(feat)
				table_features.append(feat)

	# rows are ordered alphabetically by lower-cased label (ties by index)
	table_features.sort(key=lambda feat: (gs.full_long_label_list[feat].lower(), feat))

	filename = conf.figure_folder + '/table4.tex'
	print(len(table_features))
	with open(filename, 'w') as table:
		# NOTE(review): the header hard-codes seven trait columns; it only
		# lines up with the rows if conf.n_traits == 7.
		table.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI')
		table.write('\\\\\n\\hline\n')
		for feat in table_features:
			cells = [gs.full_long_label_list[feat]]
			for t in range(0, conf.n_traits):
				fc = feature_correlations_avg[feat, t]
				if math.isnan(fc):
					cells.append('&-')
				elif feat in top_per_trait[t]:
					cells.append('&\\textbf{%.2f}' % fc)
				else:
					cells.append('&%.2f' % fc)
			cells.append('\\\\\n')
			table.write(''.join(cells))
	print('')
	print('%s written' % (filename,))


if __name__ == "__main__":
	# Make sure the output folder exists. Attempt the creation and tolerate
	# "already exists" instead of checking first, so a folder created in the
	# meantime does not make the script fail. `os` comes from the file-level import.
	try:
		os.makedirs(conf.figure_folder)
	except OSError:
		if not os.path.isdir(conf.figure_folder):
			raise
	get_stats()  # prints statistics on the time participants spent inside the shop
	get_feature_correlations()  # Table 4
evaluation code 2018-05-05 22:22:21 +02:00			`import numpy as np`
			`import matplotlib.pyplot as plt`
			`from config import names as gs`
			`from config import conf`
			`import sys`
			`import math`
			`import os`


			`def get_stats():`
			`annotation_times = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)[:, 1:]`
			`shop_duration = annotation_times[:, 1] - annotation_times[:, 0]`
			`print`
			`print 'Time spent in the shop:'`
			`print 'MEAN', np.mean(shop_duration/60.), 'min'`
			`print 'STD', np.std(shop_duration/60.), 'min'`


			`def get_feature_correlations():`
			`# find the window size that was most frequently chosen`
			`hist_sum = np.zeros((len(conf.all_window_sizes)), dtype=int)`
			`for trait in xrange(0, conf.n_traits):`
			`for si in xrange(0, 100):`
			`filename = conf.get_result_filename(conf.annotation_all, trait, False, si, add_suffix=True)`
			`if os.path.exists(filename):`
			`data = np.load(filename)`
			`chosen_window_indices = data['chosen_window_indices']`
			`hist, _ = np.histogram(chosen_window_indices, bins=np.arange(-0.5, len(conf.all_window_sizes), 1))`
			`hist_sum += hist`
			`else:`
			`print 'did not find', filename`

			`ws = conf.all_window_sizes[np.argmax(hist_sum)]`

			`# load features for the most frequently chosen time window`
			`x_file, y_file, id_file = conf.get_merged_feature_files(ws)`
			`x_ws = np.genfromtxt(x_file, delimiter=',', skip_header=1)`
			`ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)[:,0]`
			`y = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',')`
			`y_ws = np.genfromtxt(y_file, delimiter=',', skip_header=1).astype(int)`

			`# compute average feature per person`
			`avg_x_ws = np.zeros((conf.n_participants, conf.max_n_feat))`
			`for p in xrange(0,conf.n_participants):`
			`avg_x_ws[p,:] = np.mean(x_ws[ids_ws == p, :], axis=0)`

			`feature_correlations_avg = []`
			`for fi in xrange(0, conf.max_n_feat):`
			`C_avg = np.corrcoef(y.transpose(), avg_x_ws[:, fi])[-1][:-1]`
			`feature_correlations_avg.append(C_avg)`

			`feature_correlations_avg = np.array(feature_correlations_avg)`

			`# find the 5th to highest correlation for each trait and write them into a .tex table - see Table 4 in SI`
			`n = 15`
			`highest_correlated_features = []`
			`highest_correlated_features_lists = []`
			`highest_correlated_features_names = []`
			`for t in xrange(0, conf.n_traits):`
			`hcf = feature_correlations_avg[:,t].argsort()[-n:]`
			`locallist = []`
			`for f in hcf:`
			`if f not in highest_correlated_features:`
			`highest_correlated_features.append(f)`
			`highest_correlated_features_names.append(gs.full_long_label_list[f].lower())`
			`locallist.append(f)`

			`highest_correlated_features_lists.append(locallist)`

			`features = zip(highest_correlated_features_names, highest_correlated_features)`
			`highest_correlated_features = [y for (x,y) in sorted(features)]`
			`#highest_correlated_features.sort()`

			`filename = conf.figure_folder + '/table4.tex'`
			`print len(highest_correlated_features)`
			`with open(filename, 'w') as f:`
			`f.write('feature&Neur.&Extr.&Open.&Agree.&Consc.&PCS&CEI')`
			`f.write('\\\\\n\hline\n')`
			`for fi in highest_correlated_features:`
			`f.write(gs.full_long_label_list[fi])`
			`for t in xrange(0, conf.n_traits):`
			`fc = feature_correlations_avg[fi,t]`
			`if math.isnan(fc):`
			`f.write('&-')`
			`elif fi in highest_correlated_features_lists[t]:`
			`f.write('&\\textbf{'+'%.2f}'%fc)`
			`else:`
			`f.write('&'+'%.2f'%fc)`
			`f.write('\\\\\n')`
			`print`
			`print filename, 'written'`


			`if __name__ == "__main__":`
			`import os`
			`if not os.path.exists(conf.figure_folder):`
			`os.makedirs(conf.figure_folder)`
			`get_stats() # prints statistics on the time participants spent inside the shop`
			`get_feature_correlations() # Table 4`