feature extraction code

This commit is contained in:
Sabrina Hoppe 2018-05-05 21:25:39 +02:00
parent a20084fefa
commit cebde1be17
9 changed files with 1279 additions and 2 deletions

0
config/__init__.py Normal file
View file

97
config/conf.py Normal file
View file

@ -0,0 +1,97 @@
import numpy as np
# global parameters
n_participants = 42
n_traits = 7
# 207 matches the number of feature labels listed in config/names.py
# (45 event + 64 heatmap + 34 position + 2 x 32 wordbook labels)
max_n_feat = 207
max_n_iter = 100
# NOTE(review): the unit of the window sizes is not stated in this file
# (presumably seconds) - confirm against the feature extraction code
all_window_sizes = [5, 15, 30, 45, 60, 75, 90, 105, 120, 135]
all_shop_window_sizes = [5, 15] # at least 3/4 of the people have a time window in these times
# cross validation parameters
n_inner_folds = 3
n_outer_folds = 5
# Random Forest Parameters
tree_max_features = 15
tree_max_depth = 5
n_estimators = 100
max_n_jobs = 5
def get_step_size(window_size):
    """Return the step size belonging to a window size.

    The step size is half of ``window_size``. Whole-number results are
    returned as ``int`` (e.g. 30 -> 15), fractional results stay ``float``
    (e.g. 5 -> 2.5).

    :param window_size: window length as a number
    :return: ``window_size / 2`` as ``int`` if it is a whole number, else ``float``
    """
    step_size = window_size / 2.0
    # The previous test `step_size * 10 % 2 == 0` was also true for fractional
    # steps such as 1.2 (12 % 2 == 0) and silently truncated them; only convert
    # to int when the value really is a whole number.
    if step_size % 1 == 0:
        step_size = int(step_size)
    return step_size
# relative paths
# All folders are given without a leading slash, so they are resolved against
# the current working directory of the running script.
data_folder = 'data'
info_folder = 'info'
feature_folder = 'features'
result_folder = 'results'
figure_folder = 'figures'
# files inside info_folder
annotation_path = info_folder + '/annotation.csv'
binned_personality_file = info_folder + '/binned_personality.csv'
personality_sex_age_file = info_folder + '/personality_sex_age.csv'
# load the personality trait names from file and map them to abbreviations
# NOTE: binned_personality_file is read when this module is imported, so the
# import fails if the file cannot be found relative to the working directory.
# The trait names are taken from the first row, skipping its first column.
traitlabels = np.loadtxt(binned_personality_file, delimiter=',', dtype=str)[0, 1:]
def get_abbr(s):
    """Abbreviate ``s`` to the initials of its capitalised words.

    Words are separated by whitespace; words that start with anything other
    than an upper-case character are skipped.

    :param s: string to abbreviate
    :return: concatenated upper-case initials (empty string if there are none)
    """
    initials = []
    for word in s.split():
        first_char = word[0]
        if first_char.isupper():
            initials.append(first_char)
    return ''.join(initials)
# medium labels: only trait names consisting of several words are abbreviated,
# single-word names are kept unchanged
medium_traitlabels = [s if " " not in s else get_abbr(s) for s in traitlabels]
# short labels: every trait name is reduced to its upper-case initials
short_traitlabels = [get_abbr(tl) for tl in traitlabels]
# dynamically create relative paths for result files to create
def get_result_folder(annotation_val):
    """Return the result sub-folder of one annotation value.

    :param annotation_val: annotation value; its string form is appended to 'A'
    :return: ``<result_folder>/A<annotation_val>``
    """
    subfolder = 'A' + str(annotation_val)
    return '/'.join((result_folder, subfolder))
def get_result_filename(annotation_val, trait, shuffle_labels, i, add_suffix=False):
    """Assemble the path of one result file.

    The path has the form
    ``<result folder>/<short trait label>[_rnd]_<i, zero-padded to 3 digits>[.npz]``.

    :param annotation_val: annotation value selecting the result sub-folder
    :param trait: index into ``short_traitlabels``
    :param shuffle_labels: if true, '_rnd' is inserted after the trait label
    :param i: running number, zero-padded to three digits
    :param add_suffix: if true, the extension '.npz' is appended
    :return: the assembled path as a string
    """
    parts = [get_result_folder(annotation_val), '/', short_traitlabels[trait]]
    if shuffle_labels:
        parts.append('_rnd')
    parts.append('_' + str(i).zfill(3))
    if add_suffix:
        parts.append('.npz')
    return ''.join(parts)
def get_feature_folder(participant):
    """Return the feature folder of one participant.

    :param participant: participant number, zero-padded to two digits
    :return: ``<feature_folder>/Participant<NN>``
    """
    participant_id = str(participant).zfill(2)
    return '/'.join((feature_folder, 'Participant' + participant_id))
def get_merged_feature_files(window_size):
    """Return the paths of the three merged CSV files of one window size.

    :param window_size: window size used in the file names
    :return: tuple ``(features file, traits file, ids file)``, each of the form
             ``<feature_folder>/merged_<kind>_<window_size>.csv``
    """
    prefix = feature_folder + '/merged_'
    suffix = '_' + str(window_size) + '.csv'
    return tuple(prefix + kind + suffix for kind in ('features', 'traits', 'ids'))
def get_data_folder(participant):
    """Return the data folder of one participant.

    :param participant: participant number, zero-padded to two digits
    :return: ``<data_folder>/Participant<NN>``
    """
    participant_id = str(participant).zfill(2)
    return '/'.join((data_folder, 'Participant' + participant_id))
def get_window_times_file(participant, window_size):
    """Return the path of the window-times file of a participant.

    :param participant: participant number (selects the feature folder)
    :param window_size: window size used in the file name
    :return: ``<participant feature folder>/window_times_<window_size>.npy``
    """
    folder = get_feature_folder(participant)
    filename = 'window_times_' + str(window_size) + '.npy'
    return folder + '/' + filename
def get_window_features_file(participant, window_size):
    """Return the path of the window-features file of a participant.

    :param participant: participant number (selects the feature folder)
    :param window_size: window size used in the file name
    :return: ``<participant feature folder>/window_features_<window_size>.npy``
    """
    folder = get_feature_folder(participant)
    filename = 'window_features_' + str(window_size) + '.npy'
    return folder + '/' + filename
def get_overall_features_file(participant):
    """Return the path of the overall-features file of a participant.

    :param participant: participant number (selects the feature folder)
    :return: ``<participant feature folder>/overall_features.npy``
    """
    folder = get_feature_folder(participant)
    return '/'.join((folder, 'overall_features.npy'))
# parameters for fixation/saccade detection
# NOTE(review): units are not stated in this file (durations presumably in
# seconds, radius/velocity presumably in the gaze coordinate system) - confirm
# against the event detection code
fixation_radius_threshold = 0.025
fixation_duration_threshold = 0.1
saccade_min_velocity = 2
max_saccade_duration = 0.5
# annotation constants (as given as arguments to train_classifier, and as used for file names in result_folder)
annotation_all = 0
annotation_ways = 1
annotation_shop = 2
# list of all valid annotation constants
annotation_values = [annotation_all, annotation_ways, annotation_shop]
# annotations used in merged_ids_* files in the feature_folder
# NOTE: these codes differ from the annotation_* constants above
# (e.g. shop is 2 in both, but way I / way II are 1 / 3 here)
# column 1
time_window_annotation_wayI = 1
time_window_annotation_shop = 2
time_window_annotation_wayII = 3
# column 2
time_window_annotation_halfI = 1
time_window_annotation_halfII = 2

160
config/names.py Normal file
View file

@ -0,0 +1,160 @@
# Labels of the values stored per fixation; the constants below hold the
# position of each label in this list.
fixations_list_labels = ['mean x', 'mean y',
'var x', 'var y',
't start', 't end',
'start index', 'end index',
'mean diameter', 'var diameter',
'mean successive angles', 'var successive angles'
]
# positions of the entries of fixations_list_labels
fix_mean_x_i = 0
fix_mean_y_i = 1
fix_var_x_i = 2
fix_var_y_i = 3
fix_start_t_i = 4
fix_end_t_i = 5
fix_start_index_i = 6
fix_end_index_i = 7
fix_mean_diam_i = 8
fix_var_diam_i = 9
# NOTE(review): the next two names lack the '_i' suffix used by all other index constants
fix_mean_succ_angles = 10
fix_var_succ_angles = 11
# Labels of the values stored per saccade; the constants below hold the
# position of each label in this list.
saccades_list_labels = ['start x', 'start y',
'end x', 'end y',
'angle',
't start', 't end',
'start index', 'end index',
'mean diameter', 'var diameter',
'peak velocity', 'amplitude',
]
# positions of the entries of saccades_list_labels
sacc_start_x_i = 0
sacc_start_y_i = 1
sacc_end_x_i = 2
sacc_end_y_i = 3
sacc_angle_i = 4
sacc_t_start_i = 5
sacc_t_end_i = 6
sacc_start_index_i = 7
sacc_end_index_i = 8
sacc_mean_diam_i = 9
sacc_var_diam_i = 10
sacc_peak_vel_i = 11
sacc_amplitude_i = 12
# Labels of the values stored per blink; the constants below hold the
# position of each label in this list.
blink_list_labels = ['t start', 't end', 'start index', 'end index']
blink_start_t_i = 0
# NOTE(review): 'ti' looks like a typo for 't' (cf. blink_start_t_i); the name
# is kept because other modules may reference it
blink_end_ti_i = 1
blink_start_index_i = 2
blink_end_index_i = 3
# Short labels of the 45 event-based features; the trailing comments give the
# list index of the entries on each line.
event_feature_labels = ['fixation rate', 'saccade rate', # 0 1
'small sacc. rate', 'large sacc. rate', 'positive sacc. rate', 'negative sacc. rate', # 2 3 4 5
'ratio sacc - fix', # 6
'ratio small sacc', 'ratio large sacc', 'ratio right sacc', 'ratio left sacc', # 7 8 9 10
'mean sacc amplitude', 'var sacc amplitude', 'min sacc amplitude', 'max sacc amplitude', # 11 12 13 14
'mean peak velocity', 'var peak velocity', 'min peak velocity', 'max peak velocity', # 15 16 17 18
'mean mean diameter sacc', 'var mean diameter sacc', 'mean var diameter sacc', # 19 20 21
'var var diameter sacc', # 22
'mean fix duration', 'var fix duration', 'min fix duration', 'max fix duration', # 23 24 25 26
'dwelling time', # 27
'mean mean subsequent angle', 'var mean subsequent angle', 'mean var subsequent angle', 'var var subsequent angle', # 28 29 30 31
'mean var x', 'mean var y', 'var var x', 'var var y', # 32 33 34 35
'mean mean diameter fix', 'var mean diameter fix', 'mean var diameter fix', 'var var diameter fix', # 36 37 38 39
'mean blink duration', 'var blink duration', 'min blink duration', 'max blink duration', # 40 41 42 43
'blink rate' # 44
]
# Long labels of the 45 event-based features, in the same order as
# event_feature_labels; the trailing comments give the list index of the
# entries on each line.
event_feature_labels_long = ['fixation rate', 'saccade rate', # 0 1
'small saccade rate', 'large saccade rate', 'positive saccade rate', 'negative saccade rate', # 2 3 4 5
'saccade:fixation ratio', # 6
'ratio of small saccades', 'ratio of large saccades', 'ratio of right saccades', 'ratio of left saccades', # 7 8 9 10
'mean saccade amplitude', 'var saccade amplitude', 'min saccade amplitude', 'max saccade amplitude', # 11 12 13 14
'mean saccadic peak velocity', 'var saccadic peak velocity', 'min saccadic peak velocity', 'max saccadic peak velocity', # 15 16 17 18
'mean of the mean pupil diameter during saccades', 'var of the mean pupil diameter during saccades', # 19 20
'mean of the var pupil diameter during saccades', 'var of the var pupil diameter during saccades', # 21 22
'mean fixation duration', 'var fixation duration', 'min fixation duration', 'max fixation duration', # 23 24 25 26
'dwelling time', # 27
'mean of the mean of subsequent angles', 'var of the mean of subsequent angles', # 28 29
'mean of the var of subsequent angles', 'var of the var of subsequent angles', # 30 31
'mean of the var of x', 'mean of the var of y', 'var of the var of x', 'var of the var of y', # 32 33 34 35
'mean of the mean pupil diameter during fixations', 'var of the mean pupil diameter during fixations', # 36 37
'mean of the var pupil diameter during fixations', 'var of the var pupil diameter during fixations', # 38 39
'mean blink duration', 'var blink duration', 'min blink duration', 'max blink duration', # 40 41 42 43
'blink rate' # 44
]
def get_wordbook_feature_labels(movement_abbreviation):
    """Return the 32 short wordbook feature labels of one movement type.

    For each of the wordbooks 1 to 4 there are eight labels of the form
    ``<movement_abbreviation><statistic> WB<n>``; the labels of wordbook 1 come
    first, then those of wordbook 2, and so on.

    :param movement_abbreviation: string prefix put directly in front of the statistic name
    :return: list of 32 label strings
    """
    statistics = ('>0', 'max', 'min', 'arg max', 'arg min', 'range', 'mean', 'var')
    labels = []
    for wordbook_number in (1, 2, 3, 4):
        wordbook_tag = ' WB' + str(wordbook_number)
        for statistic in statistics:
            labels.append(movement_abbreviation + statistic + wordbook_tag)
    return labels
def get_wordbook_feature_labels_long(movement_abbreviation):
    """Return the 32 long wordbook feature labels of one movement type.

    For each n in 1..4 there are eight labels of the form
    ``<prefix><n>-gram <movement_abbreviation><suffix>``, in the same order as
    the short labels returned by get_wordbook_feature_labels.

    :param movement_abbreviation: string inserted after '<n>-gram '
    :return: list of 32 label strings
    """
    templates = (
        ('number of different ', ' movements'),
        ('max frequency ', ' movements'),
        ('min frequency ', ' movements'),
        ('most frequent ', ' movement'),
        ('least frequent ', ' movement'),
        ('range of frequencies of ', ' movements'),
        ('mean frequency of ', ' movements'),
        ('var frequency of ', ' movements'),
    )
    labels = []
    for n in (1, 2, 3, 4):
        ngram_name = str(n) + '-gram ' + movement_abbreviation
        for prefix, suffix in templates:
            labels.append(prefix + ngram_name + suffix)
    return labels
# Short labels of the 34 position features: eleven statistics, each listed for
# x, y and diameter (indices 0-32), followed by 'mean subsequent angle' (33).
position_feature_labels = ['mean x', 'mean y', 'mean diameter',
'min x', 'min y', 'min diameter',
'max x', 'max y', 'max diameter',
'min-max x', 'min-max y', 'min-max diameter',
'std x', 'std y', 'std diameter',
'median x', 'median y', 'median diameter',
'1st quart x', '1st quart y', '1st quart diameter',
'3rd quart x', '3rd quart y', '3rd quart diameter',
'IQR x', 'IQR y', 'IQR diameter',
'mean abs diff x', 'mean abs diff y', 'mean abs diff diameter',
'mean diff x', 'mean diff y', 'mean diff diameter',
'mean subsequent angle'
]
# Long labels of the 34 position features, in the same order as
# position_feature_labels.
# NOTE(review): the short labels 'mean abs diff ...' (indices 27-29) are
# spelled out as 'mean difference of subsequent ...' here, while 'mean diff ...'
# (indices 30-32) keeps its short wording - confirm the intended long names
position_feature_labels_long = ['mean x', 'mean y', 'mean pupil diameter',
'minimum x', 'minimum y', 'minimum pupil diameter',
'maximum x', 'maximum y', 'maximum pupil diameter',
'range x', 'range y', 'range pupil diameter',
'std x', 'std y', 'std pupil diameter',
'median x', 'median y', 'median pupil diameter',
'1st quartile x', '1st quartile y', '1st quartile pupil diameter',
'3rd quartile x', '3rd quartile y', '3rd quartile pupil diameter',
'inter quartile range x', 'inter quartile range y', 'inter quartile range pupil diameter',
'mean difference of subsequent x', 'mean difference of subsequent y', 'mean difference of subsequent pupil diameters',
'mean diff x', 'mean diff y', 'mean diff pupil diameter',
'mean subsequent angle'
]
# Labels of the 64 heatmap features, numbered 00 to 63 (zero-padded to two digits).
# `range` is used instead of the Python-2-only `xrange`, so that the module can
# be imported under both Python 2 and Python 3.
heatmap_feature_labels = ['heatmap_' + str(i).zfill(2) for i in range(64)]
heatmap_feature_labels_long = ['heatmap cell ' + str(i).zfill(2) for i in range(64)]
# Complete list of short feature labels, concatenated in this order:
# event features, heatmap features, position features, saccade wordbook
# features ('sacc.') and saccade/fixation wordbook features ('SF').
full_label_list = event_feature_labels + heatmap_feature_labels + position_feature_labels + \
get_wordbook_feature_labels('sacc.') + get_wordbook_feature_labels('SF')
# Complete list of long feature labels, in the same order as full_label_list.
full_long_label_list = event_feature_labels_long + heatmap_feature_labels_long + position_feature_labels_long + \
get_wordbook_feature_labels_long('sacc.') + get_wordbook_feature_labels_long('SF')
# Alphabet of the 24 one-character symbols used to encode saccades.
sacc_dictionary = [
    'A', 'B', 'C', 'R', 'E', 'F', 'G', 'D', 'H', 'J', 'K', 'L',
    'M', 'N', 'O', 'U', 'u', 'b', 'r', 'f', 'd', 'j', 'l', 'n',
]
# All 2-, 3- and 4-symbol sequences over that alphabet. Each longer list is
# built by prefixing every sequence of the next shorter list with each symbol,
# so the first symbol varies slowest and the last symbol fastest.
sacc_bins_two = [first + second for first in sacc_dictionary for second in sacc_dictionary]
sacc_bins_three = [first + rest for first in sacc_dictionary for rest in sacc_bins_two]
sacc_bins_four = [first + rest for first in sacc_dictionary for rest in sacc_bins_three]
# sacc_bins[n - 1] holds all sequences of length n
sacc_bins = [sacc_dictionary, sacc_bins_two, sacc_bins_three, sacc_bins_four]
# Alphabet of the 10 symbols used to encode saccade/fixation sequences.
# NOTE(review): presumably 'S_*' symbols denote saccades and 'F_*' symbols
# fixations - confirm against the encoding code
saccFix_dictionary = [
    'S_lu', 'S_ld', 'S_lr', 'S_ll',
    'S_su', 'S_sd', 'S_sr', 'S_sl',
    'F_l', 'F_s',
]
# All 2-, 3- and 4-symbol sequences over that alphabet. Each longer list is
# built by prefixing every sequence of the next shorter list with each symbol,
# so the first symbol varies slowest and the last symbol fastest.
saccFix_bins_two = [first + second for first in saccFix_dictionary for second in saccFix_dictionary]
saccFix_bins_three = [first + rest for first in saccFix_dictionary for rest in saccFix_bins_two]
saccFix_bins_four = [first + rest for first in saccFix_dictionary for rest in saccFix_bins_three]
# saccFix_bins[n - 1] holds all sequences of length n
saccFix_bins = [saccFix_dictionary, saccFix_bins_two, saccFix_bins_three, saccFix_bins_four]
def write_pami_feature_labels_to_file(targetfile):
    """Write all feature labels to a CSV file.

    The file starts with the header line ``,short,long`` followed by one line
    per feature: its running index (starting at 0), its short label from
    ``full_label_list`` and its long label from ``full_long_label_list``.
    Labels are written as they are, without quoting.

    :param targetfile: path of the file to write; it is created if it does not
                       exist and overwritten otherwise
    """
    # the context manager closes the file even if one of the writes fails
    with open(targetfile, 'w') as f:
        f.write(',short,long\n')
        label_pairs = zip(full_label_list, full_long_label_list)
        for i, (short_label, long_label) in enumerate(label_pairs):
            f.write(str(i) + ',' + short_label + ',' + long_label + '\n')