feature extraction code

This commit is contained in:
Sabrina Hoppe 2018-05-05 21:25:39 +02:00
parent a20084fefa
commit cebde1be17
9 changed files with 1279 additions and 2 deletions

00_compute_features.py Normal file (103 lines)

@@ -0,0 +1,103 @@
import numpy as np
import os
from config import conf
from featureExtraction import gaze_analysis as ga
import threading
import getopt
import sys
from config import names as gs
def compute_sliding_window_features(participant, ws, gazeAnalysis_instance):
"""
uses the gazeAnalysis instance it was given to compute features and saves them to file
"""
window_features, window_times = gazeAnalysis_instance.get_window_features(ws, conf.get_step_size(ws))
np.save(conf.get_window_features_file(participant, ws), window_features)
np.save(conf.get_window_times_file(participant, ws), window_times)
if __name__ == "__main__":
for p in xrange(0,conf.n_participants):
threads = [] # one thread per time window will be used and collected in this list
# create data folder, plus one subfolder for participant p
if not os.path.exists(conf.get_feature_folder(p)):
os.makedirs(conf.get_feature_folder(p))
# make sure all relevant raw data files exist in the right folder
gaze_file = conf.get_data_folder(p) + '/gaze_positions.csv'
pupil_diameter_file = conf.get_data_folder(p) + '/pupil_diameter.csv'
events_file = conf.get_data_folder(p) + '/events.csv'
assert os.path.exists(gaze_file) and os.path.exists(pupil_diameter_file) and os.path.exists(events_file)
# load relevant data
gaze = np.genfromtxt(gaze_file, delimiter=',', skip_header=1)
pupil_diameter = np.genfromtxt(pupil_diameter_file, delimiter=',', skip_header=1)
events = np.genfromtxt(events_file, delimiter=',', skip_header=1, dtype=str)
# create instance of gazeAnalysis class that will be used for feature extraction
# this already does some initial computation that will be useful for all window sizes:
extractor = ga.gazeAnalysis(gaze, conf.fixation_radius_threshold, conf.fixation_duration_threshold,
conf.saccade_min_velocity, conf.max_saccade_duration,
pupil_diameter=pupil_diameter, event_strings=events)
# compute sliding window features by creating one thread per window size
for window_size in conf.all_window_sizes:
if not os.path.exists(conf.get_window_features_file(p, window_size)):
thread = threading.Thread(target=compute_sliding_window_features, args=(p, window_size, extractor))
thread.start()
threads.append(thread)
for t in threads:
t.join()
print 'finished all features for participant', p
# Merge the features from all participants into three files per window_size:
# merged_features includes all features
# merged_traits contains the ground truth personality score ranges
# merged_ids contains the participant number and context (way, shop, half of the recording)
# load ground truth from info folder:
binned_personality = np.genfromtxt(conf.binned_personality_file, delimiter=',', skip_header=1)
trait_labels = np.loadtxt(conf.binned_personality_file, delimiter=',', dtype=str)[0,:]
annotation = np.genfromtxt(conf.annotation_path, delimiter=',', skip_header=1)
for window_size in conf.all_window_sizes:
print 'merging window size', window_size
windowfeats_subtask_all = []
windowfeats_subtask_ids = []
windowfeats_subtask_all_y = []
for p in xrange(0, conf.n_participants):
featfilename = conf.get_window_features_file(p, window_size)
timesfilename = conf.get_window_times_file(p, window_size)
if os.path.exists(featfilename) and os.path.exists(timesfilename):
data = np.load(featfilename).tolist()
windowfeats_subtask_all.extend(data)
windowfeats_subtask_all_y.extend([binned_personality[p, 1:]] * len(data))
times = np.load(timesfilename)[:, 2:]
ann = annotation[p,1:]
ids_annotation = np.zeros((len(data), 3), dtype=int) # person, way/shop, half
ids_annotation[:,0] = p
ids_annotation[(times[:,1] < ann[0]),1] = conf.time_window_annotation_wayI
ids_annotation[(times[:,0] > ann[0]) & (times[:,1] < ann[1]),1] = conf.time_window_annotation_shop
ids_annotation[(times[:,0] > ann[1]),1] = conf.time_window_annotation_wayII
ids_annotation[:(len(data)/2), 2] = conf.time_window_annotation_halfI
ids_annotation[(len(data)/2):, 2] = conf.time_window_annotation_halfII
windowfeats_subtask_ids.extend(ids_annotation.tolist())
else:
print 'did not find ', featfilename
sys.exit(1)
ids = np.array(windowfeats_subtask_ids)
x = np.array(windowfeats_subtask_all, dtype=float)
y = np.array(windowfeats_subtask_all_y)
f1, f2, f3 = conf.get_merged_feature_files(window_size)
np.savetxt(f1, x, delimiter=',', header=','.join(gs.full_long_label_list), comments='')
np.savetxt(f2, y, delimiter=',', header=','.join(trait_labels), comments='')
np.savetxt(f3, ids, delimiter=',', header='Participant ID', comments='')
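For a quick sanity check of the merged output, a minimal sketch (assuming the script above has been run from the repository root; window size 15 is just an example):

import numpy as np
from config import conf

f1, f2, f3 = conf.get_merged_feature_files(15)
x = np.genfromtxt(f1, delimiter=',', skip_header=1)    # feature matrix, one row per window
y = np.genfromtxt(f2, delimiter=',', skip_header=1)    # binned trait scores, one row per window
ids = np.genfromtxt(f3, delimiter=',', skip_header=1)  # participant / way-shop / half annotations
print(x.shape)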

README.md

@@ -1,8 +1,7 @@
# Eye movements during everyday behavior predict personality traits
*Sabrina Hoppe, Tobias Loetscher, Stephanie Morey and Andreas Bulling*
This repository provides all data used for the publication [in Frontiers in Human Neuroscience](https://dx.doi.org/10.3389/fnhum.2018.00105).
Code is coming soon!
This repository provides all data and code used for the publication [in Frontiers in Human Neuroscience](https://dx.doi.org/10.3389/fnhum.2018.00105).
## Dataset
* Gaze data recorded at 60Hz from 42 participants is stored in `data/ParticipantXX`.
@@ -20,6 +19,14 @@ Code is coming soon!
* Timestamps indicating the times when participants entered and left the shop are given in `info/annotation.csv` in seconds.
## Code
Reproducing the paper results step by step:
1. __Extract features from raw gaze data__:
`python 00_compute_features.py` to compute gaze features for all participants
Once extracted, the features are stored in `features/ParticipantXX/window_features_YY.npy` where XX is the participant number and YY the length of the sliding window in seconds.
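For example, to inspect the extracted features for one participant (paths follow the pattern above; participant 00 and window size 15 are shown as an example):
```python
import numpy as np
features = np.load('features/Participant00/window_features_15.npy')
print(features.shape)  # one row of features per sliding window
```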
## Citation
If you want to cite this project, please use the following Bibtex format:

__init__.py Normal file (1 line)

@@ -0,0 +1 @@

config/__init__.py Normal file (0 lines)

config/conf.py Normal file (97 lines)

@@ -0,0 +1,97 @@
import numpy as np
# global parameters
n_participants = 42
n_traits = 7
max_n_feat = 207
max_n_iter = 100
all_window_sizes = [5, 15, 30, 45, 60, 75, 90, 105, 120, 135]
all_shop_window_sizes = [5, 15] # at least 3/4 of the participants have a time window of these sizes
# cross validation parameters
n_inner_folds = 3
n_outer_folds = 5
# Random Forest Parameters
tree_max_features = 15
tree_max_depth = 5
n_estimators = 100
max_n_jobs = 5
# given a window size, determine step size correctly for even and odd numbers
def get_step_size(window_size):
step_size = window_size / 2.0
if step_size * 10 % 2 == 0:
step_size = int(step_size)
return step_size
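# e.g. get_step_size(30) -> 15 (int), get_step_size(15) -> 7.5 (stays a float)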
# relative paths
data_folder = 'data'
info_folder = 'info'
feature_folder = 'features'
result_folder = 'results'
figure_folder = 'figures'
annotation_path = info_folder + '/annotation.csv'
binned_personality_file = info_folder + '/binned_personality.csv'
personality_sex_age_file = info_folder + '/personality_sex_age.csv'
# load the personality trait names from file and map them to abbreviations
traitlabels = np.loadtxt(binned_personality_file, delimiter=',', dtype=str)[0, 1:]
def get_abbr(s):
return ''.join(item[0] for item in s.split() if item[0].isupper())
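# e.g. get_abbr('Perceptual Speed') -> 'PS' (hypothetical multi-word trait label)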
medium_traitlabels = [get_abbr(s) if (" " in s) else s for s in traitlabels]
short_traitlabels = [''.join(item[0] for item in tl.split() if item[0].isupper()) for tl in traitlabels]
# dynamically create relative paths for result files to create
def get_result_folder(annotation_val):
return result_folder + '/A' + str(annotation_val)
def get_result_filename(annotation_val, trait, shuffle_labels, i, add_suffix=False):
filename = get_result_folder(annotation_val) + '/' + short_traitlabels[trait]
if shuffle_labels:
filename += '_rnd'
filename += '_' + str(i).zfill(3)
if add_suffix:
filename += '.npz'
return filename
def get_feature_folder(participant):
return feature_folder + '/Participant' + str(participant).zfill(2)
def get_merged_feature_files(window_size):
return feature_folder + '/merged_features_' + str(window_size) + '.csv', feature_folder + '/merged_traits_' + str(window_size) + '.csv', feature_folder + '/merged_ids_' + str(window_size) + '.csv'
def get_data_folder(participant):
return data_folder + '/Participant' + str(participant).zfill(2)
def get_window_times_file(participant, window_size):
return get_feature_folder(participant) + "/window_times_" + str(window_size) + '.npy'
def get_window_features_file(participant, window_size):
return get_feature_folder(participant) + "/window_features_" + str(window_size) + '.npy'
def get_overall_features_file(participant):
return get_feature_folder(participant) + "/overall_features.npy"
# parameters for fixation/saccade detection
fixation_radius_threshold = 0.025
fixation_duration_threshold = 0.1
saccade_min_velocity = 2
max_saccade_duration = 0.5
# annotation constants (as given as arguments to train_classifier, and as used for file names in result_folder)
annotation_all = 0
annotation_ways = 1
annotation_shop = 2
annotation_values = [annotation_all, annotation_ways, annotation_shop]
# annotations used in merged_ids_* files in the feature_folder
# column 1
time_window_annotation_wayI = 1
time_window_annotation_shop = 2
time_window_annotation_wayII = 3
# column 2
time_window_annotation_halfI = 1
time_window_annotation_halfII = 2
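For illustration, the path helpers above compose relative paths like these (a minimal sketch, assuming it is run from the repository root so info/binned_personality.csv is found at import time):

from config import conf

print(conf.get_data_folder(3))               # data/Participant03
print(conf.get_window_features_file(3, 15))  # features/Participant03/window_features_15.npy
print(conf.get_step_size(15))                # 7.5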

config/names.py Normal file (160 lines)

@@ -0,0 +1,160 @@
fixations_list_labels = ['mean x', 'mean y',
'var x', 'var y',
't start', 't end',
'start index', 'end index',
'mean diameter', 'var diameter',
'mean successive angles', 'var successive angles'
]
fix_mean_x_i = 0
fix_mean_y_i = 1
fix_var_x_i = 2
fix_var_y_i = 3
fix_start_t_i = 4
fix_end_t_i = 5
fix_start_index_i = 6
fix_end_index_i = 7
fix_mean_diam_i = 8
fix_var_diam_i = 9
fix_mean_succ_angles = 10
fix_var_succ_angles = 11
saccades_list_labels = ['start x', 'start y',
'end x', 'end y',
'angle',
't start', 't end',
'start index', 'end index',
'mean diameter', 'var diameter',
'peak velocity', 'amplitude',
]
sacc_start_x_i = 0
sacc_start_y_i = 1
sacc_end_x_i = 2
sacc_end_y_i = 3
sacc_angle_i = 4
sacc_t_start_i = 5
sacc_t_end_i = 6
sacc_start_index_i = 7
sacc_end_index_i = 8
sacc_mean_diam_i = 9
sacc_var_diam_i = 10
sacc_peak_vel_i = 11
sacc_amplitude_i = 12
blink_list_labels = ['t start', 't end', 'start index', 'end index']
blink_start_t_i = 0
blink_end_ti_i = 1
blink_start_index_i = 2
blink_end_index_i = 3
event_feature_labels = ['fixation rate', 'saccade rate', # 0 1
'small sacc. rate', 'large sacc. rate', 'positive sacc. rate', 'negative sacc. rate', # 2 3 4 5
'ratio sacc - fix', # 6
'ratio small sacc', 'ratio large sacc', 'ratio right sacc', 'ratio left sacc', # 7 8 9 10
'mean sacc amplitude', 'var sacc amplitude', 'min sacc amplitude', 'max sacc amplitude', #11 12 13 14
'mean peak velocity', 'var peak velocity', 'min peak velocity', 'max peak velocity', # 15 16 17 18
'mean mean diameter sacc', 'var mean diameter sacc', 'mean var diameter sacc', # 19 20 21 22
'var var diameter sacc',
'mean fix duration', 'var fix duration', 'min fix duration', 'max fix duration', # 23 24 25 26
'dwelling time', # 27
'mean mean subsequent angle', 'var mean subsequent angle', 'mean var subsequent angle', 'var var subsequent angle', # 28 29 30 31
'mean var x', 'mean var y', 'var var x', 'var var y', # 32 33 34 35
'mean mean diameter fix', 'var mean diameter fix', 'mean var diameter fix', 'var var diameter fix', # 36 37 38 39
'mean blink duration', 'var blink duration', 'min blink duration', 'max blink duration', # 40 41 42 43
'blink rate' # 44
]
event_feature_labels_long = ['fixation rate', 'saccade rate', # 0 1
'small saccade rate', 'large saccade rate', 'positive saccade rate', 'negative saccade rate', # 2 3 4 5
'saccade:fixation ratio', # 6
'ratio of small saccades', 'ratio of large saccades', 'ratio of right saccades', 'ratio of left saccades', # 7 8 9 10
'mean saccade amplitude', 'var saccade amplitude', 'min saccade amplitude', 'max saccade amplitude', #11 12 13 14
'mean saccadic peak velocity', 'var saccadic peak velocity', 'min saccadic peak velocity', 'max saccadic peak velocity', # 15 16 17 18
'mean of the mean pupil diameter during saccades', 'var of the mean pupil diameter during saccades',
'mean of the var pupil diameter during saccades', 'var of the var pupil diameter during saccades', # 19 20 21 22
'mean fixation duration', 'var fixation duration', 'min fixation duration', 'max fixation duration', # 23 24 25 26
'dwelling time', # 27
'mean of the mean of subsequent angles', 'var of the mean of subsequent angles', # 28 29
'mean of the var of subsequent angles', 'var of the var of subsequent angles', # 30 31
'mean of the var of x', 'mean of the var of y', 'var of the var of x', 'var of the var of y', # 32 33 34 35
'mean of the mean pupil diameter during fixations', 'var of the mean pupil diameter during fixations', # 36 37
'mean of the var pupil diameter during fixations', 'var of the var pupil diameter during fixations', # 38 39
'mean blink duration', 'var blink duration', 'min blink duration', 'max blink duration', # 40 41 42 43
'blink rate' # 44
]
def get_wordbook_feature_labels(movement_abbreviation):
return [movement_abbreviation + s + ' WB' + str(n) for n in [1, 2, 3, 4] for s in ['>0', 'max', 'min', 'arg max', 'arg min', 'range', 'mean', 'var']]
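# e.g. get_wordbook_feature_labels('sacc.') yields 4 x 8 = 32 labels, the first being 'sacc.>0 WB1'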
def get_wordbook_feature_labels_long(movement_abbreviation):
return [s1 + str(n) + '-gram ' + movement_abbreviation + s2 for n in [1, 2, 3, 4]
for (s1, s2) in [('number of different ', ' movements'),
('max frequency ', ' movements'),
('min frequency ', ' movements'),
('most frequent ', ' movement'),
('least frequent ', ' movement'),
('range of frequencies of ', ' movements'),
('mean frequency of ', ' movements'),
('var frequency of ', ' movements')
]]
position_feature_labels = ['mean x', 'mean y', 'mean diameter',
'min x', 'min y', 'min diameter',
'max x', 'max y', 'max diameter',
'min-max x', 'min-max y', 'min-max diameter',
'std x', 'std y', 'std diameter',
'median x', 'median y', 'median diameter',
'1st quart x', '1st quart y', '1st quart diameter',
'3rd quart x', '3rd quart y', '3rd quart diameter',
'IQR x', 'IQR y', 'IQR diameter',
'mean abs diff x', 'mean abs diff y', 'mean abs diff diameter',
'mean diff x', 'mean diff y', 'mean diff diameter',
'mean subsequent angle'
]
position_feature_labels_long = ['mean x', 'mean y', 'mean pupil diameter',
'minimum x', 'minimum y', 'minimum pupil diameter',
'maximum x', 'maximum y', 'maximum pupil diameter',
'range x', 'range y', 'range pupil diameter',
'std x', 'std y', 'std pupil diameter',
'median x', 'median y', 'median pupil diameter',
'1st quartile x', '1st quartile y', '1st quartile pupil diameter',
'3rd quartile x', '3rd quartile y', '3rd quartile pupil diameter',
'inter quartile range x', 'inter quartile range y', 'inter quartile range pupil diameter',
'mean difference of subsequent x', 'mean difference of subsequent y', 'mean difference of subsequent pupil diameters',
'mean diff x', 'mean diff y', 'mean diff pupil diameter',
'mean subsequent angle'
]
heatmap_feature_labels = ['heatmap_'+str(i).zfill(2) for i in xrange(0, 64)]
heatmap_feature_labels_long = ['heatmap cell '+str(i).zfill(2) for i in xrange(0, 64)]
full_label_list = event_feature_labels + heatmap_feature_labels + position_feature_labels + \
get_wordbook_feature_labels('sacc.') + get_wordbook_feature_labels('SF')
full_long_label_list = event_feature_labels_long + heatmap_feature_labels_long + position_feature_labels_long + \
get_wordbook_feature_labels_long('sacc.') + get_wordbook_feature_labels_long('SF')
sacc_dictionary = ['A', 'B', 'C', 'R', 'E', 'F', 'G', 'D', 'H', 'J', 'K', 'L', 'M', 'N', 'O', 'U', 'u', 'b', 'r', 'f',
'd', 'j', 'l', 'n']
sacc_bins_two = [a+b for a in sacc_dictionary for b in sacc_dictionary]
sacc_bins_three = [a+b+c for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary]
sacc_bins_four = [a+b+c+d for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary for d in sacc_dictionary]
sacc_bins = [sacc_dictionary, sacc_bins_two, sacc_bins_three, sacc_bins_four]
saccFix_dictionary = ['S_lu', 'S_ld', 'S_lr', 'S_ll', 'S_su', 'S_sd', 'S_sr', 'S_sl', 'F_l', 'F_s']
saccFix_bins_two = [a+b for a in saccFix_dictionary for b in saccFix_dictionary]
saccFix_bins_three = [a+b+c for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary]
saccFix_bins_four = [a+b+c+d for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary for d in saccFix_dictionary]
saccFix_bins = [saccFix_dictionary, saccFix_bins_two, saccFix_bins_three, saccFix_bins_four]
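# the bin counts grow geometrically with n-gram length:
# 24, 576, 13824, 331776 saccade bins and 10, 100, 1000, 10000 saccade/fixation bins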
def write_pami_feature_labels_to_file(targetfile):
f = open(targetfile, 'w') # creates if it does not exist
f.write(',short,long\n')
i = 0
for item1, item2 in zip(full_label_list, full_long_label_list):
f.write(str(i) + ',' + item1 + ',' + item2 + '\n')
i += 1
f.close()
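A minimal usage sketch (Python 2, run from the repository root; the output filename is only an example):

from config import names as gs

print(len(gs.full_label_list))  # 207 = 45 event + 64 heatmap + 34 position + 2 x 32 n-gram features
gs.write_pami_feature_labels_to_file('feature_labels.csv')  # writes index,short,long rows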

featureExtraction/__init__.py Normal file (0 lines)

featureExtraction/event_detection.py Normal file (327 lines)

@@ -0,0 +1,327 @@
import numpy as np
import sys
import math
from config import names as gs
def get_fixation_list(gaze, errors, xi, yi, ti, fixation_radius_threshold, fixation_duration_threshold, pupil_diameter):
n, m = gaze.shape
fixations = []
fixation = [] # single fixation, to be appended to fixations
counter = 0 # number of points in the fixation
sumx = 0 # used to compute the center of a fixation in x and y direction
sumy = 0
distance = 0 # captures the distance of a current sample from the fixation center
i = 0 # iterates through the gaze samples
while i < n - 1:
x = gaze[i, xi]
y = gaze[i, yi]
if counter == 0:
# ignore erroneous samples before a fixation
if errors[i]:
i += 1
continue
centerx = x
centery = y
else:
centerx = np.true_divide(sumx, counter)
centery = np.true_divide(sumy, counter)
if not errors[i]: # only update distance if the current sample is not erroneous
distance = np.sqrt((x - centerx) * (x - centerx) + (y - centery) * (y - centery))
if distance > fixation_radius_threshold: # start new fixation
if gaze[(i - 1), ti] - gaze[(i - counter), ti] >= fixation_duration_threshold:
start_index = i - counter + 1
end_index = i - 1 - 1
# discard fixations with more than 50% erroneous samples
percentage_error = np.sum(errors[start_index:(end_index + 1)]) / float(end_index - start_index)
if percentage_error >= 0.5:
if errors[i]:
i += 1
counter = 0
else:
counter = 1
sumx = x
sumy = y
continue
gaze_indices = np.arange(start_index, end_index+1)[np.logical_not(errors[start_index:(end_index + 1)])]
start_index = gaze_indices[0]
end_index = gaze_indices[-1]
gazex = gaze[start_index:(end_index + 1), xi][np.logical_not(errors[start_index:(end_index + 1)])]
gazey = gaze[start_index:(end_index + 1), yi][np.logical_not(errors[start_index:(end_index + 1)])]
gazet = gaze[start_index:(end_index + 1), ti][np.logical_not(errors[start_index:(end_index + 1)])]
# extract fixation characteristics
fixation.append(np.mean(gazex)) # 0.-1. mean x,y
fixation.append(np.mean(gazey))
fixation.append(np.var(gazex)) # 2-3. var x, y
fixation.append(np.var(gazey))
fixation.append(gazet[0]) # 4-5. t_start, t_end
fixation.append(gazet[-1])
fixation.append(gaze_indices[0]) # 6-7. index_start, index_end
fixation.append(gaze_indices[-1])
ds = ((pupil_diameter[start_index:(end_index+1), 1] + pupil_diameter[start_index:(end_index+1), 2]) / 2.)[np.logical_not(errors[start_index:(end_index+1)])]
fixation.append(np.mean(ds)) # 8. mean pupil diameter
fixation.append(np.var(ds)) # 9. var pupil diameter
succ_dx = gazex[1:] - gazex[:-1]
succ_dy = gazey[1:] - gazey[:-1]
succ_angles = np.arctan2(succ_dy, succ_dx)
fixation.append(np.mean(succ_angles)) # 10 mean successive angle
fixation.append(np.var(succ_angles)) # 11 var successive angle
fixations.append(fixation)
assert len(fixation) == len(gs.fixations_list_labels)
# set up new fixation
fixation = []
if errors[i]:
i += 1
counter = 0
else:
counter = 1
sumx = x
sumy = y
else:
if not errors[i]:
counter += 1
sumx += x
sumy += y
i += 1
return fixations
def get_saccade_list(gaze, fixations, xi, yi, ti, pupil_diameter, fixation_radius_threshold, errors,
saccade_min_velocity, max_saccade_duration):
saccades = []
wordbook_string = []
# each movement between two subsequent fixations could be a saccade, but
for i in xrange(1, len(fixations)):
# ...not if the window is too long
duration = float(fixations[i][gs.fix_start_t_i] - fixations[i - 1][gs.fix_end_t_i])
if duration > max_saccade_duration:
continue
start_index = fixations[i - 1][gs.fix_end_index_i]
end_index = fixations[i][gs.fix_start_index_i]
gazex = gaze[start_index:(end_index + 1), xi][np.logical_not(errors[start_index:(end_index + 1)])]
gazey = gaze[start_index:(end_index + 1), yi][np.logical_not(errors[start_index:(end_index + 1)])]
gazet = gaze[start_index:(end_index + 1), ti][np.logical_not(errors[start_index:(end_index + 1)])]
dx = np.abs(gazex[1:] - gazex[:-1])
dy = np.abs(gazey[1:] - gazey[:-1])
dt = np.abs(gazet[1:] - gazet[:-1])
# ...not if fewer than 2 non-erroneous samples are left:
if len(dt) < 2:
continue
distance = np.linalg.norm([dx, dy])
peak_velocity = np.amax(distance / dt)
start_x = gazex[0]
start_y = gazey[0]
end_x = gazex[-1]
end_y = gazey[-1]
dx = end_x - start_x
dy = end_y - start_y
# ...not if the amplitude is shorter than a fifth of fixation_radius_threshold
amplitude = np.linalg.norm([dx, dy])
if amplitude < fixation_radius_threshold / 5.0:
continue
# ...not if the peak velocity is very low
if peak_velocity < saccade_min_velocity:
continue
percentage_error = np.sum(errors[start_index:(end_index + 1)]) / float(end_index - start_index)
# ...not if more than 50% of the data are erroneous
if percentage_error >= 0.5:
continue
else: # found saccade!
# compute characteristics of the saccade, like start and end point, amplitude, ...
saccade = []
saccade.append(start_x) # 0.-1. start x,y
saccade.append(start_y)
saccade.append(end_x) # 2-3. end x,y
saccade.append(end_y)
if dx == 0:
radians = 0
else:
radians = np.arctan(np.true_divide(dy, dx))
if dx > 0:
if dy < 0:
radians += (2 * np.pi)
else:
radians = np.pi + radians
saccade.append(radians) # 4. angle
saccade.append(fixations[i - 1][gs.fix_end_t_i]) # 5-6. t_start, t_end
saccade.append(fixations[i][gs.fix_start_t_i])
saccade.append(start_index) # 7-8. index_start, index_end
saccade.append(end_index)
ds = (pupil_diameter[start_index:(end_index + 1), 1] + pupil_diameter[start_index:(end_index + 1),
2]) / 2.0
saccade.append(np.mean(ds)) # 9. mean pupil diameter
saccade.append(np.var(ds)) # 10. var pupil diameter
saccade.append(peak_velocity) # 11. peak velocity
saccade.append(amplitude) # 12. amplitude
# append character representing this kind of saccade to the wordbook_string which will be used for n-gram features
sac_id = get_dictionary_entry_for_saccade(amplitude, fixation_radius_threshold, radians)
wordbook_string.append(sac_id)
saccades.append(saccade)
# assert all saccade characteristics were computed
assert len(saccade) == len(gs.saccades_list_labels)
return saccades, wordbook_string
def get_blink_list(event_strings, gaze, ti):
assert len(event_strings) == len(gaze)
# detect Blinks
blinks = []
blink = [] # single blink, to be appended to blinks
i = 0
starti = i
blink_started = False
while i < len(event_strings) - 1:
if event_strings[i] == 'Blink' and not blink_started: # start new blink
starti = i
blink_started = True
elif blink_started and not event_strings[i] == 'Blink':
blink.append(gaze[starti, ti])
blink.append(gaze[i - 1, ti])
blink.append(starti)
blink.append(i - 1)
blinks.append(blink)
assert len(blink) == len(gs.blink_list_labels)
blink_started = False
blink = []
i += 1
return blinks
def get_dictionary_entry_for_saccade(amplitude, fixation_radius_threshold, degree_radians):
# Saccade type: small iff amplitude is less than 2 fixation_radius_thresholds
# U
# O A
# N u B
# M n b C
# L l r R
# K j f E
# J d F
# H G
# D
degrees = np.true_divide(degree_radians * 180.0, np.pi)
if amplitude < 2 * fixation_radius_threshold:
d_degrees = degrees / (np.true_divide(90, 4))
if d_degrees < 1:
sac_id = 'r'
elif d_degrees < 3:
sac_id = 'b'
elif d_degrees < 5:
sac_id = 'u'
elif d_degrees < 7:
sac_id = 'n'
elif d_degrees < 9:
sac_id = 'l'
elif d_degrees < 11:
sac_id = 'j'
elif d_degrees < 13:
sac_id = 'd'
elif d_degrees < 15:
sac_id = 'f'
elif d_degrees < 16:
sac_id = 'r'
else:
print 'error! d_degrees cannot be matched to a sac_id for a small saccade ', d_degrees
sys.exit(1)
else: # large
d_degrees = degrees / (np.true_divide(90, 8))
if d_degrees < 1:
sac_id = 'R'
elif d_degrees < 3:
sac_id = 'C'
elif d_degrees < 5:
sac_id = 'B'
elif d_degrees < 7:
sac_id = 'A'
elif d_degrees < 9:
sac_id = 'U'
elif d_degrees < 11:
sac_id = 'O'
elif d_degrees < 13:
sac_id = 'N'
elif d_degrees < 15:
sac_id = 'M'
elif d_degrees < 17:
sac_id = 'L'
elif d_degrees < 19:
sac_id = 'K'
elif d_degrees < 21:
sac_id = 'J'
elif d_degrees < 23:
sac_id = 'H'
elif d_degrees < 25:
sac_id = 'D'
elif d_degrees < 27:
sac_id = 'G'
elif d_degrees < 29:
sac_id = 'F'
elif d_degrees < 31:
sac_id = 'E'
elif d_degrees < 33:
sac_id = 'R'
else:
print 'error! d_degrees cannot be matched to a sac_id for a large saccade: ', d_degrees
sys.exit(1)
return sac_id
def detect_all(gaze, errors, ti, xi, yi, fixation_radius_threshold=0.01, pupil_diameter=None, event_strings=None,
fixation_duration_threshold=0.1, saccade_min_velocity=2, max_saccade_duration=0.1):
"""
:param gaze: gaze data, typically [t,x,y]
:param fixation_radius_threshold: dispersion threshold
:param fixation_duration_threshold: temporal threshold
:param ti, xi, yi: index data for gaze,i.e. for [t,x,y] ti=0, xi=1, yi=2
:param pupil_diameter: pupil diameters values, same length as gaze
:param event_strings: list of events, here provided by SMI. used to extract blink information
"""
fixations = get_fixation_list(gaze, errors, xi, yi, ti, fixation_radius_threshold, fixation_duration_threshold,
pupil_diameter)
saccades, wordbook_string = get_saccade_list(gaze, fixations, xi, yi, ti, pupil_diameter,
fixation_radius_threshold, errors, saccade_min_velocity,
max_saccade_duration)
blinks = get_blink_list(event_strings, gaze, ti)
return fixations, saccades, blinks, wordbook_string
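To exercise the detector in isolation, a minimal synthetic sketch (Python 2, NumPy only; thresholds mirror config/conf.py, and how many events are detected depends on these values and on the noise):

import numpy as np
from featureExtraction import event_detection as ed

# two seconds of synthetic 60 Hz gaze: two stable clusters joined by one jump
t = np.arange(0, 2, 1 / 60.0)
x = np.where(t < 1, 0.3, 0.7) + np.random.normal(0, 0.002, len(t))
y = 0.5 + np.random.normal(0, 0.002, len(t))
gaze = np.column_stack([t, x, y])
errors = np.zeros(len(gaze), dtype=bool)
diam = np.column_stack([t, np.full(len(t), 3.0), np.full(len(t), 3.0)])  # columns 1 and 2 are read
events = ['Fixation'] * len(gaze)  # no 'Blink' entries, so no blinks will be found

fixations, saccades, blinks, wb_string = ed.detect_all(
    gaze, errors, 0, 1, 2,
    fixation_radius_threshold=0.025, pupil_diameter=diam, event_strings=events,
    fixation_duration_threshold=0.1, saccade_min_velocity=2, max_saccade_duration=0.5)
print('%d fixations, %d saccades, %d blinks' % (len(fixations), len(saccades), len(blinks)))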

featureExtraction/gaze_analysis.py Normal file (582 lines)

@@ -0,0 +1,582 @@
#!/usr/bin/python
import numpy as np
import sys, os
from featureExtraction import event_detection as ed
import operator
from config import names as gs
class gazeAnalysis (object):
# dictionary for saccade-based n-grams:
# each character encodes one direction, capital characters stand for long saccades, the others for short ones
# short means the saccade amplitude is less than 2 fixation_radius_thresholds
# U
# O A
# N u B
# M n b C
# L l . r R
# K j f E
# J d F
# H G
# D
sacc_dictionary = ['A', 'B', 'C', 'R', 'E', 'F', 'G', 'D', 'H', 'J', 'K', 'L', 'M', 'N', 'O', 'U', 'u', 'b', 'r', 'f',
'd', 'j', 'l', 'n']
sacc_bins_two = [a+b for a in sacc_dictionary for b in sacc_dictionary]
sacc_bins_three = [a+b+c for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary]
sacc_bins_four = [a+b+c+d for a in sacc_dictionary for b in sacc_dictionary for c in sacc_dictionary for d in sacc_dictionary]
sacc_bins = [sacc_dictionary, sacc_bins_two, sacc_bins_three, sacc_bins_four]
# dictionary for saccade and fixation-based n-grams:
# S are saccades, long or short (i.e. longer or shorter than the fixation radius), and up/down/right/left
# e.g. S_lu is a long saccade up
# F are fixations, either long or short (i.e. longer or shorter than twice the minimum fixation duration)
# saccFix_dictionary = ['S_lu', 'S_ld', 'S_lr', 'S_ll', 'S_su', 'S_sd', 'S_sr', 'S_sl', 'F_l', 'F_s']
saccFix_dictionary = ['U', 'D', 'R', 'L', 'u', 'd', 'r', 'l', 'F', 'f']
saccFix_bins_two = [a+b for a in saccFix_dictionary for b in saccFix_dictionary]
saccFix_bins_three = [a+b+c for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary]
saccFix_bins_four = [a+b+c+d for a in saccFix_dictionary for b in saccFix_dictionary for c in saccFix_dictionary for d in saccFix_dictionary]
saccFix_bins = [saccFix_dictionary, saccFix_bins_two, saccFix_bins_three, saccFix_bins_four]
def __init__(self, gaze, fixation_radius_threshold, fixation_duration_threshold, saccade_min_velocity,max_saccade_duration,
pupil_diameter=None, event_strings=None, ti=0, xi=1, yi=2):
assert gaze.size > 0
# save data in instance
self.gaze = gaze
self.diams = pupil_diameter
self.event_strings = event_strings
# save constants, indices and thresholds that will be used multiple times
self.fixation_radius_threshold = fixation_radius_threshold
self.fixation_duration_threshold = fixation_duration_threshold
self.xi = xi
self.yi = yi
self.ti = ti
# detect errors, fixations, saccades and blinks
self.errors = self.detect_errors()
self.fixations, self.saccades, self.blinks, self.wordbook_string = \
ed.detect_all(self.gaze, self.errors, self.ti, self.xi, self.yi, pupil_diameter=pupil_diameter,
event_strings=event_strings, fixation_duration_threshold=fixation_duration_threshold,
fixation_radius_threshold=fixation_radius_threshold, saccade_min_velocity=saccade_min_velocity,
max_saccade_duration=max_saccade_duration)
def detect_errors(self, confidence_threshold=0.8, outlier_threshold=0.5):
"""
:param confidence_threshold: threshold below which gaze data would be deleted (not used in this implementation)
:param outlier_threshold: margin around the calibration area [0,1]; gaze farther outside than this is marked as erroneous
"""
errors = np.full((len(self.gaze)), False, dtype=bool)
# gaze is nan
errors[np.isnan(self.gaze[:, self.xi])] = True
errors[np.isnan(self.gaze[:, self.yi])] = True
# gaze outside a certain range
errors[self.gaze[:, self.xi] < -outlier_threshold] = True
errors[self.gaze[:, self.xi] > outlier_threshold + 1] = True
errors[self.gaze[:, self.yi] < -outlier_threshold] = True
errors[self.gaze[:, self.yi] > outlier_threshold + 1] = True
return errors
def get_window_features(self, sliding_window_size, sliding_window_step_size, start_index=-1, end_index=-1):
"""
computes features using a sliding window approach with the given sliding_window_size and sliding_window_step_size
"""
# if no start and end index are given, use all data
if start_index == -1 and end_index == -1:
start_index = 0
end_index = len(self.gaze[:, 0]) - 1
# compute start and end times of each resulting sliding window
window_times = self.get_sliding_windows(start_index, end_index, sliding_window_size, sliding_window_step_size)
#compute features for each of these windows:
window_feature_list = []
for [a,b,at,bt] in window_times:
overallstats = self.get_full_feature_vector(a, b)
window_feature_list.append(overallstats)
assert len(gs.full_label_list) == len(window_feature_list[0])
window_feature_list = np.array(window_feature_list)
window_times = np.array(window_times)
return window_feature_list, window_times
def get_full_feature_vector(self, start_index, end_index):
"""
assembles the full feature vector from its parts:
features based on fixations/saccades/blinks, raw data, heatmaps, n-grams based on saccades and n-grams based on saccades and fixations
"""
# features based on events, i.e. fixations/saccades/blinks
features = self.get_event_features(start_index, end_index)
assert len(gs.event_feature_labels) == len(features)
# features based on raw data, like quartiles of gaze positions
raw_features = self.get_raw_features(start_index, end_index)
features.extend(raw_features)
assert len(gs.position_feature_labels) == len(raw_features)
# heatmap features
heatmap_features = self.get_heatmap_features(start_index, end_index)
features.extend(heatmap_features)
assert len(gs.heatmap_feature_labels) == len(heatmap_features)
# n-gram features based on saccades
sacc_wordbook_features = self.get_sacc_ngram_features(start_index, end_index)
features.extend(sacc_wordbook_features)
assert len(gs.get_wordbook_feature_labels('')) == len(sacc_wordbook_features)
# n-gram features based on saccades and fixations
saccFix_wordbook_features = self.get_saccFix_ngram_features(start_index, end_index)
features.extend(saccFix_wordbook_features)
assert len(gs.get_wordbook_feature_labels('')) == len(saccFix_wordbook_features)
return features
def get_event_features(self, start_index, end_index):
"""
computes features based on fixations, saccades and blinks within the selected gaze window
"""
event_features = []
# get non-erroneous samples between start_index and end_index:
x,y,d = self.get_x_y_d_part(start_index, end_index)
data = self.gaze[start_index:(end_index+1), :]
fixations, saccades, wordbook_string, blinks = self.get_FixSacWb_timed(start_index, end_index)
n, m = data.shape
timespan = data[-1, self.ti] - data[0, self.ti]
num_fix = len(fixations)
num_sacc = len(saccades)
if timespan == 0:
fix_rate = 0
sacc_rate = 0
else:
fix_rate = num_fix / float(timespan)
sacc_rate = num_sacc / float(timespan)
event_features.append(fix_rate) # 0. rate of fixations
event_features.append(sacc_rate) # 1. rate of saccades
sacc_UR_L = wordbook_string.count('U') + wordbook_string.count('A') + wordbook_string.count('B') + wordbook_string.count('C')
sacc_BR_L = wordbook_string.count('R') + \
wordbook_string.count('E') + \
wordbook_string.count('F') + \
wordbook_string.count('G')
sacc_UL_L = wordbook_string.count('O') \
+ wordbook_string.count('N') \
+ wordbook_string.count('M') \
+ wordbook_string.count('L')
sacc_BL_L = wordbook_string.count('K') \
+ wordbook_string.count('J') \
+ wordbook_string.count('H') \
+ wordbook_string.count('D')
sacc_UR_S = wordbook_string.count('u') \
+ wordbook_string.count('b')
sacc_BR_S = wordbook_string.count('r') \
+ wordbook_string.count('f')
sacc_UL_S = wordbook_string.count('n') \
+ wordbook_string.count('l')
sacc_BL_S = wordbook_string.count('j') \
+ wordbook_string.count('d')
num_s_sacc = sacc_UR_S + sacc_BR_S + sacc_UL_S + sacc_BL_S
num_la_sacc = sacc_UR_L + sacc_BR_L + sacc_UL_L + sacc_BL_L
num_r_sacc = sacc_UR_S + sacc_BR_S + sacc_UR_L + sacc_BR_L
num_l_sacc = sacc_UL_S + sacc_BL_S + sacc_UL_L + sacc_BL_L
if timespan > 0:
event_features.append(num_s_sacc / float(timespan)) #2. rate of small saccades
event_features.append(num_la_sacc / float(timespan)) #3. rate of large saccades
event_features.append(num_r_sacc / float(timespan)) #4. rate of pos saccades
event_features.append(num_l_sacc / float(timespan)) #5. rate of neg saccades
else:
event_features.extend([0]*4)
if num_fix > 0:
event_features.append(num_sacc / float(num_fix)) # 6. ratio saccades / fixations
else:
event_features.append(0)
if num_sacc > 0:
event_features.append(num_s_sacc /float(num_sacc)) # 7. ratio small sacc
event_features.append(num_la_sacc / float(num_sacc)) # 8. ratio large sacc
event_features.append(num_r_sacc / float(num_sacc)) # 9. ratio pos sacc
event_features.append(num_l_sacc / float(num_sacc)) # 10. ratio neg sacc
else:
event_features.extend([0]*4)
sacc_array = np.array(saccades)
fix_array = np.array(fixations)
if sacc_array.size > 0:
# amplitude features
amplitudes = sacc_array[:, gs.sacc_amplitude_i]
event_features.append(np.mean(amplitudes)) # 11: mean sacc amplitude
event_features.append(np.var(amplitudes)) # 12: var sacc amplitude
event_features.append(amplitudes.min()) # 13 min sacc amplitude
event_features.append(amplitudes.max()) # 14: max sacc amplitude
# peak velocity features
velocities = sacc_array[:, gs.sacc_peak_vel_i]
event_features.append(np.mean(velocities)) # 15: mean peak velocity
event_features.append(np.var(velocities)) # 16: var peak velocity
event_features.append(velocities.min()) # 17: min peak velocity
event_features.append(velocities.max()) # 18: max peak velocity
if sacc_array[0, :].size == 13:
event_features.append(np.mean(sacc_array[:, gs.sacc_mean_diam_i])) # 19 mean mean diameter
event_features.append(np.var(sacc_array[:, gs.sacc_mean_diam_i])) # 20 var mean diameter
event_features.append(np.mean(sacc_array[:, gs.sacc_var_diam_i])) # 21 mean var diameter
event_features.append(np.var(sacc_array[:, gs.sacc_var_diam_i])) # 22 var var diameter
else:
event_features.extend([0]*4)
else:
event_features.extend([0]*12)
if fix_array.size > 0:
durations = np.array(fix_array[:, gs.fix_end_t_i]) - np.array(fix_array[:, gs.fix_start_t_i])
event_features.append(np.mean(durations)) # 23: mean fix duration
event_features.append(np.var(durations)) # 24: var fix duration
event_features.append(durations.min()) # 25: min fix duration
event_features.append(durations.max()) # 26: max fix duration
event_features.append(durations.sum()) # 27: dwelling time
event_features.append(np.mean(fix_array[:, gs.fix_mean_succ_angles])) # 28: mean mean subsequent angle
event_features.append(np.var(fix_array[:, gs.fix_mean_succ_angles])) # 29: var mean subsequent angle
event_features.append(np.mean(fix_array[:, gs.fix_var_succ_angles])) # 30: mean var subsequent angle
event_features.append(np.var(fix_array[:, gs.fix_var_succ_angles])) # 31: var var subsequent angle
lnnII = np.logical_not(np.isnan(fix_array[:, gs.fix_var_x_i]))
lnnIII = np.logical_not(np.isnan(fix_array[:, gs.fix_var_y_i]))
event_features.append(np.mean(fix_array[lnnII, gs.fix_var_x_i])) # 32: mean var x
event_features.append(np.mean(fix_array[lnnIII, gs.fix_var_y_i])) # 33: mean var y
event_features.append(np.var(fix_array[lnnII, gs.fix_var_x_i])) # 34: var var x
event_features.append(np.var(fix_array[lnnIII, gs.fix_var_y_i])) # 35: var var y
if fix_array[0, :].size == 12:
event_features.append(np.mean(fix_array[:, gs.fix_mean_diam_i])) # 36: mean mean diameter
event_features.append(np.var(fix_array[:, gs.fix_mean_diam_i])) # 37: var mean diameter
event_features.append(np.mean(fix_array[:, gs.fix_var_diam_i])) # 38: mean var diameter
event_features.append(np.var(fix_array[:, gs.fix_var_diam_i])) # 39: var var diameter
else:
event_features.extend([0]*4)
else:
event_features.extend([0]*17)
blink_array = np.array(blinks)
if blink_array.size > 0:
durations = np.array(blink_array[:, 1]) - np.array(blink_array[:, 0])
event_features.append(np.mean(durations)) # 40: mean blink duration
event_features.append(np.var(durations)) # 41: var blink duration
event_features.append(durations.min()) # 42: min blink duration
event_features.append(durations.max()) # 43: max blink duration
event_features.append(np.true_divide(len(blink_array), timespan)) # 44: blink rate
else:
event_features.extend([0]*5)
return event_features
def get_x_y_d_part(self, start_index, end_index):
x = self.gaze[start_index:(end_index + 1), self.xi]
y = self.gaze[start_index:(end_index + 1), self.yi]
d = (self.diams[start_index:(end_index + 1), 1] + self.diams[start_index:(end_index + 1), 2]) / 2.0
err = self.errors[start_index:(end_index + 1)]
x = x[np.logical_not(err)]
y = y[np.logical_not(err)]
d = d[np.logical_not(err)]
return x,y,d
def get_raw_features(self, start_index, end_index):
"""
computes features based on raw gaze data, like percentiles of x coordinates
"""
raw_features = []
x,y,d = self.get_x_y_d_part(start_index, end_index)
for a in [x, y, d]:
raw_features.append(np.mean(a))
for a in [x, y, d]:
raw_features.append(np.amin(a))
for a in [x, y, d]:
raw_features.append(np.amax(a))
for a in [x, y, d]:
raw_features.append(np.amax(a) - np.amin(a))
for a in [x, y, d]:
raw_features.append(np.std(a))
for a in [x, y, d]:
raw_features.append(np.median(a))
for a in [x, y, d]:
raw_features.append(np.percentile(a, 25))
for a in [x, y, d]:
raw_features.append(np.percentile(a, 75))
for a in [x, y, d]:
raw_features.append(np.percentile(a, 75) - np.percentile(a, 25))
for a in [x, y, d]:
raw_features.append(np.mean(np.abs(a[1:] - a[:-1])))
for a in [x, y, d]:
raw_features.append(np.mean(a[1:] - a[:-1]))
dx = x[:-1] - x[1:]
dy = y[:-1] - y[1:]
succ_angles = np.arctan2(dy, dx)
raw_features.append(np.mean(succ_angles)) # 33: mean subsequent angle
return raw_features
def get_heatmap_features(self, start_index, end_index):
"""
computes a heatmap over the raw gaze positions, in a 8x8 grid
"""
x,y,d = self.get_x_y_d_part(start_index, end_index)
xmin = np.percentile(x, 2.5)
xmax = np.percentile(x, 97.5)
ymin = np.percentile(y, 2.5)
ymax = np.percentile(y, 97.5)
heatmap, xedges, yedges = np.histogram2d(x, y, bins=(8, 8), range=[[xmin, xmax], [ymin, ymax]])
normalised_flat_heatmap = heatmap.flatten() / np.sum(heatmap)
return normalised_flat_heatmap
def get_sacc_ngram_features(self, start_index, end_index):
# find those saccades that either start or end within start_index and end_index, or start before start_index and end after end_index
mysacc = [sacc for sacc in self.saccades if (sacc[gs.sacc_start_index_i] > start_index and sacc[gs.sacc_start_index_i] < end_index)
or (sacc[gs.sacc_end_index_i] > start_index and sacc[gs.sacc_end_index_i] < end_index)
or (sacc[gs.sacc_start_index_i] < start_index and sacc[gs.sacc_end_index_i] > end_index)]
# create string representing all saccades in mysacc
mywbs = [self.wordbook_string[self.saccades.index(sacc)] for sacc in mysacc]
# create all possible n-grams of a certain length
ngrams = []
ngrams.append(mywbs)
for n in xrange(2,5):
ngrams.append([reduce(operator.add, mywbs[i:i+n]) for i in range(len(mywbs) - n)])
# compute histograms of the actual n-grams occuring in the data
histograms=[]
for i in xrange(0,4):
histograms.append(dict((x, ngrams[i].count(x)) for x in self.sacc_bins[i]))
# compute features from each histogram and append them to one list
sacc_ngram_features = []
for h in histograms:
wb_feat = self.get_ngram_features(h)
sacc_ngram_features.extend(wb_feat)
return sacc_ngram_features
def get_saccFix_wb_string_saccades(self, sacc):
"""
returns a string for a single saccade that will be used for n-gram features based on saccades and fixations
"""
amplitude = sacc[gs.sacc_amplitude_i]
angle_rad = sacc[gs.sacc_angle_i]
angle_deg = np.true_divide(angle_rad * 180.0, np.pi)
# 0 degrees is pointing to the right
if angle_deg < 45:
wb_str = 'r'
elif angle_deg < 135:
wb_str = 'u'
elif angle_deg < 225:
wb_str = 'l'
elif angle_deg < 315:
wb_str = 'd'
else:
wb_str = 'r'
if amplitude >= 2 * self.fixation_radius_threshold: # large saccade: encode with an upper-case letter
wb_str = wb_str.upper()
return wb_str
def get_saccFix_wb_string_fixations(self, fix):
"""
returns a string for a single fixation that will be used for n-gram features based on saccades and fixations
"""
if fix[gs.fix_end_t_i] - fix[gs.fix_start_t_i] < 2 * self.fixation_duration_threshold:
return 'f'
else:
return 'F'
def get_saccFix_ngram_features(self, start_index, end_index):
"""
computes n-gram features based on saccades and fixations
"""
# find all saccades and fixations between start_index and end_index,
# and create a string of their encodings
sacc_index = 0
fix_index = 0
wordbook_string = []
sacc_start_i = gs.sacc_start_index_i
sacc_end_i = gs.sacc_end_index_i
fix_start_i = gs.fix_start_index_i
fix_end_i = gs.fix_end_index_i
while (self.saccades[sacc_index][sacc_end_i] < start_index) and (sacc_index < len(self.saccades) - 1):
sacc_index += 1
while (self.fixations[fix_index][fix_end_i] < start_index) and (fix_index < len(self.fixations) - 1):
fix_index += 1
while sacc_index < len(self.saccades) and fix_index < len(self.fixations):
if (self.saccades[sacc_index][sacc_start_i] < end_index) and (self.fixations[fix_index][fix_start_i] < end_index):
if self.saccades[sacc_index][sacc_start_i] < self.fixations[fix_index][fix_start_i]:
wordbook_string.append(self.get_saccFix_wb_string_saccades(self.saccades[sacc_index]))
sacc_index += 1
else:
wordbook_string.append(self.get_saccFix_wb_string_fixations(self.fixations[fix_index]))
fix_index += 1
elif self.saccades[sacc_index][sacc_start_i] < end_index:
wordbook_string.append(self.get_saccFix_wb_string_saccades(self.saccades[sacc_index]))
sacc_index += 1
elif self.fixations[fix_index][fix_start_i] < end_index:
wordbook_string.append(self.get_saccFix_wb_string_fixations(self.fixations[fix_index]))
fix_index += 1
else:
sacc_index += 1
fix_index += 1
# compute all possible n-grams
ngrams = []
ngrams.append(wordbook_string)
for n in xrange(2,5):
ngrams.append([reduce(operator.add, wordbook_string[i:i+n]) for i in range(len(wordbook_string) - n)])
# compute histograms for n-grams
histograms=[]
for i in xrange(0,4):
histograms.append(dict((x, ngrams[i].count(x)) for x in self.saccFix_bins[i]))
# compute features from each histogram and append to one list
ngram_features = []
for h in histograms:
wb_feat = self.get_ngram_features(h)
ngram_features.extend(wb_feat)
return ngram_features
def get_FixSacWb_timed(self, start_index, end_index):
"""
returns list of fixations, saccades, blinks and the associated wordbook_string
for the time between start_index and end_index
"""
myfix = [fix for fix in self.fixations if (fix[gs.fix_start_index_i] > start_index
and fix[gs.fix_start_index_i] < end_index)
or (fix[gs.fix_end_index_i] > start_index and fix[gs.fix_end_index_i] < end_index)
or (fix[gs.fix_start_index_i] < start_index and fix[gs.fix_end_index_i] > end_index)]
mysacc = [sacc for sacc in self.saccades if (sacc[gs.sacc_start_index_i] > start_index and sacc[gs.sacc_start_index_i] < end_index)
or (sacc[gs.sacc_end_index_i] > start_index and sacc[gs.sacc_end_index_i] < end_index)
or (sacc[gs.sacc_start_index_i] < start_index and sacc[gs.sacc_end_index_i] > end_index)]
mywbs = [self.wordbook_string[self.saccades.index(sacc)] for sacc in mysacc]
blinks = [b for b in self.blinks if (b[gs.blink_start_index_i] > start_index and b[gs.blink_start_index_i] < end_index)
or (b[gs.blink_end_index_i] > start_index and b[gs.blink_end_index_i] < end_index)
or (b[gs.blink_start_index_i] < start_index and b[gs.blink_end_index_i] > end_index)]
return myfix, mysacc, mywbs, blinks
def get_sliding_windows(self, start_index, end_index, sliding_window_size, sliding_window_step_size):
"""
computes a list of time windows resulting from the sliding windows approach with the given sliding_window_size and sliding_window_step_size
"""
window_times = [] # list of [a, b, t_a, t_b] where a is the start index and b the end index
a = start_index
b = min(self.getEndWindow(a, sliding_window_size), end_index)
if self.check_sliding_window_conditions(a, b, end_index, sliding_window_size):
window_times.append([a, b, self.gaze[a, self.ti], self.gaze[b, self.ti]])
while b < end_index:
a = self.getStartWindow(a, sliding_window_step_size)
b = self.getEndWindow(a, sliding_window_size)
# check if some conditions are fulfilled and if so, add to the window list
if self.check_sliding_window_conditions(a, b, end_index, sliding_window_size):
window_times.append([a, b, self.gaze[a, self.ti], self.gaze[b, self.ti]])
return window_times
def check_sliding_window_conditions(self, a, b, end_index, sliding_window_size):
# discard too short or too long sliding windows
window_duration = self.gaze[b, self.ti] - self.gaze[a, self.ti]
min_duration = sliding_window_size - 0.1*sliding_window_size
max_duration = sliding_window_size + 0.1*sliding_window_size
if window_duration < min_duration or window_duration>max_duration:
return False
if b <= end_index:
errors_in_window = np.sum(self.errors[a:(b+1)]) / (b-a)
if errors_in_window < 0.5:
# discard window if no saccade or fixation was detected
fixations, saccades, wordbook_string, blinks = self.get_FixSacWb_timed(a,b)
if len(fixations) == 0 and len(saccades) == 0:
return False
xgaze = self.gaze[a:(b+1), self.xi][np.logical_not(self.errors[a:(b+1)])]
ygaze = self.gaze[a:(b+1), self.yi][np.logical_not(self.errors[a:(b+1)])]
# discard window if less than 5 samples
if len(xgaze) < 5:
return False
# exclude windows with more than 66% constant gaze
(xvals, xcounts) = np.unique(xgaze, return_counts=True)
(yvals, ycounts) = np.unique(ygaze, return_counts=True)
xcounts = xcounts / float(np.sum(xcounts))
ycounts = ycounts / float(np.sum(ycounts))
if xcounts.max() > 0.66 or ycounts.max() > 0.66:
return False
#accept every remaining window
return True
else:
# discard windows with more than 50% erroneous samples
return False
return False
def getStartWindow(self, old_start, sliding_window_step_size):
start = old_start
starttime = self.gaze[old_start, self.ti]
while starttime < self.gaze[old_start, self.ti] + sliding_window_step_size:
start += 1
if start >= len(self.gaze[:, self.xi])-1:
return len(self.gaze[:, self.xi])-1
starttime = self.gaze[start, self.ti]
return start
def getEndWindow(self, start, sliding_window_size):
end = start
while self.gaze[end, self.ti] < self.gaze[start, self.ti] + sliding_window_size:
end += 1
if end >= len(self.gaze[:, self.xi])-1:
return len(self.gaze[:, self.xi])-1
return end
def get_ngram_features(self, wb):
feat = []
feat.append(sum(x > 0 for x in wb.values())) # 1. number of n-grams that occur at least once ('>0')
feat.append(np.max(wb.values())) # 2. maximum count
nonzeros = [i for i in wb.values() if i]
if len(nonzeros) < 1:
feat.append(0)
else:
feat.append(min(nonzeros)) # 3. non-zero minimum count
feat.append(np.argmax(wb.values())) # 4. arg max
if len(nonzeros) < 1:
feat.append(0)
else:
feat.append(np.argmin(np.array(nonzeros))) # 5. arg min (over non-zero counts)
feat.append(feat[1] - feat[2]) # 6. range: max - min
feat.append(np.mean(wb.values())) # 7. mean of all counts
feat.append(np.var(wb.values())) # 8. var of all counts
return feat