from __future__ import division ''' For each experiment, this script tracks movement of the marker in the video from the information in aruco_frames.npy It then correlates this information with gaze data from pupil_positions.npy finally, for every target in the video (25 targets in calibration, 16 in test), it maps 3D marker position (mean position over the duration of pause) to the gaze position (mean position over the pause duration) and stores this info together with the projected 2D marker position in a separate npy file. the resulting file contains the ground truth data for this experiment. ''' import os, sys import numpy as np import matplotlib.pyplot as plt from pylab import rcParams from scipy.ndimage.filters import gaussian_filter1d as g1d from scipy import signal from sklearn.neighbors import NearestNeighbors as knn # from sklearn import svm from sklearn.cluster import AgglomerativeClustering from tracker import readCameraParams, Marker from util.tools import is_outlier, moving_average sys.path.append('..') # so we can import from pupil from pupil import player_methods from vector import Vector as v import pdb ROOT_DATA_DIR = '/home/mmbrian/HiWi/etra2016_mohsen/code/recording/data/participants' def unifiy_markers_per_frame(marker_data): ''' Since ArUco sometimes detects a marker twice in a frame, we need to either ignore one or somehow compute their mean. 
Also this method maps each final marker to its center's 3D and 2D position wrt scene camera ''' camera_matrix, dist_coeffs = readCameraParams() # in case of relocating camera.yml input the new path as cam_math mdata, mrdata = [], [] for fn in xrange(len(marker_data)): if len(marker_data[fn]) > 0: markers = map(lambda m: Marker.fromList(m), marker_data[fn]) markers = map(lambda m: np.array([np.array(m.getCenter()), np.array(m.getCenterInImage(camera_matrix, dist_coeffs))]), markers) marker = sum(markers)/len(markers) marker = [marker[0][0], marker[0][1], marker[0][2], marker[1][0], marker[1][1]] # marker_data[fn] = marker mdata.append(marker) mrdata.append(marker) else: # if marker is not detected, assign last detected position to this frame # marker_data[fn] = marker_data[fn-1] mdata.append(mdata[fn-1]) mrdata.append([]) # this contains real marker information (all tracked positions) # return marker_data return np.array(mdata), mrdata def fix_labels(labels, window = 2, elements = [0, 1], outliers = []): labels = list(labels) for i in xrange(window, len(labels)-window): neighborhood = labels[i-window:i+window+1] if outliers[i]: # removing this label from decision making neighborhood = neighborhood[:i] + neighborhood[i+1:] element_counts = [list(neighborhood).count(e) for e in elements] dominant_element = elements[element_counts.index(max(element_counts))] labels[i] = dominant_element return labels def find_intervals(labels, mean, marker_speed): ''' Given the label information of frame to frame motion speed, this method returns the frame intervals for which the marker is either "moving" or "not moving" Notice that len(labels) equals the number of frames minus one ''' nm_label = labels[0] intervals = [] curr_label, start, end = -1, -1, -1 not_moving = 0 for i in xrange(len(labels)): if curr_label < 0: # first label curr_label = labels[i] start = i else: if labels[i] != curr_label: # label changed end = i intervals.append([start, end, curr_label]) if curr_label == 
nm_label: not_moving+=1 curr_label = labels[i] start = i+1 end = len(labels) intervals.append([start, end, curr_label]) if curr_label == nm_label: not_moving+=1 # Now we do a post check to see if two non moving intervals are very close to each other, # the middle interval is most likely a misclassification # computing average interval length for moving intervals if (len(intervals) > 49 and not_moving > 25) or (len(intervals)>31 and not_moving>16): ret = merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True) return ret, sum(1 for e in ret if e[2] == nm_label) else: return intervals, not_moving def merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True): mlength = np.array([seg[1] - seg[0] for seg in intervals if seg[2] != nm_label]) nmlength = np.array([seg[1] - seg[0] for seg in intervals if seg[2] == nm_label]) if remove_outliers: mlength_outliers = mlength[is_outlier(mlength, thresh=3.5)] avg_m_length = (sum(mlength)-sum(mlength_outliers))/(mlength.size - mlength_outliers.size) nmlength_outliers = nmlength[is_outlier(nmlength, thresh=3.5)] avg_nm_length = (sum(nmlength)-sum(nmlength_outliers))/(nmlength.size - nmlength_outliers.size) else: avg_m_length = sum(mlength)/mlength.size avg_nm_length = sum(nmlength)/nmlength.size thresh = 3.5 # removes a moving interval if average length is at least this time larger than its length i = 1 ret = [] ret.append(intervals[0]) while i < len(intervals): length = intervals[i][1] - intervals[i][0] ratio, label = 1, intervals[i][2] if label == nm_label: ratio = avg_nm_length/length else: ratio = avg_m_length/length if ratio>=thresh: # average length is at least 2 times larger than the length of this interval # replace this interval by merge the two not moving intervals around it # check if average of elements in this interval is greater than mean if np.mean(marker_speed[intervals[i][0]:intervals[i][1]]) < mean: last_intv = ret.pop() ret.append([last_intv[0], intervals[i+1][1], 
1-label]) print 'Merged two intervals' i+=2 continue else: pass ret.append(intervals[i]) i+=1 return ret # def main(force=False): # rcParams['figure.figsize'] = 15, 7 # recordings_processed = 0 # recordings_successful = 0 # for d1 in os.listdir(ROOT_DATA_DIR): # if d1.startswith('p'): # every participant # d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/ # d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../ # for d3 in os.listdir(d2): # every recording # d4 = os.path.join(d2, d3) # .../pi/../00X/ # print '> Processing', d4 # frames_dir = os.path.join(d4, '_aruco_frames.npy') # if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking # print '> Recording does not contain marker data...' # continue # intervals_dir = os.path.join(d4, 'gaze_intervals.npy') # if os.path.isfile(intervals_dir): # print '> Recording already processed...' # if force: # print '> Processing again...' # else: # continue # marker_data = np.load(frames_dir) # # marker_data includes data on tracked markers per frame # # it's a list with as many entries as the number of video frames, each entry # # has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec # wt = np.load(os.path.join(d4, 'world_timestamps.npy')) # # Processing pupil positions # pp = np.load(os.path.join(d4, 'pupil_positions.npy')) # timestamp confidence id pos_x pos_y diameter # # pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right) # # converting each element to dictionary for correlation # pp = map(lambda e: dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], e)), pp) # pp_by_frame = player_methods.correlate_data(pp, wt) # # Keeping only pupil positions with nonzero confidence # pp_by_frame = map(lambda l: filter(lambda p: p['conf']>0, l), pp_by_frame) # # Computing a single pupil position for the frame by taking mean of all detected pupil positions # pp_by_frame = map(lambda data: # sum(np.array([pp['x'], pp['y']]) for pp in 
#                     data)/len(data) if data else np.array([-1, -1]), pp_by_frame)
#                 # Now each nonempty value of pp_by_frame is a tuple of (x, y) for pupil position in that frame
#                 # Checking if timestamps, markers per frame and pupil positions per frame are correlated
#                 assert len(marker_data) == len(wt) == len(pp_by_frame)
#                 # Good, now we need to find the frame ranges in which marker is not moving, for that we need the marker_data
#                 # and using the position info per frame, we can compute movement speed and detect when it is almost zero
#                 marker_data, mrdata = unifiy_markers_per_frame(marker_data)
#                 # Smoothing x and y coords
#                 marker_data[:, 3] = g1d(marker_data[:, 3], sigma=2)
#                 marker_data[:, 4] = g1d(marker_data[:, 4], sigma=2)
#                 marker_speed = []
#                 for fn, fnp1 in ((f, f+1) for f in xrange(len(marker_data)-1)):
#                     if marker_data[fnp1] != [] and marker_data[fn] != []:
#                         # dx = marker_data[fnp1][0] - marker_data[fn][0]
#                         # dy = marker_data[fnp1][1] - marker_data[fn][1]
#                         # dz = marker_data[fnp1][2] - marker_data[fn][2]
#                         # speed = np.sqrt(dx**2 + dy**2 + dz**2) * 100
#                         # print fn, fnp1, len(marker_data), marker_data[fnp1], marker_data[fn]
#                         dx = marker_data[fnp1][3] - marker_data[fn][3]
#                         dy = marker_data[fnp1][4] - marker_data[fn][4]
#                         speed = np.sqrt(dx**2 + dy**2)
#                         # print 'marker speed:', speed
#                         marker_speed.append(speed)
#                     else:
#                         marker_speed.append(marker_speed[-1]) # set speed to last speed if marker could not be detected
#                 # Performing binary clustering on marker speed
#                 model = AgglomerativeClustering(n_clusters=2, linkage="ward", affinity="euclidean")
#                 marker_speed = np.array(marker_speed)
#                 # Checking for outliers based on "median absolute deviation"
#                 outliers = is_outlier(marker_speed, thresh=3.5)
#                 print sum(outliers == True), 'outliers detected'
#                 # removing outliers
#                 outlier_inds = [i for i in xrange(outliers.size) if outliers[i]]
#                 marker_speed = list(np.delete(marker_speed, outlier_inds))
#                 # replacing removed outliers by average of their neighbours
#                 outliers_inds = sorted(outlier_inds)
#                 window = 1
#                 for ind in outlier_inds:
#                     start = max(ind-window, 0)
#                     neighbours = marker_speed[start:ind+window]
#                     new_val = sum(neighbours)/len(neighbours)
#                     marker_speed.insert(ind, new_val)
#                 marker_speed = np.array(marker_speed)
#                 # smoothed_signal = marker_speed[:]
#                 smoothed_signal = signal.medfilt(marker_speed, 13)
#                 # smoothed_signal = g1d(marker_speed, sigma=2)
#                 # smoothed_signal = moving_average(smoothed_signal, 7)
#                 model.fit(map(lambda e: [e], smoothed_signal))
#                 labels = fix_labels(model.labels_, window=1, outliers = outliers)
#                 outliers = map(lambda e: 10 if e else 5, outliers)
#                 mean = np.mean(smoothed_signal)
#                 intervals, nm = find_intervals(labels, mean, smoothed_signal)
#                 print '>', len(intervals), 'Intervals found in total.', nm, 'gaze intervals.'
#                 interval_display = []
#                 for dur in intervals:
#                     interval_display.extend([dur[2]]*(dur[1]-dur[0]+1))
#                 interval_display = interval_display[:-1]
#                 print len(interval_display), len(marker_data)-1, intervals[-1][1]-intervals[0][0]
#                 # print intervals
#                 # print labels
#                 # return
#                 # print len(marker_data), len(marker_speed)
#                 plt.plot(range(len(marker_data)-1), marker_speed, 'b',
#                     # range(len(marker_data)-1), labels, 'r',
#                     range(len(marker_data)-1), smoothed_signal, 'g',
#                     range(len(marker_data)-1), interval_display, 'r')
#                 # plt.show()
#                 # plt.clf()
#                 # return
#                 # plt.clf()
#                 recordings_processed += 1
#                 intervals_okay = True
#                 if not nm in [16, 25]:
#                     intervals_okay = False
#                     pdb.set_trace()
#                     print '> Storing odd figure...'
#                     plt.savefig('./temp/%s-%s__%snm.png' % (d1, d3, str(nm)))
#                     # print '> Entering manual override mode...'
#                     # print '> Enter halt to quit.'
#                     # # set manual_bypass to True in case you wanna discard changes in override mode
#                     # cmd = raw_input(':')
#                     # while cmd != 'halt' and cmd != 'pass':
#                     #     exec cmd in globals(), locals()
#                     #     cmd = raw_input(':')
#                 if intervals_okay:
#                     print '> Intervals seem okay.'
#                     plt.savefig(os.path.join(d4, 'marker_motion.png'))
#                     recordings_successful += 1
#                     # Store interval information
#                     # Use pp_by_frame and marker_data to compute gaze and target points corresponding to this interval
#                     gaze_intervals = intervals[::2] # starting from the first interval, gaze, moving, gaze, moving, gaze, ...
#                     t2d, t3d, p = [], [], []
#                     for intv in gaze_intervals:
#                         s, e = intv[0], intv[1]
#                         null_gaze, null_marker = 0, 0
#                         gaze_point = np.array([0, 0])
#                         marker_3d_position = np.array([0, 0, 0])
#                         marker_2d_position = np.array([0, 0])
#                         for fn in xrange(s, e+1):
#                             if all(pp_by_frame[fn]==np.array([-1, -1])):
#                                 null_gaze += 1
#                             else:
#                                 gaze_point = gaze_point + pp_by_frame[fn]
#                             if mrdata[fn] == []:
#                                 null_marker += 1
#                             else:
#                                 marker_3d_position = marker_3d_position + np.array(mrdata[fn][:3])
#                                 marker_2d_position = marker_2d_position + np.array(mrdata[fn][3:])
#                         gaze_point = gaze_point/(e-s+1-null_gaze)
#                         marker_3d_position = marker_3d_position/(e-s+1-null_marker)
#                         marker_2d_position = marker_2d_position/(e-s+1-null_marker)
#                         t2d.append(marker_2d_position)
#                         t3d.append(marker_3d_position)
#                         p.append(gaze_point)
#                     print '> Storing intervals, gaze data, and marker data...'
#                     np.save(intervals_dir, np.array(gaze_intervals))
#                     np.save(os.path.join(d4, 'p.npy'), np.array(p))
#                     np.save(os.path.join(d4, 't2d.npy'), np.array(t2d))
#                     np.save(os.path.join(d4, 't3d.npy'), np.array(t3d))
#     print '>', recordings_processed, 'recordings processed.', recordings_successful, 'successful.'
#     plt.clf()


PARTICIPANTS = ['p10', 'p16', 'p13', 'p24', 'p5', 'p14', 'p26', 'p12', 'p20',
                'p7', 'p15', 'p11', 'p21', 'p25']


def _mean_pupil_per_frame(pupil_positions, world_timestamps):
    '''
    Correlate pupil positions with the world frames and average them per frame.

    pupil_positions:  rows of (timestamp, confidence, id, pos_x, pos_y, diameter).
    world_timestamps: timestamps handed to player_methods.correlate_data.

    Returns a list with one np.array([x, y]) per frame: the mean over all pupil
    positions of that frame with nonzero confidence, or np.array([-1, -1]) when
    the frame has none.
    '''
    keys = ['timestamp', 'conf', 'id', 'x', 'y', 'diam']
    # converting each element to dictionary for correlation
    pp = [dict(zip(keys, e)) for e in pupil_positions]
    pp_by_frame = player_methods.correlate_data(pp, world_timestamps)
    means = []
    for frame_data in pp_by_frame:
        # Keeping only pupil positions with nonzero confidence
        confident = [p for p in frame_data if p['conf'] > 0]
        if confident:
            means.append(np.mean([[p['x'], p['y']] for p in confident], axis=0))
        else:
            means.append(np.array([-1, -1]))
    return means


def _frames_near_median_gaze(interval, pp_by_frame, mrdata, k=3):
    '''
    Pick the frames of one gaze interval whose pupil position is closest to the median.

    Only frames that have both a pupil position and a marker detection are
    considered. Among them, the frame whose pupil position is closest to the
    median pupil position is located, and its (up to) k nearest neighbours in
    pupil position space are collected.

    Returns (frame_numbers, median_pupil, marker_2d, marker_3d), where the marker
    values belong to the frame closest to the median, or None when the interval
    has no valid frame at all.
    '''
    s, e = int(interval[0]), int(interval[1])
    gpts, m3ds, m2ds, valid_frames = [], [], [], []
    for fn in range(s, e + 1):
        if all(pp_by_frame[fn] == np.array([-1, -1])) or len(mrdata[fn]) == 0:
            # either pupil detection failed or marker detection
            # the whole pupil-marker correspondence is invalid
            # ignore this frame
            continue
        gpts.append(pp_by_frame[fn])
        m3ds.append(np.array(mrdata[fn][:3]))
        m2ds.append(np.array(mrdata[fn][3:]))
        valid_frames.append(fn)
    if not valid_frames:
        return None

    # Computing the median pupil position
    final_p = np.median(gpts, axis=0)
    # Finding the closest pupil position to this median in the valid frames
    dists = [(v(pupil_position) - v(final_p)).mag for pupil_position in gpts]
    ind = min(range(len(gpts)), key=lambda j: dists[j])
    # Finding the k nearest pupil positions to this one; k cannot exceed the
    # number of available samples
    n_neighbors = min(k, len(gpts))
    nbrs = knn(n_neighbors=n_neighbors, algorithm='ball_tree').fit(gpts)
    _, indices = nbrs.kneighbors(gpts)
    frames_numbers = [valid_frames[j] for j in indices[ind]]
    return frames_numbers, final_p, m2ds[ind], m3ds[ind]


def main(force=False):
    '''
    For every recording of the participants in PARTICIPANTS, select per gaze
    interval the frames whose pupil position is closest to the interval's median
    pupil position and store their frame numbers in p_frames.npy.

    A recording is processed only if it contains both _aruco_frames.npy and
    gaze_intervals.npy. A recording in which some interval has no frame with
    both a pupil position and a marker detection is skipped without writing
    p_frames.npy.

    force: recordings that already have gaze_intervals.npy are processed only
           when this is True.
           NOTE(review): this step needs gaze_intervals.npy as input, so with
           force=False no recording is processed at all; confirm whether the
           check was meant to look at p_frames.npy instead.
    '''
    recordings_processed = 0
    recordings_successful = 0
    for d1 in os.listdir(ROOT_DATA_DIR):
        if d1 not in PARTICIPANTS: # every selected participant
            continue
        d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/
        d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../
        for d3 in os.listdir(d2): # every recording
            d4 = os.path.join(d2, d3) # .../pi/../00X/
            print('> Processing %s' % d4)
            frames_dir = os.path.join(d4, '_aruco_frames.npy')
            if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking
                print('> Recording does not contain marker data...')
                continue
            intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
            if not os.path.isfile(intervals_dir):
                # the gaze intervals are loaded below, nothing can be done without them
                print('> Recording does not contain gaze intervals...')
                continue
            print('> Recording already processed...')
            if not force:
                continue
            print('> Processing again...')

            # marker_data includes data on tracked markers per frame
            # it's a list with as many entries as the number of video frames, each entry
            # has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
            # NOTE(review): if this file holds an object array, recent NumPy needs allow_pickle=True here
            marker_data = np.load(frames_dir)
            wt = np.load(os.path.join(d4, 'world_timestamps.npy'))

            # Processing pupil positions
            # timestamp confidence id pos_x pos_y diameter
            # pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
            pp = np.load(os.path.join(d4, 'pupil_positions.npy'))
            pp_by_frame = _mean_pupil_per_frame(pp, wt)

            # Checking if timestamps, markers per frame and pupil positions per frame are correlated
            assert len(marker_data) == len(wt) == len(pp_by_frame)

            _, mrdata = unifiy_markers_per_frame(marker_data)
            gaze_intervals = np.load(intervals_dir)

            recordings_processed += 1
            print('> Intervals seem okay.')

            p_frames, p_med, t2d_med, t3d_med = [], [], [], []
            failed = False
            for intv in gaze_intervals:
                picked = _frames_near_median_gaze(intv, pp_by_frame, mrdata)
                if picked is None:
                    # this marker-pupil correspondence failed
                    print('> Failed to find reliable correspondece for a marker position...')
                    # In this case the data should be completely ignored
                    failed = True
                    break
                frames_numbers, final_p, m2d, m3d = picked
                # Eye images are taken from these frames; the pupil-marker
                # correspondence is final_p together with m2d / m3d
                p_frames.append(frames_numbers)
                p_med.append(final_p)
                t2d_med.append(m2d)
                t3d_med.append(m3d)
            if failed:
                continue

            recordings_successful += 1
            print('> gaze and marker data...')
            # np.save(intervals_dir, np.array(gaze_intervals))
            np.save(os.path.join(d4, 'p_frames.npy'), np.array(p_frames))
            # np.save(os.path.join(d4, 'p.npy'), np.array(p_med))
            # np.save(os.path.join(d4, 't2d.npy'), np.array(t2d_med))
            # np.save(os.path.join(d4, 't3d.npy'), np.array(t3d_med))
    print('> %s recordings processed. %s successful.'
          % (recordings_processed, recordings_successful))


if __name__ == '__main__':
    main(force=True)