gazesim/code/recording/process_recordings.py

454 lines
19 KiB
Python

from __future__ import division
'''
For each experiment, this script tracks the movement of the marker in the video using the information in aruco_frames.npy.
It then correlates this information with the gaze data from pupil_positions.npy.
Finally, for every target in the video (25 targets in calibration, 16 in test), it maps the 3D marker position (mean position over the duration of the pause)
to the gaze position (mean position over the same pause) and stores this information, together with the projected 2D marker position, in a separate npy file.
The resulting file contains the ground truth data for this experiment.
'''
import os, sys
import numpy as np
import matplotlib.pyplot as plt
from pylab import rcParams
from scipy.ndimage.filters import gaussian_filter1d as g1d
from scipy import signal
from sklearn.neighbors import NearestNeighbors as knn
# from sklearn import svm
from sklearn.cluster import AgglomerativeClustering
from tracker import readCameraParams, Marker
from util.tools import is_outlier, moving_average
sys.path.append('..') # so we can import from pupil
from pupil import player_methods
from vector import Vector as v
import pdb
# Directory that main() lists to find participant folders (entries whose name starts with 'p').
# NOTE(review): absolute, machine-specific path -- adjust before running on another machine.
ROOT_DATA_DIR = '/home/mmbrian/HiWi/etra2016_mohsen/code/recording/data/participants'
def unifiy_markers_per_frame(marker_data):
    '''
    Collapses the markers tracked in each frame into a single marker per frame.

    Since ArUco sometimes detects a marker twice in a frame, all detections of a frame are averaged.
    Each resulting marker is the list [x, y, z, u, v]: the first three values come from
    Marker.getCenter() and the last two from Marker.getCenterInImage(camera_matrix, dist_coeffs).

    marker_data: sequence with one entry per frame; each entry is a (possibly empty) sequence of
                 marker records accepted by Marker.fromList.

    Returns (mdata, mrdata):
      mdata  -- np.array with one row per frame. Frames without a detection repeat the last detected
                position; frames before the very first detection are filled with that first
                detection; if no frame has a detection at all, every row is NaN.
      mrdata -- list with one entry per frame holding only real detections: the averaged marker,
                or [] when nothing was detected in that frame.
    '''
    camera_matrix, dist_coeffs = readCameraParams() # in case of relocating camera.yml input the new path as cam_math
    mdata, mrdata = [], []
    leading_gaps = 0 # undetected frames seen before the first detection
    for frame_markers in marker_data:
        if len(frame_markers) > 0:
            markers = [Marker.fromList(m) for m in frame_markers]
            # The two centers are averaged separately so that no ragged array
            # (a 3-vector next to a 2-vector) has to be built.
            centers_3d = [np.array(m.getCenter()) for m in markers]
            centers_2d = [np.array(m.getCenterInImage(camera_matrix, dist_coeffs)) for m in markers]
            count = float(len(markers))
            mean_3d = sum(centers_3d) / count
            mean_2d = sum(centers_2d) / count
            marker = [mean_3d[0], mean_3d[1], mean_3d[2], mean_2d[0], mean_2d[1]]
            if leading_gaps:
                # There was nothing to copy forward for these frames, so copy backwards instead
                mdata.extend([marker] * leading_gaps)
                leading_gaps = 0
            mdata.append(marker)
            mrdata.append(marker)
        else:
            if mdata: # if marker is not detected, assign last detected position to this frame
                mdata.append(mdata[-1])
            else: # no earlier detection exists yet; filled in once the first one shows up
                leading_gaps += 1
            mrdata.append([]) # this contains real marker information (all tracked positions)
    if leading_gaps:
        # The marker was never detected: keep one row per frame, but mark every row as unknown
        mdata = [[np.nan] * 5 for _ in range(leading_gaps)]
    return np.array(mdata), mrdata
def fix_labels(labels, window = 2, elements = (0, 1), outliers = None):
    '''
    Smooths a label sequence by majority vote over a sliding window.

    For every index i that has `window` neighbours on both sides, labels[i] is replaced by the
    element of `elements` that occurs most often in labels[i-window : i+window+1]. On a tie the
    element listed first in `elements` wins. Already smoothed values are used when voting for
    later indices, because the working copy is updated as the scan proceeds.

    labels:   sequence of labels; it is copied, the input is not modified.
    window:   number of neighbours considered on each side.
    elements: the label values that may be assigned.
    outliers: optional sequence of flags aligned with `labels`. When the flag at i is true, the
              label at i itself is excluded from the vote for index i. None (or a sequence shorter
              than `labels`) means the uncovered indices are not treated as outliers.

    Returns the smoothed labels as a list.
    '''
    labels = list(labels)
    n_flags = 0 if outliers is None else len(outliers)
    for i in range(window, len(labels) - window):
        neighborhood = labels[i - window:i + window + 1]
        if i < n_flags and outliers[i]: # removing this label from decision making
            # Inside the window the label under inspection sits at position `window`, not at i
            neighborhood = neighborhood[:window] + neighborhood[window + 1:]
        element_counts = [neighborhood.count(e) for e in elements]
        dominant_element = elements[element_counts.index(max(element_counts))]
        labels[i] = dominant_element
    return labels
def find_intervals(labels, mean, marker_speed):
    '''
    Given the label information of frame to frame motion speed, this method returns the frame
    intervals for which the marker is either "moving" or "not moving".
    Notice that len(labels) equals the number of frames minus one.

    labels:       sequence of per-transition labels; the very first label is taken to be the
                  "not moving" label.
    mean:         speed threshold handed on to merge_intervals.
    marker_speed: per-transition speed signal handed on to merge_intervals.

    Returns (intervals, not_moving): intervals is a list of [start, end, label] entries and
    not_moving is the number of intervals carrying the "not moving" label. Empty `labels`
    yield ([], 0).
    '''
    if len(labels) == 0:
        return [], 0
    nm_label = labels[0]
    intervals = []
    not_moving = 0
    # Start from the first label directly, so that every label value (also a negative one) is
    # treated alike
    curr_label, start = labels[0], 0
    for i in range(1, len(labels)):
        if labels[i] != curr_label: # label changed
            intervals.append([start, i, curr_label])
            if curr_label == nm_label:
                not_moving += 1
            curr_label = labels[i]
            start = i + 1
    intervals.append([start, len(labels), curr_label])
    if curr_label == nm_label:
        not_moving += 1
    # Now we do a post check to see if two non moving intervals are very close to each other,
    # the middle interval is most likely a misclassification.
    # The check only runs when there are more intervals than expected (presumably 25 or 16 pauses,
    # i.e. 49 or 31 alternating intervals -- confirm against the recording protocol).
    if (len(intervals) > 49 and not_moving > 25) or (len(intervals) > 31 and not_moving > 16):
        ret = merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True)
        return ret, sum(1 for e in ret if e[2] == nm_label)
    return intervals, not_moving
def merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True):
    '''
    Removes intervals that are most likely misclassified by merging their two neighbours.

    An interval is absorbed when
      - its length (end - start) is at least 3.5 times smaller than the average length of the
        intervals sharing its label, and
      - the mean of marker_speed[start:end] is below `mean`, and
      - another interval follows it.
    The interval before it, the interval itself and the interval after it then become a single
    interval labelled 1 - label (labels are assumed to be 0 and 1). Zero-length intervals and a
    short interval at the very end are kept unchanged.

    intervals:       list of [start, end, label] entries, e.g. as built by find_intervals.
    nm_label:        label value that marks "not moving" intervals.
    mean:            speed threshold used for the second condition.
    marker_speed:    sequence of speeds indexed like the interval bounds.
    remove_outliers: if True, lengths flagged by is_outlier(..., thresh=3.5) are left out of the
                     average lengths.

    Returns the new list of intervals; the input list is not modified.
    '''
    if len(intervals) == 0:
        return []
    mlength = np.array([seg[1] - seg[0] for seg in intervals if seg[2] != nm_label])
    nmlength = np.array([seg[1] - seg[0] for seg in intervals if seg[2] == nm_label])
    if remove_outliers:
        mlength_outliers = mlength[is_outlier(mlength, thresh=3.5)]
        avg_m_length = (sum(mlength)-sum(mlength_outliers))/(mlength.size - mlength_outliers.size)
        nmlength_outliers = nmlength[is_outlier(nmlength, thresh=3.5)]
        avg_nm_length = (sum(nmlength)-sum(nmlength_outliers))/(nmlength.size - nmlength_outliers.size)
    else:
        avg_m_length = sum(mlength)/mlength.size
        avg_nm_length = sum(nmlength)/nmlength.size
    thresh = 3.5 # an interval is a merge candidate if the average length is at least this many times its own length
    ret = [intervals[0]]
    i = 1
    while i < len(intervals):
        start, end, label = intervals[i][0], intervals[i][1], intervals[i][2]
        length = end - start
        avg_length = avg_nm_length if label == nm_label else avg_m_length
        # A zero-length interval has no speed samples to judge it by, so it is never merged
        is_short = length > 0 and avg_length/length >= thresh
        # Merging needs a following interval to extend into
        has_next = i + 1 < len(intervals)
        # Only a slow interval (mean speed below the threshold) is considered misclassified
        if is_short and has_next and np.mean(marker_speed[start:end]) < mean:
            # replace this interval by merging the two intervals around it
            last_intv = ret.pop()
            ret.append([last_intv[0], intervals[i+1][1], 1-label])
            print('Merged two intervals')
            i += 2
            continue
        ret.append(intervals[i])
        i += 1
    return ret
# def main(force=False):
# rcParams['figure.figsize'] = 15, 7
# recordings_processed = 0
# recordings_successful = 0
# for d1 in os.listdir(ROOT_DATA_DIR):
# if d1.startswith('p'): # every participant
# d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/
# d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../
# for d3 in os.listdir(d2): # every recording
# d4 = os.path.join(d2, d3) # .../pi/../00X/
# print '> Processing', d4
# frames_dir = os.path.join(d4, '_aruco_frames.npy')
# if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking
# print '> Recording does not contain marker data...'
# continue
# intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
# if os.path.isfile(intervals_dir):
# print '> Recording already processed...'
# if force:
# print '> Processing again...'
# else:
# continue
# marker_data = np.load(frames_dir)
# # marker_data includes data on tracked markers per frame
# # it's a list with as many entries as the number of video frames, each entry
# # has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
# wt = np.load(os.path.join(d4, 'world_timestamps.npy'))
# # Processing pupil positions
# pp = np.load(os.path.join(d4, 'pupil_positions.npy')) # timestamp confidence id pos_x pos_y diameter
# # pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
# # converting each element to dictionary for correlation
# pp = map(lambda e: dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], e)), pp)
# pp_by_frame = player_methods.correlate_data(pp, wt)
# # Keeping only pupil positions with nonzero confidence
# pp_by_frame = map(lambda l: filter(lambda p: p['conf']>0, l), pp_by_frame)
# # Computing a single pupil position for the frame by taking mean of all detected pupil positions
# pp_by_frame = map(lambda data:
# sum(np.array([pp['x'], pp['y']]) for pp in data)/len(data) if data else np.array([-1, -1]), pp_by_frame)
# # Now each nonempty value of pp_by_frame is a tuple of (x, y) for pupil position in that frame
# # Checking if timestamps, markers per frame and pupil positions per frame are correlated
# assert len(marker_data) == len(wt) == len(pp_by_frame)
# # Good, now we need to find the frame ranges in which marker is not moving, for that we need the marker_data
# # and using the position info per frame, we can compute movement speed and detect when it is it almost zero
# marker_data, mrdata = unifiy_markers_per_frame(marker_data)
# # Smoothing x and y coords
# marker_data[:, 3] = g1d(marker_data[:, 3], sigma=2)
# marker_data[:, 4] = g1d(marker_data[:, 4], sigma=2)
# marker_speed = []
# for fn, fnp1 in ((f, f+1) for f in xrange(len(marker_data)-1)):
# if marker_data[fnp1] != [] and marker_data[fn] != []:
# # dx = marker_data[fnp1][0] - marker_data[fn][0]
# # dy = marker_data[fnp1][1] - marker_data[fn][1]
# # dz = marker_data[fnp1][2] - marker_data[fn][2]
# # speed = np.sqrt(dx**2 + dy**2 + dz**2) * 100
# # print fn, fnp1, len(marker_data), marker_data[fnp1], marker_data[fn]
# dx = marker_data[fnp1][3] - marker_data[fn][3]
# dy = marker_data[fnp1][4] - marker_data[fn][4]
# speed = np.sqrt(dx**2 + dy**2)
# # print 'marker speed:', speed
# marker_speed.append(speed)
# else:
# marker_speed.append(marker_speed[-1]) # set speed to last speed if marker could not be detected
# # Performing binary clustering on marker speed
# model = AgglomerativeClustering(n_clusters=2, linkage="ward", affinity="euclidean")
# marker_speed = np.array(marker_speed)
# # Checking for outliers based on "median absolute deviation"
# outliers = is_outlier(marker_speed, thresh=3.5)
# print sum(outliers == True), 'outliers detected'
# # removing outliers
# outlier_inds = [i for i in xrange(outliers.size) if outliers[i]]
# marker_speed = list(np.delete(marker_speed, outlier_inds))
# # replacing removed outliers by average of their neighbours
# outliers_inds = sorted(outlier_inds)
# window = 1
# for ind in outlier_inds:
# start = max(ind-window, 0)
# neighbours = marker_speed[start:ind+window]
# new_val = sum(neighbours)/len(neighbours)
# marker_speed.insert(ind, new_val)
# marker_speed = np.array(marker_speed)
# # smoothed_signal = marker_speed[:]
# smoothed_signal = signal.medfilt(marker_speed, 13)
# # smoothed_signal = g1d(marker_speed, sigma=2)
# # smoothed_signal = moving_average(smoothed_signal, 7)
# model.fit(map(lambda e: [e], smoothed_signal))
# labels = fix_labels(model.labels_, window=1, outliers = outliers)
# outliers = map(lambda e: 10 if e else 5, outliers)
# mean = np.mean(smoothed_signal)
# intervals, nm = find_intervals(labels, mean, smoothed_signal)
# print '>', len(intervals), 'Intervals found in total.', nm, 'gaze intervals.'
# interval_display = []
# for dur in intervals:
# interval_display.extend([dur[2]]*(dur[1]-dur[0]+1))
# interval_display = interval_display[:-1]
# print len(interval_display), len(marker_data)-1, intervals[-1][1]-intervals[0][0]
# # print intervals
# # print labels
# # return
# # print len(marker_data), len(marker_speed)
# plt.plot(range(len(marker_data)-1), marker_speed, 'b',
# # range(len(marker_data)-1), labels, 'r',
# range(len(marker_data)-1), smoothed_signal, 'g',
# range(len(marker_data)-1), interval_display, 'r')
# # plt.show()
# # plt.clf()
# # return
# # plt.clf()
# recordings_processed += 1
# intervals_okay = True
# if not nm in [16, 25]:
# intervals_okay = False
# pdb.set_trace()
# print '> Storing odd figure...'
# plt.savefig('./temp/%s-%s__%snm.png' % (d1, d3, str(nm)))
# # print '> Entering manual override mode...'
# # print '> Enter halt to quit.'
# # # set manual_bypass to True in case you wanna discard changes in override mode
# # cmd = raw_input(':')
# # while cmd != 'halt' and cmd != 'pass':
# # exec cmd in globals(), locals()
# # cmd = raw_input(':')
# if intervals_okay:
# print '> Intervals seem okay.'
# plt.savefig(os.path.join(d4, 'marker_motion.png'))
# recordings_successful += 1
# # Store interval information
# # Use pp_by_frame and marker_data to compute gaze and target points corresponding to this interval
# gaze_intervals = intervals[::2] # starting from the first interval, gaze, moving, gaze, moving, gaze, ...
# t2d, t3d, p = [], [], []
# for intv in gaze_intervals:
# s, e = intv[0], intv[1]
# null_gaze, null_marker = 0, 0
# gaze_point = np.array([0, 0])
# marker_3d_position = np.array([0, 0, 0])
# marker_2d_position = np.array([0, 0])
# for fn in xrange(s, e+1):
# if all(pp_by_frame[fn]==np.array([-1, -1])):
# null_gaze += 1
# else:
# gaze_point = gaze_point + pp_by_frame[fn]
# if mrdata[fn] == []:
# null_marker += 1
# else:
# marker_3d_position = marker_3d_position + np.array(mrdata[fn][:3])
# marker_2d_position = marker_2d_position + np.array(mrdata[fn][3:])
# gaze_point = gaze_point/(e-s+1-null_gaze)
# marker_3d_position = marker_3d_position/(e-s+1-null_marker)
# marker_2d_position = marker_2d_position/(e-s+1-null_marker)
# t2d.append(marker_2d_position)
# t3d.append(marker_3d_position)
# p.append(gaze_point)
# print '> Storing intervals, gaze data, and marker data...'
# np.save(intervals_dir, np.array(gaze_intervals))
# np.save(os.path.join(d4, 'p.npy'), np.array(p))
# np.save(os.path.join(d4, 't2d.npy'), np.array(t2d))
# np.save(os.path.join(d4, 't3d.npy'), np.array(t3d))
# print '>', recordings_processed, 'recordings processed.', recordings_successful, 'successful.'
# plt.clf()
# Participant folder names that main() processes; every other folder under ROOT_DATA_DIR is skipped.
PARTICIPANTS = ['p10', 'p16', 'p13', 'p24', 'p5', 'p14', 'p26', 'p12', 'p20', 'p7', 'p15', 'p11', 'p21', 'p25']
def main(force=False):
    '''
    For every recording of the participants listed in PARTICIPANTS, this goes through the stored
    gaze intervals (gaze_intervals.npy), finds in each interval the pupil position closest to the
    interval's median pupil position, and stores the frame numbers of that position's nearest
    neighbours in p_frames.npy inside the recording folder.

    force: if False, a recording that already contains gaze_intervals.npy is skipped;
           if True it is processed again.

    NOTE(review): gaze_intervals.npy is loaded unconditionally further down, so a recording
    without that file presumably fails at np.load -- confirm whether force=False is still a
    supported mode.
    NOTE(review): the nesting of this function was reconstructed from a listing without
    indentation -- confirm against the original file.
    '''
    recordings_processed = 0
    recordings_successful = 0
    for d1 in os.listdir(ROOT_DATA_DIR):
        if d1.startswith('p'): # every participant
            if not d1 in PARTICIPANTS:
                continue
            d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/
            # Only the first entry of the participant folder is entered
            d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../
            for d3 in os.listdir(d2): # every recording
                d4 = os.path.join(d2, d3) # .../pi/../00X/
                print '> Processing', d4
                frames_dir = os.path.join(d4, '_aruco_frames.npy')
                if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking
                    print '> Recording does not contain marker data...'
                    continue
                intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
                if os.path.isfile(intervals_dir):
                    print '> Recording already processed...'
                    if force:
                        print '> Processing again...'
                    else:
                        continue
                marker_data = np.load(frames_dir)
                # marker_data includes data on tracked markers per frame
                # it's a list with as many entries as the number of video frames, each entry
                # has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
                wt = np.load(os.path.join(d4, 'world_timestamps.npy'))
                # Processing pupil positions
                pp = np.load(os.path.join(d4, 'pupil_positions.npy')) # timestamp confidence id pos_x pos_y diameter
                # pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
                # converting each element to dictionary for correlation
                pp = map(lambda e: dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], e)), pp)
                pp_by_frame = player_methods.correlate_data(pp, wt)
                # Keeping only pupil positions with nonzero confidence
                pp_by_frame = map(lambda l: filter(lambda p: p['conf']>0, l), pp_by_frame)
                # Computing a single pupil position for the frame by taking mean of all detected pupil positions
                # (frames without any remaining pupil position get the placeholder [-1, -1])
                pp_by_frame = map(lambda data:
                    sum(np.array([pp['x'], pp['y']]) for pp in data)/len(data) if data else np.array([-1, -1]), pp_by_frame)
                # Now each nonempty value of pp_by_frame is a tuple of (x, y) for pupil position in that frame
                # Checking if timestamps, markers per frame and pupil positions per frame are correlated
                assert len(marker_data) == len(wt) == len(pp_by_frame)
                # Good, now we need to find the frame ranges in which marker is not moving, for that we need the marker_data
                # and using the position info per frame, we can compute movement speed and detect when it is almost zero
                marker_data, mrdata = unifiy_markers_per_frame(marker_data)
                # The intervals are not recomputed here; the previously stored ones are reused
                gaze_intervals = np.load(intervals_dir)
                recordings_processed += 1
                intervals_okay = True # always True here, so the branch below always runs
                if intervals_okay:
                    print '> Intervals seem okay.'
                    recordings_successful += 1
                    t2d, t3d, p = [], [], []
                    t2d_med, t3d_med, p_med, p_frames = [], [], [], []
                    for intv in gaze_intervals:
                        s, e = intv[0], intv[1]
                        null_gaze, null_marker = 0, 0
                        gaze_point = np.array([0, 0])
                        marker_3d_position = np.array([0, 0, 0])
                        marker_2d_position = np.array([0, 0])
                        gpts, m3ds, m2ds = [], [], []
                        valid_frames = []
                        for fn in xrange(s, e+1): # the interval end is inclusive
                            if all(pp_by_frame[fn]==np.array([-1, -1])) or mrdata[fn] == []:
                                # either pupil detection failed or marker detection
                                # the whole pupil-marker correspondence is invalid
                                # ignore this frame
                                pass
                            else:
                                gpts.append(pp_by_frame[fn])
                                marker_3d_position = marker_3d_position + np.array(mrdata[fn][:3])
                                marker_2d_position = marker_2d_position + np.array(mrdata[fn][3:])
                                m3ds.append(np.array(mrdata[fn][:3]))
                                m2ds.append(np.array(mrdata[fn][3:]))
                                valid_frames.append(fn)
                        if not len(valid_frames):
                            # this marker-pupil correspondence failed
                            print '> Failed to find reliable correspondece for a marker position...'
                            # In this case participant data should be completely ignored
                            # return
                            # NOTE(review): nothing stops execution here, so the code below still runs
                            # with empty gpts -- confirm the intended handling
                        # Computing the median pupil position
                        final_p = np.median(gpts, axis=0)
                        p_med.append(final_p)
                        # Finding the closest pupil position to this median in the valid frames
                        dists = map(lambda pupil_position: (v(pupil_position)-v(final_p)).mag, gpts)
                        dists = zip(range(len(gpts)), dists)
                        closest = min(dists, key=lambda pair:pair[1])
                        # Getting the index for this position
                        ind = closest[0]
                        # Finding the k nearest pupil position to this one
                        k = 3
                        # Retry with a smaller k whenever a ValueError is raised
                        # (presumably because there are fewer than k valid pupil positions -- confirm)
                        while True:
                            try:
                                nbrs = knn(n_neighbors=k, algorithm='ball_tree').fit(gpts)
                                dists, indices = nbrs.kneighbors(gpts)
                                break
                            except ValueError, err:
                                k-=1
                        nearest_ind = indices[ind]
                        # Translating positions within gpts back to video frame numbers
                        frames_numbers = map(lambda i: valid_frames[i], nearest_ind)
                        p_frames.append(frames_numbers)
                        # Now we take eye images from these frames
                        # Also the pupil-marker correspondence is now final_p and m2ds[ind] m3d[ind]
                        t2d_med.append(m2ds[ind])
                        t3d_med.append(m3ds[ind])
                        # t2d_med.append(np.median(m2ds, axis=0))
                        # t3d_med.append(np.median(m3ds, axis=0))
                    print '> gaze and marker data...'
                    # Only p_frames.npy is written; the other outputs are currently disabled
                    # np.save(intervals_dir, np.array(gaze_intervals))
                    np.save(os.path.join(d4, 'p_frames.npy'), np.array(p_frames))
                    # np.save(os.path.join(d4, 'p.npy'), np.array(p_med))
                    # np.save(os.path.join(d4, 't2d.npy'), np.array(t2d_med))
                    # np.save(os.path.join(d4, 't3d.npy'), np.array(t3d_med))
                # Running totals so far
                print '>', recordings_processed, 'recordings processed.', recordings_successful, 'successful.'
# Script entry point; force=True means recordings that already contain gaze_intervals.npy are processed again
if __name__ == '__main__':
    main(force=True)