454 lines
No EOL
19 KiB
Python
454 lines
No EOL
19 KiB
Python
from __future__ import division
|
|
'''
For each experiment, this script tracks the movement of the marker in the video using the information in aruco_frames.npy.
It then correlates this information with the gaze data from pupil_positions.npy.
Finally, for every target in the video (25 targets in calibration, 16 in test), it maps the 3D marker position (mean position over the duration of the pause)
to the gaze position (mean position over the pause duration) and stores this information, together with the projected 2D marker position, in a separate npy file.
The resulting file contains the ground truth data for this experiment.
'''
|
|
import os, sys
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from pylab import rcParams
|
|
from scipy.ndimage.filters import gaussian_filter1d as g1d
|
|
from scipy import signal
|
|
|
|
from sklearn.neighbors import NearestNeighbors as knn
|
|
|
|
# from sklearn import svm
|
|
from sklearn.cluster import AgglomerativeClustering
|
|
|
|
from tracker import readCameraParams, Marker
|
|
from util.tools import is_outlier, moving_average
|
|
|
|
sys.path.append('..') # so we can import from pupil
|
|
from pupil import player_methods
|
|
from vector import Vector as v
|
|
|
|
import pdb
|
|
|
|
ROOT_DATA_DIR = '/home/mmbrian/HiWi/etra2016_mohsen/code/recording/data/participants'
|
|
|
|
def unifiy_markers_per_frame(marker_data):
    '''
    Collapse the ArUco detections of every frame into one marker position.

    Since ArUco sometimes detects a marker twice in a frame, all detections of
    a frame are averaged. Each resulting marker is stored as
    [x3d, y3d, z3d, x2d, y2d]: the marker center's 3D position followed by its
    2D position, both wrt the scene camera (as provided by Marker.getCenter
    and Marker.getCenterInImage).

    marker_data: one entry per video frame, each entry being the (possibly
                 empty) list of markers tracked in that frame.

    Returns (mdata, mrdata):
      mdata  -- np.array with one row per frame. A frame without detection
                repeats the last detected position; frames preceding the very
                first detection are filled with that first detection (they
                used to raise an IndexError). If no frame contains a marker
                at all, mdata is empty.
      mrdata -- list with one entry per frame holding the real marker
                information only: the averaged marker, or [] when nothing was
                detected in that frame.
    '''
    camera_matrix, dist_coeffs = readCameraParams() # in case of relocating camera.yml input the new path as cam_math
    mdata, mrdata = [], []
    leading_gaps = 0  # undetected frames seen before the first detection
    for detections in marker_data:
        if len(detections) > 0:
            markers = [Marker.fromList(m) for m in detections]
            # 3D and 2D centers are averaged separately so that no ragged
            # (3-vector, 2-vector) array has to be built
            center_3d = np.mean([np.array(m.getCenter()) for m in markers], axis=0)
            center_2d = np.mean([np.array(m.getCenterInImage(camera_matrix, dist_coeffs))
                                 for m in markers], axis=0)
            marker = [center_3d[0], center_3d[1], center_3d[2], center_2d[0], center_2d[1]]
            if leading_gaps:
                # back-fill the frames that had no earlier position to reuse
                mdata.extend([marker] * leading_gaps)
                leading_gaps = 0
            mdata.append(marker)
            mrdata.append(marker)
        else:
            # if marker is not detected, assign last detected position to this frame
            if mdata:
                mdata.append(mdata[-1])
            else:
                leading_gaps += 1
            mrdata.append([])  # this contains real marker information (all tracked positions)
    return np.array(mdata), mrdata
|
|
|
|
def fix_labels(labels, window=2, elements=(0, 1), outliers=None):
    '''
    Smooth a label sequence with a sliding majority vote.

    Every label that has `window` neighbours on both sides is replaced by the
    element of `elements` occurring most often in the surrounding
    2*window+1 labels (ties go to the element listed first). Replacements are
    applied in place while sweeping left to right, so already corrected
    labels take part in the votes that follow.

    labels:   sequence of labels (list or numpy array).
    window:   number of neighbours considered on each side.
    elements: the candidate labels to vote for.
    outliers: optional per-label flags (list or numpy array). When
              outliers[i] is true, label i itself is left out of its own
              vote. None or an empty sequence disables this.

    Returns the corrected labels as a new list.
    '''
    labels = list(labels)
    # len() instead of truthiness: outliers may be a numpy boolean array
    n_flags = 0 if outliers is None else len(outliers)
    for i in range(window, len(labels) - window):
        neighborhood = labels[i - window:i + window + 1]
        if i < n_flags and outliers[i]:
            # removing this label from decision making; inside the slice the
            # inspected label sits at position `window`, not at `i`
            neighborhood = neighborhood[:window] + neighborhood[window + 1:]
        element_counts = [neighborhood.count(e) for e in elements]
        dominant_element = elements[element_counts.index(max(element_counts))]
        labels[i] = dominant_element
    return labels
|
|
|
|
def find_intervals(labels, mean, marker_speed):
    '''
    Given the label information of frame to frame motion speed, this method returns the frame
    intervals for which the marker is either "moving" or "not moving".
    Notice that len(labels) equals the number of frames minus one.

    The label of the first entry is taken to be the "not moving" label.
    Every interval is a list [start, end, label]; a new interval is closed at
    the index where the label changes and the next one starts one index later.

    labels:       sequence of per-step labels.
    mean:         mean marker speed, only forwarded to merge_intervals.
    marker_speed: per-step marker speed, only forwarded to merge_intervals.

    Returns (intervals, not_moving) where not_moving is the number of
    intervals carrying the "not moving" label. An empty `labels` yields
    ([], 0).
    '''
    n_labels = len(labels)
    if n_labels == 0:
        return [], 0

    nm_label = labels[0]
    intervals = []
    # Seeding from the first label (instead of a negative sentinel) keeps the
    # segmentation correct for label values below zero as well
    curr_label, start = labels[0], 0
    for i in range(1, n_labels):
        if labels[i] != curr_label:  # label changed
            intervals.append([start, i, curr_label])
            curr_label = labels[i]
            start = i + 1
    intervals.append([start, n_labels, curr_label])
    not_moving = sum(1 for seg in intervals if seg[2] == nm_label)

    # Now we do a post check to see if two non moving intervals are very close to each other,
    # the middle interval is most likely a misclassification.
    # This only runs when more intervals were found than the 25 (calibration)
    # or 16 (test) targets can explain: 2*25-1 = 49 and 2*16-1 = 31.
    if (len(intervals) > 49 and not_moving > 25) or (len(intervals) > 31 and not_moving > 16):
        ret = merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True)
        return ret, sum(1 for seg in ret if seg[2] == nm_label)
    return intervals, not_moving
|
|
|
|
|
|
|
|
def _average_interval_length(lengths, remove_outliers):
    '''Mean of `lengths` (optionally without outliers); 0.0 if nothing is left to average.'''
    if lengths.size == 0:
        return 0.0
    total, count = sum(lengths), lengths.size
    if remove_outliers:
        outlier_lengths = lengths[is_outlier(lengths, thresh=3.5)]
        total -= sum(outlier_lengths)
        count -= outlier_lengths.size
    if count == 0:
        return 0.0
    return float(total) / count


def merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True):
    '''
    Remove intervals that are most likely misclassifications by merging them
    into their neighbours.

    An interval is merged when both hold:
      * the average length of the intervals with the same kind of label
        (moving / not moving) is at least 3.5 times its own length, and
      * the mean of marker_speed over the interval is below `mean`.
    The interval, its predecessor in the result and its successor then become
    one interval labelled 1 - label (labels are expected to be 0 and 1). A
    short interval at the very end has no successor and is merged into its
    predecessor only. The first interval is always kept as it is.

    intervals:       list of [start, end, label].
    nm_label:        the label that stands for "not moving".
    mean:            speed threshold, see above.
    marker_speed:    per-step marker speed, indexed by the interval bounds.
    remove_outliers: leave outlier lengths (as flagged by is_outlier) out of
                     the average lengths.

    Returns the new list of intervals; [] for an empty input.
    '''
    if len(intervals) == 0:
        return []

    mlength = np.array([seg[1] - seg[0] for seg in intervals if seg[2] != nm_label])
    nmlength = np.array([seg[1] - seg[0] for seg in intervals if seg[2] == nm_label])
    avg_m_length = _average_interval_length(mlength, remove_outliers)
    avg_nm_length = _average_interval_length(nmlength, remove_outliers)

    thresh = 3.5  # an interval is a merge candidate if the average length is at least this many times its length
    ret = [intervals[0]]
    i = 1
    while i < len(intervals):
        start, end, label = intervals[i][0], intervals[i][1], intervals[i][2]
        length = end - start
        avg_length = avg_nm_length if label == nm_label else avg_m_length
        # a zero-length interval covers no speed sample, so there is nothing
        # to judge it by; it is kept untouched
        is_short = length > 0 and avg_length / length >= thresh
        # only merge if the average speed inside the interval is below the mean
        if is_short and np.mean(marker_speed[start:end]) < mean:
            last_intv = ret.pop()
            if i + 1 < len(intervals):
                # replace this interval by merging the two intervals around it
                ret.append([last_intv[0], intervals[i + 1][1], 1 - label])
                i += 2
            else:
                # last interval: there is no right-hand neighbour to absorb
                ret.append([last_intv[0], end, 1 - label])
                i += 1
            print('Merged two intervals')
            continue
        ret.append(intervals[i])
        i += 1
    return ret
|
|
|
|
|
|
# def main(force=False):
|
|
# rcParams['figure.figsize'] = 15, 7
|
|
# recordings_processed = 0
|
|
# recordings_successful = 0
|
|
# for d1 in os.listdir(ROOT_DATA_DIR):
|
|
# if d1.startswith('p'): # every participant
|
|
# d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/
|
|
# d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../
|
|
# for d3 in os.listdir(d2): # every recording
|
|
# d4 = os.path.join(d2, d3) # .../pi/../00X/
|
|
# print '> Processing', d4
|
|
# frames_dir = os.path.join(d4, '_aruco_frames.npy')
|
|
# if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking
|
|
# print '> Recording does not contain marker data...'
|
|
# continue
|
|
# intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
|
|
# if os.path.isfile(intervals_dir):
|
|
# print '> Recording already processed...'
|
|
# if force:
|
|
# print '> Processing again...'
|
|
# else:
|
|
# continue
|
|
|
|
# marker_data = np.load(frames_dir)
|
|
# # marker_data includes data on tracked markers per frame
|
|
# # it's a list with as many entries as the number of video frames, each entry
|
|
# # has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
|
|
# wt = np.load(os.path.join(d4, 'world_timestamps.npy'))
|
|
# # Processing pupil positions
|
|
# pp = np.load(os.path.join(d4, 'pupil_positions.npy')) # timestamp confidence id pos_x pos_y diameter
|
|
# # pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
|
|
# # converting each element to dictionary for correlation
|
|
# pp = map(lambda e: dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], e)), pp)
|
|
# pp_by_frame = player_methods.correlate_data(pp, wt)
|
|
|
|
# # Keeping only pupil positions with nonzero confidence
|
|
# pp_by_frame = map(lambda l: filter(lambda p: p['conf']>0, l), pp_by_frame)
|
|
# # Computing a single pupil position for the frame by taking mean of all detected pupil positions
|
|
# pp_by_frame = map(lambda data:
|
|
# sum(np.array([pp['x'], pp['y']]) for pp in data)/len(data) if data else np.array([-1, -1]), pp_by_frame)
|
|
# # Now each nonempty value of pp_by_frame is a tuple of (x, y) for pupil position in that frame
|
|
|
|
# # Checking if timestamps, markers per frame and pupil positions per frame are correlated
|
|
# assert len(marker_data) == len(wt) == len(pp_by_frame)
|
|
|
|
# # Good, now we need to find the frame ranges in which marker is not moving, for that we need the marker_data
|
|
# # and using the position info per frame, we can compute movement speed and detect when it is it almost zero
|
|
|
|
# marker_data, mrdata = unifiy_markers_per_frame(marker_data)
|
|
# # Smoothing x and y coords
|
|
# marker_data[:, 3] = g1d(marker_data[:, 3], sigma=2)
|
|
# marker_data[:, 4] = g1d(marker_data[:, 4], sigma=2)
|
|
|
|
# marker_speed = []
|
|
# for fn, fnp1 in ((f, f+1) for f in xrange(len(marker_data)-1)):
|
|
# if marker_data[fnp1] != [] and marker_data[fn] != []:
|
|
# # dx = marker_data[fnp1][0] - marker_data[fn][0]
|
|
# # dy = marker_data[fnp1][1] - marker_data[fn][1]
|
|
# # dz = marker_data[fnp1][2] - marker_data[fn][2]
|
|
# # speed = np.sqrt(dx**2 + dy**2 + dz**2) * 100
|
|
|
|
# # print fn, fnp1, len(marker_data), marker_data[fnp1], marker_data[fn]
|
|
# dx = marker_data[fnp1][3] - marker_data[fn][3]
|
|
# dy = marker_data[fnp1][4] - marker_data[fn][4]
|
|
# speed = np.sqrt(dx**2 + dy**2)
|
|
|
|
# # print 'marker speed:', speed
|
|
# marker_speed.append(speed)
|
|
# else:
|
|
# marker_speed.append(marker_speed[-1]) # set speed to last speed if marker could not be detected
|
|
# # Performing binary clustering on marker speed
|
|
# model = AgglomerativeClustering(n_clusters=2, linkage="ward", affinity="euclidean")
|
|
# marker_speed = np.array(marker_speed)
|
|
# # Checking for outliers based on "median absolute deviation"
|
|
# outliers = is_outlier(marker_speed, thresh=3.5)
|
|
# print sum(outliers == True), 'outliers detected'
|
|
|
|
# # removing outliers
|
|
# outlier_inds = [i for i in xrange(outliers.size) if outliers[i]]
|
|
# marker_speed = list(np.delete(marker_speed, outlier_inds))
|
|
# # replacing removed outliers by average of their neighbours
|
|
# outliers_inds = sorted(outlier_inds)
|
|
# window = 1
|
|
# for ind in outlier_inds:
|
|
# start = max(ind-window, 0)
|
|
# neighbours = marker_speed[start:ind+window]
|
|
# new_val = sum(neighbours)/len(neighbours)
|
|
# marker_speed.insert(ind, new_val)
|
|
# marker_speed = np.array(marker_speed)
|
|
|
|
# # smoothed_signal = marker_speed[:]
|
|
# smoothed_signal = signal.medfilt(marker_speed, 13)
|
|
# # smoothed_signal = g1d(marker_speed, sigma=2)
|
|
# # smoothed_signal = moving_average(smoothed_signal, 7)
|
|
# model.fit(map(lambda e: [e], smoothed_signal))
|
|
# labels = fix_labels(model.labels_, window=1, outliers = outliers)
|
|
# outliers = map(lambda e: 10 if e else 5, outliers)
|
|
|
|
# mean = np.mean(smoothed_signal)
|
|
|
|
# intervals, nm = find_intervals(labels, mean, smoothed_signal)
|
|
# print '>', len(intervals), 'Intervals found in total.', nm, 'gaze intervals.'
|
|
# interval_display = []
|
|
# for dur in intervals:
|
|
# interval_display.extend([dur[2]]*(dur[1]-dur[0]+1))
|
|
# interval_display = interval_display[:-1]
|
|
|
|
# print len(interval_display), len(marker_data)-1, intervals[-1][1]-intervals[0][0]
|
|
# # print intervals
|
|
# # print labels
|
|
# # return
|
|
# # print len(marker_data), len(marker_speed)
|
|
# plt.plot(range(len(marker_data)-1), marker_speed, 'b',
|
|
# # range(len(marker_data)-1), labels, 'r',
|
|
# range(len(marker_data)-1), smoothed_signal, 'g',
|
|
# range(len(marker_data)-1), interval_display, 'r')
|
|
# # plt.show()
|
|
# # plt.clf()
|
|
# # return
|
|
# # plt.clf()
|
|
|
|
|
|
# recordings_processed += 1
|
|
# intervals_okay = True
|
|
# if not nm in [16, 25]:
|
|
# intervals_okay = False
|
|
# pdb.set_trace()
|
|
# print '> Storing odd figure...'
|
|
# plt.savefig('./temp/%s-%s__%snm.png' % (d1, d3, str(nm)))
|
|
# # print '> Entering manual override mode...'
|
|
# # print '> Enter halt to quit.'
|
|
# # # set manual_bypass to True in case you wanna discard changes in override mode
|
|
# # cmd = raw_input(':')
|
|
# # while cmd != 'halt' and cmd != 'pass':
|
|
# # exec cmd in globals(), locals()
|
|
# # cmd = raw_input(':')
|
|
|
|
# if intervals_okay:
|
|
# print '> Intervals seem okay.'
|
|
# plt.savefig(os.path.join(d4, 'marker_motion.png'))
|
|
# recordings_successful += 1
|
|
# # Store interval information
|
|
# # Use pp_by_frame and marker_data to compute gaze and target points corresponding to this interval
|
|
# gaze_intervals = intervals[::2] # starting from the first interval, gaze, moving, gaze, moving, gaze, ...
|
|
# t2d, t3d, p = [], [], []
|
|
# for intv in gaze_intervals:
|
|
# s, e = intv[0], intv[1]
|
|
# null_gaze, null_marker = 0, 0
|
|
# gaze_point = np.array([0, 0])
|
|
# marker_3d_position = np.array([0, 0, 0])
|
|
# marker_2d_position = np.array([0, 0])
|
|
# for fn in xrange(s, e+1):
|
|
# if all(pp_by_frame[fn]==np.array([-1, -1])):
|
|
# null_gaze += 1
|
|
# else:
|
|
# gaze_point = gaze_point + pp_by_frame[fn]
|
|
# if mrdata[fn] == []:
|
|
# null_marker += 1
|
|
# else:
|
|
# marker_3d_position = marker_3d_position + np.array(mrdata[fn][:3])
|
|
# marker_2d_position = marker_2d_position + np.array(mrdata[fn][3:])
|
|
|
|
# gaze_point = gaze_point/(e-s+1-null_gaze)
|
|
# marker_3d_position = marker_3d_position/(e-s+1-null_marker)
|
|
# marker_2d_position = marker_2d_position/(e-s+1-null_marker)
|
|
|
|
# t2d.append(marker_2d_position)
|
|
# t3d.append(marker_3d_position)
|
|
# p.append(gaze_point)
|
|
# print '> Storing intervals, gaze data, and marker data...'
|
|
# np.save(intervals_dir, np.array(gaze_intervals))
|
|
# np.save(os.path.join(d4, 'p.npy'), np.array(p))
|
|
# np.save(os.path.join(d4, 't2d.npy'), np.array(t2d))
|
|
# np.save(os.path.join(d4, 't3d.npy'), np.array(t3d))
|
|
# print '>', recordings_processed, 'recordings processed.', recordings_successful, 'successful.'
|
|
|
|
# plt.clf()
|
|
|
|
|
|
# Only recordings of these participants are processed by main()
PARTICIPANTS = ['p10', 'p16', 'p13', 'p24', 'p5', 'p14', 'p26', 'p12', 'p20', 'p7', 'p15', 'p11', 'p21', 'p25']


def _mean_pupil_position(pupil_positions):
    '''Mean (x, y) of the pupil positions with nonzero confidence, or None if there is none.'''
    confident = [np.array([pos['x'], pos['y']]) for pos in pupil_positions if pos['conf'] > 0]
    if not confident:
        return None
    return np.mean(confident, axis=0)


def _median_correspondence(intv, pp_by_frame, mrdata, k=3):
    '''
    Pick the representative pupil/marker correspondence of one gaze interval.

    Only frames with both a pupil position and a detected marker are used.
    The pupil position closest to the median pupil position is selected, and
    the frame numbers of its k nearest pupil positions (itself included, fewer
    if the interval has fewer valid frames) are collected.

    Returns (median_pupil, marker_2d, marker_3d, frame_numbers), or None if
    the interval has no valid frame.
    '''
    s, e = intv[0], intv[1]
    gpts, m3ds, m2ds, valid_frames = [], [], [], []
    for fn in range(s, e + 1):
        if pp_by_frame[fn] is None or len(mrdata[fn]) == 0:
            # either pupil detection failed or marker detection:
            # the whole pupil-marker correspondence is invalid, ignore this frame
            continue
        gpts.append(pp_by_frame[fn])
        m3ds.append(np.array(mrdata[fn][:3]))
        m2ds.append(np.array(mrdata[fn][3:]))
        valid_frames.append(fn)

    if not valid_frames:
        return None

    # Computing the median pupil position
    final_p = np.median(gpts, axis=0)
    # Finding the closest pupil position to this median in the valid frames
    dists = [(v(pupil_position) - v(final_p)).mag for pupil_position in gpts]
    ind = dists.index(min(dists))
    # Finding the k nearest pupil positions to this one. kneighbors() needs
    # n_neighbors <= number of samples, so k is capped up front
    nbrs = knn(n_neighbors=min(k, len(gpts)), algorithm='ball_tree').fit(gpts)
    _, indices = nbrs.kneighbors(gpts)
    frames_numbers = [valid_frames[j] for j in indices[ind]]
    return final_p, m2ds[ind], m3ds[ind], frames_numbers


def main(force=False):
    '''
    For every recording of the participants in PARTICIPANTS, find the frames
    whose pupil positions best represent each gaze interval and store their
    frame numbers in p_frames.npy inside the recording directory.

    A recording is skipped when it lacks _aruco_frames.npy or
    gaze_intervals.npy. A recording that already has p_frames.npy is skipped
    too unless `force` is True. If any gaze interval has no frame with both a
    pupil position and a marker, nothing is stored for that recording and it
    is not counted as successful.

    force: recompute and overwrite p_frames.npy even if it already exists.
    '''
    recordings_processed = 0
    recordings_successful = 0
    for d1 in os.listdir(ROOT_DATA_DIR):
        if not d1.startswith('p') or d1 not in PARTICIPANTS:  # every selected participant
            continue

        d2 = os.path.join(ROOT_DATA_DIR, d1)  # .../pi/
        d2 = os.path.join(d2, os.listdir(d2)[0])  # .../pi/../
        for d3 in os.listdir(d2):  # every recording
            d4 = os.path.join(d2, d3)  # .../pi/../00X/
            print('> Processing %s' % d4)
            frames_dir = os.path.join(d4, '_aruco_frames.npy')
            if not os.path.isfile(frames_dir):  # the recording is not yet processed for marker tracking
                print('> Recording does not contain marker data...')
                continue
            intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
            if not os.path.isfile(intervals_dir):  # nothing to load the gaze intervals from
                print('> Recording does not contain gaze intervals...')
                continue
            p_frames_dir = os.path.join(d4, 'p_frames.npy')
            # "processed" is judged by this function's own output file
            if os.path.isfile(p_frames_dir):
                print('> Recording already processed...')
                if force:
                    print('> Processing again...')
                else:
                    continue

            marker_data = np.load(frames_dir)
            # marker_data includes data on tracked markers per frame
            # it's a list with as many entries as the number of video frames, each entry
            # has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
            wt = np.load(os.path.join(d4, 'world_timestamps.npy'))
            # Processing pupil positions
            pp = np.load(os.path.join(d4, 'pupil_positions.npy'))  # timestamp confidence id pos_x pos_y diameter
            # pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
            # converting each element to dictionary for correlation
            pp = [dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], e)) for e in pp]
            pp_by_frame = player_methods.correlate_data(pp, wt)

            # Keeping only pupil positions with nonzero confidence and computing a single
            # pupil position per frame (None if the frame has no usable pupil position)
            pp_by_frame = [_mean_pupil_position(frame_pps) for frame_pps in pp_by_frame]

            # Checking if timestamps, markers per frame and pupil positions per frame are correlated
            assert len(marker_data) == len(wt) == len(pp_by_frame)

            _, mrdata = unifiy_markers_per_frame(marker_data)
            gaze_intervals = np.load(intervals_dir)
            recordings_processed += 1

            p_frames = []
            for intv in gaze_intervals:
                correspondence = _median_correspondence(intv, pp_by_frame, mrdata)
                if correspondence is None:
                    # this marker-pupil correspondence failed
                    print('> Failed to find reliable correspondece for a marker position...')
                    # In this case the data of this recording is ignored
                    p_frames = None
                    break
                # correspondence also carries the median pupil position and the
                # matching 2D/3D marker positions; only the frame numbers are stored
                p_frames.append(correspondence[3])

            if p_frames is not None:
                print('> Intervals seem okay.')
                recordings_successful += 1
                print('> gaze and marker data...')
                np.save(p_frames_dir, np.array(p_frames))
            # NOTE(review): running totals are reported per recording; confirm
            # this placement against the intended log output
            print('> %s recordings processed. %s successful.' % (recordings_processed, recordings_successful))


if __name__ == '__main__':
    main(force=True)