gazesim/code/recording/process_recordings.py

from __future__ import division
'''
For each experiment, this script tracks the movement of the marker in the video using the information in _aruco_frames.npy.
It then correlates this information with the gaze data from pupil_positions.npy.
Finally, for every target in the video (25 targets in calibration, 16 in test), it maps the 3D marker position (mean position over the duration of the pause)
to the gaze position (mean position over the pause duration) and stores this information, together with the projected 2D marker position, in a separate npy file.
The resulting file contains the ground truth data for this experiment.
'''
import os, sys
import numpy as np
import matplotlib.pyplot as plt
from pylab import rcParams
from scipy.ndimage.filters import gaussian_filter1d as g1d
from scipy import signal

from sklearn.neighbors import NearestNeighbors as knn

# from sklearn import svm
from sklearn.cluster import AgglomerativeClustering

from tracker import readCameraParams, Marker
from util.tools import is_outlier, moving_average

sys.path.append('..') # so we can import from pupil
from pupil import player_methods
from vector import Vector as v

import pdb

# Root directory that main() scans; only sub-directories whose name starts with 'p' are considered.
# NOTE(review): this is an absolute path on the original author's machine -- adjust before running elsewhere.
ROOT_DATA_DIR = '/home/mmbrian/HiWi/etra2016_mohsen/code/recording/data/participants'

def unifiy_markers_per_frame(marker_data):
	'''
	Collapse the ArUco detections of every frame into a single marker position.

	Since ArUco sometimes detects a marker twice in a frame, all detections of a frame are averaged.
	Each resulting marker is the list [x, y, z, u, v]: the first three values are the mean of
	Marker.getCenter(), the last two the mean of Marker.getCenterInImage(camera_matrix, dist_coeffs)
	(3D and 2D position of the marker center wrt scene camera).

	marker_data -- sequence with one entry per frame, each entry being a (possibly empty) sequence of
	               raw markers accepted by Marker.fromList

	Returns a tuple (mdata, mrdata), both with one entry per frame:
	mdata  -- np.array of markers in which a frame without detection repeats the last detected position.
	          Frames preceding the very first detection take the first detected position, and if the
	          marker is never detected every row is NaN.
	mrdata -- list holding real detections only: the averaged marker, or [] when nothing was detected
	'''
	camera_matrix, dist_coeffs = readCameraParams() # in case of relocating camera.yml input the new path as cam_math
	mdata, mrdata = [], []
	leading_gaps = 0 # number of frames seen before the very first detection
	for detections in marker_data:
		if len(detections) > 0:
			markers = [Marker.fromList(m) for m in detections]
			# 3D and 2D centers are averaged separately; stacking a 3-vector and a 2-vector
			# in one array would create a ragged array
			center_3d = np.mean([np.array(m.getCenter()) for m in markers], axis=0)
			center_2d = np.mean([np.array(m.getCenterInImage(camera_matrix, dist_coeffs)) for m in markers], axis=0)
			marker = [center_3d[0], center_3d[1], center_3d[2], center_2d[0], center_2d[1]]
			if leading_gaps:
				# there was no "last detected position" for these frames, back-fill them with the first one
				mdata.extend([marker] * leading_gaps)
				leading_gaps = 0
			mdata.append(marker)
			mrdata.append(marker)
		elif mdata: # marker is not detected, assign last detected position to this frame
			mdata.append(mdata[-1])
			mrdata.append([]) # this contains real marker information (all tracked positions)
		else: # marker is not detected and has never been detected so far
			leading_gaps += 1
			mrdata.append([])
	if leading_gaps:
		# the marker was never detected: keep one row per frame so lengths still match the video
		mdata = [[np.nan] * 5 for _ in range(leading_gaps)]
	return np.array(mdata), mrdata

def fix_labels(labels, window = 2, elements = (0, 1), outliers = None):
	'''
	Smooth a label sequence with a sliding majority vote.

	For every position i that has `window` neighbours on both sides, the label is replaced by the
	member of `elements` that occurs most often in labels[i-window:i+window+1]. On a tie the member
	listed first in `elements` wins. Labels are updated while scanning, so the neighbours to the left
	of i have already been smoothed when i is processed.

	labels   -- sequence of labels (it is copied, the input is not modified)
	window   -- number of neighbours considered on each side
	elements -- candidate labels
	outliers -- optional sequence of flags, one per label; when outliers[i] is truthy the label at i
	            itself is left out of the vote for position i. None (or empty) means no outliers.

	Returns the smoothed labels as a list; the first and last `window` labels are left untouched.
	'''
	labels = list(labels)
	has_outliers = outliers is not None and len(outliers) > 0
	for i in range(window, len(labels) - window):
		neighborhood = labels[i-window:i+window+1]
		if has_outliers and outliers[i]: # removing this label from decision making
			# the label under inspection is the center of the window, i.e. index `window` (not `i`)
			del neighborhood[window]
		element_counts = [neighborhood.count(e) for e in elements]
		labels[i] = elements[element_counts.index(max(element_counts))]
	return labels

def find_intervals(labels, mean, marker_speed):
	'''
	Given the label information of frame to frame motion speed, this method returns the
	intervals for which the marker is either "moving" or "not moving".
	Notice that len(labels) equals the number of frames minus one.

	labels       -- sequence of per-transition labels; the first label is taken as the "not moving" label
	mean         -- speed threshold forwarded to merge_intervals
	marker_speed -- per-transition speed signal forwarded to merge_intervals

	Returns a tuple (intervals, not_moving):
	intervals  -- list of [start, end, label]. A run of equal labels is closed at the index where the
	              label changes, the next interval starts one index later and the last interval ends
	              at len(labels).
	              NOTE(review): the bounds look like inclusive frame indices (main() iterates
	              start..end inclusive) -- confirm before changing the arithmetic.
	not_moving -- number of returned intervals carrying the "not moving" label
	For empty labels ([], 0) is returned.
	'''
	n_labels = len(labels)
	if n_labels == 0:
		return [], 0

	nm_label = labels[0]
	intervals = []
	curr_label, start = labels[0], 0
	for i in range(1, n_labels):
		if labels[i] != curr_label: # label changed
			intervals.append([start, i, curr_label])
			curr_label = labels[i]
			start = i+1
	intervals.append([start, n_labels, curr_label])
	not_moving = sum(1 for seg in intervals if seg[2] == nm_label)

	# Now we do a post check to see if two non moving intervals are very close to each other,
	# the middle interval is most likely a misclassification.
	# The check only runs when more intervals were found than the module description's 25 (calibration)
	# or 16 (test) targets would produce.
	if (len(intervals) > 49 and not_moving > 25) or (len(intervals) > 31 and not_moving > 16):
		ret = merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True)
		return ret, sum(1 for seg in ret if seg[2] == nm_label)
	else:
		return intervals, not_moving


def _mean_interval_length(lengths, remove_outliers):
	'''Return the mean of lengths (optionally ignoring outliers), or 0.0 when nothing is left to average.'''
	lengths = np.array(lengths)
	if lengths.size == 0:
		return 0.0
	total, count = lengths.sum(), lengths.size
	if remove_outliers:
		outlying = lengths[is_outlier(lengths, thresh=3.5)]
		total, count = total - outlying.sum(), count - outlying.size
	if count == 0:
		return 0.0
	return float(total) / count


def merge_intervals(intervals, nm_label, mean, marker_speed, remove_outliers=True):
	'''
	Remove intervals that are most likely misclassifications by merging them with their neighbours.

	An interval (other than the first one) is absorbed when
	  * the average length of the intervals of its kind (moving / not moving) is at least 3.5 times
	    its own length, and
	  * the average of marker_speed over the interval is below `mean`, and
	  * it has a neighbour on both sides.
	The previous interval, the short interval and the next interval are then replaced by a single
	interval labeled 1 - label, so labels are assumed to be 0/1.

	intervals       -- list of [start, end, label]
	nm_label        -- label of the "not moving" intervals
	mean            -- speed threshold
	marker_speed    -- speed signal indexed by the interval bounds
	remove_outliers -- ignore outlier lengths (is_outlier with thresh=3.5) when averaging lengths

	Returns the new list of intervals (an empty list for empty input).
	'''
	if len(intervals) == 0:
		return []

	avg_m_length = _mean_interval_length([seg[1] - seg[0] for seg in intervals if seg[2] != nm_label], remove_outliers)
	avg_nm_length = _mean_interval_length([seg[1] - seg[0] for seg in intervals if seg[2] == nm_label], remove_outliers)

	thresh = 3.5 # an interval is a merge candidate if the average length is at least this many times its length
	n_intervals = len(intervals)
	ret = [intervals[0]]
	i = 1
	while i < n_intervals:
		start, end, label = intervals[i][0], intervals[i][1], intervals[i][2]
		length = end - start
		avg_length = avg_nm_length if label == nm_label else avg_m_length
		# a zero-length interval is as short as it gets; avoid dividing by zero
		ratio = avg_length / length if length > 0 else float('inf')
		# the last interval has no right neighbour to merge with, it is kept as is
		if ratio >= thresh and i + 1 < n_intervals:
			# replace this interval by merging the two intervals around it,
			# but only if the average speed inside this interval is below mean
			segment = marker_speed[start:end]
			if len(segment) > 0 and np.mean(segment) < mean:
				last_intv = ret.pop()
				ret.append([last_intv[0], intervals[i+1][1], 1-label])
				print('Merged two intervals')
				i += 2
				continue
		ret.append(intervals[i])
		i += 1
	return ret


# def main(force=False):
# 	rcParams['figure.figsize'] = 15, 7
# 	recordings_processed = 0
# 	recordings_successful = 0
# 	for d1 in os.listdir(ROOT_DATA_DIR):
# 		if d1.startswith('p'): # every participant
# 			d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/
# 			d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../
# 			for d3 in os.listdir(d2): # every recording
# 				d4 = os.path.join(d2, d3) # .../pi/../00X/
# 				print '> Processing', d4
# 				frames_dir = os.path.join(d4, '_aruco_frames.npy')
# 				if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking
# 					print '> Recording does not contain marker data...'
# 					continue
# 				intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
# 				if os.path.isfile(intervals_dir):
# 					print '> Recording already processed...'
# 					if force:
# 						print '> Processing again...'
# 					else:
# 						continue

# 				marker_data = np.load(frames_dir)
# 				# marker_data includes data on tracked markers per frame
# 				# it's a list with as many entries as the number of video frames, each entry
# 				# has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
# 				wt = np.load(os.path.join(d4, 'world_timestamps.npy'))
# 				# Processing pupil positions
# 				pp = np.load(os.path.join(d4, 'pupil_positions.npy')) # timestamp	confidence	id	pos_x	pos_y	diameter
# 				# pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
# 				# converting each element to dictionary for correlation
# 				pp = map(lambda e: dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], e)), pp)
# 				pp_by_frame = player_methods.correlate_data(pp, wt)

# 				# Keeping only pupil positions with nonzero confidence
# 				pp_by_frame = map(lambda l: filter(lambda p: p['conf']>0, l), pp_by_frame)
# 				# Computing a single pupil position for the frame by taking mean of all detected pupil positions
# 				pp_by_frame = map(lambda data:
# 					sum(np.array([pp['x'], pp['y']]) for pp in data)/len(data) if data else np.array([-1, -1]), pp_by_frame)
# 				# Now each nonempty value of pp_by_frame is a tuple of (x, y) for pupil position in that frame

# 				# Checking if timestamps, markers per frame and pupil positions per frame are correlated
# 				assert len(marker_data) == len(wt) == len(pp_by_frame)

# 				# Good, now we need to find the frame ranges in which marker is not moving, for that we need the marker_data
# 				# and using the position info per frame, we can compute movement speed and detect when it is it almost zero

# 				marker_data, mrdata = unifiy_markers_per_frame(marker_data)
# 				# Smoothing x and y coords
# 				marker_data[:, 3] = g1d(marker_data[:, 3], sigma=2)
# 				marker_data[:, 4] = g1d(marker_data[:, 4], sigma=2)

# 				marker_speed = []
# 				for fn, fnp1 in ((f, f+1) for f in xrange(len(marker_data)-1)):
# 					if marker_data[fnp1] != [] and marker_data[fn] != []:
# 						# dx = marker_data[fnp1][0] - marker_data[fn][0]
# 						# dy = marker_data[fnp1][1] - marker_data[fn][1]
# 						# dz = marker_data[fnp1][2] - marker_data[fn][2]
# 						# speed = np.sqrt(dx**2 + dy**2 + dz**2) * 100

# 						# print fn, fnp1, len(marker_data), marker_data[fnp1], marker_data[fn]
# 						dx = marker_data[fnp1][3] - marker_data[fn][3]
# 						dy = marker_data[fnp1][4] - marker_data[fn][4]
# 						speed = np.sqrt(dx**2 + dy**2)

# 						# print 'marker speed:', speed
# 						marker_speed.append(speed)
# 					else:
# 						marker_speed.append(marker_speed[-1]) # set speed to last speed if marker could not be detected
# 				# Performing binary clustering on marker speed
# 				model = AgglomerativeClustering(n_clusters=2, linkage="ward", affinity="euclidean")
# 				marker_speed = np.array(marker_speed)
# 				# Checking for outliers based on "median absolute deviation"
# 				outliers = is_outlier(marker_speed, thresh=3.5)
# 				print sum(outliers == True), 'outliers detected'

# 				# removing outliers
# 				outlier_inds = [i for i in xrange(outliers.size) if outliers[i]]
# 				marker_speed = list(np.delete(marker_speed, outlier_inds))
# 				# replacing removed outliers by average of their neighbours
# 				outliers_inds = sorted(outlier_inds)
# 				window = 1
# 				for ind in outlier_inds:
# 					start = max(ind-window, 0)
# 					neighbours = marker_speed[start:ind+window]
# 					new_val = sum(neighbours)/len(neighbours)
# 					marker_speed.insert(ind, new_val)
# 				marker_speed = np.array(marker_speed)

# 				# smoothed_signal = marker_speed[:]
# 				smoothed_signal = signal.medfilt(marker_speed, 13)
# 				# smoothed_signal = g1d(marker_speed, sigma=2)
# 				# smoothed_signal = moving_average(smoothed_signal, 7)
# 				model.fit(map(lambda e: [e], smoothed_signal))
# 				labels = fix_labels(model.labels_, window=1, outliers = outliers)
# 				outliers = map(lambda e: 10 if e else 5, outliers)

# 				mean = np.mean(smoothed_signal)

# 				intervals, nm = find_intervals(labels, mean, smoothed_signal)
# 				print '>', len(intervals), 'Intervals found in total.', nm, 'gaze intervals.'
# 				interval_display = []
# 				for dur in intervals:
# 					interval_display.extend([dur[2]]*(dur[1]-dur[0]+1))
# 				interval_display = interval_display[:-1]

# 				print len(interval_display), len(marker_data)-1, intervals[-1][1]-intervals[0][0]
# 				# print intervals
# 				# print labels
# 				# return
# 				# print len(marker_data), len(marker_speed)
# 				plt.plot(range(len(marker_data)-1), marker_speed, 'b',
# 						 # range(len(marker_data)-1), labels, 'r',
# 						 range(len(marker_data)-1), smoothed_signal, 'g',
# 						 range(len(marker_data)-1), interval_display, 'r')
# 				# plt.show()
# 				# plt.clf()
# 				# return
# 				# plt.clf()


# 				recordings_processed += 1
# 				intervals_okay = True
# 				if not nm in [16, 25]:
# 					intervals_okay = False
# 					pdb.set_trace()
# 					print '> Storing odd figure...'
# 					plt.savefig('./temp/%s-%s__%snm.png' % (d1, d3, str(nm)))
# 					# print '> Entering manual override mode...'
# 					# print '> Enter halt to quit.'
# 					# # set manual_bypass to True in case you wanna discard changes in override mode
# 					# cmd = raw_input(':')
# 					# while cmd != 'halt' and cmd != 'pass':
# 					# 	exec cmd in globals(), locals()
# 					# 	cmd = raw_input(':')

# 				if intervals_okay:
# 					print '> Intervals seem okay.'
# 					plt.savefig(os.path.join(d4, 'marker_motion.png'))
# 					recordings_successful += 1
# 					# Store interval information
# 					# Use pp_by_frame and marker_data to compute gaze and target points corresponding to this interval
# 					gaze_intervals = intervals[::2] # starting from the first interval, gaze, moving, gaze, moving, gaze, ...
# 					t2d, t3d, p = [], [], []
# 					for intv in gaze_intervals:
# 						s, e = intv[0], intv[1]
# 						null_gaze, null_marker = 0, 0
# 						gaze_point = np.array([0, 0])
# 						marker_3d_position = np.array([0, 0, 0])
# 						marker_2d_position = np.array([0, 0])
# 						for fn in xrange(s, e+1):
# 							if all(pp_by_frame[fn]==np.array([-1, -1])):
# 								null_gaze += 1
# 							else:
# 								gaze_point = gaze_point + pp_by_frame[fn]
# 							if mrdata[fn] == []:
# 								null_marker += 1
# 							else:
# 								marker_3d_position = marker_3d_position + np.array(mrdata[fn][:3])
# 								marker_2d_position = marker_2d_position + np.array(mrdata[fn][3:])

# 						gaze_point = gaze_point/(e-s+1-null_gaze)
# 						marker_3d_position = marker_3d_position/(e-s+1-null_marker)
# 						marker_2d_position = marker_2d_position/(e-s+1-null_marker)

# 						t2d.append(marker_2d_position)
# 						t3d.append(marker_3d_position)
# 						p.append(gaze_point)
# 					print '> Storing intervals, gaze data, and marker data...'
# 					np.save(intervals_dir, np.array(gaze_intervals))
# 					np.save(os.path.join(d4, 'p.npy'), np.array(p))
# 					np.save(os.path.join(d4, 't2d.npy'), np.array(t2d))
# 					np.save(os.path.join(d4, 't3d.npy'), np.array(t3d))
# 				print '>', recordings_processed, 'recordings processed.', recordings_successful, 'successful.'

# 				plt.clf()


# Names of the participant directories that main() processes; any other directory under ROOT_DATA_DIR is skipped.
PARTICIPANTS = ['p10', 'p16', 'p13', 'p24', 'p5', 'p14', 'p26', 'p12', 'p20', 'p7', 'p15', 'p11', 'p21', 'p25']
def _match_gaze_to_marker(pp_by_frame, mrdata, first_frame, last_frame, k=3):
	'''
	Find the pupil-marker correspondence of one gaze interval (frames first_frame..last_frame inclusive).

	Only frames with both a pupil position (anything but [-1, -1]) and a detected marker are used.
	Returns None if there is no such frame, otherwise a tuple
	(median_pupil, marker_2d, marker_3d, frame_numbers) where
	median_pupil  -- median pupil position over the valid frames
	marker_2d/3d  -- marker position in the valid frame whose pupil position is closest to the median
	frame_numbers -- frame numbers of the (at most k) valid frames whose pupil positions are nearest
	                 to that closest pupil position
	'''
	gpts, valid_frames = [], []
	for fn in range(first_frame, last_frame + 1):
		if all(pp_by_frame[fn] == np.array([-1, -1])) or len(mrdata[fn]) == 0:
			# either pupil detection failed or marker detection
			# the whole pupil-marker correspondence is invalid, ignore this frame
			continue
		gpts.append(pp_by_frame[fn])
		valid_frames.append(fn)
	if not valid_frames:
		return None

	# Computing the median pupil position
	final_p = np.median(gpts, axis=0)
	# Finding the closest pupil position to this median in the valid frames (first one on ties)
	dists = [(v(pupil_position) - v(final_p)).mag for pupil_position in gpts]
	ind = min(range(len(gpts)), key=lambda j: dists[j])
	# Finding the k nearest pupil positions to this one; k cannot exceed the number of samples
	n_neighbors = min(k, len(gpts))
	nbrs = knn(n_neighbors=n_neighbors, algorithm='ball_tree').fit(gpts)
	_, indices = nbrs.kneighbors(gpts)
	frame_numbers = [valid_frames[j] for j in indices[ind]]

	marker = mrdata[valid_frames[ind]]
	return final_p, np.array(marker[3:]), np.array(marker[:3]), frame_numbers


def main(force=False):
	'''
	For every recording of every participant in PARTICIPANTS, select the frames that best represent
	the gaze during each stored gaze interval and save them as p_frames.npy in the recording directory.

	A recording is skipped when it has no _aruco_frames.npy, when it has no gaze_intervals.npy, or
	when force is False although gaze_intervals.npy exists. A recording in which some gaze interval
	has no frame with both pupil and marker data is reported as failed and nothing is saved for it.

	force -- process recordings that already contain gaze_intervals.npy
	'''
	recordings_processed = 0
	recordings_successful = 0
	for d1 in os.listdir(ROOT_DATA_DIR):
		if not d1.startswith('p') or d1 not in PARTICIPANTS: # every selected participant
			continue

		d2 = os.path.join(ROOT_DATA_DIR, d1) # .../pi/
		d2 = os.path.join(d2, os.listdir(d2)[0]) # .../pi/../
		for d3 in os.listdir(d2): # every recording
			d4 = os.path.join(d2, d3) # .../pi/../00X/
			print('> Processing %s' % d4)
			frames_dir = os.path.join(d4, '_aruco_frames.npy')
			if not os.path.isfile(frames_dir): # the recording is not yet processed for marker tracking
				print('> Recording does not contain marker data...')
				continue
			intervals_dir = os.path.join(d4, 'gaze_intervals.npy')
			if not os.path.isfile(intervals_dir):
				# the intervals are loaded below, without them there is nothing to work on
				print('> Recording does not contain gaze intervals...')
				continue
			print('> Recording already processed...')
			if not force:
				continue
			print('> Processing again...')

			marker_data = np.load(frames_dir)
			# marker_data includes data on tracked markers per frame
			# it's a list with as many entries as the number of video frames, each entry
			# has a list of tracked markers, each marker item has marker id, marker corners, Rvec, Tvec
			wt = np.load(os.path.join(d4, 'world_timestamps.npy'))
			# Processing pupil positions
			pp = np.load(os.path.join(d4, 'pupil_positions.npy')) # timestamp	confidence	id	pos_x	pos_y	diameter
			# pos_x and pos_y are normalized (Origin 0,0 at the bottom left and 1,1 at the top right)
			# converting each element to dictionary for correlation
			pp = [dict(zip(['timestamp', 'conf', 'id', 'x', 'y', 'diam'], row)) for row in pp]
			pp_by_frame = player_methods.correlate_data(pp, wt)

			# Keeping only pupil positions with nonzero confidence
			pp_by_frame = [[pos for pos in frame if pos['conf'] > 0] for frame in pp_by_frame]
			# Computing a single pupil position for the frame by taking mean of all detected pupil positions,
			# [-1, -1] marks a frame without any pupil position
			pp_by_frame = [sum(np.array([pos['x'], pos['y']]) for pos in data)/len(data) if data else np.array([-1, -1])
						   for data in pp_by_frame]

			# Checking if timestamps, markers per frame and pupil positions per frame are correlated
			assert len(marker_data) == len(wt) == len(pp_by_frame)

			# only the real detections (mrdata) are needed here
			_, mrdata = unifiy_markers_per_frame(marker_data)

			gaze_intervals = np.load(intervals_dir)

			recordings_processed += 1
			intervals_okay = True

			t2d_med, t3d_med, p_med, p_frames = [], [], [], []
			for intv in gaze_intervals:
				match = _match_gaze_to_marker(pp_by_frame, mrdata, intv[0], intv[1])
				if match is None:
					# this marker-pupil correspondence failed,
					# in this case the recording is ignored
					print('> Failed to find reliable correspondece for a marker position...')
					intervals_okay = False
					break
				final_p, marker_2d, marker_3d, frames_numbers = match
				# the pupil-marker correspondence of this interval is final_p <-> marker_2d / marker_3d,
				# eye images are taken from frames_numbers
				p_med.append(final_p)
				t2d_med.append(marker_2d)
				t3d_med.append(marker_3d)
				p_frames.append(frames_numbers)

			if intervals_okay:
				print('> Intervals seem okay.')
				recordings_successful += 1

				print('> gaze and marker data...')
				# NOTE: only the frame numbers are stored; storing of the correspondences is disabled
				# np.save(intervals_dir, np.array(gaze_intervals))
				np.save(os.path.join(d4, 'p_frames.npy'), np.array(p_frames))
				# np.save(os.path.join(d4, 'p.npy'), np.array(p_med))
				# np.save(os.path.join(d4, 't2d.npy'), np.array(t2d_med))
				# np.save(os.path.join(d4, 't3d.npy'), np.array(t3d_med))
			print('> %d recordings processed. %d successful.' % (recordings_processed, recordings_successful))
# When run as a script, process all recordings; force=True also processes recordings
# that already contain a gaze_intervals.npy file.
if __name__ == '__main__':
	main(force=True)