gazesim/code/minimize.py

from __future__ import division
import numpy as np
from numpy import linalg as LA
from scipy import optimize
from sklearn.preprocessing import PolynomialFeatures as P
from vector import Vector as v
import cv2

'''
This module contains all the methods used in solving the minimization problems
in our paper.
'''

M = 7 # this is the size of the feature vector
_q = lambda p: np.array([p[0], p[1], p[0]*p[0], p[1]*p[1], p[0]*p[1], p[0]*p[1]*p[0]*p[1], 1])

def alpha(q, w):
	'''
	q is the polynomial representation of a 2D pupil point p
	w is the Mx2 parameter matrix (M is the length of our feature vector)
	(could be initialized as a zero matrix np.zeros((M, 2)))
	returns alpha_i = (theta_i, phi_i)
	'''
	theta = np.dot(w[:, 0], q)
	phi = np.dot(w[:, 1], q)
	return theta, phi

def g(q, w):
	'''
	computes the unit gaze ray originating from the eye corresponding to q
	'''
	theta, phi = alpha(q, w)
	sin = map(np.sin, (theta, phi))
	cos = map(np.cos, (theta, phi))
	## This is THE definition, maps (0,0) to (0,0,1) and it produces a unit vector
	return np.array([sin[0],
					 cos[0]*sin[1],
					 cos[0]*cos[1]])
def g3D3D(pose):
	theta, phi = pose
	sin = map(np.sin, (theta, phi))
	cos = map(np.cos, (theta, phi))
	return np.array([sin[0],
					 cos[0]*sin[1],
					 cos[0]*cos[1]])

####################################################################################
## Initial attempt to solve E(w, 0) Using a similiar idea to:
## http://scipy-lectures.github.io/advanced/mathematical_optimization/#id28

def getWfromRowRepr(w):
	w1 = w[:M] # 1st col
	w2 = w[M:] # 2nd col
	return np.array([w1, w2]).transpose()

def getRfromRowRepr(w):
	return np.array([w[:3], w[3:6], w[6:]])

def f(w, qi, ti):
	'''
	E(\bm{w}) = \sum_{i=1}^N | \bm{(\theta_i, \phi_i)} - \bm{q}_i\bm{w}|^2
	'''
	w = getWfromRowRepr(w)
	alpha_i = map(lambda q: alpha(q, w), qi) # theta, phi
	# we compare the estimated polar coordinates with polar coordinates corresponding
	# to ti as in the 2D to 2D case
	p_ti = map(lambda g: [np.arctan(g.x/g.z), np.arctan(g.y/g.z)], map(v,ti))
	return map(lambda pair: np.sqrt((pair[0][0]-pair[1][0])**2 + (pair[0][1]-pair[1][1])**2), zip(p_ti, alpha_i))

def findInitialW(p, t, retMatrix = True):
	ti = map(np.array, t)
	qi = map(_q, p)

	w0 = np.zeros(2*M)
	wr = optimize.leastsq(f, w0[:], args=(qi, ti))
	if retMatrix:
		return getWfromRowRepr(wr[0])
	else:
		return wr[0]

# tub function > 0 in -pi:pi, positive outside
tub = lambda x: max(-x-np.pi, 0, x-np.pi)
tub_weight = 10000
def f3D3D(w, ni, ti):
	'''
	E_{\textmd{3Dto3D}}(\bm{R}, \bm{e}) = \sum_{i=1}^N |\bm{R} \bm{n}_i  \times (\bm{t}_i - \bm{e}) |^2
	'''
	rvec = w[:3]
	tvec = w[3:] # e
	R = cv2.Rodrigues(np.array(rvec))[0]
	return map(lambda (t, n): LA.norm(np.cross(R.dot(n), t-tvec)), zip(ti, ni)) + \
	[tub_weight * tub(w[0]), tub_weight * tub(w[1]), tub_weight * tub(w[2])]
	# this distance measure works if we initialize the minimization by np.array([0.,0.,0.,0.,0.,0.])
	# return map(lambda (t, n): -np.dot(R.dot(n), t-tvec)/LA.norm(t-tvec) + 1, zip(ti, ni))

def findW3D3D(po, t):
	'''
	po stands for pose i.e. a vector in the direction of gaze for each pupil
	'''
	ti = map(np.array, t) # ground truth data in scene camera coordinate system
	ni = map(np.array, po) # pupil pose in eye coordinate system (normal to pupil disc)
	# w0 = np.array([0.,0.,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
	w0 = np.array([0.,np.pi,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
	wr = optimize.leastsq(f3D3D, w0[:], args=(ni, ti))
	return wr[0]
####################################################################################
## Solving target energy using e0 = (0, 0, 0) and w0 from above

def minimizeEnergy(p, t, e0 = None, pose_given = False):
	if e0 is None:
		e0 = np.array([0, 0, 0])

	if pose_given:
		w = findW3D3D(p, t)
		return cv2.Rodrigues(np.array(w[:3]))[0], w[3:] # R, e
	else:
		w0 = findInitialW(p, t, False)
		qi = map(_q, p)

	ti = map(np.array, t)
	we0 = np.concatenate((w0, e0))

	wer = optimize.leastsq(f2D3D, we0[:], args=(qi, ti))[0]
	w = getWfromRowRepr(wer[:2*M])
	e = wer[2*M:]
	return w, e, getWfromRowRepr(w0)

def f2D3D(we, qi, ti):
	'''
	E_{\textmd{2Dto3D}}(\bm{w}, \bm{e}) = \sum_{i=1}^N |\bm{g}(\bm{q}_i\bm{w}) \times (\bm{t}_i - \bm{e})|^2
	'''
	# extracting parameters from the combined vector
	w = getWfromRowRepr(we[:2*M])
	e = we[2*M:]
	gis = map(lambda q: g(q, w), qi)
	return map(lambda pair: LA.norm(np.cross(pair[1], pair[0]-e)), zip(ti, gis))