gazesim/code/minimize.py

from __future__ import division
import numpy as np 
from numpy import linalg as LA
from scipy import optimize
from sklearn.preprocessing import PolynomialFeatures as P
from vector import Vector as v
import cv2

M = 7 # this is the size of the feature vector
_q = lambda p: np.array([p[0], p[1], p[0]*p[0], p[1]*p[1], p[0]*p[1], p[0]*p[1]*p[0]*p[1], 1])

def alpha(q, w):
	'''
	q is the polynomial representation of a 2D pupil point p
	w is the Mx2 parameter matrix (M is the length of our feature vector) 
	(could be initialized as a zero matrix np.zeros((M, 2)))
	returns alpha_i = (theta_i, phi_i)
	'''
	theta = np.dot(w[:, 0], q)
	phi = np.dot(w[:, 1], q)
	return theta, phi

def g(q, w):
	'''
	computes the unit gaze ray originating from the eye corresponding to q
	'''
	theta, phi = alpha(q, w)
	sin = map(np.sin, (theta, phi))
	cos = map(np.cos, (theta, phi))
	## This is THE definition, maps (0,0) to (0,0,1) and it produces a unit vector
	return np.array([sin[0], 
					 cos[0]*sin[1],
					 cos[0]*cos[1]])
def g3D3D(pose):
	theta, phi = pose
	sin = map(np.sin, (theta, phi))
	cos = map(np.cos, (theta, phi))
	return np.array([sin[0], 
					 cos[0]*sin[1],
					 cos[0]*cos[1]])

####################################################################################
## Initial attempt to solve E(w, 0) Using a similiar idea to: 
## http://scipy-lectures.github.io/advanced/mathematical_optimization/#id28

def getWfromRowRepr(w):
	w1 = w[:M] # 1st col
	w2 = w[M:] # 2nd col
	return np.array([w1, w2]).transpose()

def getRfromRowRepr(w):
	return np.array([w[:3], w[3:6], w[6:]])	

def f(w, qi, ti):
	'''
	E(\bm{w}) = \sum_{i=1}^N | \bm{(\theta_i, \phi_i)} - \bm{q}_i\bm{w}|^2
	'''
	w = getWfromRowRepr(w)
	alpha_i = map(lambda q: alpha(q, w), qi) # theta, phi
	# we compare the estimated polar coordinates with polar coordinates corresponding
	# to ti as in the 2D to 2D case
	p_ti = map(lambda g: [np.arctan(g.x/g.z), np.arctan(g.y/g.z)], map(v,ti))
	return map(lambda pair: np.sqrt((pair[0][0]-pair[1][0])**2 + (pair[0][1]-pair[1][1])**2), zip(p_ti, alpha_i)) 

def findInitialW(p, t, retMatrix = True):
	ti = map(np.array, t)
	qi = map(_q, p)

	w0 = np.zeros(2*M)
	wr = optimize.leastsq(f, w0[:], args=(qi, ti))
	if retMatrix:
		return getWfromRowRepr(wr[0])
	else:
		return wr[0]

# tub function > 0 in -pi:pi, positive outside
tub = lambda x: max(-x-np.pi, 0, x-np.pi)
tub_weight = 10000
def f3D3D(w, ni, ti):
	'''
	E_{\textmd{3Dto3D}}(\bm{R}, \bm{e}) = \sum_{i=1}^N |\bm{R} \bm{n}_i  \times (\bm{t}_i - \bm{e}) |^2
	'''
	rvec = w[:3]
	tvec = w[3:] # e
	R = cv2.Rodrigues(np.array(rvec))[0]
	return map(lambda (t, n): LA.norm(np.cross(R.dot(n), t-tvec)), zip(ti, ni)) + \
	[tub_weight * tub(w[0]), tub_weight * tub(w[1]), tub_weight * tub(w[2])]
	# this distance measure works if we initialize the minimization by np.array([0.,0.,0.,0.,0.,0.]) 
	# return map(lambda (t, n): -np.dot(R.dot(n), t-tvec)/LA.norm(t-tvec) + 1, zip(ti, ni))
	
def findW3D3D(po, t):
	'''
	po stands for pose i.e. a vector in the direction of gaze for each pupil
	'''
	ti = map(np.array, t) # ground truth data in scene camera coordinate system
	ni = map(np.array, po) # pupil pose in eye coordinate system (normal to pupil disc)
	# w0 = np.array([0.,0.,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
	w0 = np.array([0.,np.pi,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
	wr = optimize.leastsq(f3D3D, w0[:], args=(ni, ti))
	return wr[0]
####################################################################################
## Solving target energy using e0 = (0, 0, 0) and w0 from above

def minimizeEnergy(p, t, e0 = None, pose_given = False):
	if e0 is None:
		e0 = np.array([0, 0, 0])

	if pose_given:
		w = findW3D3D(p, t)
		return cv2.Rodrigues(np.array(w[:3]))[0], w[3:] # R, e
	else:
		w0 = findInitialW(p, t, False)
		qi = map(_q, p)

	ti = map(np.array, t)
	we0 = np.concatenate((w0, e0))

	wer = optimize.leastsq(f2D3D, we0[:], args=(qi, ti))[0]
	w = getWfromRowRepr(wer[:2*M])
	e = wer[2*M:]
	return w, e, getWfromRowRepr(w0)

def f2D3D(we, qi, ti):
	'''
	E_{\textmd{2Dto3D}}(\bm{w}, \bm{e}) = \sum_{i=1}^N |\bm{g}(\bm{q}_i\bm{w}) \times (\bm{t}_i - \bm{e})|^2
	'''
	# extracting parameters from the combined vector
	w = getWfromRowRepr(we[:2*M])
	e = we[2*M:]
	gis = map(lambda q: g(q, w), qi)
	return map(lambda pair: LA.norm(np.cross(pair[1], pair[0]-e)), zip(ti, gis))
migrated code to public repository 2016-03-09 19:52:35 +01:00			`from __future__ import division`
			`import numpy as np`
			`from numpy import linalg as LA`
			`from scipy import optimize`
			`from sklearn.preprocessing import PolynomialFeatures as P`
			`from vector import Vector as v`
			`import cv2`

			`M = 7 # this is the size of the feature vector`
			`_q = lambda p: np.array([p[0], p[1], p[0]p[0], p[1]p[1], p[0]p[1], p[0]p[1]p[0]p[1], 1])`

			`def alpha(q, w):`
			`'''`
			`q is the polynomial representation of a 2D pupil point p`
			`w is the Mx2 parameter matrix (M is the length of our feature vector)`
			`(could be initialized as a zero matrix np.zeros((M, 2)))`
			`returns alpha_i = (theta_i, phi_i)`
			`'''`
			`theta = np.dot(w[:, 0], q)`
			`phi = np.dot(w[:, 1], q)`
			`return theta, phi`

			`def g(q, w):`
			`'''`
			`computes the unit gaze ray originating from the eye corresponding to q`
			`'''`
			`theta, phi = alpha(q, w)`
			`sin = map(np.sin, (theta, phi))`
			`cos = map(np.cos, (theta, phi))`
			`## This is THE definition, maps (0,0) to (0,0,1) and it produces a unit vector`
			`return np.array([sin[0],`
			`cos[0]*sin[1],`
			`cos[0]*cos[1]])`
			`def g3D3D(pose):`
			`theta, phi = pose`
			`sin = map(np.sin, (theta, phi))`
			`cos = map(np.cos, (theta, phi))`
			`return np.array([sin[0],`
			`cos[0]*sin[1],`
			`cos[0]*cos[1]])`

			`####################################################################################`
			`## Initial attempt to solve E(w, 0) Using a similiar idea to:`
			`## http://scipy-lectures.github.io/advanced/mathematical_optimization/#id28`

			`def getWfromRowRepr(w):`
			`w1 = w[:M] # 1st col`
			`w2 = w[M:] # 2nd col`
			`return np.array([w1, w2]).transpose()`

			`def getRfromRowRepr(w):`
			`return np.array([w[:3], w[3:6], w[6:]])`

			`def f(w, qi, ti):`
			`'''`
			`E(\bm{w}) = \sum_{i=1}^N \| \bm{(\theta_i, \phi_i)} - \bm{q}_i\bm{w}\|^2`
			`'''`
			`w = getWfromRowRepr(w)`
			`alpha_i = map(lambda q: alpha(q, w), qi) # theta, phi`
			`# we compare the estimated polar coordinates with polar coordinates corresponding`
			`# to ti as in the 2D to 2D case`
			`p_ti = map(lambda g: [np.arctan(g.x/g.z), np.arctan(g.y/g.z)], map(v,ti))`
			`return map(lambda pair: np.sqrt((pair[0][0]-pair[1][0])2 + (pair[0][1]-pair[1][1])2), zip(p_ti, alpha_i))`

			`def findInitialW(p, t, retMatrix = True):`
			`ti = map(np.array, t)`
			`qi = map(_q, p)`

			`w0 = np.zeros(2*M)`
			`wr = optimize.leastsq(f, w0[:], args=(qi, ti))`
			`if retMatrix:`
			`return getWfromRowRepr(wr[0])`
			`else:`
			`return wr[0]`

			`# tub function > 0 in -pi:pi, positive outside`
			`tub = lambda x: max(-x-np.pi, 0, x-np.pi)`
			`tub_weight = 10000`
			`def f3D3D(w, ni, ti):`
			`'''`
			`E_{\textmd{3Dto3D}}(\bm{R}, \bm{e}) = \sum_{i=1}^N \|\bm{R} \bm{n}_i \times (\bm{t}_i - \bm{e}) \|^2`
			`'''`
			`rvec = w[:3]`
			`tvec = w[3:] # e`
			`R = cv2.Rodrigues(np.array(rvec))[0]`
			`return map(lambda (t, n): LA.norm(np.cross(R.dot(n), t-tvec)), zip(ti, ni)) + \`
			`[tub_weight * tub(w[0]), tub_weight * tub(w[1]), tub_weight * tub(w[2])]`
			`# this distance measure works if we initialize the minimization by np.array([0.,0.,0.,0.,0.,0.])`
			`# return map(lambda (t, n): -np.dot(R.dot(n), t-tvec)/LA.norm(t-tvec) + 1, zip(ti, ni))`

			`def findW3D3D(po, t):`
			`'''`
			`po stands for pose i.e. a vector in the direction of gaze for each pupil`
			`'''`
			`ti = map(np.array, t) # ground truth data in scene camera coordinate system`
			`ni = map(np.array, po) # pupil pose in eye coordinate system (normal to pupil disc)`
			`# w0 = np.array([0.,0.,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)`
			`w0 = np.array([0.,np.pi,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)`
			`wr = optimize.leastsq(f3D3D, w0[:], args=(ni, ti))`
			`return wr[0]`
			`####################################################################################`
			`## Solving target energy using e0 = (0, 0, 0) and w0 from above`

			`def minimizeEnergy(p, t, e0 = None, pose_given = False):`
			`if e0 is None:`
			`e0 = np.array([0, 0, 0])`

			`if pose_given:`
			`w = findW3D3D(p, t)`
			`return cv2.Rodrigues(np.array(w[:3]))[0], w[3:] # R, e`
			`else:`
			`w0 = findInitialW(p, t, False)`
			`qi = map(_q, p)`

			`ti = map(np.array, t)`
			`we0 = np.concatenate((w0, e0))`

			`wer = optimize.leastsq(f2D3D, we0[:], args=(qi, ti))[0]`
			`w = getWfromRowRepr(wer[:2*M])`
			`e = wer[2*M:]`
			`return w, e, getWfromRowRepr(w0)`

			`def f2D3D(we, qi, ti):`
			`'''`
			`E_{\textmd{2Dto3D}}(\bm{w}, \bm{e}) = \sum_{i=1}^N \|\bm{g}(\bm{q}_i\bm{w}) \times (\bm{t}_i - \bm{e})\|^2`
			`'''`
			`# extracting parameters from the combined vector`
			`w = getWfromRowRepr(we[:2*M])`
			`e = we[2*M:]`
			`gis = map(lambda q: g(q, w), qi)`
			`return map(lambda pair: LA.norm(np.cross(pair[1], pair[0]-e)), zip(ti, gis))`