gazesim/code/minimize.py

136 lines
4.4 KiB
Python

from __future__ import division
import numpy as np
from numpy import linalg as LA
from scipy import optimize
from sklearn.preprocessing import PolynomialFeatures as P
from vector import Vector as v
import cv2
'''
This module contains all the methods used in solving the minimization problems
in our paper.
'''
M = 7 # this is the size of the feature vector
_q = lambda p: np.array([p[0], p[1], p[0]*p[0], p[1]*p[1], p[0]*p[1], p[0]*p[1]*p[0]*p[1], 1])
def alpha(q, w):
'''
q is the polynomial representation of a 2D pupil point p
w is the Mx2 parameter matrix (M is the length of our feature vector)
(could be initialized as a zero matrix np.zeros((M, 2)))
returns alpha_i = (theta_i, phi_i)
'''
theta = np.dot(w[:, 0], q)
phi = np.dot(w[:, 1], q)
return theta, phi
def g(q, w):
'''
computes the unit gaze ray originating from the eye corresponding to q
'''
theta, phi = alpha(q, w)
sin = map(np.sin, (theta, phi))
cos = map(np.cos, (theta, phi))
## This is THE definition, maps (0,0) to (0,0,1) and it produces a unit vector
return np.array([sin[0],
cos[0]*sin[1],
cos[0]*cos[1]])
def g3D3D(pose):
theta, phi = pose
sin = map(np.sin, (theta, phi))
cos = map(np.cos, (theta, phi))
return np.array([sin[0],
cos[0]*sin[1],
cos[0]*cos[1]])
####################################################################################
## Initial attempt to solve E(w, 0) Using a similiar idea to:
## http://scipy-lectures.github.io/advanced/mathematical_optimization/#id28
def getWfromRowRepr(w):
w1 = w[:M] # 1st col
w2 = w[M:] # 2nd col
return np.array([w1, w2]).transpose()
def getRfromRowRepr(w):
return np.array([w[:3], w[3:6], w[6:]])
def f(w, qi, ti):
'''
E(\bm{w}) = \sum_{i=1}^N | \bm{(\theta_i, \phi_i)} - \bm{q}_i\bm{w}|^2
'''
w = getWfromRowRepr(w)
alpha_i = map(lambda q: alpha(q, w), qi) # theta, phi
# we compare the estimated polar coordinates with polar coordinates corresponding
# to ti as in the 2D to 2D case
p_ti = map(lambda g: [np.arctan(g.x/g.z), np.arctan(g.y/g.z)], map(v,ti))
return map(lambda pair: np.sqrt((pair[0][0]-pair[1][0])**2 + (pair[0][1]-pair[1][1])**2), zip(p_ti, alpha_i))
def findInitialW(p, t, retMatrix = True):
ti = map(np.array, t)
qi = map(_q, p)
w0 = np.zeros(2*M)
wr = optimize.leastsq(f, w0[:], args=(qi, ti))
if retMatrix:
return getWfromRowRepr(wr[0])
else:
return wr[0]
# tub function > 0 in -pi:pi, positive outside
tub = lambda x: max(-x-np.pi, 0, x-np.pi)
tub_weight = 10000
def f3D3D(w, ni, ti):
'''
E_{\textmd{3Dto3D}}(\bm{R}, \bm{e}) = \sum_{i=1}^N |\bm{R} \bm{n}_i \times (\bm{t}_i - \bm{e}) |^2
'''
rvec = w[:3]
tvec = w[3:] # e
R = cv2.Rodrigues(np.array(rvec))[0]
return map(lambda (t, n): LA.norm(np.cross(R.dot(n), t-tvec)), zip(ti, ni)) + \
[tub_weight * tub(w[0]), tub_weight * tub(w[1]), tub_weight * tub(w[2])]
# this distance measure works if we initialize the minimization by np.array([0.,0.,0.,0.,0.,0.])
# return map(lambda (t, n): -np.dot(R.dot(n), t-tvec)/LA.norm(t-tvec) + 1, zip(ti, ni))
def findW3D3D(po, t):
'''
po stands for pose i.e. a vector in the direction of gaze for each pupil
'''
ti = map(np.array, t) # ground truth data in scene camera coordinate system
ni = map(np.array, po) # pupil pose in eye coordinate system (normal to pupil disc)
# w0 = np.array([0.,0.,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
w0 = np.array([0.,np.pi,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
wr = optimize.leastsq(f3D3D, w0[:], args=(ni, ti))
return wr[0]
####################################################################################
## Solving target energy using e0 = (0, 0, 0) and w0 from above
def minimizeEnergy(p, t, e0 = None, pose_given = False):
if e0 is None:
e0 = np.array([0, 0, 0])
if pose_given:
w = findW3D3D(p, t)
return cv2.Rodrigues(np.array(w[:3]))[0], w[3:] # R, e
else:
w0 = findInitialW(p, t, False)
qi = map(_q, p)
ti = map(np.array, t)
we0 = np.concatenate((w0, e0))
wer = optimize.leastsq(f2D3D, we0[:], args=(qi, ti))[0]
w = getWfromRowRepr(wer[:2*M])
e = wer[2*M:]
return w, e, getWfromRowRepr(w0)
def f2D3D(we, qi, ti):
'''
E_{\textmd{2Dto3D}}(\bm{w}, \bm{e}) = \sum_{i=1}^N |\bm{g}(\bm{q}_i\bm{w}) \times (\bm{t}_i - \bm{e})|^2
'''
# extracting parameters from the combined vector
w = getWfromRowRepr(we[:2*M])
e = we[2*M:]
gis = map(lambda q: g(q, w), qi)
return map(lambda pair: LA.norm(np.cross(pair[1], pair[0]-e)), zip(ti, gis))