131 lines
4.3 KiB
Python
131 lines
4.3 KiB
Python
|
from __future__ import division
|
||
|
import numpy as np
|
||
|
from numpy import linalg as LA
|
||
|
from scipy import optimize
|
||
|
from sklearn.preprocessing import PolynomialFeatures as P
|
||
|
from vector import Vector as v
|
||
|
import cv2
|
||
|
|
||
|
M = 7 # this is the size of the feature vector
|
||
|
_q = lambda p: np.array([p[0], p[1], p[0]*p[0], p[1]*p[1], p[0]*p[1], p[0]*p[1]*p[0]*p[1], 1])
|
||
|
|
||
|
def alpha(q, w):
|
||
|
'''
|
||
|
q is the polynomial representation of a 2D pupil point p
|
||
|
w is the Mx2 parameter matrix (M is the length of our feature vector)
|
||
|
(could be initialized as a zero matrix np.zeros((M, 2)))
|
||
|
returns alpha_i = (theta_i, phi_i)
|
||
|
'''
|
||
|
theta = np.dot(w[:, 0], q)
|
||
|
phi = np.dot(w[:, 1], q)
|
||
|
return theta, phi
|
||
|
|
||
|
def g(q, w):
|
||
|
'''
|
||
|
computes the unit gaze ray originating from the eye corresponding to q
|
||
|
'''
|
||
|
theta, phi = alpha(q, w)
|
||
|
sin = map(np.sin, (theta, phi))
|
||
|
cos = map(np.cos, (theta, phi))
|
||
|
## This is THE definition, maps (0,0) to (0,0,1) and it produces a unit vector
|
||
|
return np.array([sin[0],
|
||
|
cos[0]*sin[1],
|
||
|
cos[0]*cos[1]])
|
||
|
def g3D3D(pose):
|
||
|
theta, phi = pose
|
||
|
sin = map(np.sin, (theta, phi))
|
||
|
cos = map(np.cos, (theta, phi))
|
||
|
return np.array([sin[0],
|
||
|
cos[0]*sin[1],
|
||
|
cos[0]*cos[1]])
|
||
|
|
||
|
####################################################################################
|
||
|
## Initial attempt to solve E(w, 0) Using a similiar idea to:
|
||
|
## http://scipy-lectures.github.io/advanced/mathematical_optimization/#id28
|
||
|
|
||
|
def getWfromRowRepr(w):
|
||
|
w1 = w[:M] # 1st col
|
||
|
w2 = w[M:] # 2nd col
|
||
|
return np.array([w1, w2]).transpose()
|
||
|
|
||
|
def getRfromRowRepr(w):
|
||
|
return np.array([w[:3], w[3:6], w[6:]])
|
||
|
|
||
|
def f(w, qi, ti):
|
||
|
'''
|
||
|
E(\bm{w}) = \sum_{i=1}^N | \bm{(\theta_i, \phi_i)} - \bm{q}_i\bm{w}|^2
|
||
|
'''
|
||
|
w = getWfromRowRepr(w)
|
||
|
alpha_i = map(lambda q: alpha(q, w), qi) # theta, phi
|
||
|
# we compare the estimated polar coordinates with polar coordinates corresponding
|
||
|
# to ti as in the 2D to 2D case
|
||
|
p_ti = map(lambda g: [np.arctan(g.x/g.z), np.arctan(g.y/g.z)], map(v,ti))
|
||
|
return map(lambda pair: np.sqrt((pair[0][0]-pair[1][0])**2 + (pair[0][1]-pair[1][1])**2), zip(p_ti, alpha_i))
|
||
|
|
||
|
def findInitialW(p, t, retMatrix = True):
|
||
|
ti = map(np.array, t)
|
||
|
qi = map(_q, p)
|
||
|
|
||
|
w0 = np.zeros(2*M)
|
||
|
wr = optimize.leastsq(f, w0[:], args=(qi, ti))
|
||
|
if retMatrix:
|
||
|
return getWfromRowRepr(wr[0])
|
||
|
else:
|
||
|
return wr[0]
|
||
|
|
||
|
# tub function > 0 in -pi:pi, positive outside
|
||
|
tub = lambda x: max(-x-np.pi, 0, x-np.pi)
|
||
|
tub_weight = 10000
|
||
|
def f3D3D(w, ni, ti):
|
||
|
'''
|
||
|
E_{\textmd{3Dto3D}}(\bm{R}, \bm{e}) = \sum_{i=1}^N |\bm{R} \bm{n}_i \times (\bm{t}_i - \bm{e}) |^2
|
||
|
'''
|
||
|
rvec = w[:3]
|
||
|
tvec = w[3:] # e
|
||
|
R = cv2.Rodrigues(np.array(rvec))[0]
|
||
|
return map(lambda (t, n): LA.norm(np.cross(R.dot(n), t-tvec)), zip(ti, ni)) + \
|
||
|
[tub_weight * tub(w[0]), tub_weight * tub(w[1]), tub_weight * tub(w[2])]
|
||
|
# this distance measure works if we initialize the minimization by np.array([0.,0.,0.,0.,0.,0.])
|
||
|
# return map(lambda (t, n): -np.dot(R.dot(n), t-tvec)/LA.norm(t-tvec) + 1, zip(ti, ni))
|
||
|
|
||
|
def findW3D3D(po, t):
|
||
|
'''
|
||
|
po stands for pose i.e. a vector in the direction of gaze for each pupil
|
||
|
'''
|
||
|
ti = map(np.array, t) # ground truth data in scene camera coordinate system
|
||
|
ni = map(np.array, po) # pupil pose in eye coordinate system (normal to pupil disc)
|
||
|
# w0 = np.array([0.,0.,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
|
||
|
w0 = np.array([0.,np.pi,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z)
|
||
|
wr = optimize.leastsq(f3D3D, w0[:], args=(ni, ti))
|
||
|
return wr[0]
|
||
|
####################################################################################
|
||
|
## Solving target energy using e0 = (0, 0, 0) and w0 from above
|
||
|
|
||
|
def minimizeEnergy(p, t, e0 = None, pose_given = False):
|
||
|
if e0 is None:
|
||
|
e0 = np.array([0, 0, 0])
|
||
|
|
||
|
if pose_given:
|
||
|
w = findW3D3D(p, t)
|
||
|
return cv2.Rodrigues(np.array(w[:3]))[0], w[3:] # R, e
|
||
|
else:
|
||
|
w0 = findInitialW(p, t, False)
|
||
|
qi = map(_q, p)
|
||
|
|
||
|
ti = map(np.array, t)
|
||
|
we0 = np.concatenate((w0, e0))
|
||
|
|
||
|
wer = optimize.leastsq(f2D3D, we0[:], args=(qi, ti))[0]
|
||
|
w = getWfromRowRepr(wer[:2*M])
|
||
|
e = wer[2*M:]
|
||
|
return w, e, getWfromRowRepr(w0)
|
||
|
|
||
|
def f2D3D(we, qi, ti):
|
||
|
'''
|
||
|
E_{\textmd{2Dto3D}}(\bm{w}, \bm{e}) = \sum_{i=1}^N |\bm{g}(\bm{q}_i\bm{w}) \times (\bm{t}_i - \bm{e})|^2
|
||
|
'''
|
||
|
# extracting parameters from the combined vector
|
||
|
w = getWfromRowRepr(we[:2*M])
|
||
|
e = we[2*M:]
|
||
|
gis = map(lambda q: g(q, w), qi)
|
||
|
return map(lambda pair: LA.norm(np.cross(pair[1], pair[0]-e)), zip(ti, gis))
|