from __future__ import division import numpy as np from numpy import linalg as LA from scipy import optimize from sklearn.preprocessing import PolynomialFeatures as P from vector import Vector as v import cv2 M = 7 # this is the size of the feature vector _q = lambda p: np.array([p[0], p[1], p[0]*p[0], p[1]*p[1], p[0]*p[1], p[0]*p[1]*p[0]*p[1], 1]) def alpha(q, w): ''' q is the polynomial representation of a 2D pupil point p w is the Mx2 parameter matrix (M is the length of our feature vector) (could be initialized as a zero matrix np.zeros((M, 2))) returns alpha_i = (theta_i, phi_i) ''' theta = np.dot(w[:, 0], q) phi = np.dot(w[:, 1], q) return theta, phi def g(q, w): ''' computes the unit gaze ray originating from the eye corresponding to q ''' theta, phi = alpha(q, w) sin = map(np.sin, (theta, phi)) cos = map(np.cos, (theta, phi)) ## This is THE definition, maps (0,0) to (0,0,1) and it produces a unit vector return np.array([sin[0], cos[0]*sin[1], cos[0]*cos[1]]) def g3D3D(pose): theta, phi = pose sin = map(np.sin, (theta, phi)) cos = map(np.cos, (theta, phi)) return np.array([sin[0], cos[0]*sin[1], cos[0]*cos[1]]) #################################################################################### ## Initial attempt to solve E(w, 0) Using a similiar idea to: ## http://scipy-lectures.github.io/advanced/mathematical_optimization/#id28 def getWfromRowRepr(w): w1 = w[:M] # 1st col w2 = w[M:] # 2nd col return np.array([w1, w2]).transpose() def getRfromRowRepr(w): return np.array([w[:3], w[3:6], w[6:]]) def f(w, qi, ti): ''' E(\bm{w}) = \sum_{i=1}^N | \bm{(\theta_i, \phi_i)} - \bm{q}_i\bm{w}|^2 ''' w = getWfromRowRepr(w) alpha_i = map(lambda q: alpha(q, w), qi) # theta, phi # we compare the estimated polar coordinates with polar coordinates corresponding # to ti as in the 2D to 2D case p_ti = map(lambda g: [np.arctan(g.x/g.z), np.arctan(g.y/g.z)], map(v,ti)) return map(lambda pair: np.sqrt((pair[0][0]-pair[1][0])**2 + (pair[0][1]-pair[1][1])**2), zip(p_ti, alpha_i)) def findInitialW(p, t, retMatrix = True): ti = map(np.array, t) qi = map(_q, p) w0 = np.zeros(2*M) wr = optimize.leastsq(f, w0[:], args=(qi, ti)) if retMatrix: return getWfromRowRepr(wr[0]) else: return wr[0] # tub function > 0 in -pi:pi, positive outside tub = lambda x: max(-x-np.pi, 0, x-np.pi) tub_weight = 10000 def f3D3D(w, ni, ti): ''' E_{\textmd{3Dto3D}}(\bm{R}, \bm{e}) = \sum_{i=1}^N |\bm{R} \bm{n}_i \times (\bm{t}_i - \bm{e}) |^2 ''' rvec = w[:3] tvec = w[3:] # e R = cv2.Rodrigues(np.array(rvec))[0] return map(lambda (t, n): LA.norm(np.cross(R.dot(n), t-tvec)), zip(ti, ni)) + \ [tub_weight * tub(w[0]), tub_weight * tub(w[1]), tub_weight * tub(w[2])] # this distance measure works if we initialize the minimization by np.array([0.,0.,0.,0.,0.,0.]) # return map(lambda (t, n): -np.dot(R.dot(n), t-tvec)/LA.norm(t-tvec) + 1, zip(ti, ni)) def findW3D3D(po, t): ''' po stands for pose i.e. a vector in the direction of gaze for each pupil ''' ti = map(np.array, t) # ground truth data in scene camera coordinate system ni = map(np.array, po) # pupil pose in eye coordinate system (normal to pupil disc) # w0 = np.array([0.,0.,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z) w0 = np.array([0.,np.pi,0.,0.,0.,0.]) # rvec (roll, pitch, yaw), tvec (x, y, z) wr = optimize.leastsq(f3D3D, w0[:], args=(ni, ti)) return wr[0] #################################################################################### ## Solving target energy using e0 = (0, 0, 0) and w0 from above def minimizeEnergy(p, t, e0 = None, pose_given = False): if e0 is None: e0 = np.array([0, 0, 0]) if pose_given: w = findW3D3D(p, t) return cv2.Rodrigues(np.array(w[:3]))[0], w[3:] # R, e else: w0 = findInitialW(p, t, False) qi = map(_q, p) ti = map(np.array, t) we0 = np.concatenate((w0, e0)) wer = optimize.leastsq(f2D3D, we0[:], args=(qi, ti))[0] w = getWfromRowRepr(wer[:2*M]) e = wer[2*M:] return w, e, getWfromRowRepr(w0) def f2D3D(we, qi, ti): ''' E_{\textmd{2Dto3D}}(\bm{w}, \bm{e}) = \sum_{i=1}^N |\bm{g}(\bm{q}_i\bm{w}) \times (\bm{t}_i - \bm{e})|^2 ''' # extracting parameters from the combined vector w = getWfromRowRepr(we[:2*M]) e = we[2*M:] gis = map(lambda q: g(q, w), qi) return map(lambda pair: LA.norm(np.cross(pair[1], pair[0]-e)), zip(ti, gis))