298 lines
10 KiB
Python
298 lines
10 KiB
Python
|
import json
|
||
|
import numpy as np
|
||
|
import matplotlib
|
||
|
import matplotlib.pyplot as plt
|
||
|
import matplotlib.patches as patches
|
||
|
from pattern.text.en import singularize
|
||
|
import nengo.spa as spa
|
||
|
import scipy.integrate as integrate
|
||
|
|
||
|
# Hand-picked palette; these entries come first in RGB_COLORS.
hex_colors = ['#8a3ffc', '#ff7eb6', '#6fdc8c', '#d2a106', '#ba4e00', '#33b1ff', '#570408',
              '#fa4d56', '#4589ff', '#08bdba', '#d4bbff', '#007d79', '#d12771', '#bae6ff']

RGB_COLORS = [matplotlib.colors.to_rgb(code) for code in hex_colors]

# Pad the palette with matplotlib's named colours, skipping the first
# eleven entries of the ``cnames`` table (indices 0..10).
RGB_COLORS.extend(
    matplotlib.colors.to_rgb(code)
    for code in list(matplotlib.colors.cnames.values())[11:]
)
|
||
|
|
||
|
|
||
|
# Lookup tables loaded once at import time. The paths are relative, so they
# are resolved against the current working directory. ``with`` guarantees the
# handle is closed even when ``json.load`` raises on malformed input.
with open('gqa_all_relations_map.json') as f:
    RELATION_DICT = json.load(f)

with open('gqa_all_vocab_classes.json') as f:
    CLASS_DICT = json.load(f)

with open('gqa_all_attributes.json') as f:
    ATTRIBUTE_DICT = json.load(f)
|
||
|
|
||
|
|
||
|
def bbox_to_mask(x, y, w, h, img_size=(500, 500), name=None, visualize=False):
    """Rasterise a bounding box into a binary mask.

    Args:
        x: integer column of the box's left edge.
        y: integer row of the box's top edge.
        w: integer box width in pixels.
        h: integer box height in pixels.
        img_size: shape handed to ``np.zeros``; indexed as (rows, columns).
        name: optional title used when ``visualize`` is set.
        visualize: if true, display the mask with matplotlib.

    Returns:
        Array of shape ``img_size`` holding 1 inside the box and 0 elsewhere.
        Parts of the box that fall outside the image are clipped rather than
        raising.
    """
    img = np.zeros(img_size)
    n_rows, n_cols = img.shape[:2]

    # Clamp both edges into the image so a box crossing the border (or lying
    # completely outside) yields a smaller / empty region instead of a shape
    # mismatch or an IndexError.
    row_start, row_stop = max(y, 0), min(max(y + h, 0), n_rows)
    col_start, col_stop = max(x, 0), min(max(x + w, 0), n_cols)
    img[row_start:row_stop, col_start:col_stop] = 1

    if visualize:
        plt.figure(figsize=(img_size[0] // 80, img_size[1] // 80))
        plt.imshow(img, cmap='gray')
        if name:
            plt.title(name)
        plt.axis('off')
        plt.show()

    return img
|
||
|
|
||
|
def make_good_unitary(D, eps=1e-3, rng=np.random):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Build a D-dimensional real vector whose Fourier coefficients all have
    unit magnitude, and wrap it in a ``spa.SemanticPointer``.

    Args:
        D: dimensionality of the vector.
        eps: margin; every sampled phase has magnitude within
            ``[pi * eps, pi * (1 - eps)]``.
        rng: object providing ``rand`` and ``choice`` (defaults to
            ``np.random``).

    Returns:
        ``spa.SemanticPointer`` wrapping the unit-norm vector.
    """
    # One free phase per conjugate pair of Fourier coefficients.
    magnitudes = rng.rand((D - 1) // 2)
    signs = rng.choice((-1, +1), len(magnitudes))
    phases = signs * np.pi * (eps + magnitudes * (1 - 2 * eps))
    assert np.all(np.abs(phases) >= np.pi * eps)
    assert np.all(np.abs(phases) <= np.pi * (1 - eps))

    spectrum = np.zeros(D, dtype='complex64')
    spectrum[0] = 1
    positive_half = slice(1, (D + 1) // 2)
    spectrum[positive_half] = np.cos(phases) + 1j * np.sin(phases)
    # Mirror the conjugates into the upper half so the inverse FFT is real.
    spectrum[-1:D // 2:-1] = np.conj(spectrum[positive_half])
    if D % 2 == 0:
        # Even D has an unpaired middle coefficient; pin it to 1.
        spectrum[D // 2] = 1
    assert np.allclose(np.abs(spectrum), 1)

    vec = np.fft.ifft(spectrum).real
    assert np.allclose(np.fft.fft(vec), spectrum)
    assert np.allclose(np.linalg.norm(vec), 1)
    return spa.SemanticPointer(vec)
|
||
|
|
||
|
def get_heatmap_vectors(xs, ys, x_axis_sp, y_axis_sp):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master:

    Build a lookup table with the encoded point for every (x, y) pair of the
    two grids, so a heat map can later be computed with one tensor product.

    Args:
        xs: sequence of x coordinates.
        ys: sequence of y coordinates.
        x_axis_sp: x axis vector, either a SemanticPointer or raw data.
        y_axis_sp: y axis vector, same kind as ``x_axis_sp``.

    Returns:
        Array of shape ``(len(xs), len(ys), dim)``.
    """
    # Only the x axis is inspected; when it is raw data both axes get wrapped.
    if x_axis_sp.__class__.__name__ != 'SemanticPointer':
        dim = len(x_axis_sp)
        x_axis_sp = spa.SemanticPointer(data=x_axis_sp)
        y_axis_sp = spa.SemanticPointer(data=y_axis_sp)
    else:
        dim = len(x_axis_sp.v)

    table = np.zeros((len(xs), len(ys), dim))
    for col, x_val in enumerate(xs):
        for row, y_val in enumerate(ys):
            point = encode_point(x=x_val, y=y_val, x_axis=x_axis_sp, y_axis=y_axis_sp)
            table[col, row, :] = point.v

    return table
|
||
|
|
||
|
def power(s, e):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Raise every Fourier coefficient of ``s.v`` to the exponent ``e`` and
    return the real part of the inverse transform as a new SemanticPointer.
    """
    spectrum = np.fft.fft(s.v)
    raised = np.fft.ifft(spectrum ** e)
    return spa.SemanticPointer(data=raised.real)
|
||
|
|
||
|
def encode_point(x, y, x_axis, y_axis):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Encode the 2-D point (x, y) as ``power(x_axis, x) * power(y_axis, y)``.
    """
    x_term = power(x_axis, x)
    y_term = power(y_axis, y)
    return x_term * y_term
|
||
|
|
||
|
def encode_region(x, y, x_axis, y_axis, width=28):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Integrate the point encoding ``power(x_axis, t) * power(y_axis, y)`` for
    ``t`` running from ``x`` to ``x + width``.

    The previous body handed an already-evaluated pointer (not a callable) to
    ``integrate.quad``, so it could never succeed; it also evaluated the
    integral twice and printed the first result. The integrand is now a
    function of the integration variable and, being vector-valued, is
    integrated with ``integrate.quad_vec``.

    NOTE(review): integration runs along x only, with y held fixed -- confirm
    that this is the intended region.

    Args:
        x: lower integration bound on the x axis.
        y: fixed y coordinate.
        x_axis: x axis pointer (must expose ``.v``).
        y_axis: y axis pointer (must expose ``.v``).
        width: length of the integration interval (previously hard-coded 28).

    Returns:
        Tuple ``(integral, error_estimate)`` as returned by ``quad_vec``.
    """
    # The y contribution does not depend on the integration variable.
    y_term = power(y_axis, y)

    def integrand(t):
        return (power(x_axis, t) * y_term).v

    return integrate.quad_vec(integrand, x, x + width)
|
||
|
|
||
|
|
||
|
def plot_heatmap(img, img_area, encoded_pos, xs, ys, heatmap_vectors, name='', vmin=-1, vmax=1, invert=False):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Show the image, optionally an area mask, and the similarity heat map of
    ``encoded_pos`` against ``heatmap_vectors`` side by side.

    Args:
        img: image passed to ``imshow``; its ``shape`` is read for tick labels.
        img_area: optional mask shown in a middle panel; ``None`` to omit it.
        encoded_pos: SemanticPointer whose ``.v`` is compared to the table.
        xs: x grid; its ends set the heat map extent.
        ys: y grid; its ends set the heat map extent.
        heatmap_vectors: array indexed ``[x, y, dim]``.
        name: figure title.
        vmin: lower colour limit.
        vmax: upper colour limit.
        invert: accepted but not used by this function.
    """
    assert encoded_pos.__class__.__name__ == 'SemanticPointer'

    # Contract the dim axis: (dim,) . (xs, ys, dim) -> (xs, ys)
    similarity = np.tensordot(encoded_pos.v, heatmap_vectors, axes=([0], [2]))

    show_area = img_area is not None
    num_plots = 3 if show_area else 2
    fig, axs = plt.subplots(1, num_plots, figsize=(4 * num_plots + 3, 3))
    fig.suptitle(name)

    image_ax = axs[0]
    image_ax.imshow(img)
    image_ax.axis('off')

    if show_area:
        area_ax = axs[1]
        area_ax.imshow(img_area, cmap='gray')
        x_labels = np.arange(0, img.shape[1], img.shape[1] / len(xs)).astype(int)[::20]
        y_labels = np.arange(0, img.shape[0], img.shape[0] / len(ys)).astype(int)[::10]
        area_ax.set_xticks(np.arange(0, len(xs), 20), x_labels)
        area_ax.set_yticks(np.arange(0, len(ys), 10), y_labels)
        area_ax.axis('off')

    # The heat map always occupies the last panel.
    heat_ax = axs[num_plots - 1]
    im = heat_ax.imshow(
        np.transpose(similarity),
        origin='upper',
        interpolation='none',
        extent=(xs[-1], xs[0], ys[-1], ys[0]),
        vmin=vmin,
        vmax=vmax,
        cmap='plasma',
    )
    heat_ax.axis('off')

    fig.colorbar(im, ax=axs.ravel().tolist())
    plt.show()
|
||
|
|
||
|
|
||
|
def generate_region_vector(desired, xs, ys, x_axis_sp, y_axis_sp):
    """from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Sum the point encodings of every grid cell flagged in ``desired`` and
    return the normalised result.

    Args:
        desired: 2-D array indexed ``[j, i]`` (y index first); cells equal to
            1 are included.
        xs: x grid.
        ys: y grid.
        x_axis_sp: x axis SemanticPointer.
        y_axis_sp: y axis SemanticPointer.

    Returns:
        SemanticPointer built from the summed vector, after ``normalize()``.
    """
    total = np.zeros_like((x_axis_sp.v))
    for i, x_val in enumerate(xs):
        for j, y_val in enumerate(ys):
            if desired[j, i] != 1:
                continue
            total += encode_point(x_val, y_val, x_axis_sp, y_axis_sp).v

    region = spa.SemanticPointer(data=total)
    region.normalize()
    return region
|
||
|
|
||
|
|
||
|
def bb_intersection_over_union(boxA, boxB):
    """from https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/

    Intersection over union of two boxes given as ``[x1, y1, x2, y2]``.

    Returns:
        0 when the boxes do not overlap, otherwise
        ``intersection / (areaA + areaB - intersection)`` as a float.
    """
    # Corners of the overlap rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # Negative extents mean no overlap along that axis, so clamp to 0.
    overlap_w = max(right - left, 0)
    overlap_h = max(bottom - top, 0)
    interArea = abs(overlap_w * overlap_h)
    if interArea == 0:
        return 0

    area_a = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    area_b = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))

    # Union = both areas minus the overlap that was counted twice.
    union = float(area_a + area_b - interArea)
    return interArea / union
|
||
|
|
||
|
|
||
|
def encode_point_multidim(values, axes):
    """ power(x_axis, x) * power(y_axis, y) for variable dimensions

    Args:
        values: one coordinate per axis.
        axes: axis pointers, same length as ``values``.

    Returns:
        Product of ``power(axis, value)`` over all pairs; the plain integer 1
        when both sequences are empty.
    """
    assert len(values) == len(axes), f'number of values {len(values)} does not match number of axes {len(axes)}'

    product = 1
    for coordinate, axis in zip(values, axes):
        term = power(axis, coordinate)
        product *= term
    return product
|
||
|
|
||
|
def get_heatmap_vectors_multidim(xs, ys, ws, hs, x_axis, y_axis, w_axis, h_axis):
    """ adaptation of get_heatmap_vectors for 4 dimensions

    Args:
        xs, ys, ws, hs: coordinate grids, one per dimension.
        x_axis, y_axis, w_axis, h_axis: axis SemanticPointers.

    Returns:
        Array of shape ``(len(xs), len(ys), len(ws), len(hs), dim)`` holding
        the encoded point for every grid combination.
    """
    assert x_axis.__class__.__name__ == 'SemanticPointer', f'Axes need to be of type SemanticPointer but are {x_axis.__class__.__name__}'

    dim = len(x_axis.v)
    table = np.zeros((len(xs), len(ys), len(ws), len(hs), dim))
    all_axes = [x_axis, y_axis, w_axis, h_axis]

    # Lazily walk the full grid in x -> y -> w -> h nesting order.
    cells = (
        ((i, j, n, k), [x, y, w, h])
        for i, x in enumerate(xs)
        for j, y in enumerate(ys)
        for n, w in enumerate(ws)
        for k, h in enumerate(hs)
    )
    for index, coords in cells:
        table[index] = encode_point_multidim(values=coords, axes=all_axes).v

    return table
|
||
|
|
||
|
|
||
|
def ssp_to_loc_multidim(sp, heatmap_vectors, linspace):
    """ adaptation of loc_match from https://github.com/ctn-waterloo/cogsci2019-ssp/tree/master

    Decode an SSP to the grid location whose lookup vector has the largest
    dot product with it.

    Args:
        sp: SemanticPointer to decode (its ``.v`` is used).
        heatmap_vectors: lookup table indexed ``[x, y, w, h, dim]``.
        linspace: the four grids ``(xs, ys, ws, hs)``.

    Returns:
        ``np.array([x, y, w, h])`` taken from the grids at the best index.
    """
    xs, ys, ws, hs = linspace

    assert sp.__class__.__name__ == 'SemanticPointer', \
        f'Queried object needs to be of type SemanticPointer but is {sp.__class__.__name__}'

    # Contract the pointer with the last (dim) axis of the table.
    similarity = np.tensordot(sp.v, heatmap_vectors, axes=([0], [4]))
    best = np.unravel_index(similarity.argmax(), similarity.shape)

    return np.array([grid[idx] for grid, idx in zip((xs, ys, ws, hs), best)])
|
||
|
|
||
|
|
||
|
def encode_image_ssp(img, sg_data, axes, new_size, dim, visualize=True):
    """encode all objects in an image to an SSP memory

    Args:
        img: image array; only ``shape[:2]`` (rows, columns) is used for the
            encoding, the pixels only for plotting.
        sg_data: mapping of object id -> dict with keys 'name', 'x', 'y',
            'w' and 'h'.
        axes: axis pointers handed to ``encode_point_multidim``.
        new_size: target size used to derive the position scale.
        dim: dimensionality of the memory and of each object pointer.
        visualize: if true, print the scaling and draw the scaled boxes.

    Returns:
        Tuple ``(encoded_items, encoded_ssps, memory)``: the rounded
        ``[x, y, w, h]`` per object name, the pointer created per object
        name, and the sum of ``pointer * position`` over all objects.
    """
    img_size = img.shape[:2]

    # Pick the side that drives the position scale.
    height_dominates = img_size[1] / 2 < img_size[0]
    scale = img_size[0] / new_size[0] if height_dominates else img_size[1] / new_size[1]

    # scale width and height to fixed size of 10
    w_scale = img_size[1] / 10
    h_scale = img_size[0] / 10

    if visualize:
        print(f'Original image {img_size[1]}x{img_size[0]} --> {np.array(img_size) / scale}')
        _, ax = plt.subplots(1, 1)
        ax.imshow(img, interpolation='none', origin='upper', extent=[0, img_size[1] / scale, img_size[0] / scale, 0])
        plt.axis('off')

    encoded_items = {}
    encoded_ssps = {}
    memory = spa.SemanticPointer(data=np.zeros(dim))
    seen_names = []

    for _obj_id, obj_dict in sg_data.items():
        # Disambiguate repeated object names with a running 1-based counter.
        base_name = singularize(obj_dict.get('name'))
        seen_names.append(base_name)
        name = f'{base_name}_{seen_names.count(base_name)}'

        # extract ground truth data and scale to fit to SSPs
        raw_x, raw_y = obj_dict.get('x'), obj_dict.get('y')
        raw_w, raw_h = obj_dict.get('w'), obj_dict.get('h')
        x, y = raw_x / scale, raw_y / scale
        width, height = raw_w / w_scale, raw_h / h_scale

        # Boxes never shrink below one unit in either direction.
        if not width >= 1:
            width = 1
        if not height >= 1:
            height = 1

        # Round values to next int (otherwise decoding gets buggy)
        item = np.round([x, y, width, height], decimals=0).astype(int)
        encoded_items[name] = item

        pos = encode_point_multidim(list(item), axes)
        ssp = spa.SemanticPointer(dim)
        encoded_ssps[name] = ssp

        memory += ssp * pos

        if visualize:
            box_x, box_y, box_w, box_h = item
            # Undo the fixed-size scaling so the box matches the shown image.
            rect = patches.Rectangle(
                (box_x, box_y),
                (box_w * w_scale) / scale,
                (box_h * h_scale) / scale,
                linewidth=2,
                label=name,
                edgecolor='c',
                facecolor='none',
            )
            ax.add_patch(rect)

    if visualize:
        plt.show()

    return encoded_items, encoded_ssps, memory
|