import os
import re
import sys
from copy import deepcopy

import numpy as np
import cv2
import scipy.io
import scipy.ndimage
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import clear_output

import tensorflow as tf
import keras
import keras.backend as K
from keras.models import Model
from keras.layers import Input, TimeDistributed, Lambda, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from keras.utils import Sequence
from keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

import imgaug.augmenters as iaa  # used by the generators' augmentation pipelines

# DEBUG
DEBUG = False

# CAT2000 input image dimensions
cat2000_c = 1920
cat2000_r = 1080
#cat2000_r_out = 1088 # divisible by 16
cat2000_r_out = 1104  # divisible by 48
cat2000_c_out = cat2000_c  # already divisible by 16

cc_c = 300
cc_r = 225
cc_c_out = 1776
cc_r_out = 1344

# number of rows of input images
#shape_r = int(cat2000_r/6)
shape_r = 240
#shape_r = cc_r
# number of cols of input images
#shape_c = int(cat2000_c/6)
shape_c = 320
#shape_c = cc_c
# number of rows of downsampled maps
shape_r_gt = 30
# number of cols of downsampled maps
shape_c_gt = 40
# number of rows of model outputs
#shape_r_out = cat2000_r_out
shape_r_out = 480
#shape_r_out = cc_r_out
# number of cols of model outputs
#shape_c_out = cat2000_c_out
shape_c_out = 640
#shape_c_out = cc_c_out
# final upsampling factor
upsampling_factor = 16
# number of epochs
nb_epoch = 50
# number of timesteps
nb_timestep = 3
# number of learned priors
nb_gaussian = 16

def repeat(x):
    # Tile x along a new time axis: (bs, ...) -> (bs, nb_timestep, ...)
    return K.repeat_elements(K.expand_dims(x, axis=1), nb_timestep, axis=1)
    # return K.reshape(K.repeat(K.batch_flatten(x), nb_timestep), (1, nb_timestep, shape_r_gt, shape_c_gt, 512))


def repeat_shape(s):
    return (s[0], nb_timestep) + s[1:]
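
# A minimal sketch (not part of the training pipeline) of what repeat() does:
# it tiles a static feature map along a new time axis so that a recurrent
# layer can consume it. With nb_timestep = 3, (bs, r, c, ch) -> (bs, 3, r, c, ch).
def _example_repeat():
    x = K.constant(np.ones((1, shape_r_gt, shape_c_gt, 512)))
    y = repeat(x)
    print(K.int_shape(y))  # (1, 3, 30, 40, 512) with the defaults above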


def padding(img, shape_r, shape_c, channels=3):
    # Letterbox img into a (shape_r, shape_c) canvas, preserving aspect ratio
    img_padded = np.zeros((shape_r, shape_c, channels), dtype=np.uint8)
    if channels == 1:
        img_padded = np.zeros((shape_r, shape_c), dtype=np.uint8)

    original_shape = img.shape
    rows_rate = original_shape[0]/shape_r
    cols_rate = original_shape[1]/shape_c

    if rows_rate > cols_rate:
        new_cols = (original_shape[1] * shape_r) // original_shape[0]
        img = cv2.resize(img, (new_cols, shape_r))
        if new_cols > shape_c:
            new_cols = shape_c
        img_padded[:, ((img_padded.shape[1] - new_cols) // 2):((img_padded.shape[1] - new_cols) // 2 + new_cols)] = img
    else:
        new_rows = (original_shape[0] * shape_c) // original_shape[1]
        img = cv2.resize(img, (shape_c, new_rows))
        if new_rows > shape_r:
            new_rows = shape_r
        img_padded[((img_padded.shape[0] - new_rows) // 2):((img_padded.shape[0] - new_rows) // 2 + new_rows), :] = img

    return img_padded
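
# A minimal usage sketch with a hypothetical 1920x1080 frame: padding()
# letterboxes it into the default 240x320 canvas, adding black bars top/bottom.
def _example_padding():
    frame = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)
    canvas = padding(frame, shape_r, shape_c, channels=3)
    print(canvas.shape)  # (240, 320, 3); rows 0-29 and 210-239 stay black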


def resize_fixation(img, rows=480, cols=640):
    # Rescale individual fixation coordinates instead of interpolating pixels,
    # so the output stays a binary map
    out = np.zeros((rows, cols))
    factor_scale_r = rows / img.shape[0]
    factor_scale_c = cols / img.shape[1]

    coords = np.argwhere(img)
    for coord in coords:
        r = int(np.round(coord[0]*factor_scale_r))
        c = int(np.round(coord[1]*factor_scale_c))
        if r == rows:
            r -= 1
        if c == cols:
            c -= 1
        out[r, c] = 1

    return out
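
# A minimal sketch: a single fixation at (50, 50) in a 100x100 map lands at
# the rescaled coordinate (240, 320) in the 480x640 output, with no blurring.
def _example_resize_fixation():
    fix = np.zeros((100, 100))
    fix[50, 50] = 1
    out = resize_fixation(fix, rows=480, cols=640)
    print(np.argwhere(out))  # [[240 320]]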


def padding_fixation(img, shape_r, shape_c):
    img_padded = np.zeros((shape_r, shape_c))

    original_shape = img.shape
    rows_rate = original_shape[0]/shape_r
    cols_rate = original_shape[1]/shape_c

    if rows_rate > cols_rate:
        new_cols = (original_shape[1] * shape_r) // original_shape[0]
        img = resize_fixation(img, rows=shape_r, cols=new_cols)
        if new_cols > shape_c:
            new_cols = shape_c
        img_padded[:, ((img_padded.shape[1] - new_cols) // 2):((img_padded.shape[1] - new_cols) // 2 + new_cols)] = img
    else:
        new_rows = (original_shape[0] * shape_c) // original_shape[1]
        img = resize_fixation(img, rows=new_rows, cols=shape_c)
        if new_rows > shape_r:
            new_rows = shape_r
        img_padded[((img_padded.shape[0] - new_rows) // 2):((img_padded.shape[0] - new_rows) // 2 + new_rows), :] = img

    return img_padded


def preprocess_fixmaps(paths, shape_r, shape_c, fix_as_mat=False, fix_key="", pad=True):
    if pad:
        ims = np.zeros((len(paths), shape_r, shape_c, 1))
    else:
        ims = []

    for i, path in enumerate(paths):
        if path == 'dummy':
            # Placeholder for missing fixation data
            fix_map = np.zeros((480, 640))
        elif fix_as_mat:
            mat = scipy.io.loadmat(path)
            if DEBUG:
                print('mat', mat)
            fix_map = mat[fix_key]
        else:
            fix_map = cv2.imread(path, 0)

        if DEBUG:
            print('fix_map shape, max, min, mean:',
                  fix_map.shape, np.max(fix_map), np.min(fix_map), np.mean(fix_map))
        if pad:
            ims[i, :, :, 0] = padding_fixation(fix_map, shape_r=shape_r, shape_c=shape_c)
        else:
            ims.append(fix_map)

    return ims
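
# A minimal sketch: 'dummy' entries yield all-zero fixation maps, which serve
# as placeholders when a timestep has no fixation data.
def _example_preprocess_fixmaps():
    ims = preprocess_fixmaps(['dummy', 'dummy'], shape_r_out, shape_c_out)
    print(ims.shape)  # (2, 480, 640, 1)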


def load_maps(paths):
    ims = []

    for i, path in enumerate(paths):
        original_map = np.load(path, allow_pickle=True)
        # TODO: check for /255.0
        ims.append(original_map.astype(np.float32))
    ims = np.array(ims)
    return ims


def preprocess_maps(paths, shape_r, shape_c, pad=True):
    if pad:
        ims = np.zeros((len(paths), shape_r, shape_c, 1))
    else:
        ims = []

    for i, path in enumerate(paths):
        original_map = cv2.imread(path, 0)
        if pad:
            padded_map = padding(original_map, shape_r, shape_c, 1)
            ims[i, :, :, 0] = padded_map.astype(np.float32)
            ims[i, :, :, 0] /= 255.0
        else:
            ims.append(original_map.astype(np.float32) / 255.0)

    return ims


def load_images(paths):
    ims = []

    for i, path in enumerate(paths):
        img = np.load(path, allow_pickle=True)
        ims.append(img)

    ims = np.array(ims)
    return ims


def preprocess_images(paths, shape_r, shape_c, pad=True):
    if pad:
        ims = np.zeros((len(paths), shape_r, shape_c, 3))
    else:
        ims = []

    for i, path in enumerate(paths):
        original_image = cv2.imread(path)
        if original_image is None:
            raise ValueError('Path unreadable: %s' % path)
        if pad:
            padded_image = padding(original_image, shape_r, shape_c, 3)
            ims[i] = padded_image
        else:
            # Subtract the VGG/ImageNet channel means (BGR order, matching cv2.imread)
            original_image = original_image.astype(np.float32)
            original_image[..., 0] -= 103.939
            original_image[..., 1] -= 116.779
            original_image[..., 2] -= 123.68
            ims.append(original_image)

    if DEBUG:
        print('preprocess_images: loaded %d images' % len(ims))
        # plt.figure()
        # plt.subplot(1,2,1)
        # plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
        # plt.subplot(1,2,2)
        # plt.imshow(cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB))
        # plt.suptitle(path)

    if pad:
        ims[:, :, :, 0] -= 103.939
        ims[:, :, :, 1] -= 116.779
        ims[:, :, :, 2] -= 123.68

    return ims


def reverse_preprocess(img):
    # Undo the mean subtraction from preprocess_images() and convert BGR -> RGB
    im = deepcopy(img)

    im[:, :, 0] += 103.939
    im[:, :, 1] += 116.779
    im[:, :, 2] += 123.68

    im = im[..., ::-1]
    im = np.array(im, dtype=np.uint8)

    return im
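
# A minimal round-trip sketch: reverse_preprocess() approximately inverts the
# mean subtraction done in preprocess_images(), up to float rounding and the
# BGR -> RGB channel flip.
def _example_reverse_preprocess():
    bgr = np.random.randint(0, 255, (shape_r, shape_c, 3)).astype(np.float32)
    pre = bgr.copy()
    pre[..., 0] -= 103.939
    pre[..., 1] -= 116.779
    pre[..., 2] -= 123.68
    rgb = reverse_preprocess(pre)
    print(np.max(np.abs(rgb.astype(int) - bgr[..., ::-1].astype(int))))  # 0 or 1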


def postprocess_predictions(pred, shape_r, shape_c, blur=False, normalize=False, zero_to_255=False):
    predictions_shape = pred.shape
    rows_rate = shape_r / predictions_shape[0]
    cols_rate = shape_c / predictions_shape[1]

    # pred = pred / np.max(pred) * 255

    if blur:
        sigma = blur
        pred = scipy.ndimage.gaussian_filter(pred, sigma=sigma)

    if rows_rate > cols_rate:
        new_cols = (predictions_shape[1] * shape_r) // predictions_shape[0]
        pred = cv2.resize(pred, (new_cols, shape_r))
        img = pred[:, ((pred.shape[1] - shape_c) // 2):((pred.shape[1] - shape_c) // 2 + shape_c)]
    else:
        new_rows = (predictions_shape[0] * shape_c) // predictions_shape[1]
        pred = cv2.resize(pred, (shape_c, new_rows))
        img = pred[((pred.shape[0] - shape_r) // 2):((pred.shape[0] - shape_r) // 2 + shape_r), :]

    if normalize:
        img = img / np.max(img) * 255
    if zero_to_255:
        img = np.abs(img - 255)
    return img
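
# A minimal sketch: postprocess_predictions() undoes the letterboxing done by
# padding(), cropping a 480x640 model output back to a hypothetical original
# resolution of 1920x1080.
def _example_postprocess():
    pred = np.random.rand(shape_r_out, shape_c_out)
    out = postprocess_predictions(pred, 1080, 1920, normalize=True)
    print(out.shape)  # (1080, 1920)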


class MultidurationGenerator(Sequence):
    def __init__(self,
                 img_filenames,
                 map_filenames=None,
                 fix_filenames=None,
                 batch_size=1,
                 img_size=(shape_r, shape_c),
                 map_size=(shape_r_out, shape_c_out),
                 shuffle=True,
                 augment=False,
                 n_output_maps=1,
                 n_output_fixs=1,
                 mode='multistream_concat',
                 fix_key='',
                 return_names=False,
                 fix_as_mat=False,
                 pad_gt_maps=True,
                 read_npy=False):
        '''
        Generator for multi-duration saliency data. Receives a list of images and
        t lists of heatmaps and fixations, where t is the number of saliency
        timesteps to yield. The generator infers t from the length of map_filenames.

        This generator has 3 different modes:

        1. multistream_concat: concatenates the fixations and maps for a given
        timestep into one tensor of shape (bs, 2, r, c, 1), appends these tensors
        to a list of length t, and yields that list as y_true. This mode is meant
        for losses that recover the map and fixations by slicing the y_true tensor
        internally.

        2. multistream_full: does not concatenate the fixations and maps; instead,
        yields every fixation map and saliency map needed for each timestep as a
        separate element of the output list. For example, with 3 losses and 2
        timesteps, this generator yields a y_true list of length 6: 3 maps/fixs
        for timestep 1 and 3 maps/fixs for timestep 2.

        3. singlestream: concatenates all timesteps into one tensor. For each
        loss, the generator yields a tensor of shape (bs, time, r, c, 1). With kl,
        cc and nss, for example, the generator outputs a list of length 3, where
        each element is a tensor of that shape. This mode should be used with
        losses adapted to tensors with a time dimension.
        '''

        print('Instantiating MultidurationGenerator. '
              'Number of files received: %d. Batch size: %d. '
              'Image size: %s. Augmentation: %d. Mode: %s'
              % (len(img_filenames), batch_size, str(img_size), augment, mode))

        if (mode == 'multistream_concat') and (map_filenames is None or fix_filenames is None):
            raise ValueError('multistream_concat can only be used when both fixations and maps '
                             'are provided. If one of the two is enough, use `multistream_full`.')

        self.n_output_maps = n_output_maps
        self.n_output_fixs = n_output_fixs
        self.fix_as_mat = fix_as_mat
        self.fix_key = fix_key
        self.pad_gt_maps = pad_gt_maps

        self.img_filenames = np.array(img_filenames)
        self.read_npy = read_npy

        # check that maps make sense
        if map_filenames is not None:
            self.map_filenames = np.array(map_filenames)
            assert all(len(self.img_filenames) == len(elt) for elt in self.map_filenames), \
                "Mismatch between images and maps. Images size: %s. Maps size: %s." \
                % (self.img_filenames.shape, self.map_filenames.shape)
            self.timesteps = len(map_filenames)
        else:
            self.n_output_maps = 0
            self.map_filenames = None
            print('Warning: no map filenames provided; no outputs of that kind will be generated')

        # check that fixations make sense
        if fix_filenames is not None:
            self.fix_filenames = np.array(fix_filenames)
            assert all(len(self.img_filenames) == len(elt) for elt in self.fix_filenames), \
                "Mismatch between images and fixations. Images size: %s. Fix size: %s." \
                % (self.img_filenames.shape, self.fix_filenames.shape)
            self.timesteps = len(fix_filenames)
        else:
            self.n_output_fixs = 0
            self.fix_filenames = None
            print('Warning: no fixation filenames provided; no outputs of that kind will be generated')

        self.batch_size = batch_size
        self.img_size = img_size
        self.map_size = map_size
        self.shuffle = shuffle
        self.augment = augment
        self.mode = mode
        self.return_names = return_names

        # Defining the augmentation sequence
        if augment:
            sometimes = lambda aug: iaa.Sometimes(0.4, aug)
            self.seq = iaa.Sequential([
                sometimes(iaa.CropAndPad(px=(0, 20))),  # crop/pad each side by 0 to 20px (randomly chosen)
                iaa.Fliplr(0.5),  # horizontally flip 50% of the images
                sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
                sometimes(iaa.Affine(rotate=(-15, 15)))
            ], random_order=True)

        if shuffle:
            self.on_epoch_end()
    def __len__(self):
        return int(np.ceil(len(self.img_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        # Get input images
        batch_imgs = self.img_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
        if self.read_npy:
            images = load_images(batch_imgs)
        else:
            images = preprocess_images(batch_imgs, self.img_size[0], self.img_size[1])

        # Get ground truth maps for all times
        if self.n_output_maps >= 1:
            maps = []
            for t in range(self.timesteps):
                maps_names_t = self.map_filenames[t][idx * self.batch_size : (idx + 1) * self.batch_size]
                if self.read_npy:
                    maps_t = load_maps(maps_names_t)
                else:
                    maps_t = preprocess_maps(maps_names_t, self.map_size[0], self.map_size[1], pad=self.pad_gt_maps)
                maps.append(maps_t)

        # Get fixation maps for all times
        if self.n_output_fixs >= 1:
            fixs = []
            for t in range(self.timesteps):
                fix_names_t = self.fix_filenames[t][idx * self.batch_size : (idx + 1) * self.batch_size]
                if self.read_npy:
                    fix_t = load_images(fix_names_t)
                else:
                    fix_t = preprocess_fixmaps(fix_names_t, self.map_size[0], self.map_size[1],
                                               fix_as_mat=self.fix_as_mat, fix_key=self.fix_key,
                                               pad=self.pad_gt_maps)
                fixs.append(fix_t)

        if self.augment:
            seq_det = self.seq.to_deterministic()
            images = seq_det.augment_images(images)
            for ta in range(self.timesteps):
                if self.n_output_maps >= 1:
                    maps[ta] = seq_det.augment_heatmaps(maps[ta])
                if self.n_output_fixs >= 1:
                    fixs[ta] = seq_det.augment_heatmaps(fixs[ta])

        if self.mode == 'singlestream':
            # Returns a list of n_output_maps+n_output_fixs elements.
            # Each element is a 5D tensor: (bs, timesteps, r, c, 1).
            outs = []
            if self.n_output_maps >= 1:
                maps_with_time = np.zeros((len(batch_imgs), self.timesteps, self.map_size[0], self.map_size[1], 1))
                for i in range(self.timesteps):
                    maps_with_time[:, i, ...] = maps[i]
                # variant for images of varying size: build a list per batch element
                # maps_with_time = [[maps[ti][bidx] for ti in range(self.timesteps)] for bidx in range(self.batch_size)]
                outs.extend([maps_with_time] * self.n_output_maps)

            if self.n_output_fixs >= 1:
                fixs_with_time = np.zeros((len(batch_imgs), self.timesteps, self.map_size[0], self.map_size[1], 1))
                for i in range(self.timesteps):
                    fixs_with_time[:, i, ...] = fixs[i]
                # variant for images of varying size: build a list per batch element
                # fixs_with_time = [np.array([fixs[ti][bidx] for ti in range(self.timesteps)]) for bidx in range(self.batch_size)]
                outs.extend([fixs_with_time] * self.n_output_fixs)

        elif self.mode == 'multistream_concat':
            # Returns a list of t elements: [concat(maps_t1, fix_t1), concat(maps_t2, fix_t2), ...]
            outs = []
            for i in range(self.timesteps):
                outs.append(np.concatenate([np.expand_dims(maps[i], axis=1), np.expand_dims(fixs[i], axis=1)], axis=1))

        elif self.mode == 'multistream_full':
            # Returns a list of size timesteps*losses. With 2 map losses, 1 fix loss
            # and 2 timesteps: [m1, m1, m2, m2, fix1, fix2].
            outs = []
            if self.n_output_maps >= 1:
                for i in range(self.timesteps):
                    outs.extend([maps[i]] * self.n_output_maps)
            if self.n_output_fixs >= 1:
                for i in range(self.timesteps):
                    outs.extend([fixs[i]] * self.n_output_fixs)

        if self.return_names:
            return images, outs, batch_imgs
        return images, outs

    def on_epoch_end(self):
        if self.shuffle:
            idxs = np.random.permutation(len(self.img_filenames))
            self.img_filenames = self.img_filenames[idxs]
            if self.map_filenames is not None:
                for i in range(len(self.map_filenames)):
                    self.map_filenames[i] = self.map_filenames[i][idxs]
            if self.fix_filenames is not None:
                for i in range(len(self.fix_filenames)):
                    self.fix_filenames[i] = self.fix_filenames[i][idxs]
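
# A minimal usage sketch with hypothetical file lists: two saliency durations,
# singlestream mode, one map-based and one fixation-based loss. Each y_true
# element then has shape (bs, 2, shape_r_out, shape_c_out, 1).
def _example_multiduration_generator(img_files, map_files_t1, map_files_t2,
                                     fix_files_t1, fix_files_t2):
    gen = MultidurationGenerator(img_filenames=img_files,
                                 map_filenames=[map_files_t1, map_files_t2],
                                 fix_filenames=[fix_files_t1, fix_files_t2],
                                 batch_size=4,
                                 mode='singlestream',
                                 n_output_maps=1,
                                 n_output_fixs=1)
    images, outs = gen[0]
    print(images.shape, [o.shape for o in outs])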


class SalImpGenerator(Sequence):

    def __init__(self,
                 img_filenames,
                 imp_filenames,
                 fix_filenames=None,
                 batch_size=1,
                 img_size=(shape_r, shape_c),
                 map_size=(shape_r_out, shape_c_out),
                 shuffle=True,
                 augment=False,
                 n_output_maps=1,
                 concat_fix_and_maps=True,
                 fix_as_mat=False,
                 fix_key="",
                 pad_maps=True,
                 pad_imgs=True,
                 read_npy=False,
                 return_names=False):

        print('Instantiating SalImpGenerator. Number of files received: %d. Batch size: %d. '
              'Image size: %s. Map size: %s. Augmentation: %d. Pad_imgs: %s. Pad_maps: %s.'
              % (len(img_filenames), batch_size, str(img_size), str(map_size), augment, pad_imgs, pad_maps))

        self.img_filenames = np.array(img_filenames)
        self.imp_filenames = np.array(imp_filenames)
        self.batch_size = batch_size
        self.img_size = img_size
        self.map_size = map_size
        self.shuffle = shuffle
        self.augment = augment
        self.n_output_maps = n_output_maps
        self.concat_fix_and_maps = concat_fix_and_maps
        self.fix_as_mat = fix_as_mat
        self.fix_key = fix_key
        self.pad_imgs = pad_imgs
        self.pad_maps = pad_maps
        self.return_names = return_names
        self.read_npy = read_npy

        if fix_filenames is not None:
            self.fix_filenames = np.array(fix_filenames)
        else:
            self.fix_filenames = None

        if augment:
            sometimes = lambda aug: iaa.Sometimes(0.4, aug)
            self.seq = iaa.Sequential([
                sometimes(iaa.CropAndPad(px=(0, 20))),  # crop/pad each side by 0 to 20px (randomly chosen)
                iaa.Fliplr(0.5),  # horizontally flip 50% of the images
                sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
                sometimes(iaa.Affine(rotate=(-15, 15)))
            ], random_order=True)

        if shuffle:
            self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.img_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.img_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_y = self.imp_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]

        if self.read_npy:
            images = load_images(batch_x)
            maps = load_maps(batch_y)
        else:
            images = preprocess_images(batch_x, self.img_size[0], self.img_size[1], pad=self.pad_imgs)
            maps = preprocess_maps(batch_y, self.map_size[0], self.map_size[1], pad=self.pad_maps)

        if self.fix_filenames is not None:
            if self.read_npy:
                fixs = load_images(self.fix_filenames[idx * self.batch_size : (idx + 1) * self.batch_size])
            else:
                fixs = preprocess_fixmaps(
                    self.fix_filenames[idx * self.batch_size : (idx + 1) * self.batch_size],
                    self.map_size[0],
                    self.map_size[1],
                    fix_as_mat=self.fix_as_mat,
                    fix_key=self.fix_key)

        if self.augment:
            seq_det = self.seq.to_deterministic()
            images = seq_det.augment_images(images)
            maps = seq_det.augment_heatmaps(maps)
            if self.fix_filenames is not None:
                fixs = seq_det.augment_heatmaps(fixs)

        if self.fix_filenames is not None and self.concat_fix_and_maps:
            # y_true of shape (bs, 2, r, c, 1): maps and fixations stacked on axis 1
            outs = np.concatenate([np.expand_dims(maps, axis=1), np.expand_dims(fixs, axis=1)], axis=1)
            if self.n_output_maps > 1:
                outs = [outs] * self.n_output_maps
        else:
            if self.n_output_maps == 1:
                if self.fix_filenames is not None:
                    outs = [maps, fixs]
                else:
                    outs = maps
            else:
                outs = [maps] * self.n_output_maps
                if self.fix_filenames is not None:
                    outs.append(fixs)

        if self.return_names:
            return images, outs, batch_x
        return images, outs

    def on_epoch_end(self):
        if self.shuffle:
            idxs = np.random.permutation(len(self.img_filenames))
            self.img_filenames = self.img_filenames[idxs]
            self.imp_filenames = self.imp_filenames[idxs]
            if self.fix_filenames is not None:
                self.fix_filenames = self.fix_filenames[idxs]
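
# A minimal usage sketch with hypothetical file lists: importance maps only,
# no fixations, yielding (images, maps) batches for a single-output model.
def _example_salimp_generator(img_files, imp_files):
    gen = SalImpGenerator(img_files, imp_files, batch_size=4,
                          shuffle=False, n_output_maps=1)
    images, maps = gen[0]
    print(images.shape, maps.shape)  # (4, 240, 320, 3) (4, 480, 640, 1)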


def eval_generator(img_filenames,
                   map_filenames,
                   fixmap_filenames,
                   fixcoord_filenames,
                   inp_size,
                   fix_as_mat=False,
                   fix_key="",
                   fixcoord_filetype='mat'):
    """
    Yields tuples (imgs, maps, fixmaps, fixcoords, img_filename) to be used for evaluation.

    map_filenames and fixmap_filenames should be length-n lists, where n is the
    number of timesteps; the yielded maps, fixmaps and fixcoords lists also have
    length n.
    """
    assert len(map_filenames) == len(fixmap_filenames)
    n_times = len(map_filenames)
    n_img = len(map_filenames[0])
    for i in range(n_img):
        imgs = []
        maps = []
        fixmaps = []
        fixcoords = []
        #img = load_images([img_filenames[i]])
        img = preprocess_images([img_filenames[i]], inp_size[0], inp_size[1])

        for t in range(n_times):
            map_ = cv2.imread(map_filenames[t][i], cv2.IMREAD_GRAYSCALE)
            mapshape = map_.shape
            if fix_as_mat:
                fixmap = preprocess_fixmaps([fixmap_filenames[t][i]],
                                            mapshape[0],
                                            mapshape[1],
                                            fix_as_mat=fix_as_mat,
                                            fix_key=fix_key)
                fixmap = np.squeeze(fixmap)
            else:
                fixmap = cv2.imread(fixmap_filenames[t][i], 0)
            if fixcoord_filenames:
                assert len(fixcoord_filenames) == n_times
                if fixcoord_filetype == 'mat':
                    fixdata = scipy.io.loadmat(fixcoord_filenames[t][i])
                    resolution = fixdata["resolution"][0]
                    #assert resolution[0] == img.shape[1] and resolution[1] == img.shape[2]
                    fix_coords_all_participants = fixdata["gaze"]
                    all_fixations = []
                    for participant in fix_coords_all_participants:
                        all_fixations.extend(participant[0][2])
                else:
                    raise RuntimeError("fixcoord filetype %s is unsupported" % fixcoord_filetype)
            else:
                all_fixations = None
            imgs.append(img)
            maps.append(map_)
            fixmaps.append(fixmap)
            fixcoords.append(all_fixations)
        yield (imgs, maps, fixmaps, fixcoords, img_filenames[i])
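
# A minimal usage sketch with hypothetical file lists: iterate evaluation
# tuples for a dataset whose fixation coordinates live in .mat files. The
# fix_key is dataset-dependent; 'fixLocs' below is a hypothetical example.
def _example_eval_generator(img_files, map_files, fixmap_files, fixcoord_files):
    for imgs, maps, fixmaps, fixcoords, name in eval_generator(
            img_files, map_files, fixmap_files, fixcoord_files,
            inp_size=(shape_r, shape_c), fix_as_mat=True, fix_key='fixLocs'):
        print(name, len(maps))
        break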


def get_str2label(dataset_path, label_mapping_file=None):
    str2label = {}
    if label_mapping_file:
        with open(label_mapping_file, "r") as f:
            lines = [l.strip() for l in f.readlines()]
        for l in lines:
            cl = l.split()[0]
            i = l.split()[-1]
            str2label[cl] = int(i)
    else:
        for i, cl in enumerate([d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]):
            str2label[cl] = i
    return str2label
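
# A minimal sketch: without a mapping file, get_str2label() enumerates the
# class subdirectories of a (hypothetical) dataset root in os.listdir() order.
def _example_get_str2label(dataset_root):
    mapping = get_str2label(dataset_root)
    print(mapping)  # e.g. {'ads': 0, 'infographics': 1, ...} for an imp1k-style tree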


def get_labels(filenames, str2label=None):
    if not str2label:
        str2label = get_str2label(dataset_path='/netpool/homes/wangyo/Dataset/imp1k/imgs',
                                  label_mapping_file="/netpool/homes/wangyo/Dataset/imp1k/imp1k_with_nat_images_label_map.txt")

    onehot_arr = np.zeros((len(filenames), len(str2label)))

    for i, f in enumerate(filenames):
        split = re.split(r'/|\\', f)  # handle both unix and windows path separators
        class_name = split[-2]
        if split[-4] == 'Salicon':
            label = str2label['natural_images']
        else:
            label = str2label[class_name]
        onehot_arr[i, label] = 1
    return onehot_arr


class ImpAndClassifGenerator(Sequence):

    def __init__(self,
                 img_filenames,
                 imp_filenames,
                 fix_filenames=None,
                 extra_imgs=None,  # a much larger dataset (e.g. SALICON) that the generator subsamples to maintain class balance
                 extra_imps=None,
                 extra_fixs=None,
                 extras_per_epoch=160,
                 batch_size=1,
                 img_size=(shape_r, shape_c),
                 map_size=(shape_r_out, shape_c_out),
                 shuffle=True,
                 augment=False,
                 n_output_maps=1,
                 concat_fix_and_maps=True,
                 fix_as_mat=False,
                 fix_key="",
                 str2label=None,
                 dummy_labels=False,
                 num_classes=6,
                 pad_imgs=True,
                 pad_maps=True,
                 return_names=False,
                 return_labels=True,
                 read_npy=False):

        print('Instantiating ImpAndClassifGenerator. Number of files received: %d. Extras: %s. '
              'Batch size: %d. Image size: %s. Map size: %s. Augmentation: %d. Pad_imgs: %s. '
              'Pad_maps: %s. Num classes: %d.'
              % (len(img_filenames), len(extra_imgs) if extra_imgs is not None else None,
                 batch_size, str(img_size), str(map_size), augment, pad_imgs, pad_maps, num_classes))

        self.img_filenames = np.array(img_filenames)
        self.imp_filenames = np.array(imp_filenames)
        self.batch_size = batch_size
        self.img_size = img_size
        self.map_size = map_size
        self.shuffle = shuffle
        self.augment = augment
        self.n_output_maps = n_output_maps
        self.concat_fix_and_maps = concat_fix_and_maps
        self.fix_as_mat = fix_as_mat
        self.fix_key = fix_key
        self.str2label = str2label
        self.num_classes = num_classes
        self.dummy_labels = dummy_labels
        self.pad_imgs = pad_imgs
        self.pad_maps = pad_maps
        self.extra_idx = 0
        self.extra_imgs = np.array(extra_imgs) if extra_imgs is not None else None
        self.extra_imps = np.array(extra_imps) if extra_imps is not None else None
        self.extra_fixs = np.array(extra_fixs) if extra_fixs is not None else None
        self.extras_per_epoch = extras_per_epoch
        self.return_names = return_names
        self.return_labels = return_labels
        self.read_npy = read_npy

        if fix_filenames is not None:
            self.fix_filenames = np.array(fix_filenames)
        else:
            self.fix_filenames = None

        if augment:
            sometimes = lambda aug: iaa.Sometimes(0.4, aug)
            self.seq = iaa.Sequential([
                sometimes(iaa.CropAndPad(px=(0, 20))),  # crop/pad each side by 0 to 20px (randomly chosen)
                iaa.Fliplr(0.5),  # horizontally flip 50% of the images
                sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
                sometimes(iaa.Affine(rotate=(-15, 15)))
            ], random_order=True)

        self.on_epoch_end()
    def __len__(self):
        return int(np.ceil(len(self.imgs_this_epoch) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.imgs_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_y = self.imps_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size]

        if self.read_npy:
            images = load_images(batch_x)
            maps = load_maps(batch_y)
        else:
            images = preprocess_images(batch_x, self.img_size[0], self.img_size[1], pad=self.pad_imgs)
            maps = preprocess_maps(batch_y, self.map_size[0], self.map_size[1], pad=self.pad_maps)

        if not self.dummy_labels:
            labels = get_labels(batch_x, self.str2label)  # numpy array of shape (bs, num_classes)
        else:
            labels = np.zeros((len(images), self.num_classes))

        if self.fix_filenames is not None:
            if self.read_npy:
                fixs = load_images(self.fixs_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size])
            else:
                fixs = preprocess_fixmaps(
                    self.fixs_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size],
                    self.map_size[0],
                    self.map_size[1],
                    fix_as_mat=self.fix_as_mat,
                    fix_key=self.fix_key)

        if self.augment:
            seq_det = self.seq.to_deterministic()
            images = seq_det.augment_images(images)
            maps = seq_det.augment_heatmaps(maps)
            if self.fix_filenames is not None:
                fixs = seq_det.augment_heatmaps(fixs)

        if self.fix_filenames is not None and self.concat_fix_and_maps:
            # y_true of shape (bs, 2, r, c, 1): maps and fixations stacked on axis 1
            concat = np.concatenate([np.expand_dims(maps, axis=1), np.expand_dims(fixs, axis=1)], axis=1)
            outs = [concat] * self.n_output_maps
            if self.return_labels: outs.append(labels)
        else:
            if self.n_output_maps == 1:
                if self.fix_filenames is not None:
                    outs = [maps, fixs]
                else:
                    outs = [maps]
                if self.return_labels: outs.append(labels)
            else:
                outs = [maps] * self.n_output_maps
                if self.fix_filenames is not None:
                    outs.append(fixs)
                if self.return_labels: outs.append(labels)

        if self.return_names:
            outs.append(batch_x)

        return images, outs

    def on_epoch_end(self):
        if self.extra_imgs is not None:
            # Sample a new slice of extra images for this epoch
            extra_imgs_this_epoch = self.extra_imgs[self.extra_idx * self.extras_per_epoch : (self.extra_idx + 1) * self.extras_per_epoch]
            extra_imps_this_epoch = self.extra_imps[self.extra_idx * self.extras_per_epoch : (self.extra_idx + 1) * self.extras_per_epoch]
            if self.extra_fixs is not None:
                extra_fixs_this_epoch = self.extra_fixs[self.extra_idx * self.extras_per_epoch : (self.extra_idx + 1) * self.extras_per_epoch]
            else:
                extra_fixs_this_epoch = []
            self.extra_idx += 1
        else:
            extra_imgs_this_epoch = []
            extra_imps_this_epoch = []
            extra_fixs_this_epoch = []

        self.imgs_this_epoch = np.concatenate([self.img_filenames, extra_imgs_this_epoch])
        self.imps_this_epoch = np.concatenate([self.imp_filenames, extra_imps_this_epoch])
        if self.fix_filenames is not None:
            self.fixs_this_epoch = np.concatenate([self.fix_filenames, extra_fixs_this_epoch])

        idxs = np.arange(len(self.imgs_this_epoch))
        if self.shuffle:
            np.random.shuffle(idxs)
            self.imgs_this_epoch = self.imgs_this_epoch[idxs]
            self.imps_this_epoch = self.imps_this_epoch[idxs]
            if self.fix_filenames is not None:
                self.fixs_this_epoch = self.fixs_this_epoch[idxs]
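
# A minimal usage sketch with hypothetical file lists: importance maps plus
# class labels, with a larger auxiliary set (e.g. SALICON) subsampled each
# epoch via the extra_* arguments. dummy_labels=True avoids needing a real
# label mapping here.
def _example_imp_and_classif_generator(img_files, imp_files, extra_imgs, extra_imps):
    gen = ImpAndClassifGenerator(img_files, imp_files,
                                 extra_imgs=extra_imgs, extra_imps=extra_imps,
                                 extras_per_epoch=160, batch_size=4,
                                 dummy_labels=True)
    images, outs = gen[0]
    print(images.shape, [np.asarray(o).shape for o in outs])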


def UMSI_eval_generator(img_filenames,
                        map_filenames,
                        inp_size):
    """
    Yields tuples (imgs, maps, img_filename) to be used for evaluation.
    """
    n_img = len(map_filenames)
    for i in range(n_img):
        imgs = []
        maps = []
        img = preprocess_images([img_filenames[i]], inp_size[0], inp_size[1])
        map_ = cv2.imread(map_filenames[i], cv2.IMREAD_GRAYSCALE)

        imgs.append(img)
        maps.append(map_)

        yield (imgs, maps, img_filenames[i])


class RecallNet_Generator(Sequence):
    def __init__(self, image_filenames, labels, mean_accs, type0_accs, batch_size=8):
        self.image_filenames = np.array(image_filenames)
        self.labels = np.array(labels)
        self.mean_accs = np.array(mean_accs)
        self.type0_accs = np.array(type0_accs)
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_label = self.labels[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_img = self.image_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_img = preprocess_images(batch_img, shape_r, shape_c)  # 240x320 by default
        batch_mean = self.mean_accs[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_type0 = self.type0_accs[idx * self.batch_size : (idx + 1) * self.batch_size]
        #out = [batch_mean, batch_type0, batch_label]
        out = [batch_mean, batch_type0]
        return batch_img, out
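
# A minimal usage sketch with hypothetical arrays: RecallNet_Generator pairs
# each image with mean and type-0 recall accuracies as regression targets.
def _example_recallnet_generator(img_files, labels, mean_accs, type0_accs):
    gen = RecallNet_Generator(img_files, labels, mean_accs, type0_accs, batch_size=8)
    batch_img, (batch_mean, batch_type0) = gen[0]
    print(batch_img.shape, batch_mean.shape)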