import os
import re
import sys
from copy import deepcopy

import cv2
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import scipy.ndimage  # needed by postprocess_predictions (gaussian_filter); missing from the original imports
import imgaug.augmenters as iaa  # assumed: the augmentation pipelines below use `iaa`, the conventional imgaug alias, but the import was missing

import keras
import keras.backend as K
import tensorflow as tf
from keras.layers import Input, TimeDistributed, Lambda, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from keras.models import Model
from keras.utils import Sequence
from keras.optimizers import Adam, RMSprop, SGD
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from PIL import Image
from IPython.display import clear_output

# DEBUG switch: enables the verbose prints scattered through this module
DEBUG = False

# CAT2000 input resolution
cat2000_c = 1920
cat2000_r = 1080
# cat2000_r_out = 1088  # divisible by 16
cat2000_r_out = 1104  # divisible by 48
cat2000_c_out = cat2000_c  # already divisible by 16

cc_c = 300
cc_r = 225
cc_c_out = 1776
cc_r_out = 1344

# number of rows of input images
# shape_r = int(cat2000_r/6)
shape_r = 240
# shape_r = cc_r

# number of cols of input images
# shape_c = int(cat2000_c/6)
shape_c = 320
# shape_c = cc_c

# number of rows of downsampled maps
shape_r_gt = 30
# number of cols of downsampled maps
shape_c_gt = 40

# number of rows of model outputs
# shape_r_out = cat2000_r_out
shape_r_out = 480
# shape_r_out = cc_r_out

# number of cols of model outputs
# shape_c_out = cat2000_c_out
shape_c_out = 640
# shape_c_out = cc_c_out

# final upsampling factor
upsampling_factor = 16
# number of epochs
nb_epoch = 50
# number of timesteps
nb_timestep = 3
# number of learned priors
nb_gaussian = 16

def repeat(x):
    # Broadcast a static tensor over the time axis: (bs, ...) -> (bs, nb_timestep, ...)
    return K.repeat_elements(K.expand_dims(x, axis=1), nb_timestep, axis=1)
    # return K.reshape(K.repeat(K.batch_flatten(x), nb_timestep), (1, nb_timestep, shape_r_gt, shape_c_gt, 512))


def repeat_shape(s):
    # Companion output_shape function for `repeat` when wrapped in a Lambda layer
    return (s[0], nb_timestep) + s[1:]

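
# Illustrative sketch (not from the original file): how `repeat` and
# `repeat_shape` are typically used together inside a Lambda layer to tile
# encoder features over the time axis before a recurrent decoder. The 512
# channels are an assumption taken from the commented-out reshape above.
def _example_repeat_layer():
    feats = Input(shape=(shape_r_gt, shape_c_gt, 512))
    feats_t = Lambda(repeat, output_shape=repeat_shape)(feats)  # (bs, nb_timestep, 30, 40, 512)
    return Model(inputs=feats, outputs=feats_t)
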
def padding(img, shape_r, shape_c, channels=3):
    """Resize `img` to fit (shape_r, shape_c) while preserving aspect ratio,
    centering the result on a black canvas (letterboxing)."""
    img_padded = np.zeros((shape_r, shape_c, channels), dtype=np.uint8)
    if channels == 1:
        img_padded = np.zeros((shape_r, shape_c), dtype=np.uint8)

    original_shape = img.shape
    rows_rate = original_shape[0] / shape_r
    cols_rate = original_shape[1] / shape_c

    if rows_rate > cols_rate:
        new_cols = (original_shape[1] * shape_r) // original_shape[0]
        img = cv2.resize(img, (new_cols, shape_r))
        if new_cols > shape_c:
            new_cols = shape_c
        img_padded[:, ((img_padded.shape[1] - new_cols) // 2):((img_padded.shape[1] - new_cols) // 2 + new_cols)] = img
    else:
        new_rows = (original_shape[0] * shape_c) // original_shape[1]
        img = cv2.resize(img, (shape_c, new_rows))
        if new_rows > shape_r:
            new_rows = shape_r
        img_padded[((img_padded.shape[0] - new_rows) // 2):((img_padded.shape[0] - new_rows) // 2 + new_rows), :] = img

    return img_padded

def resize_fixation(img, rows=480, cols=640):
    """Rescale a binary fixation map by remapping each fixation coordinate,
    so single-pixel fixations are not destroyed by interpolation."""
    out = np.zeros((rows, cols))
    factor_scale_r = rows / img.shape[0]
    factor_scale_c = cols / img.shape[1]

    coords = np.argwhere(img)
    for coord in coords:
        r = int(np.round(coord[0] * factor_scale_r))
        c = int(np.round(coord[1] * factor_scale_c))
        if r == rows:
            r -= 1
        if c == cols:
            c -= 1
        out[r, c] = 1

    return out

def padding_fixation(img, shape_r, shape_c):
    img_padded = np.zeros((shape_r, shape_c))

    original_shape = img.shape
    rows_rate = original_shape[0] / shape_r
    cols_rate = original_shape[1] / shape_c

    if rows_rate > cols_rate:
        new_cols = (original_shape[1] * shape_r) // original_shape[0]
        img = resize_fixation(img, rows=shape_r, cols=new_cols)
        if new_cols > shape_c:
            new_cols = shape_c
        img_padded[:, ((img_padded.shape[1] - new_cols) // 2):((img_padded.shape[1] - new_cols) // 2 + new_cols)] = img
    else:
        new_rows = (original_shape[0] * shape_c) // original_shape[1]
        img = resize_fixation(img, rows=new_rows, cols=shape_c)
        if new_rows > shape_r:
            new_rows = shape_r
        img_padded[((img_padded.shape[0] - new_rows) // 2):((img_padded.shape[0] - new_rows) // 2 + new_rows), :] = img

    return img_padded

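
# Illustrative contrast between the two padders (synthetic inputs, for
# demonstration only): `padding` interpolates pixel intensities, while
# `padding_fixation` re-places each discrete fixation so it survives
# rescaling as a single pixel.
def _example_padding_vs_padding_fixation():
    img = np.random.randint(0, 255, (225, 300, 3), dtype=np.uint8)
    fix = np.zeros((225, 300))
    fix[100, 150] = 1
    img_p = padding(img, shape_r, shape_c, 3)        # (240, 320, 3), letterboxed
    fix_p = padding_fixation(fix, shape_r, shape_c)  # (240, 320), still binary
    return img_p, fix_p
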
def preprocess_fixmaps(paths, shape_r, shape_c, fix_as_mat=False, fix_key="", pad=True):
|
||
|
|
||
|
if pad:
|
||
|
ims = np.zeros((len(paths), shape_r, shape_c, 1))
|
||
|
else:
|
||
|
ims = []
|
||
|
|
||
|
# print('ims.shape:',ims.shape)
|
||
|
for i, path in enumerate(paths):
|
||
|
if path == 'dummy':
|
||
|
fix_map = np.zeros((480,640))
|
||
|
elif fix_as_mat:
|
||
|
mat = scipy.io.loadmat(path)
|
||
|
if DEBUG:
|
||
|
print('mat',mat)
|
||
|
fix_map = mat[fix_key]
|
||
|
else:
|
||
|
fix_map = cv2.imread(path, 0)
|
||
|
|
||
|
if DEBUG:
|
||
|
print('fix_map shape, np.max(fix_map),np.min(fix_map),np.mean(fix_map)',fix_map.shape,np.max(fix_map),np.min(fix_map),np.mean(fix_map))
|
||
|
if pad:
|
||
|
ims[i, :, :, 0] = padding_fixation(fix_map, shape_r=shape_r, shape_c=shape_c)
|
||
|
else:
|
||
|
ims.append(fix_map)
|
||
|
# ims = np.array(ims)
|
||
|
# print('ims[-1].shape:',ims[-1].shape)
|
||
|
return ims
|
||
|
|
||
|
def load_maps(paths):
    ims = []
    for i, path in enumerate(paths):
        original_map = np.load(path, allow_pickle=True)
        # TODO: check whether these maps need dividing by 255.0
        ims.append(original_map.astype(np.float32))
    ims = np.array(ims)
    return ims

def preprocess_maps(paths, shape_r, shape_c, pad=True):
    if pad:
        ims = np.zeros((len(paths), shape_r, shape_c, 1))
    else:
        ims = []

    for i, path in enumerate(paths):
        original_map = cv2.imread(path, 0)
        if pad:
            padded_map = padding(original_map, shape_r, shape_c, 1)
            ims[i, :, :, 0] = padded_map.astype(np.float32)
            ims[i, :, :, 0] /= 255.0
        else:
            ims.append(original_map.astype(np.float32) / 255.0)

    return ims

def load_images(paths):
    ims = []
    for i, path in enumerate(paths):
        img = np.load(path, allow_pickle=True)
        ims.append(img)
    ims = np.array(ims)
    return ims

def preprocess_images(paths, shape_r, shape_c, pad=True):
    """Load images with OpenCV (BGR) and subtract the ImageNet/VGG channel means.
    With pad=True, images are letterboxed to (shape_r, shape_c)."""
    if pad:
        ims = np.zeros((len(paths), shape_r, shape_c, 3))
    else:
        ims = []

    for i, path in enumerate(paths):
        original_image = cv2.imread(path)
        if original_image is None:
            raise ValueError('Path unreadable: %s' % path)
        if pad:
            padded_image = padding(original_image, shape_r, shape_c, 3)
            ims[i] = padded_image
        else:
            original_image = original_image.astype(np.float32)
            # VGG-style BGR mean subtraction
            original_image[..., 0] -= 103.939
            original_image[..., 1] -= 116.779
            original_image[..., 2] -= 123.68
            ims.append(original_image)

    if DEBUG:
        # ims is an ndarray when pad=True, a plain list otherwise; the original
        # unconditional `ims.shape` print crashed in the pad=False case
        print('preprocess_images: batch shape', ims.shape if pad else [im.shape for im in ims])
        # (a matplotlib side-by-side plot of original vs padded image used to live here)

    if pad:
        ims[:, :, :, 0] -= 103.939
        ims[:, :, :, 1] -= 116.779
        ims[:, :, :, 2] -= 123.68

    return ims

def reverse_preprocess(img):
    """Undo `preprocess_images` for display: add back the BGR channel means
    and convert BGR -> RGB."""
    im = deepcopy(img)

    im[:, :, 0] += 103.939
    im[:, :, 1] += 116.779
    im[:, :, 2] += 123.68

    im = im[..., ::-1]  # BGR -> RGB
    im = np.array(im, dtype=np.uint8)

    return im

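
# Illustrative round trip ('sample.jpg' is a hypothetical path):
# preprocess_images letterboxes to the network input size and subtracts the
# VGG BGR means; reverse_preprocess undoes the mean shift and converts back
# to displayable RGB.
def _example_preprocess_roundtrip(path='sample.jpg'):
    batch = preprocess_images([path], shape_r, shape_c)  # (1, 240, 320, 3), float
    viewable = reverse_preprocess(batch[0])              # (240, 320, 3), uint8 RGB
    return viewable
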
def postprocess_predictions(pred, shape_r, shape_c, blur=False, normalize=False, zero_to_255=False):
    """Resize a predicted map back to the original image size by rescaling and
    center-cropping (the inverse of the letterboxing applied at input time)."""
    predictions_shape = pred.shape
    rows_rate = shape_r / predictions_shape[0]
    cols_rate = shape_c / predictions_shape[1]

    # pred = pred / np.max(pred) * 255

    if blur:
        sigma = blur
        # scipy.ndimage.gaussian_filter (the scipy.ndimage.filters alias is deprecated)
        pred = scipy.ndimage.gaussian_filter(pred, sigma=sigma)

    if rows_rate > cols_rate:
        new_cols = (predictions_shape[1] * shape_r) // predictions_shape[0]
        pred = cv2.resize(pred, (new_cols, shape_r))
        img = pred[:, ((pred.shape[1] - shape_c) // 2):((pred.shape[1] - shape_c) // 2 + shape_c)]
    else:
        new_rows = (predictions_shape[0] * shape_c) // predictions_shape[1]
        pred = cv2.resize(pred, (shape_c, new_rows))
        img = pred[((pred.shape[0] - shape_r) // 2):((pred.shape[0] - shape_r) // 2 + shape_r), :]

    if normalize:
        img = img / np.max(img) * 255
    if zero_to_255:
        img = np.abs(img - 255)
    return img

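
# Illustrative sketch of mapping a prediction back to source resolution.
# `model` is an assumption: any Keras model taking (1, shape_r, shape_c, 3)
# inputs and producing (1, shape_r_out, shape_c_out, 1) saliency maps.
def _example_predict_fullsize(model, path='sample.jpg', out_r=1080, out_c=1920):
    batch = preprocess_images([path], shape_r, shape_c)
    pred = model.predict(batch)[0, :, :, 0]
    return postprocess_predictions(pred, out_r, out_c, blur=7, normalize=True)
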
class MultidurationGenerator(Sequence):
    def __init__(self,
                 img_filenames,
                 map_filenames=None,
                 fix_filenames=None,
                 batch_size=1,
                 img_size=(shape_r, shape_c),
                 map_size=(shape_r_out, shape_c_out),
                 shuffle=True,
                 augment=False,
                 n_output_maps=1,
                 n_output_fixs=1,
                 mode='multistream_concat',
                 fix_key='',
                 return_names=False,
                 fix_as_mat=False,
                 pad_gt_maps=True,
                 read_npy=False):
        '''
        Generator for multi-duration saliency data. Receives a list of images and t lists
        of heatmaps and fixations, where t is the number of saliency time steps to yield.
        The generator infers t from the length of map_filenames (or fix_filenames).

        This generator has 3 modes:

        1. multistream_concat: concatenates the fixations and maps for a given timestep
        into one tensor of shape (bs, 2, r, c, 1), appends these tensors in a list of
        size t, and yields that list as y_true. This mode is meant for losses that
        recover the map and fixations by slicing the y_true tensor internally.

        2. multistream_full: does not concatenate the fixations and maps; instead, yields
        every map and fixation needed for each timestep as a separate element of the
        output list. For example, when training with 3 losses and 2 timesteps, this
        generator yields a y_true list of length 6: 3 maps/fixs for timestep 1 and
        3 maps/fixs for timestep 2.

        3. singlestream: concatenates all timesteps into one tensor per loss, of shape
        (bs, time, r, c, 1). When working with kl, cc and nss, for example, the generator
        outputs a list of length 3, where each element is a tensor of that shape. This
        mode should be used with losses adapted to tensors with a time dimension.
        '''
        print('Instantiating MultidurationGenerator. '
              'Number of files received: %d. Batch size: %d. '
              'Image size: %s. Augmentation: %d. Mode: %s'
              % (len(img_filenames), batch_size, str(img_size), augment, mode))

        if (mode == 'multistream_concat') and (map_filenames is None or fix_filenames is None):
            print('multistream_concat can only be used when both fixations and maps are provided. '
                  'If only one of them is enough, use multistream_full.')
        self.n_output_maps = n_output_maps
        self.n_output_fixs = n_output_fixs
        self.fix_as_mat = fix_as_mat
        self.fix_key = fix_key
        self.pad_gt_maps = pad_gt_maps

        self.img_filenames = np.array(img_filenames)
        self.read_npy = read_npy

        # check that maps make sense
        if map_filenames is not None:
            self.map_filenames = np.array(map_filenames)
            assert all([len(self.img_filenames) == len(elt) for elt in self.map_filenames]), \
                "Mismatch between images and maps. Images size: %s Maps size: %s" \
                % (self.img_filenames.shape, self.map_filenames.shape)
            self.timesteps = len(map_filenames)
        else:
            self.n_output_maps = 0
            self.map_filenames = None
            print('Warning: no map filenames provided; no outputs of that kind will be generated')

        # check that fixations make sense
        if fix_filenames is not None:
            self.fix_filenames = np.array(fix_filenames)
            assert all([len(self.img_filenames) == len(elt) for elt in self.fix_filenames]), \
                "Mismatch between images and fixations. Images size: %s Fix size: %s" \
                % (self.img_filenames.shape, self.fix_filenames.shape)
            self.timesteps = len(fix_filenames)
        else:
            self.n_output_fixs = 0
            self.fix_filenames = None
            print('Warning: no fixation filenames provided; no outputs of that kind will be generated')

        self.batch_size = batch_size
        self.img_size = img_size
        self.map_size = map_size
        self.shuffle = shuffle
        self.augment = augment
        self.mode = mode
        self.return_names = return_names

        # Defining the augmentation sequence
        if augment:
            sometimes = lambda aug: iaa.Sometimes(0.4, aug)
            self.seq = iaa.Sequential([
                sometimes(iaa.CropAndPad(px=(0, 20))),  # crop/pad each side by 0 to 20px (randomly chosen)
                iaa.Fliplr(0.5),  # horizontally flip 50% of the images
                sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
                sometimes(iaa.Affine(rotate=(-15, 15)))
            ], random_order=True)

        if shuffle:
            self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.img_filenames) / float(self.batch_size)))
    def __getitem__(self, idx):
        # Get input images
        batch_imgs = self.img_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
        if self.read_npy:
            images = load_images(batch_imgs)
        else:
            images = preprocess_images(batch_imgs, self.img_size[0], self.img_size[1])

        # Get ground truth maps for all times
        if self.n_output_maps >= 1:
            maps = []
            for t in range(self.timesteps):
                maps_names_t = self.map_filenames[t][idx * self.batch_size: (idx + 1) * self.batch_size]
                if self.read_npy:
                    maps_t = load_maps(maps_names_t)
                else:
                    maps_t = preprocess_maps(maps_names_t, self.map_size[0], self.map_size[1], pad=self.pad_gt_maps)
                maps.append(maps_t)

        # Get fixation maps for all times
        if self.n_output_fixs >= 1:
            fixs = []
            for t in range(self.timesteps):
                fix_names_t = self.fix_filenames[t][idx * self.batch_size: (idx + 1) * self.batch_size]
                if self.read_npy:
                    fix_t = load_images(fix_names_t)
                else:
                    fix_t = preprocess_fixmaps(fix_names_t, self.map_size[0], self.map_size[1],
                                               fix_as_mat=self.fix_as_mat, fix_key=self.fix_key,
                                               pad=self.pad_gt_maps)
                fixs.append(fix_t)

        if self.augment:
            seq_det = self.seq.to_deterministic()
            images = seq_det.augment_images(images)
            # iterate over timesteps rather than len(maps), which is undefined
            # when no map filenames were provided
            for ta in range(self.timesteps):
                if self.n_output_maps >= 1:
                    maps[ta] = seq_det.augment_heatmaps(maps[ta])
                if self.n_output_fixs >= 1:
                    fixs[ta] = seq_det.augment_heatmaps(fixs[ta])

        if self.mode == 'singlestream':
            # Returns a list of n_output_maps+n_output_fixs elements.
            # Each element is a 5D tensor: (bs, timesteps, r, c, 1)
            outs = []
            if self.n_output_maps >= 1:
                maps_with_time = np.zeros((len(batch_imgs), self.timesteps, self.map_size[0], self.map_size[1], 1))
                for i in range(self.timesteps):
                    maps_with_time[:, i, ...] = maps[i]
                # NOTE: a variant that handles images of varying size built
                # maps_with_time as a per-sample python list instead:
                # [maps[ti][bidx] for ti in range(self.timesteps)] for each bidx
                outs.extend([maps_with_time] * self.n_output_maps)

            if self.n_output_fixs >= 1:
                fixs_with_time = np.zeros((len(batch_imgs), self.timesteps, self.map_size[0], self.map_size[1], 1))
                for i in range(self.timesteps):
                    fixs_with_time[:, i, ...] = fixs[i]
                # NOTE: the same per-sample variant applies here for varying sizes
                outs.extend([fixs_with_time] * self.n_output_fixs)

        elif self.mode == 'multistream_concat':
            # Returns a list of t elements: [[maps_t1, fix_t1], [maps_t2, fix_t2], ...]
            outs = []
            for i in range(self.timesteps):
                outs.append(np.concatenate([np.expand_dims(maps[i], axis=1),
                                            np.expand_dims(fixs[i], axis=1)], axis=1))

        elif self.mode == 'multistream_full':
            # Returns a list of size timesteps*losses. With 2 map losses, 1 fix
            # loss and 2 timesteps: [m1, m1, m2, m2, fix1, fix2]
            outs = []
            if self.n_output_maps >= 1:
                for i in range(self.timesteps):
                    outs.extend([maps[i]] * self.n_output_maps)
            if self.n_output_fixs >= 1:
                for i in range(self.timesteps):
                    outs.extend([fixs[i]] * self.n_output_fixs)

        if self.return_names:
            return images, outs, batch_imgs
        return images, outs
    def on_epoch_end(self):
        if self.shuffle:
            idxs = list(range(len(self.img_filenames)))
            np.random.shuffle(idxs)
            self.img_filenames = self.img_filenames[idxs]
            # Shuffle each timestep's maps and fixations with the same
            # permutation. (The original indexed the fixations inside the maps
            # loop, which fails when no maps are provided.)
            if self.map_filenames is not None:
                for i in range(len(self.map_filenames)):
                    self.map_filenames[i] = self.map_filenames[i][idxs]
            if self.fix_filenames is not None:
                for i in range(len(self.fix_filenames)):
                    self.fix_filenames[i] = self.fix_filenames[i][idxs]

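
# Illustrative usage (file lists are assumptions): a multi-duration setup in
# 'singlestream' mode with one map loss and one fixation loss. map_files and
# fix_files are length-t lists of per-image file lists.
def _example_multiduration_batches(img_files, map_files, fix_files):
    gen = MultidurationGenerator(img_filenames=img_files,
                                 map_filenames=map_files,
                                 fix_filenames=fix_files,
                                 batch_size=8,
                                 mode='singlestream',
                                 n_output_maps=1,
                                 n_output_fixs=1)
    images, outs = gen[0]  # outs: [maps (bs, t, r, c, 1), fixs (bs, t, r, c, 1)]
    return images, outs
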
class SalImpGenerator(Sequence):
    def __init__(self,
                 img_filenames,
                 imp_filenames,
                 fix_filenames=None,
                 batch_size=1,
                 img_size=(shape_r, shape_c),
                 map_size=(shape_r_out, shape_c_out),
                 shuffle=True,
                 augment=False,
                 n_output_maps=1,
                 concat_fix_and_maps=True,
                 fix_as_mat=False,
                 fix_key="",
                 pad_maps=True,
                 pad_imgs=True,
                 read_npy=False,
                 return_names=False):

        print('Instantiating SalImpGenerator. Number of files received: %d. Batch size: %d. '
              'Image size: %s. Map size: %s. Augmentation: %d. Pad_imgs: %s. Pad_maps: %s.'
              % (len(img_filenames), batch_size, str(img_size), str(map_size), augment, pad_imgs, pad_maps))

        self.img_filenames = np.array(img_filenames)
        self.imp_filenames = np.array(imp_filenames)
        self.batch_size = batch_size
        self.img_size = img_size
        self.map_size = map_size
        self.shuffle = shuffle
        self.augment = augment
        self.n_output_maps = n_output_maps
        self.concat_fix_and_maps = concat_fix_and_maps
        self.fix_as_mat = fix_as_mat
        self.fix_key = fix_key
        self.pad_imgs = pad_imgs
        self.pad_maps = pad_maps
        self.return_names = return_names
        self.read_npy = read_npy

        if fix_filenames is not None:
            self.fix_filenames = np.array(fix_filenames)
        else:
            self.fix_filenames = None

        if augment:
            sometimes = lambda aug: iaa.Sometimes(0.4, aug)
            self.seq = iaa.Sequential([
                sometimes(iaa.CropAndPad(px=(0, 20))),  # crop/pad each side by 0 to 20px (randomly chosen)
                iaa.Fliplr(0.5),  # horizontally flip 50% of the images
                sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
                sometimes(iaa.Affine(rotate=(-15, 15)))
            ], random_order=True)

        if shuffle:
            self.on_epoch_end()
    def __len__(self):
        return int(np.ceil(len(self.img_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.img_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_y = self.imp_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]

        if self.read_npy:
            images = load_images(batch_x)
            maps = load_maps(batch_y)
        else:
            images = preprocess_images(batch_x, self.img_size[0], self.img_size[1], pad=self.pad_imgs)
            maps = preprocess_maps(batch_y, self.map_size[0], self.map_size[1], pad=self.pad_maps)

        if self.fix_filenames is not None:
            if self.read_npy:
                fixs = load_images(self.fix_filenames[idx * self.batch_size: (idx + 1) * self.batch_size])
            else:
                fixs = preprocess_fixmaps(
                    self.fix_filenames[idx * self.batch_size: (idx + 1) * self.batch_size],
                    self.map_size[0],
                    self.map_size[1],
                    fix_as_mat=self.fix_as_mat,
                    fix_key=self.fix_key)

        if self.augment:
            seq_det = self.seq.to_deterministic()
            images = seq_det.augment_images(images)
            maps = seq_det.augment_heatmaps(maps)
            if self.fix_filenames is not None:
                fixs = seq_det.augment_heatmaps(fixs)

        if self.fix_filenames is not None and self.concat_fix_and_maps:
            # (bs, 2, r, c, 1): map and fixation stacked on axis 1
            outs = np.concatenate([np.expand_dims(maps, axis=1), np.expand_dims(fixs, axis=1)], axis=1)
            if self.n_output_maps > 1:
                outs = [outs] * self.n_output_maps
        else:
            if self.n_output_maps == 1:
                if self.fix_filenames is not None:
                    outs = [maps, fixs]
                else:
                    outs = maps
            else:
                outs = [maps] * self.n_output_maps
                if self.fix_filenames is not None:
                    outs.append(fixs)

        if self.return_names:
            return images, outs, batch_x
        return images, outs

    def on_epoch_end(self):
        if self.shuffle:
            idxs = list(range(len(self.img_filenames)))
            np.random.shuffle(idxs)
            self.img_filenames = self.img_filenames[idxs]
            self.imp_filenames = self.imp_filenames[idxs]
            if self.fix_filenames is not None:
                self.fix_filenames = self.fix_filenames[idxs]

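
# Illustrative training loop (the model and file lists are assumptions): any
# Keras model whose single output matches map_size can consume a
# SalImpGenerator directly via fit_generator.
def _example_salimp_training(model, train_imgs, train_maps):
    gen = SalImpGenerator(train_imgs, train_maps, batch_size=8,
                          shuffle=True, augment=False)
    model.fit_generator(gen, epochs=nb_epoch)
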
def eval_generator(
        img_filenames,
        map_filenames,
        fixmap_filenames,
        fixcoord_filenames,
        inp_size,
        fix_as_mat=False,
        fix_key="",
        fixcoord_filetype='mat'):
    """
    Yields tuples (imgs, maps, fixmaps, fixcoords, img_filename) for evaluation.

    map_filenames and fixmap_filenames should be length-n lists of per-image
    file lists, where n is the number of timestamps. The yielded maps, fixmaps
    and fixcoords are also length-n lists.
    """
    assert len(map_filenames) == len(fixmap_filenames)
    n_times = len(map_filenames)
    n_img = len(map_filenames[0])
    for i in range(n_img):
        imgs = []
        maps = []
        fixmaps = []
        fixcoords = []
        # img = load_images([img_filenames[i]])
        img = preprocess_images([img_filenames[i]], inp_size[0], inp_size[1])

        for t in range(n_times):
            map_ = cv2.imread(map_filenames[t][i], cv2.IMREAD_GRAYSCALE)
            mapshape = map_.shape
            if fix_as_mat:
                fixmap = preprocess_fixmaps(
                    [fixmap_filenames[t][i]],
                    mapshape[0],
                    mapshape[1],
                    fix_as_mat=fix_as_mat,
                    fix_key=fix_key)
                fixmap = np.squeeze(fixmap)
            else:
                fixmap = cv2.imread(fixmap_filenames[t][i], 0)
            if fixcoord_filenames:
                assert len(fixcoord_filenames) == n_times
                if fixcoord_filetype == 'mat':
                    fixdata = scipy.io.loadmat(fixcoord_filenames[t][i])
                    resolution = fixdata["resolution"][0]
                    # assert resolution[0] == img.shape[1] and resolution[1] == img.shape[2]
                    fix_coords_all_participants = fixdata["gaze"]
                    all_fixations = []
                    for participant in fix_coords_all_participants:
                        all_fixations.extend(participant[0][2])
                else:
                    raise RuntimeError("fixcoord filetype %s is unsupported" % fixcoord_filetype)
            else:
                all_fixations = None
            imgs.append(img)
            maps.append(map_)
            fixmaps.append(fixmap)
            fixcoords.append(all_fixations)
        yield (imgs, maps, fixmaps, fixcoords, img_filenames[i])

def get_str2label(dataset_path, label_mapping_file=None):
    """Map class names to integer labels, either from a mapping file
    ('<class> ... <index>' per line) or from the sub-directories of dataset_path."""
    str2label = {}
    if label_mapping_file:
        with open(label_mapping_file, "r") as f:
            lines = [l.strip() for l in f.readlines()]
        for l in lines:
            cl = l.split()[0]
            i = l.split()[-1]
            str2label[cl] = int(i)
    else:
        for i, cl in enumerate([d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]):
            str2label[cl] = i
    return str2label

def get_labels(filenames, str2label=None):
    """Return a one-hot label array of shape (len(filenames), num_classes),
    inferring each class from the parent directory name. Salicon images are
    mapped to the 'natural_images' class."""
    if not str2label:
        str2label = get_str2label(
            dataset_path='/netpool/homes/wangyo/Dataset/imp1k/imgs',
            label_mapping_file="/netpool/homes/wangyo/Dataset/imp1k/imp1k_with_nat_images_label_map.txt")

    onehot_arr = np.zeros((len(filenames), len(str2label)))

    for i, f in enumerate(filenames):
        split = re.split('/|\\\\', f)  # split on '/' or '\'
        class_name = split[-2]
        if split[-4] == 'Salicon':
            label = str2label['natural_images']
        else:
            label = str2label[class_name]
        onehot_arr[i, label] = 1
    return onehot_arr

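
# Illustrative sketch (the directory layout and paths are assumptions): build
# the class mapping from a dataset organized as one sub-directory per class,
# then one-hot encode a batch of paths whose parent directory names the class.
def _example_onehot_labels():
    str2label = get_str2label('/data/imp1k/imgs')  # hypothetical path
    paths = ['/data/imp1k/imgs/ads/0001.jpg', '/data/imp1k/imgs/infographics/0002.jpg']
    return get_labels(paths, str2label)  # shape (2, len(str2label))
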
class ImpAndClassifGenerator(Sequence):
    def __init__(self,
                 img_filenames,
                 imp_filenames,
                 fix_filenames=None,
                 extra_imgs=None,  # a much larger dataset (e.g. Salicon) that the generator subsamples to maintain class balance
                 extra_imps=None,
                 extra_fixs=None,
                 extras_per_epoch=160,
                 batch_size=1,
                 img_size=(shape_r, shape_c),
                 map_size=(shape_r_out, shape_c_out),
                 shuffle=True,
                 augment=False,
                 n_output_maps=1,
                 concat_fix_and_maps=True,
                 fix_as_mat=False,
                 fix_key="",
                 str2label=None,
                 dummy_labels=False,
                 num_classes=6,
                 pad_imgs=True,
                 pad_maps=True,
                 return_names=False,
                 return_labels=True,
                 read_npy=False):

        print('Instantiating ImpAndClassifGenerator. Number of files received: %d. Extras: %s. '
              'Batch size: %d. Image size: %s. Map size: %s. Augmentation: %d. Pad_imgs: %s. '
              'Pad_maps: %s. Num classes: %d.'
              % (len(img_filenames), len(extra_imgs) if extra_imgs is not None else None,
                 batch_size, str(img_size), str(map_size), augment, pad_imgs, pad_maps, num_classes))

        self.img_filenames = np.array(img_filenames)
        self.imp_filenames = np.array(imp_filenames)
        self.batch_size = batch_size
        self.img_size = img_size
        self.map_size = map_size
        self.shuffle = shuffle
        self.augment = augment
        self.n_output_maps = n_output_maps
        self.concat_fix_and_maps = concat_fix_and_maps
        self.fix_as_mat = fix_as_mat
        self.fix_key = fix_key
        self.str2label = str2label
        self.num_classes = num_classes
        self.dummy_labels = dummy_labels
        self.pad_imgs = pad_imgs
        self.pad_maps = pad_maps
        self.extra_idx = 0
        self.extra_imgs = np.array(extra_imgs) if extra_imgs is not None else None
        self.extra_imps = np.array(extra_imps) if extra_imps is not None else None
        self.extra_fixs = np.array(extra_fixs) if extra_fixs is not None else None
        self.extras_per_epoch = extras_per_epoch
        self.return_names = return_names
        self.return_labels = return_labels
        self.read_npy = read_npy

        if fix_filenames is not None:
            self.fix_filenames = np.array(fix_filenames)
        else:
            self.fix_filenames = None

        if augment:
            sometimes = lambda aug: iaa.Sometimes(0.4, aug)
            self.seq = iaa.Sequential([
                sometimes(iaa.CropAndPad(px=(0, 20))),  # crop/pad each side by 0 to 20px (randomly chosen)
                iaa.Fliplr(0.5),  # horizontally flip 50% of the images
                sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
                sometimes(iaa.Affine(rotate=(-15, 15)))
            ], random_order=True)

        self.on_epoch_end()
    def __len__(self):
        return int(np.ceil(len(self.imgs_this_epoch) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.imgs_this_epoch[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_y = self.imps_this_epoch[idx * self.batch_size: (idx + 1) * self.batch_size]

        if self.read_npy:
            images = load_images(batch_x)
            maps = load_maps(batch_y)
        else:
            images = preprocess_images(batch_x, self.img_size[0], self.img_size[1], pad=self.pad_imgs)
            maps = preprocess_maps(batch_y, self.map_size[0], self.map_size[1], pad=self.pad_maps)

        if not self.dummy_labels:
            labels = get_labels(batch_x, self.str2label)  # numpy array of shape (bs, num_classes)
        else:
            labels = np.zeros((len(images), self.num_classes))

        if self.fix_filenames is not None:
            if self.read_npy:  # was a bare `read_npy`, a NameError
                fixs = load_images(self.fixs_this_epoch[idx * self.batch_size: (idx + 1) * self.batch_size])
            else:
                fixs = preprocess_fixmaps(
                    self.fixs_this_epoch[idx * self.batch_size: (idx + 1) * self.batch_size],
                    self.map_size[0],
                    self.map_size[1],
                    fix_as_mat=self.fix_as_mat,
                    fix_key=self.fix_key)

        if self.augment:
            seq_det = self.seq.to_deterministic()
            images = seq_det.augment_images(images)
            maps = seq_det.augment_heatmaps(maps)
            if self.fix_filenames is not None:
                fixs = seq_det.augment_heatmaps(fixs)

        if self.fix_filenames is not None and self.concat_fix_and_maps:
            # restored from SalImpGenerator: this concatenation was commented
            # out here, leaving `outs` undefined in this branch
            outs = np.concatenate([np.expand_dims(maps, axis=1), np.expand_dims(fixs, axis=1)], axis=1)
            if self.n_output_maps > 1:
                outs = [outs] * self.n_output_maps
            else:
                outs = [outs]
            if self.return_labels:
                outs.append(labels)
        else:
            if self.n_output_maps == 1:
                if self.fix_filenames is not None:
                    outs = [maps, fixs]
                else:
                    outs = [maps]
                if self.return_labels:
                    outs.append(labels)
            else:
                outs = [maps] * self.n_output_maps
                if self.fix_filenames is not None:
                    outs.append(fixs)
                if self.return_labels:
                    outs.append(labels)
        if self.return_names:
            outs.append(batch_x)

        return images, outs
    def on_epoch_end(self):
        if self.extra_imgs is not None:
            # Take the next window of extra images (the window is not wrapped,
            # so it empties once the extras are exhausted)
            extra_imgs_this_epoch = self.extra_imgs[self.extra_idx * self.extras_per_epoch: (self.extra_idx + 1) * self.extras_per_epoch]
            extra_imps_this_epoch = self.extra_imps[self.extra_idx * self.extras_per_epoch: (self.extra_idx + 1) * self.extras_per_epoch]
            if self.extra_fixs is not None:
                extra_fixs_this_epoch = self.extra_fixs[self.extra_idx * self.extras_per_epoch: (self.extra_idx + 1) * self.extras_per_epoch]
            else:
                extra_fixs_this_epoch = []
            self.extra_idx += 1
        else:
            extra_imgs_this_epoch = []
            extra_imps_this_epoch = []
            extra_fixs_this_epoch = []

        self.imgs_this_epoch = np.concatenate([self.img_filenames, extra_imgs_this_epoch])
        self.imps_this_epoch = np.concatenate([self.imp_filenames, extra_imps_this_epoch])
        if self.fix_filenames is not None:
            self.fixs_this_epoch = np.concatenate([self.fix_filenames, extra_fixs_this_epoch])

        idxs = np.array(range(len(self.imgs_this_epoch)))
        if self.shuffle:
            np.random.shuffle(idxs)
            self.imgs_this_epoch = self.imgs_this_epoch[idxs]
            self.imps_this_epoch = self.imps_this_epoch[idxs]
            if self.fix_filenames is not None:
                self.fixs_this_epoch = self.fixs_this_epoch[idxs]

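
# Illustrative usage (file lists are assumptions): mix a fixed importance
# dataset with a sliding window of 160 extra Salicon-style images per epoch,
# yielding maps plus one-hot class labels per batch.
def _example_imp_and_classif_batches(imgs, imps, salicon_imgs, salicon_imps):
    gen = ImpAndClassifGenerator(img_filenames=imgs,
                                 imp_filenames=imps,
                                 extra_imgs=salicon_imgs,
                                 extra_imps=salicon_imps,
                                 extras_per_epoch=160,
                                 batch_size=8,
                                 dummy_labels=True)  # skip directory-based labels
    images, outs = gen[0]  # outs: [maps, labels]
    return images, outs
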
def UMSI_eval_generator(
        img_filenames,
        map_filenames,
        inp_size):
    """
    Yields tuples (imgs, maps, img_filename) to be used for evaluation.
    """
    # map_filenames is a flat list here (unlike eval_generator), so count it
    # directly; len(map_filenames[0]) would measure the first filename string
    n_img = len(map_filenames)
    for i in range(n_img):
        imgs = []
        maps = []
        img = preprocess_images([img_filenames[i]], inp_size[0], inp_size[1])
        map_ = cv2.imread(map_filenames[i], cv2.IMREAD_GRAYSCALE)

        imgs.append(img)
        maps.append(map_)

        yield (imgs, maps, img_filenames[i])

class RecallNet_Generator(Sequence):
    def __init__(self, image_filenames, labels, mean_accs, type0_accs, batch_size=8):
        self.image_filenames = np.array(image_filenames)
        self.labels = np.array(labels)
        self.mean_accs = np.array(mean_accs)
        self.type0_accs = np.array(type0_accs)
        self.batch_size = batch_size

    def __len__(self):
        # int() instead of .astype(np.int): np.int is removed in recent numpy
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_label = self.labels[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_img = self.image_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_img = preprocess_images(batch_img, 240, 320)
        batch_mean = self.mean_accs[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_type0 = self.type0_accs[idx * self.batch_size: (idx + 1) * self.batch_size]
        # out = [batch_mean, batch_type0, batch_label]
        out = [batch_mean, batch_type0]
        return batch_img, out
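
# Illustrative usage (file lists and accuracy targets are assumptions):
# iterate RecallNet batches of images and their two regression targets.
def _example_recallnet_batches(img_files, labels, mean_accs, type0_accs):
    gen = RecallNet_Generator(img_files, labels, mean_accs, type0_accs, batch_size=8)
    for i in range(len(gen)):
        batch_img, (batch_mean, batch_type0) = gen[i]
        # e.g. model.train_on_batch(batch_img, [batch_mean, batch_type0])
    return gen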