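"""Generate 2D query masks for GQA scene-graph relations.

Builds a dataframe with one row per (object, relation, object) triple from the
GQA train scene graphs, then, for every frequent relation, normalizes each
reference object to a uniform size and position and averages the masks of the
related objects into a query mask. Results are saved as .npy masks and .png
previews under relations/.
"""
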
import os
import time
import json
import queue
from multiprocessing import Pool, Process, Queue

from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

DATA_PATH = 'GQA/'
REL_PATH = 'full_relations_df.pkl'
IMG_SIZE = (500, 500)
NUM_PROCESSES = 20
NUM_SAMPLES = 100


def bbox_to_mask(x, y, w, h, img_size=IMG_SIZE, name=None, visualize=False):
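    """Render the bounding box (x, y, w, h) as a binary mask of shape img_size."""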
    img = np.zeros(img_size)
    # clip the mask width so the box cannot run past the right image border
    mask_w = np.ones(np.clip(w, 0, img_size[1] - x))

    # fill the box row by row, clipped to the bottom image border
    for j in range(y, np.clip(y + h, 0, img_size[0])):
        img[j][x:x + w] = mask_w

    if visualize:
        plt.figure(figsize=(img_size[0] // 80, img_size[1] // 80))
        plt.imshow(img, cmap='gray')
        if name:
            plt.title(name)
        plt.axis('off')
        plt.show()

    return img


def get_all_relations_df(data):
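    """Single-process variant: collect one row per relation into a DataFrame and pickle it."""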
    print(f'Length of scenegraph data set: {len(data)}')
    start = time.time()

    df = pd.DataFrame(columns=['image_id', 'relation', 'from', 'to', 'obj_loc', 'obj_w', 'obj_h', 'obj_center',
                               'rel_obj_loc', 'rel_obj_w', 'rel_obj_h'])

    for img_id in data.keys():
        all_objects = data.get(str(img_id)).get('objects').items()

        # map object id -> (name, x, y, w, h) for fast relation lookups
        all_objects_dict = {id_num: (obj_dict.get('name'), obj_dict.get('x'), obj_dict.get('y'),
                                     obj_dict.get('w'), obj_dict.get('h'))
                            for (id_num, obj_dict) in all_objects}

        # collect every relation of every object
        for id_num, obj_dict in all_objects:
            name = obj_dict.get('name')
            x, y, width, height = obj_dict.get('x'), obj_dict.get('y'), obj_dict.get('w'), obj_dict.get('h')
            center = [x + width / 2, y + height / 2]

            for relation in obj_dict.get('relations'):
                rel = relation.get('name')
                rel_obj, rel_x, rel_y, rel_w, rel_h = all_objects_dict.get(relation.get('object'))

                # key is 'obj_center' (not 'center') so it matches the dataframe columns
                temp = pd.DataFrame.from_dict([{'image_id': img_id, 'relation': rel, 'from': name, 'to': rel_obj,
                                                'obj_loc': [x, y], 'obj_w': width, 'obj_h': height,
                                                'obj_center': center, 'rel_obj_loc': [rel_x, rel_y],
                                                'rel_obj_w': rel_w, 'rel_obj_h': rel_h}])

                df = pd.concat([df, temp], ignore_index=True)

    out_path = 'all_relations.pkl'
    df.to_pickle(out_path)
    print(f'Saved df to {out_path}')

    end = time.time()
    elapsed = end - start
    print(f'Took {int(elapsed // 60)}:{int(elapsed % 60):02d} min:s for all {len(df)} relations '
          f'--> {elapsed / len(df):.2f}s / relation')


def generate_query_mask(df, rel, i, img_center=np.array([250, 250]), uni_size=np.array([50, 50])):
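    """Average up to NUM_SAMPLES normalized samples of relation `rel` into one query mask.

    Each sample is scaled so its reference object has size `uni_size` and shifted so
    it sits at `img_center`; the related objects' masks are then accumulated and
    averaged. Relies on the module-level scene-graph dict `data` loaded in __main__
    (available in the workers when the platform uses the fork start method).
    """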
    # uni_obj is only needed for the visualization at the end
    uni_obj = bbox_to_mask(img_center[0] - (uni_size[0] // 2), img_center[1] - (uni_size[1] // 2),
                           uni_size[0], uni_size[1], img_size=(500, 500))

    temp_df = df.loc[df.relation == rel]
    print(f'[{i}] Number of "{rel}" samples: {len(temp_df)}')

    # float accumulator -- a uint8 buffer would silently overflow once the
    # summed masks exceed 255
    query_mask = np.zeros((500, 500), dtype=np.float32)
    counter = 0
    num_discard = 0

    for idx in range(len(temp_df)):
        if counter >= NUM_SAMPLES:
            print(f'[{i}] Reached {counter} samples for relation "{rel}"')
            break

        img_id = temp_df.iloc[idx].image_id
        img_size = (data.get(img_id)['height'], data.get(img_id)['width'])

        # get relative (reference) object info and generate binary mask
        obj_loc = temp_df.iloc[idx].rel_obj_loc
        width = temp_df.iloc[idx].rel_obj_w
        height = temp_df.iloc[idx].rel_obj_h

        # get mask info and generate binary mask
        mask_loc = temp_df.iloc[idx].obj_loc
        mask_w = temp_df.iloc[idx].obj_w
        mask_h = temp_df.iloc[idx].obj_h

        if obj_loc[0] > img_size[1] or obj_loc[1] > img_size[0] or mask_loc[0] > img_size[1] or mask_loc[1] > img_size[0]:
            # bounding box lies outside the image -- discard sample
            continue

        obj = bbox_to_mask(obj_loc[0], obj_loc[1], width, height, img_size=img_size)
        mask = bbox_to_mask(mask_loc[0], mask_loc[1], mask_w, mask_h, img_size=img_size)

        # pixel encoding: 1 = mask only, 2 = reference object only, 3 = overlap
        img = obj * 2 + mask
        img_transformed = np.zeros((1000, 1000), dtype=np.uint8)

        # scale the sample so the reference object gets the uniform size
        scale_x, scale_y = uni_size[0] / width, uni_size[1] / height
        scale_mat = np.array([[scale_y, 0, 0], [0, scale_x, 0], [0, 0, 1]])

        if scale_x > 5 or scale_y > 5:
            num_discard += 1
            continue

        # r/c loop variables instead of i/j, so the relation index `i` is not shadowed
        for r, row in enumerate(img):
            for c, _ in enumerate(row):
                pixel_data = img[r, c]
                input_coords = np.array([r, c, 1])
                r_out, c_out, _ = scale_mat @ input_coords

                if 0 < r_out < 1000 and 0 < c_out < 1000 and pixel_data > 0:
                    # new indices must land inside the new image -- discard others
                    img_transformed[int(r_out), int(c_out)] = pixel_data

        if not (img_transformed >= 2).any():
            # reference object vanished during the transform -- discard sample
            continue

        # find the new top-left (row, col) corner of the reference object
        new_loc = sorted([[y, x] for (y, x) in zip(*np.where(img_transformed >= 2))])[0]
        new_center = [new_loc[0] + uni_size[0] // 2, new_loc[1] + uni_size[1] // 2]

        # translate the sample so the reference object sits at the image center
        move_r, move_c = img_center - new_center
        move_mat = np.array([[1, 0, move_r], [0, 1, move_c], [0, 0, 1]])

        img_moved = np.zeros((500, 500), dtype=np.uint8)
        for r, row in enumerate(img_transformed):
            for c, _ in enumerate(row):
                pixel_data = img_transformed[r, c]
                input_coords = np.array([r, c, 1])
                r_out, c_out, _ = move_mat @ input_coords

                if 0 < r_out < 500 and 0 < c_out < 500 and pixel_data > 0:
                    # new indices must land inside the new image -- discard others
                    img_moved[int(r_out), int(c_out)] = pixel_data

        # keep only the mask pixels (values 1 and 3) and add them to the query mask
        mask_transformed = np.where(img_moved == 1, img_moved, 0) + np.where(img_moved == 3, img_moved, 0)
        query_mask += mask_transformed
        counter += 1

    if counter > 0:
        query_mask = query_mask / counter
        rel_name = '_'.join(rel.split(' '))
        np.save(f'relations/{rel_name}.npy', query_mask)
        print(f'[{i}] Saved query mask to: relations/{rel_name}.npy')

        if num_discard > 0:
            print(f'[{i}] Discarded {num_discard} samples because their scale factor was too high.')

        plt.figure(figsize=(3, 3))
        plt.imshow(uni_obj * 0.1 + query_mask, cmap='gray')
        plt.title(rel)
        plt.axis('off')
        plt.savefig(f'relations/{rel_name}.png', bbox_inches='tight', dpi=300)
        plt.clf()

    else:
        print(f'[{i}] Could not generate query mask for "{rel}"')


def run_process(tasks, df):
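    """Worker loop: pull relation names off the shared task queue until it is empty."""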
    while True:
        try:
            # try to get a task from the queue; get_nowait() raises queue.Empty
            # if the queue is empty (tasks.get(False) would behave the same)
            task = tasks.get_nowait()
            i = list(df.relation.unique()).index(task)
        except queue.Empty:
            break
        else:
            # no exception has been raised
            print(f'[{i}] Starting relation #{i}: {task}')
            print()
            generate_query_mask(df, task, i)
            time.sleep(.5)

    return True


# task executed in a worker process
def get_relations_task(img_id):
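    """Collect all relation dicts for a single image id (executed in a pool worker)."""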
    width, height = data.get(str(img_id))['width'], data.get(str(img_id))['height']
    all_objects = data.get(str(img_id)).get('objects').items()

    # map object id -> (name, x, y, w, h) for fast relation lookups
    all_objects_dict = {id_num: (obj_dict.get('name'), obj_dict.get('x'), obj_dict.get('y'),
                                 obj_dict.get('w'), obj_dict.get('h'))
                        for (id_num, obj_dict) in all_objects}

    all_relations = []

    # collect every relation of every object
    for id_num, obj_dict in all_objects:
        name = obj_dict.get('name')
        x, y, obj_w, obj_h = obj_dict.get('x'), obj_dict.get('y'), obj_dict.get('w'), obj_dict.get('h')
        # center of the object itself, not of the image
        center = [x + obj_w / 2, y + obj_h / 2]

        for relation in obj_dict.get('relations'):
            rel = relation.get('name')
            rel_obj, rel_x, rel_y, rel_w, rel_h = all_objects_dict.get(relation.get('object'))

            all_relations.append({'image_id': img_id, 'width': width, 'height': height, 'relation': rel,
                                  'from': name, 'to': rel_obj, 'obj_loc': [x, y], 'obj_w': obj_w, 'obj_h': obj_h,
                                  'obj_center': center, 'rel_obj_loc': [rel_x, rel_y],
                                  'rel_obj_w': rel_w, 'rel_obj_h': rel_h})

    return all_relations


if __name__ == '__main__':

    path = os.path.join(DATA_PATH, 'train_sceneGraphs.json')
    assert os.path.exists(path), f'{path} does not exist!'

    with open(path, 'r') as f:
        data = json.load(f)

    print(f'Length of scenegraph data set: {len(data)}')

    if not os.path.exists(REL_PATH):
        print('Generating dataframe of all relations...')
        # generate the relations pickle -- use multiprocessing!
        # create and configure the process pool
        with Pool(processes=NUM_PROCESSES) as pool:

            df = pd.DataFrame(columns=['image_id', 'width', 'height', 'relation', 'from', 'to', 'obj_loc', 'obj_w',
                                       'obj_h', 'obj_center', 'rel_obj_loc', 'rel_obj_w', 'rel_obj_h'])

            # execute tasks in order; imap (instead of map) yields results as they
            # arrive, so the tqdm bar actually advances during processing
            for i, result in enumerate(tqdm(pool.imap(get_relations_task, list(data.keys()), chunksize=100),
                                            total=len(data))):
                temp = pd.DataFrame.from_dict(result)
                df = pd.concat([df, temp], ignore_index=True)
                if i % 10000 == 0:
                    # checkpoint intermediate results
                    df.to_pickle('temp_' + REL_PATH)
                    print(f'Saved df to {"temp_" + REL_PATH}')

        df.to_pickle(REL_PATH)
        print(f'Saved df to {REL_PATH}')
    else:
        df = pd.read_pickle(REL_PATH)

    print(f'Number of relations: {len(df.relation.unique())}')
    print(df.relation.unique())

    # serial alternative:
    # for i, rel in enumerate(df.relation.unique()):
    #     generate_query_mask(df, rel, i)

    print('Generating a query mask for each relation...')
    # make sure the output directory exists (np.save does not create it)
    os.makedirs('relations', exist_ok=True)

    # generate a query mask for each relation -- use multiprocessing
    tasks = Queue()
    procs = []

    # only use relations with more than 1000 samples
    counts = df.relation.value_counts()
    rel_lst = counts[counts > 1000].index.to_list()

    for rel in rel_lst:
        tasks.put(rel)

    # creating processes -- run only NUM_PROCESSES processes at the same time
    for _ in range(NUM_PROCESSES):
        p = Process(target=run_process, args=(tasks, df))
        procs.append(p)
        p.start()

    # wait for all processes to finish
    for p in procs:
        p.join()