feat: update web Interface

This commit is contained in:
Yao Wang 2022-07-29 14:22:57 +02:00
parent d5d633b6c7
commit f9844dba10
1111 changed files with 171093 additions and 0 deletions

View file

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 8 17:53:59 2021
@author: jiao1
"""
from PIL import Image, ImageFilter
from pathlib import Path
def blur_radius_for(size):
    """Return the Gaussian blur radius used for an image of the given size.

    size is a (width, height) tuple in pixels. Images with a side longer
    than 2000 px get radius 24, longer than 1000 px radius 12, all others 5.
    """
    if size[0] > 2000 or size[1] > 2000:
        return 24
    if size[0] > 1000 or size[1] > 1000:
        return 12
    return 5


# Blur every PNG/JPG found below the folders "1" .. "10" and write the result
# as an RGB image into the sibling folder "<n>_blur".
current_path = Path('./')
for i in range(1, 11):
    p = current_path.joinpath(str(i))
    out_path = current_path.joinpath(str(i) + '_blur')
    out_path.mkdir(exist_ok=True)
    # PNGs first, then JPGs (same order as before, but one shared code path)
    for pattern in ('*.png', '*.jpg'):
        for file_name in p.rglob(pattern):
            # convert() returns an independent copy, so the source file can be
            # closed as soon as the conversion is done
            with Image.open(file_name) as source:
                image = source.convert(mode="RGB")
            print(image.size)
            radius = blur_radius_for(image.size)
            blurred = image.filter(ImageFilter.GaussianBlur(radius=radius))
            # Path.name is the basename on every OS; splitting the string on
            # "/" or "\\" only works on one platform each
            blurred.save(str(out_path.joinpath(file_name.name)))

View file

@ -0,0 +1,40 @@
from PIL import Image
import os
import sys
# Default base directory and the sub-directories that are scanned below it.
BASE = "../assets/task_data/"
dirs = [
    "all_images",
    "sentinel_CC",
    "natural",
    "tutorial_CC",
    "real_CC",
    "sentinel_images",
    "tutorial_sentinels",
    "real_images",
    "tutorial_images",
]


def get_bad_ims(base, dirs):
    """Walk base/<d> for every d in dirs and collect files that fail the image check.

    Prints how many files were checked and returns the list of file names
    (names only, without their directory) for which _is_valid_img is false.
    """
    checked = 0
    bad = []
    for sub_dir in dirs:
        for folder, _subfolders, file_names in os.walk(os.path.join(base, sub_dir)):
            for file_name in file_names:
                checked += 1
                if _is_valid_img(os.path.join(folder, file_name)):
                    continue
                bad.append(file_name)
    print("Checked %d imgs" % checked)
    return bad
def _is_valid_img(im_path):
    """Return True if PIL can open the file at im_path, False otherwise.

    Only the open step is checked, exactly as before; pixel data is not
    decoded here.
    """
    try:
        # the context manager closes the file handle Image.open keeps around,
        # which matters when thousands of files are checked in one run
        with Image.open(im_path):
            return True
    except Exception:
        # still best-effort for any open failure, but no longer swallows
        # KeyboardInterrupt / SystemExit like a bare "except:" does
        return False
if __name__ == "__main__":
    #print(_is_valid_img("valid.jpg"))
    # The first command-line argument, if given, overrides the default base path.
    if len(sys.argv) == 1:
        # "%s" is required here: the former "% as" was parsed as an ascii()
        # conversion and printed a garbled message
        print("Using %s as base path" % BASE)
        base = BASE
    else:
        base = sys.argv[1]
    print(base)
    print(get_bad_ims(base, dirs))

View file

@ -0,0 +1,50 @@
import generate_codecharts
import os
import json
def create_codecharts(real_CC_dir,ncodecharts,image_width,image_height):
    """Generate ncodecharts codechart images plus two JSON indexes in real_CC_dir.

    Each image is written as real_CC_<n>.jpg by
    generate_codecharts.create_codechart. CC_codes.json maps every image path
    to its valid codes; CC_codes_full.json additionally stores the
    coordinates.
    """
    # the output directory is created on demand and populated below
    # (no buckets here: all buckets sample from this single source)
    if not os.path.exists(real_CC_dir):
        os.makedirs(real_CC_dir)

    codes_only = {}
    codes_and_coords = {}

    print('Generating %d codecharts...'%(ncodecharts))
    for idx in range(ncodecharts):
        if idx%100==0:
            # progress report every 100 charts
            print('%d/%d'%(idx,ncodecharts))
        chart_path = os.path.join(real_CC_dir,'real_CC_%d.jpg'%(idx))
        codes, coords = generate_codecharts.create_codechart(chart_path,image_width,image_height)
        codes_only[chart_path] = {'valid_codes':codes}
        codes_and_coords[chart_path] = {'valid_codes':codes, 'coordinates':coords}

    outputs = (
        ('CC_codes.json', codes_only),
        ('CC_codes_full.json', codes_and_coords),
    )
    for json_name, payload in outputs:
        json_path = os.path.join(real_CC_dir, json_name)
        with open(json_path, 'w') as outfile:
            json.dump(payload, outfile)
        print('Writing out %s'%(json_path))

    print('Done!')
if __name__ == "__main__":
    # canvas size of every generated codechart
    image_width = 1920 # in pixel
    image_height = 1080 # in pixel

    # use these settings to figure out how many codecharts to make
    # so that each codechart will be sampled once
    num_subject_files = 200 #100 # default: 100 subject files/ bucket
    num_images_per_sf = 35 #20 # default: 20 images/ subject file
    ncodecharts = 20 #num_subject_files*num_images_per_sf

    # codecharts are written to <rootdir>/real_CC
    rootdir = './task_data'
    real_CC_dir = os.path.join(rootdir,'real_CC')

    create_codecharts(real_CC_dir,ncodecharts,image_width,image_height)

View file

@ -0,0 +1,89 @@
import os
import glob
from PIL import Image
from collections import Counter
import matplotlib.pyplot as plt
# resize all images to these max dimensions (to be consistent across experiments with different image sizes)
MAX_H = 1340 #1920 1920 resized to 1000, max height can be 700 so 700x(1920/1000)
MAX_W = 1920
to_resize = True
todebug = False


def get_max_dims(allfiles):
    """Return the (width, height) of the canvas all images will be padded to.

    The largest width and the largest height over all files are taken
    independently. When to_resize is set, both are scaled by one common
    ratio so that the result fits within MAX_W x MAX_H.

    Raises ValueError when allfiles contains no files.
    """
    widths = []
    heights = []
    for file in allfiles:
        # only the header is needed for .size; close the file right away so
        # large folders do not exhaust file handles
        with Image.open(file) as im:
            width, height = im.size
        widths.append(width)
        heights.append(height)
    if not widths:
        raise ValueError("get_max_dims() needs at least one image file")
    if todebug:
        print("Image widths:",Counter(widths).keys())
        print("Image heights:",Counter(heights).keys())
    maxwidth = max(widths)
    maxheight = max(heights)
    if to_resize:
        ratio = min(MAX_W/maxwidth, MAX_H/maxheight)
        maxwidth = int(maxwidth*ratio)
        maxheight = int(maxheight*ratio)
    return maxwidth,maxheight
def save_padded_images(real_image_dir,allfiles,toplot=False,maxwidth=None,maxheight=None):
    """Resize (optionally) and centre every image on a grey canvas of equal size.

    Parameters:
        real_image_dir: directory the padded images are saved to, keeping
            each file's base name.
        allfiles: paths of the images to process.
        toplot: when true, show every padded image with matplotlib.
        maxwidth, maxheight: canvas size in pixels; computed with
            get_max_dims(allfiles) when either one is None.

    Returns the (maxwidth, maxheight) canvas size that was used.
    """
    if maxwidth is None or maxheight is None:
        maxwidth,maxheight = get_max_dims(allfiles)
    print('Padding %d image files to dimensions: [%d,%d]...'%(len(allfiles),maxwidth,maxheight))
    for file in allfiles:
        # keep the source open only while it is read; resize()/paste() copy
        # the pixel data they need
        with Image.open(file) as source:
            im = source
            if to_resize:
                # resize image to fit the canvas while keeping its aspect ratio
                width, height = source.size
                ratio = min(maxwidth/width, maxheight/height)
                newwidth = int(width*ratio)
                newheight = int(height*ratio)
                if todebug: print('resizing %dx%d to %dx%d (ratio=%2.2f)'%(width,height,newwidth,newheight,ratio))
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name
                im = source.resize((newwidth,newheight), Image.LANCZOS)
            width, height = im.size
            padded_im = Image.new('RGB',
                                  (maxwidth, maxheight),
                                  (126, 126, 126))
            offset = ((maxwidth - width) // 2, (maxheight - height) // 2)
            padded_im.paste(im, offset)
        padded_im.save(os.path.join(real_image_dir,os.path.basename(file)))
        if toplot:
            plt.figure(figsize=(10,10))
            plt.imshow(padded_im)
            plt.axis('off')
            plt.show()
    print('Done!')
    return maxwidth,maxheight
if __name__ == "__main__":
    # input: every PNG directly inside sourcedir
    sourcedir = '../../importance_dataset/predimportance/analysis/CVs_all' # take images from here
    allfiles = glob.glob(os.path.join(sourcedir,'*.png'))

    # output: <rootdir>/real_images, created on first use
    rootdir = './task_data'
    real_image_dir = os.path.join(rootdir,'real_images')
    if not os.path.exists(real_image_dir):
        print('Creating directory %s'%(real_image_dir))
        os.makedirs(real_image_dir)

    save_padded_images(real_image_dir,allfiles,toplot=False)

View file

@ -0,0 +1,39 @@
import os
import random
from shutil import copyfile
import glob
def distribute_images(from_dir,real_image_dir,num_buckets,start_bucket_at):
    """Copy the images in from_dir into equally sized bucket folders.

    All *.jpeg, *.png and *.jpg files directly inside from_dir are shuffled
    and split into num_buckets groups of len(files) // num_buckets images.
    Group b is copied to real_image_dir/bucket<start_bucket_at + b>. Images
    left over after the even split are not copied.
    """
    img_files = [
        path
        for ext in ('*.jpeg', '*.png', '*.jpg')
        for path in glob.glob(os.path.join(from_dir, ext))
    ]
    random.shuffle(img_files) # random assignment of images to buckets

    images_per_bucket = int(len(img_files)/float(num_buckets))
    print('Distributing images across %d buckets'%(num_buckets))

    for offset in range(num_buckets):
        bucket_no = start_bucket_at+offset
        bucket_dir = os.path.join(real_image_dir,'bucket%d'%(bucket_no))
        if not os.path.exists(bucket_dir):
            os.makedirs(bucket_dir)
        print('Populating %s/bucket%d with %d images'%(real_image_dir,bucket_no,images_per_bucket))
        # consecutive, non-overlapping slice of the shuffled list
        first = offset*images_per_bucket
        for src in img_files[first:first+images_per_bucket]:
            copyfile(src, os.path.join(bucket_dir,os.path.basename(src)))
if __name__ == "__main__":
    # bucket layout
    num_buckets = 1 # num buckets to split images into
    start_bucket_at = 0 # where to start the naming of the buckets (in case other buckets already exist)

    # images are read from <rootdir>/all_images and copied below <rootdir>/real_images
    rootdir = './task_data'
    from_dir = os.path.join(rootdir,'all_images')
    real_image_dir = os.path.join(rootdir,'real_images')
    if not os.path.exists(real_image_dir):
        os.makedirs(real_image_dir)

    distribute_images(from_dir,real_image_dir,num_buckets,start_bucket_at)

View file

@ -0,0 +1,39 @@
from PIL import Image, ImageDraw, ImageColor, ImageFont
import matplotlib.pyplot as plt
import numpy as np
import json
import os
# FIXED parameters
# colour the '+' of the fixation cross is drawn in
text_color = ImageColor.getrgb("white")
# font requested first; save_fixation_cross falls back to FreeSans when it cannot be loaded
font_type = "arial.ttf"
px_pt_ratio = 20/29 # according to our image dimensions, 29 point = 20 px


def pixel_to_point(num):
    """Convert a size in pixels to points, truncated to an int."""
    points_per_pixel = 1/px_pt_ratio
    return int(num*points_per_pixel)
def save_fixation_cross(rootdir,image_width,image_height):
    """Render a '+' near the centre of a grey image and save it as JPEG.

    Parameters:
        rootdir: directory that receives 'fixation-cross.jpg'; it is created
            when it does not exist yet.
        image_width, image_height: image size in pixels. The font size is
            derived from the height (2.78 % of it).
    """
    font_size = int(image_height*0.0278)
    print('using font size: %d'%(font_size))

    # Generate and save fixation cross image
    img = Image.new('RGB', (image_width, image_height), (126, 126, 126))
    d = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype(font_type, pixel_to_point(font_size)) # takes in point value
    except OSError:
        print("WARNING: using a different font because could not find %s on your computer" % font_type)
        font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeSans.ttf", pixel_to_point(font_size))
    # the text anchor is its top-left corner, hence the shift by one font size
    d.text((image_width/2.0 - font_size, image_height/2.0 - font_size), '+', text_color, font)

    # saving into a missing directory would fail, so create it first
    if rootdir and not os.path.isdir(rootdir):
        os.makedirs(rootdir)
    filename = 'fixation-cross.jpg'
    img.save(os.path.join(rootdir,filename))
    print('Saved fixation cross image as %s'%(os.path.join(rootdir,filename)))
if __name__ == "__main__":
    rootdir = './task_data'
    image_width = 1920 # in pixel
    image_height = 1080 # in pixel
    # the image is saved inside rootdir, so make sure it exists
    if not os.path.exists(rootdir):
        os.makedirs(rootdir)
    # previously the parameters were set up but the image was never generated
    save_fixation_cross(rootdir,image_width,image_height)

View file

@ -0,0 +1,166 @@
from PIL import Image, ImageDraw, ImageColor, ImageFont
import matplotlib.pyplot as plt
import numpy as np
import string
import random
import json
import os
import math
# DEFINE PARAMETERS
forbidden_letters = set(["I", "O"]) # letters to not use in code charts because can be confused with digits
px_pt_ratio = 20/29 # according to our image dimensions, 29 point = 20 px
text_color = ImageColor.getrgb("gray") # colour the triplets are drawn in
font_type = "arial.ttf" # font requested first; create_codechart falls back to FreeSans when it cannot be loaded
tojitter = True # add jitter from a regular grid
ebuf = 5 # buffer number of pixels to leave from the edges of the image so codecharts are not tangent to image edges
go_to_image_edges = False # if want to make sure to sample triplets to the very edge of the image (downside: triplets may be more crowded)
def point_to_pixel(num):
    """Convert a size in points to pixels, truncated to an int."""
    pixels = num*px_pt_ratio
    return int(pixels)
def pixel_to_point(num):
    """Convert a size in pixels to points, truncated to an int."""
    points_per_pixel = 1/px_pt_ratio
    return int(num*points_per_pixel)
def generate_rand_letter():
    """Return a random uppercase ASCII letter that is not in forbidden_letters."""
    # redraw until an allowed letter comes up
    while True:
        candidate = random.choice(string.ascii_uppercase)
        if candidate not in forbidden_letters:
            return candidate
def generate_rand_triplet():
    """Return a code made of one allowed letter followed by two digits.

    Both digits are drawn from 1-9 and the second always differs from the
    first, so the digits are never 0 and never identical.
    """
    letter = generate_rand_letter()
    first_digit = random.choice([n for n in range(1, 10)])
    # the second draw excludes whatever the first digit was
    second_digit = random.choice([n for n in range(1, 10) if n != first_digit])
    return letter + str(first_digit) + str(second_digit)
def create_codechart(filename,image_width,image_height):
    """Draw a codechart of unique random triplets and save it to filename.

    Triplets from generate_rand_triplet are placed on a grid whose spacing is
    derived from the font size (1.85 % of the image height). The grid start
    is shifted by a random offset and, when tojitter is set, every triplet is
    moved by a random amount around its grid position.

    Parameters:
        filename: path the image is saved to.
        image_width, image_height: canvas size in pixels.

    Returns a tuple (codes, coordinates): the list of triplet codes drawn
    and a dict mapping each code to the (x, y) position it was drawn at.

    NOTE(review): N_h and N_v are used as divisors and evaluate to 0 for
    very small canvases - confirm callers never pass such sizes.
    """
    font_size = int(image_height*0.0185)
    # all these parameters depend on font size
    max_triplet_width = font_size*3 # in pixel - max triplet width; used 'W88' as widest triplet code (width~60, height=20)
    max_triplet_height = font_size # the tallest a triplet can be
    d_v = 4*max_triplet_height # vertical distance to maintain b/w triplets in the grid
    d_h = 2*max_triplet_width # horizontal distance to maintain b/w triplets (from start of one triplet to start of another)
    # make sure that not too much empty space is left over by spacing out triplets
    N_h = int(math.floor((image_width-max_triplet_width-2*ebuf) / float(d_h))) # number of triplets that will be tiled horizontally
    d_h = int(math.floor((image_width-max_triplet_width-2*ebuf) / float(N_h))) # recompute the horizontal dist between triplets to eliminate extra space
    N_v = int(math.floor((image_height-max_triplet_height-2*ebuf) / float(d_v))) # number of triplets that will be tiled vertically
    d_v = int(math.floor((image_height-max_triplet_height-2*ebuf) / float(N_v))) # recompute the vertical dist in the same way
    # -------------
    post_jitter_buffer = 6 # small buffer to cover edge case of triplets immediately adjacent to one another (for legibility)
    j_v = int(0.25*(d_v) - post_jitter_buffer/2) # max vertical jitter for one side of a triplet
    j_h = int(0.25*(d_h) - post_jitter_buffer) # max horizontal jitter for one side of a triplet
    # set up image canvas and font size/style
    img = Image.new('RGB', (image_width, image_height))
    d = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype(font_type, pixel_to_point(font_size)) # takes in point value
    except OSError:
        print("WARNING: using a different font bc could not find %s on your computer" % font_type)
        font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeSans.ttf", pixel_to_point(font_size))
    valid_codes = set()
    coordinates = {}
    # initialize starting locations for triplets on the image grid
    x_init = ebuf
    y_init = ebuf
    # -------- improvement made after 01/2020 (after TurkEyes paper) --------
    # original problem was grid-like artifacts in the collected data because the grid spacing between consecutive triplets
    # was always similar (despite a small bit of jitter added when using the triplet)
    xoffset = random.choice(list(range(int(d_h/2.0))))
    yoffset = random.choice(list(range(int(d_v/2.0))))
    x_init = x_init+xoffset
    y_init = y_init+yoffset
    # -----------------------------------------------------------------------
    # outer loop walks the grid columns, inner loop the rows of one column
    x = x_init
    while x < image_width-max_triplet_width-ebuf:
        y = y_init
        while y < image_height-max_triplet_height-ebuf:
            triplet_code = generate_rand_triplet()
            # check for if triplet has already been used in image since all codes should be unique
            while triplet_code in valid_codes:
                triplet_code = generate_rand_triplet()
            valid_codes.add(triplet_code)
            if tojitter:
                # implement jitter to x and y coordinates (note: can turn either of them off)
                # the ranges are clamped so a jittered triplet stays inside the edge buffer
                min_x = max(ebuf,x-j_h)
                max_x = min(x+j_h+1,image_width-max_triplet_width-ebuf)
                min_y = max(ebuf,y-j_v)
                max_y = min(y+j_v+1,image_height-max_triplet_height-ebuf-2) # a little bit of extra buffer in vertical dimension
                x_range = list(range(min_x, max_x))
                y_range = list(range(min_y, max_y))
                j_x = random.choice(x_range)
                j_y = random.choice(y_range)
            else:
                j_x = x
                j_y = y
            # writes triplet to image
            d.text((j_x, j_y), triplet_code, text_color, font)
            coordinates[triplet_code] = (j_x, j_y)
            y_prev = y
            y = y+d_v # regularly sample the image vertically
            # triplets are not guaranteed to go to edge of image, and gap could be large
            # see if can still squeeze in a triplet without overlapping previous ones (could still be quite close)
            if go_to_image_edges and y >= image_height-max_triplet_height-ebuf:
                y = y_prev + max_triplet_height+j_v+1 + post_jitter_buffer*2
        x_prev = x
        x = x+d_h # regularly sample the image horizontally
        # same squeeze-in step as above, for the horizontal direction
        if go_to_image_edges and x >= image_width-max_triplet_width-ebuf:
            x = x_prev + max_triplet_width+j_h+1 + post_jitter_buffer*2
    img.save(filename)
    return (list(valid_codes), coordinates)
if __name__ == "__main__":
    # create some code charts to test this code
    num_codecharts = 3 # generate this many codecharts
    #image_width = 1920 # in pixel
    #image_height = 1080 # in pixel
    image_height = 1340 #1344
    image_width = int(1036*image_height/float(1344))

    # set up directories: <rootdir>/TEST receives the charts and data.json
    rootdir = './task_data'
    test_dir = os.path.join(rootdir,'TEST')
    for needed_dir in (rootdir, test_dir):
        if not os.path.exists(needed_dir):
            os.makedirs(needed_dir)

    # one entry per chart: file name -> (codes, coordinates)
    data = {}
    for chart_no in range(num_codecharts):
        chart_path = os.path.join(test_dir,'CC_%d.jpg'%(chart_no))
        codes, coords = create_codechart(chart_path,image_width,image_height)
        data[chart_path] = (codes, coords)

    with open(os.path.join(test_dir,'data.json'), 'w') as outfile:
        json.dump(data, outfile)

View file

@ -0,0 +1,151 @@
from PIL import Image, ImageDraw, ImageColor, ImageFont
import matplotlib.pyplot as plt
import numpy as np
import string
import random
import json
import generate_codecharts as gc
import os
# FIXED parameters
text_color = ImageColor.getrgb("white") # colour of the '+' drawn for "fix_cross" sentinels
font_type = "arial.ttf" # font requested first; make_sentinel falls back to FreeSans when it cannot be loaded
# NOTE(review): make_sentinel converts sizes via gc.pixel_to_point, so this copy looks unused here - confirm
px_pt_ratio = 20/29 # according to our image dimensions, 29 point = 20 px
# target kinds accepted by make_sentinel
valid_target_types=["red_dot", "fix_cross", "img"]
def make_sentinel(codechart_filename,sentinel_filename,image_width,image_height,border_padding,target_type="red_dot", target_im_dir=""):
    """Create one codechart and the sentinel image that belongs to it.

    A codechart is generated and saved to codechart_filename. One of its
    triplets, lying more than border_padding pixels away from every image
    border, is chosen at random and the sentinel target is drawn at that
    triplet's position on an otherwise blank image saved to
    sentinel_filename.

    Parameters:
        codechart_filename: output path of the codechart image.
        sentinel_filename: output path of the sentinel image.
        image_width, image_height: size of both images in pixels.
        border_padding: minimum distance of the target from the borders, so
            that it is not hard to spot.
        target_type: one of valid_target_types ("red_dot", "fix_cross", "img").
        target_im_dir: directory of target images; required for "img".

    Returns (D, D_full): D holds the chosen code, its coordinate and the
    list of accepted codes (only filled for "img" targets); D_full also
    holds all valid codes, their coordinates and the codechart file name.

    Raises RuntimeError for an unknown target_type, a missing or empty
    target_im_dir with "img" targets, or when no triplet lies inside the
    padded area.
    """
    if target_type not in valid_target_types:
        raise RuntimeError("target_type must be one of %s" % valid_target_types.__str__())
    # fail before any file is written
    if target_type == "img" and not target_im_dir:
        raise RuntimeError("No im dir provided for sentinel targets")

    font_size = int(image_height*0.0278)
    correct_codes = []

    valid_codes, coordinates = gc.create_codechart(codechart_filename,image_width,image_height)

    # Choose uniformly among the triplets inside the padded area. Filtering
    # first (instead of re-drawing until one fits) cannot loop forever when
    # no triplet qualifies.
    eligible = [
        code for code in valid_codes
        if border_padding < coordinates[code][0] < image_width-border_padding
        and border_padding < coordinates[code][1] < image_height-border_padding
    ]
    if not eligible:
        raise RuntimeError("no codechart triplet lies more than %s px away from the image border" % border_padding)
    triplet = random.choice(eligible)
    triplet_coordinate = coordinates[triplet]

    # fixation crosses are drawn on grey, all other targets on white
    if target_type == "fix_cross":
        bg_color = 126
    else:
        bg_color = 255

    # create and save the sentinel image
    img = Image.new('RGB', (image_width, image_height), (bg_color, bg_color, bg_color))
    d = ImageDraw.Draw(img)

    if target_type == "fix_cross":
        # the font is only needed for this target type
        try:
            font = ImageFont.truetype(font_type, gc.pixel_to_point(font_size)) # takes in point value
        except OSError:
            print("WARNING: using different font bc could not find %s" % font_type)
            font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeSans.ttf", gc.pixel_to_point(font_size))
        plot_coord = (triplet_coordinate[0]+font_size, triplet_coordinate[1]) # offset cross location to the center of the triplet
        d.text(plot_coord, '+', text_color, font)
    elif target_type == "red_dot":
        d.ellipse((triplet_coordinate[0], triplet_coordinate[1], triplet_coordinate[0]+font_size*2, triplet_coordinate[1]+font_size*2), fill = 'red', outline ='red')
    elif target_type == "img":
        # pick a random file from the target image directory
        images = os.listdir(target_im_dir)
        if not images:
            raise RuntimeError("No target images found in %s" % target_im_dir)
        # resize the target to a fixed width, keeping its aspect ratio
        width = 200
        with Image.open(os.path.join(target_im_dir, random.choice(images))) as target_file:
            height = int(target_file.height*width/target_file.width)
            target = target_file.resize((width, height))
        plot_coord = (triplet_coordinate[0]-int(width/2), triplet_coordinate[1]-int(height/2))
        img.paste(target, plot_coord)
        # correct_codes lie within the sentinel width
        anchor = np.array(triplet_coordinate)
        for code in valid_codes:
            dist = np.linalg.norm(np.array(coordinates[code])-anchor)
            if dist <= width/2.0:
                correct_codes.append(code)
    else:
        # only reachable when valid_target_types lists a type without a branch here
        raise RuntimeError("target_type %s does not exist" % target_type)

    img.save(sentinel_filename)

    D = {'correct_code':triplet, 'coordinate':triplet_coordinate, 'correct_codes':correct_codes}
    D_full = {'correct_code':triplet, 'coordinate':triplet_coordinate,
              'valid_codes':valid_codes, 'coordinates':coordinates, 'codechart_file':codechart_filename,
              'correct_codes':correct_codes}
    return D,D_full
def generate_sentinels(sentinel_image_dir,sentinel_CC_dir,num_buckets,start_bucket_at,sentinel_images_per_bucket,
                       image_width,image_height,border_padding,target_type,target_im_dir=""):
    """Fill num_buckets bucket folders with sentinel images and codecharts.

    For bucket number start_bucket_at + b, sentinel_images_per_bucket
    image/codechart pairs are created with make_sentinel in
    sentinel_image_dir/bucket<n> and sentinel_CC_dir/bucket<n>. Every image
    bucket also receives sentinel_codes.json and sentinel_codes_full.json,
    keyed by sentinel file name.
    """
    # Set up directories
    for top_dir in (sentinel_image_dir, sentinel_CC_dir):
        if not os.path.exists(top_dir):
            os.makedirs(top_dir)

    # Start generating sentinels
    img_num_offset = (start_bucket_at-1)*sentinel_images_per_bucket # start at a new index id
    for b in range(num_buckets):
        bucket_name = 'bucket%d'%(start_bucket_at+b)
        image_bucket_dir = os.path.join(sentinel_image_dir,bucket_name)
        CC_bucket_dir = os.path.join(sentinel_CC_dir,bucket_name)
        for bucket_dir in (image_bucket_dir, CC_bucket_dir):
            if not os.path.exists(bucket_dir):
                os.makedirs(bucket_dir)

        data = {} # per file: the coordinate of the target and the triplet at that coordinate
        data_with_coords = {} # same, plus all other valid triplets and coordinates (for analysis)

        print('Populating %s with %d sentinel images'%(image_bucket_dir,sentinel_images_per_bucket))
        print('Populating %s with %d corresponding codecharts'%(CC_bucket_dir,sentinel_images_per_bucket))

        # image numbers continue from bucket to bucket
        first_img_num = img_num_offset + b*sentinel_images_per_bucket + 1
        for img_num in range(first_img_num, first_img_num+sentinel_images_per_bucket):
            # generate random code chart
            codechart_filename = os.path.join(CC_bucket_dir,'sentinel_CC_%d.jpg'%(img_num))
            sentinel_filename = os.path.join(image_bucket_dir,'sentinel_image_%d.jpg'%(img_num))
            D,D_full = make_sentinel(codechart_filename,sentinel_filename,image_width,image_height,border_padding,target_type, target_im_dir)
            data[sentinel_filename] = D
            data_with_coords[sentinel_filename] = D_full

        for json_name, payload in (('sentinel_codes.json', data), ('sentinel_codes_full.json', data_with_coords)):
            json_path = os.path.join(image_bucket_dir,json_name)
            with open(json_path, 'w') as outfile:
                json.dump(payload, outfile)
            print('Writing out %s'%(json_path))
if __name__ == "__main__":
    # Set these parameters
    sentinel_images_per_bucket = 500
    num_buckets = 1
    # NOTE: image numbers start at (start_bucket_at-1)*sentinel_images_per_bucket + 1,
    # so with start_bucket_at = 0 the generated files are numbered -499 .. 0
    start_bucket_at = 0 # to avoid overwriting the existing buckets

    image_width = 1920
    image_height = 1080
    border_padding = 100 # don't put fixation cross in this region of the image

    # sentinel targets are images taken from target_im_dir
    target_type = "img"
    target_im_dir = "sentinel_target_images"

    rootdir = './task_data'
    sentinel_image_dir = os.path.join(rootdir,'sentinel_images')
    sentinel_CC_dir = os.path.join(rootdir,'sentinel_CC')

    generate_sentinels(
        sentinel_image_dir,
        sentinel_CC_dir,
        num_buckets,
        start_bucket_at,
        sentinel_images_per_bucket,
        image_width,
        image_height,
        border_padding,
        target_type,
        target_im_dir,
    )

View file

@ -0,0 +1,123 @@
from cv2 import cv2
import numpy as np
import re,os
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
from shutil import rmtree
import pandas as pd
from natsort import natsorted
import imageio
import moviepy.editor as mp
# Machine-specific locations - change them to where the data lives on your computer.
# Base directory of the dataset; the 'highlight' output folder is created below it.
# NOTE(review): the earlier comment called this the salicon data while the path points to a Massvis folder - confirm.
bp = "E:/1Study/Hiwi/Massvis dataset"
# one sub-folder per stimulus; the fixation CSVs are read from its 'enc' folder
curdir = 'E:/1Study/Hiwi/massvis/eyetracking/csv_files/fixationsByVis'
# NOTE(review): not referenced in the visible part of this script
eledir = 'E:/1Study/Hiwi/massvis/dataset/csv_files/targets393_elementLabels/elementLabels/'
# original stimulus images, looked up as <folder name>.png
origimdir = os.path.join(bp, 'targets')
def highlight_function(x,y,r,imagePath,outPath,imgName):
    """Render a 'spotlight' frame: sharp inside a circle, blurred and dimmed outside.

    Parameters:
        x, y: centre of the spotlight circle in pixel coordinates.
        r: radius of the circle in pixels.
        imagePath: image to read (loaded unchanged, so an alpha channel is kept).
        outPath: directory the frame is written to.
        imgName: file name of the frame without extension ('.png' is appended).

    Returns the frame as an (height, width, 4) uint8 array; its fourth
    channel is 255 everywhere.

    Raises FileNotFoundError when the image cannot be read.
    """
    # load input image; cv2.imread signals failure by returning None
    img = cv2.imread(imagePath, cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError("could not read image: %s" % imagePath)

    # blur the image, then reduce the brightness of the blurred copy by 40%
    # (one vectorised step instead of a Python loop over every pixel;
    # astype truncates exactly like int() did)
    imgBlur = cv2.blur(img, (15, 15))
    imgBrightness = 0.6
    imgBlur[:, :, :3] = (imgBlur[:, :, :3] * imgBrightness).astype(np.uint8)

    # get size of image
    height, width = img.shape[:2]

    # mask that is 1 inside the spotlight circle and 0 outside, plus its inverse
    circleIn = np.zeros((height, width, 1), np.uint8)
    circleIn = cv2.circle(circleIn, (x, y), r, (1), -1)
    circleOut = 1 - circleIn

    # imgIn: only the original content inside the circle remains
    imgIn = np.zeros((height, width, 4), np.uint8)
    imgIn[:, :, :3] = img[:, :, :3] * circleIn
    imgIn[:, :, 3] = circleIn[:, :, 0] * 255  # opaque inside the circle

    # imgOut: only the blurred, dimmed content outside the circle remains
    imgOut = np.zeros((height, width, 4), np.uint8)
    imgOut[:, :, :3] = imgBlur[:, :, :3] * circleOut
    imgOut[:, :, 3] = circleOut[:, :, 0] * 255  # opaque outside the circle

    # the two parts never overlap, so adding them yields the full frame
    imgHighlight = cv2.add(imgIn, imgOut)
    cv2.imwrite(os.path.join(outPath, imgName+'.png'), imgHighlight)
    # no cv2.waitKey() here: this function opens no window and is called in
    # batch, and headless OpenCV builds raise on GUI calls
    return imgHighlight
# For every stimulus folder in curdir, turn each fixation CSV into a spotlight GIF.
for curfile in os.listdir(curdir):
    # makedirs also creates the intermediate bp/highlight folder
    outpath = os.path.join(bp, 'highlight', curfile)
    os.makedirs(outpath, exist_ok=True)
    imname = os.path.splitext(os.path.basename(curfile))[0]
    print('imname',imname)
    source_image = os.path.join(origimdir, curfile+'.png')
    allfiles = natsorted(glob(os.path.join(curdir, curfile, 'enc' ,'*.csv')))
    # get the experiment data (csv format)
    for subcsv in allfiles:
        fixations = pd.read_csv(subcsv, header=None)
        gif_image=[]
        # columns 1, 2 and 3 hold x, y and the fixation duration
        # generate spotlight gif image
        for i, (fx, fy, fdur) in enumerate(zip(fixations[1], fixations[2], fixations[3])):
            # adjust the duration parameter so that the spotlight circle won't be too small to display
            radius = fdur/3 if fdur/3>100 else 100
            # Make the display time of each spotlight correspond to the observation time of the experimenter
            n_frames = int(fdur/50)
            if n_frames > 0:
                # every repetition is the same frame, so render (and write
                # its PNG) once and reuse it
                frame = highlight_function(int(fx),int(fy),int(radius),source_image,outpath,imname+'('+str(i+1)+')')
                gif_image.extend([frame]*n_frames)
            print(i)
        # save spotlight gif image
        # NOTE(review): the file name depends only on the stimulus, so every
        # CSV of a stimulus overwrites the GIF and frames of the previous one - confirm this is intended
        imageio.mimsave(os.path.join(outpath,imname+'.gif'), gif_image, fps=100)
        # problem!!! : The generated spotlight gif image is too large
        # possible solution: convert gif format to webm format. Converting to webm format can significantly reduce the file size

View file

@ -0,0 +1,89 @@
import generate_codecharts
import generate_sentinels
import os
import string
import random
import json
import matplotlib.pyplot as plt
import numpy as np
import base64
import glob
def generate_tutorials(tutorial_image_dir,rootdir,image_width,image_height,border_padding,N,target_type,target_imdir,N_sent=0):
    """Create the codecharts, sentinels and JSON indexes used by the tutorial.

    The first N images found in tutorial_image_dir (*.jpeg, *.png, *.jpg)
    each get a codechart in rootdir/tutorial_CC. In addition N_sent sentinel
    images with their codecharts are generated in rootdir/tutorial_sentinels
    and rootdir/tutorial_CC. rootdir/tutorial.json and
    rootdir/tutorial_full.json describe all of them.

    Raises Exception when tutorial_image_dir does not exist.
    """
    if not os.path.exists(tutorial_image_dir):
        raise Exception('Please include a directory of tutorial images at: %s'%(tutorial_image_dir))

    tutorial_images = []
    for ext in ('*.jpeg', '*.png', '*.jpg'):
        tutorial_images.extend(glob.glob(os.path.join(tutorial_image_dir, ext)))
    print('A total of %d images will be sampled from for the tutorials.'%(len(tutorial_images)))

    # output directories, created on first use
    tutorial_CC_dir = os.path.join(rootdir,'tutorial_CC')
    tutorial_sentinel_dir = os.path.join(rootdir,'tutorial_sentinels')
    for needed_dir in (tutorial_CC_dir, tutorial_sentinel_dir):
        if not os.path.exists(needed_dir):
            os.makedirs(needed_dir)

    data = {}
    data_with_coords = {}

    # make the corresponding codecharts
    for img_num in range(N):
        filename = tutorial_images[img_num]
        codechart_filename = os.path.join(tutorial_CC_dir,'tutorial_real_CC_%d.jpg'%(img_num))
        valid_codes, coordinates = generate_codecharts.create_codechart(codechart_filename,image_width,image_height)
        full_entry = {'valid_codes':valid_codes, 'coordinates':coordinates,
                      'flag':'tutorial_real','codechart_file':codechart_filename}
        data_with_coords[filename] = full_entry
        # the short index carries everything except the coordinates
        data[filename] = {key: full_entry[key] for key in ('valid_codes','flag','codechart_file')}

    # now generate sentinel images with their corresponding codecharts
    print('Populating %s with %d sentinel images'%(tutorial_sentinel_dir,N_sent))
    print('Populating %s with %d corresponding codecharts'%(tutorial_CC_dir,N_sent))
    for img_num in range(N_sent):
        codechart_filename = os.path.join(tutorial_CC_dir,'tutorial_sentinel_CC_%d.jpg'%(img_num))
        sentinel_filename = os.path.join(tutorial_sentinel_dir,'tutorial_sentinel_%d.jpg'%(img_num))
        D,D_full = generate_sentinels.make_sentinel(codechart_filename,sentinel_filename,
                                                    image_width,image_height,border_padding,target_type, target_imdir)
        for record in (D, D_full):
            record['flag'] = 'tutorial_sentinel'
            record['codechart_file'] = codechart_filename
        data[sentinel_filename] = D
        data_with_coords[sentinel_filename] = D_full

    for json_name, payload in (('tutorial.json', data), ('tutorial_full.json', data_with_coords)):
        json_path = os.path.join(rootdir,json_name)
        with open(json_path, 'w') as outfile:
            json.dump(payload, outfile)
        print('Writing out %s'%(json_path))
if __name__ == "__main__":
    # geometry shared by codecharts and sentinels
    image_width = 1920
    image_height = 1080
    border_padding = 100 # don't put fixation cross in this region of the image

    # sentinel appearance (no target image directory is needed for red dots)
    target_type = "red_dot"
    target_imdir = ""

    rootdir = './task_data'
    tutorial_image_dir = os.path.join(rootdir,'tutorial_images')

    # assume that tutorial_images contains as many images (N) as desired for the tutorials
    tutorial_images = glob.glob(os.path.join(tutorial_image_dir,'*.jpg'))
    N = len(tutorial_images)

    generate_tutorials(tutorial_image_dir,rootdir,image_width,image_height,border_padding,N,target_type,target_imdir)

View file

@ -0,0 +1,581 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Experimental set-up: ##\n",
"\n",
"This code will generate experimental files that can either be independently hosted on a website and run with recruited participants, or via our [MTurk iPython notebook](https://github.com/a-newman/mturk-api-notebook) be used for launching Amazon Mechanical Turk (MTurk) HITs. \n",
"\n",
"An experiment is composed of different sets of images:\n",
"* **target images** are the images you want to collect attention data on - those are images that you provide (in directory `sourcedir` below)\n",
"* **tutorial images** are images that will be shown to participants at the beginning of the experiment to get them familiarized with the codecharts set-up (you can reuse the tutorial image we provide, or provide your own in directory `tutorial_source_dir` below)\n",
" * *hint: if your images are very different in content from the images in our set, you may want to train your participants on your own images, to avoid a context switch between the tutorial and main experiment*\n",
"* **sentinel images** are images interspersed throughout the experiment where participant attention is guided to a very specific point on the screen, used as validation/calibration images to ensure participants are actually moving their eyes and looking where they're supposed to; the code below will intersperse images from the `sentinel_target_images` directory we provide throughout your experimental sequence\n",
" * sentinel images can be interspersed throughout both the tutorial and target images, or excluded from the tutorial (via `add_sentinels_to_tutorial` flag below); we recommend having sentinel images as part of the tutorial to familiarize participants with such images as well\n",
" \n",
"The code below will populate the `rootdir` task directory with #`num_subject_files` subject files for you, where each subject file corresponds to an experiment you can run on a single participant. For each subject file, a set of #`num_images_per_sf` will be randomly sampled from the `sourcedir` image directory. A set of #`num_sentinels_per_sf` sentinel images will also be sampled from the `sentinel_imdir` image directory, and distributed throughout the experiment. A tutorial will be generated at the beginning of the experiment with #`num_imgs_per_tutorial` randomly sampled from the `tutorial_source_dir` image directory, along with an additional #`num_sentinels_per_tutorial` sentinel files distributed throughout the tutorial (if `add_sentinels_to_tutorial` flag is set to true). "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import string\n",
"import random\n",
"import json \n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import base64 \n",
"import glob"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"sourcedir = '10' # replace this with your own directory of experiment images\n",
"filldir = '1' # the filler images for False alarm and Correct rejection\n",
"blurdir = '10_blur' # blurry images"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# PARAMETERS for generating subject files\n",
"num_subject_files = 1 # number of subject files to generate (i.e., # of mturk assignments that will be put up) \n",
"num_images_per_sf = 20 # number of target images per subject file \n",
"num_imgs_per_tutorial = 0 # number of tutorial images per subject file\n",
"num_sentinels_per_sf = 0 # number of sentinel images to distribute throughout the experiment (excluding the tutorial)\n",
"add_sentinels_to_tutorial = False # whether to have sentinel images as part of the tutorial\n",
"num_sentinels_per_tutorial = 0 # number of sentinel images to distribute throughout the tutorial"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Another bit of terminology and experimental logistics involves **buckets** which are a way to distribute experiment stimuli so that multiple experiments can be run in parallel (and participants can be reused for different subsets of images). If you have a lot of images that you want to collect data on, and for each participant you are sampling a set of only #`num_images_per_sf`, then you might have to generate a large `num_subject_files` in order to have enough data points per image. A way to speed up data collection is to split all the target images into #`num_buckets` disjoint buckets, and then to generate subject files per bucket. Given that subject files generated per bucket are guaranteed to have a disjoint set of images, the same participant can be run on multiple subject files from different buckets. MTurk HITs corresponding to different buckets can be launched all at once. In summary, in MTurk terms, you can generate as many HITs as `num_buckets` specified below, and as many assignments per HIT as `num_subject_files`. \n",
"\n",
"The way the codecharts methodology works, a jittered grid of alphanumeric triplets appears after every image presentation (whether it is a target, sentinel, or tutorial image), since a participant will need to indicate where on the preceding image s/he looked, by reporting a triplet. To avoid generating an excessive number of codecharts (that bulks up all the subject files), we can reuse some codecharts across buckets. The way we do this is by pre-generating #`ncodecharts` codecharts, and then randomly sampling from these when generating the individual subject files."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# we pre-generate some codecharts and sentinel images so that we can reuse these across participants and buckets \n",
"# and potentially not have to generate as many files; these can be set to any number, and the corresponding code\n",
"# will just sample as many images as need per subject file\n",
"\n",
"ncodecharts = num_subject_files*num_images_per_sf # number of codecharts to generate; can be changed\n",
"sentinel_images_per_bucket = num_subject_files*num_sentinels_per_sf # can be changed\n",
"\n",
"# set these parameters\n",
"num_buckets = 1 # number of disjoint sets of subject files to create (for running multiple parallel HITs)\n",
"start_bucket_at = 0 # you can use this and the next parameter to generate more buckets if running the code later\n",
"which_buckets = [0] # a list of specific buckets e.g., [4,5,6] to generate experiment data for\n",
"\n",
"rootdir = '../assets/task_data' # where all the experiment data will be stored\n",
"if not os.path.exists(rootdir):\n",
" print('Creating directory %s'%(rootdir))\n",
" os.makedirs(rootdir)\n",
"\n",
"real_image_dir = os.path.join(rootdir,'real_images') # target images, distributed by buckets\n",
" # (shared across buckets)\n",
"sentinel_image_dir = os.path.join(rootdir,'sentinel_images') # sentinel images, distributed by buckets\n",
"sentinel_CC_dir = os.path.join(rootdir,'sentinel_CC') # codecharts corresponding to the sentinel images\n",
" # (shared across buckets)\n",
"#sentinel_targetim_dir = os.path.join(rootdir, 'sentinel_target') \n",
"real_blurred_dir = os.path.join(rootdir,'real_blurred')\n",
"real_filler_dir = os.path.join(rootdir,'real_filler')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20 files copied from 10 to ../assets/task_data/all_images\n",
"Padding 20 image files to dimensions: [1734,1340]...\n",
"Done!\n"
]
}
],
"source": [
"# this cell creates an `all_images` directory, copies images from sourcedir, and pads them to the required dimensions\n",
"\n",
"import create_padded_image_dir\n",
"\n",
"all_image_dir = os.path.join(rootdir,'all_images')\n",
"if not os.path.exists(all_image_dir):\n",
" print('Creating directory %s'%(all_image_dir))\n",
" os.makedirs(all_image_dir)\n",
" \n",
"allfiles = []\n",
"for ext in ('*.jpeg', '*.png', '*.jpg'):\n",
" allfiles.extend(glob.glob(os.path.join(sourcedir, ext)))\n",
"print(\"%d files copied from %s to %s\"%(len(allfiles),sourcedir,all_image_dir))\n",
" \n",
"image_width,image_height = create_padded_image_dir.save_padded_images(all_image_dir,allfiles)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20 files copied from 10_blur to ../assets/task_data/blurred_images\n",
"Padding 20 image files to dimensions: [1734,1340]...\n",
"Done!\n"
]
}
],
"source": [
"blurred_image_dir = os.path.join(rootdir,'blurred_images')\n",
"\n",
"if not os.path.exists(blurred_image_dir):\n",
" print('Creating directory %s'%(blurred_image_dir))\n",
" os.makedirs(blurred_image_dir)\n",
" \n",
"blurfiles = []\n",
"for ext in ('*.jpeg', '*.png', '*.jpg'):\n",
" blurfiles.extend(glob.glob(os.path.join(blurdir, ext)))\n",
"print(\"%d files copied from %s to %s\"%(len(blurfiles),blurdir,blurred_image_dir))\n",
" \n",
"image_width,image_height = create_padded_image_dir.save_padded_images(blurred_image_dir,blurfiles)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20 files copied from 1 to ../assets/task_data/filler_images\n",
"Padding 20 image files to dimensions: [1786,1340]...\n",
"Done!\n"
]
}
],
"source": [
"filler_image_dir = os.path.join(rootdir,'filler_images')\n",
"\n",
"if not os.path.exists(filler_image_dir):\n",
" print('Creating directory %s'%(filler_image_dir))\n",
" os.makedirs(filler_image_dir)\n",
" \n",
"fillerfiles = []\n",
"for ext in ('*.jpeg', '*.png', '*.jpg'):\n",
" fillerfiles.extend(glob.glob(os.path.join(filldir, ext)))\n",
"print(\"%d files copied from %s to %s\"%(len(fillerfiles),filldir,filler_image_dir))\n",
" \n",
"image_width,image_height = create_padded_image_dir.save_padded_images(filler_image_dir,fillerfiles)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Distributing images across 1 buckets\n",
"Populating ../assets/task_data/real_images/bucket0 with 20 images\n"
]
}
],
"source": [
"# this cell creates the requested number of buckets and distributes images from `all_image_dir` to the corresponding\n",
"# bucket directories inside `real_image_dir`\n",
"\n",
"from distribute_image_files_by_buckets import distribute_images\n",
"\n",
"distribute_images(all_image_dir,real_image_dir,num_buckets,start_bucket_at)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Distributing images across 1 buckets\n",
"Populating ../assets/task_data/real_blurred/bucket0 with 20 images\n"
]
}
],
"source": [
"distribute_images(blurred_image_dir,real_blurred_dir,num_buckets,start_bucket_at)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Distributing images across 1 buckets\n",
"Populating ../assets/task_data/real_filler/bucket0 with 20 images\n"
]
}
],
"source": [
"distribute_images(filler_image_dir,real_filler_dir,num_buckets,start_bucket_at)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We create sentinel images by taking a small object (one of a: fixation cross, red dot, or image of a face) and choosing a random location for it on a blank image (away from the image boundaries by at least `border_padding` pixels). The code below creates #`sentinel_images_per_bucket` such sentinel images in each bucket. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that all the previous cells have generated the requisite image, codechart, sentinel, and tutorial files, the following code will generate `num_subject_files` individual subject files by sampling from the appropriate image directories and creating an experimental sequence. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Only run this if you want new subject json"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"../assets/task_data/real_blurred/bucket0/*.jpg\n",
"Generating 1 subject files in bucket 0\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"wsj265.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"treasuryD07_3.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_242.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_257.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_243.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"whoJ43_1.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_150.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"3iRYXLvZ8oVQDMLR-CebnQ==.0.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_194.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"whoQ12_2.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"whoF03.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"wsj3.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"whoB10_1.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"3LY3OX8bU7uKhgcRPgDRxw==.0.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_262.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"economist_daily_chart_260.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"0BmWZbQdEukHi79Lit01oQ==.0.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"wsj86.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"whoJ36_2.json\n",
"['../assets/task_data/real_blurred/bucket0/wsj265.png', '../assets/task_data/real_blurred/bucket0/treasuryD07_3.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_242.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_257.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_243.png', '../assets/task_data/real_blurred/bucket0/whoJ43_1.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_150.png', '../assets/task_data/real_blurred/bucket0/3iRYXLvZ8oVQDMLR-CebnQ==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_194.png', '../assets/task_data/real_blurred/bucket0/whoQ12_2.png', '../assets/task_data/real_blurred/bucket0/whoF03.png', '../assets/task_data/real_blurred/bucket0/wsj3.png', '../assets/task_data/real_blurred/bucket0/whoB10_1.png', '../assets/task_data/real_blurred/bucket0/3LY3OX8bU7uKhgcRPgDRxw==.0.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_262.png', '../assets/task_data/real_blurred/bucket0/economist_daily_chart_260.png', '../assets/task_data/real_blurred/bucket0/0BmWZbQdEukHi79Lit01oQ==.0.png', '../assets/task_data/real_blurred/bucket0/wsj86.png', '../assets/task_data/real_blurred/bucket0/whoJ36_2.png', '../assets/task_data/real_blurred/bucket0/whoK21.png']\n",
"whoK21.json\n",
"Subject file ../assets/task_data/subject_files/bucket0/subject_file_0.json DONE\n"
]
}
],
"source": [
"# Answer keys collected while the subject files are generated.\n",
"gt_answers = []    # 'answer' value of every QA item, in the order the blurred images are added\n",
"reco_answers = []  # one entry per recognition image: '1' if its path contains 'real_images', else '2'\n",
"\n",
"# Number given to the first subject file written. To continue after an earlier run, set this\n",
"# to the count of JSON files already present in the bucket's subject_files directory.\n",
"start_subjects_at = 0\n",
"\n",
"IMAGE_PATTERNS = ('*.jpeg', '*.png', '*.jpg')\n",
"\n",
"\n",
"def _list_images(directory):\n",
"    \"\"\"Return the paths of all files in `directory` that match IMAGE_PATTERNS.\"\"\"\n",
"    paths = []\n",
"    for pattern in IMAGE_PATTERNS:\n",
"        paths.extend(glob.glob(os.path.join(directory, pattern)))\n",
"    return paths\n",
"\n",
"\n",
"def _qa_file_name(image_path):\n",
"    \"\"\"Return the QA file name for an image: its base name with the extension replaced by .json.\n",
"\n",
"    os.path is used instead of manual string slicing so that the platform's path\n",
"    separator and extensions of any length (e.g. '.jpeg') are handled without\n",
"    editing this cell.\n",
"    \"\"\"\n",
"    return os.path.splitext(os.path.basename(image_path))[0] + '.json'\n",
"\n",
"\n",
"## GENERATING SUBJECT FILES\n",
"subjdir = os.path.join(rootdir,'subject_files')\n",
"os.makedirs(subjdir, exist_ok=True)\n",
"\n",
"# iterate over all buckets\n",
"for bucket_num in which_buckets:\n",
"\n",
"    bucket = 'bucket%d'%(bucket_num)\n",
"    img_files = _list_images(os.path.join(real_image_dir,bucket))\n",
"    blur_files = _list_images(os.path.join(real_blurred_dir,bucket))\n",
"\n",
"    # Recognition pool: filler images plus the real images, shuffled once per bucket.\n",
"    filler_files = _list_images(os.path.join(real_filler_dir,bucket))\n",
"    filler_files.extend(img_files) #40 images for recognition task\n",
"    random.shuffle(filler_files)\n",
"\n",
"    # Both output directories are written to below, so both are created here.\n",
"    subjdir = os.path.join(rootdir,'subject_files',bucket)\n",
"    full_subjdir = os.path.join(rootdir,'full_subject_files',bucket)\n",
"    os.makedirs(subjdir, exist_ok=True)\n",
"    os.makedirs(full_subjdir, exist_ok=True)\n",
"\n",
"    print('Generating %d subject files in bucket %d'%(num_subject_files,bucket_num))\n",
"    # for each bucket, generate subject files\n",
"    for i in range(num_subject_files):\n",
"\n",
"        sf_data = []\n",
"\n",
"        # Alternate in pairs: two real images, then two blurred images with their QA data.\n",
"        # Indexing (not slicing) is deliberate: a bucket with too few images fails loudly.\n",
"        for j in range(int(num_images_per_sf/2)):\n",
"            for k in range(2):\n",
"                sf_data.append({'image': img_files[j*2+k], 'flag': 'real'})\n",
"\n",
"            for w in range(2):\n",
"                blur_path = blur_files[j*2+w]\n",
"                QA_file_name = _qa_file_name(blur_path)\n",
"\n",
"                with open(os.path.join(sourcedir,QA_file_name)) as f:\n",
"                    print(QA_file_name)\n",
"                    qa = json.load(f)\n",
"\n",
"                # Record the ground-truth answer of every QA item that has one.\n",
"                for item in qa:\n",
"                    if 'answer' in qa[item]:\n",
"                        gt_answers.append(qa[item]['answer'])\n",
"\n",
"                sf_data.append({'image': blur_path, 'QA': qa, 'flag': 'blur'})\n",
"\n",
"        # Recognition-task entries.\n",
"        # NOTE(review): 'showed' only tests whether the path contains 'real_images'; this\n",
"        # presumably relies on real_image_dir (and not real_filler_dir) containing that\n",
"        # name -- confirm against the directory settings defined earlier in the notebook.\n",
"        for img in filler_files:\n",
"            showed = 'real_images' in img\n",
"            reco_answers.append('1' if showed else '2')\n",
"            sf_data.append({'image': img, 'flag': 'fill', 'showed': showed})\n",
"\n",
"        # 'index' is the position of the entry within the subject file.\n",
"        for image_id, entry in enumerate(sf_data):\n",
"            entry['index'] = image_id\n",
"\n",
"        # The full subject file currently carries the same fields as the regular one\n",
"        # (no extra keys are added); it is still written as a separate file.\n",
"        full_sf_data = [entry.copy() for entry in sf_data]\n",
"\n",
"        subj_num = start_subjects_at+i\n",
"        sf_name = 'subject_file_%d.json'%(subj_num)\n",
"        with open(os.path.join(subjdir,sf_name), 'w') as outfile:\n",
"            json.dump(sf_data, outfile)\n",
"        # Report success only after the file has actually been written.\n",
"        print('Subject file %s DONE'%(outfile.name))\n",
"        with open(os.path.join(full_subjdir,sf_name), 'w') as outfile:\n",
"            json.dump(full_sf_data, outfile)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(gt_answers)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"40"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(reco_answers)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"#import numpy as np\n",
"#np.save('gt_answers2',gt_answers)\n",
"#np.save('recogt_answers2',reco_answers)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 127 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 247 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 213 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 181 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 149 KiB