5.6 KiB
5.6 KiB
In [ ]:
import os import numpy as np import cv2 import pandas as pd import matplotlib.pyplot as plt import subprocess import warnings import dataset_utils as utils
Read all Montezuma's Revenge trials¶
In [ ]:
# Root folder that holds meta_data.csv and the trial archives.
DATA_PATH = 'montezuma_revenge/'

# Load the metadata, keep only the Montezuma's Revenge rows, and order them
# by trial_id with a fresh 0..n-1 index.
df = (
    pd.read_csv(os.path.join(DATA_PATH, 'meta_data.csv'))
    .loc[lambda meta: meta.GameName.str.contains('montezuma_revenge')]
    .sort_values(by=['trial_id'], ascending=True)
    .reset_index(drop=True)
)
df.head()
Get folder names of each trial¶
In [ ]:
# Collect every trial archive found anywhere below DATA_PATH.
# Matching on the suffix (rather than a substring) skips partial downloads
# such as "*.tar.bz2.part".
file_lst = [
    os.path.join(root, name)
    for root, _dirs, files in os.walk(DATA_PATH)
    for name in files
    if name.endswith('.tar.bz2')
]

# Folder name = archive file name up to its first dot. The base name is taken
# first so that dots or platform-specific separators in the directory part of
# the path cannot corrupt the result.
folder_lst = [os.path.basename(f).split('.')[0] for f in file_lst]

# Order the folders by their leading integer so they line up with df, which
# is sorted by trial_id.
# NOTE(review): assumes the integer prefix of each archive name is the
# trial_id and that there is exactly one archive per row of df -- confirm.
folder_lst.sort(key=lambda x: int(str(x).split('_')[0]), reverse=False)

df['trial_folder'] = folder_lst
df.to_pickle(os.path.join(DATA_PATH, "all_trials_summary.pkl"))
df.head()
Unpack all folders¶
In [ ]:
# Extract the archive of every trial into DATA_PATH.
for i in df.trial_id:
    # Match on the start of the file name: a plain substring test would let
    # trial 1 also match "11_..." or any name containing "...1_".
    prefix = str(i) + '_'
    file = [f for f in file_lst if os.path.basename(f).startswith(prefix)]
    print(i, *file)
    if not file:
        raise FileNotFoundError(
            f'No archive starting with {prefix!r} found below {DATA_PATH}'
        )
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to tar unchanged.
    ret = subprocess.call(['tar', '-jxf', file[0], '--directory', DATA_PATH])
    if ret != 0:
        warnings.warn(f'tar exited with status {ret} while unpacking {file[0]}')
Generate DataFrame with all trials¶
In [ ]:
%%time def write_unique_id(frame_id, episode_id, trial_id): if not pd.isna(episode_id) and not pd.isna(frame_id): unique_id = str(trial_id)+ '_' + '_'.join(frame_id.split('_')[:2]) + '_E{:01d}'.format(int(episode_id)) elif not pd.isna(frame_id): unique_id = str(trial_id)+ '_' + '_'.join(frame_id.split('_')[:2]) + '_E0' else: unique_id = None return unique_id path = os.path.join(DATA_PATH, df.iloc[0].trial_folder) print(path) # Read Annotations trial_df = utils.txt_to_dataframe(path + '.txt') # Write unique ID trial_df['ID'] = trial_df.apply(lambda x: write_unique_id(x['frame_id'], x['episode_id'], df.iloc[0].trial_id), axis=1) # Write image paths trial_df['img_path'] = trial_df.apply(lambda x: os.path.join(path, str(x['frame_id']) + '.png'), axis=1) # Reorder columns cols = ['ID'] + [c for c in trial_df.columns.tolist() if not c=='ID'] trial_df = trial_df[cols] # Cut frames without annotations trial_df = trial_df[trial_df.ID.notnull()] print(f'Episodes: {trial_df.ID.unique()}\n') full_df = trial_df.copy() for idx in df.index[1:]: row = df.iloc[idx] if row.GameName == 'montezuma_revenge': path = os.path.join(DATA_PATH, row.trial_folder) elif row.GameName == 'montezuma_revenge_highscore': path = os.path.join(DATA_PATH, 'highscore', row.trial_folder) else: path = '' warnings.warn(f"GameName of row {idx} not recognised! 
Returning empty path.") print(f'Reading {path}') # Read Annotations trial_df = utils.txt_to_dataframe(path + '.txt') # Write unique ID trial_df['ID'] = trial_df.apply(lambda x: write_unique_id(x['frame_id'], x['episode_id'], row.trial_id), axis=1) # Write image paths trial_df['img_path'] = trial_df.apply(lambda x: os.path.join(path, str(x['frame_id']) + '.png'), axis=1) # Cut frames without annotations trial_df = trial_df[trial_df.ID.notnull()] print(f'Episodes: {trial_df.ID.unique()}\n') full_df = pd.concat([full_df, trial_df], join='inner', ignore_index=True) outpath = os.path.join(DATA_PATH, "all_trials.pkl") print(f'Saving dataframe to {outpath}\n') full_df.to_pickle(outpath) full_df.head()