Int-HRL/RAMStateLabeling.ipynb

8.1 KiB

Get RAM state of Montezuma's Revenge

In [ ]:
import os
import random

import cv2
import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from atariari.benchmark.wrapper import AtariARIWrapper
from IPython import display

from utils import visualize_sample


# Directory that holds the recorded trials (relative to the notebook).
DATA_PATH = 'montezuma_revenge'

# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
trials_file = os.path.join(DATA_PATH, "all_trials.pkl")
df = pd.read_pickle(trials_file)
df.head()

Use AtariARI Wrapper to extract RAM state

labels: {'room_number': 15,
'player_x': 46,
'player_y': 235,
'player_direction': 76,
'enemy_skull_x': 58,
'enemy_skull_y': 240,
'key_monster_x': 132,
'key_monster_y': 254,
'level': 0,
'num_lives': 1,
'items_in_inventory_count': 0,
'room_state': 10,
'score_0': 1,
'score_1': 8,
'score_2': 0}
In [ ]:
# Emulator settings: no frame skipping, RGB rendering and no sticky actions.
make_kwargs = dict(
    frameskip=1,
    render_mode='rgb_array',
    repeat_action_probability=0.0,
)
env = AtariARIWrapper(gym.make('MontezumaRevenge-v4', **make_kwargs))

# The raw RAM is also reachable through env.unwrapped.ale.getRAM().
# Seeded reset followed by a single step, so that `info` holds one sample of
# the labels returned by the wrapper.
obs = env.reset(seed=42)
obs, reward, done, info = env.step(1)

Visualize Atari-HEAD data and RAM state labels

The pixel offsets applied to the player and skull RAM coordinates were determined manually.

In [ ]:
# Episode that is replayed in this visualization.
EPISODE_ID = '285_RZ_5619207_E00'
# Manually determined offsets that map the RAM coordinates onto screen pixels.
PLAYER_Y_FLIP = 320
SKULL_X_OFFSET, SKULL_Y_OFFSET = 35, 65

# `display` (IPython.display) is imported with the other imports at the top of the notebook.
obs = env.reset()

screen = plt.imshow(env.render(mode='rgb_array'), aspect='auto')
plt.axis('off')

agent_locations = []
skull_locations = []
room_ids = []

for i, action in enumerate(df.loc[df.ID == EPISODE_ID].action.values):

    n_state, reward, done, info = env.step(action)
    img = info['rgb']

    # The label values are presumably NumPy uint8 scalars read from RAM (TODO confirm).
    # Arithmetic on them can wrap around (e.g. `x - 5` for x < 5) or, with NumPy >= 2,
    # raise OverflowError for `320 - y`, so convert to plain Python ints first.
    labels = {name: int(value) for name, value in info['labels'].items()}
    room_ids.append(labels['room_number'])

    # agent: the y coordinate is mirrored, then a box is drawn around the location
    agent_x, agent_y = labels['player_x'], PLAYER_Y_FLIP - labels['player_y']
    agent_locations.append([agent_x, agent_y])
    img = cv2.rectangle(img, (agent_x - 5, agent_y - 15), (agent_x + 10, agent_y + 10), (0, 255, 0), 2)

    # skull: both coordinates are shifted by a constant offset
    skull_x = labels['enemy_skull_x'] + SKULL_X_OFFSET
    skull_y = labels['enemy_skull_y'] - SKULL_Y_OFFSET
    skull_locations.append([skull_x, skull_y])
    img = cv2.rectangle(img, (skull_x - 5, skull_y - 10), (skull_x + 5, skull_y + 5), (255, 0, 0), 2)

    # Overlay the room id and the step index to make label drift easy to spot.
    img = cv2.putText(img=img, text=f"Room ID: {labels['room_number']} index: {i}", org=(5, 205),
                      fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.3, color=(255, 255, 255), thickness=1)

    screen.set_data(img)  # just update the data of the existing image artist
    display.display(plt.gcf())
    display.clear_output(wait=True)

Number of actions with correct labeling for environment with random seed = 42

Discovered manually through the visualization above. Each value is the number of leading actions of the episode whose labels are valid [-1: all actions valid, 0: no actions valid].

In [ ]:
# Number of leading actions per episode whose RAM labels match the recording
# (-1: every action of the episode is valid, 0: none is valid).
test = {'284_RZ_5540489_E00': 11900, '285_RZ_5619207_E00': 2940, '285_RZ_5619207_E01': -1,
       '287_RZ_7172481_E00': 0, '291_RZ_7364933_E00': 12000, '324_RZ_452975_E00':0,
       '333_RZ_900705_E00': 3000, '340_RZ_1323550_E00': 5950, '359_RZ_1993616_E00': 9000,
       '365_RZ_2079996_E00': 9000, '371_RZ_2173469_E00': -1, '385_RZ_2344725_E00': 3500,
       '398_RZ_2530473_E00': 1200, '402_RZ_2603283_E00': -1, '416_RZ_2788252_E00': -1,
       '429_RZ_2945490_E00': 4500, '436_RZ_3131841_E00': 10500, '459_RZ_3291266_E00': 5400,
       '469_RZ_3390904_E00': 14500, '480_RZ_3470098_E00': 8000, '493_RZ_3557734_E00': 10500, 
       '523_RZ_4091327_E00': 0, '536_RZ_4420664_E00': 0, '548_RZ_4509746_E00': 0,
       '561_RZ_4598680_E00': 0, '573_RZ_4680777_E00': 0, '584_RZ_4772014_E00': 0,
       '588_RZ_5032278_E00': 0}

num_frames = 0
num_labeled_frames = 0
for episode, num_samples in test.items():
    episode_length = int((df.ID == episode).sum())
    num_frames += episode_length
    if num_samples == -1:
        # -1 marks an episode in which every action is valid.
        num_labeled_frames += episode_length
    else:
        # An episode cannot contribute more labeled frames than it contains.
        num_labeled_frames += min(num_samples, episode_length)

# Report the real number of episodes in the table instead of a hard-coded count.
print(f'Overall percentage {num_labeled_frames / num_frames:%} for {len(test)} episodes')

Label Atari-HEAD data

In [ ]:
%%time 
df['level'] = None 
df['room_id'] = None
df['player_location'] = None
df['skull_location'] = None 

for episode in df.ID.unique():
    
    obs = env.reset()
    room_ids = []
    agent_locations = []
    skull_locations = []
    level = []
    
    num_valid_actions = test.get(episode)

    for action in df.loc[df.ID == episode].action.values[:num_valid_actions]: 
        
        n_state, reward, done, info = env.step(action)
        room_ids.append(info['labels']['room_number'])
        level.append(info['labels']['level'])
        
        # agent 
        mean_x, mean_y = info['labels']['player_x'], 320 - info['labels']['player_y']
        agent_locations.append([mean_x, mean_y])
        
        # skull
        mean_x, mean_y = info['labels']['enemy_skull_x'] + 35, info['labels']['enemy_skull_y'] - 65
        skull_locations.append([mean_x, mean_y])
    
    index = df.loc[df.ID == episode].index[:num_valid_actions]
    df.loc[index, 'level'] = level
    df.loc[index, 'room_id'] = room_ids
    df.loc[index, 'player_location'] = agent_locations
    df.loc[index, 'skull_location'] = skull_locations

print(f'Percentage of labeled data {len(df[df.room_id.notnull()]) / len(df):%}')
print()
df.to_pickle(os.path.join(DATA_PATH, "all_trials_labeled.pkl"))  
df.head()
In [ ]: