import os

import cv2
import gym
import numpy as np
import torchvision.transforms as T

from atariari.benchmark.wrapper import AtariARIWrapper

# Fixed visiting order of the sub-goals, given as indices into the goal set
# loaded from subgoals.txt in ALEEnvironment.__init__.
SUBGOAL_ORDER = [8, 6, 1, 0, 2, 7, 2, 0, 1, 6, 8, 9]

class ALEEnvironment():

    def __init__(self, args, device='cpu'):
        if 'cuda' in device:
            os.environ['CUDA_VISIBLE_DEVICES'] = device.split(':')[-1]

        self.ale = AtariARIWrapper(gym.make(
            'MontezumaRevenge-v4',
            frameskip=args.frame_skip,
            render_mode='rgb_array',  # return the rgb key in step metadata with the current environment RGB frame
            repeat_action_probability=0.0))

        self.histLen = args.sequence_length
        print('History length: ', self.histLen)

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.actions = np.arange(self.ale.action_space.n)
        print('Action space: ', self.actions)

        self.mode = "train"
        self.life_lost = False

        # Seed the environment for reproducibility
        self.ale.reset(seed=args.random_seed)

        # Get the initial screen
        self.init_screen = self.getScreen()
        print(f'Size of screen is {self.init_screen.shape}')

        # Perform a NOOP action to initialize self.done and self.info
        _, reward, self.done, self.info = self.ale.step(0)

        # Sub-goals from gaze analysis; each row is a bounding box [x1, y1, x2, y2]
        self.goalSet = np.loadtxt('subgoals.txt', dtype=int, delimiter=',')
        self.goalCenterLoc = []

        for goal in self.goalSet:
            goalCenter = [float(goal[0] + goal[2]) / 2, float(goal[1] + goal[3]) / 2]
            self.goalCenterLoc.append(goalCenter)

        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.reachedGoal = [0, 0, 0, 0]
        self.histState = self.initializeHistState()
        print('History state shape: ', self.histState.shape)

        self.to_tensor = T.ToTensor()

    def initializeHistState(self):
        'Initialize the frame history by stacking histLen copies of the current state'
        if self.histLen >= 2:
            histState = np.concatenate((self.getState(), self.getState()), axis=2)
            for _ in range(self.histLen - 2):
                histState = np.concatenate((histState, self.getState()), axis=2)
        else:
            histState = self.getState()

        return histState

    def get_input_shape(self):
        # Channels-first shape of the stacked state returned by getStackedState()
        return (self.histLen, self.screen_height, self.screen_width)

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0, 0]

    def act(self, action):
        'Perform action and handle results'
        lives = self.info['lives']

        _, reward, self.done, self.info = self.ale.step(self.actions[action])

        # Agent location from the AtariARI wrapper (y is flipped into screen coordinates)
        self.agentLastX, self.agentLastY = self.info['labels']['player_x'], 320 - self.info['labels']['player_y']

        self.life_lost = (lives != self.info['lives'])
        # Slide the frame history window: drop the oldest frame, append the newest
        currState = self.getState()
        self.histState = np.concatenate((self.histState[:, :, 1:], currState), axis=2)
        return reward

    def restart(self):
        'Restart environment: set goals and agent location'
        self.ale.reset()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def beginNextLife(self):
        'Begin next life without restarting environment: set goals and agent location'
        self.life_lost = False
        self.reachedGoal = [0, 0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def detect_agent_with_tracker(self):
        # Assumes an object detector for the agent (Panama Joe) has been attached
        # externally as self.panamajoe_detector; it is not created in __init__.
        img = self.getScreenOrig()
        preds = self.panamajoe_detector.predict(img)[0]
        if len(preds['boxes']) == 1:
            box = preds['boxes'][0].detach().cpu().numpy()
            x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
            self.agentLastX, self.agentLastY = x + width / 2, y + height / 2
        return (self.agentLastX, self.agentLastY)

    def goal_reached(self, goal):
        # Use the external detector when one is attached, otherwise fall back to
        # the pixel-difference check in goalReached().
        if getattr(self, 'panamajoe_detector', None):
            x1, y1, x2, y2 = self.goalSet[goal]
            x_mean, y_mean = self.detect_agent_with_tracker()
            return x1 < x_mean < x2 and y1 < y_mean < y2
        else:
            return self.goalReached(goal)

    def get_goal_direction(self, goal_idx):
        'Unit vector pointing from the previous sub-goal center (or the agent origin) to the next sub-goal center'
        if goal_idx == 0:
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[SUBGOAL_ORDER[goal_idx - 1]]

        # Center of the next sub-goal box, precomputed in __init__
        goal_direction = np.array(self.goalCenterLoc[SUBGOAL_ORDER[goal_idx]]) - lastGoalCenter
        norm = np.linalg.norm(goal_direction)

        if norm == 0:
            return goal_direction

        return goal_direction / norm

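    # Illustration of get_goal_direction with made-up coordinates (not taken from
    # subgoals.txt): if the previous sub-goal center is (10, 10) and the next one is
    # (40, 50), the difference is (30, 40) with norm 50, so the returned unit
    # direction is (0.6, 0.8).
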
    def distanceReward(self, lastGoal, goal):
        'Calculate distance between agent and next sub-goal'
        if lastGoal is None:
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.agentLastX, self.agentLastY
        dis = np.sqrt((goalCenter[0] - agentX) ** 2 + (goalCenter[1] - agentY) ** 2)
        disLast = np.sqrt((lastGoalCenter[0] - agentX) ** 2 + (lastGoalCenter[1] - agentY) ** 2)
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) ** 2 + (goalCenter[1] - lastGoalCenter[1]) ** 2)
        return 0.001 * (disLast - dis) / disGoals

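    # Worked example of distanceReward with made-up coordinates (not taken from the
    # game): with the last goal center at (0, 0) and the next goal center at (30, 40),
    # disGoals = 50.  An agent sitting on the last goal gets 0.001 * (0 - 50) / 50 = -0.001,
    # an agent at the segment midpoint (15, 20) gets 0, and an agent on the next goal
    # center gets 0.001 * (50 - 0) / 50 = +0.001, so the shaping term lies in
    # [-0.001, 0.001] along the path between consecutive sub-goals.
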
    def getScreen(self):
        'Get processed screen: grayscale and resized'
        screen = self.ale.render(mode='rgb_array')
        screen = cv2.cvtColor(screen, cv2.COLOR_RGB2GRAY)
        resized = cv2.resize(screen, (self.screen_width, self.screen_height), interpolation=cv2.INTER_AREA)
        return resized

    def getScreenOrig(self):
        'Get original RGB screen'
        return self.ale.render(mode='rgb_array')

    def getScreenRGB(self):
        'Get resized RGB screen for finding the agent location'
        screen = self.ale.render(mode='rgb_array')
        resized = cv2.resize(screen, (self.screen_width, self.screen_height), interpolation=cv2.INTER_AREA)
        return resized

    def getState(self):
        'Get current state, i.e. the processed current screen, with a channel dimension added for the network input'
        # Same processing as getScreen(), plus a trailing channel axis
        return np.expand_dims(self.getScreen(), axis=-1)

    def getStackedState(self):
        return self.to_tensor(self.histState)

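    # Shape summary (assuming, for illustration, histLen = 4 and an 84x84 resized
    # screen, neither of which is fixed by this file): getState() returns a uint8
    # array of shape (84, 84, 1), histState accumulates to (84, 84, 4), and
    # getStackedState() applies torchvision's ToTensor, yielding a float tensor of
    # shape (4, 84, 84) with values scaled to [0, 1].
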
    def isTerminal(self):
        if self.mode == 'train':
            return self.done or self.life_lost
        return self.done

    def isGameOver(self):
        return self.done

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset()
        self.life_lost = False

    def goalReached(self, goal):
        'Check whether the agent overlaps a goal region by comparing the current screen against the initial one'
        # Assumes goalSet rows are flat bounding boxes [x1, y1, x2, y2] in resized-screen
        # coordinates, as in __init__; pixels are indexed as screen[row][column].
        x1, y1, x2, y2 = self.goalSet[goal]
        goalScreen = self.init_screen
        stateScreen = self.getScreen()
        count = 0
        for col in range(x1, x2):
            for row in range(y1, y2):
                if goalScreen[row][col] != stateScreen[row][col]:
                    count = count + 1

        # The agent sprite covers roughly 30 pixels; require more than 30% of them to differ
        if float(count) / 30 > 0.3:
            self.reachedGoal[goal] = 1
            return True

        return False

    def trueGoalReached(self, goal):
        'With the AtariARI wrapper enabled, the agent location is updated every step and reflects the true position'
        goalPosition = self.goalSet[goal]
        return goalPosition[0] < self.agentLastX < goalPosition[2] and goalPosition[1] < self.agentLastY < goalPosition[3]

    def goalNotReachedBefore(self, goal):
        if self.reachedGoal[goal] == 1:
            return False
        return True
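
# A minimal usage sketch, not part of the original module, showing how this class
# might be driven.  The argparse flags and their defaults below are assumptions
# inferred from the attributes read off `args` in __init__ (frame_skip,
# sequence_length, screen_width, screen_height, random_seed); they are not a
# documented command-line interface, and subgoals.txt must exist in the working
# directory.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--frame-skip', type=int, default=4)
    parser.add_argument('--sequence-length', type=int, default=4)
    parser.add_argument('--screen-width', type=int, default=84)
    parser.add_argument('--screen-height', type=int, default=84)
    parser.add_argument('--random-seed', type=int, default=0)
    args = parser.parse_args()

    env = ALEEnvironment(args)
    print('Number of actions:', env.numActions())

    # Take a few random actions and report the environment reward plus the shaped
    # distance reward toward the first sub-goal in the gaze-derived order.
    for _ in range(10):
        action = np.random.randint(env.numActions())
        reward = env.act(action)
        shaped = env.distanceReward(None, SUBGOAL_ORDER[0])
        print(f'reward: {reward}, shaping toward first sub-goal: {shaped:.5f}')
        if env.isTerminal():
            env.restart()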