我试图让用户使用自定义键来玩 CarRacing-v0 环境,我想我可以使用 utils.play 来实现,如下所示:
import gym
from gym.utils.play import *
# Launch interactive keyboard play. For CarRacing-v0 this call raises the
# AssertionError shown in the traceback below.
play(gym.make("CarRacing-v0"))
它适用于 atari 环境,但在这种情况下我得到了
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-3-080385c697d2> in <module>()
8 # play.keys_to_action = KEYWORD_TO_KEY
9
---> 10 play(gym.make("CarRacing-v0"))
11
12 # mapping of keys
~/Documents/openai/gym/gym/utils/play.py in play(env, transpose, fps, zoom, callback, keys_to_action)
92 else:
93 assert False, env.spec.id + " does not have explicit key to action mapping, " + \
---> 94 "please specify one manually"
95 relevant_keys = set(sum(map(list, keys_to_action.keys()),[]))
96
AssertionError: CarRacing-v0 does not have explicit key to action mapping, please specify one manually
所以我想知道如何进行这种自定义动作映射?play 源码中的注释说,keys_to_action 是一个映射:dict: tuple(int) -> int
atari_env 是这样做的:
# Keyboard bindings: each action keyword maps to the key code (ord) of the
# character bound to it.
KEYWORD_TO_KEY = {
    keyword: ord(char)
    for keyword, char in (
        ('UP', 'w'),
        ('DOWN', 's'),
        ('LEFT', 'a'),
        ('RIGHT', 'd'),
        ('FIRE', ' '),
    )
}
我知道 car_racing 脚本的做法是:捕获按键,据此更新一个 3 元素数组,再把该数组传递给 env.step。所以我在这里尝试了类似的方法:
# Attempted mapping from an action keyword to the key code of a character
# ('BREAK' presumably means "brake").
KEYWORD_TO_KEY = {'STEER':ord('a'),'GAS':ord('w'),'BREAK':ord('s')}
# NOTE: this sets an attribute on the `play` function object; it is not the
# same as passing `keys_to_action=...` as an argument when calling play().
play.keys_to_action = KEYWORD_TO_KEY
没成功。我知道这样设置转向并不正确,但我以为至少能让车辆朝一个方向转弯。然后我又在一款 Atari 游戏上试着把这些关键字重新映射为自定义按键组合:游戏可以运行,但按键映射仍是原来的,而不是我修改后的。
你们知道如何正确执行此自定义键映射吗?
这是一个使用 pygame 的示例。我还在代码注释中说明了如何玩其他 gym 环境。
import pygame
import gym
from gym.utils.play import play
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# play() draws the frames returned by env.render(), so the environment must
# be created with render_mode="rgb_array" (gym >= 0.26); otherwise render()
# returns None and play() fails.

# Other environments work the same way with discrete actions, e.g.:
# mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}
# play(gym.make("CartPole-v0", render_mode="rgb_array"), keys_to_action=mapping)
# mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 2}
# play(gym.make("MountainCar-v0", render_mode="rgb_array"), keys_to_action=mapping, noop=0)

# Each key (or key combination, written as a string of characters) maps to
# the action array that is passed to env.step() while it is held down.
mapping = {
    "w": np.array([0, 0.7, 0]),
    "a": np.array([-1, 0, 0]),
    "s": np.array([0, 0, 1]),
    "d": np.array([1, 0, 0]),
    "wa": np.array([-1, 0.7, 0]),
    "dw": np.array([1, 0.7, 0]),
    "ds": np.array([1, 0, 1]),
    "as": np.array([-1, 0, 1]),
}

# Action used when no mapped key combination is pressed.
default_action = np.array([0, 0, 0])

play(
    gym.make("CarRacing-v2", render_mode="rgb_array"),
    keys_to_action=mapping,
    noop=default_action,
)
这是一个不依赖 play() 的示例。这样您就有机会记录人类玩家的操作,并将其用于学习。
# Human-controlled CarRacing loop that steps the environment directly
# instead of going through gym.utils.play.play().
import gym
import numpy as np
import pygame

environment_name = "CarRacing-v2"

# Shared action vector passed to env.step(); register_input() mutates it in
# place. Index 0 is driven by LEFT/RIGHT, index 1 by UP, index 2 by DOWN
# (steer, gas, brake for CarRacing).
action = np.array([0.0, 0.0, 0.0])

# Loop-control flags set by register_input().
quit_requested = False
restart = False


def register_input():
    """Translate pending pygame events into the shared action and flags.

    Key presses set the matching component of the module-level ``action``
    array and key releases reset it to 0. RETURN requests an episode
    restart; ESCAPE or closing the window requests program exit.
    """
    global quit_requested, restart
    for event in pygame.event.get():
        if event.type == pygame.KEYDOWN:
            if event.key == pygame.K_LEFT:
                action[0] = -1.0
            if event.key == pygame.K_RIGHT:
                action[0] = +1.0
            if event.key == pygame.K_UP:
                action[1] = +1.0
            if event.key == pygame.K_DOWN:
                action[2] = +0.8
            if event.key == pygame.K_RETURN:
                restart = True
            if event.key == pygame.K_ESCAPE:
                quit_requested = True
        if event.type == pygame.KEYUP:
            if event.key == pygame.K_LEFT:
                action[0] = 0
            if event.key == pygame.K_RIGHT:
                action[0] = 0
            if event.key == pygame.K_UP:
                action[1] = 0
            if event.key == pygame.K_DOWN:
                action[2] = 0
        if event.type == pygame.QUIT:
            quit_requested = True


env = gym.make(environment_name, render_mode="human")
try:
    # Outer loop: one iteration per episode, until the user asks to quit.
    while not quit_requested:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        # Inner loop: one iteration per environment step.
        while True:
            register_input()
            s, r, terminated, truncated, info = env.step(action)
            total_reward += r
            if steps % 200 == 0 or terminated or truncated:
                print("\naction " + str([f"{x:+0.2f}" for x in action]))
                print(f"step {steps} total_reward {total_reward:+0.2f}")
            steps += 1
            if terminated or truncated or restart or quit_requested:
                break
finally:
    # Release the window and other resources even if stepping raised.
    env.close()