只要我不强行终止,这个脚本就会一直卡住不动。为什么会发生这种情况?我该如何解决?
脚本:
import numpy as np
import gymnasium as gym
import torch as th
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from typing import Any
class DummyVecEnv(gym.Env):
    """Minimal Gymnasium environment emitting random observations/rewards.

    NOTE(review): the name shadows stable_baselines3's ``DummyVecEnv``
    vector wrapper even though this is a single (non-vectorized) env —
    consider renaming to avoid confusion.

    Episodes never end on their own (``step`` always returns
    ``terminated=False, truncated=False``); wrap with
    ``gym.wrappers.TimeLimit`` if finite episodes are needed.
    """
    def __init__(self, n_controls: int = 1):
        """
        :param n_controls: size of the continuous action vector.
        """
        super().__init__()
        self.n_controls = n_controls
        # Single scalar observation in [0, 1e4].
        self.observation_space = gym.spaces.Box(
            low=0, high=1e4, shape=(1,),
            dtype=np.float32
        )
        # One continuous action per control, bounded at +/-1e4.
        self.action_space = gym.spaces.Box(
            low=-1e4, high=1e4, shape=(self.n_controls,),
            dtype=np.float32
        )
    def _get_obs(self) -> np.ndarray:
        """Return a random observation sampled from the observation space."""
        return self.observation_space.sample()
    def reset(
        self,
        *,
        seed: int | None = None,
        options: dict[str, Any] | None = None,
        config: dict[str, Any] | None = None,
    ) -> tuple[np.ndarray, dict[str, Any]]:
        """Reset the environment and return (observation, info).

        Fixes vs. original: ``config`` no longer uses a shared mutable
        default ``{}``; ``super().reset(seed=seed)`` is called so Gymnasium
        seeding works; the Gymnasium-standard ``options`` keyword is
        accepted (``config`` kept for backward compatibility).
        """
        super().reset(seed=seed)
        if seed is not None:
            # Seed the space's own RNG so sampled observations are
            # reproducible; _get_obs uses observation_space.sample().
            self.observation_space.seed(seed)
        return self._get_obs(), {}
    def step(
        self,
        action: np.ndarray
    ) -> tuple[np.ndarray, float, bool, bool, dict]:
        """Take one step: random observation, random reward, never done.

        NOTE(review): terminated/truncated are always False, so any caller
        that waits for an episode to finish will wait forever.
        """
        obs = self._get_obs()
        # float() so the reward is a plain Python float, as Gymnasium expects.
        reward = float(np.random.rand())
        terminated = False
        truncated = False
        return obs, reward, terminated, truncated, {}
class MakeDummyEnv:
    """Picklable env factory (SubprocVecEnv needs picklable callables, so a
    class instance is used instead of a lambda/closure).

    :param n_controls: action-vector size forwarded to DummyVecEnv.
    :param is_eval_env: wrap in Monitor so episode statistics are recorded
        (SB3's evaluation utilities rely on Monitor-wrapped eval envs).
    :param max_episode_steps: if given, wrap in ``gym.wrappers.TimeLimit``
        so episodes truncate after this many steps.  DummyVecEnv never ends
        an episode on its own, so without a finite horizon the EvalCallback
        evaluation loop can never complete — pass a finite value for eval
        envs to fix the reported hang.  Default None preserves the original
        (endless-episode) behavior.
    """
    def __init__(
        self,
        n_controls: int = 1,
        is_eval_env: bool = False,
        max_episode_steps: int | None = None,
    ):
        self.n_controls = n_controls
        self.is_eval_env = is_eval_env
        self.max_episode_steps = max_episode_steps
    def __call__(self):
        env = DummyVecEnv(n_controls=self.n_controls)
        if self.max_episode_steps is not None:
            # Give the endless env a finite horizon: truncated=True after
            # max_episode_steps, letting evaluation episodes complete.
            env = gym.wrappers.TimeLimit(
                env, max_episode_steps=self.max_episode_steps
            )
        if self.is_eval_env:
            # Monitor records episode reward/length for evaluate_policy.
            return Monitor(env)
        return env
class TensorboardCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard:
    rollout returns, per-dimension policy stds, and timestep counters.
    """
    def __init__(self, verbose=0):
        super().__init__(verbose)
    def _on_rollout_end(self) -> None:
        """ the rollout is lost before _on_step is called, log returns here """
        model = self.model
        if hasattr(model, "num_timesteps"):
            print('Number of time steps at rollout end:', model.num_timesteps)
        if hasattr(model, "rollout_buffer"):
            # batch_size=None yields a single batch spanning the whole buffer.
            batch = next(model.rollout_buffer.get(batch_size=None))
            self.logger.record('train/buffer_size', len(batch.actions))
            self.logger.record('train/return_mean', batch.returns.mean().item())
            self.logger.record('train/return_std', batch.returns.std().item())
            self.logger.record(
                'train/first_env_temp',
                batch.observations.flatten()[0].item()
            )
        # Track how many rollouts have completed, stored on the model itself.
        if not hasattr(model, "n_iterations"):
            model.n_iterations = 0
        model.n_iterations += 1
    def _on_step(self) -> bool:
        """ log the std of each output here """
        if hasattr(self.model, "num_timesteps"):
            self.logger.record('train/num_timesteps', self.model.num_timesteps)
        if hasattr(self.model.policy, "log_std"):
            # One scalar std per action dimension.
            for idx, log_std in enumerate(self.model.policy.log_std):
                self.logger.record(
                    f"train/std_{idx:d}",
                    th.exp(log_std).item()
                )
        return True
    def _on_training_start(self) -> None:
        if hasattr(self.model, "num_timesteps"):
            print('Number of time steps at training start:', self.model.num_timesteps)
    def _on_training_end(self) -> None:
        if hasattr(self.model, "num_timesteps"):
            print('Number of time steps at training end:', self.model.num_timesteps)
class CustomEvalCallback(EvalCallback):
    """EvalCallback variant that prints when it fires.

    Fix vs. original: the boolean returned by ``super()._on_step()`` is now
    propagated instead of discarded.  EvalCallback returns False to request
    that training stop (e.g. via callback_on_new_best /
    callback_after_eval); always returning True silently ignored that
    signal.
    """
    def _on_step(self) -> bool:
        """ this is called by self.on_step which is called when EvalCallback is triggered """
        print(f"_on_step: CustomEvalCallback has been called {self.n_calls:d} times")
        continue_training = True
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            print('\teval will be performed now...')
            # super()._on_step() re-checks eval_freq itself, runs the
            # evaluation, and returns the continue-training flag.
            continue_training = super()._on_step()
        return continue_training
if __name__ == "__main__":
    # Training envs: 3 parallel subprocess copies, each with a 3-dim action.
    print('Creating environments for SubprocVecEnv...')
    vec_env = SubprocVecEnv(
        env_fns=[MakeDummyEnv(n_controls=3) for _ in range(3)]
    )
    print('Defining the A2C model...')
    model = A2C(
        policy="MlpPolicy",
        env=vec_env,
        n_steps=1,  # one vec-env step (3 timesteps) per rollout
        verbose=0,
        device='cpu',
        tensorboard_log="./logs"
    )
    print('Making an evaluation callback...')
    n_eval_env = 2
    # Eval envs are Monitor-wrapped (is_eval_env=True) so episode stats
    # are available to the evaluation loop.
    eval_env = SubprocVecEnv(
        env_fns=[
            MakeDummyEnv(n_controls=3, is_eval_env=True)
            for _ in range(n_eval_env)
        ]
    )
    # NOTE(review): likely cause of the reported hang — when this callback
    # triggers, EvalCallback runs n_eval_episodes full episodes on eval_env,
    # but DummyVecEnv.step always returns terminated=False, truncated=False
    # (see the step method above), so no episode can ever finish and the
    # evaluation loop blocks forever.  Give the eval envs a finite horizon
    # (e.g. a TimeLimit wrapper) to fix it — TODO confirm against SB3's
    # evaluate_policy implementation.
    eval_callback = CustomEvalCallback(
        eval_env=eval_env,
        callback_on_new_best=None,
        callback_after_eval=None,
        n_eval_episodes=n_eval_env,
        eval_freq=5,  # counted in _on_step calls (one per vec-env step), not total timesteps
        log_path=None, # leave this as None
        best_model_save_path='saves', # saves as 'best_model' here
        deterministic=True,
        render=False,
        verbose=0,
        warn=True,
    )
    print('Training the A2C model...')
    model.learn(
        total_timesteps=25,
        progress_bar=False,
        log_interval=1,
        callback=[TensorboardCallback(), eval_callback],
        tb_log_name='dummy_log',
    )
问题在于,DummyVecEnv 的回合从未结束,即 terminated 或 truncated 永远不会为 True。这意味着