evalcallback悬挂在稳定的baselines中3

问题描述 投票:0回答:1

脚本只要我允许,脚本就会坐在这里。 为什么会发生这种情况?我该如何解决?

脚本:

import numpy as np import gymnasium as gym import torch as th from stable_baselines3 import A2C from stable_baselines3.common.vec_env import SubprocVecEnv from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.callbacks import BaseCallback, EvalCallback from typing import Any class DummyVecEnv(gym.Env): def __init__(self, n_controls: int = 1): super().__init__() self.n_controls = n_controls self.observation_space = gym.spaces.Box( low=0, high=1e4, shape=(1,), dtype=np.float32 ) self.action_space = gym.spaces.Box( low=-1e4, high=1e4, shape=(self.n_controls,), dtype=np.float32 ) def _get_obs(self) -> np.ndarray: return self.observation_space.sample() def reset( self, *, seed: int | None = None, config: dict[str, Any] = {}, ) -> tuple[np.ndarray, dict[str, Any]]: return self._get_obs(), {} def step( self, action: np.ndarray ) -> (np.ndarray, float, bool, bool, dict): obs = self._get_obs() reward = np.random.rand() terminated = False truncated = False return obs, reward, terminated, truncated, {} class MakeDummyEnv: def __init__(self, n_controls: int = 1, is_eval_env: bool = False): self.n_controls = n_controls self.is_eval_env = is_eval_env def __call__(self): env = DummyVecEnv(n_controls=self.n_controls) if self.is_eval_env: return Monitor(env) else: return env class TensorboardCallback(BaseCallback): """ Custom callback for plotting additional values in tensorboard. """ def __init__(self, verbose=0): super().__init__(verbose) def _on_rollout_end(self) -> None: """ the rollout is lost before _on_step is called, log returns here """ if hasattr(self.model, "num_timesteps"): print('Number of time steps at rollout end:', self.model.num_timesteps) if hasattr(self.model, "rollout_buffer"): rollout_data = next(self.model.rollout_buffer.get(batch_size=None)) self.logger.record('train/buffer_size', len(rollout_data.actions)) self.logger.record('train/return_mean', rollout_data.returns.mean().item()) self.logger.record('train/return_std', rollout_data.returns.std().item()) self.logger.record('train/first_env_temp', rollout_data.observations.flatten()[0].item() ) if not hasattr(self.model, "n_iterations"): self.model.n_iterations = 0 self.model.n_iterations += 1 def _on_step(self) -> bool: """ log the std of each output here """ if hasattr(self.model, "num_timesteps"): self.logger.record('train/num_timesteps', self.model.num_timesteps) if hasattr(self.model.policy, "log_std"): for i in range(len(self.model.policy.log_std)): self.logger.record( f"train/std_{i:d}", th.exp(self.model.policy.log_std[i]).item() ) return True def _on_training_start(self) -> None: if hasattr(self.model, "num_timesteps"): print('Number of time steps at training start:', self.model.num_timesteps) def _on_training_end(self) -> None: if hasattr(self.model, "num_timesteps"): print('Number of time steps at training end:', self.model.num_timesteps) class CustomEvalCallback(EvalCallback): def _on_step(self) -> bool: """ this is called by self.on_step which is called when EvalCallback is triggered """ print(f"_on_step: CustomEvalCallback has been called {self.n_calls:d} times") if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0: print('\teval will be performed now...') super()._on_step() return True if __name__ == "__main__": print('Creating environments for SubprocVecEnv...') vec_env = SubprocVecEnv( env_fns=[MakeDummyEnv(n_controls=3) for _ in range(3)] ) print('Defining the A2C model...') model = A2C( policy="MlpPolicy", env=vec_env, n_steps=1, verbose=0, device='cpu', tensorboard_log="./logs" ) print('Making an evaluation callback...') n_eval_env = 2 eval_env = SubprocVecEnv( env_fns=[ MakeDummyEnv(n_controls=3, is_eval_env=True) for _ in range(n_eval_env) ] ) eval_callback = CustomEvalCallback( eval_env=eval_env, callback_on_new_best=None, callback_after_eval=None, n_eval_episodes=n_eval_env, eval_freq=5, # this appears to be number of iterations, not time steps log_path=None, # leave this as None best_model_save_path='saves', # saves as 'best_model' here deterministic=True, render=False, verbose=0, warn=True, ) print('Training the A2C model...') model.learn( total_timesteps=25, progress_bar=False, log_interval=1, callback=[TensorboardCallback(), eval_callback], tb_log_name='dummy_log', )

问题是,DummyveCenv从未完成,即它永远不会返回或截断为真实。  这意味着
python-3.x reinforcement-learning stablebaseline3
1个回答
最新问题
© www.soinside.com 2019 - 2024. All rights reserved.