PicklingError: Can't pickle <built-in function reset_code>


I am taking a course on AI and its applications, and our professor gave us a Python program to run on our computers with PyTorch. I have a GPU (an Nvidia 3070 Ti laptop GPU) and had some trouble getting the program to run on it, but the error below also occurs when I use my CPU.

Now I get this error:

PicklingError: Can't pickle <built-in function reset_code>: it's not found as torch._C._dynamo.eval_frame.reset_code

I have not been able to find a solution to my problem.

Here is my code:

# Save the trained algorithm
save_path = "TD3_trained_agent.pt"
import os
import imageio
import gymnasium as gym
import numpy as np
import torch
from agilerl.algorithms.td3 import TD3
from agilerl.components.replay_buffer import ReplayBuffer
from agilerl.hpo.mutation import Mutations
from agilerl.hpo.tournament import TournamentSelection
from agilerl.training.train_off_policy import train_off_policy
from agilerl.utils.utils import create_population, make_vect_envs
from tqdm import trange


# Initial hyperparameters
INIT_HP = {
    "ALGO": "TD3",
    "POP_SIZE": 4,  # Population size
    "BATCH_SIZE": 128,  # Batch size
    "LR_ACTOR": 0.0001,  # Actor learning rate
    "LR_CRITIC": 0.001,  # Critic learning rate
    "O_U_NOISE": True,  # Ornstein-Uhlenbeck action noise
    "EXPL_NOISE": 0.1,  # Action noise scale
    "MEAN_NOISE": 0.0,  # Mean action noise
    "THETA": 0.15,  # Rate of mean reversion in OU noise
    "DT": 0.01,  # Timestep for OU noise
    "GAMMA": 0.99,  # Discount factor
    "MEMORY_SIZE": 100_000,  # Max memory buffer size
    "POLICY_FREQ": 2,  # Policy network update frequency
    "LEARN_STEP": 1,  # Learning frequency
    "TAU": 0.005,  # For soft update of target parameters
    # Swap image channels dimension from last to first [H, W, C] -> [C, H, W]
    "CHANNELS_LAST": False,  # Use with RGB states
    "EPISODES": 1000,  # Number of episodes to train for
    "EVO_EPOCHS": 20,  # Evolution frequency, i.e. evolve after every 20 episodes
    "TARGET_SCORE": 200.0,  # Target score that will beat the environment
    "EVO_LOOP": 3,  # Number of evaluation episodes
    "EVO_STEPS": 10_000,  # Evolution frequency
    "EVAL_STEPS": None,  # Evaluation steps
    # "MAX_STEPS": 100,  # Maximum number of steps an agent takes in an environment
    "MAX_STEPS": 10,  # Maximum number of steps an agent takes in an environment
    "TOURN_SIZE": 2,  # Tournament size
    "ELITISM": True,  # Elitism in tournament selection
    "EVAL_LOOP": 1,  # Evaluation episodes
    "LEARNING_DELAY": 1000,  # Steps before starting learning
}

# Mutation parameters
MUT_P = {
    # Mutation probabilities
    "NO_MUT": 0.4,  # No mutation
    "ARCH_MUT": 0.2,  # Architecture mutation
    "NEW_LAYER": 0.2,  # New layer mutation
    "PARAMS_MUT": 0.2,  # Network parameters mutation
    "ACT_MUT": 0.2,  # Activation layer mutation
    "RL_HP_MUT": 0.2,  # Learning HP mutation
    # Learning HPs to choose from
    "RL_HP_SELECTION": ["lr", "batch_size", "learn_step"],
    "MUT_SD": 0.1,  # Mutation strength
    "RAND_SEED": 42,  # Random seed
    # Define max and min limits for mutating RL hyperparams
    "MIN_LR": 0.0001,
    "MAX_LR": 0.01,
    "MIN_BATCH_SIZE": 8,
    "MAX_BATCH_SIZE": 1024,
    "MIN_LEARN_STEP": 1,
    "MAX_LEARN_STEP": 16,
}

# Set-up the device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.backends.mps.is_available():
    device = 'mps'  # for M1/M2 Mac use 'mps'

print(torch.cuda.is_available())
print(device)

# Ensure that only serializable objects are used
def get_device():
    if torch.cuda.is_available():
        return 'cuda'
    elif torch.backends.mps.is_available():
        return 'mps'
    else:
        return 'cpu'



def main():

    # Create the environment
    num_envs = 8
    env = make_vect_envs("LunarLanderContinuous-v2", num_envs=num_envs)  # Create environment

    try:
        state_dim = env.single_observation_space.n  # Discrete observation space
        one_hot = True  # Requires one-hot encoding
    except Exception:
        state_dim = env.single_observation_space.shape  # Continuous observation space
        one_hot = False  # Does not require one-hot encoding
    try:
        action_dim = env.single_action_space.n  # Discrete action space
    except Exception:
        action_dim = env.single_action_space.shape[0]  # Continuous action space

    INIT_HP["MAX_ACTION"] = float(env.single_action_space.high[0])
    INIT_HP["MIN_ACTION"] = float(env.single_action_space.low[0])

    if INIT_HP["CHANNELS_LAST"]:
        # Adjust dimensions for PyTorch API (C, H, W), for envs with RGB image states
        state_dim = (state_dim[2], state_dim[0], state_dim[1])

    # Set-up the device
    # device = "cuda" if torch.cuda.is_available() else "cpu"
    device = get_device()
    print(torch.cuda.is_available())
    print(device)
    if device != 'cpu':
        print("success")  # a GPU/MPS device was found
    device = 'cpu'  # force CPU for now (the error occurs either way)
    # Define the network configuration of a simple mlp with two hidden layers, each with 64 nodes
    net_config = {"arch": "mlp", "hidden_size": [64, 64]}

    # Define a population
    pop = create_population(
        algo="TD3",  # Algorithm
        state_dim=state_dim,  # State dimension
        action_dim=action_dim,  # Action dimension
        one_hot=one_hot,  # One-hot encoding
        net_config=net_config,  # Network configuration
        INIT_HP=INIT_HP,  # Initial hyperparameters
        population_size=INIT_HP["POP_SIZE"],  # Population size
        num_envs=num_envs,
        device=device,
    )

    field_names = ["state", "action", "reward", "next_state", "terminated"]
    memory = ReplayBuffer(
        memory_size=10_000,  # Max replay buffer size
        field_names=field_names,  # Field names to store in memory
        device=device,
    )

    tournament = TournamentSelection(
        INIT_HP["TOURN_SIZE"],
        INIT_HP["ELITISM"],
        INIT_HP["POP_SIZE"],
        INIT_HP["EVAL_LOOP"],
    )

    mutations = Mutations(
        algo=INIT_HP["ALGO"],
        no_mutation=MUT_P["NO_MUT"],
        architecture=MUT_P["ARCH_MUT"],
        new_layer_prob=MUT_P["NEW_LAYER"],
        parameters=MUT_P["PARAMS_MUT"],
        activation=MUT_P["ACT_MUT"],
        rl_hp=MUT_P["RL_HP_MUT"],
        rl_hp_selection=MUT_P["RL_HP_SELECTION"],
        min_lr=MUT_P["MIN_LR"],
        max_lr=MUT_P["MAX_LR"],
        min_batch_size=MUT_P["MIN_BATCH_SIZE"],
        max_batch_size=MUT_P["MAX_BATCH_SIZE"],
        min_learn_step=MUT_P["MIN_LEARN_STEP"],
        max_learn_step=MUT_P["MAX_LEARN_STEP"],
        mutation_sd=MUT_P["MUT_SD"],
        arch=net_config["arch"],
        rand_seed=MUT_P["RAND_SEED"],
        device=device,
    )
    '''
    trained_pop, pop_fitnesses = train_off_policy(
        env=env,
        env_name="LunarLanderContinuous-v2",
        algo="TD3",
        pop=pop,
        memory=memory,
        INIT_HP=INIT_HP,
        MUT_P=MUT_P,
        swap_channels=INIT_HP["CHANNELS_LAST"],
        #INIT_HP["MAX_STEPS"]=INIT_HP["MAX_STEPS"],
        evo_steps=INIT_HP["EVO_STEPS"],
        eval_steps=INIT_HP["EVAL_STEPS"],
        eval_loop=INIT_HP["EVAL_LOOP"],
        learning_delay=INIT_HP["LEARNING_DELAY"],
        target=INIT_HP["TARGET_SCORE"],
        tournament=tournament,
        mutation=mutations,
        wb=False,  # Boolean flag to record run with Weights & Biases
        save_elite=True,  # Boolean flag to save the elite agent in the population
        elite_path="TD3_trained_agent.pt",
    )
    '''

    total_steps = 0

    # TRAINING LOOP
    print("Training...")
    pbar = trange(INIT_HP["MAX_STEPS"], unit="step")
    while np.less([agent.steps[-1] for agent in pop], INIT_HP["MAX_STEPS"]).all():
        pop_episode_scores = []
        for agent in pop:  # Loop through population
            state, info = env.reset()  # Reset environment at start of episode
            scores = np.zeros(num_envs)
            completed_episode_scores = []
            steps = 0

            for idx_step in range(INIT_HP["EVO_STEPS"] // num_envs):
                if INIT_HP["CHANNELS_LAST"]:
                    state = np.moveaxis(state, [-1], [-3])

                action = agent.get_action(state)  # Get next action from agent

                # Act in environment
                next_state, reward, terminated, truncated, info = env.step(action)
                scores += np.array(reward)
                steps += num_envs
                total_steps += num_envs

                # Collect scores for completed episodes
                reset_noise_indices = []
                for idx, (d, t) in enumerate(zip(terminated, truncated)):
                    if d or t:
                        completed_episode_scores.append(scores[idx])
                        agent.scores.append(scores[idx])
                        scores[idx] = 0
                        reset_noise_indices.append(idx)
                agent.reset_action_noise(reset_noise_indices)

                # Save experience to replay buffer
                if INIT_HP["CHANNELS_LAST"]:
                    memory.save_to_memory(
                        state,
                        action,
                        reward,
                        np.moveaxis(next_state, [-1], [-3]),
                        terminated,
                        is_vectorised=True,
                    )
                else:
                    memory.save_to_memory(
                        state,
                        action,
                        reward,
                        next_state,
                        terminated,
                        is_vectorised=True,
                    )

                # Learn according to learning frequency
                if memory.counter > INIT_HP["LEARNING_DELAY"] and len(memory) >= agent.batch_size:
                    for _ in range(num_envs // agent.learn_step):
                        # Sample replay buffer
                        experiences = memory.sample(agent.batch_size)
                        # Learn according to agent's RL algorithm
                        agent.learn(experiences)

                state = next_state

            pbar.update(INIT_HP["EVO_STEPS"] // len(pop))
            agent.steps[-1] += steps
            pop_episode_scores.append(completed_episode_scores)

        # Evaluate population
        fitnesses = [
            agent.test(
                env,
                swap_channels=INIT_HP["CHANNELS_LAST"],
                #INIT_HP["MAX_STEPS"]=INIT_HP["EVAL_STEPS"],
                loop=INIT_HP["EVAL_LOOP"],
            )
            for agent in pop
        ]
        mean_scores = [
            (
                np.mean(episode_scores)
                if len(episode_scores) > 0
                else "0 completed episodes"
            )
            for episode_scores in pop_episode_scores
        ]

        print(f"--- Global steps {total_steps} ---")
        print(f"Steps {[agent.steps[-1] for agent in pop]}")
        print(f"Scores: {mean_scores}")
        print(f'Fitnesses: {["%.2f" % fitness for fitness in fitnesses]}')
        print(
            f'5 fitness avgs: {["%.2f" % np.mean(agent.fitness[-5:]) for agent in pop]}'
        )

        # Tournament selection and population mutation
        elite, pop = tournament.select(pop)
        pop = mutations.mutation(pop)

        # Update step counter
        for agent in pop:
            agent.steps.append(agent.steps[-1])

    # Save the trained algorithm
    save_path = "TD3_trained_agent.pt"
    elite.save_checkpoint(save_path)

    pbar.close()
    env.close()


    # td3 = TD3.load_checkpoint(save_path, device=device)
    td3 = TD3.load(save_path, device=device)

    test_env = gym.make("LunarLanderContinuous-v2", render_mode="rgb_array")
    rewards = []
    frames = []
    testing_eps = 1  # change this
    max_testing_steps = 1000  # change this
    with torch.no_grad():
        for ep in range(testing_eps):
            state = test_env.reset()[0]  # Reset environment at start of episode
            score = 0

            for step in range(max_testing_steps):
                # If your state is an RGB image
                if INIT_HP["CHANNELS_LAST"]:
                    state = np.moveaxis(state, [-1], [-3])

                # Get next action from agent
                action, *_ = td3.get_action(state, training=False)

                # Save the frame for this step and append to frames list
                frame = test_env.render()
                frames.append(frame)

                # Take the action in the environment
                state, reward, terminated, truncated, _ = test_env.step(action)

                # Collect the score
                score += reward

                # Break if environment 0 is done or truncated
                if terminated or truncated:
                    print("terminated")
                    break

            # Collect and print episodic reward
            rewards.append(score)
            print("-" * 15, f"Episode: {ep}", "-" * 15)
            print("Episodic Reward: ", rewards[-1])

        print(rewards)

        test_env.close()

    frames = frames[::3]
    gif_path = "./videos/"
    os.makedirs(gif_path, exist_ok=True)
    imageio.mimwrite(
        os.path.join("./videos/", "td3_lunar_lander.gif"), frames, duration=50, loop=0
    )
    mean_fitness = np.mean(rewards)


if __name__ == '__main__':
    main()



I have only tried switching between the GPU and the CPU, with the same result. I have also reinstalled torch two or three times.

1 Answer

I think the problem is that you did not create a Collate_fn class.
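If this refers to the collate_fn argument of PyTorch's torch.utils.data.DataLoader, it is just a callable that turns a list of samples into a batch. A minimal sketch is below; the dataset, tensor shapes, and function name are illustrative and not taken from the question's code:

import torch
from torch.utils.data import DataLoader, TensorDataset

def collate_transitions(batch):
    # batch is a list of (state, action) tuples; stack each field into one tensor
    states = torch.stack([sample[0] for sample in batch])
    actions = torch.stack([sample[1] for sample in batch])
    return states, actions

# Hypothetical data: 100 states of dim 8 and 100 actions of dim 2
dataset = TensorDataset(torch.randn(100, 8), torch.randn(100, 2))
loader = DataLoader(dataset, batch_size=32, collate_fn=collate_transitions)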
