我目前正在训练我的神经网络,但训练到某个时间点之后,输出开始出现 NaN 值。
输入:状态和动作
输出:状态值和动作概率
输入已事先做过归一化(min-max 缩放)
state_size(状态大小)= (87,)
action_size = (864,)
我无法理解为什么 NaN 值会在一段时间后出现。
希望能得到大家的建议!
def normalize_data(self, input_state, legal_actions):
    """Min-max scale one state vector and one legal-actions vector.

    Each vector is reshaped to a single column before scaling, so the
    minimum and maximum are taken over that vector's own entries.

    Args:
        input_state: Array-like of state features.
        legal_actions: Array-like describing the legal actions
            (presumably a 0/1 mask -- TODO confirm against the caller).

    Returns:
        A ``(input_state, legal_actions)`` tuple of flattened 1-D arrays.
    """
    state_column = np.array(input_state).reshape(-1, 1)
    input_state = MinMaxScaler().fit_transform(state_column).ravel()

    legal_actions = np.array(legal_actions).reshape(-1, 1)
    if legal_actions.size and legal_actions.min() == legal_actions.max():
        # MinMaxScaler maps a constant column to all zeros.  For a mask in
        # which every entry is the same non-zero value (e.g. all actions
        # legal) that would erase the mask, and multiplying by it would leave
        # a zero sum that cannot be renormalised.  Keep the values as they are.
        legal_actions = legal_actions.astype(float)
    else:
        legal_actions = MinMaxScaler().fit_transform(legal_actions)
    return input_state, legal_actions.ravel()
def build_network(self):
    """Build and compile the two-output (value + masked policy) model.

    The model takes the state vector and the legal-actions vector as inputs.
    Both are concatenated and passed through a shared trunk, which feeds
    ``value_head`` and ``policy_head``.  The policy softmax is multiplied
    element-wise by the legal-actions input and renormalised per sample.

    Sets ``self.v``, ``self.pi`` and ``self.model``, writes the architecture
    diagram to ``residual_tower_Productionline.png`` and prints the model
    summary.
    """
    state_input = Input(shape=self.state_size, name='Feature_Vector')
    leg_actions = Input(shape=self.action_size, name='Legal Actions')

    # Shared trunk: batch-norm -> dense -> ReLU.
    t = concatenate([state_input, leg_actions])
    t = BatchNormalization()(t)
    t = Dense(2340)(t)
    t = Activation('relu')(t)
    # A rate of 0.0 drops nothing; the layer is kept so the layer list of
    # the model stays the same.
    t = Dropout(0.0)(t)

    # Policy: softmax over all actions, masked by the legal-actions input.
    actions_prob = Dense(self.action_size[0], activation='softmax')(self.policy_head(t))
    action_probs_masked = Multiply()([actions_prob, leg_actions])

    # Renormalise the masked probabilities so each row sums to one.  The
    # masked sum is exactly zero when the mask is all zeros (or when the
    # softmax underflows on every legal entry); dividing by it would yield
    # 0/0 = NaN and poison the loss and every weight after the next update.
    # Flooring the denominator at K.epsilon() leaves rows with a sum at or
    # above epsilon unchanged and turns the degenerate case into zeros.
    layer = Lambda(
        lambda x1: x1 / K.maximum(K.sum(x1, axis=1, keepdims=True), K.epsilon())
    )
    action_logits_masked = layer(action_probs_masked)  # probabilities, not logits
    self.pi = action_logits_masked

    # Value: single sigmoid unit on top of the value head.
    self.v = Dense(1, activation='sigmoid', name='v')(self.value_head(t))

    self.model = Model(inputs=[state_input, leg_actions], outputs=[self.v, self.pi])
    plot_model(self.model, to_file='residual_tower_Productionline.png', show_shapes=True, show_layer_names=True)
    self.model.compile(
        loss=[self.mse, 'categorical_crossentropy'],
        optimizer=tf.keras.optimizers.Adam(0.001),
        metrics=['mae', 'accuracy'],
    )
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    self.model.summary()
def value_head(self, input):
    """Apply the value-head hidden layer: Dense(100) followed by ReLU.

    Args:
        input: Tensor to feed into the head.  The name shadows the builtin
            but is kept so existing keyword callers keep working.

    Returns:
        The output tensor of the ReLU layer.
    """
    x = Dense(100, name='Dense_valuehead')(input)
    layer = tf.keras.layers.ReLU()
    x = layer(x)
    return x
def policy_head(self, input):
    """Apply the policy-head hidden layer: Dense(2160) followed by ReLU.

    Prints ``policy head`` each time it is called.

    Args:
        input: Tensor to feed into the head.

    Returns:
        The output tensor of the ReLU layer.
    """
    print('policy head')
    hidden = Dense(2160, name='Dense_policyhead')(input)
    return tf.keras.layers.ReLU()(hidden)