I first built the following model:
from tensorflow.keras.layers import (Dense, Flatten, Conv2D, Dropout,
                                     BatchNormalization, AveragePooling2D,
                                     ReLU, Activation)
from tensorflow.keras import Model
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv = Conv2D(4, (3, 3), padding='same', activation='linear',
                           input_shape=x_train.shape[1:])
        self.bn = BatchNormalization()
        self.RL = ReLU()
        self.FL = Flatten()
        self.d1 = Dense(4, activation='relu')
        self.d2 = Dense(100, activation='softmax')

    def call(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.RL(x)
        x = self.FL(x)
        x = self.d1(x)
        return self.d2(x)
However, this model does not work at all: the accuracy stays at about 1%, which is chance level for 100 classes, meaning it learns nothing. (I trained it on CIFAR-100; the model is kept simple just to sanity-check the code, and a sketch of the training setup is given below the second model.) But when I changed the code as follows, it worked:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv = Conv2D(4, (3, 3), padding='same', activation='linear',
                           input_shape=x_train.shape[1:])
        self.bn = BatchNormalization()
        # The line below is changed from ReLU() -> Activation('relu')
        self.RL = Activation('relu')
        self.FL = Flatten()
        self.d1 = Dense(4, activation='relu')
        self.d2 = Dense(100, activation='softmax')

    def call(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.RL(x)
        x = self.FL(x)
        x = self.d1(x)
        return self.d2(x)
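For reference, this is roughly how I compiled and trained the model (a minimal sketch; the optimizer and epoch count here are placeholders, with CIFAR-100 loaded via tf.keras.datasets):

import tensorflow as tf

# Load CIFAR-100 with integer labels and scale pixels to [0, 1]
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = MyModel()
# Sparse categorical crossentropy matches the integer labels and the
# 100-way softmax output; the hyperparameters are illustrative only
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))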
Why does this happen? I can't figure out the cause. Thank you for reading.
They are exactly equivalent; there is no difference between them that would make your network unstable. You can verify this by running both layers on the same input:
from tensorflow.keras.layers import ReLU, Activation, Input
from tensorflow.keras import Model
import numpy as np

# model1 applies the dedicated ReLU layer
ip = Input(shape=(5,))
rl = ReLU()(ip)
model1 = Model(ip, rl)

# model2 applies the generic Activation layer with the 'relu' activation
ip = Input(shape=(5,))
rl = Activation('relu')(ip)
model2 = Model(ip, rl)

i = np.array([[1., 2., 3., 4., 5.],
              [-1., -100., -123213., 0., 100000.],
              [-1., 100234323423., -123213., 0., 100000.]])
print(model1(i))
print(model2(i))
Output:
tf.Tensor(
[[1.0000000e+00 2.0000000e+00 3.0000000e+00 4.0000000e+00 5.0000000e+00]
[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.0000000e+05]
[0.0000000e+00 1.0023432e+11 0.0000000e+00 0.0000000e+00 1.0000000e+05]], shape=(3, 5), dtype=float32)
tf.Tensor(
[[1.0000000e+00 2.0000000e+00 3.0000000e+00 4.0000000e+00 5.0000000e+00]
[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 1.0000000e+05]
[0.0000000e+00 1.0023432e+11 0.0000000e+00 0.0000000e+00 1.0000000e+05]], shape=(3, 5), dtype=float32)
Both models produce exactly the same output.
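As an additional sanity check (a minimal sketch of my own), you can assert element-wise equality of the two layers on random input; with their default arguments, both compute the same element-wise max(x, 0):

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import ReLU, Activation

x = tf.constant(np.random.randn(8, 5).astype('float32'))
out_relu = ReLU()(x)              # dedicated ReLU layer
out_act = Activation('relu')(x)   # generic Activation layer

# The two outputs agree exactly, element for element
np.testing.assert_array_equal(out_relu.numpy(), out_act.numpy())
print("ReLU() and Activation('relu') agree on every element.")

So whatever made your first model fail to train, it was not a functional difference between ReLU() and Activation('relu').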