I need a Lasagne layer that takes two layers as input and returns the first layer's tensor indexed by the argmax of the second layer. As an example: the first layer (layer0) has shape (64, 9, 19, 21) and the second layer (layer1) has shape (64, 8). The operation I want is: take the argmax of layer1 along dimension 1, which returns a one-dimensional array of indices, then use those indices to select along the second dimension of layer0, i.e. layer0[np.arange(64), argmax_layer, :, :], so the output shape is (64, 19, 21).
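In plain NumPy the intended operation looks like this (a minimal sketch with random data, just to illustrate the indexing):

import numpy as np

layer0 = np.random.rand(64, 9, 19, 21)   # first input
layer1 = np.random.rand(64, 8)           # second input
idx = np.argmax(layer1, axis=1)          # shape (64,)
out = layer0[np.arange(64), idx, :, :]   # pick one 19x21 slice per sample
print(out.shape)                         # (64, 19, 21)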
Here is my custom layer implementation:
class ArgmaxLayer(lasagne.layers.MergeLayer):
    def __init__(self, incomings, **kwargs):
        super(ArgmaxLayer, self).__init__(incomings=incomings, **kwargs)
        if len(incomings) != 2:
            raise ValueError("ArgmaxLayer requires two inputs.")

    def get_output_for(self, inputs, **kwargs):
        input0_layer = inputs[0]
        input1_layer = inputs[1]
        # Argmax over the last axis of the second input, then index the
        # second dimension of the first input with it.
        layer_argmax = np.argmax(input1_layer, axis=-1)
        result = input0_layer[np.arange(64), layer_argmax, :, :]
        return result

    def get_output_shape_for(self, input_shapes):
        inshape1 = input_shapes[0]
        return (inshape1[0], inshape1[2], inshape1[3])
Here is the network:
def build_model():
    l_in_1 = lasagne.layers.InputLayer(shape=(None, 19, 21))
    l_in_2 = lasagne.layers.InputLayer(shape=(None, 9, 19, 21))
    l_reshape_a2 = lasagne.layers.ReshapeLayer(
        l_in_2, (batch_size, 9*19*21))
    l_12 = lasagne.layers.DenseLayer(
        l_reshape_a2, num_units=8, nonlinearity=lasagne.nonlinearities.softmax)
    l_ArgmaxLayer = ArgmaxLayer(incomings=[l_in_2, l_12])
    l_in_sal1 = lasagne.layers.ConcatLayer([l_in_1, l_ArgmaxLayer], axis=-1)
    l_reshape_a = lasagne.layers.ReshapeLayer(
        l_in_sal1, (batch_size, 19*42))
    l_1 = lasagne.layers.DenseLayer(
        l_reshape_a, num_units=N_L1, nonlinearity=lasagne.nonlinearities.rectify)
    l_1_b = lasagne.layers.batch_norm(l_1)
    l_out = lasagne.layers.DenseLayer(
        l_1_b, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax)
    return l_in_1, l_in_2, l_out
Here is the network summary:
| Layer | Layer_name        | output_shape      | # parameters |
|-------|-------------------|-------------------|--------------|
| 0     | InputLayer        | (None, 19, 21)    | 0            |
| 1     | InputLayer        | (None, 9, 19, 21) | 0            |
| 2     | ReshapeLayer      | (64, 3591)        | 0            |
| 3     | DenseLayer        | (64, 8)           | 28736        |
| 4     | ArgmaxLayer       | (None, 19, 21)    | 28736        |
| 5     | ConcatLayer       | (None, 19, 42)    | 28736        |
| 6     | ReshapeLayer      | (64, 798)         | 28736        |
| 7     | DenseLayer        | (64, 200)         | 188336       |
| 8     | BatchNormLayer    | (64, 200)         | 189136       |
| 9     | NonlinearityLayer | (64, 200)         | 189136       |
| 10    | DenseLayer        | (64, 8)           | 190744       |
When I try to build the network, I get the following error:
DisconnectedInputError Traceback (most recent call last)
Cell In[1], line 2018
2014 print()
2017 if __name__ == '__main__':
-> 2018 main()
Cell In[1], line 464, in main()
460 momentum = theano.shared(np.float32(clr_momentum))
461 # print(" on_train_begin setting learning rate to %.8f: " % learning_rate.get_value())
462 ##############################################################################################
--> 464 all_grads = T.grad(cost_tr, all_params)
466 cut_norm = config.cut_grad
467 updates, norm_calc = nn.updates.total_norm_constraint(all_grads, max_norm=cut_norm, return_norm=True)
theano\gradient.py:589, in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
586 for elem in wrt:
587 if elem not in var_to_app_to_idx and elem is not cost \
588 and elem not in grad_dict:
--> 589 handle_disconnected(elem)
590 grad_dict[elem] = disconnected_type()
592 cost_name = None
theano\gradient.py:576, in grad.<locals>.handle_disconnected(var)
574 elif disconnected_inputs == 'raise':
575 message = utils.get_variable_trace_string(var)
--> 576 raise DisconnectedInputError(message)
577 else:
578 raise ValueError("Invalid value for keyword "
579 "'disconnected_inputs', valid values are "
580 "'ignore', 'warn' and 'raise'.")
DisconnectedInputError:
Backtrace when that variable is created:
File "C:\*\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "C:\*\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\*\AppData\Local\Temp\ipykernel_13860\1447824330.py", line 2018, in <module>
main()
File "C:\*\AppData\Local\Temp\ipykernel_13860\1447824330.py", line 257, in main
l_in_1, l_in_2, l_out = config.build_model()
File "E:\*.py", line 122, in build_model
l_12 = lasagne.layers.DenseLayer(
File "C:\*\lib\site-packages\lasagne\layers\dense.py", line 107, in __init__
self.b = self.add_param(b, (num_units,), name="b",
File "C:\*\lib\site-packages\lasagne\layers\base.py", line 234, in add_param
param = utils.create_param(spec, shape, name)
File "C:\*\lib\site-packages\lasagne\utils.py", line 393, in create_param
spec = theano.shared(spec, broadcastable=bcast)
What is wrong with this implementation?
When I pass all incoming layers as lasagne.layers.InputLayer, the ArgmaxLayer works without any error. Here is an example:
class ArgmaxLayer(lasagne.layers.MergeLayer):
    def __init__(self, incomings, **kwargs):
        super(ArgmaxLayer, self).__init__(incomings, **kwargs)
        self.incomings = incomings
        if len(self.incomings) != 2:
            raise ValueError("ArgmaxLayer requires two inputs.")

    def get_output_for(self, inputs, **kwargs):
        input0_layer = inputs[0]
        input1_layer = inputs[1]
        layer_argmax = np.argmax(input1_layer[:, 0:9], axis=-1)
        result = input0_layer[np.arange(64), layer_argmax, :, :]
        return result

    def get_output_shape_for(self, input_shapes):
        inshape1 = input_shapes[0]
        return (inshape1[0], inshape1[2], inshape1[3])
def build_model():
    l_in_1 = lasagne.layers.InputLayer(shape=(None, 19, 21))
    l_in_2 = lasagne.layers.InputLayer(shape=(None, 9, 19, 21))
    l_reshape_a2 = lasagne.layers.ReshapeLayer(
        l_in_1, (batch_size, 19*21))
    l_ArgmaxLayer = ArgmaxLayer(incomings=[l_in_2, l_reshape_a2])
    l_reshape_a = lasagne.layers.ReshapeLayer(
        l_ArgmaxLayer, (batch_size, 19*21))
    l_1 = lasagne.layers.DenseLayer(
        l_reshape_a, num_units=N_L1, nonlinearity=lasagne.nonlinearities.rectify)
    l_1_b = lasagne.layers.batch_norm(l_1)
    l_out = lasagne.layers.DenseLayer(
        l_1_b, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax)
    return l_in_1, l_in_2, l_out
But in my real scenario the second input can be any intermediate layer, not just an InputLayer. How can I fix this?
There are two separate problems in the original implementation.

First, get_output_for receives symbolic Theano expressions, not NumPy arrays, so the layer should be written with Theano ops: T.argmax instead of np.argmax, and T.arange(input.shape[0]) instead of np.arange(64), which also removes the hard-coded batch size.

Second, and this is what actually raises the DisconnectedInputError: argmax produces integer indices and is not differentiable, so no gradient can flow from the cost back through the ArgmaxLayer to the parameters of the DenseLayer l_12. The backtrace confirms this: the disconnected variable was created in l_12 = lasagne.layers.DenseLayer(...), when its bias b was added. Your second example works only because both inputs are InputLayers, so there are no trainable parameters behind the argmax that could become disconnected.

Here is a symbolic version of the layer that works with any incoming layer, not just InputLayer instances:
import theano.tensor as T
import lasagne

class ArgmaxLayer(lasagne.layers.MergeLayer):
    def __init__(self, incomings, **kwargs):
        super(ArgmaxLayer, self).__init__(incomings, **kwargs)
        if len(incomings) != 2:
            raise ValueError("ArgmaxLayer requires two inputs.")

    def get_output_for(self, inputs, **kwargs):
        # `inputs` are symbolic Theano expressions, so use Theano ops.
        input0, input1 = inputs
        # Argmax of the second input along its last dimension.
        layer_argmax = T.argmax(input1, axis=-1)
        # Build the batch indices symbolically instead of hard-coding 64.
        batch_indices = T.arange(input0.shape[0])
        # Index the second dimension of the first input with the argmax.
        return input0[batch_indices, layer_argmax, :, :]

    def get_output_shape_for(self, input_shapes):
        inshape0 = input_shapes[0]
        return (inshape0[0], inshape0[2], inshape0[3])
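As a quick sanity check (a minimal sketch with random data; the shapes simply mirror your example), the layer compiles and produces the expected output shape:

import numpy as np
import theano
import lasagne

l_a = lasagne.layers.InputLayer(shape=(None, 9, 19, 21))
l_b = lasagne.layers.InputLayer(shape=(None, 8))
l_arg = ArgmaxLayer(incomings=[l_a, l_b])

# Compile a function that evaluates the layer's symbolic output.
out = lasagne.layers.get_output(l_arg)
fn = theano.function([l_a.input_var, l_b.input_var], out)

x = np.random.rand(64, 9, 19, 21).astype(theano.config.floatX)
y = np.random.rand(64, 8).astype(theano.config.floatX)
print(fn(x, y).shape)  # (64, 19, 21)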
To wire the ArgmaxLayer into your model, instantiate it with the two incoming layers:
l_in_1 = lasagne.layers.InputLayer(shape=(None, 19, 21))
l_in_2 = lasagne.layers.InputLayer(shape=(None, 9, 19, 21))
# ... build l_12 (the softmax DenseLayer) as in build_model() ...
# Create the ArgmaxLayer instance.
l_ArgmaxLayer = ArgmaxLayer(incomings=[l_in_2, l_12])
# Continue building your model.
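Note that even with the symbolic version, T.grad(cost_tr, all_params) will still raise DisconnectedInputError as long as all_params contains the W and b of l_12, because the hard argmax blocks every gradient path to that layer. You can either drop those two parameters from all_params (or pass disconnected_inputs='ignore' to T.grad, in which case l_12 simply never gets trained), or replace the hard selection with a differentiable soft selection. The sketch below shows the latter; SoftSelectLayer is a hypothetical name, and it assumes you change the selector DenseLayer to num_units=9 so its softmax output weights all nine slices (your current setup uses num_units=8):

import theano.tensor as T
import lasagne

class SoftSelectLayer(lasagne.layers.MergeLayer):
    """Differentiable alternative to ArgmaxLayer: instead of hard-indexing
    a single slice, weight every slice of the first input by the softmax
    probabilities from the second input, so gradients reach the selector."""
    def __init__(self, incomings, **kwargs):
        super(SoftSelectLayer, self).__init__(incomings, **kwargs)
        if len(incomings) != 2:
            raise ValueError("SoftSelectLayer requires two inputs.")

    def get_output_for(self, inputs, **kwargs):
        x, probs = inputs  # x: (batch, k, 19, 21), probs: (batch, k)
        # Broadcast the probabilities over the two spatial dimensions.
        weights = probs.dimshuffle(0, 1, 'x', 'x')
        # Weighted sum over the k slices -> (batch, 19, 21).
        return T.sum(x * weights, axis=1)

    def get_output_shape_for(self, input_shapes):
        shape0 = input_shapes[0]
        return (shape0[0], shape0[2], shape0[3])

As the softmax becomes more peaked during training, the weighted sum approaches the hard argmax selection; if you need a true argmax at prediction time, you can still apply it outside the training graph.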