I am trying to implement class activation heatmaps in Theano, as described by Francois Chollet in Section 5.4.3 ("Visualizing heatmaps of class activation") of Deep Learning with Python; this is one of the steps of the Grad-CAM computation.
I am able to compute the gradients of the predicted class (class 0) with respect to the input samples of a minibatch. Here is the relevant part of my code:
import theano.tensor as T
import numpy as np
import lasagne as nn
import importlib
import theano
from global_vars import *
theano.config.floatX = 'float32'
seq_len = 19
num_features = 42
config_name = 'Conv_test'
config_initialize(config_name)
metadata_path = "metadata/Conv_test/dump_Conv_test-20230429-160908-223.pkl"
metadata = np.load(metadata_path, allow_pickle=True)
config = importlib.import_module("configurations.%s" % config_name)
params = np.array(metadata['param_values'])
l_in, l_out = config.build_model()
nn.layers.set_all_param_values(l_out, metadata['param_values'])
all_layers = nn.layers.get_all_layers(l_out)
for i, layer in enumerate(all_layers):
    name = layer.__class__.__name__
    print("  layer %d: %s %s %s" % (i, name, nn.layers.get_output_shape(layer), nn.layers.count_params(layer)))
layer_name = all_layers[34]  # the chosen intermediate Conv1DLayer (layer 34 in the listing below)
sym_x = T.tensor3()
conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True) #softmax output
grads = theano.gradient.jacobian(nn_output[:,0], wrt=sym_x)
res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads], allow_input_downcast=True)
input_data = np.random.random((64, seq_len, num_features))
out, conv, grads = res(input_data)
print("Model output shape", out.shape)
print("Conv output shape", conv.shape)
print("Gradients out shape", grads.shape)
which outputs:
layer 0: InputLayer (None, 19, 42) 0
layer 1: DimshuffleLayer (None, 42, 19) 0
layer 2: Conv1DLayer (None, 16, 19) 2016
layer 3: BatchNormLayer (None, 16, 19) 2080
layer 4: NonlinearityLayer (None, 16, 19) 2080
layer 5: Conv1DLayer (None, 16, 19) 3360
layer 6: BatchNormLayer (None, 16, 19) 3424
layer 7: NonlinearityLayer (None, 16, 19) 3424
layer 8: Conv1DLayer (None, 16, 19) 4704
layer 9: BatchNormLayer (None, 16, 19) 4768
layer 10: NonlinearityLayer (None, 16, 19) 4768
layer 11: ConcatLayer (None, 48, 19) 10272
layer 12: DimshuffleLayer (None, 19, 48) 10272
layer 13: ConcatLayer (None, 19, 90) 10272
layer 14: DimshuffleLayer (None, 90, 19) 10272
layer 15: Conv1DLayer (None, 16, 19) 14592
layer 16: BatchNormLayer (None, 16, 19) 14656
layer 17: NonlinearityLayer (None, 16, 19) 14656
layer 18: Conv1DLayer (None, 16, 19) 17472
layer 19: BatchNormLayer (None, 16, 19) 17536
layer 20: NonlinearityLayer (None, 16, 19) 17536
layer 21: Conv1DLayer (None, 16, 19) 20352
layer 22: BatchNormLayer (None, 16, 19) 20416
layer 23: NonlinearityLayer (None, 16, 19) 20416
layer 24: ConcatLayer (None, 48, 19) 32064
layer 25: DimshuffleLayer (None, 19, 48) 32064
layer 26: ConcatLayer (None, 19, 138) 32064
layer 27: DimshuffleLayer (None, 138, 19) 32064
layer 28: Conv1DLayer (None, 16, 19) 38688
layer 29: BatchNormLayer (None, 16, 19) 38752
layer 30: NonlinearityLayer (None, 16, 19) 38752
layer 31: Conv1DLayer (None, 16, 19) 43104
layer 32: BatchNormLayer (None, 16, 19) 43168
layer 33: NonlinearityLayer (None, 16, 19) 43168
layer 34: Conv1DLayer (None, 16, 19) 47520
layer 35: BatchNormLayer (None, 16, 19) 47584
layer 36: NonlinearityLayer (None, 16, 19) 47584
layer 37: ConcatLayer (None, 48, 19) 65376
layer 38: DimshuffleLayer (None, 19, 48) 65376
layer 39: ConcatLayer (None, 19, 186) 65376
layer 40: ReshapeLayer (64, 3534) 65376
layer 41: DenseLayer (64, 200) 772176
layer 42: BatchNormLayer (64, 200) 772976
layer 43: NonlinearityLayer (64, 200) 772976
layer 44: DenseLayer (64, 8) 774584
layer 45: NonlinearityLayer (64, 8) 774584
Model output shape (64, 8)
Conv output shape (64, 16, 19)
Gradients out shape (64, 64, 19, 42)
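(As an aside, an observation of my own rather than something from the book: theano.gradient.jacobian differentiates each of the 64 scalar scores in nn_output[:, 0] with respect to the entire minibatch, which is why grads has shape (64, 64, 19, 42); all cross-sample slices are zero because the samples are independent. If only the per-sample gradients are needed, a single backward pass over the summed class score is much cheaper:

# Per-sample gradients of the class-0 score w.r.t. the input. Summing over
# the batch is safe here because sample j's score does not depend on sample i,
# so d(sum_i y_i0)/dx_j equals d(y_j0)/dx_j.
grads_per_sample = theano.grad(nn_output[:, 0].sum(), wrt=sym_x)  # shape (64, 19, 42)
)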
However, I am having a lot of trouble figuring out how to compute the gradients of the predicted class with respect to the output feature maps of the chosen intermediate convolutional layer. When I try the following line:
grads = theano.gradient.jacobian(nn_output[:,0], wrt=conv_output)
it fails with the following error:
---------------------------------------------------------------------------
DisconnectedInputError Traceback (most recent call last)
Cell In[46], line 36
34 conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
35 nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True) #softmax output
---> 36 grads = theano.gradient.jacobian(nn_output[:,0], conv_output)
37 res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads],allow_input_downcast=True)
38 input_data = np.random.random((64, seq_len, num_features))
File *\lib\site-packages\theano\gradient.py:1912, in jacobian(expression, wrt, consider_constant, disconnected_inputs)
1907 return rvals
1908 # Computing the gradients does not affect the random seeds on any random
1909 # generator used n expression (because during computing gradients we are
1910 # just backtracking over old values. (rp Jan 2012 - if anyone has a
1911 # counter example please show me)
-> 1912 jacobs, updates = theano.scan(inner_function,
1913 sequences=arange(expression.shape[0]),
1914 non_sequences=[expression] + wrt)
1915 assert not updates, \
1916 ("Scan has returned a list of updates. This should not "
1917 "happen! Report this to theano-users (also include the "
1918 "script that generated the error)")
1919 return format_as(using_list, using_tuple, jacobs)
File *\lib\site-packages\theano\scan_module\scan.py:774, in scan(fn, sequences, outputs_info, non_sequences, n_steps, truncate_gradient, go_backwards, mode, name, profile, allow_gc, strict, return_list)
768 dummy_args = [arg for arg in args
769 if (not isinstance(arg, SharedVariable) and
770 not isinstance(arg, tensor.Constant))]
771 # when we apply the lambda expression we get a mixture of update rules
772 # and outputs that needs to be separated
--> 774 condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
775 if condition is not None:
776 as_while = True
File *\lib\site-packages\theano\gradient.py:1902, in jacobian.<locals>.inner_function(*args)
1900 rvals = []
1901 for inp in args[2:]:
-> 1902 rval = grad(expr[idx],
1903 inp,
1904 consider_constant=consider_constant,
1905 disconnected_inputs=disconnected_inputs)
1906 rvals.append(rval)
1907 return rvals
File *\lib\site-packages\theano\gradient.py:589, in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
586 for elem in wrt:
587 if elem not in var_to_app_to_idx and elem is not cost \
588 and elem not in grad_dict:
--> 589 handle_disconnected(elem)
590 grad_dict[elem] = disconnected_type()
592 cost_name = None
File *\lib\site-packages\theano\gradient.py:576, in grad.<locals>.handle_disconnected(var)
574 elif disconnected_inputs == 'raise':
575 message = utils.get_variable_trace_string(var)
--> 576 raise DisconnectedInputError(message)
577 else:
578 raise ValueError("Invalid value for keyword "
579 "'disconnected_inputs', valid values are "
580 "'ignore', 'warn' and 'raise'.")
DisconnectedInputError:
Backtrace when that variable is created:
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3203, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\YN\AppData\Local\Temp\ipykernel_15448\4013410499.py", line 34, in <module>
conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
File "*\lib\site-packages\lasagne\layers\helper.py", line 197, in get_output
all_outputs[layer] = layer.get_output_for(layer_inputs, **kwargs)
File "*\lib\site-packages\lasagne\layers\conv.py", line 352, in get_output_for
conved = self.convolve(input, **kwargs)
File "*\lib\site-packages\lasagne\layers\conv.py", line 511, in convolve
conved = self.convolution(input, self.W,
File "*\lib\site-packages\lasagne\theano_extensions\conv.py", line 75, in conv1d_mc0
return conved[:, :, 0, :] # drop the unused dimension
Am I missing something? Is there a way to make this gradient computation work? Or is there an existing Grad-CAM implementation for Theano?
First, I tried removing the network's final-layer output from the Theano function and returning only conv_output and the gradients, i.e. instead of the line below:
res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads],allow_input_downcast=True)
using just:
res = theano.function(inputs=[sym_x], outputs=[conv_output, grads],allow_input_downcast=True)
Unfortunately, the same error was still raised.
Then, I tried getting conv_output and the model output in a single get_output call, as follows:
nn_output, conv_output = nn.layers.get_output([l_out, layer_name], sym_x, deterministic=True)
which solved the problem. The reason is that each call to nn.layers.get_output builds a fresh symbolic graph starting from the input layer, so a conv_output obtained from a separate call is never an ancestor of nn_output in Theano's graph, and the gradient is rightly reported as disconnected; requesting both outputs in one call makes them share a single graph.
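To make the distinction explicit, here is a small self-contained sketch with a hypothetical two-layer toy model (the layer variables are made up for illustration) that reproduces both the disconnect and the fix:

import theano
import theano.tensor as T
import lasagne as nn

x = T.matrix()
l_in = nn.layers.InputLayer((None, 4))
l_hid = nn.layers.DenseLayer(l_in, 3)
l_top = nn.layers.DenseLayer(l_hid, 2)

# Two separate get_output calls build two independent graphs: `hid` is not
# an ancestor of `out`, so differentiating out w.r.t. hid is disconnected.
hid = nn.layers.get_output(l_hid, x)
out = nn.layers.get_output(l_top, x)
try:
    theano.grad(out.sum(), wrt=hid)
except theano.gradient.DisconnectedInputError:
    print("disconnected, as expected")

# A single joint call shares one graph, so the gradient exists:
out2, hid2 = nn.layers.get_output([l_top, l_hid], x)
g = theano.grad(out2.sum(), wrt=hid2)  # works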
So, here is the complete code snippet:
import theano.tensor as T
import numpy as np
import lasagne as nn
import importlib
import theano
from global_vars import *
theano.config.floatX = 'float32'
seq_len = 19
num_features = 42
config_name = 'Conv_test'
config_initialize(config_name)
metadata_path = "metadata/Conv_test/dump_Conv_test-20230429-160908-223.pkl"
metadata = np.load(metadata_path, allow_pickle=True)
config = importlib.import_module("configurations.%s" % config_name)
params = np.array(metadata['param_values'])
l_in, l_out = config.build_model()
nn.layers.set_all_param_values(l_out, metadata['param_values'])
all_layers = nn.layers.get_all_layers(l_out)
for i, layer in enumerate(all_layers):
    name = layer.__class__.__name__
    print("  layer %d: %s %s %s" % (i, name, nn.layers.get_output_shape(layer), nn.layers.count_params(layer)))
layer_name = all_layers[34]  # the chosen intermediate Conv1DLayer (layer 34 in the listing below)
sym_x = T.tensor3()
nn_output, conv_output = nn.layers.get_output([l_out, layer_name], sym_x, deterministic=True)
grads = theano.gradient.jacobian(nn_output[:,0], wrt=conv_output)
res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads], allow_input_downcast=True)
input_data = np.random.random((64, seq_len, num_features))
out, conv, grds = res(input_data)
print("Model output shape", out.shape)
print("Conv output shape", conv.shape)
print("Gradients out shape", grds.shape)
which outputs:
The learning rate scheduler class being unpickled
layer 0: InputLayer (None, 19, 42) 0
layer 1: DimshuffleLayer (None, 42, 19) 0
layer 2: Conv1DLayer (None, 16, 19) 2016
layer 3: BatchNormLayer (None, 16, 19) 2080
layer 4: NonlinearityLayer (None, 16, 19) 2080
layer 5: Conv1DLayer (None, 16, 19) 3360
layer 6: BatchNormLayer (None, 16, 19) 3424
layer 7: NonlinearityLayer (None, 16, 19) 3424
layer 8: Conv1DLayer (None, 16, 19) 4704
layer 9: BatchNormLayer (None, 16, 19) 4768
layer 10: NonlinearityLayer (None, 16, 19) 4768
layer 11: ConcatLayer (None, 48, 19) 10272
layer 12: DimshuffleLayer (None, 19, 48) 10272
layer 13: ConcatLayer (None, 19, 90) 10272
layer 14: DimshuffleLayer (None, 90, 19) 10272
layer 15: Conv1DLayer (None, 16, 19) 14592
layer 16: BatchNormLayer (None, 16, 19) 14656
layer 17: NonlinearityLayer (None, 16, 19) 14656
layer 18: Conv1DLayer (None, 16, 19) 17472
layer 19: BatchNormLayer (None, 16, 19) 17536
layer 20: NonlinearityLayer (None, 16, 19) 17536
layer 21: Conv1DLayer (None, 16, 19) 20352
layer 22: BatchNormLayer (None, 16, 19) 20416
layer 23: NonlinearityLayer (None, 16, 19) 20416
layer 24: ConcatLayer (None, 48, 19) 32064
layer 25: DimshuffleLayer (None, 19, 48) 32064
layer 26: ConcatLayer (None, 19, 138) 32064
layer 27: DimshuffleLayer (None, 138, 19) 32064
layer 28: Conv1DLayer (None, 16, 19) 38688
layer 29: BatchNormLayer (None, 16, 19) 38752
layer 30: NonlinearityLayer (None, 16, 19) 38752
layer 31: Conv1DLayer (None, 16, 19) 43104
layer 32: BatchNormLayer (None, 16, 19) 43168
layer 33: NonlinearityLayer (None, 16, 19) 43168
layer 34: Conv1DLayer (None, 16, 19) 47520
layer 35: BatchNormLayer (None, 16, 19) 47584
layer 36: NonlinearityLayer (None, 16, 19) 47584
layer 37: ConcatLayer (None, 48, 19) 65376
layer 38: DimshuffleLayer (None, 19, 48) 65376
layer 39: ConcatLayer (None, 19, 186) 65376
layer 40: ReshapeLayer (64, 3534) 65376
layer 41: DenseLayer (64, 200) 772176
layer 42: BatchNormLayer (64, 200) 772976
layer 43: NonlinearityLayer (64, 200) 772976
layer 44: DenseLayer (64, 8) 774584
layer 45: NonlinearityLayer (64, 8) 774584
Model output shape (64, 8)
Conv output shape (64, 16, 19)
Gradients out shape (64, 64, 16, 19)
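For completeness, here is a minimal NumPy sketch of the remaining Grad-CAM steps from Section 5.4.3 (pool the gradients into per-channel weights, weight the feature maps, apply ReLU), using the arrays and shapes printed above; the final normalization and the epsilon are my own additions, not from the book:

import numpy as np

# grds has shape (64, 64, 16, 19): gradients of every sample's class-0 score
# w.r.t. every sample's feature maps. Only the diagonal (sample i w.r.t.
# sample i) is nonzero, so extract it first.
sample_grads = grds[np.arange(64), np.arange(64)]   # (64, 16, 19)
weights = sample_grads.mean(axis=-1)                # per-channel weights, (64, 16)
cam = (conv * weights[:, :, None]).sum(axis=1)      # weighted feature maps, (64, 19)
cam = np.maximum(cam, 0)                            # ReLU: keep positive evidence only
cam /= cam.max(axis=-1, keepdims=True) + 1e-8       # normalize each heatmap to [0, 1]
print("Heatmap shape", cam.shape)                   # one 19-step heatmap per sample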