So, I've been trying to implement basic autograd and a neural network from scratch using some numpy. Here is the part of my AD code that matters for this question, heavily shortened for the MRE. This is grad.py:
from typing import Self

import numpy as np


class Variable:
    def __init__(self, value=None):
        # Coerce whatever we are handed into an ndarray.
        self.value = value if isinstance(value, np.ndarray) else np.asarray(value)
        self.prev = None

    def _variablify(self, x) -> Self:
        # Wrap raw numbers/arrays so arithmetic stays between Variables.
        if not isinstance(x, Variable):
            x = Variable(x)
        return x

    def __add__(self, x) -> Self:
        x = self._variablify(x)
        y = Variable(self.value + x.value)
        return y

    def __mul__(self, x) -> Self:
        x = self._variablify(x)
        y = Variable(self.value * x.value)
        return y

    __radd__ = __add__
    __rmul__ = __mul__

    def dot(self, x):
        x = self._variablify(x)
        y = Variable(self.value.dot(x.value))
        return y

    def __lt__(self, other):
        return self.value < other

    def __gt__(self, other):
        return self.value > other


def dot(a: Variable, b: Variable):
    return a.dot(b)
In another file, main.py, I try to implement a neural network:
from typing import Self

import numpy as np

from grad import Variable
import grad


class Layer:
    def __init__(self, neurons: int):
        self.n_size = neurons
        self.activation = Variable(0)

    def previous(self, layer: Self):
        # Link this layer to the one before it (and back).
        self.previous_layer = layer
        self.previous_layer.next_layer = self

    def next(self, layer: Self):
        self.next_layer = layer
        self.next_layer.previous_layer = self

    def initialise(self):
        self.weight_matrix = Variable(np.random.normal(0, 0.01, (self.n_size, self.next_layer.n_size)))
        self.bias_vector = Variable(np.random.normal(0, 0.01, (1, self.next_layer.n_size)))
        self.next_layer.x = grad.dot(self.activation, self.weight_matrix) + self.bias_vector
        self.next_layer.activation = np.where(self.next_layer.x > 0, self.next_layer.x, 0.01 * self.next_layer.x)  # Using LeakyReLU


if __name__ == "__main__":
    input_layer = Layer(5)
    input_layer.activation = Variable(np.random.randint(1, 5, (1, 5)))
    h1 = Layer(3)
    h1.previous(input_layer)
    output = Layer(2)
    output.previous(h1)
    input_layer.initialise()
    h1.initialise()
    print(input_layer.activation, h1.activation, output.activation)
So, as you can see in grad.py, I've implemented the code for the dot product wrapper. But now, running the main.py file gives this error -
Traceback (most recent call last):
File ".../main.py", line 62, in <module>
h1.initialise()
File ".../main.py", line 40, in initialise
self.next_layer.x = grad.dot(self.activation, self.weight_matrix) + self.bias_vector
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../grad.py", line 191, in dot
return a.dot(b)
^^^^^^^^
File ".../grad.py", line 49, in __mul__
y = Variable(self.value * x.value)
~~~~~~~~~~~^~~~~~~~~
ValueError: operands could not be broadcast together with shapes (1,3) (3,2)
Now this is bizarre to me, because the error seems to be telling us that
a.dot(b)
somehow calls __mul__, but it never does that. I have absolutely no idea what's going on here. Any help would be appreciated.
Thanks.
The error shows shapes (1, 3) and (3, 2) being treated as operands of an element-wise multiplication, which is indeed wrong for a dot product. It happens because a in a.dot(b) is no longer a Variable by the time h1.initialise() runs. np.where always returns a plain numpy array: it doesn't know how to read your Variable objects, so it converts each one to a 0-d object array and broadcasts, leaving h1.activation as an ndarray with dtype=object whose elements are Variables. a.dot(b) therefore dispatches to numpy.ndarray.dot instead of Variable.dot, and on object arrays NumPy computes the product by calling * and + on the elements, i.e. your Variable.__mul__, with the unbroadcastable shapes (1, 3) and (3, 2).
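You can reproduce the unwrapping in isolation; this is a minimal illustration of the same pattern as in initialise() (the shapes here are arbitrary):

import numpy as np
from grad import Variable

x = Variable(np.ones((1, 3)))
act = np.where(x > 0, x, 0.01 * x)
print(type(act), act.dtype)  # <class 'numpy.ndarray'> object
# act.dot(Variable(np.ones((3, 2)))) now goes through numpy's object-array
# dot, which multiplies the Variable elements with * (Variable.__mul__) and
# raises the same "could not be broadcast together with shapes (1,3) (3,2)".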
The fix is to keep activations as Variables instead of letting np.where turn them back into raw arrays: apply np.where to the underlying .value arrays and wrap the result, so the next layer's a.dot(b) dispatches to Variable.dot again:
self.next_layer.activation = Variable(np.where(self.next_layer.x > 0, self.next_layer.x.value, 0.01 * self.next_layer.x.value))
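If you eventually want gradients to flow through the activation, it may be cleaner to make LeakyReLU an operation in grad.py that computes on .value and returns a Variable. A minimal sketch, assuming your Variable class as posted (the leaky_relu name and slope parameter are mine, not from your code, and a real implementation would also need a backward rule):

import numpy as np
from grad import Variable

def leaky_relu(x: Variable, slope: float = 0.01) -> Variable:
    # Compute on the raw ndarray, then re-wrap so layers keep
    # exchanging Variables rather than plain numpy arrays.
    out = Variable(np.where(x.value > 0, x.value, slope * x.value))
    out.prev = x  # hypothetical bookkeeping for a future backward pass
    return out

With that, initialise() ends with self.next_layer.activation = leaky_relu(self.next_layer.x), and the (1, 3) by (3, 2) product goes through Variable.dot as intended.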