How can I fix the overflow in the sigmoid derivative when doing backpropagation?


I am trying to implement a neural network and train it. I know my forward function (forward propagation) works correctly, but I am not sure about my backpropagation. The problem is that when I compute the sigmoid derivative with the sig_derivation function, it runs into an overflow.

import numpy as np
import tensorflow as tf

# Parameters
features = 3
hidden_layer = 3
outputs_num = 2
epochs = 1
batch_num = 10
sample_num = 100 # => m
alpha = 0.5  # Learning rate

rng = np.random.default_rng()
X = rng.integers(low=0, high=21, size=(features, sample_num))
Y = rng.integers(low=0, high=2, size=(outputs_num, sample_num))

class neural_net:
    activations = {'relu': tf.nn.relu,'tanh': tf.tanh, 'sigmoid': tf.sigmoid, 'softmax': tf.nn.softmax}
    
    def __init__(self, inputs :list, outputs :list, learning_rate :float, batche_num :int, num_layer :int, features :int, each_layer_neurons :list, each_layer_activation :list, sample_num :int, epoch :int, batch_num :int) -> None:
        self.activation_derivation = {'relu': self.relu_derivation,'tanh': self.tanh_derivation, 'sigmoid': self.sig_derivation, "softmax": self.sig_derivation}
        self.learning_rate = learning_rate
        self.numlayer = num_layer
        self.Y = outputs
        self.ac = each_layer_activation
        self.epochs = epoch
        self.n = sample_num

        self.batche = batche_num
        # Split data based on batches
        self.xbatches = np.split(inputs,batch_num,axis=1)
        self.ybatches = np.split(outputs,batch_num,axis=1)
        # Initializing weights and biases for each layer
        self.w = []
        self.b = []
        self.w.append(rng.integers(low=0, high=11, size=(each_layer_neurons[0], features)) * 0.01)
        for i in range(num_layer-1):
            self.w.append(rng.integers(low=0, high=11, size=(each_layer_neurons[i+1], each_layer_neurons[i])) * 0.01)

        for i in range(num_layer):
            self.b.append(np.zeros((each_layer_neurons[i],1)))
    
    def train(self):
        for i in range(self.epochs):
            loss = 0
            for c, b in enumerate(self.xbatches):
                self.a = []
                self.z = []
                self.a.append(b)
                self.forward()
                loss += self.loss(self.ybatches[c],self.a[self.numlayer])
                self.da = []
                self.dz = []
                self.dw = []
                self.db = []
                self.backward(c)
                self.update_parameters()
            print(f'Epoch {i + 1}, Loss(Cost): {loss/self.batche}')
    
    def forward(self):
        for i in range(self.numlayer):
            self.z.append(np.dot(self.w[i],self.a[i]) + self.b[i])
            self.a.append(np.array(self.activations[self.ac[i]](self.z[i])))

           
    def backward(self, batchnum):
        l = self.numlayer
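        # dL/dA of the binary cross-entropy loss: -(Y/A - (1 - Y)/(1 - A))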
        self.da.append(-(np.divide(self.ybatches[batchnum], self.a[l]) - np.divide(1 - self.ybatches[batchnum], 1 - self.a[l])))
        for i in range(l-1,-1,-1):
            derivative = self.activation_derivation[self.ac[i]](self.z[i])
            self.dz.append(np.multiply(self.da[l-1-i], derivative))
            self.dw.append(np.dot(self.dz[l-1-i], self.a[i].T) / (self.n // self.batche))
            self.db.append(np.sum(self.dz[l-1-i],axis=1,keepdims=True) / (self.n // self.batche))
            if i > 0:
                self.da.append(np.dot(self.w[i].T, self.dz[l-1-i]))

    def update_parameters(self):
        l = self.numlayer
        for i in range(l):
            self.w[i] -= self.learning_rate * self.dw[l-1-i]
            self.b[i] -= self.learning_rate * self.db[l-1-i]

    def loss(self, real, current):
        return -np.sum(real * np.log(current) + (1 - real) * np.log(1 - current)) / (self.n // self.batche)

    def tanh_derivation(self, mat):
        return (1 - np.tanh(mat) ** 2)
    
    def sig_derivation(self, mat):
        return mat * (1 - mat)
    
    def relu_derivation(self, mat):
        return (mat > 0).astype(float)
    
setup = neural_net(X,Y,alpha,batch_num,hidden_layer,features,[3,4,2],['relu','relu','softmax'],sample_num,epochs,batch_num)
setup.train()

I tried applying np.round to the z values, but it seems the values are sometimes not rounded because they are floats. I also tried np.format_float_positional.

python numpy deep-learning neural-network
1 Answer

The sig_derivation() method must compute the sigmoid first: the derivative of the sigmoid is sigmoid(z) * (1 - sigmoid(z)), but the backward pass calls sig_derivation on the raw pre-activation z, so mat * (1 - mat) operates on the wrong quantity. Compute the sigmoid inside the method and then apply the formula:

def sig_derivation(self, mat):
    sig = tf.sigmoid(mat)
    return sig * (1 - sig)
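
If you prefer to avoid TensorFlow inside the derivative, a plain-NumPy version of the same fix also works. The sketch below is only an illustration (the stable_sigmoid helper name is mine, not part of the original code); it splits the input by sign so that np.exp() is only ever evaluated on non-positive values and therefore cannot overflow:

import numpy as np  # already imported at the top of the question's script

def stable_sigmoid(x):
    # Numerically stable sigmoid: 1 / (1 + exp(-x)) for x >= 0,
    # exp(x) / (1 + exp(x)) for x < 0, so exp() never sees a large positive argument.
    out = np.empty_like(x, dtype=float)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    exp_x = np.exp(x[~pos])
    out[~pos] = exp_x / (1.0 + exp_x)
    return out

def sig_derivation(self, mat):
    # Same drop-in replacement as above: compute the sigmoid first,
    # then apply sigmoid(z) * (1 - sigmoid(z)).
    sig = stable_sigmoid(mat)
    return sig * (1 - sig)

Either way, note that the question's activation_derivation dictionary maps both 'sigmoid' and 'softmax' to sig_derivation, so this change also affects the softmax output layer of the example network.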