为什么这个梯度下降算法不适用于正弦函数的所有值？

Question

我有这个梯度下降代码，可以很好地计算布尔函数，但不能计算正弦函数。不知有什么问题呢？我试图将隐藏层或输出层的激活函数从 sigmoid 更改为双曲正切，但它仍然无法计算某些正弦值的良好值。

这里是Python代码：

# L'algorithme de rétro-propagation du gradient dans un
# réseau de neurones avec 1 couche cachée.

# modifications par D. Mattei

from random import seed, uniform
seed(1789)     # si vous voulez avoir les mêmes tirages aléatoires à chaque exécution du fichier !
from math import exp, pow,pi , sin, tanh
from MatrixNumPy import MatrixNumPy
from time import time
import sys

# sigmoïde
def sig(x):

    try:
        s = 1/(1+ exp(-x))
    except OverflowError as e:
        # Somehow no exception is caught here...
        #print('OverflowError...')
        #print("x=",x)
        #sys.exit(1)
        s = 0
    except Exception as e:
        print(e)
    
    return s

class ReseauRetroPropagation():
    
    def __init__(self,ne=2,nc=3,ns=1,nbiter=3,eta=1):
        '''Construit un réseau de neurones avec une couche cachée. Il y a ne entrées (+ biais),
        nc neurones dans la couche cachée (+ biais) et ns neurones en sortie.'''
    
        print(ne,'entrées(+1),',nc,'neurones cachés(+1) et',ns,'en sortie.')
        
        # le réseau calcule sur 7 vecteurs et 2 matrices
        self.z_i = ne * [0]     # les entrées concrètes seront fournies avec la méthode accepte
        
        # ne+1 in the matrix size because with add one column of bias in the matrix for each hidden neuron of the hidden layer "c"
        self.mat_ij = MatrixNumPy(lambda j,i: uniform(-1,1),nc,ne+1)  # self.mat_ij[j][i] == poids i->j
        
        self.z_j = nc * [0]     # valeurs z_j des neurones cachés
        self.grad_j = nc * [0]    # gradients locaux des neurones cachés

        # nc+1 in the matrix size because with add one column of bias in the matrix for each neuron of the output layer "k"
        self.mat_jk = MatrixNumPy(lambda k,j: uniform(-1,1),ns,nc+1)  # self.mat_jk[k][j] == poids j->k
        
        self.z_k = ns * [0]     # valeurs z_k des neurones de sortie
        self.grad_k = ns * [0]    # gradients locaux des neurones de sortie
        
        self.nbiter = nbiter
        self.eta = eta                  # "learning rate" 
        self.error = 0


        
    # fusionne accept et propage
    # z_* sans le coef. 1 constant
    def accepte_et_propage(self,Lentrees):         # on entre des entrées et on les propage
        
        if len(Lentrees) != len(self.z_i):
            raise ValueError("Mauvais nombre d'entrées !")
        
        self.z_i = Lentrees       # on ne touche pas au biais
        
        # propagation des entrées vers la sortie
        
        # calcul des stimuli reçus par la couche cachée à-partir des entrées

        # note: i just reference the variables for code readness (hide all the self keyword)
        mat_ij = self.mat_ij
        z_i = self.z_i

        # create a list with 1 in front
        z_i_1 = [1] + z_i
        
        z̃_j = mat_ij * z_i_1 # z̃_i = matrix * iterable (list here)

        # calcul des réponses des neurones cachés
        z_j = list(map(sig,z̃_j))
        #z_j = list(map(tanh,z̃_j))
            
        # calcul des stimuli reçus par la couche de sortie
        mat_jk = self.mat_jk

        # create a list with 1 in front
        z_j_1 = [1] + z_j
        
        z̃_k = mat_jk * z_j_1 # matrix * iterable (list here)

        # calcul des réponses de la couche de sortie
        z_k = list(map(sig,z̃_k))
        #z_k = list(map(tanh,z̃_k))
        
        # update the variable when necessary
        self.z_j = z_j
        self.z_k = z_k

        #print("accepte_et_propage : self.z_k ="); print(self.z_k)
        #return self.z_k               # et retour des sorties


    
    def apprentissage(self,Lexemples):  # apprentissage des poids par une liste d'exemples

        nbiter = self.nbiter

        ip = 0                          # numéro de l'exemple courant

        # TODO: take in account the error as stop point
        for it in range(nbiter):   # le nombre d'itérations est fixé !
            
            error = 0.0                     # l'erreur totale pour cet exemple
            
            (entrees,sorties_attendues) = Lexemples[ip]         # un nouvel exemple à apprendre
            
            # PROPAGATION VERS L'AVANT
            self.accepte_et_propage(entrees)       # sorties obtenues sur l'exemple courant, self.z_k et z_j sont mis à jour
              
            # RETRO_PROPAGATION VERS L'ARRIERE, EN DEUX TEMPS

            # note: i just reference the variables for code readness (hide all the self keyword)
            z_k = self.z_k # read-only variable
            grad_k = self.grad_k

            ns = len(z_k)
            
            # TEMPS 1. calcul des gradients locaux sur la couche k de sortie (les erreurs commises)
            for k in range(ns):
                grad_k[k] = sorties_attendues[k] - z_k[k]       # gradient sur un neurone de sortie (erreur locale)
                error += pow(grad_k[k],2)                              # l'erreur quadratique totale
                
            error *= 0.5
            #print(it)
            #print(error)
            if it == nbiter-1 : self.error = error                     # mémorisation de l'erreur totale à la dernière itération

            # modification des poids j->k
            mat_jk = self.mat_jk # read/write data

            z_i = self.z_i
            z_j = self.z_j
            nc = len(z_j)
            #eta = self.eta
            eta = ((0.0001 - 1.0) / nbiter) * it + 1.0
            #print(eta)
            
            # (test fait: modifier la matrice apres le calcul du gradient de la couche j , conclusion: ne change pas la convergence de l'algo)

            self.modification_des_poids(mat_jk,eta,z_j,z_k,grad_k)

            #print(mat_jk)
            
            # for k in range(ns): # line
            #     for j in range(nc): # column , parcours les colonnes de la ligne sauf le bias
            #         mat_jk[k][j+1] -= - eta * z_j[j] * z_k[k] * (1 - z_k[k]) * grad_k[k]
            #     # and update the bias
            #     mat_jk[k][0] -= - eta * 1.0 * z_k[k] * (1 - z_k[k]) * grad_k[k]
                                
            # Réponse à la question "b4" : T_{jk} = z_k * (1-z_k) * w_{jk}


            
            # TEMPS 2. calcul des gradients locaux sur la couche j cachée (rétro-propagation), sauf pour le bias constant
            grad_j = self.grad_j
            
            for j in range(nc):
                # must match the hidden activation function !
                grad_j[j] = sum(z_k[k] * (1 - z_k[k]) * mat_jk[k,j+1] * grad_k[k] for k in range(ns))
                #grad_j[j] = sum((1 - tanh(z_k[k])**2) * mat_jk[k,j+1] * grad_k[k] for k in range(ns))
                
            #print(grad_j)
            
            # modification des poids i->j
            mat_ij = self.mat_ij
             
            self.modification_des_poids(mat_ij,eta,z_i,z_j,grad_j)
            
            # for j in range(nc):  # line
                
            #     for i in range(ne): # column , parcours les colonnes de la ligne sauf le bias
            #         mat_ij[j][i+1] -= -eta * z_i[i] * z_j[j] * (1 - z_j[j]) * grad_j[j]
                    
            #     # and update the bias
            #     mat_ij[j][0] -= -eta * 1.0 * z_j[j] * (1 - z_j[j]) * grad_j[j]
                
                  
            # et l'on passe à l'exemple suivant
            
            ip = (ip + 1) % len(Lexemples)      # parcours des exemples en ordre circulaire


            
    def modification_des_poids(self,M_i_o,eta,z_input,z_output,grad_i_o):
        # the length of output and input layer with coeff. used for bias update             
        (len_layer_output, len_layer_input_plus1forBias) = M_i_o.dim()
        
        len_layer_input = len_layer_input_plus1forBias - 1

        
        for j in range(len_layer_output):  # line
            
            for i in range(len_layer_input): # column , parcours les colonnes de la ligne sauf le bias
                M_i_o[j,i+1] -= -eta * z_input[i] * z_output[j] * (1 - z_output[j]) * grad_i_o[j]

            # and update the bias
            M_i_o[j,0] -= -eta * 1.0 * z_output[j] * (1 - z_output[j]) * grad_i_o[j]
                

            
                
    def dump(self,n,msg):     # dump du réseau en entrant dans l'itération numéro n
        print('---------- DUMP',msg,'itération numéro',n)
        print('mat_ij :') ; print(self.mat_ij)
        print('z_j  :',self.z_j)
        print('grad_j :',self.grad_j)
        print('mat_jk :') ; print(self.mat_jk)
        print('z_k  :',self.z_k)
        print('grad_k :',self.grad_k)
        print()

    def test(self,Lexemples):
        print('Test des exemples :')
        for (entree,sortie_attendue) in Lexemples:
            self.accepte_et_propage(entree)
            print(entree,'-->',self.z_k,': on attendait',sortie_attendue)



            
if __name__ == '__main__':
    
   
    print('################## NOT ##################')
    r1 = ReseauRetroPropagation(1,2,1,nbiter=10000,eta=0.5)
    Lexemples1 = [[[1],[0]],[[0],[1]]]
    START = time() ; r1.apprentissage(Lexemples1) ; END = time()
    r1.test(Lexemples1)
    print('APPRENTISSAGE sur {} itérations, time = {:.2f}s'.format(r1.nbiter,END-START))
    print()
    
    print('################## XOR ##################')
    r2 = ReseauRetroPropagation(2,3,1,nbiter=50000,eta=0.1)    # 2 entrées (+ bias), 3 neurones cachés (+ bias), 1 neurone en sortie
    Lexemples2 = [[[1,0],[1]], [[0,0],[0]], [[0,1],[1]], [[1,1],[0]]]
    START = time() ; r2.apprentissage(Lexemples2) ; END = time()
    print('APPRENTISSAGE sur {} itérations, time = {:.2f}s'.format(r2.nbiter,END-START))
    r2.test(Lexemples2)
    print("Error=") ; print(r2.error)
    #print("r2.mat_ij=",r2.mat_ij)
    #print("r2.mat_jk=",r2.mat_jk)



    print('################## SINUS ##################')
    r3 = ReseauRetroPropagation(1,50,1,nbiter=50000,eta=0.01)    # 2 entrées (+ bias), 3 couches de neurones cachés (+ bias), 1 neurone en sortie
    Llearning = [ [[x],[sin(x)]] for x in [ uniform(-pi,pi) for n in range(1000)] ]
    Ltest = [ [[x],[sin(x)]] for x in [ uniform(-pi/2,pi/2) for n in range(10)] ]
    START = time() ; r3.apprentissage(Llearning) ; END = time()
    print('APPRENTISSAGE sur {} itérations, time = {:.2f}s'.format(r3.nbiter,END-START))
    r3.test(Ltest)
    print("Error=") ; print(r3.error)

和矩阵类代码：

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
MatrixNumPy.py
The class MatrixNumPy.
Derived from:
exo_mat2.py
La classe MatrixNumPy : algèbre des matrices de format quelconque, avec numpy
"""

# D. Mattei

from multimethod import multimethod

from typing import Union,Callable

from collections.abc import Iterable

import numpy


class MatError(Exception):     # juste pour la lisibilité des exceptions
    pass


Numeric = Union[float, int]



class MatrixNumPy:
    '''Construct an object MatrixNumPy.'''

    # >>> m1=MatrixNumPy(2,3)
    @multimethod
    def __init__(self,n : Numeric,p : Numeric): # lines, columns
        '''Construit un objet matrice de type MatrixNumPy, d'attributs le format self.dim
        et le tableau architecturé en liste de listes de même longueur. Exemples :
            m = MatrixNumPy([[1,3],[-2,4],[0,-1]])  à 3 lignes et 2 colonnes
            m = MatrixNumPy(lambda i,j: i+j,3,5)    à 3 lignes et 5 colonnes'''

        if __debug__:
            print("# MatrixNumPy constructor MatrixNumPy (Numeric,Numeric) #")

        self.__init__(lambda i,j: 0,n,p) # return a Zero matrix


   
    @multimethod
    def __init__(self,f : Callable,n : Numeric,p : Numeric):

        if __debug__:
            print("# MatrixNumPy constructor MatrixNumPy (function,Numeric,Numeric) #")

        self.A = numpy.array([[f(i,j) for j in range(p)] for i in range(n)])

    
    @multimethod
    def __init__(self,Af : list):  # la liste qui contient les éléments de matrice

        if __debug__:
            print("# MatrixNumPy constructor MatrixNumPy,list #")

        if any(map(lambda x:type(x) != list,Af)) :
            raise MatError('MatrixNumPy : on attend une liste de listes !')
        p = len(Af[0])
        if any(map(lambda x:len(x)!=p,Af)) :
            raise MatError('MatrixNumPy : on attend une liste de listes de même longueur !')
        self.A = numpy.array(Af)         # l'array qui contient les éléments de matrice
        


    @multimethod
    def __init__(self,Arr : numpy.ndarray):

        if __debug__:
            print("# MatrixNumPy constructor MatrixNumPy,numpy.ndarray #")

        self.A = Arr

        

    def dim(self):
        '''Retourne le format de la matrice courante.'''

        return self.A.shape

    

    # m1=MatrixNumPy(lambda i,j : i+j, 5,2)
    # # MatrixNumPy constructor MatrixNumPy (function,Numeric,Numeric) #
    # m1
    #       0.00          1.00
    #       1.00          2.00
    #       2.00          3.00
    #       3.00          4.00
    #       4.00          5.00
    # MatrixNumPy @ 0x105ae03d0 

    # print(m1)
    #       0.00          1.00
    #       1.00          2.00
    #       2.00          3.00
    #       3.00          4.00
    #       4.00          5.00
    def __repr__(self):
        '''Retourne une chaine formatée avec colonnes alignées représentant
        la matrice m.'''
        
        return self.__str__() + '\nMatrixNumPy @ {} \n'.format(hex(id(self)))
        


    # >>> print(m)
    def __str__(self):

        '''Retourne une chaine formatée avec colonnes alignées représentant
        la matrice m.'''

        return self.A.__str__()

    

    def __getitem__(self,i):        # pour pouvoir écrire m[i] pour la ligne i
        return self.A[i]            # et m[i][j] pour l'élément en ligne i et colonne j

    def __setitem__(self, i, data):
          self.A[i] = data

    def lig(self,i):                # m.lig(i) <==> m[i]
        '''Retourne la ligne i >= 0 de la matrice sous forme de liste plate.'''
        return self.A[i].tolist()

    def col(self,j):
        '''Retourne la colonne j >= 0 de la matrice sous forme de liste plate.'''
        (n,_) = self.dim()
        return [self.A[i][j] for i in range(n)]

    
    def __add__(self,m2):
        '''Retourne la somme de la matrice courante et d'une matrice m2
        de même format.'''
        (n,p) = self.dim()
        if m2.dim() != (n,p):
            raise MatError('mat_sum : Mauvais formats de matrices !')
        A = self.A ; A2 = m2.A
        AplusA2 = numpy.add(A,A2)
        return MatrixNumPy(AplusA2)
    

    def __sub__(self,m2):
        '''Retourne la différence entre la matrice courante et une matrice
        m2 de même format.'''
        return MatrixNumPy(numpy.substract(self.A,m2.A))


    def mul(self,k):
        '''Retourne le produit externe du nombre k par la matrice m.'''
        (n,p) = self.dim()
        return MatrixNumPy(lambda i,j : k*self.A[i][j],n,p)
    

    # R  : multiplicand
    # matrix multiplication by number
    
    @multimethod
    def __rmul__(self, m : Numeric): #  self is at RIGHT of multiplication operand : m * self
        '''Retourne le produit externe du nombre par la matrice'''
        if __debug__:
            print("MatrixNumPy.py : __rmul__(MatrixNumPy,Numeric)")

        return self.mul(m)
        
    
    def app(self,v):                           # v = [a,b,c,d]
        '''Retourne l'application de la matrice self au vecteur v vu comme une liste
        plate. Le résultat est aussi une liste plate.'''
        # transformation de la liste v en matrice uni-colonne
        mv = MatrixNumPy(list(map(lambda x:[x],v)))          # mv = [[a],[b],[c],[d]]
        # l'application n'est autre qu'un produit de matrices
        res = self * mv         # objet de type MatrixNumPy car produit de 2 matrices
        res = res.A             # objet de type Array
        # et on ré-aplatit la liste
        return list(map(lambda A:A[0],res))


    
    # R  : multiplicand
    # m1=MatrixNumPy(lambda i,j : i+j, 5,2)
    # # MatrixNumPy constructor MatrixNumPy (function,Numeric,Numeric) #
    # m1*(-2,-3.5)
    # MatrixNumPy.py : __mul__(MatrixNumPy,Iterable)
    # # MatrixNumPy constructor MatrixNumPy,list #
    # MatrixNumPy.py : __mul__(MatrixNumPy,MatrixNumPy)
    # # MatrixNumPy constructor MatrixNumPy (function,Numeric,Numeric) #
    # [-3.5, -9.0, -14.5, -20.0, -25.5]
    @multimethod
    def __mul__(self, R : Iterable): #  self is at LEFT of multiplication operand : self * R = MatrixNumPy * R, R is at Right

        if __debug__:
            print("MatrixNumPy.py : __mul__(MatrixNumPy,Iterable)")

        return self.app(R)
            

    
    # R  : multiplicand
    # matrix multiplication
    # m2=MatrixNumPy([[-2],[-3.5]])
    
    # m1*m2
    #     >>> m2
    # [[-2. ]
    #  [-3.5]]
    # MatrixNumPy @ 0x7f48a430ee10 

    # >>> m1*m2
    # MatrixNumPy.py : __mul__(MatrixNumPy,MatrixNumPy)
    # # MatrixNumPy constructor MatrixNumPy,numpy.ndarray #
    # [[ -3.5]
    #  [ -9. ]
    #  [-14.5]
    #  [-20. ]
    #  [-25.5]]
    #MatrixNumPy @ 0x7f48a4362590 
    @multimethod
    def __mul__(self, m2 : object): #  self is at LEFT of multiplication operand : self * m2 = MatrixNumPy * m2 = MatrixNumPy * MatrixNumPy, m2 is at Right of operator

        if __debug__:
            print("MatrixNumPy.py : __mul__(MatrixNumPy,MatrixNumPy)")

        (n1,p1) = self.dim()
        (n2,p2) = m2.dim()
        if p1 != n2 : raise MatError('Produit de matrices impossible !')
      
        # le produit aura pour format (n1,p2)
        #return MatrixNumPy(numpy.matmul(self.A,m2.A))
        return MatrixNumPy(self.A @ m2.A)
        
    # m1.A @ m2.A
    # array([[ -3.5],
    #        [ -9. ],
    #        [-14.5],
    #        [-20. ],
    #        [-25.5]])

为什么这个梯度下降算法不适用于正弦函数的所有值？

问题描述投票：0回答：0

最新问题

为什么这个梯度下降算法不适用于正弦函数的所有值？

问题描述 投票：0回答：0

最新问题

问题描述投票：0回答：0