我是 pytorch 的新手,并尝试学习异或问题(有一些噪音)。
当然我知道我必须使用多层和它们之间的非线性。但是我的网络仍然没有学到任何东西,所以我假设我的 pytorch 代码有错误。权重根本不会改变。请帮忙!
这是我的代码:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs #for data generation
from sklearn.model_selection import train_test_split
import torch
X, y = make_blobs(n_samples=200, n_features=2, cluster_std=.1
,centers= [(1,1), (1,0), (0,0),(0,1)])
y[y==2]=0
y[y==3]=1
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=19)
#np->torch
x_train = torch.FloatTensor(x_train)
x_test = torch.FloatTensor(x_test)
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)
class XOR(torch.nn.Module):
def __init__(self):
super(XOR, self).__init__()
self.layer1 = torch.nn.Linear(2,2)
self.layer2 = torch.nn.Linear(2, 1)
self.non_linear = torch.nn.Sigmoid()
def forward(self, x):
output = self.layer1(x)
output = self.non_linear(output)
output = self.layer2(output)
output = self.non_linear(output)
return output
model = XOR()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
model.train() #set to train mode
epoch = 50
for e in range(epoch):
# Forward pass
y_pred = model(x_train)
# Compute Loss
loss = criterion(y_pred.flatten(), y_train)
optimizer.zero_grad()
print('Epoch {}: train loss: {}'.format(e, loss.item()))
# Backward pass
loss.backward()
#make gradient update
optimizer.step()
model.eval() #set model to eval mode
#train
y_pred = model(x_train) #predict
y_pred=(y_pred>0.5).int().flatten() #argmax class lable
train_acc = torch.sum(y_pred == y_train.int())/y_train.shape[0]
print("train ACC: ",train_acc.float())
#test
y_pred = model(x_test) #predict
y_pred=(y_pred>0.5).int().flatten() #argmax class lable
test_acc = torch.sum(y_pred == y_test.int())/y_test.shape[0]
print("test ACC: ",test_acc.float())
我尝试增加lr,添加更多层并添加更多神经元。都没有用
我通过交换 SK Learn 多层感知器得到了一些不同的结果:
# see https://stackoverflow.com/questions/75898473/why-does-my-neural-network-not-learn-the-xor-problem?noredirect=1#comment133873645_75898473
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
if __name__ == '__main__':
X, y = make_blobs(n_samples=200, n_features=2, cluster_std=.1, centers= [(1,1), (1,0), (0,0),(0,1)])
y[y == 2] = 0
y[y == 3] = 1
plt.scatter(X[:, 0], X[:, 1], c=y, s=25, edgecolors='k')
plt.show()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=19)
model = MLPClassifier(hidden_layer_sizes=20, learning_rate='adaptive', epsilon=0.01)
model.fit(X_train, y_train)
#train
y_pred_train = model.predict(X_train)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s=25, edgecolors='k')
plt.show()
train_acc = np.sum(y_pred_train == y_train)/y_train.shape[0]
print("train accuracy: ", train_acc)
#test
y_pred_test = model.predict(X_test)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, s=25, edgecolors='k')
plt.show()
test_acc = np.sum(y_pred_test == y_test)/y_test.shape[0]
print("test accuracy: ", test_acc)
结果如下:
train accuracy: 0.8134328358208955
test accuracy: 0.7727272727272727
火车散点图:
测试散点图:
可以尝试隐藏层和 epsilon,看看它们对准确性有什么影响。
如果我采用
MLPClassifier
的默认值,我可以使训练和测试精度都等于 1.0,但我确信它只是过度拟合和记忆数据。