What's wrong with my custom layernorm function?

Problem description
import numpy as np
import torch
import torch.nn.functional as F

def layer_norm(x, weight, bias, eps=1e-6):
    # x shape: [bs, h, w, c]
    # Calculate mean and variance across the spatial dimensions (height, width)
    mean = np.mean(x, axis=(1, 2), keepdims=True)  # shape: (batch_size, 1, 1, channels)
    var = np.var(x, axis=(1, 2), keepdims=True, ddof=0)  # Use ddof=0 for biased variance

    # Normalize
    x_normalized = (x - mean) / np.sqrt(var + eps)

    # Applying weight and bias
    out = weight[None, None, None, :] * x_normalized + bias[None, None, None, :]
    return out

def test1(x):
    x = np.transpose(x, (0, 2, 3, 1))  # Transpose to [bs, h, w, c]
    weight = np.ones(channels)
    bias = np.zeros(channels)

    normalized_output = layer_norm(x, weight, bias)
    return normalized_output

def test2(x):
    global channels
    x = np.transpose(x, (0, 2, 3, 1))  # Transpose to [bs, h, w, c]
    x_tensor = torch.tensor(x, dtype=torch.float32)
    weight = torch.ones(channels)
    bias = torch.zeros(channels)

    # Use PyTorch's layer norm, normalizing over the last dimension (channels)
    normalized_output = F.layer_norm(x_tensor, normalized_shape=(channels,), weight=weight, bias=bias)
    return normalized_output.detach().numpy()

# Testing
batch, channels, height, width = 4, 3, 8, 8
# Generate random input
x = np.random.randint(-10, 10, (batch, channels, height, width))

# Calculate outputs from both implementations
layernorm1 = test1(x)
layernorm2 = test2(x)

# Check if outputs are close
are_close = np.allclose(layernorm1, layernorm2, atol=1e-4)
print("Outputs are close:", are_close)  # Should output True if they are close enough


I tried computing the variance both with and without ddof=0:

var = np.var(x, axis=(1, 2), keepdims=True, ddof=0)  # Use ddof=0 for biased variance
var = np.var(x, axis=(1, 2), keepdims=True)

My expectation is are_close == True, meaning layernorm1 and layernorm2 are very close. Since layernorm1 and layernorm2 are large arrays, I only show part of each result:

layernorm1[0, 0, 0:3, 0:4]
array([[ 0.35208505,  1.06448374, -0.52827179],
       [-1.6216472 , -1.7376534 , -1.07653225],
       [-1.12821414,  0.88935017,  1.84752351]])

layernorm2[0, 0, 0:3, 0:4]
array([[ 0.07412489,  1.1859984 , -1.2601235 ],
       [-1.0690411 , -0.2672601 ,  1.336302  ],
       [-1.3920445 ,  0.4800153 ,  0.9120291 ]], dtype=float32)

I have tried the variance computation both with and without ddof=0, and the print statement gives False every time. I would like to know how to implement a custom layernorm that matches PyTorch's built-in layer norm. From a code perspective, what are the steps of layernorm? What does layernorm do to feature maps in computer vision?

machine-learning math deep-learning pytorch computer-vision
1 Answer

You are computing the mean/variance along the wrong dimensions.

PyTorch's layer norm computes the mean/variance along the dimensions given by normalized_shape. From the documentation, the input is expected to have shape (*, normalized_shape[0], normalized_shape[1], ...).

Your code passes normalized_shape=(channels,) and permutes the input so that channels is the last dimension, so you should compute the mean/var along that last dimension. Instead, you compute it along dims (1, 2).
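As a quick sanity check of that behavior, here is a minimal sketch with a small, arbitrary tensor, just to illustrate which dimension F.layer_norm reduces over:

import torch
import torch.nn.functional as F

t = torch.randn(2, 4, 4, 3)  # [bs, h, w, c], channels last
out = F.layer_norm(t, normalized_shape=(3,))

# Each channel vector is normalized: ~0 mean and ~unit variance over the last dim
print(out.mean(dim=-1).abs().max())            # close to 0
print(out.var(dim=-1, unbiased=False).mean())  # close to 1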

Here is the correct implementation:

def layer_norm(x, weight, bias, eps=1e-5):
    # x shape: [bs, h, w, c]
    # Calculate mean and variance across the channel (last) dimension
    mean = np.mean(x, axis=-1, keepdims=True)  # shape: (batch_size, h, w, 1)
    var = np.var(x, axis=-1, keepdims=True, ddof=0)  # ddof=0 gives the biased variance, matching PyTorch

    # Normalize
    x_normalized = (x - mean) / np.sqrt(var + eps)

    # Applying weight and bias
    out = weight[None, None, None, :] * x_normalized + bias[None, None, None, :]
    return out

Note that I also set eps=1e-5, which is the PyTorch default.

With that, running:

batch, channels, height, width = 4, 3, 8, 8
# Generate random input
x = np.random.randint(-10, 10, (batch, channels, height, width))

# Calculate outputs from both implementations
layernorm1 = test1(x)
layernorm2 = test2(x)

# Check if outputs are close
are_close = np.allclose(layernorm1, layernorm2, atol=1e-6)

results in are_close == True.
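Equivalently, a minimal sketch (reusing x, channels and layernorm1 from above) using the module form torch.nn.LayerNorm, which applies the same channels-last normalization since its weight/bias are initialized to ones/zeros:

import numpy as np
import torch

ln = torch.nn.LayerNorm(channels, eps=1e-5)  # weight=1, bias=0 at initialization
x_t = torch.tensor(np.transpose(x, (0, 2, 3, 1)), dtype=torch.float32)  # [bs, h, w, c]
layernorm3 = ln(x_t).detach().numpy()

print(np.allclose(layernorm1, layernorm3, atol=1e-6))  # should also print True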
