I'm trying to build a model that solves a partial differential equation (PDE). The problem is that I need to make a loss function whose arguments work like this:
For example: if the model has two input nodes, call the first "x" and the second "t". For a batch of 3 instances, say [(1,2),(3,4),(5,6)], the loss function needs the predictions for these inputs, and it also needs the predictions for the inputs [(1,0),(3,0),(5,0)], i.e. the same batch with the second neuron's input set to zero everywhere. This is required for solving the PDE, because it needs both N(x,t) and N(x,0), where "N" is the neural network and "x" and "t" are the two input nodes.
This has to be done for every batch.
How can I do this?
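For example, for the batch above, the second set of inputs could be built like this (a small sketch using plain TensorFlow ops; `batch_t0` is just an illustrative name):

import tensorflow as tf

batch = tf.constant([[1., 2.], [3., 4.], [5., 6.]])  # three (x, t) instances
x_only = batch[:, 0:1]                               # keep the x column, shape (3, 1)
zeros_t = tf.zeros_like(x_only)                      # a t = 0 column of the same shape
batch_t0 = tf.concat([x_only, zeros_t], axis=1)      # [(1,0), (3,0), (5,0)]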
import numpy as np
import tensorflow as tf
from tensorflow import keras

# model
model = keras.Sequential([
    keras.layers.Dense(2, activation='sigmoid'),
    keras.layers.Dense(5, activation='sigmoid'),
    keras.layers.Dense(1, activation='sigmoid')
])
# inputs for the neural net: 10 random (x, t) pairs
x = np.random.uniform(1, 5, 10)
t = np.random.uniform(1, 5, 10)
inputs = np.stack((x, t), axis=1)
inputs = tf.convert_to_tensor(inputs)
# computes p_i = z1*x + z2*t + b, where z1 and z2 are the weights for x and t respectively
def P(input_values, weight_of_x, weight_of_t, bias):
    input_of_x = input_values[0]
    input_of_t = input_values[1]
    p_i = (weight_of_x * input_of_x) + (weight_of_t * input_of_t) + bias
    return p_i
# sigmoid activation
def sigmoid(p):
    return 1 / (1 + np.exp(-p))
# derivative of the output with respect to t (2nd input neuron)
def dN_by_dt(input_value, weights_of_l1, weights_of_l2):
    w_connected_to_x = weights_of_l1[0][0]
    w_connected_to_t = weights_of_l1[0][1]
    b_l1 = weights_of_l1[1]
    w_l2 = weights_of_l2[0]
    temp = []
    for i in range(len(w_l2)):  # one term per hidden unit
        p_i = P(input_value, w_connected_to_x[i], w_connected_to_t[i], b_l1[i])
        activation = sigmoid(p_i)
        temp2 = w_l2[i] * (1 - activation) * activation * w_connected_to_t[i]
        temp.append(temp2)
    return sum(temp)
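# (The formula above assumes N = sum_i w2_i * sigmoid(p_i) + b2, i.e. a linear
#  output layer; then dN/dt = sum_i w2_i * sigmoid'(p_i) * w_t_i with
#  sigmoid'(p) = s*(1 - s), which is the quantity the loop accumulates.)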
# second derivative of the output with respect to x (1st input neuron)
def d2N_by_dx2(input_value, weights_of_l1, weights_of_l2):
    w_connected_to_x = weights_of_l1[0][0]
    w_connected_to_t = weights_of_l1[0][1]
    b_l1 = weights_of_l1[1]
    w_l2 = weights_of_l2[0]
    temp = []
    for i in range(len(w_l2)):  # one term per hidden unit
        p_i = P(input_value, w_connected_to_x[i], w_connected_to_t[i], b_l1[i])
        activation = sigmoid(p_i)
        d_sig_by_dp_i = activation * (1 - activation)
        temp2 = w_l2[i] * w_connected_to_x[i] * ((d_sig_by_dp_i * w_connected_to_x[i]) - (2 * activation * d_sig_by_dp_i * w_connected_to_x[i]))
        temp.append(temp2)
    return sum(temp)
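# (The bracketed factor is sigmoid''(p) * w_x_i with sigmoid''(p) = s*(1 - s)*(1 - 2s),
#  so each term is w2_i * w_x_i^2 * sigmoid''(p_i): the hand-derived second
#  derivative under the same linear-output assumption as dN_by_dt.)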
# final cost function
def cost_function(input_vals, pred1, pred2, weights_of_l1, weights_of_l2):
    # pred1 are the normal predictions; pred2 are the predictions with t = 0
    dN_dt = np.array([])
    d2N_dx2 = np.array([])
    for i in range(len(input_vals)):
        temp1 = dN_by_dt(input_vals[i], weights_of_l1, weights_of_l2)
        dN_dt = np.append(dN_dt, temp1)
        temp2 = d2N_by_dx2(input_vals[i], weights_of_l1, weights_of_l2)
        d2N_dx2 = np.append(d2N_dx2, temp2)
    e1 = np.array([])
    e2 = np.array([])
    for i in range(len(dN_dt)):
        # PDE residual term
        temp1 = np.array((dN_dt[i] - d2N_dx2[i] - (pred1[i] * (1 - pred1[i]) * (pred1[i] - 1)))**2)
        e1 = np.append(e1, temp1)
        # initial-condition term: N(x, 0) should match 0.5*(1 + tanh(x / (2*sqrt(2))))
        x_i = input_vals[i][0]
        temp2 = np.array((pred2[i] - (0.5 * (1 + np.tanh(x_i / (2 * (2**0.5))))))**2)
        e2 = np.append(e2, temp2)
    E = tf.convert_to_tensor(e1 + e2)
    return E
optimizer = tf.keras.optimizers.Adam()
batch_size = 5
for epoch in range(3):
    for i in range(0, len(inputs), batch_size):
        x_batch = inputs[i:(i + batch_size)]
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(x_batch)
            pred1 = model(x_batch)
            # same batch, but with every t input set to zero
            x_vals = np.array(x_batch[:, 0])
            all_0 = np.zeros(len(x_vals))
            temp_batch2 = np.stack((x_vals, all_0), axis=1)
            # prediction for the t = 0 inputs
            pred2 = model(temp_batch2)
            weights_of_l1 = model.layers[1].get_weights()
            weights_of_l2 = model.layers[2].get_weights()
            cost = cost_function(x_batch, pred1, pred2, weights_of_l1, weights_of_l2)
        gradients = tape.gradient(cost, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
I get the following error:
ValueError: No gradients provided for any variable: (['dense_9/kernel:0', 'dense_9/bias:0', 'dense_10/kernel:0', 'dense_10/bias:0', 'dense_11/kernel:0', 'dense_11/bias:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'dense_9/kernel:0' shape=(2, 2) dtype=float32, numpy=
array([[-0.22757757, -1.1772537 ],
[ 0.77507913, 0.11150062]], dtype=float32)>), (None, <tf.Variable 'dense_9/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>), (None, <tf.Variable 'dense_10/kernel:0' shape=(2, 5) dtype=float32, numpy=
array([[ 0.75440705, -0.41952217, -0.20862311, -0.5366657 , 0.83467317],
[-0.04175383, -0.8375001 , 0.6257205 , 0.20955968, -0.02585948]],
dtype=float32)>), (None, <tf.Variable 'dense_10/bias:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>), (None, <tf.Variable 'dense_11/kernel:0' shape=(5, 1) dtype=float32, numpy=
array([[-0.14150858],
[-0.8269048 ],
[-0.58234024],
[-0.7746899 ],
[-0.51618624]], dtype=float32)>), (None, <tf.Variable 'dense_11/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>)).
The error occurs because your custom functions mix TensorFlow and NumPy operations. TensorFlow can only differentiate through its own ops; the moment a value passes through NumPy (np.exp, np.append, np.stack) or through get_weights(), which returns plain NumPy arrays detached from the variables, the gradient tape loses the connection between the model's weights and the cost, and tape.gradient returns None for every variable. To fix this, replace all of the NumPy operations with their TensorFlow equivalents so that the entire computation from the weights to the cost stays differentiable. Please refer to this gist.
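As a minimal sketch of that idea (not the linked gist): the version below keeps every operation in TensorFlow and, rather than translating your hand-derived dN_by_dt / d2N_by_dx2 line by line, swaps them for nested tf.GradientTape calls, which compute the same derivatives by autodiff. The architecture, loss terms, and training loop are taken from your code; the tape and variable names are illustrative.

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(2, activation='sigmoid'),
    keras.layers.Dense(5, activation='sigmoid'),
    keras.layers.Dense(1, activation='sigmoid')
])

# 10 random (x, t) training points, kept as float32 tensors throughout
x = tf.random.uniform((10, 1), 1.0, 5.0)
t = tf.random.uniform((10, 1), 1.0, 5.0)
inputs = tf.concat([x, t], axis=1)

optimizer = tf.keras.optimizers.Adam()
batch_size = 5

for epoch in range(3):
    for i in range(0, len(inputs), batch_size):
        batch = inputs[i:i + batch_size]
        xb = batch[:, 0:1]   # x column, shape (batch, 1)
        tb = batch[:, 1:2]   # t column, shape (batch, 1)
        with tf.GradientTape() as weight_tape:       # gradients w.r.t. the weights
            with tf.GradientTape() as second:        # outer tape for d2N/dx2
                second.watch(xb)
                with tf.GradientTape(persistent=True) as first:
                    first.watch([xb, tb])
                    pred1 = model(tf.concat([xb, tb], axis=1))   # N(x, t)
                dN_dt = first.gradient(pred1, tb)    # dN/dt
                dN_dx = first.gradient(pred1, xb)    # dN/dx
            d2N_dx2 = second.gradient(dN_dx, xb)     # d2N/dx2
            pred2 = model(tf.concat([xb, tf.zeros_like(tb)], axis=1))  # N(x, 0)
            # PDE residual term and initial-condition term, all in tf ops
            e1 = (dN_dt - d2N_dx2 - pred1 * (1 - pred1) * (pred1 - 1)) ** 2
            e2 = (pred2 - 0.5 * (1 + tf.tanh(xb / (2 * 2 ** 0.5)))) ** 2
            cost = tf.reduce_mean(e1 + e2)
        gradients = weight_tape.gradient(cost, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

Because the model's weights reach the cost only through TensorFlow operations, weight_tape.gradient now returns real gradients instead of None.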