使用的数据点遵循幂律过程,并且可能会变得非常大。使用代码模拟数据,
def simulate_data_constant(beta1, beta2, lam1, sets, total_failures, change):
betha1 = beta1
betha2 = beta2
lambda1 = lam1
lambda2 = lam1
n_traj = sets
n = total_failures
x = change
t1 = np.zeros((n_traj, n))
for k in range(n_traj):
t1[k, 0] = (-np.log(np.random.uniform(0,1))/lambda1)**(1/betha1)
for j in range(1, x):
t1[k, j] = (-np.log(np.random.uniform(0,1))/lambda1 + (t1[k, j-1])**betha1)**(1/betha1)
for j in range(x, n):
t1[k, j] = (-np.log(np.random.uniform(0,1))/lambda2 + (t1[k, j-1])**betha2)**(1/betha2)
return t1
为了计算积分,我首先使用了函数
tplquad
,但 Python 给了我一条消息,说积分不收敛。因此,我尝试使用蒙特卡罗积分与以下代码:
def K1_log(tau, beta1, beta2, n, eps, v, T):
K1_threshold = 700
val = (n+eps)*np.log(np.power(tau, beta1)+np.power(T, beta2)-np.power(tau, beta2)+v)
#return np.where(log_K1_values >= K1_threshold, K1_threshold, log_K1_values)
return val
def q_j_log(j, beta1, beta2, times):
prod1_log = beta1*np.sum(np.log(times[:j]))
prod2_log = beta2*np.sum(np.log(times[j:]))
return prod1_log + prod2_log
def integral1_log(beta1, beta2, tau, n, eps, v, T, times):
K1_values = K1_log(tau, beta1, beta2, n, eps, v, T)
log_prod_term = beta2*np.sum(np.log(times))
print(K1_values-log_prod_term)
return 2*np.log(2) - 3*(np.log(1+beta1) + np.log(1+beta2)) + n*np.log(beta2) + log_prod_term - K1_values - np.log(T)
def integral2_log(beta1, beta2, tau, j, n, eps, v, T, times):
K1_values = K1_log(tau, beta1, beta2, n, eps, v, T)
q_j_values = q_j_log(j, beta1, beta2, times)
return 2*np.log(2) - 3*(np.log(1+beta1) + np.log(1+beta2)) + j*np.log(beta1) + (n-j)*np.log(beta2) + q_j_values - K1_values - np.log(T)
def integral3_log(beta1, beta2, tau, n, eps, v, T, times):
K1_values = K1_log(tau, beta1, beta2, n, eps, v, T)
log_prod_term = beta1*np.sum(np.log(times))
return 2*np.log(2) - 3*(np.log(1+beta1) + np.log(1+beta2)) + n*np.log(beta1) + log_prod_term - K1_values - np.log(T)
def Bayes(times_mat, sets, total_failures):
n_traj = sets
n = total_failures
v = 1
eps = 0.1
num_samples = 1000
for k in range(n_traj):
times = times_mat[k]
T = math.ceil(times[-1])
samples_tau = np.random.uniform(0, times[0], num_samples)
samples_beta1 = np.random.uniform(0, 3, num_samples)
samples_beta2 = np.random.uniform(0, 3, num_samples)
integral_1_values_log = integral1_log(samples_beta1, samples_beta2, samples_tau, n, eps, v, T, times)
integral_1_values = np.array([np.exp(log_val) for log_val in integral_1_values_log])
A1 = np.mean(integral_1_values)*times[0]
A2 = 0
for j in range(1, n-1):
samples_tau = np.random.uniform(times[j], times[j+1])
integral_2_values_log = integral2_log(samples_beta1, samples_beta2, samples_tau, j, n, eps, v, T, times)
integral_2_values = np.array([np.exp(log_val) for log_val in integral_2_values_log])
A2 += np.mean(integral_2_values) * (times[j+1]-times[j])
samples_tau = np.random.uniform(times[-1], T, num_samples)
integral_3_values_log = integral3_log(samples_beta1, samples_beta2, samples_tau, n, eps, v, T, times)
integral_3_values = np.array([np.exp(log_val) for log_val in integral_3_values_log])
A3 = np.mean(integral_3_values) * (T-times[-1])
A = A1 + A2 + A3
上面的实现包括计算的对数变换。 x_i 的值太大,导致
inf
值。这适用于模拟数据的某些情况,但对于其他情况,我仍然得到 inf
作为结果。有没有一种不同的方法可以计算这个积分,这适用于所有模拟数据?
您可以在进行 MC 集成时尝试从他们的 PDF 中采样 β1 或 β2。这是 MC 中的一般原则 - 如果你有积分
∫f(x)g(x)dx,
g(x) 是(可以制成)某个 PDF(x),您可以从此 PDF 中采样,然后将 f(x) 的平均值计算为积分值
这将为收敛带来重大改进,因为大多数样本将集中在接近 0 的位置。Tau 仍将是线性采样。
所以重写你的积分
I = ∫∫∫ PDF(β1) 剩余1 PDF(β2) 剩余2 ...
PDF(β1) = 2/(1 + β1)3
PDF(β2) = 2/(1 + β2)3
取样简单
β1,2) = √(1/U01) - 1,其中 U01 在 (0...1] 范围内均匀
一些代码,Python 3.11 Windows x64
import numpy as np
from matplotlib import pyplot as plt
def BetaPDF(beta):
return 2.0/(1.0 + beta)**3
def sampleBetaPDF(rng, N):
ksi = rng.random(size=N)
ksi = 1.0 - ksi # to make [0...1) -> (0...1] range
beta = np.sqrt(1.0 / ksi) - 1.0
return beta
rng = np.random.default_rng(122807528840384100672342137672332424406)
N = 100000
rnge = (0.0, 10.0)
bins = np.linspace(start = rnge[0], stop = rnge[1], num=101)
# %%
data = sampleBetaPDF(rng, N)
hist, bin_edges = np.histogram(data, bins=bins, density=True)
print(hist.sum())
print(np.sum(hist * np.diff(bin_edges)))
fig = plt.figure(figsize =(10, 7))
plt.hist(data, bins = bins, density = True)
beta = 0.5 * (bin_edges[:-1] + bin_edges[1:])
bpdf = BetaPDF(beta)
plt.scatter(beta, bpdf, c='#9467bd')
plt.title("Beta Sampling vs PDF")
plt.show()
它会产生下面的图表