我写了两个程序来进行RWR。
一个使用 networkx 图,我在转移概率矩阵上运行:
import numpy as np
import networkx as nx
def run_rwr(nxgraph, a, steps):
# Transition probability matrix
A = nx.adjacency_matrix(nxgraph).astype(float)
# Create the transition probability matrix
D = np.diag(np.array(A.sum(axis=1)).flatten())
# Calculate the Laplacian matrix
L = D - A
# Calculate the transition probability matrix
P = np.linalg.inv(D) @ L
# Initialize the probability distribution vector
p = np.zeros(len(nxgraph.nodes))
p[0] = 1
old_p = p
# Save the initial vector
p_0 = p
for j in range(steps):
# Random walk step
p = (1 - a) * P @ old_p + a * p_0
# Check if converged
if np.allclose(p, old_p):
break
old_p = p
return p
def run_rwr(A, a, steps):
# Initialize the probability distribution vector
p = np.zeros((A.shape[0], 1))
p[0] = 1
old_p = p
# Save the initial vector
p_0 = p
for i in range(steps):
# Random walk step
p = (1 - a) * A @ old_p + a * p_0
# check if converged
if np.linalg.norm(p - old_p) < 1e-6:
break
old_p = p
pbar.close()
return p
def build_A():
X = np.loadtxt("human-graph.tsv", dtype=float, comments='#')
m, n = X.shape
print(X.shape)
# the graph is unweighted
X = np.c_[X, np.ones(m)]
base = np.amin(X[:, 0:2])
if base < 0:
raise ValueError('Out of range of node ids: negative base')
# make node id start from 0
X[:, 0:2] = X[:, 0:2] - base
# sort by the first column
b_idx = X[:, 0] > X[:, 1]
a_idx = np.logical_not(b_idx)
B = X[b_idx, :]
# swap the first and second column
B[:, [0, 1]] = B[:, [1, 0]]
# concatenate the two matrices
A = X[a_idx, :]
X = np.vstack((A, B))
# get the row, column, and data vectors
rows = X[:, 0]
cols = X[:, 1]
data = X[:, 2]
# assume id starts from 0
n = int(np.amax(X[:, 0:2])) + 1
# create a sparse matrix from the adjacency matrix
_A = csr_matrix((data, (rows, cols)), shape=(n, n))
A = _A + _A.T
I, J, K = find(A)
A = csr_matrix((np.ones(len(K)), (I, J)), shape=A.shape)
return A
为什么第二个代码在几秒钟内运行,而第一个代码需要很长时间? 当我尝试使用 cupy 在 GPU 上运行第一个时,它因内存错误而崩溃。