我正在使用 R 中的 clusterProfiler 包进行基因集富集分析。我的基本代码可以正常运行,但我想对结果对象 compareClusterResult 进行子集化,使其只包含特定的一部分通路(pathway),即只保留非疾病通路。我已经用 gage 包创建了一个非疾病通路列表,但不知道如何根据该列表对 compareClusterResult 对象进行子集化。
这是我正在分析的一小部分数据:
library(clusterProfiler)
dput(de_list)
list(fb = c("K08193", "K09851", "K07874", "K14847", "K14793",
"K06670", "K19009", "K13783", "K17963", "K15076", "K08492", "K15262",
"K00901", "K00078", "K15133", "K21407", "K13566", "K14454", "K23565",
"K09341", "K22414", "K00069", "K00069", "K07192", "K10276", "K11348",
"K10389", "K06054", "K06590", "K06678", "K03671", "K17302", "K08155",
"K23387", "K02951", "K12481", "K11434", "K18461", "K23439", "K13208",
"K16803", "K20793", "K06269", "K16749", "K12737", "K14264", "K00857",
"K21863", "K04459", "K01183", "K12856", "K23616", "K23195", "K09188",
"K20193", "K21249", "K05765", "K04703", "K12259", "K24014", "K10141",
"K11099", "K02263", "K01784", "K11884", "K24195", "K14810", "K15113",
"K15283", "K14999", "K14776", "K11433", "K00228", "K03253", "K01410",
"K05768", "K13288", "K07432", "K13718", "K11587", "K02912", "K15235",
"K04351", "K23893", "K20730", "K10310", "K00558", "K15837", "K01205",
"K11660", "K12021", "K23214", "K20791", "K07189", "K01507", "K16682",
"K18163", "K13142", "K23901", "K17501"), mg = c("K19788", "K07874",
"K00128", "K14793", "K06670", "K19009", "K13783", "K17963", "K19476",
"K00078", "K13915", "K21407", "K14719", "K13524", "K22414", "K00069",
"K00069", "K02178", "K12172", "K12866", "K13123", "K24254", "K17302",
"K08155", "K02951", "K12481", "K11434", "K13208", "K17602", "K10571",
"K13758", "K16749", "K00857", "K21863", "K06839", "K03241", "K04459",
"K18200", "K01183", "K23616", "K10442", "K17563", "K05765", "K12259",
"K10141", "K19326", "K10049", "K01784", "K00604", "K24195", "K15113",
"K15283", "K19527", "K14999", "K01410", "K11587", "K02912", "K13109",
"K15235", "K09595", "K23893", "K10310", "K11981", "K08858", "K00558",
"K01205", "K11583", "K11660", "K05291", "K12021", "K18660", "K10393",
"K23214", "K20791", "K06072", "K18163", "K17501", "K09848", "K23336",
"K03064", "K02366", "K02377", "K14971", "K20290", "K13240", "K20185",
"K01109", "K13125", "K16678", "K07964", "K05397", "K15175", "K08705",
"K08561", "K02519", "K17824", "K13122", "K15338", "K12821", "K08752"
))
# Run enrichKEGG on every gene cluster in de_list (here: "fb" and "mg") and
# collect the results in a single comparison object. organism = "ko" selects
# the KEGG ortholog namespace; only terms passing the p-value cutoff are kept.
# NOTE(review): requires access to the KEGG web service -- confirm it is
# reachable from the environment where this runs.
xx <- compareCluster(
  geneClusters = de_list,
  fun = "enrichKEGG",
  organism = "ko",
  pvalueCutoff = 0.05
)
以及我想保留的路径 ID 列表:
library(gage)
# Retrieve the KEGG gene sets for "ko" (the KEGG ortholog pathways).
kg.ko <- kegg.gsets("ko")

# Keep only the sets indexed by sigmet.idx, i.e. the metabolic and signaling
# pathways.
kegg.gs <- kg.ko$kg.sets[kg.ko$sigmet.idx]

# Cut every set name at its first space and store the remaining leading token
# in a one-column data frame named "ID".
# NOTE(review): assumes set names look like "<pathway id> <description>" --
# confirm against names(kegg.gs).
kegg.gs_names <- data.frame(ID = gsub(" .*$", "", names(kegg.gs)))
所以,我想使用 kegg.gs_names 对 xx 进行子集化(xx 中对应的条目是 xx@compareClusterResult$ID),同时保持 clusterProfiler 对象的结构不变,以便进行下游绘图。
这是相关的说明文档(vignette):http://yulab-smu.top/biomedical-knowledge-mining-book/enrichplot.html 。我想绘制其中 15.7 节的那种图,但图中不包含疾病通路。
老问题,但谷歌引导我来到这里,评论帮助我解决了我的问题。
xx 是一个包含许多槽(slot)的 S4 对象。我们关心的数据框保存在 xx@compareClusterResult 这个槽中。您可以对它应用任何常见的数据框操作,例如并集、交集、差集、子集、过滤、排序等,只保留您想要的内容,然后把修改后的数据框重新赋值回该槽。
# Keep only the enrichment rows whose pathway ID appears in the non-disease
# pathway list (kegg.gs_names$ID). The filter must be the pathway ID list, not
# de_list, which holds the KO gene identifiers used as enrichment input.
# `%in%` never yields NA, and `drop = FALSE` guarantees a data frame is
# returned even if the selection is empty.
keep_rows <- xx@compareClusterResult$ID %in% kegg.gs_names$ID
subset_df <- xx@compareClusterResult[keep_rows, , drop = FALSE]

# Write the filtered table back into its slot; the other slots of the S4
# object are left as they were.
xx@compareClusterResult <- subset_df
S4 对象的其余部分仍保留着其他信息,因此修改后的对象可以直接用 dotplot 函数进行绘图。