我有一份蛋白质强度列表,包含 5 个重复,每个重复有 3 个时间点。
我想评估不同时间点的蛋白质强度变化在重复之间是否存在统计差异。
数据框代码
# Example data in long format: 60 rows, one per
# gene x replicate x timepoint combination (4 genes x 5 replicates x 3
# timepoints). The structure(list(...)) form looks like dput() output.
#   genes     - character; one of "ZFTA", "IPO5", "COPE", "APOL1"
#   replicate - factor with levels "1" to "5"
#   intensity - numeric measurement; most values are 0
#   timepoint - factor with levels "2", "24", "48"
#   mean, sd  - numeric; each value repeats over four consecutive rows,
#               i.e. it is constant within a replicate/timepoint cell
#   id        - integer identifier (its meaning is not shown here)
dataframe <- structure(list(genes = c("ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA",
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1"), replicate = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), levels = c("1",
"2", "3", "4", "5"), class = "factor"), intensity = c(0, 5.3,
0.1, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 3.1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 6.4, 5.2, 234.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10.9,
6, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.9, 11.4, 125, 0, 0, 0, 0,
0, 0, 0, 0), timepoint = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L), levels = c("2", "24", "48"), class = "factor"),
mean = c(2534.11613603473, 2534.11613603473, 2534.11613603473,
2534.11613603473, 2545.86975397974, 2545.86975397974, 2545.86975397974,
2545.86975397974, 1041.9492040521, 1041.9492040521, 1041.9492040521,
1041.9492040521, 2352.51975397974, 2352.51975397974, 2352.51975397974,
2352.51975397974, 1549.3483357453, 1549.3483357453, 1549.3483357453,
1549.3483357453, 732.627062228654, 732.627062228654, 732.627062228654,
732.627062228654, 3142.20426917511, 3142.20426917511, 3142.20426917511,
3142.20426917511, 1043.8981910275, 1043.8981910275, 1043.8981910275,
1043.8981910275, 425.870405209841, 425.870405209841, 425.870405209841,
425.870405209841, 3623.80745296671, 3623.80745296671, 3623.80745296671,
3623.80745296671, 540.550434153401, 540.550434153401, 540.550434153401,
540.550434153401, 270.068668596237, 270.068668596237, 270.068668596237,
270.068668596237, 3062.0981910275, 3062.0981910275, 3062.0981910275,
3062.0981910275, 1628.59247467438, 1628.59247467438, 1628.59247467438,
1628.59247467438, 528.705282199711, 528.705282199711, 528.705282199711,
528.705282199711), sd = c(25094.6719445552, 25094.6719445552,
25094.6719445552, 25094.6719445552, 21058.9289049973, 21058.9289049973,
21058.9289049973, 21058.9289049973, 9975.3033998113, 9975.3033998113,
9975.3033998113, 9975.3033998113, 26179.750502137, 26179.750502137,
26179.750502137, 26179.750502137, 10207.7081411541, 10207.7081411541,
10207.7081411541, 10207.7081411541, 6420.40357806305, 6420.40357806305,
6420.40357806305, 6420.40357806305, 39498.0097492931, 39498.0097492931,
39498.0097492931, 39498.0097492931, 7936.15443848429, 7936.15443848429,
7936.15443848429, 7936.15443848429, 3413.66443657427, 3413.66443657427,
3413.66443657427, 3413.66443657427, 45997.397176612, 45997.397176612,
45997.397176612, 45997.397176612, 4168.32150996512, 4168.32150996512,
4168.32150996512, 4168.32150996512, 2961.16684541202, 2961.16684541202,
2961.16684541202, 2961.16684541202, 33839.982382166, 33839.982382166,
33839.982382166, 33839.982382166, 11360.6060492469, 11360.6060492469,
11360.6060492469, 11360.6060492469, 4131.58489947706, 4131.58489947706,
4131.58489947706, 4131.58489947706), id = c(1L, 2L, 3L, 4L,
1383L, 1384L, 1385L, 1386L, 2765L, 2766L, 2767L, 2768L, 4147L,
4148L, 4149L, 4150L, 5529L, 5530L, 5531L, 5532L, 6911L, 6912L,
6913L, 6914L, 8293L, 8294L, 8295L, 8296L, 9675L, 9676L, 9677L,
9678L, 11057L, 11058L, 11059L, 11060L, 12439L, 12440L, 12441L,
12442L, 13821L, 13822L, 13823L, 13824L, 15203L, 15204L, 15205L,
15206L, 16585L, 16586L, 16587L, 16588L, 17967L, 17968L, 17969L,
17970L, 19349L, 19350L, 19351L, 19352L)), row.names = c("1.2.1",
"1.2.2", "1.2.3", "1.2.4", "1.24.6911", "1.24.6912", "1.24.6913",
"1.24.6914", "1.48.13821", "1.48.13822", "1.48.13823", "1.48.13824",
"2.2.1383", "2.2.1384", "2.2.1385", "2.2.1386", "2.24.8293",
"2.24.8294", "2.24.8295", "2.24.8296", "2.48.15203", "2.48.15204",
"2.48.15205", "2.48.15206", "3.2.2765", "3.2.2766", "3.2.2767",
"3.2.2768", "3.24.9675", "3.24.9676", "3.24.9677", "3.24.9678",
"3.48.16585", "3.48.16586", "3.48.16587", "3.48.16588", "4.2.4147",
"4.2.4148", "4.2.4149", "4.2.4150", "4.24.11057", "4.24.11058",
"4.24.11059", "4.24.11060", "4.48.17967", "4.48.17968", "4.48.17969",
"4.48.17970", "5.2.5529", "5.2.5530", "5.2.5531", "5.2.5532",
"5.24.12439", "5.24.12440", "5.24.12441", "5.24.12442", "5.48.19349",
"5.48.19350", "5.48.19351", "5.48.19352"), class = "data.frame")
我试过这个:
# Attempted mixed-design ANOVA. Note that `replicate` is supplied both as
# the subject identifier (`wid`) and as the between-subjects factor.
library(rstatix)
res.aov <- anova_test(
  data = dataframe,
  dv = intensity,
  wid = replicate,
  within = timepoint,
  between = replicate
)
get_anova_table(res.aov)
结果是:
Error in `mutate()`:
ℹ In argument: `data = map(.data$data, dplyr::distinct, replicate, .keep_all = TRUE)`.
ℹ In group 1: `replicate = 1`.
Caused by error in `map()`:
ℹ In index: 1.
Caused by error in `.f()`:
! Must use existing variables.
✖ `replicate` not found in `.data`.
我想也许我的做法完全错误。请帮忙
提前致谢。
根据您对数据的描述,听起来您是在受试者内进行了重复测量。受试者是 `genes` 和 `replicate` 的唯一组合。我们需要创建一个附加列来标识这些唯一组合,然后将该列名称传递给 `anova_test()` 的 `wid` 参数。
在提供代码之前,我要郑重提醒您:这不太可能得出有用的推断,因为您的强度变量远不接近正态分布。它大部分是零,只有少数正值。我建议研究替代方案,例如广义线性混合模型(零膨胀)、非参数检验,或将强度变量转换为二元变量(零与非零)并拟合二项式模型。不过,以下代码将按照您的要求进行方差分析。
# Build the subject identifier: one factor level per genes x replicate
# combination.
dataframe$gene_rep <- interaction(dataframe$genes, dataframe$replicate)

# Mixed-design ANOVA: `gene_rep` identifies the subjects, `timepoint` is the
# within-subjects factor and `replicate` the between-subjects factor.
res.aov <- anova_test(
  data = dataframe,
  dv = intensity,
  wid = gene_rep,
  within = timepoint,
  between = replicate
)
get_anova_table(res.aov)