R 中多次重复的蛋白质强度随时间点变化的统计数据

问题描述 投票:0回答:1

我有 5 个重复的 3 个时间点的蛋白质强度列表。

我想评估不同时间点的蛋白质强度变化在重复之间是否存在统计差异。

数据框代码

# Example data in long format (dput output): 60 rows, i.e. 4 genes x 3 timepoints
# x 5 replicates. Columns:
#   genes     - character gene symbol (ZFTA, IPO5, COPE, APOL1)
#   replicate - factor with levels "1".."5"
#   intensity - numeric protein intensity (mostly zeros, a few positive values)
#   timepoint - factor with levels "2", "24", "48" (units not stated here)
#   mean, sd  - numeric; each value repeats across the 4 rows sharing a
#               replicate/timepoint (how they were computed is not shown here)
#   id        - integer row identifier
dataframe <- structure(list(genes = c("ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1", "ZFTA", 
"IPO5", "COPE", "APOL1", "ZFTA", "IPO5", "COPE", "APOL1"), replicate = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), levels = c("1", 
"2", "3", "4", "5"), class = "factor"), intensity = c(0, 5.3, 
0.1, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 3.1, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 6.4, 5.2, 234.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10.9, 
6, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.9, 11.4, 125, 0, 0, 0, 0, 
0, 0, 0, 0), timepoint = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L), levels = c("2", "24", "48"), class = "factor"), 
    mean = c(2534.11613603473, 2534.11613603473, 2534.11613603473, 
    2534.11613603473, 2545.86975397974, 2545.86975397974, 2545.86975397974, 
    2545.86975397974, 1041.9492040521, 1041.9492040521, 1041.9492040521, 
    1041.9492040521, 2352.51975397974, 2352.51975397974, 2352.51975397974, 
    2352.51975397974, 1549.3483357453, 1549.3483357453, 1549.3483357453, 
    1549.3483357453, 732.627062228654, 732.627062228654, 732.627062228654, 
    732.627062228654, 3142.20426917511, 3142.20426917511, 3142.20426917511, 
    3142.20426917511, 1043.8981910275, 1043.8981910275, 1043.8981910275, 
    1043.8981910275, 425.870405209841, 425.870405209841, 425.870405209841, 
    425.870405209841, 3623.80745296671, 3623.80745296671, 3623.80745296671, 
    3623.80745296671, 540.550434153401, 540.550434153401, 540.550434153401, 
    540.550434153401, 270.068668596237, 270.068668596237, 270.068668596237, 
    270.068668596237, 3062.0981910275, 3062.0981910275, 3062.0981910275, 
    3062.0981910275, 1628.59247467438, 1628.59247467438, 1628.59247467438, 
    1628.59247467438, 528.705282199711, 528.705282199711, 528.705282199711, 
    528.705282199711), sd = c(25094.6719445552, 25094.6719445552, 
    25094.6719445552, 25094.6719445552, 21058.9289049973, 21058.9289049973, 
    21058.9289049973, 21058.9289049973, 9975.3033998113, 9975.3033998113, 
    9975.3033998113, 9975.3033998113, 26179.750502137, 26179.750502137, 
    26179.750502137, 26179.750502137, 10207.7081411541, 10207.7081411541, 
    10207.7081411541, 10207.7081411541, 6420.40357806305, 6420.40357806305, 
    6420.40357806305, 6420.40357806305, 39498.0097492931, 39498.0097492931, 
    39498.0097492931, 39498.0097492931, 7936.15443848429, 7936.15443848429, 
    7936.15443848429, 7936.15443848429, 3413.66443657427, 3413.66443657427, 
    3413.66443657427, 3413.66443657427, 45997.397176612, 45997.397176612, 
    45997.397176612, 45997.397176612, 4168.32150996512, 4168.32150996512, 
    4168.32150996512, 4168.32150996512, 2961.16684541202, 2961.16684541202, 
    2961.16684541202, 2961.16684541202, 33839.982382166, 33839.982382166, 
    33839.982382166, 33839.982382166, 11360.6060492469, 11360.6060492469, 
    11360.6060492469, 11360.6060492469, 4131.58489947706, 4131.58489947706, 
    4131.58489947706, 4131.58489947706), id = c(1L, 2L, 3L, 4L, 
    1383L, 1384L, 1385L, 1386L, 2765L, 2766L, 2767L, 2768L, 4147L, 
    4148L, 4149L, 4150L, 5529L, 5530L, 5531L, 5532L, 6911L, 6912L, 
    6913L, 6914L, 8293L, 8294L, 8295L, 8296L, 9675L, 9676L, 9677L, 
    9678L, 11057L, 11058L, 11059L, 11060L, 12439L, 12440L, 12441L, 
    12442L, 13821L, 13822L, 13823L, 13824L, 15203L, 15204L, 15205L, 
    15206L, 16585L, 16586L, 16587L, 16588L, 17967L, 17968L, 17969L, 
    17970L, 19349L, 19350L, 19351L, 19352L)), row.names = c("1.2.1", 
"1.2.2", "1.2.3", "1.2.4", "1.24.6911", "1.24.6912", "1.24.6913", 
"1.24.6914", "1.48.13821", "1.48.13822", "1.48.13823", "1.48.13824", 
"2.2.1383", "2.2.1384", "2.2.1385", "2.2.1386", "2.24.8293", 
"2.24.8294", "2.24.8295", "2.24.8296", "2.48.15203", "2.48.15204", 
"2.48.15205", "2.48.15206", "3.2.2765", "3.2.2766", "3.2.2767", 
"3.2.2768", "3.24.9675", "3.24.9676", "3.24.9677", "3.24.9678", 
"3.48.16585", "3.48.16586", "3.48.16587", "3.48.16588", "4.2.4147", 
"4.2.4148", "4.2.4149", "4.2.4150", "4.24.11057", "4.24.11058", 
"4.24.11059", "4.24.11060", "4.48.17967", "4.48.17968", "4.48.17969", 
"4.48.17970", "5.2.5529", "5.2.5530", "5.2.5531", "5.2.5532", 
"5.24.12439", "5.24.12440", "5.24.12441", "5.24.12442", "5.48.19349", 
"5.48.19350", "5.48.19351", "5.48.19352"), class = "data.frame")

我试过这个:

# The asker's attempt at a repeated-measures ANOVA with rstatix.
library(rstatix)
# NOTE: `replicate` is passed both as the subject identifier (`wid`) and as the
# between-subjects factor (`between`). This is the call that raises the
# "`replicate` not found in `.data`" error quoted after this snippet.
res.aov <- anova_test(data = dataframe, dv = intensity, wid = replicate, within = timepoint, between = replicate)
# Extract the ANOVA table from the result (not reached while the call above errors).
get_anova_table(res.aov)

结果是:

Error in `mutate()`:
ℹ In argument: `data = map(.data$data, dplyr::distinct, replicate, .keep_all = TRUE)`.
ℹ In group 1: `replicate = 1`.
Caused by error in `map()`:
ℹ In index: 1.
Caused by error in `.f()`:
! Must use existing variables.
✖ `replicate` not found in `.data`.

我想也许我的做法完全错误。请帮忙

提前致谢(TIA)

r bioinformatics anova
1个回答
0
投票

根据您对数据的描述,听起来您是在受试者(subject)内进行了重复测量。这里的受试者是 `genes` 与 `replicate` 的唯一组合。我们需要新建一列来标识这些唯一组合,然后把该列名传给 `anova_test()` 的 `wid` 参数。

在给出代码之前,我必须郑重提醒您:这种分析不太可能得出有用的推断,因为您的强度变量远非正态分布——其中大部分为零,只有少数正值。我建议考虑其他方案,例如(零膨胀的)广义线性混合模型、非参数检验,或者将强度变量转换为二元变量(零与非零)并拟合二项式模型。不过,下面的代码会按照您的要求执行方差分析。

# Build the subject identifier: one factor level per genes x replicate pairing.
dataframe$gene_rep <- interaction(dataframe$genes, dataframe$replicate)

# Mixed ANOVA: `timepoint` is the within-subject factor, `replicate` the
# between-subject factor, and `gene_rep` identifies each subject.
res.aov <- anova_test(
  data = dataframe,
  dv = intensity,
  wid = gene_rep,
  within = timepoint,
  between = replicate
)
get_anova_table(res.aov)
© www.soinside.com 2019 - 2024. All rights reserved.