我在 R 中遇到了一个更偏数学的问题,但我对自己的代码也有些怀疑,所以才在这里发帖提问。我正在尝试应用一些公式来构建若干指数,用于后续的统计分析。这些公式先测量不同观测值的位置,然后把这些位置代入一个进一步的聚类指数,用来衡量每个组内观测值的聚集程度(本质上就是组内的极化程度)。我把它应用到了两个位置框架上。说明这些是为了让你了解背景。这些公式来自先前的研究,所以我不太打算修改它们(至少目前如此)。
首先是位置公式:
然后是极化公式:
用这些公式创建的新列,结果非常离谱。它们有时会产生
inf
和 -inf
的位置值,这完全破坏了测量,使我无法用它进行分析,还会波及极化指数,使其变成 NaN
。我不知道为什么会这样(也许我犯了一个可怕的错误,但我不知道具体在哪里),我能想到的唯一合理的事情是,也许分母可以是0
,然后除以0
可能会产生此类问题(因为它在数学上是未定义的)。我在下面留下了 MWE 数据,以便您可以看到正在发生的情况以及我用于创建变量的基本代码。
数据的 MWE (
dput(head(data,10))
)。需要说明的是,尽管这份数据的前几行只在其中一个位置框架上显示出问题,但两个框架上都会发生这种错误。
structure(list(countryname = c("Sweden", "Sweden", "Sweden",
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden"
), partyname = c("Green Ecology Party", "Left Party", "Social Democratic Labour Party",
"Liberal People’s Party", "Christian Democratic Community Party",
"Moderate Coalition Party", "Centre Party", "New Democracy",
"Green Ecology Party", "Left Party"), partyabbrev = c("MP", "V",
"SAP", "FP", "KdS", "MSP", "CP", "NyD", "MP", "V"), edate = structure(c(7927,
7927, 7927, 7927, 7927, 7927, 7927, 7927, 9026, 9026), class = "Date"),
date = c(199109L, 199109L, 199109L, 199109L, 199109L, 199109L,
199109L, 199109L, 199409L, 199409L), pervote = c(3.383, 4.513,
37.705, 9.128, 7.135, 21.924, 8.503, 6.732, 5.023, 6.174),
per101_bal = c(0.0131533759186757, 0, 0, 0, 0, 0.00552656143923965,
0, 0.0610648953779551, 0, 0), per102_bal = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0), per103_bal = c(0, 0.0241945356407545,
0, 0, 0, 0, 0, 0, 0, 0), per104_bal = c(0, 0, 0, 0, 0, 0.0331487610319077,
0, 0.0152688765332541, 0, 0), per105_bal = c(0.0526363990200792,
0.0161259807467587, 0, 0, 0, 0, 0, 0, 0.027027027027027,
0), per106_bal = c(0.0131533759186757, 0.00806855489399588,
0, 0, 0.0238122675606307, 0, 0.0083311833505332, 0.00381987182207886,
0, 0), per107_bal = c(0.0526363990200792, 0.112904123309777,
0.0784338716050012, 0.0688056308360199, 0.0952385941019329,
0.0110531228784793, 0.0166623667010664, 0.00762913288909639,
0, 0.0440225813593093), per108_bal = c(0, 0.0161259807467587,
0.0392169358025006, 0.0963304519824807, 0.0238122675606307,
0.0828772063815343, 0.0083311833505332, 0.0228980094223505,
0, 0), per109_bal = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), per110_bal = c(0.10526135036747,
0.0322630905347504, 0, 0, 0, 0, 0, 0, 0.0675675675675676,
0.056608368386097), per201_bal = c(0.0921079744487946, 0,
0, 0.174306741847233, 0.0952385941019329, 0.0828772063815343,
0.0250042999656003, 0.0954225202665422, 0, 0.00628735886650432
), per202_bal = c(0.10526135036747, 0.0806410327750264, 0.0392169358025006,
0.0825744634393825, 0.0476140589806715, 0.0110531228784793,
0.0416666666666667, 0.0458066295997623, 0.0135135135135135,
0.0754704449856099), per203_bal = c(0, 0, 0, 0, 0, 0, 0,
0.0114490047111752, 0.0135135135135135, 0), per204_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per301_bal = c(0, 0, 0, 0, 0.0238122675606307,
0, 0.0333354833161335, 0.0114490047111752, 0, 0), per302_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per303_bal = c(0, 0, 0, 0, 0.0238122675606307,
0.0331487610319077, 0, 0.114500657866814, 0, 0), per304_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per305_bal = c(0, 0, 0.0392169358025006,
0, 0, 0, 0.0083311833505332, 0, 0, 0), per401_bal = c(0,
0, 0, 0.0688056308360199, 0, 0.0828772063815343, 0.0083311833505332,
0.0343470141335257, 0, 0), per402_bal = c(0, 0, 0.0196020953084972,
0.0412808096895591, 0, 0.099446283095723, 0.0666709666322669,
0.0572556343109376, 0, 0), per403_bal = c(0.0131533759186757,
0.0403205163875132, 0, 0.00458532951436608, 0, 0, 0, 0.019088748355333,
0.108108108108108, 0.100630949745406), per404_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0.0125747177330086), per405_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per406_bal = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), per407_bal = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), per408_bal = c(0, 0, 0.00980104765424861, 0.0137559885430982,
0, 0, 0.0083311833505332, 0, 0.0135135135135135, 0), per409_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0.0125747177330086), per410_bal = c(0.0131533759186757,
0.0403205163875132, 0.0392169358025006, 0.0183541621177287,
0, 0, 0.0499978500171999, 0, 0.0135135135135135, 0.0943435908789019
), per411_bal = c(0, 0.00806855489399588, 0.0490179834567492,
0.00917065902873216, 0, 0, 0.0833333333333333, 0.0534357624888587,
0, 0.00628735886650432), per412_bal = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0), per413_bal = c(0, 0.00806855489399588, 0, 0,
0, 0, 0, 0, 0, 0), per414_bal = c(0.0131533759186757, 0,
0.0686328239507526, 0.00458532951436608, 0.0952385941019329,
0.160217243720299, 0.0250042999656003, 0.206103306311277,
0.135135135135135, 0.0314478636263006), per415_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per416_bal = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), per501_bal = c(0.434221672733933, 0.177419175338045,
0.147053950370248, 0.0550496422929216, 0.119050861662564,
0.0386753224711473, 0.241668816649467, 0.0381668859556046,
0.364864864864865, 0.144653531104716), per502_bal = c(0.0263181995100396,
0.0161259807467587, 0, 0.00458532951436608, 0.0476140589806715,
0.0165796843177189, 0.0166623667010664, 0, 0, 0.0251605047597963
), per503_bal = c(0.0526363990200792, 0.0887095876690223,
0.196084679012503, 0.100915781496847, 0.0714263265413022,
0.027622199592668, 0.125, 0.0267178812444294, 0.175675675675676,
0.232709763117113), per504_bal = c(0, 0.104835568415781,
0.0588190311109978, 0.0321101506608269, 0.142852653082604,
0.0220956381534284, 0.0666709666322669, 0.00381987182207886,
0.0540540540540541, 0.0628957272526013), per505_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per506_bal = c(0, 0.0241945356407545,
0.00980104765424861, 0.0321101506608269, 0, 0.0883931602172437,
0.0250042999656003, 0.0267178812444294, 0, 0.0125747177330086
), per507_bal = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), per601_bal = c(0,
0, 0.0490179834567492, 0.00458532951436608, 0, 0.027622199592668,
0.0166623667010664, 0.00762913288909639, 0, 0), per602_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per603_bal = c(0, 0, 0, 0, 0.0714263265413022,
0.0220956381534284, 0.0083311833505332, 0.064884767200034,
0, 0), per604_bal = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), per605_bal = c(0,
0, 0, 0, 0, 0.0883931602172437, 0.0666709666322669, 0.0687046390221128,
0, 0), per606_bal = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), per607_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per608_bal = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0), per701_bal = c(0.0131533759186757, 0.0564464971342719,
0.0686328239507526, 0.0229394916320947, 0, 0, 0.0083311833505332,
0, 0.0135135135135135, 0.0314478636263006), per702_bal = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), per703_bal = c(0, 0.00806855489399588,
0, 0, 0, 0.0331487610319077, 0.0083311833505332, 0, 0, 0),
per704_bal = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), per705_bal = c(0,
0.0241945356407545, 0.029415888148252, 0.0183541621177287,
0.0952385941019329, 0.0110531228784793, 0.0083311833505332,
0, 0, 0), per706_bal = c(0, 0.112904123309777, 0.0588190311109978,
0.146794764761036, 0.0238122675606307, 0.0220956381534284,
0.0250042999656003, 0.00381987182207886, 0, 0.0503099402258136
), per103_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per103_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per201_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per201_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per202_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per202_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per202_3_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per202_4_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per305_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per305_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per305_3_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per305_4_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per305_5_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per305_6_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per416_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per416_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per601_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per601_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per602_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per602_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per605_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per605_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per606_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per606_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per607_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per607_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per607_3_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per608_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per608_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per608_3_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per703_1_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), per703_2_bal = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
)), row.names = c(NA, 10L), class = "data.frame")
代码:
# Polarization of one group (here: one country-election).
#
# Evaluates the same expression as before,
#   sqrt(sum(weight * (position - mean(position)) / 5)^2),
# but only over finite positions, so a single undefined log-ratio no longer
# turns the whole group's index into NaN. Returns NA when the group has no
# finite position at all (otherwise the empty sum would report a spurious 0).
#
# NOTE(review): `^2` is applied to the whole sum, so this expression equals
# abs(sum(...)). If the published formula squares each deviation before
# weighting and summing, the parentheses need to move -- confirm against the
# paper; the expression is deliberately left unchanged here.
# NOTE(review): parties with an undefined position are dropped together with
# their vote share; whether that (or smoothing the zero sums instead) is
# appropriate is a substantive decision -- confirm.
polarization_index <- function(weight, position) {
  usable <- is.finite(position)
  if (!any(usable)) {
    return(NA_real_)
  }
  centred <- position[usable] - mean(position[usable])
  sqrt(sum(weight[usable] * centred / 5)^2)
}

mod_data <- data %>%
  # Step 1: aggregate the category shares into the four scale components.
  mutate(
    right = rowSums(across(c(
      per102_bal, per104_bal, per109_bal, per110_bal, per204_bal,
      per302_bal, per407_bal, per414_bal, per601_bal, per603_bal,
      per608_bal, per702_bal, per505_bal, per507_bal
    ))),
    left = rowSums(across(c(
      per101_bal, per105_bal, per107_bal, per108_bal, per203_bal,
      per301_bal, per406_bal, per409_bal, per602_bal, per604_bal,
      per607_bal, per701_bal, per504_bal, per506_bal
    ))),
    gal = rowSums(across(c(
      per501_bal, per602_bal, per604_bal, per502_bal, per607_bal,
      per416_bal, per705_bal, per706_bal, per201_bal, per202_bal
    ))),
    tan = rowSums(across(c(
      per305_bal, per601_bal, per605_bal, per608_bal, per606_bal
    )))
  ) %>%
  # Step 2: log-ratio positions. A zero numerator gives -Inf, a zero
  # denominator gives Inf and 0 / 0 gives NaN; mark all of these as NA so
  # they are visibly "undefined" instead of poisoning later arithmetic.
  mutate(
    lr = log(right / left),
    galtan = log(tan / gal),
    lr = replace(lr, !is.finite(lr), NA_real_),
    galtan = replace(galtan, !is.finite(galtan), NA_real_)
  ) %>%
  # Step 3: polarization per country-election, repeated on every party row.
  group_by(countryname, edate) %>%
  mutate(
    pol_lr = polarization_index(pervote, lr),
    pol_galtan = polarization_index(pervote, galtan)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()
有人能想出解决这个问题的办法吗?我使用的第一个公式是否存在固有缺陷,导致无法进行测量?还是我编写的代码中存在会引发此问题的错误?我知道这些问题有些奇怪,但我确实需要帮助,我自己实在解决不了。
第一步(第一个
mutate
)后停止管道并查看结果:
# Inspect the four aggregated components for zeros before any log is taken.
summary(select(mod_data, right, left, tan, gal))
right left tan gal
Min. :0.009171 Min. :0.1081 Min. :0.000000 Min. :0.1823
1st Qu.:0.059512 1st Qu.:0.1484 1st Qu.:0.000000 1st Qu.:0.2813
Median :0.118033 Median :0.1867 Median :0.002293 Median :0.3684
Mean :0.132189 Mean :0.2040 Mean :0.037683 Mean :0.3658
3rd Qu.:0.193693 3rd Qu.:0.2543 3rd Qu.:0.085260 3rd Qu.:0.4242
Max. :0.293886 Max. :0.3306 Max. :0.116015 Max. :0.6579
我们看到
tan
包含零值(事实上,至少最低 25% 的值为零,因为第一个四分位数以及最小值为零)。因此,在下一个表达式中,tan/gal
将为零,因此log(tan/gal)
将是-Inf
。正如上面的评论者所说,这属于学科领域的问题,而不是技术问题:能否以合理的方式处理这些零值,需要由您根据领域知识来决定......