这是此问题的示例数据。
library(data.table)

# Sample input for the question: two character sort keys (sortcol1, sortcol2)
# and one numeric value column (sortcol3) that the running totals are built on.
# NOTE(review): the very first sortcol2 value is "26g", while every other
# occurrence is " 26g" (leading space) -- confirm whether that is intentional.
sampleDT <- data.table(
  sortcol1 = c("Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Ez","Ez","Ez","Ez","Ez","Ez","Ez","Jd","Jd","Jd","Jd","Jd","Jd","Jd","Jd","Jd","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Oz","Oz","Oz","Oz","Oz","Oz","Oz","Oz","Qm","Qm","Qm","Qm","Qm","Qm","Qm","Qm","Rn","Rn","Rn","Rn","Rn","Rn","Rn","Rn","Rn","Yv","Yv","Yv","Yv","Yv","Yv","Yv","Yv","Yv","Yv"),
  sortcol2 = c("26g","A8q","A8q","A8q","A8q","A8q","F20z","K12s","K12s","K12s","M12g","M15h","T9c"," 26g"," 26g","A8q","A8q","A8q","F20z","K12s","K25h","M12g","M12g","M15h","M15h","N22p","N22p","A8q","A8q","F20z","M12g","M12g","N22p","N22p","A8q","K12s","K12s","M12g","N15z","N15z","N15z","N15z","N22p"," 26g","A8q","F20z","F20z","K12s","K12s","K25h","N15z","T9c","T9c"," 26g","A8q","F20z","M12g","M12g","M15h","M15h","M15h","M15h","N15z","N15z","N22p"," 26g","A8q","A8q","K25h","K25h","K25h","M15h","T9c","K12s","K25h","K25h","M12g","N15z","N22p","N22p","N22p","A8q","F20z","K12s","K12s","K12s","M15h","N15z","T9c","T9c"," 26g","A8q","K12s","M12g","M12g","M12g","M15h","N15z","N22p","T9c"),
  sortcol3 = c(52024,5149,17725,52024,71627,71627,17725,5149,5149,121714,121714,5149,52024,17725,71627,5149,52024,71627,241210,16325,1966,1966,121714,1966,171721,121714,241210,5149,221324,171721,17725,71627,71627,241210,241210,5149,17725,16325,1966,5149,16325,121714,16325,1966,241210,17725,121714,16325,221324,52024,17725,171721,241210,1966,5149,16325,17725,121714,1966,121714,171721,221324,1966,71627,1966,221324,52024,71627,16325,221324,221324,241210,5149,17725,17725,221324,16325,17725,1966,71627,121714,52024,52024,5149,71627,221324,5149,241210,5149,71627,16325,1966,16325,5149,17725,52024,71627,5149,121714,121714)
)
这将是一个理想的输出,其中 `calc1` 和 `calc2` 是待计算的列。
# Expected result for the question: the three input columns of sampleDT plus
# the two columns to be computed.
#   calc1 - running total of sortcol3 that restarts at 0 for each sortcol1 value
#   calc2 - running total of sortcol3 that restarts at 0 for each
#           (sortcol1, sortcol2) pair
# Fix: one " 26g" literal in sortcol2 used to be split across two source lines,
# which embedded a newline in the value (" \n26g") so it no longer matched the
# corresponding sampleDT row. It is now a single " 26g" literal again.
# NOTE(review): the very first sortcol2 value is "26g", while every other
# occurrence is " 26g" (leading space) -- confirm whether that is intentional.
sampleDToutput <- data.table(
  sortcol1 = c("Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Ai","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Bs","Ez","Ez","Ez","Ez","Ez","Ez","Ez","Jd","Jd","Jd","Jd","Jd","Jd","Jd","Jd","Jd","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Nl","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Ok","Oz","Oz","Oz","Oz","Oz","Oz","Oz","Oz","Qm","Qm","Qm","Qm","Qm","Qm","Qm","Qm","Rn","Rn","Rn","Rn","Rn","Rn","Rn","Rn","Rn","Yv","Yv","Yv","Yv","Yv","Yv","Yv","Yv","Yv","Yv"),
  sortcol2 = c("26g","A8q","A8q","A8q","A8q","A8q","F20z","K12s","K12s","K12s","M12g","M15h","T9c"," 26g"," 26g","A8q","A8q","A8q","F20z","K12s","K25h","M12g","M12g","M15h","M15h","N22p","N22p","A8q","A8q","F20z","M12g","M12g","N22p","N22p","A8q","K12s","K12s","M12g","N15z","N15z","N15z","N15z","N22p"," 26g","A8q","F20z","F20z","K12s","K12s","K25h","N15z","T9c","T9c"," 26g","A8q","F20z","M12g","M12g","M15h","M15h","M15h","M15h","N15z","N15z","N22p"," 26g","A8q","A8q","K25h","K25h","K25h","M15h","T9c","K12s","K25h","K25h","M12g","N15z","N22p","N22p","N22p","A8q","F20z","K12s","K12s","K12s","M15h","N15z","T9c","T9c"," 26g","A8q","K12s","M12g","M12g","M12g","M15h","N15z","N22p","T9c"),
  sortcol3 = c(52024,5149,17725,52024,71627,71627,17725,5149,5149,121714,121714,5149,52024,17725,71627,5149,52024,71627,241210,16325,1966,1966,121714,1966,171721,121714,241210,5149,221324,171721,17725,71627,71627,241210,241210,5149,17725,16325,1966,5149,16325,121714,16325,1966,241210,17725,121714,16325,221324,52024,17725,171721,241210,1966,5149,16325,17725,121714,1966,121714,171721,221324,1966,71627,1966,221324,52024,71627,16325,221324,221324,241210,5149,17725,17725,221324,16325,17725,1966,71627,121714,52024,52024,5149,71627,221324,5149,241210,5149,71627,16325,1966,16325,5149,17725,52024,71627,5149,121714,121714),
  calc1 = c(0,5149,10298,15447,20596,38321,56046,108070,160094,212118,283745,355372,477086,0,1966,3932,5898,11047,27372,45097,97121,168748,240375,362089,483803,655524,896734,0,5149,22874,94501,166128,337849,559173,0,1966,7115,12264,28589,44914,61239,78964,200678,0,1966,18291,36016,53741,105765,227479,399200,620524,861734,0,1966,3932,5898,7864,13013,29338,47063,118690,240404,362118,533839,0,5149,21474,73498,145125,366449,587773,809097,0,1966,18291,36016,53741,71466,143093,264807,0,5149,10298,15447,67471,119495,191122,262749,484073,0,1966,7115,12264,28589,44914,62639,114663,186290,308004),
  calc2 = c(0,0,5149,22874,74898,146525,0,0,5149,10298,0,0,0,0,17725,0,5149,57173,0,0,0,0,1966,0,1966,0,121714,0,5149,0,0,17725,0,5149,0,0,5149,0,0,1966,7115,23440,0,0,0,0,17725,0,16325,0,0,0,171721,0,0,0,0,17725,0,1966,123680,295401,0,1966,0,0,0,52024,0,16325,237649,0,0,0,0,17725,0,0,0,1966,73593,0,0,0,5149,76776,0,0,0,5149,0,0,0,0,5149,22874,0,0,0,0)
)
(一)`calc1` 是按 `sortcol3` 和 `sortcol1` 两列排序后 `sortcol3` 列的累积和(不含当前行),并对 `sortcol1` 的每个唯一项目重新从 0 开始累计。如果能算出 `calc1` 则更好。
(二)`calc2` 是按 `sortcol1`、`sortcol2` 和 `sortcol3` 三列进行自定义排序后 `sortcol3` 列的累积和(不含当前行),并对 `sortcol1` 与 `sortcol2` 串联后的每个唯一项目重新从 0 开始累计。如果能算出 `calc2` 则更好。如果可以,请只使用 data.table。
一种方法是用 `cbind` 把两个独立 `order` 排序后的结果拼接起来(请记住,`calc1` 现在已经乱序,不再对应于其源行):
library(data.table)
# Exclusive running total: element i is the sum of the elements before it,
# so the first element of every group is 0.
sum_before <- function(x) cumsum(c(0, x)[seq_along(x)])

# calc2: rows ordered by all three sort columns; the total restarts for each
# (sortcol1, sortcol2) pair.
per_pair <- sampleDT[
  order(sortcol1, sortcol2, sortcol3),
  .(sortcol3, calc2 = sum_before(sortcol3)),
  by = .(sortcol1, sortcol2)
]

# calc1: rows ordered by sortcol1 then sortcol3; the total restarts for each
# sortcol1 value.
per_key <- sampleDT[
  order(sortcol1, sortcol3),
  .(calc1 = sum_before(sortcol3)),
  by = sortcol1
]

# Bind the two results side by side, dropping the duplicate sortcol1 column
# from the second. The two tables were built with different row orders, so
# calc1 is only positionally attached and does not belong to the sortcol2 /
# sortcol3 values shown on the same row.
cbind(per_pair, per_key[, -1])
sortcol1 sortcol2 sortcol3 calc2 calc1
<char> <char> <num> <num> <num>
1: Ai 26g 52024 0 0
2: Ai A8q 5149 0 5149
3: Ai A8q 17725 5149 10298
4: Ai A8q 52024 22874 15447
5: Ai A8q 71627 74898 20596
6: Ai A8q 71627 146525 38321
7: Ai F20z 17725 0 56046
8: Ai K12s 5149 0 108070
9: Ai K12s 5149 5149 160094
10: Ai K12s 121714 10298 212118
11: Ai M12g 121714 0 283745
12: Ai M15h 5149 0 355372
13: Ai T9c 52024 0 477086
14: Bs 26g 17725 0 0