R 循环时间太长,我该如何以另一种方式解决这个问题?

问题描述 投票:0回答:4

我正在从收集 NFL 数据的特定站点上抓取和清理数据,因此我最终得到了不同长度的数据帧,如下所示:

enter image description here

现在我要以特定方式总结数据,从球队客场和主场平均数据开始。所以我使用 NFL 球队向量并循环这些线,如下所示:

teamList <- as.matrix(c("New England Patriots", "Dallas Cowboys", "Denver Broncos",
                    "Pittsburgh Steelers", "Seattle Seahawks","Oakland Raiders", "Philadelphia Eagles", "Green Bay Packers"
                    ,"San Francisco 49ers", "New York Giants","Chicago Bears", "Minnesota Vikings", "Washington Redskins",
                    "Carolina Panthers", "Carolina Panthers", "New Orleans Saints", "St. Louis Rams", "New York Jets",
                    "Baltimore Ravens", "San Diego Chargers", "Indianapolis Colts", "Houston Texans", "Arizona Cardinals",
                    "Detroit Lions", "Cleveland Browns", "Atlanta Falcons", "Buffalo Bills", "Jacksonville Jaguars", "Cincinnati Bengals",
                    "Kansas City Chiefs", "Tampa Bay Buccaneers", "Tennesee Titans", "Miami Dolphins"))

#Calcuating average yards per game vs any opponent for each team (home and away)

for (i in 1:nrow(teamList)){

  for (y in 1:nrow(grossM)){

    homeV1 <- matrix(0,1000000,1)
    awayV1 <- matrix(0,1000000,1)
    homeV2 <- matrix(0,1000000,1)
    awayV2 <- matrix(0,1000000,1)

    if (teamList[i,]==grossM[y,4]&grossM[y,5]=="@")(awayV1[y,1] <- grossM[y,9]) else 0 
    if (teamList[i,]==grossM[y,4]&grossM[y,5]=="")(homeV1[y,1] <- grossM[y,9]) else 0 

    if (teamList[i,]==grossM[y,6]&grossM[y,5]=="")(awayV2[y,1] <- grossM[y,11]) else 0 
    if (teamList[i,]==grossM[y,6]&grossM[y,5]=="@")(homeV2[y,1] <- grossM[y,11]) else 0 
    ....

这显然效率低下,但直到现在我还不必担心我的写作效率,因为循环花费的时间太长(我实际上还没有让它完成,因为它需要一个多小时)。有人可以指出我的写入方向,也许是我没有想到的某种类型的矩阵运算吗?

提前感谢您的回复!

编辑:只是随机意识到我应该在这里以某种方式使用聚合,对吗?

 structure(list(Week = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 
17L, 17L, 17L, 17L, 17L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c(" 1", " 2", " 3", " 4", " 5", " 6", " 7", 
" 8", " 9", "10", "11", "12", "13", "14", "15", "16", "17"), class = "factor"), 
    Day = structure(c(4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
    4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 1L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 1L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 1L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L), .Label = c("Mon", "Sat", "Sun", "Thu"), class = "factor"), 
    Date = structure(c(43L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 
    44L, 44L, 44L, 44L, 44L, 44L, 45L, 45L, 46L, 47L, 47L, 47L, 
    47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 48L, 
    49L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 
    50L, 50L, 50L, 51L, 30L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
    40L, 40L, 40L, 40L, 40L, 40L, 41L, 42L, 31L, 31L, 31L, 31L, 
    31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 32L, 33L, 34L, 34L, 
    34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 35L, 36L, 
    37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 
    38L, 39L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
    16L, 16L, 21L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 
    28L, 28L, 28L, 29L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
    18L, 18L, 18L, 18L, 18L, 19L, 20L, 22L, 22L, 22L, 22L, 22L, 
    22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 24L, 24L, 24L, 25L, 
    25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 
    12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
    13L, 13L, 13L, 14L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 7L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 15L, 15L, 15L, 
    15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
    15L, 52L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L, 53L
    ), .Label = c("December 10", "December 13", "December 14", 
    "December 17", "December 19", "December 20", "December 21", 
    "December 24", "December 26", "December 27", "December 28", 
    "December 3", "December 6", "December 7", "January 3", "November 1", 
    "November 12", "November 15", "November 16", "November 19", 
    "November 2", "November 22", "November 23", "November 26", 
    "November 29", "November 30", "November 5", "November 8", 
    "November 9", "October 1", "October 11", "October 12", "October 15", 
    "October 18", "October 19", "October 22", "October 25", "October 26", 
    "October 29", "October 4", "October 5", "October 8", "September 10", 
    "September 13", "September 14", "September 17", "September 20", 
    "September 21", "September 24", "September 27", "September 28", 
    "September 4", "September 7"), class = "factor"), `Winner/tie` = structure(c(19L, 
    26L, 4L, 22L, 7L, 1L, 5L, 10L, 9L, 29L, 16L, 12L, 17L, 31L, 
    27L, 2L, 10L, 2L, 19L, 30L, 32L, 5L, 18L, 7L, 8L, 23L, 12L, 
    1L, 25L, 9L, 15L, 22L, 21L, 13L, 25L, 5L, 4L, 24L, 1L, 2L, 
    19L, 18L, 23L, 28L, 14L, 7L, 10L, 12L, 3L, 14L, 7L, 26L, 
    10L, 21L, 2L, 22L, 6L, 20L, 5L, 12L, 29L, 32L, 28L, 14L, 
    8L, 4L, 7L, 6L, 21L, 24L, 10L, 1L, 2L, 19L, 30L, 12L, 25L, 
    20L, 17L, 22L, 5L, 10L, 7L, 13L, 27L, 18L, 19L, 12L, 11L, 
    25L, 24L, 28L, 21L, 23L, 32L, 19L, 5L, 29L, 20L, 2L, 15L, 
    16L, 18L, 17L, 1L, 19L, 18L, 28L, 29L, 23L, 7L, 10L, 13L, 
    16L, 1L, 3L, 30L, 20L, 5L, 7L, 24L, 4L, 18L, 22L, 19L, 5L, 
    21L, 14L, 27L, 31L, 25L, 6L, 4L, 6L, 15L, 1L, 19L, 5L, 30L, 
    18L, 16L, 17L, 11L, 32L, 25L, 13L, 15L, 14L, 30L, 10L, 28L, 
    1L, 11L, 5L, 3L, 13L, 9L, 16L, 12L, 19L, 5L, 6L, 11L, 22L, 
    18L, 16L, 26L, 23L, 10L, 13L, 32L, 7L, 28L, 14L, 1L, 3L, 
    12L, 30L, 7L, 5L, 1L, 31L, 28L, 16L, 17L, 22L, 10L, 25L, 
    4L, 24L, 27L, 9L, 1L, 20L, 24L, 16L, 22L, 28L, 15L, 12L, 
    25L, 5L, 8L, 23L, 19L, 32L, 29L, 21L, 29L, 22L, 28L, 26L, 
    32L, 18L, 2L, 16L, 1L, 12L, 7L, 19L, 13L, 25L, 5L, 11L, 23L, 
    32L, 6L, 22L, 14L, 18L, 2L, 4L, 3L, 11L, 29L, 16L, 20L, 13L, 
    1L, 10L, 25L, 20L, 27L, 11L, 5L, 10L, 24L, 17L, 13L, 32L, 
    4L, 18L, 14L, 7L, 16L, 28L, 28L, 2L, 13L, 5L, 4L, 27L, 10L, 
    25L, 7L, 24L, 31L), .Label = c("Arizona Cardinals", "Atlanta Falcons", 
    "Baltimore Ravens", "Buffalo Bills", "Carolina Panthers", 
    "Chicago Bears", "Cincinnati Bengals", "Cleveland Browns", 
    "Dallas Cowboys", "Denver Broncos", "Detroit Lions", "Green Bay Packers", 
    "Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars", 
    "Kansas City Chiefs", "Miami Dolphins", "Minnesota Vikings", 
    "New England Patriots", "New Orleans Saints", "New York Giants", 
    "New York Jets", "Oakland Raiders", "Philadelphia Eagles", 
    "Pittsburgh Steelers", "San Diego Chargers", "San Francisco 49ers", 
    "Seattle Seahawks", "St. Louis Rams", "Tampa Bay Buccaneers", 
    "Tennessee Titans", "Washington Redskins"), class = "factor"), 
    V5 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 
    1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
    2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 
    2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
    1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
    1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
    2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 
    2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 
    2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 
    2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 
    2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 
    1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
    1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 
    1L, 2L), .Label = c("", "@"), class = "factor"), `Loser/tie` = structure(c(25L, 
    11L, 14L, 8L, 23L, 20L, 15L, 3L, 21L, 28L, 13L, 6L, 32L, 
    30L, 18L, 24L, 16L, 21L, 4L, 20L, 29L, 13L, 11L, 26L, 31L, 
    3L, 28L, 6L, 27L, 24L, 17L, 14L, 32L, 30L, 29L, 20L, 17L, 
    22L, 27L, 9L, 15L, 26L, 8L, 6L, 31L, 3L, 11L, 16L, 25L, 15L, 
    16L, 8L, 18L, 4L, 13L, 17L, 23L, 9L, 30L, 27L, 1L, 24L, 11L, 
    13L, 3L, 31L, 28L, 16L, 27L, 20L, 23L, 11L, 32L, 9L, 15L, 
    29L, 26L, 2L, 31L, 32L, 28L, 8L, 4L, 15L, 3L, 16L, 14L, 26L, 
    6L, 1L, 21L, 27L, 9L, 26L, 30L, 22L, 24L, 8L, 14L, 31L, 4L, 
    25L, 11L, 13L, 3L, 17L, 6L, 9L, 27L, 22L, 25L, 12L, 31L, 
    11L, 8L, 26L, 2L, 21L, 14L, 8L, 9L, 17L, 29L, 15L, 32L, 12L, 
    30L, 10L, 2L, 20L, 23L, 26L, 22L, 29L, 3L, 28L, 21L, 31L, 
    9L, 23L, 10L, 24L, 12L, 20L, 8L, 7L, 31L, 2L, 24L, 6L, 27L, 
    7L, 23L, 32L, 29L, 22L, 17L, 26L, 18L, 4L, 9L, 12L, 24L, 
    17L, 2L, 4L, 15L, 31L, 19L, 20L, 21L, 29L, 25L, 30L, 27L, 
    8L, 11L, 2L, 8L, 20L, 29L, 15L, 18L, 23L, 3L, 21L, 26L, 14L, 
    13L, 19L, 6L, 32L, 18L, 30L, 4L, 26L, 31L, 3L, 14L, 9L, 7L, 
    2L, 27L, 10L, 13L, 6L, 11L, 17L, 30L, 9L, 8L, 17L, 4L, 6L, 
    15L, 3L, 24L, 23L, 27L, 31L, 14L, 10L, 21L, 20L, 26L, 24L, 
    30L, 19L, 17L, 21L, 5L, 9L, 25L, 27L, 28L, 8L, 15L, 31L, 
    12L, 7L, 8L, 2L, 29L, 6L, 30L, 26L, 21L, 19L, 15L, 9L, 22L, 
    12L, 31L, 3L, 23L, 1L, 12L, 20L, 32L, 30L, 6L, 9L, 14L, 8L, 
    3L, 15L, 16L), .Label = c("Arizona Cardinals", "Atlanta Falcons", 
    "Baltimore Ravens", "Buffalo Bills", "Carolina Panthers", 
    "Chicago Bears", "Cincinnati Bengals", "Cleveland Browns", 
    "Dallas Cowboys", "Denver Broncos", "Detroit Lions", "Green Bay Packers", 
    "Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars", 
    "Kansas City Chiefs", "Miami Dolphins", "Minnesota Vikings", 
    "New England Patriots", "New Orleans Saints", "New York Giants", 
    "New York Jets", "Oakland Raiders", "Philadelphia Eagles", 
    "Pittsburgh Steelers", "San Diego Chargers", "San Francisco 49ers", 
    "Seattle Seahawks", "St. Louis Rams", "Tampa Bay Buccaneers", 
    "Tennessee Titans", "Washington Redskins"), class = "factor"), 
    PtsW = structure(c(18L, 23L, 17L, 21L, 23L, 21L, 10L, 9L, 
    17L, 24L, 17L, 21L, 7L, 32L, 10L, 16L, 21L, 14L, 30L, 16L, 
    14L, 14L, 16L, 14L, 18L, 27L, 17L, 37L, 33L, 10L, 13L, 10L, 
    22L, 9L, 2L, 17L, 31L, 14L, 36L, 29L, 39L, 21L, 17L, 16L, 
    25L, 18L, 14L, 28L, 13L, 6L, 26L, 20L, 13L, 14L, 37L, 17L, 
    12L, 16L, 27L, 7L, 14L, 13L, 3L, 17L, 23L, 4L, 17L, 8L, 20L, 
    29L, 6L, 32L, 15L, 20L, 28L, 14L, 14L, 21L, 28L, 24L, 17L, 
    16L, 24L, 21L, 15L, 6L, 24L, 17L, 27L, 15L, 17L, 10L, 17L, 
    27L, 21L, 20L, 17L, 14L, 17L, 1L, 24L, 13L, 18L, 34L, 16L, 
    26L, 13L, 3L, 17L, 24L, 6L, 19L, 10L, 35L, 24L, 19L, 13L, 
    40L, 19L, 21L, 23L, 23L, 11L, 18L, 17L, 27L, 22L, 17L, 7L, 
    24L, 28L, 12L, 12L, 27L, 12L, 29L, 17L, 17L, 1L, 20L, 19L, 
    10L, 8L, 36L, 20L, 1L, 9L, 14L, 35L, 7L, 19L, 24L, 8L, 34L, 
    6L, 14L, 14L, 23L, 20L, 10L, 23L, 7L, 35L, 28L, 10L, 20L, 
    21L, 14L, 20L, 14L, 10L, 21L, 29L, 15L, 9L, 23L, 17L, 13L, 
    27L, 31L, 17L, 32L, 28L, 24L, 5L, 13L, 7L, 35L, 20L, 25L, 
    16L, 9L, 13L, 14L, 13L, 1L, 20L, 25L, 39L, 18L, 23L, 28L, 
    14L, 5L, 17L, 14L, 11L, 21L, 21L, 9L, 20L, 20L, 25L, 28L, 
    13L, 24L, 30L, 20L, 14L, 23L, 6L, 24L, 28L, 25L, 13L, 28L, 
    16L, 16L, 8L, 38L, 10L, 6L, 10L, 22L, 13L, 7L, 28L, 24L, 
    28L, 10L, 18L, 10L, 9L, 14L, 28L, 17L, 25L, 10L, 20L, 24L, 
    12L, 10L, 20L, 14L, 13L, 26L, 26L, 27L, 7L, 10L, 13L, 18L, 
    21L, 20L, 13L, 24L, 16L), .Label = c("10", "12", "13", "14", 
    "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
    "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", 
    "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", 
    "45", "47", "48", "49", "51", "52"), class = "factor"), PtsL = structure(c(12L, 
    19L, 5L, 2L, 4L, 10L, 34L, 4L, 17L, 23L, 11L, 14L, 2L, 5L, 
    21L, 15L, 15L, 11L, 24L, 10L, 2L, 8L, 7L, 10L, 5L, 25L, 8L, 
    14L, 9L, 2L, 11L, 32L, 12L, 34L, 31L, 13L, 5L, 8L, 32L, 19L, 
    8L, 5L, 11L, 1L, 25L, 15L, 3L, 19L, 11L, 4L, 12L, 18L, 11L, 
    2L, 12L, 5L, 11L, 11L, 14L, 21L, 13L, 11L, 2L, 11L, 22L, 
    4L, 15L, 8L, 18L, 8L, 2L, 8L, 10L, 31L, 23L, 2L, 11L, 12L, 
    2L, 11L, 14L, 14L, 12L, 11L, 11L, 2L, 18L, 11L, 26L, 4L, 
    32L, 21L, 11L, 20L, 22L, 14L, 7L, 31L, 12L, 32L, 23L, 4L, 
    10L, 17L, 9L, 32L, 11L, 3L, 31L, 11L, 2L, 2L, 31L, 2L, 11L, 
    17L, 11L, 30L, 17L, 2L, 18L, 8L, 9L, 14L, 2L, 20L, 9L, 15L, 
    7L, 19L, 27L, 10L, 8L, 4L, 11L, 24L, 17L, 2L, 31L, 5L, 4L, 
    10L, 7L, 5L, 34L, 31L, 4L, 12L, 8L, 6L, 4L, 23L, 4L, 7L, 
    4L, 8L, 5L, 21L, 4L, 4L, 5L, 4L, 5L, 11L, 2L, 13L, 16L, 12L, 
    15L, 31L, 5L, 32L, 22L, 3L, 4L, 18L, 14L, 10L, 21L, 28L, 
    21L, 29L, 32L, 11L, 4L, 11L, 21L, 2L, 12L, 19L, 11L, 7L, 
    11L, 8L, 11L, 21L, 33L, 31L, 7L, 32L, 11L, 1L, 2L, 3L, 31L, 
    12L, 5L, 15L, 14L, 7L, 4L, 5L, 16L, 8L, 8L, 5L, 8L, 11L, 
    5L, 7L, 2L, 18L, 27L, 18L, 11L, 15L, 12L, 11L, 3L, 8L, 4L, 
    31L, 8L, 8L, 8L, 4L, 18L, 31L, 33L, 8L, 3L, 8L, 7L, 11L, 
    2L, 11L, 22L, 2L, 31L, 14L, 8L, 4L, 15L, 7L, 8L, 31L, 7L, 
    26L, 31L, 5L, 11L, 8L, 15L, 18L, 7L, 8L, 2L), .Label = c("0", 
    "10", "12", "13", "14", "15", "16", "17", "18", "19", "20", 
    "21", "22", "23", "24", "25", "26", "27", "28", "29", "3", 
    "30", "31", "32", "33", "34", "35", "38", "39", "49", "6", 
    "7", "8", "9"), class = "factor"), YdsW = structure(c(72L, 
    151L, 58L, 52L, 103L, 122L, 13L, 4L, 130L, 65L, 51L, 45L, 
    10L, 37L, 102L, 102L, 32L, 106L, 158L, 52L, 83L, 64L, 64L, 
    97L, 16L, 137L, 72L, 33L, 140L, 70L, 103L, 59L, 74L, 113L, 
    12L, 126L, 123L, 5L, 136L, 132L, 147L, 21L, 146L, 81L, 88L, 
    141L, 67L, 137L, 69L, 47L, 135L, 132L, 59L, 34L, 88L, 121L, 
    81L, 132L, 8L, 73L, 49L, 115L, 60L, 46L, 157L, 3L, 117L, 
    49L, 165L, 161L, 31L, 60L, 116L, 69L, 79L, 45L, 63L, 94L, 
    128L, 148L, 93L, 134L, 68L, 92L, 99L, 44L, 115L, 80L, 167L, 
    38L, 123L, 96L, 23L, 112L, 68L, 66L, 101L, 36L, 138L, 88L, 
    29L, 87L, 121L, 156L, 114L, 131L, 48L, 46L, 96L, 139L, 30L, 
    155L, 15L, 57L, 152L, 76L, 24L, 170L, 89L, 81L, 142L, 118L, 
    27L, 24L, 143L, 122L, 48L, 76L, 41L, 151L, 169L, 136L, 19L, 
    104L, 11L, 139L, 108L, 34L, 48L, 94L, 34L, 23L, 22L, 160L, 
    142L, 10L, 36L, 17L, 162L, 97L, 159L, 93L, 85L, 78L, 96L, 
    75L, 95L, 94L, 43L, 69L, 28L, 24L, 125L, 111L, 76L, 113L, 
    79L, 109L, 127L, 73L, 109L, 86L, 130L, 54L, 55L, 54L, 39L, 
    96L, 87L, 154L, 164L, 145L, 127L, 6L, 4L, 144L, 27L, 163L, 
    98L, 9L, 25L, 41L, 100L, 96L, 62L, 50L, 133L, 120L, 90L, 
    129L, 67L, 120L, 150L, 1L, 39L, 84L, 40L, 124L, 42L, 82L, 
    119L, 134L, 126L, 64L, 49L, 18L, 153L, 27L, 7L, 61L, 35L, 
    87L, 149L, 103L, 20L, 116L, 48L, 123L, 14L, 78L, 83L, 110L, 
    95L, 81L, 2L, 11L, 166L, 51L, 91L, 98L, 89L, 98L, 141L, 63L, 
    77L, 156L, 129L, 132L, 106L, 131L, 29L, 7L, 48L, 26L, 56L, 
    67L, 105L, 168L, 44L, 53L, 71L, 42L, 72L, 156L, 90L, 118L, 
    107L), .Label = c("126", "205", "209", "219", "231", "232", 
    "242", "244", "248", "256", "258", "259", "263", "268", "270", 
    "274", "276", "277", "280", "281", "284", "287", "289", "290", 
    "291", "292", "293", "294", "295", "296", "297", "299", "300", 
    "303", "305", "308", "309", "310", "313", "317", "318", "319", 
    "320", "321", "322", "323", "326", "327", "328", "329", "330", 
    "333", "334", "336", "337", "339", "340", "342", "344", "345", 
    "346", "348", "349", "350", "352", "353", "354", "355", "356", 
    "359", "360", "361", "362", "363", "364", "365", "366", "368", 
    "369", "370", "371", "372", "373", "374", "375", "376", "377", 
    "378", "379", "380", "381", "382", "383", "385", "386", "388", 
    "389", "390", "391", "393", "394", "395", "396", "397", "398", 
    "402", "405", "406", "407", "408", "411", "412", "413", "414", 
    "417", "418", "419", "420", "423", "424", "425", "427", "428", 
    "429", "430", "431", "433", "434", "435", "436", "437", "438", 
    "439", "442", "445", "446", "448", "449", "451", "453", "458", 
    "459", "460", "463", "467", "469", "471", "474", "480", "481", 
    "483", "491", "493", "497", "500", "503", "505", "507", "508", 
    "510", "519", "521", "522", "524", "525", "537", "546", "568", 
    "597", "608"), class = "factor"), TOW = structure(c(1L, 4L, 
    1L, 2L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 1L, 2L, 2L, 2L, 3L, 2L, 
    1L, 3L, 3L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 3L, 1L, 2L, 
    1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 
    1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 2L, 
    4L, 1L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 1L, 4L, 1L, 1L, 4L, 2L, 
    1L, 3L, 4L, 3L, 4L, 1L, 1L, 1L, 3L, 2L, 1L, 4L, 1L, 5L, 3L, 
    1L, 1L, 2L, 1L, 4L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 
    2L, 2L, 1L, 3L, 2L, 1L, 1L, 5L, 1L, 1L, 3L, 4L, 1L, 1L, 1L, 
    2L, 1L, 3L, 2L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 3L, 2L, 4L, 3L, 
    1L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 4L, 2L, 1L, 1L, 3L, 
    1L, 1L, 3L, 2L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
    3L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 3L, 2L, 3L, 1L, 4L, 1L, 3L, 
    2L, 3L, 2L, 2L, 2L, 3L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 3L, 1L, 
    2L, 3L, 1L, 2L, 1L, 3L, 1L, 2L, 2L, 3L, 2L, 1L, 2L, 1L, 3L, 
    2L, 1L, 2L, 1L, 1L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 
    2L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 4L, 2L, 
    2L, 1L, 3L, 6L, 3L, 1L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 1L, 2L, 
    2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 4L, 1L), .Label = c("0", 
    "1", "2", "3", "4", "5"), class = "factor"), YdsL = structure(c(168L, 
    69L, 70L, 85L, 32L, 142L, 44L, 6L, 59L, 102L, 135L, 140L, 
    106L, 50L, 34L, 138L, 79L, 130L, 106L, 87L, 17L, 67L, 87L, 
    108L, 128L, 172L, 88L, 97L, 143L, 23L, 129L, 102L, 133L, 
    82L, 41L, 125L, 132L, 87L, 5L, 105L, 63L, 118L, 109L, 4L, 
    159L, 137L, 60L, 90L, 42L, 157L, 167L, 158L, 89L, 78L, 156L, 
    23L, 30L, 97L, 144L, 11L, 164L, 84L, 39L, 163L, 124L, 52L, 
    136L, 57L, 125L, 130L, 58L, 160L, 48L, 43L, 89L, 96L, 141L, 
    146L, 66L, 22L, 96L, 65L, 117L, 134L, 151L, 91L, 143L, 178L, 
    163L, 169L, 33L, 3L, 166L, 150L, 171L, 121L, 106L, 114L, 
    123L, 39L, 122L, 99L, 51L, 86L, 52L, 48L, 71L, 20L, 9L, 116L, 
    110L, 2L, 15L, 52L, 37L, 120L, 173L, 149L, 111L, 17L, 144L, 
    136L, 84L, 161L, 36L, 140L, 127L, 75L, 69L, 149L, 162L, 99L, 
    82L, 56L, 136L, 102L, 152L, 29L, 19L, 120L, 21L, 161L, 121L, 
    107L, 101L, 39L, 80L, 122L, 127L, 105L, 72L, 124L, 19L, 8L, 
    17L, 46L, 14L, 12L, 101L, 83L, 14L, 115L, 24L, 95L, 92L, 
    148L, 151L, 35L, 68L, 47L, 94L, 104L, 177L, 103L, 117L, 98L, 
    72L, 83L, 50L, 96L, 16L, 127L, 1L, 112L, 122L, 109L, 49L, 
    28L, 139L, 155L, 114L, 45L, 131L, 61L, 145L, 53L, 62L, 69L, 
    86L, 48L, 128L, 25L, 21L, 76L, 9L, 124L, 93L, 113L, 175L, 
    75L, 25L, 26L, 165L, 63L, 119L, 116L, 154L, 121L, 82L, 54L, 
    10L, 128L, 141L, 138L, 102L, 137L, 131L, 55L, 112L, 113L, 
    47L, 73L, 74L, 86L, 77L, 117L, 145L, 40L, 7L, 64L, 42L, 147L, 
    114L, 104L, 129L, 81L, 174L, 11L, 18L, 176L, 67L, 107L, 26L, 
    100L, 13L, 27L, 38L, 170L, 121L, 43L, 155L, 126L, 142L, 131L, 
    153L, 72L, 31L), .Label = c("125", "140", "142", "146", "156", 
    "173", "178", "186", "189", "190", "196", "201", "205", "210", 
    "211", "212", "213", "215", "216", "220", "221", "225", "226", 
    "227", "230", "231", "232", "240", "242", "243", "245", "246", 
    "247", "248", "249", "250", "254", "255", "256", "257", "258", 
    "263", "264", "265", "266", "267", "268", "270", "272", "273", 
    "274", "276", "280", "282", "284", "285", "287", "288", "289", 
    "290", "291", "292", "293", "294", "298", "299", "300", "301", 
    "302", "304", "305", "306", "307", "308", "309", "310", "312", 
    "313", "314", "316", "317", "318", "319", "320", "321", "322", 
    "323", "324", "325", "326", "328", "329", "331", "332", "333", 
    "334", "335", "338", "339", "341", "342", "343", "344", "345", 
    "347", "349", "350", "354", "355", "356", "359", "361", "363", 
    "364", "365", "366", "368", "369", "370", "371", "372", "375", 
    "376", "377", "380", "382", "383", "385", "386", "388", "389", 
    "391", "393", "394", "396", "397", "398", "399", "401", "402", 
    "406", "408", "409", "411", "412", "413", "414", "415", "416", 
    "417", "420", "422", "423", "424", "427", "428", "431", "432", 
    "433", "435", "436", "440", "444", "447", "452", "460", "461", 
    "464", "469", "472", "479", "493", "496", "502", "509", "512", 
    "538", "548"), class = "factor"), TOL = structure(c(2L, 3L, 
    4L, 6L, 3L, 2L, 4L, 3L, 1L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 6L, 
    2L, 4L, 4L, 1L, 2L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 4L, 1L, 6L, 
    4L, 2L, 2L, 3L, 4L, 5L, 5L, 2L, 2L, 3L, 3L, 1L, 3L, 2L, 4L, 
    2L, 1L, 1L, 2L, 2L, 2L, 3L, 5L, 3L, 3L, 1L, 6L, 2L, 4L, 3L, 
    2L, 3L, 1L, 2L, 2L, 1L, 1L, 5L, 4L, 7L, 3L, 3L, 3L, 5L, 3L, 
    4L, 5L, 3L, 1L, 4L, 2L, 4L, 3L, 2L, 1L, 2L, 2L, 4L, 4L, 1L, 
    5L, 3L, 2L, 2L, 2L, 5L, 4L, 3L, 5L, 4L, 1L, 2L, 3L, 3L, 1L, 
    1L, 1L, 2L, 4L, 1L, 4L, 3L, 3L, 1L, 5L, 2L, 5L, 1L, 2L, 2L, 
    1L, 5L, 3L, 3L, 4L, 3L, 1L, 3L, 5L, 2L, 5L, 3L, 5L, 2L, 2L, 
    3L, 2L, 3L, 6L, 2L, 1L, 3L, 4L, 3L, 2L, 5L, 5L, 3L, 1L, 2L, 
    1L, 6L, 5L, 3L, 2L, 3L, 2L, 2L, 4L, 3L, 2L, 3L, 4L, 3L, 2L, 
    3L, 2L, 2L, 4L, 4L, 5L, 2L, 3L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 
    2L, 4L, 3L, 2L, 4L, 4L, 2L, 3L, 2L, 2L, 4L, 1L, 3L, 2L, 2L, 
    3L, 3L, 2L, 4L, 5L, 1L, 3L, 2L, 2L, 2L, 2L, 3L, 5L, 2L, 2L, 
    1L, 3L, 2L, 4L, 5L, 3L, 5L, 4L, 3L, 3L, 3L, 1L, 2L, 3L, 4L, 
    2L, 2L, 4L, 2L, 3L, 4L, 2L, 4L, 2L, 3L, 5L, 5L, 2L, 5L, 3L, 
    1L, 4L, 4L, 2L, 2L, 1L, 5L, 5L, 4L, 3L, 2L, 3L, 2L, 4L, 2L, 
    3L, 3L, 4L, 4L, 5L, 3L, 1L, 3L, 2L, 4L), .Label = c("0", 
    "1", "2", "3", "4", "5", "6"), class = "factor")), .Names = c("Week", 
"Day", "Date", "Winner/tie", "V5", "Loser/tie", "PtsW", "PtsL", 
"YdsW", "TOW", "YdsL", "TOL"), row.names = c(NA, -267L), class = "data.frame")
r loops web-scraping apply
4个回答
1
投票

如果不清楚,请原谅我,但这是我的第一篇堆栈溢出帖子。

我假设您正在从职业足球参考资料中阅读本文。我大概像你一样清理了它。

library(XML)
library(dplyr)

首先读入数据

url <- "http://www.pro-football-reference.com/years/2015/games.htm"
nfl_weeks <- XML::readHTMLTable(url)[[1]]

然后清理数据

nfl_weeks <- nfl_weeks[nfl_weeks$Week != "Week",][,-7]
names(nfl_weeks)[5] <- 'vs'
nfl_weeks[,7:ncol(nfl_weeks)] <- apply(nfl_weeks[,7:ncol(nfl_weeks)],2,as.numeric)

其次是拆分-应用-组合策略

# split
s <- split(nfl_weeks, nfl_weeks$`Winner/tie`)[2:33]
YdsW <- lapply(s, function(i) mYdsW = mean(i$YdsW))
YdsW <- do.call("c", YdsW)
# apply
YdsL<- lapply(s, function(i) mYdsW = mean(i$YdsL))
# combine
YdsL <- do.call("c", YdsL)

1
投票

这是一个总结的解决方案,作为@thepule答案的扩展

主要问题之一是数据不“整洁”(http://garrettgman.github.io/tidying/)。 IE。团队只有一个变量,结果也是一个变量

#wtf <- structure(.....
# in the previous structure the continous variables were encoded as factors and should be transformed to integers
for(i in 7:11) {
  wtf[[i]] <- as.integer(as.character(wtf[[i]]))  
}

wtf %>%
  tbl_df() %>%
  tidyr::gather(key = "outcome", value = "team", 4, 6) %>%
  group_by(team, outcome, V5) %>%
  summarise(meanYds = mean(ifelse(outcome == "Winner/tie", 
                             (YdsW),
                             (YdsL))))

解释

  • wtf %>% tbl_df()
    简单地将
    data.frame
    放入一个不错的类中
  • tidyr::gather
    将游戏的球队和结果“扁平化”为两列
  • 然后我们只需按
    team
    outcome
    V5
    进行分组,这意味着对于这些变量值的每个不同组合,我们可以计算一些度量...
  • ...这就是我们例子中的平均值。但是我们需要根据比赛结果的状态来选择要计算的码数(同样,这里的数据并不整齐,但这里我忽略了这个事实!)。
    ifelse
    将返回获胜或失败年份的
    vector
    ,我们只计算平均值。

表格中应包含所有球队、结果和主场优势的比赛平均码数。


0
投票

也许你可以尝试

foreach
循环。因此,您可以使用所有 CPU 内核的功能。本例使用4核,您应该根据您的硬件进行更改。

library(foreach)
library(doSNOW)

ncpus <- 4
cl <- makeCluster(ncpus, type = "SOCK")
registerDoSNOW(cl)

  f <- foreach(i = 1:nrow(teamList), .combine = rbind, .inorder = F) %dopar% {
    for (y in 1:nrow(grossM)){

      homeV1 <- matrix(0,1000000,1)
      awayV1 <- matrix(0,1000000,1)
      homeV2 <- matrix(0,1000000,1)
      awayV2 <- matrix(0,1000000,1)

      if (teamList[i,]==grossM[y,4]&grossM[y,5]=="@")(awayV1[y,1] <- grossM[y,9]) else 0 
      if (teamList[i,]==grossM[y,4]&grossM[y,5]=="")(homeV1[y,1] <- grossM[y,9]) else 0 

      if (teamList[i,]==grossM[y,6]&grossM[y,5]=="")(awayV2[y,1] <- grossM[y,11]) else 0 
      if (teamList[i,]==grossM[y,6]&grossM[y,5]=="@")(homeV2[y,1] <- grossM[y,11]) else 0 
# .... #
    }
data.frame(team = teamList[i], awayV1, homeV1, awayV2, homeV2)
  }

0
投票

好吧,根据您输入的数据框,以下内容似乎应该为您提供所需的信息,尽管格式可能略有不同。

首先我加载

dplyr
并将码数转换为数字格式:

library(dplyr)
df$YdsW <- as.numeric(df$YdsW)
df$YdsL <- as.numeric(df$YdsL)

比,根据我对数据集的理解:

Avg_win_yard <- df  %>%
        group_by(`Winner/tie`, V5) %>%
        summarise(Total_yard = mean(YdsW))

查看数据框的前 4 行,它们包含:

head(Avg_win_yard, 4)
Source: local data frame [4 x 3]
Groups: Winner/tie [2]

         Winner/tie     V5 Total_yard
             (fctr) (fctr)      (dbl)
1 Arizona Cardinals          109.3333
2 Arizona Cardinals      @   108.0000
3   Atlanta Falcons          111.4000
4   Atlanta Falcons      @    93.7500

我对此的解释是红雀队在主场平均跑动 109.3 码,在主场外平均跑动 108 码,赢得胜利。

同样,对于失败的球队:

Avg_lose_yard <- df  %>%
        group_by(`Loser/tie`, V5) %>%
        summarise(Total_yard = mean(YdsL))

head(Avg_lose_yard, 4)
Source: local data frame [4 x 3]
Groups: Loser/tie [2]

          Loser/tie     V5 Total_yard
             (fctr) (fctr)      (dbl)
1 Arizona Cardinals            169.00
2 Arizona Cardinals      @      95.50
3   Atlanta Falcons             80.75
4   Atlanta Falcons      @     133.50

我将其解释为红雀队在主场外比赛中平均跑动 169 码,在主场比赛中平均跑动 95.50 码。 一旦你掌握了信息,你就可以按照你想要的方式剖析它。

© www.soinside.com 2019 - 2024. All rights reserved.