R 中时间序列的循环和分组

Question

我正在创建一个循环来计算面板数据集上名为“有效刻度”的代理，其中行代表日期，列代表公司。数据集的结构如下：

> str(data_effective1)
'data.frame':   3132 obs. of  507 variables:
 $ data                               : Date, format: "2012-11-23" "2012-11-26" ...
 $ AMAZON.COM                         : num  12 12.2 12.2 12.4 12.6 ...
 $ ABBOTT LABORATORIES                : num  30.8 30.8 30.7 30.9 31.1 ...
 $ AES                                : num  10.1 10.2 10.2 10.4 10.4 ...

> dput(head(data_effective1))
data_effective1 <-
  structure(list(
    data = structure(c(15667, 15670, 15671, 15672, 15673, 15674), class = "Date"), 
    `ABBOTT LABORATORIES` = c(30.8472, 30.7945, 30.651, 30.895, 31.139, 31.1008), 
    `ALLSTATE ORD SHS` = c(40.74, 40.45, 40.42, 40.63, 40.64, 40.48)
  )) |> as.data.frame()

目前，我已经创建了一个循环来计算每列的有效刻度，这就是结果：


effective_tick<-numeric(ncol(data_effective1)-1)

for (i in 2 : (ncol(data_effective1))) {
   cents<-data_effective1[,2]-floor(data_effective1[,2])
   cents<-round(cents*100)
#cluster
cluster <- ifelse(cents %% 100 == 0, 1.00, # Dollar
             ifelse(cents %% 25 == 0, 0.25, # Quarter
             ifelse(cents %% 10 == 0, 0.10, # Dime
             ifelse(cents %% 5 == 0, 0.05,  # Nickel
                    0.01))))#penny
N_j <- table(cluster)
F_j <- N_j / sum(N_j) 

# Step 4: Calcolare le probabilità non vincolate
#inizializzazione
U_j <- numeric(length(F_j))
names(U_j) <- names(F_j)


#per il primo cluster
U_j[1] <- 2 * F_j[1]
#per gli altri

if (length(U_j) > 1) {
    for (j in 2:(length(U_j) - 1)) {
        U_j[j] <- 2 * F_j[j] - F_j[j - 1]
    }
    U_j[length(U_j)] <- F_j[length(U_j)] - F_j[length(U_j) - 1]
}

# Step 5: Vincolare le probabilità
#inizializzazione
g_j <- numeric(length(U_j))
names(g_j) <- names(U_j)


  g_j[1] <- pmin(pmax(U_j[1], 0), 1)
if(length(g_j) > 1) {
  for (j in 2:length(U_j)) {
    g_j[j] <- pmin(pmax(U_j[j], 0), 1 - sum(g_j[1:(j - 1)]))
  }
}


sum(g_j)
# Step 6: Determinare lo spread effettivo per ciascun cluster
s_j <- as.numeric(names(F_j))

# Step 7: Calcolare il prezzo medio
mean_price <- mean(data_effective1[,i])

# Step 8: Calcolare l'Effective Tick
effective_tick[i]<- sum(g_j * s_j) / mean_price
}

现在的问题是我需要每个月迭代这个循环，但我不知道该怎么做。我尝试过各种方法，但到目前为止，我还没有成功。我的想法是：

在这个循环中嵌套一个按月迭代的循环。
创建一个函数来执行第一个循环的操作，然后应用它，按月分组。

问题之一是我有很多 NA，但我无法删除它们，因为该数据集代表标准普尔 500 指数，其中有公司进入和退出该指数。删除 NA 将意味着丢失行并在数据中产生间隙。

有人有想法吗？我有点卡住了。

Answer 1

将代码编写为函数，并为每个月组调用它。使用

format

确定月份，仅从日期列中提取年/月组合。
在下面的代码中，我将输入参数名称从

data_effective1

更改为

。该函数返回一个数据框。
我为每个 sub-df 提供了两种调用它的方法，一种使用

by

，另一种使用

split/lapply

。

calc_tick <- function(X) { 
  effective_tick <- numeric(ncol(X) - 1)

  for (i in 2:(ncol(X))) {
    cents <- X[,2] - floor(X[,2])
    cents <- round(cents*100)
    #cluster
    cluster <- ifelse(cents %% 100 == 0, 1.00,                    # Dollar
                      ifelse(cents %% 25 == 0, 0.25,              # Quarter
                             ifelse(cents %% 10 == 0, 0.10,       # Dime
                                    ifelse(cents %% 5 == 0, 0.05, # Nickel
                                           0.01))))               # penny
    N_j <- table(cluster)
    F_j <- N_j / sum(N_j) 

    # Step 4: Calcolare le probabilità non vincolate
    #inizializzazione
    U_j <- numeric(length(F_j))
    names(U_j) <- names(F_j)

    #per il primo cluster
    U_j[1] <- 2 * F_j[1]
    #per gli altri

    if (length(U_j) > 1) {
      for (j in 2:(length(U_j) - 1)) {
        U_j[j] <- 2 * F_j[j] - F_j[j - 1]
      }
      U_j[length(U_j)] <- F_j[length(U_j)] - F_j[length(U_j) - 1]
    }

    # Step 5: Vincolare le probabilità
    #inizializzazione
    g_j <- numeric(length(U_j))
    names(g_j) <- names(U_j)

    g_j[1] <- pmin(pmax(U_j[1], 0), 1)
    if(length(g_j) > 1) {
      for (j in 2:length(U_j)) {
        g_j[j] <- pmin(pmax(U_j[j], 0), 1 - sum(g_j[1:(j - 1)]))
      }
    }

    # sum(g_j)
    # Step 6: Determinare lo spread effettivo per ciascun cluster
    s_j <- as.numeric(names(F_j))
    
    # Step 7: Calcolare il prezzo medio
    mean_price <- mean(data_effective1[,i])
    
    # Step 8: Calcolare l'Effective Tick
    effective_tick[i - 1L]<- sum(g_j * s_j) / mean_price
  }
  effective_tick |> t() |> as.data.frame() |> setNames(names(X)[-1L])
}

Month <- format(data_effective1$data, "%Y-%m")

by(data_effective1, Month, calc_tick) |> 
  data.table::rbindlist(idcol = TRUE) |>
  as.data.frame()
#>       .id ABBOTT.LABORATORIES ALLSTATE.ORD.SHS
#> 1 2012-11        0.0003235766     0.0002465483

split(data_effective1, Month) |>
  lapply(calc_tick) |> 
  data.table::rbindlist(idcol = TRUE) |>
  as.data.frame()
#>       .id ABBOTT.LABORATORIES ALLSTATE.ORD.SHS
#> 1 2012-11        0.0003235766     0.0002465483

^{创建于 2024 年 11 月 30 日，使用 reprex v2.1.1}

数据

data_effective1 <-
  structure(list(
    data = structure(c(15667, 15670, 15671, 15672, 15673, 15674), class = "Date"), 
    `ABBOTT LABORATORIES` = c(30.8472, 30.7945, 30.651, 30.895, 31.139, 31.1008), 
    `ALLSTATE ORD SHS` = c(40.74, 40.45, 40.42, 40.63, 40.64, 40.48)
  )) |> as.data.frame()

R 中时间序列的循环和分组

问题描述投票：0回答：1

1个回答

数据

最新问题

R 中时间序列的循环和分组

问题描述 投票：0回答：1

1个回答

数据

最新问题

问题描述投票：0回答：1