我正在创建一个循环来计算面板数据集上名为“有效刻度”的代理,其中行代表日期,列代表公司。数据集的结构如下:
> str(data_effective1)
'data.frame': 3132 obs. of 507 variables:
$ data : Date, format: "2012-11-23" "2012-11-26" ...
$ AMAZON.COM : num 12 12.2 12.2 12.4 12.6 ...
$ ABBOTT LABORATORIES : num 30.8 30.8 30.7 30.9 31.1 ...
$ AES : num 10.1 10.2 10.2 10.4 10.4 ...
> dput(head(data_effective1))
data_effective1 <-
structure(list(
data = structure(c(15667, 15670, 15671, 15672, 15673, 15674), class = "Date"),
`ABBOTT LABORATORIES` = c(30.8472, 30.7945, 30.651, 30.895, 31.139, 31.1008),
`ALLSTATE ORD SHS` = c(40.74, 40.45, 40.42, 40.63, 40.64, 40.48)
)) |> as.data.frame()
目前,我已经创建了一个循环来计算每列的有效刻度,这就是结果:
effective_tick<-numeric(ncol(data_effective1)-1)
for (i in 2 : (ncol(data_effective1))) {
cents<-data_effective1[,2]-floor(data_effective1[,2])
cents<-round(cents*100)
#cluster
cluster <- ifelse(cents %% 100 == 0, 1.00, # Dollar
ifelse(cents %% 25 == 0, 0.25, # Quarter
ifelse(cents %% 10 == 0, 0.10, # Dime
ifelse(cents %% 5 == 0, 0.05, # Nickel
0.01))))#penny
N_j <- table(cluster)
F_j <- N_j / sum(N_j)
# Step 4: Calcolare le probabilità non vincolate
#inizializzazione
U_j <- numeric(length(F_j))
names(U_j) <- names(F_j)
#per il primo cluster
U_j[1] <- 2 * F_j[1]
#per gli altri
if (length(U_j) > 1) {
for (j in 2:(length(U_j) - 1)) {
U_j[j] <- 2 * F_j[j] - F_j[j - 1]
}
U_j[length(U_j)] <- F_j[length(U_j)] - F_j[length(U_j) - 1]
}
# Step 5: Vincolare le probabilità
#inizializzazione
g_j <- numeric(length(U_j))
names(g_j) <- names(U_j)
g_j[1] <- pmin(pmax(U_j[1], 0), 1)
if(length(g_j) > 1) {
for (j in 2:length(U_j)) {
g_j[j] <- pmin(pmax(U_j[j], 0), 1 - sum(g_j[1:(j - 1)]))
}
}
sum(g_j)
# Step 6: Determinare lo spread effettivo per ciascun cluster
s_j <- as.numeric(names(F_j))
# Step 7: Calcolare il prezzo medio
mean_price <- mean(data_effective1[,i])
# Step 8: Calcolare l'Effective Tick
effective_tick[i]<- sum(g_j * s_j) / mean_price
}
现在的问题是我需要每个月迭代这个循环,但我不知道该怎么做。我尝试过各种方法,但到目前为止,我还没有成功。我的想法是:
问题之一是我有很多 NA,但我无法删除它们,因为该数据集代表标准普尔 500 指数,其中有公司进入和退出该指数。删除 NA 将意味着丢失行并在数据中产生间隙。
有人有想法吗?我有点卡住了。
将代码编写为函数,并为每个月组调用它。使用
format
确定月份,仅从日期列中提取年/月组合。data_effective1
更改为 X
。该函数返回一个数据框。by
,另一种使用 split/lapply
。
calc_tick <- function(X) {
effective_tick <- numeric(ncol(X) - 1)
for (i in 2:(ncol(X))) {
cents <- X[,2] - floor(X[,2])
cents <- round(cents*100)
#cluster
cluster <- ifelse(cents %% 100 == 0, 1.00, # Dollar
ifelse(cents %% 25 == 0, 0.25, # Quarter
ifelse(cents %% 10 == 0, 0.10, # Dime
ifelse(cents %% 5 == 0, 0.05, # Nickel
0.01)))) # penny
N_j <- table(cluster)
F_j <- N_j / sum(N_j)
# Step 4: Calcolare le probabilità non vincolate
#inizializzazione
U_j <- numeric(length(F_j))
names(U_j) <- names(F_j)
#per il primo cluster
U_j[1] <- 2 * F_j[1]
#per gli altri
if (length(U_j) > 1) {
for (j in 2:(length(U_j) - 1)) {
U_j[j] <- 2 * F_j[j] - F_j[j - 1]
}
U_j[length(U_j)] <- F_j[length(U_j)] - F_j[length(U_j) - 1]
}
# Step 5: Vincolare le probabilità
#inizializzazione
g_j <- numeric(length(U_j))
names(g_j) <- names(U_j)
g_j[1] <- pmin(pmax(U_j[1], 0), 1)
if(length(g_j) > 1) {
for (j in 2:length(U_j)) {
g_j[j] <- pmin(pmax(U_j[j], 0), 1 - sum(g_j[1:(j - 1)]))
}
}
# sum(g_j)
# Step 6: Determinare lo spread effettivo per ciascun cluster
s_j <- as.numeric(names(F_j))
# Step 7: Calcolare il prezzo medio
mean_price <- mean(data_effective1[,i])
# Step 8: Calcolare l'Effective Tick
effective_tick[i - 1L]<- sum(g_j * s_j) / mean_price
}
effective_tick |> t() |> as.data.frame() |> setNames(names(X)[-1L])
}
Month <- format(data_effective1$data, "%Y-%m")
by(data_effective1, Month, calc_tick) |>
data.table::rbindlist(idcol = TRUE) |>
as.data.frame()
#> .id ABBOTT.LABORATORIES ALLSTATE.ORD.SHS
#> 1 2012-11 0.0003235766 0.0002465483
split(data_effective1, Month) |>
lapply(calc_tick) |>
data.table::rbindlist(idcol = TRUE) |>
as.data.frame()
#> .id ABBOTT.LABORATORIES ALLSTATE.ORD.SHS
#> 1 2012-11 0.0003235766 0.0002465483
创建于 2024 年 11 月 30 日,使用 reprex v2.1.1
data_effective1 <-
structure(list(
data = structure(c(15667, 15670, 15671, 15672, 15673, 15674), class = "Date"),
`ABBOTT LABORATORIES` = c(30.8472, 30.7945, 30.651, 30.895, 31.139, 31.1008),
`ALLSTATE ORD SHS` = c(40.74, 40.45, 40.42, 40.63, 40.64, 40.48)
)) |> as.data.frame()