在一个文件夹(`path = "D:/DataLogs/"`)中,我有几个子文件夹。在这些子文件夹中,我想检索所有仅以“QCLog”开头的 csv 文件,并将它们合并(`rbind`)到单个 data.frame 中(所有这些 csv 具有相同的标题和结构),同时新建一列作为第一列,其中包含这些 QCLog csv 文件的全名。
像下面这样的代码可能会起作用(未经测试)。
#' Read the QCLog csv files stored inside a zip archive
#'
#' Lists the archive contents, extracts only the members whose file name
#' starts with "QCLog" and ends in ".csv" into `tmpdir`, reads each one with
#' read.csv() and row-binds the results.
#'
#' @param filename Path to the zip archive.
#' @param tmpdir Directory the matching members are extracted into.
#' @return One data.frame with an extra `filename` column holding the base
#'   name of the csv each row came from, or NULL when the archive contains
#'   no matching member.
read_csv_in_zip_file <- function(filename, tmpdir) {
  csv_files <- unzip(filename, list = TRUE)[["Name"]]
  # Members may carry a directory prefix, so test the base name only and
  # anchor the pattern: "starts with QCLog", "ends with .csv".
  is_qclog <- grepl("^QCLog.*\\.csv$", basename(csv_files))
  if (!any(is_qclog)) {
    # Bail out early: an empty `files` vector would make unzip() extract
    # every member of the archive instead of none.
    return(NULL)
  }
  fls <- unzip(filename, files = csv_files[is_qclog], exdir = tmpdir)
  res <- lapply(fls, \(f) {
    x <- read.csv(f)
    # rep() keeps the assignment valid for a header-only (zero-row) csv
    x$filename <- rep(basename(f), nrow(x))
    x
  })
  res <- do.call(rbind, res)
  row.names(res) <- NULL
  res
}
# Where to put the unzipped files. Use a dedicated scratch directory:
# tempdir() itself is the session-wide temp directory, so it must not be
# deleted during the final clean up.
tmp_dir <- tempfile("qclog_unzip_")
dir.create(tmp_dir)
path <- "D:/DataLogs"
# Anchored so that only csv files whose name starts with "QCLog", and real
# ".zip" archives, are picked up.
pattern <- "^QCLog.*\\.csv$|\\.zip$"
fls <- list.files(
  path = path, pattern = pattern, full.names = TRUE, recursive = TRUE
)
df_list <- lapply(fls, \(f) {
  # grepl() always yields a single TRUE/FALSE; grep() returns integer(0)
  # on no match, which `if` rejects with an error.
  if (grepl("\\.zip$", f)) {
    read_csv_in_zip_file(f, tmp_dir)
  } else {
    res <- read.csv(f)
    # rep() keeps the assignment valid for a header-only (zero-row) csv
    res$filename <- rep(f, nrow(res))
    res
  }
})
# one data.frame only (NULL when no file was found)
df_all <- do.call(rbind, df_list)
# final clean up: removes only the scratch directory created above
unlink(tmp_dir, recursive = TRUE)
# rm(df_list)