当我开始编写更复杂的代码时,我越来越意识到自己可能养成了一些坏习惯。我的脚本通常可以完成任务,但我的代码经常运行缓慢,有时甚至导致 R 崩溃,因此我想知道自己处理问题的方式是否正确。我该如何提升自己,从为复杂任务拼凑简单代码,进阶到编写自己的函数?由于这是一个比较宽泛的问题,我提供一个近期任务的示例,我担心这个脚本太笨拙,应该可以写得更干净或更快。如果有人可以帮助清理此脚本,或者提供有关如何改进我的代码以执行类似任务的建议,将不胜感激。有人能指出这里哪些部分可以改写为函数吗?如果可以,该怎么做?是否有编写函数或编写简洁脚本的通用规则?有人能在该脚本中看出任何危险的坏习惯吗?
示例上下文:
此脚本的目的是遍历轨迹坐标。对于每个位置,我想在指定的错误字段内生成9个随机样本。对于每个位置错误和原始点(总计10个),我希望从源中提取数据。在这种情况下,距形状文件的距离。然后,我想取提取数据的平均值,然后将其重新添加到原始跟踪文件中。
示例数据:
Date_Time longitude latitude
27/10/2011 15:15 -91.98876953 1.671900034
30/10/2011 14:31 -91.91790771 1.955003262
30/10/2011 15:34 -91.91873169 1.961261749
30/10/2011 20:55 -91.86060333 1.996331811
31/10/2011 04:03 -91.67115021 1.929548025
03/11/2011 18:36 -90.67552948 1.850875616
04/11/2011 18:26 -90.65361023 1.799352288
07/11/2011 19:29 -92.13287354 0.755102754
07/11/2011 20:28 -92.13739014 0.783674061
27/12/2011 13:43 -88.16407776 -4.953748703
07/01/2012 18:44 -82.51725006 -5.717019081
07/01/2012 19:30 -82.50763702 -5.706347942
07/01/2012 20:28 -82.50556183 -5.696153641
07/01/2012 21:10 -82.50305176 -5.685819626
08/01/2012 00:27 -82.18003845 -5.623015404
08/01/2012 18:37 -82.17269897 -5.61870575
08/01/2012 19:20 -82.16355133 -5.612465382
此数据表示一个文件,列表中将有许多文件。
示例脚本以完成任务:
#### Packages ####
library(dplyr)
library(geosphere)
library(rgdal)
library(rgeos)
library(truncnorm)
# Load files ----
dir <- 'C:/Users/Documents/PhD/Chapters/'
sfolder <- paste0(dir, 'Data/Tracks/')
# BUG FIX: the pattern '.csv' is a regex where '.' matches any character,
# so e.g. "Xcsv_notes.txt" would match; anchor it to the ".csv" extension.
sfiles <- list.files(sfolder, pattern = '\\.csv$', recursive = TRUE)
## Load the contours for proximity measurements
# 200 m depth contour
contour2 <- readOGR(paste0(dir, 'QGIS/Base layers/2GEBCO_2020_Contour_200.gpkg'))
# 1000 m depth contour
contour1 <- readOGR(paste0(dir, 'QGIS/Base layers/2GEBCO_2020_Contour_1000.gpkg'))
# Land (0 m contour)
land <- readOGR(paste0(dir, 'QGIS/Base layers/GEBCO_2020_Contour_0.gpkg'))
# Contours to extract distances for; this order defines the columns of
# the per-track `proximity` data frame (200 -> X1, 1000 -> X2, 0 -> X3)
extracts <- c('200', '1000', '0')
# Extract proximity data for all tracks ----
#
# For each track file: read it, infer the tag type from the directory
# structure, generate 9 random positions inside the tag-specific error
# field around every fix (plus the original fix = 10 points), measure the
# great-circle distance from each point to each contour, and attach the
# mean distance per contour back onto the track.
for (o in seq_along(sfiles)) {
  # Tag type is the top-level folder of the relative path, e.g. 'ARGOS' or
  # 'PSAT'; basename() keeps this working even for absolute paths.
  tagType <- basename(dirname(dirname(dirname(sfiles[o]))))
  # BUG FIX: original referenced the undefined object `sfold`
  track <- read.csv(paste0(sfolder, sfiles[o]))
  ntrack <- nrow(track)
  # One column per contour in `extracts`, one row per track position
  proximity <- data.frame(matrix(ncol = length(extracts), nrow = ntrack))
  # Generate random samples of each point
  for (i in seq_len(ntrack)) {
    errors <- data.frame(matrix(ncol = 2, nrow = 10))
    # The error box is symmetric around the recorded fix, so the mean of
    # the truncated normal is the fix itself.  (The original
    # `mean(x - off, x + off)` silently passed the second value as `trim`
    # and returned x - off, biasing every sample.)
    if (tagType == 'PSAT') {
      # BUG FIX: longitude used rnorm() with rtruncnorm()'s a/b arguments
      errors[, 1] <- rtruncnorm(n = 10, a = track$longitude[i] - 0.53,
                                b = track$longitude[i] + 0.53,
                                mean = track$longitude[i])
      errors[, 2] <- rtruncnorm(n = 10, a = track$latitude[i] - 1.08,
                                b = track$latitude[i] + 1.08,
                                mean = track$latitude[i])
    }
    if (tagType == 'ARGOS') {
      errors[, 1] <- rtruncnorm(n = 10, a = track$longitude[i] - 0.12,
                                b = track$longitude[i] + 0.12,
                                mean = track$longitude[i])
      errors[, 2] <- rtruncnorm(n = 10, a = track$latitude[i] - 0.12,
                                b = track$latitude[i] + 0.12,
                                mean = track$latitude[i])
    }
    # Row 1 holds the original fix; rows 2-10 are the random samples
    errors[1, ] <- c(track$longitude[i], track$latitude[i])
    colnames(errors) <- c('longitude', 'latitude')
    errTrack <- SpatialPoints(errors)
    # Now measure distance from every error point to each contour
    for (a in seq_along(extracts)) {
      contour <- switch(extracts[a],
                        '200'  = contour2,
                        '1000' = contour1,
                        '0'    = land)
      n <- length(errTrack)  # 10: original location + 9 random samples
      nearest <- matrix(NA_real_, nrow = n, ncol = 2)
      for (e in seq_len(n)) {
        # gNearestPoints() returns both ends of the shortest segment;
        # row 2 is the point on the contour.
        nearest[e, ] <- coordinates(gNearestPoints(errTrack[e], contour))[2, ]
      }
      # BUG FIX: SpatialPoints has no `$longitude` accessor; extract the
      # coordinate matrix instead.
      p1 <- coordinates(errTrack)
      # Haversine distance in metres -> km, rounded to 2 d.p. as before
      distKm <- round(distHaversine(p1, nearest, r = 6378137) / 1000, 2)
      proximity[i, a] <- mean(distKm)
    }  # end for all contour extracts
  }  # end for each row in track
  # Columns of `proximity` follow the order of `extracts`: 200, 1000, 0
  track$Proximity_land <- proximity$X3
  track$Proximity_200m <- proximity$X1
  track$Proximity_1000m <- proximity$X2
}  # end for all tracks
我知道这可能是个比较小众的问题,但是如果有人能够提供一些用函数整理循环代码的通用方法,或者能够指引我一些可能有用的资源,那将不胜感激。同样,如果有人能具体帮助加快或清理此代码,那就太好了!(如果需要复现,轮廓文件可以用随机多边形代替)。我希望这个问题适合该论坛,如果不合适,深表歉意。
我同意您的评估,可以通过使用一些函数来让代码更清晰。通过使用函数,您可以将大型复杂的程序分解为可管理的块,分别进行推理。
关于程序中的循环,许多人发现映射(map)比循环更清晰。它们本质上就像循环那样对元素集合进行迭代,但是不必跟踪索引变量。purrr 软件包提供了出色的 map 系列函数及其他函数集合。
一些阅读这些主题的好资源,包括https://rstudio-education.github.io/hopr/,https://r4ds.had.co.nz/和https://adv-r.hadley.nz/。
在下面的代码中,我试图将一些代码提取到函数中,以期使控制流程更易于遵循。由于我还没有在实际数据上尝试过该代码,因此如果没有一些修复,它肯定无法工作,但希望它能为您提供一些想法。
# Error-field sampler for PSAT tags: +/- 0.53 deg longitude, +/- 1.08 deg
# latitude around the recorded fix.
# BUG FIX: the helper is named `calc_errTrack` below; the original called
# an undefined `calc_errors`.
calc_errors_psat <- function(long, lat) {
  calc_errTrack(long, lat, 0.53, 1.08)
}
# Error-field sampler for ARGOS tags: +/- 0.12 deg in both directions.
# BUG FIX: the helper is named `calc_errTrack` below; the original called
# an undefined `calc_errors`.
calc_errors_argos <- function(long, lat) {
  calc_errTrack(long, lat, 0.12, 0.12)
}
# Build a SpatialPoints object holding the recorded fix plus 9 random
# positions drawn from a truncated normal inside the tag's error box.
#
# @param long,lat            recorded position (decimal degrees)
# @param long_offset,lat_offset  half-width of the error box per axis
# @return sp::SpatialPoints with 10 points; row 1 is the original fix
calc_errTrack <- function(long, lat, long_offset, lat_offset) {
  # The error box is symmetric about the fix, so the truncated-normal
  # mean is simply the fix itself.
  # BUG FIX: `mean(long - off, long + off)` passed the second value as
  # `trim` and returned long - off, biasing every sample.
  # BUG FIX: draw 9 samples (not 10) so that, with the original fix
  # prepended, there are 10 points in total as documented.
  err_long <- rtruncnorm(n = 9, a = long - long_offset,
                         b = long + long_offset, mean = long)
  err_lat <- rtruncnorm(n = 9, a = lat - lat_offset,
                        b = lat + lat_offset, mean = lat)
  err <- data.frame(
    longitude = c(long, err_long),
    latitude = c(lat, err_lat)
  )
  SpatialPoints(err)
}
# Mean great-circle distance (km, rounded to 2 d.p. per point) from a set
# of error points to the nearest location on `contour`.
#
# @param errTrack sp::SpatialPoints (original fix + random samples)
# @param contour  a Spatial* layer (depth contour or land)
# @return single numeric: mean distance in kilometres
calc_distValues <- function(errTrack, contour) {
  n <- length(errTrack)  # 10: original location + 9 random samples
  nearest <- matrix(NA_real_, nrow = n, ncol = 2)
  for (e in seq_len(n)) {
    # gNearestPoints() returns both ends of the shortest segment;
    # row 2 is the point on the contour.
    nearest[e, ] <- coordinates(gNearestPoints(errTrack[e], contour))[2, ]
  }
  # BUG FIX: SpatialPoints has no `$longitude` accessor; extract the
  # coordinate matrix instead.
  p1 <- coordinates(errTrack)
  # Haversine distance in metres -> km, rounded as in the original
  dist_km <- round(distHaversine(p1, nearest, r = 6378137) / 1000, 2)
  mean(dist_km)
}
# Pick the error-sampling function for a track file from its path: the
# tag type ('PSAT' or 'ARGOS') is the folder three levels above the file.
#
# @param loc path to a track csv (relative or absolute)
# @return one of calc_errors_psat / calc_errors_argos
find_err_fcn <- function(loc) {
  # BUG FIX: with full.names = TRUE paths, dirname^3 yields a full path
  # (e.g. ".../Tracks/PSAT"), never the bare tag name; basename() strips
  # the leading directories so both path styles work.
  tagType <- basename(dirname(dirname(dirname(loc))))
  if (tagType == "PSAT") {
    calc_errors_psat
  } else {
    calc_errors_argos
  }
}
# get the track file locations
dir <- 'C:/Users/Documents/PhD/Chapters/'
sfolder <- file.path(dir, 'Data/Tracks')
# BUG FIX: the original dropped the csv filter and the recursive search
# used by the asker's script, so non-csv files (and no nested files)
# would be picked up.
track_locs <- list.files(sfolder, pattern = '\\.csv$',
                         recursive = TRUE, full.names = TRUE)
# read in the files and error functions into a data frame, and calculate
# the error tracks for every position
track_df <- tibble::tibble(
  track_list = purrr::map(track_locs, read.csv),
  calc_err_fcns = purrr::map(track_locs, find_err_fcn),
  # BUG FIX: the error functions take scalar (long, lat) arguments, so
  # apply them to each position of the track, not to the whole data
  # frame; each element is then a list of SpatialPoints, one per fix.
  errTrack_list = purrr::map2(
    track_list, calc_err_fcns,
    function(tr, f) purrr::map2(tr$longitude, tr$latitude, f)
  )
)
# calculate the track proximity distances
# BUG FIX: c() flattens/combines S4 Spatial* objects incorrectly; keep
# them in a named list() instead.
proximity_contours <- list(
  contour2 = readOGR(file.path(dir, 'QGIS/Base layers/2GEBCO_2020_Contour_200.gpkg')),
  contour1 = readOGR(file.path(dir, 'QGIS/Base layers/2GEBCO_2020_Contour_1000.gpkg')),
  land = readOGR(file.path(dir, 'QGIS/Base layers/GEBCO_2020_Contour_0.gpkg'))
)
# One column per contour, one row per track: the mean distance values
track_results <- purrr::map_dfc(
  .x = proximity_contours,
  # BUG FIX: the function defined above is calc_distValues (the original
  # referenced an undefined calc_distValue)
  .f = function(contour) purrr::map(
    .x = track_df$errTrack_list,
    .f = calc_distValues,
    contour = contour
  )
)