我想计算两个不同数据集中两点之间的距离。我不想计算所有点之间的距离 - 只是到数据集B的最近点。 一些例子:
数据集A - 人员 http://pastebin.com/HbaeqACi
数据集B - Waterfeatures: http://pastebin.com/UdDvNtHs
数据集C - 城市: http://pastebin.com/nATnkMRk
所以...我想计算每个人到最近的水上特征点的距离。 我已经尝试过使用rgeos软件包,在遇到一些预测错误之后,我已经开始工作了。但是,这计算(至少我假设)到每个点的所有距离,但是,正如已经说过的,我只对距离最近的水特征点的距离感兴趣。
# load csv files
persons = read.csv("persons.csv", header = TRUE)
water = read.csv("water.csv", header = TRUE)
# change dataframes to SpatialPointDataFrame and assign a projection
library(sp)
library(rgeos)
coordinates(persons) <- c("POINT_X", "POINT_Y")
proj4string(persons) <- CRS("+proj=utm +datum=WGS84")
coordinates(water) <- c("POINT_X", "POINT_Y")
proj4string(water) <- CRS("+proj=utm +datum=WGS84")
# use rgoes package to calculate the distance
distance <- gDistance(persons, water, byid=TRUE)
# works, but calculates a huge number of distances
有没有参考,我错过了。或者我需要使用其他包或功能吗?我还查看了spatstat,它能够计算到最近邻居的距离,但不能计算两个不同数据集的距离:http://hosho.ees.hokudai.ac.jp/~kubo/Rdoc/library/spatstat/html/nndist.html
编辑: 完整的R脚本,包括数据集的绘图:
library(RgoogleMaps)
library(ggplot2)
library(ggmap)
library(sp)
library(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
city = read.csv("city.csv", header =TRUE)
# plot data
persons_ggplot2 <- persons
city_ggplot2 <- city
water_ggplot2 <- water
gc <- geocode('new york, usa')
center <- as.numeric(gc)
G <- ggmap(get_googlemap(center = center, color = 'bw', scale = 1, zoom = 11, maptype = "terrain", frame=T), extent="device")
G1 <- G + geom_point(aes(x=POINT_X, y=POINT_Y ),data=city, shape = 22, color="black", fill = "yellow", size = 4) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=persons, shape = 8, color="red", size=2.5) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=water_ggplot2, color="blue", size=1)
plot(G1)
#### calculate distance
# Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
# Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id){
tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
WaterFeatures <- rbind(tmp,water[,2:3])
tmp1 <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
tmp1 <- which.min(tmp1)
tmp1 <- water[tmp1,1]
tmp1 <- data.frame(tmp1, WaterFeature=tmp)
return(tmp1)
}
#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
如何为每个人编写寻找最近水景的功能呢?
#requires function earth.dist from "fossil" package
require(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
#Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
#Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id){
tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
WaterFeatures <- rbind(tmp,water[,2:3])
tmp1 <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
tmp1 <- min(tmp1)
tmp1 <- data.frame(tmp1, WaterFeature=tmp)
return(tmp1)
}
#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
注意:我在原始的read.csv中添加了一个stringsAsFactors参数,它最终使合并更容易
注意:列tmp1记录最近水景的METERS数
也许我有点太晚了,但你可以使用spatstat
来计算两个不同数据集之间的距离。命令是nncross
。您必须使用的参数是ppp
类型的两个对象,您可以使用as.ppp()
函数创建它们。