我正在尝试使用R + MonetDB作为大数据分析堆栈,但是在创建新列并使用我的分析数据填充它时遇到了问题。这是一个玩具示例:
# Minimal reproduction from the question: add a column to a MonetDBLite table
# and try to fill it from an R object. The final statement fails as written
# (see the error output quoted below).
library(MonetDBLite)
library(DBI)
data(mtcars)
# Open an embedded database stored under ./test.db
db <- dbConnect(MonetDB.R::MonetDB(), embedded="./test.db")
# Load mtcars into the database, replacing any existing table of that name
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)
# Add a new, initially empty, DOUBLE column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
# Attempt to insert data into the new column.
# mtcars["mpg"] is a one-column data.frame, not a plain numeric vector.
v1 <- mtcars["mpg"] * pi
# The whole column is substituted into the single `?` placeholder as one
# deparsed `c(...)` string, which the server cannot convert to a double.
dbSendQuery(db, "INSERT INTO mtcars (v1) VALUES (?)", bind.data=v1)
运行后得到如下错误消息:
Error in .local(conn, statement, ...) :
Unable to execute statement 'INSERT INTO mtcars (v1) VALUES ('c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.23008278...'.
Server says 'ERROR: 49.6371639267187, 61.8893752757189, 47.1238898038469, 67.2300827868216)' to type dbl failed.
' [#conversion of string 'c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.2300827868216, 58.7477826221291, 56.8628270299753, 44.924774946334, 76.6548607475909, 71.6283125018473, 60.318578948924, 55.9203492338983, 51.5221195188726, 54.3495529071034, 47.7522083345649, 32.6725635973338, 32.6725635973338, 46.18141200777, 101.787601976309, 95.5044166691297, 106.499990956694, 67.5442420521806, 48.6946861306418, 47.7522083345649, 41.7831822927443, 60.318578948924, 85.7654794430014, 81.6814089933346, 95.5044166691297,
].
In addition: Warning message:
In if (is.na(value)) statement <- sub("?", "NULL", statement, fixed = TRUE) else if (valueClass %in% :
the condition has length > 1 and only the first element will be used
从这个错误我推测,也许 `bind.data` 不能与 MonetDBLite 一起使用?
问题:
如何将列添加到MonetDBLite表并使用来自R会话的数据填充它?
首先,问题中最后一条语句使用的 INSERT 命令并不正确:INSERT 会向表中追加新行,而要填充已有行中的新列,需要使用 UPDATE 语句。
话虽这么说,我提出了一个解决方案,您可以直接从R填充MonetDBLite表:
# Fill a newly added MonetDBLite column from R by issuing one UPDATE per row.
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# Add an explicit row number so each row can be addressed in the WHERE clause
mtcars$rownbr <- seq_len(nrow(mtcars))
# Load mtcars into the database, replacing any existing table of that name
dbWriteTable(conn = db, value = mtcars, name = "mtcars", overwrite = TRUE)
# Add the new, initially empty, column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
# Compute the values in R (one-column data.frame, hence v1$mpg below)
v1 <- mtcars["mpg"] * pi
# seq_len() is safe even if the table has zero rows (1:0 would iterate twice)
for (i in seq_len(nrow(mtcars))) {
  # Keep whitespace around WHERE: without it the numeric value runs straight
  # into the keyword (e.g. "65.97where") and the statement may not parse.
  myquery <- paste0(
    "UPDATE mtcars SET v1 = ", v1$mpg[i],
    " WHERE rownbr = ", i, ";"
  )
  dbSendQuery(db, myquery)
}
通过 `dbBind` 对 MonetDBLite 执行参数化 SQL 查询似乎存在问题(请参阅 https://github.com/hannesmuehleisen/MonetDBLite-R/issues/16)。下面的代码适用于 SQLite:
# Parameterised UPDATE against SQLite: bind a whole data.frame to one prepared
# statement so every row is updated without building SQL strings by hand.
library(DBI)
library(RSQLite)
data(mtcars)
db <- dbConnect(SQLite(), ":memory:")
# Load mtcars into the database; row.names = TRUE stores the car names in a
# "row_names" column, which serves as the key for the update below
dbWriteTable(
  conn = db, value = mtcars, name = "mtcars",
  overwrite = TRUE, row.names = TRUE
)
# Add a new column. dbExecute() runs the statement and releases its result
# immediately, so no result set is left open when the next query is sent.
dbExecute(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
# Do the computation in R
mtcars$v1 <- mtcars$mpg * pi
mtcars$row_names <- rownames(mtcars)
# Prepare the data-manipulation statement with named placeholders
update_query <- dbSendStatement(
  db,
  'update mtcars set "v1"=$v1 WHERE row_names=$row_names'
)
# Column names of the bound data.frame match the $placeholders
dbBind(update_query, mtcars[, c("v1", "row_names")])
dbClearResult(update_query) # release the prepared statement
# Verify
dbReadTable(db, "mtcars")
dbDisconnect(db)
但是,对于MonetDBLite,它会为dbBind步骤生成错误(和警告):
> dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
Error in vapply(params, function(x) { : values must be length 1,
but FUN(X[[1]]) result is length 32
In addition: Warning message:
In if (is.na(x)) "NULL" else if (is.numeric(x) || is.logical(x)) { :
the condition has length > 1 and only the first element will be used
我想出的一个解决方法是使用 glue 包中的 `glue_data_sql` 来“手动”组合查询(无需遍历行):
# Workaround for MonetDBLite: build one fully interpolated UPDATE statement
# per row with glue_data_sql() and send them all to the database.
library(MonetDBLite)
library(DBI)
library(glue)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# row.names = TRUE stores the car names in a "row_names" column (the key)
dbWriteTable(
  conn = db, value = mtcars, name = "mtcars",
  overwrite = TRUE, row.names = TRUE
)
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
# glue_data_sql() is vectorised over the rows of mtcars and quotes each value
# for the given connection, yielding one statement per row
update_query <- glue_data_sql(
  mtcars,
  "update mtcars set v1 = {v1} where row_names = {row_names};",
  .con = db
)
# The calls are made for their side effect only; invisible() stops the list
# of result objects from being auto-printed at top level
invisible(lapply(update_query, dbSendQuery, conn = db))
# verify
dbReadTable(db, "mtcars")
dbDisconnect(db)
另一个解决方法是在一个查询中执行 `insert`(更接近 @Zelazny7 的原始尝试):
# Workaround for MonetDBLite: insert all rows with a single multi-row
# INSERT ... VALUES statement assembled with glue.
library(MonetDBLite)
library(DBI)
library(glue)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# The embedded database persists on disk, so a table named mtcars may already
# exist from an earlier run; drop it so CREATE TABLE does not fail
if (dbExistsTable(db, "mtcars")) {
  dbRemoveTable(db, "mtcars")
}
dbSendQuery(db, "CREATE TABLE mtcars (row_names VARCHAR(32), v1 DOUBLE);")
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
# One "(name, value)" tuple per row; glue_data_sql() quotes the string values
# for this connection instead of wrapping them in hand-written '...'
insert_values <- glue_data_sql(mtcars, "({row_names}, {v1})", .con = db)
# Join the tuples into a single comma-separated VALUES list
insert_values <- glue_collapse(insert_values, sep = ", ")
insert_query <- glue(
  "INSERT INTO mtcars (row_names, v1) VALUES {insert_values}"
)
dbSendQuery(db, insert_query)
# verify
dbReadTable(db, "mtcars")
dbDisconnect(db)