我正在尝试按照这篇博客文章(http://sqldatamine.blogspot.com/2013/12/true-multiple-regression-using-sql.html)中的结构,在 Snowflake 中构建多元回归模型,但在将其改写为 Snowflake SQL(尤其是用 JavaScript 编写的存储过程)时遇到了困难。
以下是我想要复现的博客文章片段:
-- Loop quoted from the referenced blog post (T-SQL-style syntax: @variables, #temp tables).
-- Each pass first appends coefficient rows to #c and then appends one new vector to #z;
-- the second insert reads the #c rows written by the first, so the order matters.
declare @p int
set @p = 1
-- Runs once for every value of @p from 1 up to the largest xn present in #x.
while @p <= (select max(xn) from #x)
begin
-- For the #z vector with zn = @p-1 and every #x vector with a larger index (xn > zn),
-- store sum(xv*zv)/sum(zv*zv) as cv, keyed by (cxn, czn).
insert into #c
select xn cxn, zn czn, sum(xv*zv)/sum(zv*zv) cv
from #x join #z on xid = zid where zn = @p-1 and xn>zn group by xn, zn
-- Build vector @p of #z: for each row id, xv minus the sum of cv*zv over all
-- earlier #z vectors (zn < xn), using the coefficients stored in #c.
insert into #z
select zid, xn,xv- sum(cv*zv)
from #x join #z on xid = zid join #c on czn = zn and cxn = xn where xn = @p and zn<xn group by zid, xn,xv
set @p = @p +1
end
这是我的尝试:
-- Setup for the regression example: five sample observations plus the working
-- tables (X_VAR, Y_VAR, Z_VAR, C_VAR, B_VAR) read and written by the ORTH procedure.
CREATE TEMP TABLE TEST_TABLE (ID int, AREA float, ROOMS float, ODD float, PRICE float);
INSERT INTO TEST_TABLE (ID, AREA, ROOMS, ODD, PRICE)
VALUES
    (1, 2202, 3, 1, 400),
    (2, 1600, 3, 0, 330),
    (3, 2400, 3, 1, 369),
    (4, 1416, 2, 1, 232),
    (5, 3000, 4, 0, 540);

--INDEPENDENT VARIABLE VECTOR--
-- Long format: one row per (observation xid, variable index xn) holding value xv.
-- xn = 0 is a constant 1 for every observation; xn = 1..3 are ROOMS, AREA and ODD.
CREATE TEMP TABLE X_VAR AS
SELECT ID AS xid, 0 AS xn, 1 AS xv FROM TEST_TABLE
UNION ALL
SELECT ID, 1, ROOMS FROM TEST_TABLE
UNION ALL
SELECT ID, 2, AREA FROM TEST_TABLE
UNION ALL
SELECT ID, 3, ODD FROM TEST_TABLE;

--DEPENDENT VARIABLE VECTOR--
CREATE TEMP TABLE Y_VAR AS
SELECT ID AS yid, 0 AS yn, PRICE AS yv FROM TEST_TABLE;

--ORTHOGONAL PROCESSED VALUES--
-- Seeded with vector zn = 0 (constant 1 per observation); later vectors are
-- appended by the orthogonalization loop.
CREATE TEMP TABLE Z_VAR (zid int, zn int, zv float);
INSERT INTO Z_VAR (zid, zn, zv)
SELECT ID, 0, 1 FROM TEST_TABLE;

--ORTHOGONALIZATION COEFFICIENTS--
-- Deliberately left empty: every row is produced by the orthogonalization loop.
-- Seeding it with (ID, 0, 1) rows, as Z_VAR is seeded, would add bogus
-- (cxn, czn = 0) coefficients that the loop's join on czn = zn AND cxn = xn
-- would pick up alongside the computed ones.
CREATE TEMP TABLE C_VAR (cxn int, czn int, cv float);

--REGRESSION COEFFICIENTS--
CREATE TEMP TABLE B_VAR (bn int, bv float);
--FIRST LOOP: ORTHOGONALIZATION COEFFICIENT CALC--
-- ORTH(): JavaScript stored procedure that loops p upward from 1 and, on each pass,
-- executes the statement text held in sql_bulk (INSERTs into C_VAR and Z_VAR).
-- The loop bound is meant to come from sql_counter (MAX(XN) of X_VAR).
-- Declared RETURNS FLOAT NOT NULL; the body contains no return statement.
-- NOTE(review): the loop condition passes the bare string sql_counter to
--   snowflake.execute, while the call inside the loop passes an object of the
--   form {sqlText: ...}; the condition also compares p with the value returned
--   by execute itself rather than with a column value read from it.
-- NOTE(review): sql_bulk contains two INSERT statements with no separator, and
--   the p / P inside it is part of the SQL text, not the JavaScript variable p.
CREATE OR REPLACE PROCEDURE ORTH()
RETURNS FLOAT NOT NULL
LANGUAGE JAVASCRIPT
AS
$$
var sql_counter =
`SELECT MAX(XN) FROM X_VAR`;
var sql_bulk =
`INSERT INTO C_VAR
SELECT XN CXN, ZN CZN, SUM(XV*ZV)/SUM(ZV*ZV) CV
FROM X_VAR
JOIN Z_VAR ON XID = ZID
WHERE ZN = p-1
AND XN > ZN
GROUP BY XN, ZN
INSERT INTO Z_VAR
SELECT ZID, XN, XV-SUM(CV*ZV)
FROM X_VAR
JOIN Z_VAR ON XID = ZID
JOIN C_VAR ON CZN = ZN AND CXN = XN
WHERE
1=1
AND XN = P
AND ZN < XN
GROUP BY ZID, XN, XV`;
var p = 1;
while (p <= snowflake.execute(sql_counter)) {
snowflake.execute ({sqlText: sql_bulk})
p = p + 1
}
$$
;
-- Invoke the procedure, then list the contents of the coefficient table.
CALL ORTH();
SELECT
    CXN,
    CZN,
    CV
FROM C_VAR;
我在调用 snowflake.execute 的那一行一直遇到“空参数”(null argument)错误。我哪里做错了?
下面的版本可以运行,但我不确定它的预期作用是什么。问题出在您的这一行:
while (p <= snowflake.execute(sql_counter)) {
问题在于该查询的调用方式不正确:必须向 snowflake.execute 传入一个语句对象(例如下一行中的 {sqlText: sql_bulk}),而不是直接传入 SQL 字符串。
--FIRST LOOP: ORTHOGONALIZATION COEFFICIENT CALC--
-- ORTH(): for p = 1 .. MAX(X_VAR.XN), in this order on every pass:
--   1. appends to C_VAR, for each X_VAR vector with XN > p-1, the coefficient
--      SUM(XV*ZV)/SUM(ZV*ZV) against the Z_VAR vector ZN = p-1;
--   2. appends to Z_VAR vector p: XV minus SUM(CV*ZV) over all earlier
--      Z_VAR vectors (ZN < p), using the coefficients stored in C_VAR.
-- Returns: the number of passes performed (0 when X_VAR has no rows).
-- Side effects: INSERT-only on C_VAR and Z_VAR, so calling it again on the same
-- tables appends duplicate rows.
CREATE OR REPLACE PROCEDURE ORTH()
RETURNS FLOAT NOT NULL
LANGUAGE JAVASCRIPT
AS
$$
// Runs a query and returns the named column of its first row,
// or null when the query yields no rows.
function ExecuteSingleValueQuery(columnName, queryString) {
    var stmt = snowflake.createStatement({sqlText: queryString});
    var rs = stmt.execute();
    if (!rs.next()) {
        return null;
    }
    return rs.getColumnValue(columnName);
}

var sql_counter = `SELECT MAX(XN) AS C FROM X_VAR`;

// Step 1 of a pass; :1 is bound to p - 1 (index of the newest Z_VAR vector).
var sql_coefficients = `
    INSERT INTO C_VAR (CXN, CZN, CV)
    SELECT XN AS CXN, ZN AS CZN, SUM(XV * ZV) / SUM(ZV * ZV) AS CV
    FROM X_VAR
    INNER JOIN Z_VAR ON XID = ZID
    WHERE ZN = :1
      AND XN > ZN
    GROUP BY XN, ZN`;

// Step 2 of a pass; :1 is bound to p (index of the vector being created).
var sql_orthogonal = `
    INSERT INTO Z_VAR (ZID, ZN, ZV)
    SELECT ZID, XN, XV - SUM(CV * ZV)
    FROM X_VAR
    INNER JOIN Z_VAR ON XID = ZID
    INNER JOIN C_VAR ON CZN = ZN AND CXN = XN
    WHERE XN = :1
      AND ZN < XN
    GROUP BY ZID, XN, XV`;

// X_VAR is not modified by the loop, so the upper bound is read once.
var maxXn = ExecuteSingleValueQuery('C', sql_counter);
if (maxXn === null) {
    return 0; // MAX() over an empty X_VAR is NULL: nothing to orthogonalize.
}

var passes = 0;
for (var p = 1; p <= maxXn; p++) {
    // One statement per execute call; the loop variable is passed as a bind
    // value because the SQL text cannot see JavaScript variables.
    snowflake.execute({sqlText: sql_coefficients, binds: [p - 1]});
    snowflake.execute({sqlText: sql_orthogonal, binds: [p]});
    passes++;
}

// The procedure is declared RETURNS FLOAT NOT NULL, so it must return a number.
return passes;
$$
;
-- Invoke the procedure, list the coefficient table, then show the loop's upper bound.
CALL ORTH();
SELECT
    CXN,
    CZN,
    CV
FROM C_VAR;
SELECT MAX(XN)
FROM X_VAR;