我需要运行:
-- Statement from the question, kept verbatim: it is reported to fail on Snowflake with
-- "Sliding window frame unsupported for function ARRAYAGG".
-- Both ORDER BY clauses sort on o_orderkey, which is also the partition key, so every
-- row of a partition ties and the order of elements inside a frame is not pinned down.
select arrayagg(o_clerk)
within group (order by o_orderkey desc)
OVER (PARTITION BY o_orderkey order by o_orderkey
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS RESULT
from sample_data
但是 Snowflake 返回错误:Sliding window frame unsupported for function ARRAYAGG。
如果我尝试不使用滑动窗口而累积所有内容,则会收到错误:Cumulative window frame unsupported for function ARRAY_AGG。
我怎样才能实现这个目标?
样本数据:
-- Build the demo table: two orders ('Ord1', 'Ord2'), each expanded into seven
-- clerk rows by flattening the same array literal.
-- FLATTEN's INDEX output is kept as a column so later queries have an explicit
-- ordering key inside each order.
CREATE OR REPLACE TABLE sample_data AS (
    WITH seed AS (
        SELECT 1 AS order_no, [1,3,2,4,7,8,10] AS clerk_ids
        UNION ALL
        SELECT 2, [1,3,2,4,7,8,10]
    )
    SELECT
        'Ord' || seed.order_no AS o_orderkey,
        'c' || f.value AS o_clerk,
        f.index
    FROM seed, TABLE(FLATTEN(seed.clerk_ids)) f
)
;
想要的结果:每一行返回一个数组,包含当前行及其之前最多 3 行的 o_clerk(即 ROWS BETWEEN 3 PRECEDING AND CURRENT ROW 的效果)。
(来源:用于 BigQuery 迁移)
谢谢@Felipe Hoffa,这是我最初发布的解决方案:
-- Emulate ROWS BETWEEN 3 PRECEDING AND CURRENT ROW without a framed ARRAY_AGG:
-- fetch the three previous clerks with LAG, append the current clerk, and let
-- ARRAY_COMPACT drop the NULLs that LAG yields for the first rows of each order.
SELECT
    o_orderkey,
    ARRAY_COMPACT([
        LAG(o_clerk, 3) OVER (PARTITION BY o_orderkey ORDER BY index),
        LAG(o_clerk, 2) OVER (PARTITION BY o_orderkey ORDER BY index),
        LAG(o_clerk, 1) OVER (PARTITION BY o_orderkey ORDER BY index),
        o_clerk
    ])
FROM sample_data
使用 ARRAY_AGG 和 ARRAY_SLICE(受 Rajat 的回答启发):
-- Emulate a "3 preceding .. current row" frame by slicing the full per-order array.
--   start_index: position of the first element of the frame (0 for the first four rows).
--   num_elem:    frame length (the row number for the first four rows, then a constant 4).
-- Both aliases are reused inside ARRAY_SLICE further down the same select list.
SELECT
    *,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY o_orderkey ORDER BY index) <= 4 THEN 0
        ELSE ROW_NUMBER() OVER (PARTITION BY o_orderkey ORDER BY index) - 4
    END AS start_index,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY o_orderkey ORDER BY index) <= 4
            THEN ROW_NUMBER() OVER (PARTITION BY o_orderkey ORDER BY index)
        ELSE 4
    END AS num_elem,
    ARRAY_SLICE(
        ARRAY_AGG(o_clerk) WITHIN GROUP (ORDER BY index) OVER (PARTITION BY o_orderkey),
        start_index,
        start_index + num_elem
    )
FROM sample_data
ORDER BY o_orderkey, index;
输出:
start_index 和 num_elem 的计算可以进一步简化/推广,以模拟窗口滑动框架 ROWS BETWEEN prec PRECEDING AND foll FOLLOWING。
-- Generalised emulation of ROWS BETWEEN prec PRECEDING AND foll FOLLOWING.
-- rn, prec and foll are exposed as columns and reused by alias in the ARRAY_SLICE bounds.
SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY o_orderkey ORDER BY index) AS rn,
    3 AS prec,
    0 AS foll,
    ARRAY_SLICE(
        ARRAY_AGG(o_clerk) WITHIN GROUP (ORDER BY index) OVER (PARTITION BY o_orderkey),
        -- Lower bound: stays at 0 while at most prec rows precede the current one.
        CASE WHEN rn <= prec + 1 THEN 0 ELSE rn - (prec + 1) END,
        -- Upper bound: lower bound + frame length, which equals rn + foll in both
        -- branches (0 + rn + foll, or rn - (prec + 1) + prec + 1 + foll).
        rn + foll
    )
FROM sample_data
ORDER BY o_orderkey, index;
显示 ARRAY_SLICE 的每个参数以进行调试:
由于我们无法通过 ARRAY_AGG() 直接获得这些结果,可以改用自连接来实现。
首先我们给每一行一个编号,然后为每一行选出编号介于"该行编号 − 3"与"该行编号"之间的所有行:
-- Emulate ROWS BETWEEN 3 PRECEDING AND CURRENT ROW with a self-join, for when a
-- framed ARRAY_AGG is unavailable.
WITH numbered AS (
    -- Give every row a gap-free position inside its order. Joining on the raw index
    -- only reproduces a ROWS frame while index happens to have no gaps.
    SELECT
        o_orderkey,
        o_clerk,
        ROW_NUMBER() OVER (PARTITION BY o_orderkey ORDER BY index) AS rn
    FROM sample_data
), crossed AS (
    -- Pair each row (a) with itself and with up to three rows before it (b).
    SELECT
        a.o_orderkey,
        a.rn AS ai,
        b.rn AS bi,
        b.o_clerk
    FROM numbered AS a
    INNER JOIN numbered AS b
        ON a.o_orderkey = b.o_orderkey
        AND a.rn BETWEEN b.rn AND b.rn + 3
)
-- One output row per (order, anchor row); ai equals max(bi) because every row pairs with itself.
SELECT o_orderkey, ARRAY_AGG(o_clerk) WITHIN GROUP (ORDER BY bi)
FROM crossed
GROUP BY o_orderkey, ai
ORDER BY o_orderkey, ai
请注意,相对于最初的问题,我必须添加一个 index 字段,这样各行之间才有明确的排序顺序。
从9.1.0版本开始原生支持ARRAY_AGG窗口框架:
-- Native framed ARRAY_AGG (the surrounding text states it is supported from version 9.1.0).
-- The window is ordered by index: ordering by o_orderkey, the partition key, ties
-- every row of a partition and leaves the contents of each frame unspecified.
SELECT
    *,
    ARRAY_AGG(o_clerk) OVER (
        PARTITION BY o_orderkey
        ORDER BY index
        ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
    ) AS RESULT
FROM sample_data;
输出: