目标是从每一行的对象数组中,提取日期最新的那条观测记录的度量值(metric)。示例数据如下:
-- Sample data: one row per id; col holds an array of {date, metric} objects.
-- The arrays for id 2 and id 3 are not stored in date order.
CREATE OR REPLACE TABLE tab (
    id INT,
    col VARIANT
)
AS
    SELECT 1, [{'date':'2024-01-01', 'metric':1}, {'date':'2024-01-02', 'metric':2}]
UNION ALL
    SELECT 2, [{'date':'2024-01-12', 'metric':3}, {'date':'2024-01-11', 'metric':4}]
UNION ALL
    SELECT 3, [{'date':'2024-01-05', 'metric':5}, {'date':'2024-01-04', 'metric':6}, {'date':'2024-01-07', 'metric':7}];
所需输出(每个 id 对应日期最新的 metric):id = 1 → 2,id = 2 → 3,id = 3 → 7。
希望找到一种高效的方法,在不先将数组展平的情况下,对对象数组执行相当于“MIN_BY/MAX_BY”的操作。下面是先展平再聚合的常规写法:
-- Baseline approach: flatten every array into one row per observation, then
-- use MAX_BY to return, per original row, the metric paired with the latest date.
WITH flattened AS (
    SELECT
        tab.id,
        tab.col,
        s.value:metric::INT AS metric,
        s.value:date::DATE AS obs_date
    FROM tab
    -- Each array element is read through the VALUE column of FLATTEN's output,
    -- so the table alias carries no column alias list.
    , LATERAL FLATTEN(INPUT => tab.col) AS s
)
SELECT
    id,
    col,
    MAX_BY(metric, obs_date) AS col_newest_metric
FROM flattened
GROUP BY id, col
ORDER BY id;
使用 REDUCE 函数、OBJECT 累加器和条件逻辑:
-- Fold each array with REDUCE instead of flattening it: the accumulator is the
-- {date, metric} object with the greatest date seen so far, and the metric of
-- the final accumulator is returned as an INT.
SELECT
    id,
    col,
    REDUCE(
        col,
        -- Seed accumulator: a date earlier than any date in the sample data, no metric.
        {'date':'0000-01-01'::DATE, 'metric':NULL},
        -- Strict ">" keeps the current accumulator when two dates are equal.
        (acc, x) -> IFF(x:date > acc:date, {'date':x:date, 'metric':x:metric}, acc)
    ):metric::INT AS col_newest_metric
FROM tab
-- Deterministic row order, matching the flatten-based query.
ORDER BY id;
输出: