我有一个由客户事件组成的表。每个事件都带有一个由键(key)和值(value)组成的数组。
以下模拟表展示了这些事件的结构:
WITH
CTE_TEST_DATA AS (
    -- Mock customer events. Every event carries a `purchases` array of
    -- key/value structs; each entry fills exactly one of the typed value
    -- fields (int_value, float_value or string_value) and leaves the rest NULL.

    -- Event 1: user XXX on 2025-01-01, ware 'banana', amount 2.
    -- The first STRUCT names the fields for every element of the arrays.
    SELECT
        'XXX' AS user,
        '2025-01-01' AS event_date,
        'store_1' AS store,
        'london' AS city,
        [
            STRUCT('type' AS key, NULL AS int_value, NULL AS float_value, 'online' AS string_value),
            STRUCT('ware', NULL, NULL, 'banana'),
            STRUCT('amount', 2, NULL, NULL),
            STRUCT('price', NULL, 0.5, NULL)
        ] AS purchases

    UNION ALL

    -- Event 2: user XXX on 2025-01-01, ware 'apple', amount 3.
    SELECT
        'XXX' AS user,
        '2025-01-01' AS event_date,
        'store_1' AS store,
        'london' AS city,
        [
            STRUCT('type', NULL, NULL, 'online'),
            STRUCT('ware', NULL, NULL, 'apple'),
            STRUCT('amount', 3, NULL, NULL),
            STRUCT('price', NULL, 0.6, NULL)
        ] AS purchases

    UNION ALL

    -- Event 3: user XXX on 2025-01-01, ware 'banana' again, amount 3.
    SELECT
        'XXX' AS user,
        '2025-01-01' AS event_date,
        'store_1' AS store,
        'london' AS city,
        [
            STRUCT('type', NULL, NULL, 'online'),
            STRUCT('ware', NULL, NULL, 'banana'),
            STRUCT('amount', 3, NULL, NULL),
            STRUCT('price', NULL, 0.5, NULL)
        ] AS purchases

    UNION ALL

    -- Event 4: user XXX on 2025-01-02, ware 'bread', amount 1.
    SELECT
        'XXX' AS user,
        '2025-01-02' AS event_date,
        'store_1' AS store,
        'london' AS city,
        [
            STRUCT('type', NULL, NULL, 'online'),
            STRUCT('ware', NULL, NULL, 'bread'),
            STRUCT('amount', 1, NULL, NULL),
            STRUCT('price', NULL, 1.0, NULL)
        ] AS purchases

    UNION ALL

    -- Event 5: user YYY on 2025-01-01, ware 'milk', amount 1.
    SELECT
        'YYY' AS user,
        '2025-01-01' AS event_date,
        'store_2' AS store,
        'sydney' AS city,
        [
            STRUCT('type', NULL, NULL, 'physical'),
            STRUCT('ware', NULL, NULL, 'milk'),
            STRUCT('amount', 1, NULL, NULL),
            STRUCT('price', NULL, 1.5, NULL)
        ] AS purchases
)
实际客户事件表非常大,因此已经创建了每天的聚合查询,以使分析和报告成本降低。
该每日聚合查询按每个用户(user)和每个 event_date 汇总结果:
-- Daily roll-up of the customer events: one output row per (user, event_date).
SELECT
CTD.user
,CTD.event_date
-- Number of events in the group, wrapped in a STRUCT so the column
-- surfaces as events_per_user.amount.
,STRUCT(COUNT(*) AS amount) AS events_per_user
-- ,<other aggregates>  -- placeholder: further aggregate columns are added here
FROM
CTE_TEST_DATA AS CTD
GROUP BY
CTD.user
,CTD.event_date
当客户事件表中新增指标时,必须修改每日聚合查询以纳入这些指标。受现有设置所限,我在添加聚合列时有一些约束,其中最主要的是:新指标只能通过相关子查询(correlated subquery)添加,并且不能使用公用表表达式(CTE)。
我试图实现的结果是以下内容,其中数组仅包含不同的商品名称:
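user | event_date | events_per_user.amount | ware.name | ware.distinct_orders | total.amount |
---|---|---|---|---|---|
XXX | 2025-01-01 | 3 | banana | 2 | 5 |
 | | | apple | 1 | 3 |
XXX | 2025-01-02 | 1 | bread | 1 | 1 |
YYY | 2025-01-01 | 1 | milk | 1 | 1 |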
不幸的是,我目前最接近该结果的做法是使用以下查询:
SELECT
-- Select list of the attempted daily aggregate (the SELECT keyword sits on
-- the preceding line). Grouping keys first.
CTD.user,
CTD.event_date,
-- Number of events per (user, event_date) group.
STRUCT(COUNT(*) AS amount) AS events_per_user,
-- The correlated subquery is evaluated against a single event's purchases
-- array, so ARRAY_AGG collects one STRUCT per event; a ware bought in
-- several events therefore shows up several times in the resulting array.
ARRAY_AGG((
    SELECT AS STRUCT
        purchase.string_value AS name,
        COUNT(purchase.string_value) AS distinct_orders
    FROM UNNEST(CTD.purchases) AS purchase
    WHERE purchase.key = 'ware'
    GROUP BY purchase.string_value
)) AS ware,
-- Same shape for the amounts: one STRUCT per event holding that event's
-- 'amount' value, not a total per ware.
ARRAY_AGG((
    SELECT AS STRUCT
        SUM(purchase.int_value) AS amount
    FROM UNNEST(CTD.purchases) AS purchase
    WHERE purchase.key = 'amount'
    GROUP BY
        user,
        event_date
)) AS total
FROM CTE_TEST_DATA AS CTD
GROUP BY
    CTD.user,
    CTD.event_date
|
user | event_date
events_per_user.amount
-- Per (user, event_date): the number of events plus an array with one entry
-- per distinct ware (name, number of events containing it, summed amount).
WITH
CTE_TEST_DATA AS (...),  -- placeholder for the mock-data CTE
exploded_data AS (
    -- Turn each event's key/value array into one named STRUCT column per key.
    -- Entries are looked up by key name rather than by sort position, so
    -- adding a new key to `purchases` (or omitting one) cannot shift the
    -- columns; a missing key yields NULL. LIMIT 1 keeps the scalar subquery
    -- from failing if a key is ever repeated within one event.
    SELECT
        user,
        event_date,
        store,
        city,
        (
            SELECT AS STRUCT p.key, p.int_value, p.float_value, p.string_value
            FROM UNNEST(purchases) AS p
            WHERE p.key = 'ware'
            LIMIT 1
        ) AS ware,
        (
            SELECT AS STRUCT p.key, p.int_value, p.float_value, p.string_value
            FROM UNNEST(purchases) AS p
            WHERE p.key = 'type'
            LIMIT 1
        ) AS type,
        (
            SELECT AS STRUCT p.key, p.int_value, p.float_value, p.string_value
            FROM UNNEST(purchases) AS p
            WHERE p.key = 'price'
            LIMIT 1
        ) AS price,
        (
            SELECT AS STRUCT p.key, p.int_value, p.float_value, p.string_value
            FROM UNNEST(purchases) AS p
            WHERE p.key = 'amount'
            LIMIT 1
        ) AS amount
    FROM
        CTE_TEST_DATA
),
aggregated_fruit_lvl AS (
    -- Aggregate on user / day / ware level.
    SELECT
        user,
        event_date,
        ware.string_value,
        -- Events in which the ware occurs (NULL ware names are not counted).
        COUNT(ware.string_value) AS distinct_count,
        SUM(amount.int_value) AS sum_count
    FROM
        exploded_data
    GROUP BY
        user,
        event_date,
        ware.string_value
),
aggregated_day_lvl AS (
    -- Number of events per user and day.
    SELECT
        user,
        event_date,
        COUNT(*) AS total
    FROM
        exploded_data
    GROUP BY
        user,
        event_date
),
aggregated_fruit_lvl_with_struct AS (
    -- Re-nest the ware-level rows: one array of
    -- (string_value, distinct_count, sum_count) per user and day.
    SELECT
        user,
        event_date,
        ARRAY_AGG(
            STRUCT(string_value, distinct_count, sum_count)
        ) AS name_number_list
    FROM
        aggregated_fruit_lvl
    GROUP BY
        user,
        event_date
)
-- Both aggregates share the (user, event_date) grain, so the join is 1:1.
SELECT
    aflws.user,
    aflws.event_date,
    adl.total,
    aflws.name_number_list
FROM
    aggregated_fruit_lvl_with_struct AS aflws
INNER JOIN
    aggregated_day_lvl AS adl
    ON aflws.user = adl.user
    AND aflws.event_date = adl.event_date;
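user | event_date | events_per_user.amount | ware.name | ware.distinct_orders | total.amount |
---|---|---|---|---|---|
XXX | 2025-01-01 | 3 | banana | 1 | 2 |
 | | | apple | 1 | 3 |
 | | | banana | 1 | 3 |
XXX | 2025-01-02 | 1 | bread | 1 | 1 |
YYY | 2025-01-01 | 1 | milk | 1 | 1 |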
不幸的是,我已经花了数小时研究并尝试不同的方法来实现这一结果,但我不确定这是否可行。

为了解决该问题,我首先使用 UNNEST 将数据展开并规范化,以改变现有结构。在此步骤之后,我执行聚合,并按所需的结构重建最终数据: