我们有一个 Athena 表,其中有一列包含 JSON 值。该列在 Athena 中的数据类型是 string(字符串)。
Athena 表的 DDL 如下:
-- Athena table holding the raw request payloads.
-- Identifiers in Athena DDL are quoted with backticks; single quotes denote
-- string literals and are rejected in this position.
-- NOTE(review): the storage clauses (ROW FORMAT / LOCATION) are not shown in
-- the original snippet and are left out here as well.
CREATE EXTERNAL TABLE `table1`(
    `id` bigint,
    `requestbody` string -- this column contains JSON values
)
此外,这个“requestbody”列包含大约 5000 个 JSON 字段。而且,它里面还有嵌套数组。
例如,
{
"flow": {
"req": {
"application": {
"applicants": [
{
"id": 191915,
"applicationID": 189176,
"applicantIndex": null,
"sequenceNumber": 1,
"relationType": 1,
"relationCode": null,
"customerType": 1,
"prefix": "MR",
"incomeData": [
{
"name": "Applicant Income",
"description": "Executive Communication",
"submittedIncome": 4000,
"adjustedIncome": 4000,
"isAdjusted": false,
"adjustedBy": null
},
{
"name": "Applicant Other Income",
"description": "Car Allowance Car Allowance",
"submittedIncome": 100,
"adjustedIncome": 100,
"isAdjusted": false,
"adjustedBy": null,
"adjustedDate": null,
"comment": null,
"customerAppIncomeOrder": 1,
"combine": true
}
]
}
]
}
}
}
}
我尝试使用下面的查询来读取这些内容:
with A as ( -- A: for each table1 row, the applicants JSON array cast to an array of maps
SELECT
CAST(json_extract(requestbody, '$.flow.req.application.applicants')as ARRAY(MAP(VARCHAR,VARCHAR))) applicants -- each array element is typed MAP(VARCHAR,VARCHAR), not ROW
FROM
table1
)
, b AS ( -- b: meant to produce the "name" of each incomeData entry
SELECT
incomedata1['name'] name -- map-style subscript lookup of the "name" key
FROM
A
CROSS JOIN UNNEST (applicants) t (applicant) -- expands the array: one row per applicant
CROSS JOIN UNNEST (applicant.incomedata) t (incomedata1) -- dot access on applicant; the reported "Expression applicant is not of type ROW" error concerns this expression
)
select name from b;
但是我收到错误:
line 14:23: Expression applicant is not of type ROW
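问题在于类型转换语句 CAST(json_extract(requestbody, '$.flow.req.application.applicants') as ARRAY(MAP(VARCHAR,VARCHAR))) applicants 无效。
您提供的数据显示,并非所有键的值都是 varchar(例如 incomeData 是一个嵌套数组)。此外,转换为 MAP 之后,applicant 不是 ROW 类型,因此无法用 applicant.incomedata 这样的点号语法访问其字段,这正是报错的原因。
您需要使用 ROW 进行显式类型转换。以下查询应该可行: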
-- Lists the "name" of every incomeData entry of every applicant found in the
-- JSON stored in table1.requestbody.
-- The applicants array is cast to ARRAY<ROW(...)> so that nested fields can be
-- reached with dot notation (applicant.incomeData, income_entry.name).
WITH A AS (
    -- Typed view of the applicants array, one value per table1 row.
    -- Field types follow the sample payload; fields that are null in the
    -- sample have guessed types -- adjust them if the real data differs.
    SELECT
        CAST(json_extract(requestbody, '$.flow.req.application.applicants') AS ARRAY<ROW(
            id BIGINT,
            applicationID BIGINT,
            applicantIndex BIGINT,
            sequenceNumber BIGINT,
            relationType BIGINT,
            relationCode BIGINT,
            customerType BIGINT,
            prefix VARCHAR,
            incomeData ARRAY<ROW(
                name VARCHAR,
                description VARCHAR,
                submittedIncome BIGINT,
                adjustedIncome BIGINT,
                isAdjusted BOOLEAN,
                adjustedBy VARCHAR,
                adjustedDate VARCHAR,
                comment VARCHAR,
                customerAppIncomeOrder BIGINT, -- numeric in the sample payload (value 1)
                combine BOOLEAN
            )>
        )>) AS applicants
    FROM table1
),
b AS (
    -- Flatten twice: first one row per applicant, then one row per income entry.
    -- Each UNNEST gets its own relation alias so the two cannot be confused.
    SELECT
        income_entry.name AS name
    FROM A
    CROSS JOIN UNNEST(applicants) AS applicant_rows (applicant)
    CROSS JOIN UNNEST(applicant.incomeData) AS income_rows (income_entry)
)
SELECT name
FROM b;