我正在使用 Snowflake 查询,根据表中某一列的输入字符串把数据拆分成多个列,但没有得到预期的输出,需要一些帮助:
假设输入是:
Activity type [DP - mcr modifyand quac endo; bio fert]; PharmSon BT acticity code [AYx765]
输出应该是:
Activity type in column1.
DP - mcr modifyand quac endo; bio fert in column2
PharmSon BT acticity code in column1
AYx765 in column2
但实际得到的输出是:
Activity type in column1-- column2 is null
bio fert] in column1--column2 is null
PharmSon BT acticity code in column1--AYx765 in column2
-- Attempted query: turn every "label [value]" entry stored in FILATT into one output
-- row, with the label and the bracketed value in separate columns.
-- NOTE(review): SPLIT(FILATT, ';') cuts at every semicolon, including those inside
-- the brackets. An entry such as 'Activity type [DP - mcr modifyand quac endo; bio fert]'
-- is therefore broken into two fragments, neither of which holds a complete '[...]' pair.
WITH parsed_data AS (
-- Split FILATT on every ';' into an array (the array is flattened in the next CTE)
SELECT
INTRNL AS EQ_INTRNL,
'DEV' AS ENVIRONMENT,
FILATT AS raw_data,
SPLIT(FILATT, ';') AS data_array -- Split the data by ';' (also splits inside '[...]')
FROM table1
),
exploded_data AS (
-- Flatten the array so each item is in a separate row
SELECT
EQ_INTRNL,
ENVIRONMENT,
raw_data,
TRIM(value) AS part, -- Each array element, whitespace-trimmed, is now in 'part'
-- NOTE(review): CURRENT_TIMESTAMP() is the same for every row, so this ordering presumably
-- does not guarantee that the numbers follow the fragments' position in the string;
-- FLATTEN's INDEX output column would - confirm.
ROW_NUMBER() OVER (PARTITION BY EQ_INTRNL ORDER BY CURRENT_TIMESTAMP()) AS column1 -- Assign a sequential number, reset for each EQ_INTRNL
FROM parsed_data,
LATERAL FLATTEN(input => data_array)
),
extracted_columns AS (
-- Pull the label and the bracketed value out of each fragment
SELECT
EQ_INTRNL,
ENVIRONMENT,
column1, -- The sequential number for the row
-- Extract the identifier part (before '['); a fragment without any '[' matches in full,
-- e.g. 'bio fert]'
REGEXP_SUBSTR(part, '^[^\\[]+') AS column2,
-- Extract the content inside brackets, excluding the brackets themselves
-- ('e' returns the capture group); a fragment without a complete '[...]' pair has no match
REGEXP_SUBSTR(part, '\\[([^\\]]+)\\]', 1, 1, 'e') AS column3
FROM exploded_data
)
SELECT
EQ_INTRNL,
column2,
column3,
ENVIRONMENT,
column1
FROM extracted_columns
-- Keep only rows with non-empty bracket content; rows whose column3 is NULL also fail
-- this comparison and are dropped.
WHERE column3 !=''
ORDER BY EQ_INTRNL, column1;
这应该可以用 UDTF 来实现——可以用 JavaScript 或 Python 编写。
下面的写法足够动态,可以处理一行或多行输入:
create or replace function dev._neeru.split_data(data varchar)
returns table (data_part varchar, col1 varchar, col2 varchar)
language python
RUNTIME_VERSION = 3.9
handler='SplitData'
as $$
import re
class SplitData:
    """UDTF handler that splits a string of ``label [value]`` entries into rows.

    Entries are separated by ';', but a ';' inside the brackets belongs to the
    value, so the input is cut after each closing ']' rather than at every ';'.
    """

    # Compiled once for the class instead of once per entry.
    # One entry: the shortest run of text ending in ']' plus an optional ';'.
    _ENTRY = re.compile(r'.*?\];?')
    # Label before the first '[' and the text between that '[' and the next ']'.
    _LABEL_VALUE = re.compile(r'(.*?)\[(.*?)\]')

    def process(self, data):
        """Yield one ``(data_part, col1, col2)`` tuple per bracketed entry.

        Args:
            data: the raw string, e.g. ``'Name [John]; Age [15]'``. ``None``
                (a SQL NULL) or an empty string produces no rows.

        Yields:
            ``data_part`` - the matched entry text, including any trailing ';'
            and leading whitespace; ``col1`` - the text before '[' (untrimmed);
            ``col2`` - the text between '[' and ']'.
        """
        # A NULL input arrives as None; emit no rows instead of raising.
        if not data:
            return
        for entry in self._ENTRY.finditer(data):
            data_part = entry.group(0)
            pair = self._LABEL_VALUE.search(data_part)
            # A segment that ends in ']' but has no '[' carries no label/value
            # pair; skip it rather than fail the whole query.
            if pair is None:
                continue
            yield (data_part, pair.group(1), pair.group(2))
$$;
-- Demo: run the split_data UDTF over a few sample strings, one output row per
-- "label [value]" entry, with surrounding whitespace trimmed from both parts.
WITH details AS (
    SELECT 'Activity type [DP - mcr modifyand quac endo; bio fert]; PharmSon BT acticity code [AYx765]' AS raw_data
    UNION ALL
    SELECT 'Name [John]; Age [15]' AS raw_data
    UNION ALL
    SELECT 'Name [Jack]' AS raw_data
    UNION ALL
    SELECT 'Name [Jack]; Activity type [DP - mcr modifyand quac endo; bio fert]; Age [15]' AS raw_data
)
SELECT
    d.raw_data,
    TRIM(s.col1) AS col1,
    TRIM(s.col2) AS col2
FROM
    details AS d,
    -- The table function is evaluated once per row of details
    TABLE(dev._neeru.split_data(d.raw_data)) AS s;
+------------------------------------------------------------------------------------------+-------------------------+--------------------------------------+
|RAW_DATA |COL1 |COL2 |
+------------------------------------------------------------------------------------------+-------------------------+--------------------------------------+
|Activity type [DP - mcr modifyand quac endo; bio fert]; PharmSon BT acticity code [AYx765]|Activity type |DP - mcr modifyand quac endo; bio fert|
|Activity type [DP - mcr modifyand quac endo; bio fert]; PharmSon BT acticity code [AYx765]|PharmSon BT acticity code|AYx765 |
|Name [John]; Age [15] |Name |John |
|Name [John]; Age [15] |Age |15 |
|Name [Jack] |Name |Jack |
|Name [Jack]; Activity type [DP - mcr modifyand quac endo; bio fert]; Age [15] |Name |Jack |
|Name [Jack]; Activity type [DP - mcr modifyand quac endo; bio fert]; Age [15] |Activity type |DP - mcr modifyand quac endo; bio fert|
|Name [Jack]; Activity type [DP - mcr modifyand quac endo; bio fert]; Age [15] |Age |15 |
+------------------------------------------------------------------------------------------+-------------------------+--------------------------------------+