我的数据是这样的
Col_X | Col_Y |
---|---|
a | b |
b | c |
c | d |
d | e |
f | g |
t | y |
y | r |
q | w |
n | m |
m | k |
m | z |
我的最终目标簇表应该是这样的:
集群 |
---|
abcde |
fg |
nmkz |
qw |
tyr |
我的伪数据
-- Sample edge list: every row links a node x to a node y.
-- The column names come from the first branch; the remaining
-- union all branches inherit them positionally.
select 'a' as x, 'b' as y
union all select 'b', 'c'
union all select 'c', 'd'
union all select 'd', 'e'
union all select 'f', 'g'
union all select 't', 'y'
union all select 'y', 'r'
union all select 'q', 'w'
union all select 'n', 'm'
union all select 'm', 'k'
union all select 'm', 'z'
先做一步小的预处理,把同一个 x 下的所有兄弟节点聚合到一起,然后再递归遍历。请注意,如果没有排序列,聚合这一步的结果顺序在不同次执行之间可能是随机的:
-- Builds one concatenated label per cluster by following x -> y links
-- from every starting node and keeping the longest path found.
with data(x, y) as (
    -- Sample edge list: every row links a node x to a node y.
    select 'a' as x, 'b' as y union all
    select 'b' as x, 'c' as y union all
    select 'c' as x, 'd' as y union all
    select 'd' as x, 'e' as y union all
    select 'f' as x, 'g' as y union all
    select 't' as x, 'y' as y union all
    select 'y' as x, 'r' as y union all
    select 'q' as x, 'w' as y union all
    select 'n' as x, 'm' as y union all
    select 'm' as x, 'k' as y union all
    select 'm' as x, 'z' as y
),
-- One row per x; all of its y values are concatenated into a single string.
-- WITHIN GROUP (ORDER BY y) pins the concatenation order so that repeated
-- executions return the same string (e.g. always 'kz' for x = 'm').
adata(x, y) as (
    select x, listagg(y, '') within group (order by y) as y
    from data d
    group by x
),
-- Recursive walk over adata.
--   l : depth of the current step (1 for the starting row)
--   x : x of the current row
--   y : aggregated y string of the current row
--   c : concatenation of every x visited so far
--   r : x of the row the walk started from
cte(l, x, y, c, r) as (
    -- Anchor: rows whose x is not equal to any aggregated y string.
    select 1, x, y, x, x
    from adata d
    where not exists (select 1 from adata d1 where d1.y = d.x)
    union all
    -- Step: continue only when the whole aggregated y string equals another
    -- row's x. In the sample data this means a row with several y values
    -- (x = 'm' -> 'kz') ends the walk.
    select c.l + 1, d.x, d.y, c.c || d.x, c.r
    from cte c
    join adata d on d.x = c.y
)
-- For each starting node keep the longest path, with the y string of its
-- last row appended.
select c from (
    select r, c || y as c, row_number() over(partition by r order by length(c || y) desc) as rn
    from cte
)
where rn = 1
;
abcde
nmkz
qw
fg
tyr