查找sql中相同类型值之间的差距

问题描述 投票:0回答:2

当同一类型的承诺之间存在差距时,事实应包含多个记录,显示每个连续承诺的准确开始和结束日期。 一个例子是 patid 1001,当相同状态连续存在而没有间隙时,它应该是单个记录。


CREATE TABLE #legal_data (
    ClaimID VARCHAR(20)
    ,dim_legalstat_key int -- dimensionkey
    ,[order_start_date] DATE
    ,[order_end_date] DATE
    ,[days_committed]  int -- days between order_start_date & order_end_date
)

INSERT INTO #legal_data
VALUES
    ('1001','11','2022-05-11','2022-10-29','171')
    ,('1001','131','2022-07-15','2023-03-19','247')
    ,('1001','116','2023-03-14','2023-03-20','6')
    ,('1001','11','2023-03-20','2023-03-23','3')
    ,('1207','11','2022-09-13','2023-03-12','180')
    ,('1207','11','2023-03-10','2023-03-23','13')
    ,('1924','2','2021-12-18','2022-06-19','183')
    ,('1924','2','2022-06-19','2023-12-20','184')
    ,('1842','77','2021-02-20','2022-06-17','482')
    ,('1842','77','2022-06-18','2023-12-20','550')
    ,('1661','22','2022-02-14','2023-03-20','399')
    ,('1661','22','2022-02-14','2023-03-23','402')
    ,('1553','4','2022-01-14','2022-02-12','29')
    ,('1553','4','2022-02-14','2023-03-23','402')


----- desired result
CREATE TABLE #legal_Result (
    ClaimID VARCHAR(20)
    ,dim_legalstat_key int-- dimensionkey
    ,[order_start_date] DATE
    ,[order_end_date] DATE
    ,[days_committed]  int --days between order_start_date & order_end_date

)

INSERT INTO #legal_Result
VALUES
    ('1001','11','2022-05-11','2022-10-29','171')
    ,('1001','131','2022-07-15','2023-03-19','247')
    ,('1001','116','2023-03-14','2023-03-20','6')
    ,('1001','11','2023-03-20','2023-03-23','3')
    ,('1207','11','2022-09-13','2023-03-23','191')
    ,('1924','2','2021-12-18','2023-12-20','732')
    ,('1842','77','2021-02-20','2023-12-20','1033') --not working
    ,('1661','22','2022-02-14','2023-03-23','402') ---
    ,('1553','4','2022-01-14','2022-02-12','29') --anything the 
    ,('1553','4','2022-02-14','2023-03-23','402')

select * from #legal_data

select * from #legal_Result
sql database temp-tables gaps-and-islands global-temp-tables
2个回答
2
投票

这是一个差距和岛屿问题。 您已经定义了范围,现在需要对它们进行分组,识别岛屿并执行聚合。

您的预期输出似乎不正确。 ClaimID 1207 的最短日期为 2022-03-10,而不是 2023-09-13。 此外,2022年7月15日至2023年3月29日之间有257天。 假设这些是您的问题中的缺陷,那么这段代码应该适合您。

回复评论

如果您的范围尚未完全整合(ValNik 指出的一种可能性),您可以在组 CTE 之前使用此 CTE 来完成此操作:

ranges as (
    select ClaimID
    , dim_legalstat_key
    , MIN(order_start_date) OVER (PARTITION BY ClaimID, dim_legalstat_key ORDER BY order_start_date, order_end_date ROWS BETWEEN 0 FOLLOWING AND UNBOUNDED FOLLOWING) as order_start_date
    , MAX(order_end_date) OVER (PARTITION BY ClaimID, dim_legalstat_key ORDER BY order_start_date, order_end_date ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) as order_end_date
    from #legal_data f
),

那么,当然,组中的表引用必须更改。

编辑结束

;
WITH
Groups as (
    SELECT ClaimID
    , dim_legalstat_key
    , order_start_date
    , order_end_date
    , LAG(order_end_date,1) OVER (partition by ClaimID, dim_legalstat_key ORDER BY order_start_date, order_end_date) AS PreviousEndDate
    FROM #legal_data
),
IslandID as (
    SELECT ClaimID
    , dim_legalstat_key
    , order_start_date
    , order_end_date
    , CASE WHEN PreviousEndDate >= order_start_date THEN 0 ELSE 1 END AS IslandStartInd
    , SUM(CASE WHEN PreviousEndDate >= order_start_date THEN 0 ELSE 1 END) OVER ( ORDER BY claimid, dim_legalstat_key, order_start_date, order_end_date) AS IslandId
    FROM Groups
), 
Islands as (
  SELECT ClaimID
  , dim_legalstat_key
  , MIN(order_start_date) AS order_start_date
  , MAX(order_end_date) AS order_end_date
  FROM IslandId
  GROUP BY IslandId
  , ClaimID
  , dim_legalstat_key
)

select *
, datediff(day,order_start_date, order_end_date) as days_committed
from Islands
order by ClaimID
, order_start_date
, order_end_date

1
投票

我查看了@dougp提出的解决方案。它适用于“正常”排序的数据。我认为,问题在于“混乱”的有序数据,如下所示:

period1 ---------------------------
period2              -----
period3                     ---------------------------

以这个问题为例

            ,('2925','5','2022-12-10','2022-12-20','x')
            ,('2925','5','2022-12-15','2022-12-18','x')
            ,('2925','5','2022-12-19','2022-12-29','x')

该行的查询结果

索赔ID dim_legalstat_key 订单开始日期 订单结束日期 承诺天数
2925 5 2022-12-10 2022-12-20 10
2925 5 2022-12-19 2022-12-29 10

预期结果

索赔ID dim_legalstat_key 订单开始日期 订单结束日期 承诺天数
2925 5 2022-12-10 2022-12-29 19

也许数据的排序是“正确”的,不会出现这种情况。

这个任务很有趣。我明白了,我对解决方案的看法与@dougp 的解决方案类似。看到测试数据可能有错误, 我将提出一个递归解决方案供考虑。

with ndata as(
select ClaimID,dim_legalstat_key,order_start_date
  ,max(order_end_date)order_end_date
  ,datediff(d,order_start_date,max(order_end_date))days_committed
  ,row_number()over(partition by ClaimId order by order_start_date) rn 
from #legal_data
group by ClaimID,dim_legalstat_key,order_start_date
)
,r as( --Islands head rows
select rn headrow,rn,1 lvl,ClaimID,dim_legalstat_key
       ,order_start_date,order_end_date,days_committed
       ,cast(rn as varchar(1000)) rowlist
from ndata t1
where not exists
    (
     select * from ndata t2 
     where t2.ClaimId=t1.ClaimId and t2.rn<>t1.rn
       and t2.dim_legalstat_key=t1.dim_legalstat_key
       and t2.order_start_date<=t1.order_start_date 
       and t2.order_end_date>=t1.order_start_date 
     )

union all  --iterate through all possible rows

select r.headrow,t2.rn,r.lvl+1 lvl,r.ClaimID,t2.dim_legalstat_key
       ,case when r.order_start_date<t2.order_start_date then r.order_start_date
        else t2.order_start_date end order_start_date
       ,case when r.order_end_date>t2.order_end_date then r.order_end_date
        else t2.order_end_date end order_end_date
       ,r.days_committed
       ,cast(concat(r.rowlist,',',cast(t2.rn as varchar)) as varchar(1000))rowlist
from r inner join ndata t2 
  on  t2.ClaimId=r.ClaimId  and t2.dim_legalstat_key=r.dim_legalstat_key
    and r.rn<>t2.rn
    and charindex(','+cast(t2.rn as varchar)+',',','+r.rowlist+',')=0
    and t2.order_start_date>=r.order_start_date 
    and t2.order_start_date<=r.order_end_date 
 and lvl<100  
)
select ClaimID,dim_legalstat_key
  ,min(order_start_date) order_start_date
  ,max(order_end_date)order_end_date
  ,datediff(d,min(order_start_date),max(order_end_date)) days_committed
from r
group by ClaimID,dim_legalstat_key,headrow
order by ClaimID,dim_legalstat_key

示例

更新1。

对于串联范围(start_date = end_date = end_date + 1day 的下一天) 我会添加一些检查。 (例如 ClaimId 为 1842 的案例)

第一个 CTE

ndata
将具有相同 start_date 的行合并为 1 行。

递归查询的基础部分

r
从相交或连接的行组中选择第一行。
查询的递归部分
consecutively combines
该组中的所有其他行。

更正查询

with ndata as(
select ClaimID,dim_legalstat_key,order_start_date
  ,max(order_end_date)order_end_date
  ,datediff(d,order_start_date,max(order_end_date))days_committed
  ,row_number()over(partition by ClaimId order by order_start_date) rn 
from #legal_data
group by ClaimID,dim_legalstat_key,order_start_date
)
,r as(
select rn headrow,rn,1 lvl,ClaimID,dim_legalstat_key
       ,order_start_date,order_end_date,days_committed
       ,cast(rn as varchar(1000)) rowlist
from ndata t1
where not exists(
  select * from ndata t2 
  where t2.ClaimId=t1.ClaimId and t2.rn<>t1.rn
    and t2.dim_legalstat_key=t1.dim_legalstat_key
    and 
       ( 
        (  t2.order_start_date<=t1.order_start_date 
            and t2.order_end_date>=t1.order_start_date   
        )
        or
        (   -- concatenated date ranges 
           dateadd(day,1,t2.order_end_date)=t1.order_start_date
        )
       )
  )
union all
select r.headrow,t2.rn,r.lvl+1 lvl,r.ClaimID,t2.dim_legalstat_key
       ,case when r.order_start_date<t2.order_start_date then r.order_start_date
        else t2.order_start_date end order_start_date
       ,case when r.order_end_date>t2.order_end_date then r.order_end_date
        else t2.order_end_date end order_end_date
       ,r.days_committed
       ,cast(concat(r.rowlist,',',cast(t2.rn as varchar)) as varchar(1000))rowlist
from r inner join ndata t2 
  on  t2.ClaimId=r.ClaimId  and t2.dim_legalstat_key=r.dim_legalstat_key
    and r.rn<>t2.rn
    and charindex(','+cast(t2.rn as varchar)+',',','+r.rowlist+',')=0
    and t2.order_start_date>=r.order_start_date
    and t2.order_start_date<=dateadd(day,1,r.order_end_date) -- concatenated date ranges
 and lvl<100  
)
--  select * from r;
select ClaimID,dim_legalstat_key
  ,min(order_start_date) order_start_date
  ,max(order_end_date)order_end_date
  ,datediff(d,min(order_start_date),max(order_end_date)) days_committed
from r
group by ClaimID,dim_legalstat_key,headrow
order by ClaimID,dim_legalstat_key

示例在这里

© www.soinside.com 2019 - 2024. All rights reserved.