查找sql中相同类型值之间的差距

Question

当同一类型的承诺之间存在间隙时，事实表应包含多条记录，分别显示每段连续承诺的准确开始和结束日期（例如 patid，即 ClaimID，1001）。当相同状态连续存在而没有间隙时，应合并为单条记录。


-- Sample input: one row per legal order of a claim.
CREATE TABLE #legal_data
(
    ClaimID           VARCHAR(20),
    -- dimension key
    dim_legalstat_key INT,
    order_start_date  DATE,
    order_end_date    DATE,
    -- days between order_start_date & order_end_date
    days_committed    INT
)

-- Load the sample orders.
-- The column list is explicit so the statement does not depend on column order,
-- and the integer columns receive integer literals instead of quoted strings.
INSERT INTO #legal_data (
    ClaimID
    ,dim_legalstat_key
    ,[order_start_date]
    ,[order_end_date]
    ,[days_committed]
)
VALUES
    ('1001',11,'2022-05-11','2022-10-29',171)
    ,('1001',131,'2022-07-15','2023-03-19',247)
    ,('1001',116,'2023-03-14','2023-03-20',6)
    ,('1001',11,'2023-03-20','2023-03-23',3)
    ,('1207',11,'2022-09-13','2023-03-12',180)
    ,('1207',11,'2023-03-10','2023-03-23',13)
    ,('1924',2,'2021-12-18','2022-06-19',183)
    -- NOTE(review): 2022-06-19 to 2023-12-20 is 549 days, not 184 - confirm whether the date or the count is wrong
    ,('1924',2,'2022-06-19','2023-12-20',184)
    ,('1842',77,'2021-02-20','2022-06-17',482)
    ,('1842',77,'2022-06-18','2023-12-20',550)
    ,('1661',22,'2022-02-14','2023-03-20',399)
    ,('1661',22,'2022-02-14','2023-03-23',402)
    ,('1553',4,'2022-01-14','2022-02-12',29)
    ,('1553',4,'2022-02-14','2023-03-23',402)


----- desired result
-- Desired output: one row per continuous run of the same status within a claim.
CREATE TABLE #legal_Result
(
    ClaimID           VARCHAR(20),
    -- dimension key
    dim_legalstat_key INT,
    order_start_date  DATE,
    order_end_date    DATE,
    -- days between order_start_date & order_end_date
    days_committed    INT
)

-- Load the expected result rows.
-- The column list is explicit so the statement does not depend on column order,
-- and the integer columns receive integer literals instead of quoted strings.
INSERT INTO #legal_Result (
    ClaimID
    ,dim_legalstat_key
    ,[order_start_date]
    ,[order_end_date]
    ,[days_committed]
)
VALUES
    ('1001',11,'2022-05-11','2022-10-29',171)
    ,('1001',131,'2022-07-15','2023-03-19',247)
    ,('1001',116,'2023-03-14','2023-03-20',6)
    ,('1001',11,'2023-03-20','2023-03-23',3)
    -- overlapping orders (second starts 2023-03-10, first ends 2023-03-12) become one row
    ,('1207',11,'2022-09-13','2023-03-23',191)
    -- second order starts on the day the first one ends
    ,('1924',2,'2021-12-18','2023-12-20',732)
    -- orders abut on consecutive days (2022-06-17 / 2022-06-18); marked "not working" by the asker,
    -- presumably because their query did not merge these two rows
    ,('1842',77,'2021-02-20','2023-12-20',1033)
    -- both source orders start on 2022-02-14; the later end date wins
    ,('1661',22,'2022-02-14','2023-03-23',402)
    -- 2022-02-13 is not covered by either order, so the two rows stay separate
    ,('1553',4,'2022-01-14','2022-02-12',29)
    ,('1553',4,'2022-02-14','2023-03-23',402)

-- Display the sample input rows.
SELECT *
FROM #legal_data;

-- Display the expected output rows.
SELECT *
FROM #legal_Result;

Answer 1

这是一个“间隙与岛屿”（gaps and islands）问题。您已经定义了各个范围，现在需要对它们进行分组、识别岛屿并执行聚合。

您的预期输出似乎不正确。 ClaimID 1207 的最短日期为 2022-03-10，而不是 2023-09-13。此外，2022年7月15日至2023年3月29日之间有257天。假设这些是您的问题中的缺陷，那么这段代码应该适合您。

回复评论

如果您的范围尚未完全合并（ValNik 指出的一种可能性），您可以在 Groups CTE 之前加上下面这个 CTE 来完成合并：

-- ranges: per (ClaimID, dim_legalstat_key), walking the rows by start date then end date,
-- replaces each end date with the latest end date seen up to and including that row,
-- and each start date with the earliest start date from that row onwards.
-- This is a CTE fragment meant to be placed ahead of the Groups CTE (note the trailing comma).
ranges AS (
    SELECT
        f.ClaimID,
        f.dim_legalstat_key,
        MIN(f.order_start_date) OVER (
            PARTITION BY f.ClaimID, f.dim_legalstat_key
            ORDER BY f.order_start_date, f.order_end_date
            ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
        ) AS order_start_date,
        MAX(f.order_end_date) OVER (
            PARTITION BY f.ClaimID, f.dim_legalstat_key
            ORDER BY f.order_start_date, f.order_end_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS order_end_date
    FROM #legal_data AS f
),

当然，这样一来 Groups CTE 中的表引用也必须从 #legal_data 改为 ranges。

编辑结束

;
-- Gaps-and-islands: merge orders of the same claim and legal status that overlap or
-- directly follow each other (next start is at most one day after the latest end so far)
-- into a single row, and report the length of each merged range in days.
WITH
-- Groups: attach to every order the latest end date among all earlier orders
-- (ordered by start date, then end date) of the same claim and status.
Groups AS (
    SELECT ClaimID
    , dim_legalstat_key
    , order_start_date
    , order_end_date
    -- A running MAX is used instead of LAG: when a short order is nested inside a longer
    -- one, the previous row's end date is earlier than the end of the range that is still
    -- open, and LAG would wrongly start a new island.
    , MAX(order_end_date) OVER (
          PARTITION BY ClaimID, dim_legalstat_key
          ORDER BY order_start_date, order_end_date
          ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
      ) AS PreviousEndDate
    FROM #legal_data
),
-- IslandID: flag the rows that open a new island and number the islands with a running sum.
IslandID AS (
    SELECT ClaimID
    , dim_legalstat_key
    , order_start_date
    , order_end_date
    -- 0 = continues the open island (overlaps it, or starts the day after it ends);
    -- 1 = starts a new island. A NULL PreviousEndDate (first row of a partition) yields 1.
    , CASE WHEN PreviousEndDate >= DATEADD(day, -1, order_start_date) THEN 0 ELSE 1 END AS IslandStartInd
    -- The default window frame (RANGE) is intentional: fully identical rows are peers and
    -- therefore always receive the same IslandId, whichever of them was flagged.
    , SUM(CASE WHEN PreviousEndDate >= DATEADD(day, -1, order_start_date) THEN 0 ELSE 1 END)
          OVER (ORDER BY ClaimID, dim_legalstat_key, order_start_date, order_end_date) AS IslandId
    FROM Groups
),
-- Islands: collapse every island to its earliest start and latest end.
Islands AS (
    SELECT ClaimID
    , dim_legalstat_key
    , MIN(order_start_date) AS order_start_date
    , MAX(order_end_date) AS order_end_date
    FROM IslandID
    GROUP BY IslandId
    , ClaimID
    , dim_legalstat_key
)

SELECT ClaimID
, dim_legalstat_key
, order_start_date
, order_end_date
, DATEDIFF(day, order_start_date, order_end_date) AS days_committed
FROM Islands
ORDER BY ClaimID
, order_start_date
, order_end_date

Answer 2

我查看了@dougp提出的解决方案。它适用于“正常”排序的数据。我认为，问题在于“混乱”的有序数据，如下所示：

period1 ---------------------------
period2              -----
period3                     ---------------------------

以这个问题为例

            -- days_committed is an int column, so the 'x' placeholders are replaced by the actual day counts
            ,('2925','5','2022-12-10','2022-12-20','10')
            ,('2925','5','2022-12-15','2022-12-18','3')
            ,('2925','5','2022-12-19','2022-12-29','10')

该行的查询结果

ClaimID	dim_legalstat_key	order_start_date	order_end_date	days_committed
2925	5	2022-12-10	2022-12-20	10
2925	5	2022-12-19	2022-12-29	10

预期结果

ClaimID	dim_legalstat_key	order_start_date	order_end_date	days_committed
2925	5	2022-12-10	2022-12-29	19

也许数据的排序是“正确”的，不会出现这种情况。

这个任务很有趣。我发现自己的思路与 @dougp 的解决方案类似。考虑到测试数据可能有错误，我提出一个递归解决方案供参考。

-- Recursive merge of overlapping orders per claim and legal status.
-- This first version joins only ranges that overlap; ranges that merely abut on
-- consecutive days are not joined.
WITH ndata AS (
    -- One row per (ClaimID, dim_legalstat_key, order_start_date), keeping the latest
    -- end date; rn numbers the rows of a claim by start date.
    SELECT
        ClaimID,
        dim_legalstat_key,
        order_start_date,
        MAX(order_end_date) AS order_end_date,
        DATEDIFF(day, order_start_date, MAX(order_end_date)) AS days_committed,
        ROW_NUMBER() OVER (PARTITION BY ClaimID ORDER BY order_start_date) AS rn
    FROM #legal_data
    GROUP BY
        ClaimID,
        dim_legalstat_key,
        order_start_date
),
r AS (
    -- Anchor: island head rows, i.e. rows whose start date is not covered by any
    -- other row of the same claim and status.
    SELECT
        head.rn AS headrow,
        head.rn,
        1 AS lvl,
        head.ClaimID,
        head.dim_legalstat_key,
        head.order_start_date,
        head.order_end_date,
        head.days_committed,
        CAST(head.rn AS VARCHAR(1000)) AS rowlist
    FROM ndata AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM ndata AS other
        WHERE other.ClaimID = head.ClaimID
          AND other.dim_legalstat_key = head.dim_legalstat_key
          AND other.rn <> head.rn
          AND other.order_start_date <= head.order_start_date
          AND other.order_end_date >= head.order_start_date
    )

    UNION ALL

    -- Recursive step: extend the accumulated range with every row not yet on this
    -- path whose start date falls inside it. rowlist holds the rn values already
    -- visited, as a comma-separated string.
    SELECT
        r.headrow,
        nxt.rn,
        r.lvl + 1 AS lvl,
        r.ClaimID,
        nxt.dim_legalstat_key,
        CASE WHEN r.order_start_date < nxt.order_start_date
             THEN r.order_start_date
             ELSE nxt.order_start_date
        END AS order_start_date,
        CASE WHEN r.order_end_date > nxt.order_end_date
             THEN r.order_end_date
             ELSE nxt.order_end_date
        END AS order_end_date,
        r.days_committed,
        CAST(CONCAT(r.rowlist, ',', CAST(nxt.rn AS VARCHAR)) AS VARCHAR(1000)) AS rowlist
    FROM r
    INNER JOIN ndata AS nxt
        ON nxt.ClaimID = r.ClaimID
       AND nxt.dim_legalstat_key = r.dim_legalstat_key
       AND r.rn <> nxt.rn
       AND CHARINDEX(',' + CAST(nxt.rn AS VARCHAR) + ',', ',' + r.rowlist + ',') = 0
       AND nxt.order_start_date >= r.order_start_date
       AND nxt.order_start_date <= r.order_end_date
       -- depth guard for the recursion
       AND r.lvl < 100
)
-- One output row per head row: overall start, overall end and the span in days.
SELECT
    ClaimID,
    dim_legalstat_key,
    MIN(order_start_date) AS order_start_date,
    MAX(order_end_date) AS order_end_date,
    DATEDIFF(day, MIN(order_start_date), MAX(order_end_date)) AS days_committed
FROM r
GROUP BY
    ClaimID,
    dim_legalstat_key,
    headrow
ORDER BY
    ClaimID,
    dim_legalstat_key

示例

更新1。

对于首尾相接的范围（下一段的 start_date 是上一段 end_date 的次日，即 start_date = end_date + 1 天），我会添加一些检查（例如 ClaimID 为 1842 的情况）。

第一个 CTE

ndata

将具有相同 start_date 的行合并为 1 行。

递归查询的基础部分

从相交或连接的行组中选择第一行。
查询的递归部分

依次合并

该组中的所有其他行。

更正查询

-- Corrected recursive query: merges orders of the same claim and legal status that
-- overlap OR directly follow each other (next start = previous end + 1 day).
-- ndata: one row per (ClaimID, dim_legalstat_key, order_start_date) keeping the latest
-- end date; rn numbers the rows of a claim by start date.
with ndata as(
select ClaimID,dim_legalstat_key,order_start_date
  ,max(order_end_date)order_end_date
  ,datediff(d,order_start_date,max(order_end_date))days_committed
  ,row_number()over(partition by ClaimId order by order_start_date) rn 
from #legal_data
group by ClaimID,dim_legalstat_key,order_start_date
)
-- r, anchor member: island head rows, i.e. rows for which no other row of the same
-- claim and status covers their start date or ends on the day before it.
-- rowlist starts as the head row's own rn and collects the rn values visited on a path.
,r as(
select rn headrow,rn,1 lvl,ClaimID,dim_legalstat_key
       ,order_start_date,order_end_date,days_committed
       ,cast(rn as varchar(1000)) rowlist
from ndata t1
where not exists(
  select * from ndata t2 
  where t2.ClaimId=t1.ClaimId and t2.rn<>t1.rn
    and t2.dim_legalstat_key=t1.dim_legalstat_key
    and 
       ( 
        (  t2.order_start_date<=t1.order_start_date 
            and t2.order_end_date>=t1.order_start_date   
        )
        or
        (   -- concatenated date ranges 
           dateadd(day,1,t2.order_end_date)=t1.order_start_date
        )
       )
  )
-- r, recursive member: extend the accumulated range with every row of the same claim
-- and status that is not yet in rowlist and starts between the accumulated start and
-- the day after the accumulated end. The two CASE expressions keep the earlier start
-- and the later end. days_committed is carried over from the head row unchanged and
-- is not used by the final SELECT, which recomputes it.
union all
select r.headrow,t2.rn,r.lvl+1 lvl,r.ClaimID,t2.dim_legalstat_key
       ,case when r.order_start_date<t2.order_start_date then r.order_start_date
        else t2.order_start_date end order_start_date
       ,case when r.order_end_date>t2.order_end_date then r.order_end_date
        else t2.order_end_date end order_end_date
       ,r.days_committed
       ,cast(concat(r.rowlist,',',cast(t2.rn as varchar)) as varchar(1000))rowlist
from r inner join ndata t2 
  on  t2.ClaimId=r.ClaimId  and t2.dim_legalstat_key=r.dim_legalstat_key
    and r.rn<>t2.rn
    -- skip rows whose rn already appears in this path's comma-separated rowlist
    and charindex(','+cast(t2.rn as varchar)+',',','+r.rowlist+',')=0
    and t2.order_start_date>=r.order_start_date
    and t2.order_start_date<=dateadd(day,1,r.order_end_date) -- concatenated date ranges
 -- depth guard; NOTE(review): presumably chosen to stay within SQL Server's default MAXRECURSION of 100 - confirm
 and lvl<100  
)
-- debugging aid: inspect the raw recursion output
--  select * from r;
-- One output row per head row: overall start, overall end and the span in days.
select ClaimID,dim_legalstat_key
  ,min(order_start_date) order_start_date
  ,max(order_end_date)order_end_date
  ,datediff(d,min(order_start_date),max(order_end_date)) days_committed
from r
group by ClaimID,dim_legalstat_key,headrow
order by ClaimID,dim_legalstat_key

示例在这里

查找sql中相同类型值之间的差距

问题描述投票：0回答：2

2个回答

最新问题

查找sql中相同类型值之间的差距

问题描述 投票：0回答：2

2个回答

最新问题

问题描述投票：0回答：2