我有一张日期范围的表格
----------------------------------------------------------------
| id | date_start | date_end |
----------------------------------------------------------------
| 1 | 2017-02-03 08:00:00.000 | 2017-02-03 17:00:00.000|
| 2 | 2017-02-04 15:00:00.000 | 2017-02-05 10:00:00.000|
| 3 | 2017-02-06 14:00:00.000 | 2017-02-07 23:00:00.000|
----------------------------------------------------------------
如您所见,某些范围会跨越多个自然日(例如 #2、#3)。我需要把这些记录按天拆分,得到如下结果:
----------------------------------------------------------------
| id | date_start | date_end |
----------------------------------------------------------------
| 1 | 2017-02-03 08:00:00.000 | 2017-02-03 17:00:00.000|
| 2 | 2017-02-04 15:00:00.000 | 2017-02-04 23:59:59.999|
| 2 | 2017-02-05 00:00:00.000 | 2017-02-05 10:00:00.000|
| 3 | 2017-02-06 14:00:00.000 | 2017-02-06 23:59:59.999|
| 3 | 2017-02-07 00:00:00.000 | 2017-02-07 23:00:00.000|
----------------------------------------------------------------
如何在 Redshift 上用 SQL 实现这一点?
思路:为范围覆盖的每一天计算 new_start 和 new_end。第一个区间的起点使用 date_start,同样地,最后一个区间的终点使用 date_end;其余情况使用 new_start 和 new_end - 1 second。
-- Split every [date_start, date_end] range in Table1 into one row per
-- calendar day that the range overlaps.
--
-- days   : the integers 1..100; n identifies the n-th calendar day counted
--          from the day of date_start. A recursive CTE is used instead of
--          generate_series() because generate_series() is listed among the
--          PostgreSQL functions that Amazon Redshift does not support.
--          Ranges touching more than 100 calendar days are cut off at day
--          100; raise the bound if longer ranges can occur.
-- ranges : every (range, n) combination together with that day's
--          midnight-to-midnight window [new_start, new_end).
WITH RECURSIVE days (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM days
    WHERE n < 100
),
ranges AS (
    SELECT
        t1.id,
        t1.date_start,
        t1.date_end,
        d.n,
        d.n * INTERVAL '1 day' AS i,
        -- Midnight that opens the n-th day of the range.
        t1.date_start::date + (d.n - 1) * INTERVAL '1 day' AS new_start,
        -- Midnight that closes the n-th day (start of the following day).
        t1.date_start::date + d.n * INTERVAL '1 day' AS new_end,
        CASE
            WHEN t1.date_start::date = t1.date_end::date AND d.n = 1
                THEN t1.date_start
            WHEN t1.date_start::date < t1.date_end::date
                THEN t1.date_start
            ELSE NULL
        END AS date_start1,
        -- Non-NULL only for a single-day range on its first (and only) day.
        CASE
            WHEN t1.date_start::date = t1.date_end::date AND d.n = 1
                THEN t1.date_end
        END AS date_end1
    FROM Table1 AS t1
    CROSS JOIN days AS d
)
SELECT
    r.id,
    r.date_start,
    r.date_end,
    r.n,
    r.i,
    r.new_start,
    r.new_end,
    r.date_start1,
    r.date_end1,
    -- Always 'overlap' for returned rows: the WHERE clause below applies the
    -- same predicate.
    CASE
        WHEN r.date_start < r.new_end AND r.date_end > r.new_start
            THEN 'overlap'
    END AS overlap,
    -- Segment start: the range's own start on its first day, otherwise the
    -- day's opening midnight.
    CASE
        WHEN r.date_end1 IS NOT NULL
            THEN r.date_start1
        WHEN r.date_start < r.new_end AND r.date_end > r.new_start
            THEN CASE
                     WHEN r.date_start > r.new_start THEN r.date_start
                     ELSE r.new_start
                 END
    END AS final_start,
    -- Segment end: the range's own end on its last day, otherwise one second
    -- before the next midnight.
    -- NOTE(review): subtracting a whole second yields 23:59:59.000; use a
    -- smaller step if the segment must end at 23:59:59.999.
    CASE
        WHEN r.date_end1 IS NOT NULL
            THEN r.date_end1
        WHEN r.date_start < r.new_end AND r.date_end > r.new_start
            THEN CASE
                     WHEN r.date_end < r.new_end THEN r.date_end
                     ELSE r.new_end - INTERVAL '1 second'
                 END
    END AS final_end
FROM ranges AS r
-- Keep only the day windows that the range actually overlaps.
WHERE r.date_start < r.new_end
  AND r.date_end > r.new_start
ORDER BY r.id, r.new_start
OUTPUT
最后,我是这样做的。该写法假设每个会话最多跨 2 个自然日(例如 2017-12-02 开始、2017-12-04 结束的情况不会出现在此数据集中;2017-12-02 至 2017-12-03 则没有问题)。
-- Split sessions that cross one midnight into a first-day and a second-day
-- interval; sessions that stay within one calendar day pass through as-is.
--
-- Limitation: only day_span values 0 and 1 are handled. Sessions whose
-- DATEDIFF in days is 2 or more (or negative) match no branch and are not
-- returned.
WITH session_days AS (
    SELECT
        s.date_start,
        s.date_end,
        -- Midnight at the start of the day on which the session ends.
        DATE_TRUNC('day', s.date_end) AS end_midnight,
        DATEDIFF(day, s.date_start, s.date_end) AS day_span
    FROM sessions AS s
)
-- First-day interval of two-day sessions: original start up to midnight.
SELECT
    sd.date_start,
    sd.end_midnight AS date_end
FROM session_days AS sd
WHERE sd.day_span = 1
UNION ALL
-- Second-day interval of two-day sessions: midnight up to original end.
-- A session ending exactly at midnight has nothing left on the second day,
-- so it is skipped here instead of producing a zero-length interval.
SELECT
    sd.end_midnight AS date_start,
    sd.date_end AS date_end
FROM session_days AS sd
WHERE sd.day_span = 1
  AND sd.date_end > sd.end_midnight
UNION ALL
-- One-day sessions: returned unchanged.
SELECT
    sd.date_start AS date_start,
    sd.date_end AS date_end
FROM session_days AS sd
WHERE sd.day_span = 0