这两者在性能方面有什么区别吗?
-- Variant A: let UNION itself remove duplicate rows while combining
-- the same three columns from Table1 through Table8.
SELECT col1, col2, col3
FROM Table1
UNION
SELECT col1, col2, col3
FROM Table2
UNION
SELECT col1, col2, col3
FROM Table3
UNION
SELECT col1, col2, col3
FROM Table4
UNION
SELECT col1, col2, col3
FROM Table5
UNION
SELECT col1, col2, col3
FROM Table6
UNION
SELECT col1, col2, col3
FROM Table7
UNION
SELECT col1, col2, col3
FROM Table8;
-- Variant B: concatenate every row from Table1 through Table8 with UNION ALL
-- (no duplicate removal), then remove duplicates once with an outer SELECT DISTINCT.
SELECT DISTINCT
    combined.col1,
    combined.col2,
    combined.col3
FROM (
    SELECT col1, col2, col3 FROM Table1
    UNION ALL
    SELECT col1, col2, col3 FROM Table2
    UNION ALL
    SELECT col1, col2, col3 FROM Table3
    UNION ALL
    SELECT col1, col2, col3 FROM Table4
    UNION ALL
    SELECT col1, col2, col3 FROM Table5
    UNION ALL
    SELECT col1, col2, col3 FROM Table6
    UNION ALL
    SELECT col1, col2, col3 FROM Table7
    UNION ALL
    SELECT col1, col2, col3 FROM Table8
) combined;
UNION 和 UNION ALL 之间的区别在于:UNION ALL 不会消除重复行,而只是从所有表中提取符合查询条件的所有行,并将它们合并到一个结果集中。
UNION 语句实际上会对结果集执行一次 SELECT DISTINCT。
如果对 UNION ALL 的结果集再执行 SELECT DISTINCT,则输出将与 UNION 的结果集相同。
编辑:
CPU 成本性能:
让我用例子来解释:我有两个查询。一个使用 UNION,另一个使用 UNION ALL(外层再加 DISTINCT)。
-- Timing run 1: SELECT DISTINCT over a UNION ALL of dbo.user_LogTime with itself.
-- STATISTICS TIME is switched on for the measured batch and off again afterwards.
SET STATISTICS TIME ON;
GO
SELECT DISTINCT *
FROM (
    SELECT * FROM dbo.user_LogTime
    UNION ALL
    SELECT * FROM dbo.user_LogTime
) combined;
GO
SET STATISTICS TIME OFF;
-- Timing run 2: plain UNION of dbo.user_LogTime with itself.
-- STATISTICS TIME is switched on for the measured batch and off again afterwards.
SET STATISTICS TIME ON;
GO
SELECT *
FROM dbo.user_LogTime
UNION
SELECT *
FROM dbo.user_LogTime;
GO
SET STATISTICS TIME OFF;
我在 SSMS(SQL Server Management Studio)的同一个查询窗口中运行了这两个查询。
让我们看看 SSMS 中的执行计划:
发生的情况是,使用Union All 和 Distinct 的查询将比使用 Union 的查询花费更多的 CPU 成本。
耗时表现:
UNION ALL(外加 DISTINCT):
(1172 row(s) affected)
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 39 ms.
UNION:
(1172 row(s) affected)
SQL Server Execution Times:
CPU time = 10 ms, elapsed time = 25 ms.
所以,UNION 在性能方面比 UNION ALL 加 DISTINCT 要好得多。
/*
  Expected result set per case, given @t1 = {1,1,2,3,4} and @t2 = {2,2,3,4,5}:
    (1) DISTINCT + UNION      {1,2,3,4,5}            - redundant with case (2)
    (2) UNION                 {1,2,3,4,5}            - more efficient?
    (3) DISTINCT + UNION ALL  {1,2,2,3,3,4,4,5}
    (4) UNION ALL             {1,1,2,2,2,3,3,4,4,5}
*/
DECLARE @t1 TABLE (c1 VARCHAR(15));
DECLARE @t2 TABLE (c2 VARCHAR(15));

INSERT INTO @t1 (c1) VALUES ('1'), ('1'), ('2'), ('3'), ('4');
INSERT INTO @t2 (c2) VALUES ('2'), ('2'), ('3'), ('4'), ('5');

-- Case (1): DISTINCT on each input, then UNION.
SELECT DISTINCT c1 FROM @t1
UNION
SELECT DISTINCT c2 FROM @t2
ORDER BY c1;

-- Case (2): UNION alone.
SELECT c1 FROM @t1
UNION
SELECT c2 FROM @t2
ORDER BY c1;

-- Case (3): DISTINCT on each input, then UNION ALL (duplicates across inputs survive).
SELECT DISTINCT c1 FROM @t1
UNION ALL
SELECT DISTINCT c2 FROM @t2
ORDER BY c1;

-- Case (4): UNION ALL alone (every input row survives).
SELECT c1 FROM @t1
UNION ALL
SELECT c2 FROM @t2
ORDER BY c1;
对于下面显示的数据和查询,测试产生了显著差异:
UNION ALL 8.983 sec
UNION DISTINCT 15.344 sec
为了显示此示例的规模和复杂性,表大小和查询代码如下所示
hqsource 600K records
accountingemppos 180K
accountingposld 200K
emp_no_accountingnumeric 20
First UNION block is approx 550K records, second approx 50K
-- Benchmark query: two SELECT blocks combined with UNION ALL and sorted as a whole.
-- First block: driven by hqsource.hq_people (alias a), restricted by the REGEXP on
-- emp_no_imported in this block's WHERE clause.
SELECT a.`emp_no_imported` AS `emp_no`,
a.`supervisor_emp_no`,
a.`first name`,
a.`middle name`,
a.`last name`,
a.`jobtitle`,
a.`status`,
-- accounting_id: rida.accounting_emp_no when it is non-NULL, otherwise the imported emp_no.
CASE WHEN rida.`accounting_emp_no` IS NOT NULL THEN
rida.`accounting_emp_no`
ELSE
a.`emp_no_imported`
END AS `accounting_id`,
-- accounting_s_emp_no: three-level fallback.
--   1) epfp matched  -> sridf.accounting_emp_no if sridf matched, else epfp.emp_no
--   2) epp matched   -> srids.accounting_emp_no if srids matched, else epp.emp_no
--   3) otherwise     -> srida.accounting_emp_no if srida matched, else a.supervisor_emp_no
CASE WHEN epfp.`emp_no` IS NOT NULL THEN
CASE WHEN `sridf`.`emp_no` IS NOT NULL THEN
`sridf`.`accounting_emp_no`
ELSE
epfp.`emp_no`
END
ELSE
CASE WHEN epp.`emp_no` IS NOT NULL THEN
CASE WHEN `srids`.`emp_no` IS NOT NULL THEN
`srids`.`accounting_emp_no`
ELSE
epp.`emp_no`
END
ELSE
CASE WHEN `srida`.`emp_no` IS NOT NULL THEN
`srida`.`accounting_emp_no`
ELSE
a.`supervisor_emp_no`
END
END
END AS `accounting_s_emp_no`,
ep.`emp_no` AS `traas_emp_no`,
epp.`emp_no` AS `traas_parent_emp_no`
FROM `hqsource`.hq_people a
-- rida / srida: emp_no_accountingnumeric lookups for the person and for the supervisor.
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `rida` ON `rida`.emp_no = a.`emp_no_imported`
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `srida` ON `srida`.emp_no = a.`supervisor_emp_no`
-- ep -> p -> epp: position row for the person, then the row reached through p.ParentGID.
-- NOTE(review): '2899-12-31' looks like an open-ended "current row" sentinel for End - confirm.
LEFT OUTER JOIN `traas`.`accountingemppos_data_extract` ep ON ep.`emp_no` = a.`emp_no_imported` AND ep.`End` = '2899-12-31' AND ep.`Primary` = 'Y'
LEFT OUTER JOIN `epe`.`accountingposld_data_extract` p ON p.`RangeGID` = ep.`GID`
LEFT OUTER JOIN `traas`.`accountingemppos_data_extract` epp ON epp.`GID` = p.`ParentGID` AND epp.`End` = '2899-12-31' AND epp.`Primary` = 'Y'
-- NOTE(review): rids is joined but never referenced in this block's SELECT list - confirm it is needed.
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `rids` ON `rids`.emp_no = ep.`emp_no`
-- The epp.End / epp.Primary predicates below repeat the conditions already applied in epp's own join.
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `srids` ON `srids`.emp_no = epp.`emp_no` AND epp.`End` = '2899-12-31' AND epp.`Primary` = 'Y'
-- epf -> pf -> epfp: same chain as ep -> p -> epp, but through the *_filtered extract.
LEFT OUTER JOIN `epe`.`accountingemppos_data_extract_filtered` epf ON epf.`emp_no` = a.`emp_no_imported` AND epf.`End` = '2899-12-31' AND epf.`Primary` = 'Y'
LEFT OUTER JOIN `epe`.`accountingposld_data_extract` pf ON pf.`RangeGID` = epf.`GID`
LEFT OUTER JOIN `epe`.`accountingemppos_data_extract_filtered` epfp ON epfp.`GID` = pf.`ParentGID` AND epfp.`End` = '2899-12-31' AND epfp.`Primary` = 'Y'
-- NOTE(review): ridf is joined but never referenced in this block's SELECT list - confirm it is needed.
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `ridf` ON `ridf`.emp_no = epf.`emp_no` AND epf.`End` = '2899-12-31' AND epf.`Primary` = 'Y'
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `sridf` ON `sridf`.emp_no = epfp.`emp_no` AND epfp.`End` = '2899-12-31' AND epfp.`Primary` = 'Y'
WHERE a.`emp_no_imported` REGEXP ('^[a-z]{2}\\d{5}.$')
-- UNION ALL keeps every row from both blocks without a duplicate-elimination step.
UNION ALL
-- Alternative that was benchmarked against UNION ALL (left commented out):
-- UNION DISTINCT
-- Second block: driven by epe.accountingemppos_data_extract_filtered (alias ep).
-- The column aliases given here do not match the first block's names for the same positions.
-- NOTE(review): the literal used for MI is presumably a placeholder - confirm.
SELECT a.`emp_no_imported` AS `emp_no`, a.`supervisor_emp_no` AS `s_emp_no`, u.`First_Name`, 'ƒ' AS `MI`, u.`Last_Name`, u.`Job_Title`, NULL AS `status`,
-- Use the emp_no_accountingnumeric value when present, otherwise the extract's own emp_no.
CASE WHEN rid.`accounting_emp_no` IS NULL THEN
ep.`emp_no`
ELSE
rid.`accounting_emp_no`
END AS `accounting_emp_no`,
CASE WHEN `srid`.`accounting_emp_no` IS NULL THEN
epp.`emp_no`
ELSE
`srid`.`accounting_emp_no`
END AS `accounting_s_emp_no`,
ep.`emp_no` AS `traas_emp_no`,
epp.`emp_no` AS `traas_parent_emp_no`
FROM `epe`.`accountingemppos_data_extract_filtered` ep
LEFT OUTER JOIN `hqsource`.`hq_people` a ON a.`emp_no_imported` = ep.`emp_no`
LEFT OUTER JOIN `epe`.`accountingposld_data_extract` p ON p.`RangeGID` = ep.`GID`
LEFT OUTER JOIN `epe`.`accountingemppos_data_extract_filtered` epp ON epp.`GID` = p.`ParentGID`
LEFT OUTER JOIN `siebel`.`users_all_output` u ON u.`LOGIN` = ep.`emp_no`
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `rid` ON `rid`.emp_no = ep.`emp_no`
LEFT OUTER JOIN `hqsource`.`emp_no_accountingnumeric` `srid` ON `srid`.emp_no = epp.`emp_no`
-- The End predicates on epp and p sit in WHERE, not in ON, so rows where those LEFT JOINs
-- found no match (NULL End) are discarded; the joins to p and epp act as inner joins here.
WHERE
ep.`End` = '2899-12-31' AND
epp.`End` = '2899-12-31' AND
p.`End` = '2899-12-31' AND
ep.emp_no REGEXP ('^F\\d{8}$|^V[0-3]\\d{5}$')
-- Unparenthesized ORDER BY after the last SELECT sorts the combined UNION result.
-- NOTE(review): accounting_emp_no is an alias only in the second SELECT; the first SELECT calls
-- that column accounting_id. Field names were obfuscated, so confirm this resolves on the target engine.
ORDER BY LENGTH(accounting_emp_no) ASC, accounting_emp_no ASC
;
两个 UNION 块各自的 WHERE 子句保证了结果是唯一的。(这个查询已有多年历史,并且每天都在运行。真希望我早点尝试这种做法。)
字段名称已被混淆
将一个 4,403,063 行的结果集与另一个 8,743,056 行的结果集合并,去重后的结果集为 8,141,350 行。
使用 UNION 合并两个查询:耗时 30 到 40 秒。先用 UNION ALL 写入临时表,再对临时表执行 SELECT DISTINCT:耗时 10 秒。
不要假设您会在数百万行中看到与数百或数千行相同的结果