我有一个看起来像这样的数据集:
我想将每个 co 的所有值汇总到一行中,因此最终结果如下所示:
看起来很简单,对吧?只需按照这个回答中的建议使用 crosstab 编写查询即可。问题是这需要我先执行 CREATE EXTENSION tablefunc;,而我没有数据库的写权限。
有人可以推荐替代方案吗?
条件聚合:
-- Pivot without the tablefunc extension: one output row per co and one
-- output column per ontology_type.
-- Each CASE yields tags only for rows of its own ontology_type (NULL for all
-- other rows) and MIN ignores NULLs, so each column keeps that type's tag
-- (the smallest one when a co has several tags of the same type).
-- Leading commas are used so the column list stays valid SQL even while the
-- placeholder line below is still a comment.
SELECT co
     , MIN(CASE WHEN ontology_type = 'industry' THEN tags END) AS industry
     , MIN(CASE WHEN ontology_type = 'customer_type' THEN tags END) AS customer_type
     -- , ... add one more MIN(CASE ...) line per remaining ontology_type
FROM tab_name
GROUP BY co;
您可以在 DO 块中动态生成带有交叉表列的 SQL,用 PREPARE 把它注册为预备语句,然后用 EXECUTE 执行它。
-- Replace tab_name with your own table name.
-- Generates a pivot query with one string_agg column per distinct
-- ontology_type found in tab_name, registers it as the prepared statement
-- "prepared_query", and then runs it.
DO $$
DECLARE
    _query text;
    _name  text;
BEGIN
    _name := 'prepared_query';
    -- COALESCE: when tab_name has no rows (or only NULL ontology_type values)
    -- the inner string_agg returns NULL, which would turn the whole
    -- concatenation into NULL and make the PREPARE below fail. Fall back to
    -- a query that selects co alone.
    _query := '
SELECT co
'||COALESCE((SELECT ', '||string_agg(DISTINCT
' string_agg(DISTINCT
CASE ontology_type WHEN '||quote_literal(ontology_type)||' THEN tags
ELSE NULL
END, '',''
) AS '||quote_ident(ontology_type),',')
FROM tab_name), '')||'
FROM tab_name
GROUP BY co
';
    -- Remove a statement of the same name left over from an earlier run so
    -- that PREPARE does not fail on a duplicate name.
    BEGIN
        EXECUTE 'DEALLOCATE '||_name;
    EXCEPTION
        WHEN invalid_sql_statement_name THEN
            NULL; -- no such prepared statement yet: nothing to deallocate
    END;
    EXECUTE 'PREPARE '||_name||' AS '||_query;
END
$$;

EXECUTE prepared_query;
自 9.4 版本起提供了 json_object_agg(),它让我们可以动态地完成一部分必要的工作。
但是要做到完全动态,必须首先在匿名代码块(DO)中通过 EXECUTE 运行动态 SQL,来构建一个临时类型(临时表)。
DB FIDDLE(英国): https://dbfiddle.uk/Sn7iO4zL
免责声明:通常最终用户都被授予了创建临时表的权限,但实际情况可能因环境而异。另一个问题是,普通用户是否可以以内联代码的方式执行匿名过程(DO 块)。
-- ---------------------------------------------------------------------
-- Test data
--   tmpSales        : 10000 generated sales with random book_id (1..12),
--                     customer_id (1..100) and a random sale_date between
--                     2010-01-01 and 2016-12-31.
--   tmp_month_total : number of sales per year and month; mn is the month
--                     number, month is the to_char 'mon' label.
-- ---------------------------------------------------------------------
DROP TABLE IF EXISTS tmpSales;
CREATE TEMP TABLE tmpSales AS
SELECT
    gs.sale_id,
    TRUNC(RANDOM() * 12) + 1  AS book_id,
    TRUNC(RANDOM() * 100) + 1 AS customer_id,
    (date '2010-01-01'
        + random() * (timestamp '2016-12-31' - timestamp '2010-01-01')) AS sale_date
FROM generate_series(1, 10000) AS gs (sale_id);

DROP TABLE IF EXISTS tmp_month_total;
CREATE TEMP TABLE tmp_month_total AS
SELECT
    date_part('year', sale_date)  AS year,
    date_part('month', sale_date) AS mn,
    to_char(sale_date, 'mon')     AS month,
    COUNT(*)                      AS total
FROM tmpSales
GROUP BY
    date_part('year', sale_date),
    date_part('month', sale_date),
    to_char(sale_date, 'mon');
DATA:
+----+--+-----+-----+
|year|mn|month|total|
+----+--+-----+-----+
|2010|1 |jan |127 |
|2010|2 |feb |117 |
|2010|3 |mar |121 |
|2010|4 |apr |131 |
|2010|5 |may |106 |
|2010|6 |jun |121 |
|2010|7 |jul |129 |
|2010|8 |aug |114 |
|2010|9 |sep |115 |
|2010|10|oct |110 |
|2010|11|nov |133 |
|2010|12|dec |108 |
+----+--+-----+-----+
-- /**
-- END test data
-- END test data
-- END test data
-- */
-- Dynamically build a temporary row type from the existing data (nothing is
-- hard-coded): tmpTblTyp is an empty temp table with one int column per
-- distinct month label in tmp_month_total, ordered by month number (mn).
DROP TABLE IF EXISTS tmpTblTyp CASCADE;
DO LANGUAGE plpgsql $$
DECLARE
    v_sqlstring VARCHAR = '';
BEGIN
    v_sqlstring := CONCAT(
        'CREATE TEMP TABLE tmpTblTyp AS SELECT ',
        -- quote_ident: the month labels come from data and are spliced in as
        -- column names, so quote them whenever they are not plain identifiers.
        (SELECT STRING_AGG(
                    CONCAT('NULL::int AS ', quote_ident(month))::TEXT, ' ,'
                    ORDER BY mn
                )::TEXT
         FROM (SELECT DISTINCT month, mn FROM tmp_month_total) a),
        -- LIMIT 0: only the column layout is wanted, never any rows.
        ' LIMIT 0 '
    );
    -- Uncomment to inspect the generated statement:
    -- RAISE NOTICE '%', v_sqlstring;
    EXECUTE v_sqlstring;
END $$;
-- Build tmpMoToJson: one row per year holding that year's month totals, both
-- as a JSON array (js_months_arr) and as a tmpTblTyp-typed row column (jprs).
DROP TABLE IF EXISTS tmpMoToJson ;
CREATE TEMP TABLE tmpMoToJson AS
SELECT
year AS year
-- wrap the per-year JSON object in a one-element JSON array
,(json_build_array( months )) AS js_months_arr
,json_populate_recordset ( NULL::tmpTblTyp /** the temp table doubles as the target record type **/
, json_build_array( months )
) jprs /** row-typed column; it can be expanded later with (jprs).* **/
FROM ( SELECT year
-- collect each year's month/total pairs into a single JSON object
,json_object_agg(month,total) AS months
FROM tmp_month_total
GROUP BY year
ORDER BY year
) a
;
-- Final pivot: one row per year with one column per month, obtained by
-- expanding the row-typed jprs column of tmpMoToJson.
SELECT
    year
    ,(ROW((jprs).*)::tmpTblTyp).* -- explode the composite type row into its columns
FROM tmpMoToJson
-- Explicit ordering: the ORDER BY used while filling tmpMoToJson does not
-- guarantee the order in which this SELECT returns the rows.
ORDER BY year;
+----+---+---+---+---+---+---+---+---+---+---+---+---+
|year|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|
+----+---+---+---+---+---+---+---+---+---+---+---+---+
|2010|127|117|121|131|106|121|129|114|115|110|133|108|
|2011|117|112|117|115|139|116|119|152|117|112|115|103|
|2012|129|111|98 |140|109|131|114|110|112|115|100|121|
|2013|128|112|141|127|141|102|113|109|111|110|123|116|
|2014|129|114|117|118|111|123|106|111|127|121|124|145|
|2015|118|113|131|122|120|121|140|114|118|108|114|131|
|2016|117|110|139|100|110|116|112|109|131|117|122|132|
+----+---+---+---+---+---+---+---+---+---+---+---+---+
从 postgres 9.4 开始,您可以使用 json_object_agg...
-- set up a temp table with some rows to pivot into columns
-- "if exists" keeps the script re-runnable: a plain drop fails on the first
-- run, when _temp does not exist yet.
drop table if exists _temp;
create temp table _temp (
    id     serial,
    key    int,
    header text,
    value  text
);
insert into _temp (key, header, value) values
    (1, 'a', 'this is a'),
    (1, 'b', 'this is b'),
    (1, 'c', 'this is c'),
    (2, 'a', 'this is a'),
    (2, 'b', 'this is b'),
    (3, 'c', 'this is c')
;
-- pivot using json_object_agg: fold each key's header/value pairs into one
-- JSON object, then read one text column per known header with ->>
-- (a header that is missing for a key comes back as null)
with headers_by_key as (
    select
        key,
        json_object_agg(header, value) as headers
    from _temp
    group by key
)
select
    h.key,
    h.headers->>'a' as a,
    h.headers->>'b' as b,
    h.headers->>'c' as c
from headers_by_key as h
order by h.key
;
/*
--results
+---+---------+---------+---------+
|key|a |b |c |
+---+---------+---------+---------+
|1 |this is a|this is b|this is c|
|2 |this is a|this is b|null |
|3 |null |null |this is c|
+---+---------+---------+---------+
*/
使用 PIVOT 也可以得到您需要的输出(注意:PIVOT 是 SQL Server 风格的语法,PostgreSQL 本身并不支持):
-- Pivots dataSet so that each listed ontology type becomes its own column
-- holding max(tags) for that type.
-- NOTE(review): PIVOT is SQL Server-style syntax; PostgreSQL does not appear
-- to provide a PIVOT operator, so confirm the target engine before using this.
-- NOTE(review): the column is spelled ontologyType here but ontology_type in
-- the other queries of this file; verify against the real table definition.
SELECT co
,industry
,customer_type
,product_type
,sales_model
,stage
FROM dataSet
PIVOT(max(tags) FOR ontologyType IN (
industry
,customer_type
,product_type
,sales_model
,stage
)) AS PVT