我在 SQL Server 中有一个名为 df 的表,查询如下:
-- Per-country distribution of each row's modal answer for one survey year.
-- For every df row in @Year, the most frequent of (val1, val2, val3) is taken
-- as that row's mode; the share of each mode within a country is then pivoted
-- into one column per satisfaction label.

-- Parameters
-- Optional: also declare @Country VARCHAR(50) = 'Brazil' to restrict to one country.
DECLARE @Year INT = 2020;

WITH ModeData AS (
    -- One output row per df row in the selected year, carrying its modal value.
    SELECT
        d.country,
        m.Mode
    FROM df AS d
    CROSS APPLY (
        -- NOTE(review): when two or more values are equally frequent, TOP (1)
        -- picks one arbitrarily; add a secondary ORDER BY key if the choice
        -- must be deterministic.
        SELECT TOP (1)
            v.Mode,
            COUNT(*) AS cnt
        FROM (VALUES (d.val1), (d.val2), (d.val3)) AS v(Mode)
        GROUP BY v.Mode
        ORDER BY COUNT(*) DESC
    ) AS m
    -- Variable name spelled exactly as declared, so the script also runs on
    -- instances with a case-sensitive collation.
    WHERE d.year = @Year -- AND d.country = @Country
)
-- Calculate proportions and map modes to labels
, Proportions AS (
    SELECT
        country,
        -- No ELSE branch: a Mode outside 1-5 (or NULL) gets a NULL label, so it
        -- appears in no pivoted column but still counts in the denominator.
        CASE
            WHEN Mode = 1 THEN 'Very Dissatisfied'
            WHEN Mode = 2 THEN 'Dissatisfied'
            WHEN Mode = 3 THEN 'Neutral'
            WHEN Mode = 4 THEN 'Satisfied'
            WHEN Mode = 5 THEN 'Very Satisfied'
        END AS SatisfactionLevel,
        -- Multiplying by 1.0 forces decimal rather than integer division.
        COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
    FROM ModeData
    GROUP BY country, Mode
)
-- Pivot the results to get each satisfaction level as a column
SELECT
    p.country,
    p.[Very Dissatisfied],
    p.[Dissatisfied],
    p.[Neutral],
    p.[Satisfied],
    p.[Very Satisfied]
FROM Proportions
PIVOT (
    -- Each (country, label) pair has a single row, so MAX just passes it through.
    MAX(Proportion)
    FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
) AS p
ORDER BY p.country;
结果表是:
国家 | 非常不满意 | 不满意 | 中性 | 满意 | 非常满意 |
---|---|---|---|---|---|
巴西 | 0.285714285714 | 0.142857142857 | 0.142857142857 | 0.142857142857 | 0.285714285714 |
加拿大 | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 | 0.222222222222 |
法国 | 0.250000000000 | 0.125000000000 | 0.250000000000 | 0.250000000000 | 0.125000000000 |
意大利 | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.333333333333 |
美国 | 0.222222222222 | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 |
我想统计每个国家/地区的行数,即表 df 中每个国家/地区有多少行,并将该计数作为额外的一列添加到结果表中。理想情况下,基于这份示例数据,我想要的结果如下:
国家 | 非常不满意 | 不满意 | 中性 | 满意 | 非常满意 | 计数 |
---|---|---|---|---|---|---|
巴西 | 0.285714285714 | 0.142857142857 | 0.142857142857 | 0.142857142857 | 0.285714285714 | 7 |
加拿大 | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 | 0.222222222222 | 9 |
法国 | 0.250000000000 | 0.125000000000 | 0.250000000000 | 0.250000000000 | 0.125000000000 | 8 |
意大利 | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.166666666666 | 0.333333333333 | 6 |
美国 | 0.222222222222 | 0.111111111111 | 0.111111111111 | 0.333333333333 | 0.222222222222 | 9 |
添加一个额外的 CTE 来计算每个国家/地区的行数,然后将其联接到最终结果中。
-- Per-country distribution of each row's modal answer for one survey year,
-- with the number of df rows per country appended as a Total column.

-- Parameters
-- Optional: also declare @Country VARCHAR(50) = 'Brazil' to restrict to one country.
DECLARE @Year INT = 2020;

WITH ModeData AS (
    -- One output row per df row in the selected year, carrying its modal value:
    -- the most frequent of (val1, val2, val3).
    SELECT
        d.country,
        m.Mode
    FROM df AS d
    CROSS APPLY (
        -- NOTE(review): when two or more values are equally frequent, TOP (1)
        -- picks one arbitrarily; add a secondary ORDER BY key if the choice
        -- must be deterministic.
        SELECT TOP (1)
            v.Mode,
            COUNT(*) AS cnt
        FROM (VALUES (d.val1), (d.val2), (d.val3)) AS v(Mode)
        GROUP BY v.Mode
        ORDER BY COUNT(*) DESC
    ) AS m
    -- Variable name spelled exactly as declared, so the script also runs on
    -- instances with a case-sensitive collation.
    WHERE d.year = @Year -- AND d.country = @Country
)
-- Calculate proportions and map modes to labels
, Proportions AS (
    SELECT
        country,
        -- No ELSE branch: a Mode outside 1-5 (or NULL) gets a NULL label, so it
        -- appears in no pivoted column but still counts in the denominator.
        CASE
            WHEN Mode = 1 THEN 'Very Dissatisfied'
            WHEN Mode = 2 THEN 'Dissatisfied'
            WHEN Mode = 3 THEN 'Neutral'
            WHEN Mode = 4 THEN 'Satisfied'
            WHEN Mode = 5 THEN 'Very Satisfied'
        END AS SatisfactionLevel,
        -- Multiplying by 1.0 forces decimal rather than integer division.
        COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country) AS Proportion
    FROM ModeData
    GROUP BY country, Mode
)
-- Pivot the results to get each satisfaction level as a column
, Pivoted AS (
    SELECT
        p.country AS Country,
        p.[Very Dissatisfied],
        p.[Dissatisfied],
        p.[Neutral],
        p.[Satisfied],
        p.[Very Satisfied]
    FROM Proportions
    PIVOT (
        -- Each (country, label) pair has a single row, so MAX just passes it through.
        MAX(Proportion)
        FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])
    ) AS p
)
-- Number of rows per country in the original df table
, CountryCounts AS (
    -- NOTE(review): this counts rows from every year, while the proportions
    -- above only use rows where year = @Year. If df holds more than one year
    -- and the count should match the proportions' denominator, add
    -- "WHERE year = @Year" here.
    SELECT
        Country,
        COUNT(*) AS Total
    FROM df
    GROUP BY Country
)
SELECT
    pv.Country,
    pv.[Very Dissatisfied],
    pv.[Dissatisfied],
    pv.[Neutral],
    pv.[Satisfied],
    pv.[Very Satisfied],
    cc.Total
FROM Pivoted AS pv
INNER JOIN CountryCounts AS cc
    ON pv.Country = cc.Country
ORDER BY pv.Country;