我有一个包含三列的表格
group
、person
、score
我想计算每组 score
的基尼系数。这样我就可以根据score
衡量人的不平等,最有效的方法是什么?
这是使用 Oracle SQL 代码完成的基尼计算(使用随机样本数据):
WITH -- S a m p l e D a t a :
balances AS
( Select DATE '2018-01-01' as A_DATE, IN_VAL as BALANCE
From ( Select 11 as IN_VAL From Dual Union All
Select 12 From Dual Union All
Select 13 From Dual Union All
Select 14 From Dual Union All
Select 15 From Dual ) -- Gini: 0.061538 [11,12,13,14,15]
Union All
Select DATE '2018-01-02' as A_DATE, IN_VAL as BALANCE
From ( Select 33 as IN_VAL From Dual Union All
Select 33 From Dual Union All
Select 33 From Dual Union All
Select 33 From Dual ) -- Gini: 0.0 [33,33,33,33]
Union All
Select DATE '2018-01-03' as A_DATE, IN_VAL as BALANCE
From ( Select 142 as IN_VAL From Dual Union All
Select 152 From Dual Union All
Select 112 From Dual Union All
Select 182 From Dual Union All
Select 162 From Dual Union All
Select 1452 From Dual Union All
Select 1672 From Dual Union All
Select 112 From Dual Union All
Select 142 From Dual Union All
Select 1112 From Dual) -- Gini: 0.542985 [142,152,112,182,162,1452,1672,112,142,112]
),
使用 CTE returned_balances 对数据进行排名...
ranked_balances AS
( Select A_DATE, BALANCE, Row_Number() Over (Partition By A_DATE Order By BALANCE Desc) as RANK
From balances
)
-- M a i n S Q L : ( Gini Calculation )
SELECT A_DATE,
-- (1 − 2B) https://en.wikipedia.org/wiki/Gini_coefficient
Round( 1 - ( 2 *
Sum( (BALANCE * (RANK - 1) + BALANCE / 2) ) /
Count(*) /
sum(BALANCE)
), 6
) as GINI
FROM ranked_balances
GROUP BY A_DATE
HAVING Sum(BALANCE) > 0
ORDER BY A_DATE asc
/*
A_DATE GINI
-------- ----------
01.01.18 0.061538
02.01.18 0
03.01.18 0.541985 */