我有一个 DataFrame(df),想用 Pandas 对它做一次重塑(reshape),把各个数值与它们所属的类别(range 和 type)放在一起。
数据
year quarter Location Lower_stat_AA Lower_range_AA Lower_stat_BB Lower_range_BB Medium_stat_AA Medium_range_AA Medium_stat_BB Medium_range_BB Upper_stat_AA Upper_range_AA Upper_stat_BB Upper_range_BB
2027 Q1 27 NY 1.14 1.03 0.51 1.53
2027 Q1 27 CA 1.14 0.38 0.55 1.02
2027 Q2 27 NY 0 0.86 1.02 1.27
2027 Q2 27 CA 0 3.66 5.4 0
2027 Q3 27 NY 1 0 0 0
2027 Q3 27 CA 0 0 0 0
数据框
import pandas as pd
# Sample input in wide format: one row per (quarter, Location) pair and one
# column per <range>_<type> measurement. None marks a missing measurement.
# NOTE(review): row 3 (CA, 'Q2 27') carries values in Lower_range_AA,
# Lower_stat_BB and Lower_range_BB, unlike the other CA rows -- confirm
# that this is intended.
data = {
    # Identifier columns.
    'year': [2027] * 6,
    'quarter': ['Q1 27'] * 2 + ['Q2 27'] * 2 + ['Q3 27'] * 2,
    'Location': ['NY', 'CA'] * 3,
    # "Lower" measurements.
    'Lower_stat_AA': [1.14, None, 0, None, 1, None],
    'Lower_range_AA': [1.03, None, 0.86, 3.66, 0, None],
    'Lower_stat_BB': [0.51, None, 1.02, 5.4, 0, None],
    'Lower_range_BB': [1.53, None, 1.27, 0, 0, None],
    # "Medium" measurements.
    'Medium_stat_AA': [None, 1.14, None, 0, None, 0],
    'Medium_range_AA': [None, 0.38, None, 5.4, None, 0],
    'Medium_stat_BB': [None, 0.55, None, 0, None, 0],
    'Medium_range_BB': [None, 1.02, None, 0, None, 0],
    # "Upper" measurements: entirely missing in this sample.
    'Upper_stat_AA': [None] * 6,
    'Upper_range_AA': [None] * 6,
    'Upper_stat_BB': [None] * 6,
    'Upper_range_BB': [None] * 6,
}
df = pd.DataFrame(data)
想要
location range type Q1 27 Q2 27 Q3 27
NY Lower_stat AA 1.14 0 1
NY Lower_range AA 1.03 0.86 0
NY Lower_stat BB 0.51 1.02 0
NY Lower_range BB 1.53 1.27 0
CA Medium_stat AA 1.14 0 0
CA Medium_range AA 0.38 3.66 0
CA Medium_stat BB 0.55 5.4 0
CA Medium_range BB 1.02 0 0
# Desired result, written one tuple per output row in the same layout as the
# table above: (location, range, role, Q1 27, Q2 27, Q3 27).
_expected_rows = [
    ('NY', 'Lower_stat', 'AA', 1.14, 0, 1),
    ('NY', 'Lower_range', 'AA', 1.03, 0.86, 0),
    ('NY', 'Lower_stat', 'BB', 0.51, 1.02, 0),
    ('NY', 'Lower_range', 'BB', 1.53, 1.27, 0),
    ('CA', 'Medium_stat', 'AA', 1.14, 0, 0),
    ('CA', 'Medium_range', 'AA', 0.38, 3.66, 0),
    ('CA', 'Medium_stat', 'BB', 0.55, 5.4, 0),
    ('CA', 'Medium_range', 'BB', 1.02, 0, 0),
]
_expected_columns = ('location', 'range', 'role', 'Q1 27', 'Q2 27', 'Q3 27')

# Transpose the rows into a column-oriented dict of lists.
data = {
    column: list(values)
    for column, values in zip(_expected_columns, zip(*_expected_rows))
}
我的尝试
import pandas as pd
import janitor
# Attempted reshape with pyjanitor's pivot_longer. Per the question, this does
# not produce the desired output.
(df
.pivot_longer(
# NOTE(review): `df` as constructed above has no 'type' column, so this
# label slice has no valid end point -- verify against the real frame.
index = slice('year', 'type'),
names_to = ("range", ".value"),
# NOTE(review): the measurement column names are joined with underscores and
# contain no spaces, so splitting on " " presumably cannot separate the
# range part from the type part -- confirm against the pyjanitor docs.
names_sep = " ")
)
上面的代码没有得到想要的输出。欢迎任何建议。
代码:先用 `melt`,再用 `pivot_table`(透视):
# Step 1: wide -> long. Every measurement column becomes a
# ('variable', 'value') pair next to the identifier columns.
long_df = df.melt(id_vars=['year', 'quarter', 'Location'])

# Step 2: split each column name on its last underscore, e.g.
# 'Lower_stat_AA' -> ['Lower_stat', 'AA'].
name_parts = long_df['variable'].str.rsplit('_', n=1)
long_df = long_df.assign(
    range=name_parts.str.get(0),
    type=name_parts.str.get(1),
)

# Step 3: long -> wide again, this time with one column per quarter.
wide_df = long_df.pivot_table(
    values='value',
    index=['Location', 'range', 'type'],
    columns='quarter',
)

# Step 4: flatten the index into ordinary columns and show the remaining
# gaps as 0.
wide_df.reset_index().fillna(0)
output
quarter Location range type Q1 27 Q2 27 Q3 27
0 CA Lower_range AA 0.00 3.66 0.0
1 CA Lower_range BB 0.00 0.00 0.0
2 CA Lower_stat BB 0.00 5.40 0.0
3 CA Medium_range AA 0.38 5.40 0.0
4 CA Medium_range BB 1.02 0.00 0.0
5 CA Medium_stat AA 1.14 0.00 0.0
6 CA Medium_stat BB 0.55 0.00 0.0
7 NY Lower_range AA 1.03 0.86 0.0
8 NY Lower_range BB 1.53 1.27 0.0
9 NY Lower_stat AA 1.14 0.00 1.0
10 NY Lower_stat BB 0.51 1.02 0.0
另一种做法:组合使用 `janitor.pivot_longer` 和 `pd.pivot`:
# pip install pyjanitor
import pandas as pd
import janitor
# Reshape with pyjanitor: go long on every measurement column, then pivot the
# quarters back out as columns.
(df
.pivot_longer(
    index=['year', 'quarter', 'Location'],
    # Split e.g. 'Lower_stat_AA' into range='Lower_stat' and type='AA'.
    names_to=['range', 'type'],
    names_pattern=r"(.+)_(AA|BB)",
    # Drop missing measurements before pivoting.
    dropna=True)
.pivot(
    index=['Location', 'range', 'type'],
    columns='quarter',
    values='value')
# The `downcast` keyword of `fillna` is deprecated in recent pandas, so fill
# first and then let pandas infer proper numeric dtypes for any object columns.
.fillna(0)
.infer_objects()
# Remove the leftover 'quarter' label from the column axis.
.rename_axis(columns=None)
.reset_index()
)
Location range type Q1 27 Q2 27 Q3 27
0 CA Lower_range AA 0.00 3.66 0.0
1 CA Lower_range BB 0.00 0.00 0.0
2 CA Lower_stat BB 0.00 5.40 0.0
3 CA Medium_range AA 0.38 5.40 0.0
4 CA Medium_range BB 1.02 0.00 0.0
5 CA Medium_stat AA 1.14 0.00 0.0
6 CA Medium_stat BB 0.55 0.00 0.0
7 NY Lower_range AA 1.03 0.86 0.0
8 NY Lower_range BB 1.53 1.27 0.0
9 NY Lower_stat AA 1.14 0.00 1.0
10 NY Lower_stat BB 0.51 1.02 0.0