棘手的组织转换,使用 Pandas 将值与它们的类别放在一起

问题描述 投票:0回答:2

我有一个 df,我想创建一个转换,使用 Pandas 将值与它们的类别放在一起。

数据

year    quarter Location Lower_stat_AA Lower_range_AA Lower_stat_BB Lower_range_BB    Medium_stat_AA Medium_range_AA Medium_stat_BB Medium_range_BB   Upper_stat_AA  Upper_range_AA Upper_stat_BB Upper_range_BB
2027    Q1 27   NY       1.14         1.03         0.51         1.53                                 
2027    Q1 27   CA                                                              1.14        0.38         0.55        1.02               
2027    Q2 27   NY       0            0.86         1.02         1.27                                 
2027    Q2 27   CA                                                              0       3.66             5.4         0              
2027    Q3 27   NY       1            0            0            0                                
2027    Q3 27   CA                                                              0     0                  0           0  

数据框

import pandas as pd

# Sample frame: one row per (quarter, Location) pair. Measure columns are
# named <level>_<metric>_<type>; None is used where no value is given.
data = {
    'year': [2027] * 6,
    'quarter': [q for q in ('Q1 27', 'Q2 27', 'Q3 27') for _ in range(2)],
    'Location': ['NY', 'CA'] * 3,
    # "Lower" measures
    'Lower_stat_AA': [1.14, None, 0, None, 1, None],
    'Lower_range_AA': [1.03, None, 0.86, 3.66, 0, None],
    'Lower_stat_BB': [0.51, None, 1.02, 5.4, 0, None],
    'Lower_range_BB': [1.53, None, 1.27, 0, 0, None],
    # "Medium" measures
    'Medium_stat_AA': [None, 1.14, None, 0, None, 0],
    'Medium_range_AA': [None, 0.38, None, 5.4, None, 0],
    'Medium_stat_BB': [None, 0.55, None, 0, None, 0],
    'Medium_range_BB': [None, 1.02, None, 0, None, 0],
    # "Upper" measures: no values at all in this sample
    'Upper_stat_AA': [None] * 6,
    'Upper_range_AA': [None] * 6,
    'Upper_stat_BB': [None] * 6,
    'Upper_range_BB': [None] * 6,
}

df = pd.DataFrame(data=data)

想要

location    range         type  Q1 27   Q2 27   Q3 27   
NY          Lower_stat    AA    1.14    0       1   
NY          Lower_range   AA    1.03    0.86    0   
NY          Lower_stat    BB    0.51    1.02    0   
NY          Lower_range   BB    1.53    1.27    0   
CA          Medium_stat   AA    1.14    0       0   
CA          Medium_range  AA    0.38    3.66    0   
CA          Medium_stat   BB    0.55    5.4     0                                                                                                                                       
CA          Medium_range  BB    1.02    0       0   
                        

# Desired result written out by hand: one row per (location, range, type)
# combination and one column per quarter.
data = {
    'location': ['NY', 'NY', 'NY', 'NY', 'CA', 'CA', 'CA', 'CA'],
    'range': ['Lower_stat', 'Lower_range', 'Lower_stat', 'Lower_range',
              'Medium_stat', 'Medium_range', 'Medium_stat', 'Medium_range'],
    # Keyed 'type' so the dict agrees with the header of the wanted table.
    'type': ['AA', 'AA', 'BB', 'BB', 'AA', 'AA', 'BB', 'BB'],
    'Q1 27': [1.14, 1.03, 0.51, 1.53, 1.14, 0.38, 0.55, 1.02],
    'Q2 27': [0, 0.86, 1.02, 1.27, 0, 3.66, 5.4, 0],
    'Q3 27': [1, 0, 0, 0, 0, 0, 0, 0],
}

import pandas as pd
import janitor

# Attempt from the question; it is reported not to give the wanted layout.
# NOTE(review): `df` as built earlier has no 'type' column, so the label slice
# 'year':'type' has no end point to resolve against -- confirm.
# NOTE(review): the measure columns are joined with '_' and contain no spaces,
# so names_sep = " " presumably never splits them -- confirm against the
# pyjanitor pivot_longer documentation.
(df
.pivot_longer(
    index = slice('year', 'type'), 
    names_to = ("range", ".value"), 
    names_sep = " ")
)

以上不会产生所需的输出。 任何建议都是有帮助的。

python pandas numpy openpyxl
2个回答
1
投票

代码

使用 melt 和 pivot_table

# Reshape to long form: one row per (year, quarter, Location, variable).
long_df = df.melt(id_vars=['year', 'quarter', 'Location'])

# Split each melted column name at its last underscore, e.g.
# 'Lower_stat_AA' -> 'Lower_stat' (range) and 'AA' (type).
name_parts = long_df['variable'].str.rsplit('_', n=1)
long_df = long_df.assign(range=name_parts.str[0], type=name_parts.str[1])

# Spread the quarters back out into columns; pivot_table aggregates with its
# default aggfunc when several values share a cell.
wide_df = long_df.pivot_table(
    values='value',
    index=['Location', 'range', 'type'],
    columns='quarter',
)

# Final expression (displayed in a notebook/REPL): flat index, gaps shown as 0.
wide_df.reset_index().fillna(0)

output

quarter Location    range           type    Q1 27   Q2 27   Q3 27
0       CA          Lower_range     AA      0.00    3.66    0.0
1       CA          Lower_range     BB      0.00    0.00    0.0
2       CA          Lower_stat      BB      0.00    5.40    0.0
3       CA          Medium_range    AA      0.38    5.40    0.0
4       CA          Medium_range    BB      1.02    0.00    0.0
5       CA          Medium_stat     AA      1.14    0.00    0.0
6       CA          Medium_stat     BB      0.55    0.00    0.0
7       NY          Lower_range     AA      1.03    0.86    0.0
8       NY          Lower_range     BB      1.53    1.27    0.0
9       NY          Lower_stat      AA      1.14    0.00    1.0
10      NY          Lower_stat      BB      0.51    1.02    0.0

0
投票

一个选项:结合使用 janitor.pivot_longer 和 pd.pivot:

# pip install pyjanitor
import pandas as pd
import janitor

# Reshape with pyjanitor's pivot_longer, then pivot the quarters into columns.
(df
.pivot_longer(
    index = ['year', 'quarter', 'Location'],
    names_to = ['range', 'type'],
    # (.+) captures everything before the trailing _AA / _BB suffix.
    names_pattern = r"(.+)_(AA|BB)",
    dropna=True)
.pivot(
    index = ['Location', 'range', 'type'],
    columns = 'quarter',
    values = 'value')
# fillna's `downcast` keyword is deprecated in pandas 2.2, so fill first and
# let infer_objects() convert any object-dtype columns afterwards.
.fillna(0)
.infer_objects()
# Drop the leftover 'quarter' label on the columns axis before flattening.
.rename_axis(columns=None)
.reset_index()
)

   Location         range type  Q1 27  Q2 27  Q3 27
0        CA   Lower_range   AA   0.00   3.66    0.0
1        CA   Lower_range   BB   0.00   0.00    0.0
2        CA    Lower_stat   BB   0.00   5.40    0.0
3        CA  Medium_range   AA   0.38   5.40    0.0
4        CA  Medium_range   BB   1.02   0.00    0.0
5        CA   Medium_stat   AA   1.14   0.00    0.0
6        CA   Medium_stat   BB   0.55   0.00    0.0
7        NY   Lower_range   AA   1.03   0.86    0.0
8        NY   Lower_range   BB   1.53   1.27    0.0
9        NY    Lower_stat   AA   1.14   0.00    1.0
10       NY    Lower_stat   BB   0.51   1.02    0.0
© www.soinside.com 2019 - 2024. All rights reserved.