我熟悉 Python/pandas 中的切片,但是当标签设置在列轴上(即基于列的索引)时,应该如何进行切片?
我有一个数据框 `df`(即下面代码中的 `levels_df`),我为其创建了一个列轴,如下所示:
# Build the frame, label its column axis with new_labels, then drop the first two rows.
levels_df = pd.DataFrame(levels).set_axis(new_labels, axis=1).iloc[2:]
levels_df.head(10)
Out[14]:
Position 9.0 8.0 7.0 6.0 ... 6.0 7.0 8.0 9.0
Side 1.0 1.0 1.0 1.0 ... 0.0 0.0 0.0 0.0
2 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
3 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
4 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
5 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
6 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
7 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
8 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
9 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
10 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
11 4484.75 4485.0 4485.25 4485.5 ... 4488.75 4489.0 4489.25 4489.5
[10 rows x 20 columns]
levels_df
Position 9.0 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0
Side 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
3 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
4 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
5 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
6 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
7 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
8 4484.75 4485.0 4485.25 4485.5 4485.75 4486.0 4486.25 4486.5 4486.75 4487.0 4487.25 4487.5 4487.75 4488.0 4488.25 4488.5 4488.75 4489.0 4489.25 4489.5
如何对这样的数据框进行切片,以得到 df 过滤后的版本?
例如,像这样:
levels_df.iloc[:5,(levels_df.Position==8) & (levels_df.Side==1)]
会返回:
Position 8.0
Side 1.0
2 4485.0
3 4485.0
4 4485.0
5 4485.0
6 4485.0
而不是像下面这样报错:
levels_df[(levels_df.Position==8) & (levels_df.Side==1)]
Traceback (most recent call last):
File "/tmp/ipykernel_424060/105426524.py", line 1, in <module>
levels_df[(levels_df.Position==8) & (levels_df.Side==1)]
File "/home/chris/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py", line 5575, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'Position'
我不清楚你为什么要这样组织数据,我个人会考虑使用 pandas 的多级索引(MultiIndex)。
不过,下面的做法应该可行:
# Transpose so the column labels become the row index. Judging by the printed
# frame, Position and Side are levels of the column axis, so after .T they are
# index levels rather than columns (levels_df['Position'] would raise KeyError).
levels_df = levels_df.T

# Read the label values straight from the index levels and build a row mask.
position = levels_df.index.get_level_values('Position')
side = levels_df.index.get_level_values('Side')
filtered_df = levels_df[(position == 8) & (side == 1)]

# Transpose back so the result has the original orientation (labels on columns).
desired_form = filtered_df.T