我有以下 Python 代码:
import pandas as pd
import numpy as np
import openpyxl
wb = openpyxl.load_workbook('MOA_Assignment.xlsx')
PD = pd.DataFrame(wb['Purchase Exit Survey Data'].values)
#drop unneeded rows and columns
PD=PD.iloc[6:,1:]
#sum columns into the 58th row
PD[58]=PD.sum()
为什么 DataFrame.sum() 会得出奇怪的数字?我使用了 DataFrame.astype('int') 来确保数据框不是 object 类型,但结果仍然很奇怪。
正确的结果依次应该是 6, 1, 2, 1, 15。
仅 CNBC 这一行的结果就至少应该大于 3.0,因为显示出来的五个数字加起来已经超过 3。请帮忙,谢谢。
PD.head().to_dict() 的输出:
{1: {6: 'aapka_colors',
7: 'baby_first',
8: 'bloomberg',
9: 'cbs_sports',
10: 'cnbc'},
2: {6: None, 7: None, 8: None, 9: None, 10: None},
3: {6: None, 7: None, 8: None, 9: 1, 10: 1},
4: {6: None, 7: None, 8: None, 9: None, 10: 1},
5: {6: None, 7: None, 8: None, 9: None, 10: None},
6: {6: None, 7: None, 8: None, 9: None, 10: None},
7: {6: None, 7: None, 8: None, 9: None, 10: None},
8: {6: None, 7: None, 8: None, 9: None, 10: 2},
9: {6: None, 7: None, 8: None, 9: None, 10: None},
10: {6: 1, 7: None, 8: 1, 9: None, 10: None},
11: {6: None, 7: None, 8: None, 9: None, 10: None},
12: {6: None, 7: None, 8: None, 9: None, 10: None},
13: {6: None, 7: None, 8: None, 9: None, 10: None},
14: {6: None, 7: None, 8: None, 9: None, 10: None},
15: {6: None, 7: None, 8: None, 9: None, 10: None},
16: {6: None, 7: None, 8: None, 9: None, 10: None},
17: {6: None, 7: None, 8: None, 9: None, 10: None},
18: {6: None, 7: None, 8: None, 9: None, 10: None},
19: {6: None, 7: None, 8: None, 9: None, 10: None},
20: {6: None, 7: None, 8: None, 9: None, 10: None},
21: {6: None, 7: None, 8: None, 9: None, 10: None},
22: {6: None, 7: None, 8: None, 9: None, 10: None},
23: {6: None, 7: 1, 8: None, 9: None, 10: None},
24: {6: None, 7: None, 8: None, 9: None, 10: None},
25: {6: None, 7: None, 8: None, 9: None, 10: 1},
26: {6: None, 7: None, 8: None, 9: None, 10: None},
27: {6: None, 7: None, 8: None, 9: None, 10: None},
28: {6: None, 7: None, 8: None, 9: None, 10: None},
29: {6: 1, 7: None, 8: None, 9: None, 10: 1},
30: {6: 1, 7: None, 8: None, 9: None, 10: None},
31: {6: None, 7: None, 8: None, 9: None, 10: 1},
32: {6: None, 7: None, 8: None, 9: None, 10: None},
33: {6: None, 7: None, 8: None, 9: None, 10: None},
34: {6: None, 7: None, 8: None, 9: None, 10: None},
35: {6: None, 7: None, 8: None, 9: None, 10: None},
36: {6: None, 7: None, 8: None, 9: None, 10: 1},
37: {6: None, 7: None, 8: None, 9: None, 10: None},
38: {6: None, 7: None, 8: 1, 9: None, 10: 1},
39: {6: None, 7: None, 8: None, 9: None, 10: None},
40: {6: None, 7: None, 8: None, 9: None, 10: None},
41: {6: None, 7: None, 8: None, 9: None, 10: None},
42: {6: None, 7: None, 8: None, 9: None, 10: None},
43: {6: 1, 7: None, 8: None, 9: None, 10: 1},
44: {6: None, 7: None, 8: None, 9: None, 10: None},
45: {6: None, 7: None, 8: None, 9: None, 10: None},
46: {6: None, 7: None, 8: None, 9: None, 10: None},
47: {6: None, 7: None, 8: None, 9: None, 10: None},
48: {6: None, 7: None, 8: None, 9: None, 10: None},
49: {6: None, 7: None, 8: None, 9: None, 10: 1},
50: {6: 2, 7: None, 8: None, 9: None, 10: None},
51: {6: None, 7: None, 8: None, 9: None, 10: None},
52: {6: None, 7: None, 8: None, 9: None, 10: None},
53: {6: None, 7: None, 8: None, 9: None, 10: None},
54: {6: None, 7: None, 8: None, 9: None, 10: None},
55: {6: None, 7: None, 8: None, 9: None, 10: None},
56: {6: None, 7: None, 8: None, 9: None, 10: 1},
57: {6: None, 7: None, 8: None, 9: None, 10: 3},
58: {6: 7, 7: 7, 8: 7, 9: 1, 10: 3}}
将
PD[58]=PD.sum()
改为
PD[58]=PD.sum(1)
# Or equivalently: PD[58] = PD.sum(axis=1) -- both options do the same thing
您目前是在按列求和(sum() 默认 axis=0),而您想要的是按行求和(axis=1)。
我无法复现你的问题。
根据您粘贴的字典,我得到的是预期的结果:
res = df.sum(axis=1).to_dict()
# {6: 13.0, 7: 8.0, 8: 9.0, 9: 2.0, 10: 18.0}
df = df.fillna(0.0).loc[:, df.sum(axis=0) != 0]
# 1 3 4 8 10 23 25 29 30 31 36 38 43 \
# 6 aapka_colors 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0
# 7 baby_first 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# 8 bloomberg 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
# 9 cbs_sports 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# 10 cnbc 1.0 1.0 2.0 0.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0
# 49 50 56 57 58
# 6 0.0 2.0 0.0 0.0 7
# 7 0.0 0.0 0.0 0.0 7
# 8 0.0 0.0 0.0 0.0 7
# 9 0.0 0.0 0.0 0.0 1
# 10 1.0 0.0 1.0 3.0 3