如果 `dropna=True` 和 `dropna=False` 产生相同的结果,那么 `pivot_table` 的 `dropna` 参数有什么用?
import pandas as pd
import seaborn as sns
# Load the penguins dataset and melt it into long format: the four
# measurement columns become (metric, value) pairs, while species, island
# and sex are kept as identifier columns.
df = sns.load_dataset("penguins").melt(
    id_vars=['species', 'island', 'sex'],
    value_vars=['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g'],
    var_name='metric',
    value_name='value',
)

# Arguments shared by both pivot tables; only `dropna` differs between them.
pivot_kwargs = dict(
    index='island',
    columns='metric',
    values='value',
    aggfunc=['mean', 'count', 'size'],
    observed=False,
)
k1 = df.pivot_table(dropna=False, **pivot_kwargs).reset_index()
k2 = df.pivot_table(dropna=True, **pivot_kwargs).reset_index()

# Print each table under its heading so the two results can be compared.
for heading, table in (
    ("Pivot table with dropna=False:\n", k1),
    ("\n\nPivot table with dropna=True:\n", k2),
):
    print(heading)
    print(table)
输出:
Pivot table with dropna=False:
island mean \
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
0 Biscoe 15.874850 45.257485 4716.017964 209.706587
1 Dream 18.344355 44.167742 3712.903226 193.072581
2 Torgersen 18.429412 38.950980 3706.372549 191.196078
count \
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
0 167 167 167 167
1 124 124 124 124
2 51 51 51 51
size
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
0 168 168 168 168
1 124 124 124 124
2 52 52 52 52
Pivot table with dropna=True:
island mean \
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
0 Biscoe 15.874850 45.257485 4716.017964 209.706587
1 Dream 18.344355 44.167742 3712.903226 193.072581
2 Torgersen 18.429412 38.950980 3706.372549 191.196078
count \
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
0 167 167 167 167
1 124 124 124 124
2 51 51 51 51
size
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
0 168 168 168 168
1 124 124 124 124
2 52 52 52 52
import pandas as pd
import seaborn as sns
import numpy as np
# Load the dataset, reshape it to long format (one row per measurement) and
# discard the rows whose measurement value is missing.
df = (
    sns.load_dataset("penguins")
    .melt(
        id_vars=['species', 'island', 'sex'],
        value_vars=['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g'],
        var_name='metric',
        value_name='value',
    )
    .dropna(subset='value')
)

# Introduce missing values into the two grouping keys. `.loc` slices by index
# label and includes both endpoints.
df.loc[0:3, 'island'] = np.nan
df.loc[2:4, 'metric'] = np.nan

# Build the same pivot table twice; only `dropna` differs between the calls.
# NOTE(review): the variable names and the printed headings below say
# "margins", but no `margins` argument is passed -- the comparison is about
# `dropna` only.
shared_args = dict(
    index='island',
    columns='metric',
    values='value',
    aggfunc=['size', 'count'],
)
pivot_with_margins = df.pivot_table(dropna=False, **shared_args)
pivot_without_margins = df.pivot_table(dropna=True, **shared_args)

# Show each table under its heading.
for title, table in (
    ("Pivot table with margins (dropna=False):", pivot_with_margins),
    ("\n\nPivot table with margins (dropna=True):", pivot_without_margins),
):
    print(title)
    print(table)
Pivot table with margins (dropna=False):
size \
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm NaN
island
Biscoe 167.0 167.0 167.0 167.0 NaN
Dream 124.0 124.0 124.0 124.0 NaN
Torgersen 51.0 47.0 51.0 51.0 1.0
NaN NaN 2.0 NaN NaN 1.0
count
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm NaN
island
Biscoe 167.0 167.0 167.0 167.0 NaN
Dream 124.0 124.0 124.0 124.0 NaN
Torgersen 51.0 47.0 51.0 51.0 1.0
NaN NaN 2.0 NaN NaN 1.0
Pivot table with margins (dropna=True):
size \
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
island
Biscoe 167 167 167 167
Dream 124 124 124 124
Torgersen 51 47 51 51
count
metric bill_depth_mm bill_length_mm body_mass_g flipper_length_mm
island
Biscoe 167 167 167 167
Dream 124 124 124 124
Torgersen 51 47 51 51