TypeError:-:'int'和'StandardScaler不支持的操作数类型>>

问题描述 投票:0回答:1

大家好,谢谢您的光临,当我尝试获取预测表训练模型时遇到问题,我尝试了很多代码来解决此问题,基本上,我的数据包含很多类型,这是sample of my data,我想使用StandardScaler我的数据。 。这里是原始代码...

# Load the training set
df = pd.read_csv('training_data/R_training_set.csv', 
low_memory=False)
df = df.dropna(how='any',axis=0)
df.shape

ohe_fields=['one_way','surface_type','street_type','hour','weekday','month']

# One-Hot encode a couple of variables
df_ohe = pd.get_dummies(df,columns=ohe_fields)

# Get the one-hot variable names
ohe_feature_names = pd.get_dummies(df[ohe_fields],columns=ohe_fields).columns.tolist()
df_ohe.head()

这是我的数据看起来像

enter image description here

 # Sinuosity is typically close to 1, even for moderately curvy roads. A high sinuosity means a longer road.
feature_transforms = {
    'sinuosity': np.log
}
for feature,transform in feature_transforms.items():
    df_ohe[feature] = transform(df_ohe[feature])

# Continuously valued features
float_feature_names = [
    'accident_counts',
    'speed_limit',
    'aadt',
    'surface_width',
    'sinuosity',
    'euclidean_length',
    'segment_length',
    'road_orient_approx',
    'Rain',
    'dust',
    'temperature',
    'visibility',
    'wind_speed',
    'proximity_to_billboard',
    'proximity_to_major_road',
    'proximity_to_signal',
    'proximity_to_nearest_intersection',
    'proximity_to_nearest_exit',
    'population_density',
    'Hopspot'
]
float_features = df_ohe.xs(float_feature_names,axis=1).values

# Use scikit-learn's StandardScaler
scaler = StandardScaler()
float_scaled = scaler.fit_transform(float_features)
#print (float_features.mean(axis=0))

df_ohe[float_feature_names] = float_scaled
with open('scalers_4.pkl','wb') as fp:
    pickle.dump(scaler,fp)


y = df['target'].values
binary_feature_names = [
    'dew_point_temperature',
    'EXTREME_AIR_TEMPERATURE',
    'at_exit',
    'at_intersection',
]
df_ohe = df_ohe.xs(float_feature_names+binary_feature_names+ohe_feature_names,axis=1)


X = df_ohe.values
y = df['target'].values
feature_names = df_ohe.columns.tolist()


wrangler = {
    'float_scaler_mean': scaler,
    'float_scaler_std': float_scaled,

    'float_feature_names': float_feature_names,
   ## 'drop_columns': drop_columns,
    'ohe_fields': ohe_fields,
    'feature_names': feature_names,
    'feature_transforms': feature_transforms 
}
with open('wrangler_2.pkl','wb') as fp:
    pickle.dump(wrangler,fp)

P.S,我使用xgboost算法训练数据

with open('wrangler_2.pkl','rb') as fp:
    wrangler = pickle.load(fp)
float_scaler_mean = wrangler['float_scaler_mean']
float_scaler_std = wrangler['float_scaler_std']
float_feature_names = wrangler['float_feature_names']
ohe_fields = wrangler['ohe_fields']
feature_names = wrangler['feature_names']
booster = xgboost.Booster()
booster.load_model('new_0003.model')


def make_test_set(df,wrangler):
    float_scaler_mean = wrangler['float_scaler_mean']
    float_scaler_std = wrangler['float_scaler_std']
    float_feature_names = wrangler['float_feature_names']
    ohe_fields = wrangler['ohe_fields']
    feature_names = wrangler['feature_names']
    print(len(feature_names))
    df_ohe = pd.get_dummies(df,columns=ohe_fields)

    float_features = df.xs(float_feature_names,axis=1).values
    float_features = (float_features - float_scaler_mean) / float_scaler_std
    for i,fname in enumerate(float_feature_names):
        df_ohe[fname] = float_features[:,i]

    empty_features = list(set(feature_names) - set(df_ohe.columns.tolist()))

    #for f in empty_features:
    #df_ohe[f] = 0

    df_ohe = df_ohe[feature_names]#.drop(columns=['SegID.1','geometry','SHAPE']+drop_columns,errors='ignore')

    print(df_ohe.columns)
    print(df_ohe.columns.tolist())
    X = df_ohe.values
    feature_names = df_ohe.columns.tolist()
    return X, feature_names

X,names = make_test_set(test_df,wrangler)
print (X.shape)
print (X[0])

我收到错误

82
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-59-a9fdb4a2e66f> in <module>
----> 1 X,names = make_test_set(test_df,wrangler)
      2 print (X.shape)
      3 print (X[0])

<ipython-input-56-18c1867fc096> in make_test_set(df, wrangler)
      9 
     10     float_features = df.xs(float_feature_names,axis=1).values
---> 11     float_features = (float_features - float_scaler_mean) / float_scaler_std
     12     for i,fname in enumerate(float_feature_names):
     13         df_ohe[fname] = float_features[:,i]

TypeError: unsupported operand type(s) for -: 'int' and 'StandardScaler'

我做错什么了吗?用正确的方法引导我,谢谢...

您好,谢谢您的光临,我在尝试获取预测形式训练模型时遇到问题,我尝试了很多代码来解决此问题,基本上,我的数据包含很多类型...

python machine-learning scikit-learn xgboost
1个回答
0
投票

创建了wrangler变量后,您已传递了scaler值,该值是StandardScalar对象。尝试假设您在定义wrangler时想要做什么,我可以想到:

wrangler = {
    'float_scaler_mean': float_features.mean(axis=0),
    'float_scaler_std': float_scaled,
    'float_feature_names': float_feature_names,
    'ohe_fields': ohe_fields,
    'feature_names': feature_names,
    'feature_transforms': feature_transforms 
}
© www.soinside.com 2019 - 2024. All rights reserved.