我正在尝试测试一段用 Python 2 编写的代码(代码可在此处免费获取)。运行代码时,我遇到了下面这个与 list 相关的错误。
非常感谢您的帮助!(我是 Python 新手)
简
Traceback
`(C:\Users\myusername\Anaconda3) C:\Users\myusername\Documents\DSSP7\Projet
Insurance\kaggle-prudential-master\code>python.exe logRegression.py
Traceback (most recent call last):
File "logRegression.py", line 98, in <module>
y_1, y_2 = train_ohd.iloc[X_1]['Response'], train_ohd.iloc[X_2]['Response']
File "C:\Users\myusername\Anaconda3\lib\site-
packages\pandas\core\indexing.py", line 1328, in __getitem__
return self._getitem_axis(key, axis=0)
File "C:\Users\myusername\Anaconda3\lib\site-
packages\pandas\core\indexing.py", line 1738, in _getitem_axis
return self._get_list_axis(key, axis=axis)
File "C:\Users\myusername\Anaconda3\lib\site-
packages\pandas\core\indexing.py", line 1715, in _get_list_axis
return self.obj.take(key, axis=axis, convert=False)
File "C:\Users\myusername\Anaconda3\lib\site-
packages\pandas\core\generic.py", line 1928, in take
convert=True, verify=True)
File "C:\Users\myusername
\Anaconda3\lib\site-packages\pandas\core\internals.py", line 3998, in take
else np.asanyarray(indexer, dtype='int64'))
File "C:\Users\myusername
Anaconda3\lib\site-packages\numpy\core\numeric.py", line 583, in asanyarray
return array(a, dtype, copy=False, order=order, subok=True)
TypeError: int() argument must be a string, a bytes-like object or a number,
not 'filter'
logRegression.py
import pandas as pd
from sklearn.linear_model import LogisticRegression
import json
from sklearn import metrics
from label_decoders import *
# Load the file locations from settings.json, then read the train and test
# CSV files named by its 'train' and 'test' entries.
config = json.load(open('settings.json'))
train = pd.read_csv(config['train'])
test = pd.read_csv(config['test'])
# combine train and test so the same preprocessing is applied to both
all_data = train.append(test)
# Preprocess data
# create any new variables: the first and second character of Product_Info_2
all_data['Product_Info_2_char'] = all_data.Product_Info_2.str[0]
all_data['Product_Info_2_num'] = all_data.Product_Info_2.str[1]
# factorize categorical variables: pd.factorize(...)[0] is the array of
# integer codes, which replaces the original values
all_data['Product_Info_2'] = pd.factorize(all_data['Product_Info_2'])[0]
all_data['Product_Info_2_char'] = pd.factorize(all_data['Product_Info_2_char'])[0]
all_data['Product_Info_2_num'] = pd.factorize(all_data['Product_Info_2_num'])[0]
## combine features
# BMI by age
all_data['BMI_Age'] = all_data['BMI'] * all_data['Ins_Age']
## sum features
# Med keyword sum: row-wise sum over every column whose name starts with
# 'Medical_Keyword_'
med_keyword_columns = all_data.columns[all_data.columns.str.startswith('Medical_Keyword_')]
all_data['Med_Keywords_Count'] = all_data[med_keyword_columns].sum(axis=1)
# handle missing values : eliminate missing values
## Use -1 for NA
# NOTE(review): the result of the next call is discarded; the line after it
# repeats the same computation and stores it.
all_data.apply(lambda x: sum(x.isnull()),1)
# countna = number of null values in each row (axis 1), computed before the
# nulls are replaced below
all_data['countna'] = all_data.apply(lambda x: sum(x.isnull()),1)
all_data.fillna(-1, inplace=True)
#fix the dtype of the label column(convert it to integer)
all_data['Response'] = all_data['Response'].astype(int)
# split train and test: rows with Response > 0 are training rows, rows with
# Response < 1 are test rows (presumably test rows had no Response and now
# hold the -1 fill value -- verify against the data)
train_ohd = all_data[all_data['Response']>0].copy()
test_ohd = all_data[all_data['Response']<1].copy()
# convert data
# Rename the training columns: '=' becomes '_' and then every '_' becomes 'i',
# so both characters end up as 'i'.
features=train_ohd.columns.tolist()
features = [x.replace('=','_') for x in features]
features = [x.replace('_','i') for x in features]
train_ohd.columns = features
# Same renaming for the test columns ('=' is mapped to 'i' directly here; the
# resulting names are the same as for the training columns).
features_t=test_ohd.columns.tolist()
features_t = [x.replace('=','i') for x in features_t]
features_t = [x.replace('_','i') for x in features_t]
test_ohd.columns = features_t
# `features` is used later to select the model input columns, so the
# identifier and the label are removed from it.
features.remove("Id")
features.remove("Response")
# Create the columns lr1 .. lr13, initialised to 0 for every training row.
# They are created after `features` was built, so they are not in `features`.
train_ohd['lr1'] = [0]*train_ohd.shape[0]
train_ohd['lr2'] = [0]*train_ohd.shape[0]
train_ohd['lr3'] = [0]*train_ohd.shape[0]
train_ohd['lr4'] = [0]*train_ohd.shape[0]
train_ohd['lr5'] = [0]*train_ohd.shape[0]
train_ohd['lr6'] = [0]*train_ohd.shape[0]
train_ohd['lr7'] = [0]*train_ohd.shape[0]
train_ohd['lr8'] = [0]*train_ohd.shape[0]
train_ohd['lr9'] = [0]*train_ohd.shape[0]
train_ohd['lr10'] = [0]*train_ohd.shape[0]
train_ohd['lr11'] = [0]*train_ohd.shape[0]
train_ohd['lr12'] = [0]*train_ohd.shape[0]
train_ohd['lr13'] = [0]*train_ohd.shape[0]
# l = number of training rows.
l = train_ohd.shape[0]
# ind_list holds 10 pairs (block, rest): `block` is a contiguous range of row
# positions of length l//10 (the last block runs up to l, so it also takes the
# remainder) and `rest` is every position in range(0, l) outside that block.
# NOTE: the script was written for Python 2, where filter() returns a list.
# Under Python 3, filter() returns a lazy filter object instead; passing that
# object to .iloc is what raises the
# "TypeError: int() argument must be ... not 'filter'" shown in the traceback.
ind_list = [(range(0,l//10), filter(lambda x: x not in range(0,l//10), range(0,l))),
(range(l//10,l//10*2), filter(lambda x: x not in range(l//10,l//10*2), range(0,l))),
(range(l//10*2,l//10*3), filter(lambda x: x not in range(l//10*2,l//10*3), range(0,l))),
(range(l//10*3,l//10*4), filter(lambda x: x not in range(l//10*3,l//10*4), range(0,l))),
(range(l//10*4,l//10*5), filter(lambda x: x not in range(l//10*4,l//10*5), range(0,l))),
(range(l//10*5,l//10*6), filter(lambda x: x not in range(l//10*5,l//10*6), range(0,l))),
(range(l//10*6,l//10*7), filter(lambda x: x not in range(l//10*6,l//10*7), range(0,l))),
(range(l//10*7,l//10*8), filter(lambda x: x not in range(l//10*7,l//10*8), range(0,l))),
(range(l//10*8,l//10*9), filter(lambda x: x not in range(l//10*8,l//10*9), range(0,l))),
(range(l//10*9,l), filter(lambda x: x not in range(l//10*9,l), range(0,l)))]
# The 13 label decoders; each one is called on the Response values to produce
# the target passed to fit(). Presumably they come from
# `from label_decoders import *` -- confirm.
ld = [labels_decoder1,labels_decoder2,labels_decoder3,labels_decoder4,labels_decoder5,labels_decoder6,labels_decoder7,
labels_decoder8,labels_decoder9,labels_decoder10,labels_decoder11,labels_decoder12,labels_decoder13]
# train the model
# NOTE(review): the indentation of this script was lost when it was pasted, so
# the extent of the two loop bodies below has to be inferred.
# i counts the decoders from 1 and selects the output column 'lr<i>'.
i = 0
# NOTE: this loop rebinds `l`, which held the training row count above.
for l in ld:
i = i + 1
# one pass per entry of ind_list
for j in range(10):
# X_1 = positions outside the block (used to fit), X_2 = the block itself
# (used to predict)
X_1, X_2 = ind_list[j][1], ind_list[j][0]
y_1, y_2 = train_ohd.iloc[X_1]['Response'], train_ohd.iloc[X_2]['Response']
# get preds based on train data
lr = LogisticRegression(random_state=1)
lr.fit(train_ohd[features].iloc[X_1],l(y_1))
# .T[1] is the second column of the predict_proba output; it is stored in
# column 'lr<i>' for the rows of the block.
# NOTE(review): this is a chained assignment (column selection followed by
# .iloc) -- confirm that it writes into train_ohd and not into a copy.
train_ohd['lr%s' % (i)].iloc[X_2] = lr.predict_proba(train_ohd[features].iloc[X_2]).T[1]
# write the training frame, including the lr columns, to the 'train_lr' path
train_ohd.to_csv(config['train_lr'],index=0)
# Labels of the whole training set, used to fit the final models below.
y = train_ohd['Response']
#print(y) my addition, to display locally
# test the model
# NOTE(review): the indentation of this script was lost when it was pasted, so
# the extent of the loop body below has to be inferred.
i = 0
for l in ld:
i = i + 1
# No y in the test data
# get preds based on test data
###1
lr = LogisticRegression(random_state=1)
# NOTE(review): the trailing ", i" only builds a tuple that is discarded; the
# fit itself still runs.
lr.fit(train_ohd[features],l(y)), i
# second column of the predict_proba output, stored as column 'lr<i>' of the
# test frame
test_ohd['lr%s' % (i)] = lr.predict_proba(test_ohd[features]).T[1]
# write the test frame, including the lr columns, to the 'test_lr' path
test_ohd.to_csv(config['test_lr'],index=0)
#y_pred = test_ohd TO BE SPECIFIED: what has to be displayed, cf. kaggle, AND PRINTED
#y_pred = test_ohd.to_csv(config['test_lr'],index=0)
`
这里的问题源于内置函数 filter 在 Python 2 和 Python 3 中的行为不同。您可以参考这个相关问题,或者查阅 Python 2 和 Python 3 的官方文档。
简而言之:在 Python 2 中,filter 返回一个列表,所以每个 ind_list[j][1] 都是整数列表。
然而,在 Python 3 中,filter 返回一个惰性迭代器(filter 对象),这就是为什么您会得到 TypeError: int() argument must be a string, a bytes-like object or a number, not 'filter':每个 ind_list[j][1] 都是一个 filter 对象,而不是列表。
您可以在创建 ind_list 时,把每个 filter 调用的结果都转换为列表,例如:
list(filter(lambda x: x not in range(0,l//10), range(0,l)))
或者改用 Python 2。不过我猜您使用 Python 3 是有特定原因的。
提示:
既然您使用的是 Anaconda,可以运行
conda create -n py27 python=2.7 scikit-learn pandas numpy
然后运行
activate py27
在 Anaconda 提示符下执行这些命令,即可得到一个使用 Python 2 的虚拟环境。