我是机器学习(Machine Learning)和 Python 的新手。最近我一直在使用来自 Kaggle 的亚马逊美食评论(Amazon Fine Food Reviews)数据集及其配套代码。我不明白这里使用的 'partition' 函数是如何工作的?另外,最后 3 行代码究竟做了什么?
%matplotlib inline
import sqlite3
import pandas as pd
import numpy as np
import nltk
import string
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import roc_curve, auc
from nltk.stem.porter import PorterStemmer
# Open the SQLite database file that holds the review data.
con = sqlite3.connect(
    './amazon-fine-food-reviews/database.sqlite'
)
# Load every row of the Reviews table except those whose Score is 3,
# so only reviews scored above or below 3 are kept.
filtered_data = pd.read_sql_query(
    sql="""
SELECT *
FROM Reviews
WHERE Score != 3
""",
    con=con,
)
# Give reviews with Score > 3 a positive rating, and reviews with
# Score < 3 a negative rating.
def partition(x):
    """Map a numeric review score to a sentiment label.

    Args:
        x: The review score; it is compared against 3 with ``<``.

    Returns:
        The string 'negative' when ``x < 3``, otherwise 'positive'.
        A score of exactly 3 therefore yields 'positive'.
    """
    if x < 3:
        return 'negative'
    return 'positive'
# Replace the numeric scores with labels: scores below 3 become
# 'negative' and all other scores become 'positive'.
actualScore = filtered_data['Score']  # the 'Score' column of filtered_data
positiveNegative = actualScore.map(partition)  # partition applied to each score
filtered_data['Score'] = positiveNegative  # overwrite the column with the labels
从 filtered_data 中取出 Score 列,并赋给名为 actualScore 的变量(一个 pandas Series):
# Bind actualScore to the 'Score' column of filtered_data.
actualScore = filtered_data['Score']
对 actualScore 中的每个值调用 partition:小于 3 的值编码为 'negative',其余的值(此处即大于 3 的值)编码为 'positive',结果保存在 positiveNegative 中:
# Apply partition to each element of actualScore and keep the results
# as positiveNegative.
# NOTE(review): the same mapping appears earlier in this file; if it has
# already run, 'Score' holds string labels here rather than numbers —
# confirm this repeat is illustrative only.
positiveNegative = actualScore.map(partition)
用新编码的值覆盖原来的 Score 列:
# Overwrite the 'Score' column of filtered_data with the mapped values.
filtered_data['Score'] = positiveNegative