import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
import nltk
# Load the Starbucks Play Store reviews and keep at most the first 70,000 rows.
df = pd.read_csv(
    '/kaggle/input/starbucks-review-play-store/starbucksEN.csv',
    sep=",",
    encoding="utf-8",
)
df = df.head(70000)

# Bar chart of the number of reviews per 'Score' value, ordered by score.
ax = (
    df['Score']
    .value_counts()
    .sort_index()
    .plot(kind='bar', title='Count of Reviews by Stars', figsize=(10, 5))
)
ax.set_xlabel('Review Stars')
plt.show()
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm
# Run VADER over every row and collect the polarity scores keyed by the row's 'Id'.
sia = SentimentIntensityAnalyzer()
res = {}
for i, row in tqdm(df.iterrows(), total=len(df)):
    text = row['Text']
    myid = row['Id']
    # NOTE: this call raises AttributeError when `text` is not a str
    # (e.g. a float, presumably NaN from an empty 'Text' cell);
    # passing str(text) instead avoids the crash.
    res[myid] = sia.polarity_scores(text)
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_27/744249663.py in <module>
      3     text = row['Text']
      4     myid = row['Id']
----> 5     res[myid] = sia.polarity_scores(text)

/opt/conda/lib/python3.7/site-packages/nltk/sentiment/vader.py in polarity_scores(self, text)
    360         # text, words_and_emoticons, is_cap_diff = self.preprocess(text)
    361         sentitext = SentiText(
--> 362             text, self.constants.PUNC_LIST, self.constants.REGEX_REMOVE_PUNCTUATION
    363         )
    364         sentiments = []

/opt/conda/lib/python3.7/site-packages/nltk/sentiment/vader.py in __init__(self, text, punc_list, regex_remove_punctuation)
    268     def __init__(self, text, punc_list, regex_remove_punctuation):
    269         if not isinstance(text, str):
--> 270             text = str(text.encode("utf-8"))
    271         self.text = text
    272         self.PUNC_LIST = punc_list

AttributeError: 'float' object has no attribute 'encode'
我正在跟着 YouTube 上的一个视频教程做情感分析。这是视频作者的 Kaggle 笔记本链接: https://www.kaggle.com/code/robikscube/sentiment-analysis-python-youtube-tutorial/notebook 。运行上面的代码时出现了这个 AttributeError,我不知道哪里出了问题,该如何解决?
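在把 `text` 传给 `polarity_scores` 之前,先将它转换为字符串。从报错信息看,`Text` 列中有的值是 float 而不是 str(很可能是缺失值,pandas 会把空单元格读成 float 类型的 NaN),而 VADER 会对非字符串的输入调用 `text.encode("utf-8")`,所以才会出现 `'float' object has no attribute 'encode'`。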
# Coerce the value to str first so that non-string entries (e.g. a float NaN)
# never reach VADER's text.encode("utf-8") call.
# Note: a NaN becomes the literal text 'nan'; drop such rows beforehand
# if they should not be scored at all.
res[myid] = sia.polarity_scores(str(text))