[我想使用R在Twitter上的推文进行情感分析。R中有任何方法。我为Analysis选择软件包tm。我安装了所需的软件包,并在推文中搜索特定主题。然后我保存数据并制作一个语料库。然后清除数据以进行分析。
mycorpus<-Corpus(VectorSource(tweets_df$text))
removeURL<-function(x) gsub("http[:alnum:]*","",x)
removeNumPunct<-function(x) gsub("[^[:alpha:][:space:]]*","",x)
removeUsername <- function(x) gsub('@[^[:space:]]*', '', x)
mycorpus<-tm_map(mycorpus,PlainTextDocument)
mycorpus <- tm_map(mycorpus, content_transformer(removeUsername))
mycorpus<-tm_map(mycorpus,content_transformer(removeURL))
mycorpus<-tm_map(mycorpus,content_transformer(tolower))
mycorpus <- tm_map(mycorpus, content_transformer(removeNumPunct))
mycorpus <- tm_map(mycorpus, removeNumbers)
mycorpus<-tm_map(mycorpus,stripWhitespace)
Stopwords <- readLines("/Users/Desktop/Stopwords.txt")
mycorpus <- tm_map(mycorpus, function(x) removeWords(x, germanStopwords))
在完成干净步骤后,我创建了一个文档术语矩阵和一个词云。到目前为止,我还没有问题或错误消息。
tweets_dtm <- DocumentTermMatrix(mycorpus)
tweets_dtm
对于情感分数,我进行以下步骤。
score.sentiment = function(Tweets, positivwords, negativwords, progress='none'){
scores = laply(Tweets,
function(Tweets, positivwords, negativwords) {
Tweets = gsub('[[:punct:]]', '', Tweets) # removes punctuation
Tweets = gsub('[[:cntrl:]]', '', Tweets) # removes controlcharacters
Tweets = gsub('\\d+', '', Tweets) # removes numbers
Tweets = gsub('https://', '', Tweets) # removes https://
Tweets = gsub('http://', '', Tweets) # removes http://
Tweets = gsub('[^[:graph:]]', '', Tweets) # removes graphic characters like emoticons
tryTolower=function(x){
y - NA
try_error=tryCatch(tolower(x),error=function(e)e)
if(inherits(try_error,"error"))
y - tolower (x)
return(y)
}
Tweets=sapply (Tweets, tryTolower)
word_list = str_split(Tweets, '\\s+') # splits the tweets by word in a list
words = unlist(word_list) # turns the list into vector
positive.matches = match(words, positivwords) ## returns matching
negative.matches = match(words, negativwords)
positive_matches = !is.na(positive.matches) ## converts matching values to true of false
negative_matches = !is.na(negative.matches)
score = sum(positive_matches) - sum(negative_matches) # true and false are
return(score)
}, positive_matches, negative_matches, .progress -.progress )
return(score)
}
直到这里我没有错误消息。现在,我将以下代码用于推文的情感评分:
score <- score.sentiment(tweets,positivwords,negativwords, .progress='text')
tweets$score=score
但是我有以下错误信息
score.sentiment(税,正词,负词,.progress =“ text”)中的错误: 未使用的参数(.progress =“ text”)
什么是此错误消息?
您为函数progress
而不是.progress
命名了参数,这就是为什么函数无法识别参数的原因。您还输了第二行的lapply
(您写了laply
)。所以这应该工作:
score.sentiment = function(Tweets, positivwords, negativwords, .progress='none'){ #changed progress to .progress
scores = lapply(Tweets, #changed laply to lapply
function(Tweets, positivwords, negativwords) {
Tweets = gsub('[[:punct:]]', '', Tweets) # removes punctuation
Tweets = gsub('[[:cntrl:]]', '', Tweets) # removes controlcharacters
Tweets = gsub('\\d+', '', Tweets) # removes numbers
Tweets = gsub('https://', '', Tweets) # removes https://
Tweets = gsub('http://', '', Tweets) # removes http://
Tweets = gsub('[^[:graph:]]', '', Tweets) # removes graphic characters like emoticons
tryTolower=function(x){
y - NA
try_error=tryCatch(tolower(x),error=function(e)e)
if(inherits(try_error,"error"))
y - tolower (x)
return(y)
}
Tweets=sapply (Tweets, tryTolower)
word_list = str_split(Tweets, '\\s+') # splits the tweets by word in a list
words = unlist(word_list) # turns the list into vector
positive.matches = match(words, positivwords) ## returns matching
negative.matches = match(words, negativwords)
positive_matches = !is.na(positive.matches) ## converts matching values to true of false
negative_matches = !is.na(negative.matches)
score = sum(positive_matches) - sum(negative_matches) # true and false are
return(score)
}, positive_matches, negative_matches, .progress -.progress )
return(score)
}