R语言进行Twitter数据可视化( 二 ) 概述对于这个项目

# Bar chart of tweets per hour for one crawled day, with a horizontal line and
# a text annotation marking the hourly average.
#
# Arguments:
#   data          data frame providing the columns `Hour` and `Total.Tweets`
#   subtitle      text shown under the title (the day being plotted)
#   label.x       x position of the "Average: ..." annotation
#   label.offset  vertical distance of the annotation above the average line
#
# Returns the ggplot object; calling it at top level prints the plot.
hourly_tweet_barplot <- function(data, subtitle, label.x, label.offset) {
  # Computed once; the original recomputed this mean for every layer.
  avg.tweets <- mean(data$Total.Tweets)

  ggplot(data) +
    geom_bar(aes(x = Hour, y = Total.Tweets, fill = I('red')),
             stat = 'identity', alpha = 0.75, show.legend = FALSE) +
    geom_hline(yintercept = avg.tweets, col = I('black'), size = 1) +
    # annotate() draws the label exactly once. geom_text() with constant
    # aesthetics would draw one copy per row of `data`, stacked on top of
    # each other.
    annotate('text',
             x = label.x, y = avg.tweets + label.offset,
             label = paste('Average:', ceiling(avg.tweets), 'Tweets per hour'),
             fontface = 'italic', hjust = 'left', size = 4) +
    labs(title = 'Total Tweets per Hours - Prabowo Subianto',
         subtitle = subtitle,
         caption = 'Twitter Crawling 28 - 29 May 2019') +
    xlab('Time of Day') +
    ylab('Total Tweets') +
    ylim(c(0, 100)) +
    theme_bw() +
    scale_fill_brewer(palette = 'Dark2')
}

# Tweets on 2019-05-28
hourly_tweet_barplot(data.hour.date1, '28 May 2019',
                     label.x = 6.5, label.offset = 5)

# Tweets on 2019-05-29
hourly_tweet_barplot(data.hour.date2, '29 May 2019',
                     label.x = 1, label.offset = 6)
文章插图
根据图2，使用关键字“Joko Widodo”和“Prabowo Subianto”的用户之间存在显著差异。关键词为“Joko Widodo”的推文集中在特定时段（07:00–09:00 WIB），讨论往往非常激烈，其中08:00 WIB的推文数量最多，达348条。相比之下，在2019年5月28日至29日期间，关键词为“Prabowo Subianto”的推文在各个时段持续出现，平均每小时上传36条。
# Density plot of sentiment scores, one filled curve per sentiment class.
#
# Arguments:
#   scores    data frame providing the columns `Score` and `Sentiment`
#   subtitle  text shown under the title (the candidate being plotted)
#
# Returns the ggplot object; calling it at top level prints the plot.
sentiment_density_plot <- function(scores, subtitle) {
  ggplot(scores) +
    geom_density(aes(x = Score, fill = Sentiment), alpha = 0.75) +
    xlim(c(-11, 11)) +
    labs(title = 'Density Plot of Sentiment Scores',
         subtitle = subtitle,
         caption = 'Twitter Crawling 28 - 29 May 2019') +
    # x carries the score and y the estimated density. The original Prabowo
    # plot had these two axis titles swapped.
    xlab('Score') +
    ylab('Density') +
    theme_bw() +
    scale_fill_brewer(palette = 'Dark2') +
    theme(legend.position = 'bottom', legend.title = element_blank())
}

# JOKO WIDODO
# %in% keeps every row whose class is Negative or Positive. `==` against a
# length-2 vector recycles it element by element and silently drops rows.
df.score.1 <- subset(senti.jokowi, class %in% c('Negative', 'Positive'))
# NOTE(review): the rename assumes exactly three columns, in the order
# score, text, class - confirm against where senti.jokowi is built.
colnames(df.score.1) <- c('Score', 'Text', 'Sentiment')
# Data viz
sentiment_density_plot(df.score.1, 'Joko Widodo')

# PRABOWO SUBIANTO
df.score.2 <- subset(senti.prabowo, class %in% c('Negative', 'Positive'))
colnames(df.score.2) <- c('Score', 'Text', 'Sentiment')
sentiment_density_plot(df.score.2, 'Prabowo Subianto')
文章插图
图3是2019年5月28日至29日以“Joko Widodo”和“Prabowo Subianto”为关键词的推文数量条形图。由图3（左）可以看出，Twitter用户在19:00–23:59 WIB期间谈论Prabowo Subianto的频率较低，这与印尼人的休息时间有关。不过，相关主题的推文在午夜仍有更新，因为有的用户居住在国外，有的用户此时仍然活跃。随后，用户在04:00 WIB开始活跃，推文数量在07:00 WIB达到高峰后下降，直到12:00 WIB才再次上升。
# JOKO WIDODO
# Frequency table of the sentiment scores: one row per distinct score.
df.senti.score.1 <- data.frame(table(senti.jokowi$score))
colnames(df.senti.score.1) <- c('Score', 'Freq')

# Data preprocessing
# table() yields the scores as a factor; go through character so the numeric
# values are recovered rather than the factor's internal level codes.
df.senti.score.1$Score <- as.numeric(as.character(df.senti.score.1$Score))
Score1 <- df.senti.score.1$Score

# Multiply both columns by the sign of the score: Score becomes its absolute
# value and Freq becomes negative for negative scores, so those bars point
# downwards. Vectorised replacement for the original row-by-column loop.
# NOTE(review): sign(0) is 0, so the frequency of score 0 is zeroed and that
# row is labelled 'Positive' below - confirm this is intended.
score.sign.1 <- sign(df.senti.score.1$Score)
df.senti.score.1$Score <- df.senti.score.1$Score * score.sign.1
df.senti.score.1$Freq <- df.senti.score.1$Freq * score.sign.1

# Letters act as ordered placeholders on the discrete x axis; the real score
# values are put back as tick labels in scale_x_discrete() below.
# NOTE(review): `letters` has 26 entries, so more than 26 distinct scores
# would produce NA labels.
df.senti.score.1$Label <- letters[seq_len(nrow(df.senti.score.1))]
df.senti.score.1$Sentiment <- ifelse(df.senti.score.1$Freq < 0,
                                     'Negative', 'Positive')
df.senti.score.1$Score1 <- Score1

# Average bar heights per sentiment, computed once and reused by the layers.
freq.pos.1 <- df.senti.score.1$Freq[df.senti.score.1$Sentiment == 'Positive']
freq.neg.1 <- df.senti.score.1$Freq[df.senti.score.1$Sentiment == 'Negative']
avg.pos.1 <- mean(abs(freq.pos.1))
avg.neg.1 <- mean(freq.neg.1)

# Data visualisation
ggplot(df.senti.score.1) +
  geom_bar(aes(x = Label, y = Freq, fill = Sentiment),
           stat = 'identity', show.legend = FALSE) +
  # Positive sentiment: average line plus a single text label.
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would draw one copy per data row.
  geom_hline(yintercept = avg.pos.1, col = I('black'), size = 1) +
  annotate('text',
           x = 10, y = avg.pos.1 + 30,
           label = paste('Average Freq:', ceiling(avg.pos.1)),
           fontface = 'italic', hjust = 'right', size = 4) +
  # Negative sentiment: the line sits at the (negative) mean, while the label
  # reports the mean of the absolute frequencies.
  geom_hline(yintercept = avg.neg.1, col = I('black'), size = 1) +
  annotate('text',
           x = 5, y = avg.neg.1 - 15,
           label = paste('Average Freq:', ceiling(mean(abs(freq.neg.1)))),
           fontface = 'italic', hjust = 'left', size = 4) +
  labs(title = 'Barplot of Sentiments',
       subtitle = 'Joko Widodo',
       caption = 'Twitter Crawling 28 - 29 May 2019') +
  xlab('Score') +
  scale_x_discrete(limits = df.senti.score.1$Label,
                   labels = df.senti.score.1$Score1) +
  theme_bw() +
  scale_fill_brewer(palette = 'Dark2')

# PRABOWO SUBIANTO
df.senti.score.2 <- data.frame(table(senti.prabowo$score))
colnames(df.senti.score.2) <- c('Score', 'Freq')
#
数据预处理df.senti.score.2$Score = as.character(df.senti.score.2$Score)df.senti.score.2$Score = as.numeric(df.senti.score.2$Score)Score2 = df.senti.score.2$Scoresign(df.senti.score.2[1,1])for (i in 1:nrow(df.senti.score.2)) {sign.row = sign(df.senti.score.2[i,'Score'])for (j in 1:ncol(df.senti.score.2)) {df.senti.score.2[i,j] = df.senti.score.2[i,j] * sign.row}}df.senti.score.2$Label = c(letters[1:nrow(df.senti.score.2)])df.senti.score.2$Sentiment = ifelse(df.senti.score.2$Freq


上一页
1
2
3
4
下一页
		  	


















设计语言|全新家族设计，三星Galaxy A32渲染图曝光 

信服|深信服何朝曦：安全不能只面向静态风险进行建设，应该从＂面向风险＂转向＂面向能力＂ 

Twitter|Twitter的Audio Spaces测试包括转录、扬声器控制和报告功能 

要来|折叠屏iPhone要来了？传已送至富士康进行测试 

目标和意义进行|关于智能的新思考：我们为什么探索性提出智能三定律？ 

曝光|微软新专利曝光 可以与会者面部表情和肢体语言为会议打分 

学习C语言的软件，就突然被我绿了？ 

Linux 之父对 C++ 进行了炮轰，C++不值得推荐？ 

关系数据源|业务人员可以进行自助ETL操作？这款BI工具你值得拥有 

LeetCode第1 题：两数之和 Go语言精解