Social Network Analysis using R - Twitter Data Analysis and Sentiment Analysis for a Network of Students. Kindly note that this is a preliminary post, a sort of proof of concept with limited data and code. Given Twitter network data for 200+ nodes (participants), we extract sentiments from their tweets. As seen from the histograms of the sentiment
analysis of the Twitter data, sentiment
is more positive amongst the students of the 4th Year as compared
to students of the 2nd Year. Also, as seen from the histograms of the sentiment analysis of the Twitter data, sentiment is much more positive in the Technology stream than in the Business stream of study. ## ------------------------------------------- library(igraph) library(statnet) #statnet::update_statnet() ## Loading required package:
network ## Network attributes: ## vertices = 205 ##
vertices = 205 ## 4.2.3/
StudentID: library(parallel) ## As seen in the histograms above, the sentiment is much more positive amongst the students of the 4th Year as compared to students of the 2nd Year.
# Histograms of Tweet Scores for Business and Technology Students respectively
## -------------------------------------------
# Source Code Courtesy -- Jeff Leek (jtleek@gmail.com)
# source("http://biostat.jhsph.edu/~jleek/code/twitterMap.R")
# Main - https://github.com/JulianHill/R-Tutorials
# Also See - Seems Doesnt Work on WINDOWS -
#   https://github.com/vdimarco/twitterMap/blob/master/twitter.R

# Install the tooling only when it is missing, so that re-running the script
# does not reinstall the packages every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("twitteR", quietly = TRUE)) {
  devtools::install_github("geoffjentry/twitteR")
}
#
# To be used -- install.packages("base64enc")
# devtools::install_version("httr", version="0.6.0", repos="http://cran.us.r-project.org")

# Packages are attached from the default library search path (.libPaths())
# instead of a hard-coded, machine-specific "~/R/win-library/3.1" directory.
library(RColorBrewer)
library(geosphere)
library(maps)
# Rohit Comment - Seen below packages from other Twitter Example
library(XML)
library(twitteR)
library(Rcpp)
library(rjson)
library(xml2)
library(bit64)
library(httr)
library(RCurl)
library(ROAuth)
library(stringr)
library(plyr)
library(digest)

# CA certificate bundle, saved into the current working directory.
download.file(url = "http://curl.haxx.se/ca/cacert.pem", destfile = "cacert.pem")

# Twitter API credentials. Each value is read from an environment variable
# when it is set; otherwise the (masked) placeholder from the original post is
# used. Prefer the environment variables so real secrets stay out of the file.
api_key <- Sys.getenv(
  "TWITTER_API_KEY",
  unset = "XcRuw#################umUG"
)
api_secret <- Sys.getenv(
  "TWITTER_API_SECRET",
  unset = "IScXQXnz#############oGabr"
)
access_token <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN",
  unset = "73090441-###############B3eI7kjBmq4Y"
)
access_token_secret <- Sys.getenv(
  "TWITTER_ACCESS_TOKEN_SECRET",
  unset = "za5MLQMPAA333333333333333333333vT"
)

setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
## Rohit Comment - this oauth needs to be run for every session,
## also after some break ... time not sure

# Quick check that the authenticated session works: search and show results.
RDT <- searchTwitter("DhankarRohit")
RDT
#
# searchTwitter("#rstats")
#
# d.df=twListToDf(MLB.list) ### Rohit Comment
# ---- MLB.list is Doubtful
# ??twListToDf
# zz <- searchTwitter("#rstats")
# twListToDF(zz)
# -- Jeff Leek code - example calls:
#   twitterMap("DhankarRohit")
#   twitterMap("DhankarRohit", userLocation = "Gurgaon", plotType = "both")

# Geocode a set of location strings relative to the user's position.
#
# `findLatLon()` and `trim()` are defined elsewhere (not visible in this
# section); `findLatLon()` is used here as returning a list with `latlon` and
# `cont` elements. Returns a 4-column matrix: the unlisted `latlon` values,
# the `distCosine()` distance to `userLL`, and the `cont` code. Rows are
# sorted by decreasing distance and rows that could not be located are dropped.
.twitter_map_locate <- function(userLL, locations) {
  ll <- matrix(NA, nrow = length(locations), ncol = 4)
  # seq_along() (rather than 1:length()) keeps the loop empty for zero accounts.
  for (i in seq_along(locations)) {
    if (length(locations[[i]]) > 0) {
      tmpLL <- findLatLon(trim(locations[[i]]))
      if (any(!is.na(tmpLL$latlon))) {
        ll[i, ] <- c(
          unlist(tmpLL$latlon),
          distCosine(userLL, tmpLL$latlon),
          unlist(tmpLL$cont)
        )
      }
    }
  }
  # Rohit Comment - reordering by distance; the reason is not stated in the
  # original code (presumably so that the longest arcs are drawn first).
  # drop = FALSE keeps a matrix even when only one row remains.
  ll <- ll[order(-ll[, 3]), , drop = FALSE]
  ll[!is.na(ll[, 1]), , drop = FALSE]
}

# Draw one world-map panel: a great-circle line from the user to every row of
# `connLL`, coloured by the code in column 4, plus a legend with per-code
# counts. `getGreatCircle()` is defined elsewhere (not visible in this section).
.twitter_map_panel <- function(userLL, connLL, title, cols) {
  map("world", col = "#191919", bg = "black", fill = TRUE,
      mar = rep(0, 4), border = 0)
  mtext(title, col = "lightgrey")
  for (i in seq_len(nrow(connLL))) {
    greatC <- getGreatCircle(userLL, connLL[i, 1:2])
    lines(greatC, col = cols[connLL[i, 4]], lwd = 0.8)
  }
  # Codes 1..6 in column 4 correspond to these legend labels, in this order.
  continents <- c("Asia", "Africa", "N. America", "S. America",
                  "Australia/N.Z.", "Europe")
  counts <- vapply(
    seq_along(continents),
    function(k) sum(connLL[, 4] == k, na.rm = TRUE),
    integer(1)
  )
  legend(-180, 0, legend = paste(continents, counts),
         text.col = cols[1:6], bg = "black", cex = 0.75)
}

#' Plot a world map of a Twitter user's followers and/or followed accounts
#'
#' Looks up the user through twitteR, geocodes the profile locations of up to
#' `nMax` followers and `nMax` followed accounts, and writes a PDF in which
#' each located account is joined to the user by a great-circle line.
#'
#' @param DhankarRohit Twitter screen name as a character string, e.g.
#'   "DhankarRohit" (quoted, without the leading "@"). The argument keeps its
#'   historical name; it is now actually used for the lookup and the map
#'   titles instead of a hard-coded literal.
#' @param userLocation Location of the user. When `NULL`, the location from
#'   the user's Twitter profile is used.
#' @param fileName Path of the PDF file to write.
#' @param nMax Maximum number of followers / followed accounts to fetch.
#' @param plotType One of "followers", "both" or "following".
#' @return `NULL`, invisibly. Called for its side effect of writing `fileName`.
twitterMap <- function(DhankarRohit = "DhankarRohit", userLocation = NULL,
                       fileName = "twitterMap.pdf", nMax = 10,
                       plotType = c("followers", "both", "following")) {
  # Collapse the default choice vector to a single validated value; comparing
  # the whole vector inside if() is an error in current versions of R.
  plotType <- match.arg(plotType)

  # Get location data
  cat("Getting data from Twitter, this may take a moment.\n")
  tmp <- getUser(DhankarRohit)
  if (is.null(userLocation)) {
    userLocation <- trim(location(tmp))
    if (length(userLocation) != 1 || is.na(userLocation) ||
        nchar(userLocation) < 2) {
      stop("We can not find your location from Twitter")
    }
  }

  # nMax number of Followers
  followers <- tmp$getFollowers(n = nMax)
  followersLocation <- lapply(followers, function(x) location(x))
  # nMax number of Following
  following <- tmp$getFriends(n = nMax)
  followingLocation <- lapply(following, function(x) location(x))

  # Load the geographic data - these all load as Class - Promise, but the
  # moment we call str() on them they turn into a data frame.
  data(world.cities)
  # str(world.cities)
  # How to find - population of Delhi ... in Data Frame pop_del<-
  # Have commented the Cities - data for US and Canada below - this
  # was retained as such by J Leek. Jeff Leek (jtleek@gmail.com)
  # data(us.cities)
  # data(canada.cities)

  # Find the latitude and longitude of the user - USER being SELF
  cat("Getting geographic (latitude/longitude) of Twitter users.\n")
  userLL <- findLatLon(userLocation)$latlon
  if (any(is.na(userLL))) {
    stop("We can't find the latitude and longitude of your location from Twitter")
  }

  # Find the latitude and longitude of each of the followers/following
  # and calculate the distance to the user
  followersLL <- .twitter_map_locate(userLL, followersLocation)
  followingLL <- .twitter_map_locate(userLL, followingLocation)

  cat("Plotting results.\n")
  # Set up the colors
  cols <- brewer.pal(7, "Set2")

  # fileName is the value given in the function signature / call.
  # "both" uses a taller page holding two stacked panels.
  pdf(fileName, height = if (plotType == "both") 12 else 6, width = 10)
  # Close the device even when plotting fails part-way through.
  on.exit(dev.off(), add = TRUE)
  data(worldMapEnv)
  if (plotType == "both") {
    par(mfrow = c(2, 1), mar = rep(0, 4))
  }
  if (plotType %in% c("followers", "both")) {
    .twitter_map_panel(
      userLL, followersLL,
      paste0("@", DhankarRohit, " Follower Map"), cols
    )
  }
  if (plotType %in% c("following", "both")) {
    .twitter_map_panel(
      userLL, followingLL,
      paste0("@", DhankarRohit, " Following Map"), cols
    )
  }
  mtext("Created by @simplystats twitterMap", side = 1, adj = 1, cex = 0.8,
        col = "grey")

  invisible(NULL)
}

# Experiment --- to see how to get a Population of a City from
# str(world.cities)
# How to find - population of Delhi ... in Data Frame pop_del<-
# http://stackoverflow.com/questions/15303283/how-to-do-vlookup-and-fill-down-like-in-excel-in-r
# Complete StackOverflow Page in the same Folder
# The read.table below makes a data.frame out of a text entry: simple text
# converted into a data.frame, one "HouseType HouseTypeNo" record per line.
houses <- read.table(text = "Semi 1
Single 2
Row 3
Single 2
Apartment 4
Apartment 4
Row 3", col.names = c("HouseType", "HouseTypeNo"))

# External Links / Recommended Reads :-
# 1/ http://arxiv.org/abs/1312.6635 --- Topic and Sentiment Analysis on Social Networks |