Social Network Analysis using R - Twitter Data Analysis and Sentiment Analysis for a Network of Students. Kindly note that this is a preliminary post, a sort of proof of concept with limited data and code. Given Twitter network data for 200+ nodes (participants), we extract sentiments from their Tweets. As seen from the histograms of the sentiment analysis of the Twitter data, sentiment is more positive amongst the students of the 4th Year than amongst the students of the 2nd Year. Also, as seen from the histograms of the sentiment analysis of the Twitter data, sentiment is much more positive in the Technology stream than in the Business stream of study.  ## -------------------------------------------library(igraph) library(statnet) #statnet::update_statnet() ## Loading required package: network #1/ Load the dataset -- Loading the Network Class Object load("studentNetwork.RData") #2/ Make a plot of the Network -- plot(studentNetwork,main = "Student Network")## Network attributes:##   vertices = 205 ##   directed = FALSE ##   hyper = FALSE ##   loops = FALSE ##   multiple = FALSE ##   bipartite = FALSE ##  total edges = 203 ##    missing edges = 0 ##    non-missing edges = 203 ##  density = 0.009708274 ##   vertices = 205 ##   directed = FALSE ##   hyper = FALSE ##   loops = FALSE ##   multiple = FALSE ##   bipartite = FALSE ##  total edges = 203 ##    missing edges = 0 ##    non-missing edges = 203 ##  density = 0.009708274 ## ## 4.2/ Vertex attributes: accessed using %v% operator ## ##  4.2.1/ Course_of_Study: ##    character valued attribute ##    attribute summary: ##          Business         Fine_Arts      Liberal_Arts Physical_Sciences ##               109                 4                68                 6 ##        Technology ##                18 ## ##  4.2.2/ Sex: ##    character valued attribute ##    attribute summary: ##   F   M ##  99 106 ##  4.2.3/ StudentID: ##    integer valued attribute ##    205 values ## ##  4.2.4/ Tweets: ##    character valued attribute 
##    attribute summary: ##    the 10 most common values are:___________truncated output _____                                                                                                          ## 4.2.5/  Year: ##    numeric valued attribute ##    attribute summary: ##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. ##   1.000   1.000   3.000   2.732   4.000   6.000 ## ## 4.3/No edge attributes : When present Edge attributes are - accessed using %e% operator library(parallel) library(foreach) #Extracting students' Tweets std.twt=studentNetwork%v%"Tweets" #Assigning Sentiment Scores to the Tweets tweet.score <- foreach(i=1:205,.combine='rbind') %dopar% { # the ".combine" is used as we want "foreach" to return a Numeric Vector   # and not a list   # %dopar% is used for Parallel processing   words<-unlist(strsplit(std.twt[i],split="\\|")) # This "|" splits the pipe delimited string into vectors of Single Words   Pos.match = match(words, unlist(Pos))   Neg.match = match(words, unlist(Neg))   Pos.match = !is.na(Pos.match) # TRUE where a word matched the positive list; match() returns NA when there is no match   # NA{base} - The generic function is.na indicates which elements are missing.   Neg.match = !is.na(Neg.match) ## As seen in the Histograms above, the sentiment is much more positive amongst the students of the 4th Year than amongst the students of the 2nd Year.   
# Histograms of Tweet Scores for Business and Technology Students respectively.
# `tweet.score`, `student.CoS` and `symbol` are expected to exist already; they
# are not created in this part of the script.
hist(tweet.score[student.CoS == "Business"], main = "Sentiment - Business",
     xlab = "Tweet Score", col = symbol)
hist(tweet.score[student.CoS == "Technology"], main = "Sentiment - Technology",
     xlab = "Tweet Score", col = symbol)
## -------------------------------------------
# Source Code Courtesy -- Jeff Leek (jtleek@gmail.com)
# source("http://biostat.jhsph.edu/~jleek/code/twitterMap.R")
# Main - https://github.com/JulianHill/R-Tutorials
# Also See - Seems Doesnt Work on WINDOWS - https://github.com/vdimarco/twitterMap/blob/master/twitter.R

# Install devtools only when it is missing, then pull twitteR from GitHub.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("geoffjentry/twitteR")
# # To be used -- install.packages("base64enc")
# devtools::install_version("httr", version="0.6.0", repos="http://cran.us.r-project.org")

# Packages are loaded from the default library paths (.libPaths()). The earlier
# hard-coded lib.loc = "~/R/win-library/3.1" only resolved on one Windows
# machine running R 3.1 and made every library() call fail elsewhere.
library(RColorBrewer)
library(geosphere)
library(maps)
# Rohit Comment - Seen below packages from other Twitter Example
library(XML)
library(twitteR)
library(Rcpp)
library(rjson)
library(xml2)
library(bit64)
library(httr)
library(RCurl)
library(ROAuth)
library(stringr)
library(plyr)
library(digest)

# CA certificate bundle, saved into the current working directory.
download.file(url = "http://curl.haxx.se/ca/cacert.pem", destfile = "cacert.pem")

# Twitter API credentials. Prefer environment variables so real secrets never
# have to be written into the script; the masked placeholders from the post are
# kept as fallbacks and must be replaced before the OAuth handshake can succeed.
api_key <- Sys.getenv("TWITTER_API_KEY",
                      unset = "XcRuw#################umUG")
api_secret <- Sys.getenv("TWITTER_API_SECRET",
                         unset = "IScXQXnz#############oGabr")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN",
                           unset = "73090441-###############B3eI7kjBmq4Y")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET",
                                  unset = "za5MLQMPAA333333333333333333333vT")
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
##
# Rohit Comment - this oauth needs to be run for every session, and also again
# after some break ... time not sure.
RDT <- searchTwitter("DhankarRohit")
RDT
# # searchTwitter("#rstats")
# # # d.df=twListToDf(MLB.list) ### Rohit Comment ----  MLB.list is Doubtful
# # ??twListToDf
# # zz <- searchTwitter("#rstats")
# twListToDF(zz)

# -- Jeff Leek code
# Usage: twitterMap("DhankarRohit")
#        twitterMap("DhankarRohit", userLocation = "Gurgaon", plotType = "both")
#
# NOTE(review): `trim`, `findLatLon` and `getGreatCircle` are not defined in the
# visible part of this file; they are presumably provided by Jeff Leek's
# twitterMap.R script referenced earlier - confirm they are sourced first.

# Geocode a list of free-text locations relative to the user's position.
#
# locations: list (one element per account) of location strings; elements of
#            length zero are skipped.
# userLL:    the user's own coordinates as returned in findLatLon()$latlon.
# Returns a 4-column numeric matrix, one row per account that could be located:
# columns 1-2 hold the `latlon` value, column 3 the distCosine() distance to
# the user, column 4 the `cont` code returned by findLatLon(). Rows are sorted
# by decreasing distance.
locateConnections <- function(locations, userLL) {
  connLL <- matrix(NA_real_, nrow = length(locations), ncol = 4)
  # seq_along() keeps the loop from running at all when the list is empty
  # (1:length(x) would iterate over c(1, 0)).
  for (i in seq_along(locations)) {
    if (length(locations[[i]]) > 0) {
      tmpLL <- findLatLon(trim(locations[[i]]))
      if (any(!is.na(tmpLL$latlon))) {
        connLL[i, ] <- c(unlist(tmpLL$latlon),
                         distCosine(userLL, tmpLL$latlon),
                         unlist(tmpLL$cont))
      }
    }
  }
  # Rohit Comment - reordering of the followers - Not sure why ?
  # Sorted by decreasing distance; presumably so the longest arcs are drawn
  # first and shorter ones end up on top.
  # drop = FALSE keeps a matrix even when zero or one rows survive.
  connLL <- connLL[order(-connLL[, 3]), , drop = FALSE]
  connLL[!is.na(connLL[, 1]), , drop = FALSE]
}

# Draw one world-map panel: a great-circle arc from the user to every located
# account, coloured by the code in column 4, plus a legend with the number of
# accounts per code (1 = Asia ... 6 = Europe).
plotConnectionPanel <- function(userLL, connLL, title, cols) {
  map("world", col = "#191919", bg = "black", fill = TRUE,
      mar = rep(0, 4), border = 0)
  mtext(title, col = "lightgrey")
  for (i in seq_len(nrow(connLL))) {
    greatC <- getGreatCircle(userLL, connLL[i, 1:2])
    lines(greatC, col = cols[connLL[i, 4]], lwd = 0.8)
  }
  continents <- c("Asia", "Africa", "N. America", "S. America",
                  "Australia/N.Z.", "Europe")
  counts <- vapply(seq_along(continents),
                   function(k) sum(connLL[, 4] == k),
                   integer(1))
  legend(-180, 0, legend = paste(continents, counts),
         text.col = cols[1:6], bg = "black", cex = 0.75)
}

# Plot a map of a Twitter user's followers and/or the accounts they follow.
#
# DhankarRohit: the Twitter screen name, passed as a quoted string, e.g.
#               "DhankarRohit". (The parameter keeps its existing name so
#               current calls still work; it is used as the user name.)
# userLocation: optional location string; when NULL the location from the
#               user's Twitter profile is used.
# fileName:     path of the PDF that is written.
# nMax:         maximum number of followers / friends to request.
# plotType:     "followers" (default), "both" or "following".
# Stops with an error when the user's location cannot be determined or
# geocoded. Returns `fileName` invisibly.
twitterMap <- function(DhankarRohit, userLocation = NULL,
                       fileName = "twitterMap.pdf", nMax = 10,
                       plotType = c("followers", "both", "following")) {
  # Resolve the choice vector to a single value; without this the default
  # length-3 vector reaches `if (plotType == ...)`.
  plotType <- match.arg(plotType)
  userName <- DhankarRohit

  # Get location data
  cat("Getting data from Twitter, this may take a moment.\n")
  tmp <- getUser(userName)
  if (is.null(userLocation)) {
    userLocation <- trim(location(tmp))
    if (length(userLocation) == 0 || nchar(userLocation) < 2) {
      stop("We can not find your location from Twitter")
    }
  }

  # nMax number of Followers
  followers <- tmp$getFollowers(n = nMax)
  followersLocation <- lapply(followers, location)
  # nMax number of Following
  following <- tmp$getFriends(n = nMax)
  followingLocation <- lapply(following, location)

  # Load the geographic data - these all load as Class - Promise but the moment
  # we see - str for these it turns into a DataFrame . As seen below --
  data(world.cities)
  # str(world.cities)
  # How to find - population of Delhi ...in Data Frame pop_del<-
  # Have commented the Cities - data for US and Canada below - this
  # Was retained as such by J Leek. Jeff Leek (jtleek@gmail.com)
  #  data(us.cities)
  #  data(canada.cities)

  # Find the latitude and longitude of the user - USER being SELF
  cat("Getting geographic (latitude/longitude) of Twitter users.\n")
  userLL <- findLatLon(userLocation)$latlon
  if (any(is.na(userLL))) {
    stop("We can't find the latitude and longitude of your location from Twitter")
  }

  # Find the latitude and longitude of each of the followers/following
  # and calculate the distance to the user
  followersLL <- locateConnections(followersLocation, userLL)
  followingLL <- locateConnections(followingLocation, userLL)

  cat("Plotting results.\n")
  # Set up the colors
  cols <- brewer.pal(7, "Set2")

  # fileName is the same one given in the function signature. The page is
  # twice as tall when both maps are stacked.
  pdf(fileName, height = if (plotType == "both") 12 else 6, width = 10)
  # Close the PDF device even if one of the plotting calls fails.
  on.exit(dev.off(), add = TRUE)
  data(worldMapEnv)
  if (plotType == "both") {
    par(mfrow = c(2, 1), mar = rep(0, 4))
  }
  if (plotType %in% c("followers", "both")) {
    plotConnectionPanel(userLL, followersLL,
                        paste0("@", userName, " Follower Map"), cols)
  }
  if (plotType %in% c("following", "both")) {
    plotConnectionPanel(userLL, followingLL,
                        paste0("@", userName, " Following Map"), cols)
  }
  mtext("Created by @simplystats twitterMap", side = 1, adj = 1, cex = 0.8,
        col = "grey")

  invisible(fileName)
}

# Experiment --- to see how to get a Population of a City from
# str(world.cities)
# How to find - population of Delhi ...in Data Frame pop_del<-
#  http://stackoverflow.com/questions/15303283/how-to-do-vlookup-and-fill-down-like-in-excel-in-r
# Complete StackOverflow Page in the same Folder
# The read.table below makes a Data.Frame out of a Text== entry , simple text
# converted into Data.Frame. One record per line: the line breaks inside the
# string are required, otherwise read.table sees a single over-long row.
houses <- read.table(text = "Semi            1
Single          2
Row             3
Single          2
Apartment       4
Apartment       4
Row             3", col.names = c("HouseType", "HouseTypeNo"))

# External Links / Recommended Reads :-
# 1/ http://arxiv.org/abs/1312.6635  --- Topic and Sentiment Analysis on Social Networks
# 2/ Analyzing Social Networks on Twitter