# ====== Lab 04 ======
# (Page title carried over from the wiki export; kept as a comment so that
# the file parses as R.)

#####################
# LAB 4: Centrality #
#####################

# NOTE: if you have trouble because some packages are not installed,
# see lab 1 for instructions on how to install all necessary packages.

#############################################################
#
# Lab 4
#
# The purpose of this lab is to acquire centrality measures,
# to determine how they are interrelated, and to discern
# what they mean.
#
##############################################################

###
# 1. SETUP
###

library(igraph)

###
# 2. LOAD DATA
###

# This lab uses SSL.dat (social interaction) and TSL.dat (task
# interaction) from the S641 Semester 1 class in student_nets.
# The class is a biology 2 class at a public high school.

# Load data. The code below expects this call to make
# s641_full_data_frame available in the workspace.
data(studentnets.S641, package = "NetData")

# Reduce to non-zero edges (a row is kept when it has a social tie,
# a task tie, or both) and build a graph object.
s641_full_nonzero_edges <- subset(
  s641_full_data_frame,
  social_tie > 0 | task_tie > 0
)
head(s641_full_nonzero_edges)

s641_full <- graph.data.frame(s641_full_nonzero_edges)
summary(s641_full)

# Create sub-graphs based on edge attributes and remove isolates.
# For each tie type: drop the edges whose attribute is zero, then drop
# every vertex that is left with degree zero.
s641_social <- delete.edges(
  s641_full,
  E(s641_full)[get.edge.attribute(s641_full, name = "social_tie") == 0]
)
s641_social <- delete.vertices(
  s641_social,
  V(s641_social)[degree(s641_social) == 0]
)
summary(s641_social)

s641_task <- delete.edges(
  s641_full,
  E(s641_full)[get.edge.attribute(s641_full, name = "task_tie") == 0]
)
s641_task <- delete.vertices(
  s641_task,
  V(s641_task)[degree(s641_task) == 0]
)
summary(s641_task)

# Look at the plots for each sub-graph
social_layout <- layout.fruchterman.reingold(s641_social)
plot(s641_social, layout = social_layout, edge.arrow.size = .5)

# Note: click on the graph and then use the drop down menu to
# save any plot you like -- it will save as a pdf.
task_layout <- layout.fruchterman.reingold(s641_task)
plot(s641_task, layout = task_layout, edge.arrow.size = .5)

# Question #1 - what can you say about network centralization from
# these graphs?

###
# 3.
# CALCULATE CENTRALITY MEASURES FOR SOCIAL
###

# Indegree centrality: the number of people who direct social talk
# to the individual.
indegree_social <- degree(s641_social, mode = "in")
indegree_social

# Outdegree centrality: the number of people the actor directs
# social talk to.
outdegree_social <- degree(s641_social, mode = "out")
outdegree_social

# Closeness is derived from the geodesic (shortest-path) distances
# between a given node and all other nodes it has paths to. It is
# closely related to the mean shortest path, but it is scaled so that
# more central nodes receive higher values.
#
# In a directed network, we can think of in-closeness centrality
# in terms of the number of steps one would have to go through to
# get TO a given node FROM all other reachable nodes in the
# network. Out-closeness centrality, not surprisingly, measures
# the same thing with the directionality reversed.

# In-closeness centrality
incloseness_social <- closeness(s641_social, mode = "in")
incloseness_social

# Out-closeness centrality
outcloseness_social <- closeness(s641_social, mode = "out")
outcloseness_social

# Betweenness centrality measures the number of shortest paths
# going through a specific vertex; it is returned by the
# betweenness() function. (Recall that in the previous lab we used
# a related measure called edge betweenness, which is returned by
# the edge.betweenness() function.)
betweenness_social <- betweenness(s641_social)
betweenness_social

# Eigenvector centrality gives greater weight to a node the more
# it is connected to other highly connected nodes. A node
# connected to five high-scoring nodes will have higher
# eigenvector centrality than a node connected to five low-scoring
# nodes. Thus, it is often interpreted as measuring a node's
# network importance.
#
# In directed networks, there are 'In' and 'Out' versions.
# In information flow studies, for instance, In-Eigenvector scores
# would reflect which nodes are high on receiving information,
# while Out-Eigenvector scores would reflect which nodes are high
# on broadcasting information.
#
# For these data, we will simply symmetrize to generate an
# undirected eigenvector centrality score.
#
# Note that, unlike the other centrality measures, evcent()
# returns a complex object rather than a simple vector. Thus,
# we first store the evcent() output and then pull the
# eigenvector scores out of it.
s641_social_undirected <- as.undirected(s641_social, mode = "collapse")
ev_obj_social <- evcent(s641_social_undirected)
eigen_social <- ev_obj_social[["vector"]]
eigen_social

#####
# Extra Credit - what code would you write in R
# to get the directed versions of eigenvector centrality?
#####

# To get the summary table, we'll construct a data frame with
# the vertices as rows and the centrality scores as columns.
#
# Note that the vertex IDs are NOT the same as the first column
# of row numbers. This is because we previously removed isolates.
#
# The first column is deliberately left unnamed: data.frame() derives
# its name (V.s641_social..name) from the expression, and the code
# further down refers to the column by that name.
central_social <- data.frame(
  V(s641_social)$name,
  indegree_social,
  outdegree_social,
  incloseness_social,
  outcloseness_social,
  betweenness_social,
  eigen_social
)
central_social

# Now we'll examine the table to find the most central actors
# according to the different measures we have. When looking at
# each of these measures, it's a good idea to have your plot on
# hand so you can sanity-check the results.
plot(
  s641_social,
  vertex.size = 10,
  vertex.label = V(s641_social)$name,
  edge.arrow.size = 0.5,
  layout = layout.fruchterman.reingold,
  main = "Classroom S641 Social Talk"
)

# Show table sorted by decreasing indegree. The order() function
# returns the row positions that sort a vector in ascending order;
# negating the values makes the result descending.
# Top actors are 18, 22 and 16.
central_social[order(-central_social$indegree_social), ]

# Outdegree: 22, 18 and 19.
central_social[order(-central_social$outdegree_social), ]

# In-closeness: 11, 15 and 18.
# NOTE: For some reason, this operation returns strange values;
# a visual inspection of the plot suggests that 11, 15, and 18
# are not central actors at all. This could be a bug.
central_social[order(-central_social$incloseness_social), ]

# Out-closeness: 22, 16, and 19
central_social[order(-central_social$outcloseness_social), ]

# Eigenvector: 18, 19, and 16
central_social[order(-central_social$eigen_social), ]

# Let's make a plot or two with these summary statistics.
# To visualize these data, we can create a barplot for each
# centrality measure. In all cases, the y-axis is the value of
# each category and the x-axis is the node number.
#
# One bar plot is drawn per measure, in the order listed here.
invisible(lapply(
  c(
    "indegree_social",
    "outdegree_social",
    "incloseness_social",
    "outcloseness_social",
    "betweenness_social",
    "eigen_social"
  ),
  function(measure) {
    barplot(
      central_social[[measure]],
      names.arg = central_social$V.s641_social..name
    )
  }
))

# Question #2 - What can we say about the social actors if we compare
# the bar plots? Who seems to run the show in sociable affairs? Who
# seems to bridge sociable conversations?

###
# 4. CORRELATIONS BETWEEN CENTRALITY MEASURES
###

# Now we'll compute correlations between the columns to determine
# how closely these measures of centrality are interrelated.

# Generate a table of pairwise correlations (columns 2 through 7 hold
# the centrality scores; column 1 holds the vertex names).
cor(central_social[, 2:7])

# INTERPRETATION:
#
# Indegree and outdegree are very closely correlated (rho = 0.95),
# indicating that social talk with others is reciprocated (i.e.,
# if you talk to others, they tend to talk back to you).
#
# The same is not true of incloseness and outcloseness (rho =
# 0.38), indicating that the closeness calculated from inbound
# paths is not strongly associated with closeness from
# outbound paths.
#
# In- and out-degree are highly correlated with eigenvector
# centrality, indicating that the students that talk the most to
# others (or, relatedly, are talked to the most by others) are
# also the ones that are connected to other highly connected
# students -- possibly indicating high density cliques around
# these individuals.
#
# Betweenness shows the highest correlation with outdegree, followed
# by indegree. In the case of this particular network, it seems
# that the individuals that talk to the most others are the
# likeliest to serve as bridges between the particular cliques
# (see, e.g., 22 in the plot).

###
# 5. REPEAT FOR TASK TALK
###

# Indegree
# We should have 20 entries, indicating 2 isolates.
indegree_task <- degree(s641_task, mode = "in")
indegree_task

# Outdegree
outdegree_task <- degree(s641_task, mode = "out")
outdegree_task

# In-closeness
incloseness_task <- closeness(s641_task, mode = "in")
incloseness_task

# Out-closeness
outcloseness_task <- closeness(s641_task, mode = "out")
outcloseness_task

# Betweenness. Note that the closeness measures aren't very high
# for node 22, but the betweenness is off the charts.
betweenness_task <- betweenness(s641_task)
betweenness_task

# Eigenvector (computed on the symmetrized, undirected task graph)
s641_task_undirected <- as.undirected(s641_task, mode = "collapse")
ev_obj_task <- evcent(s641_task_undirected)
eigen_task <- ev_obj_task[["vector"]]
eigen_task

# Generate a data frame with all centrality values.
# The first column is deliberately left unnamed: data.frame() derives
# its name (V.s641_task..name) from the expression, and the code
# further down refers to the column by that name.
central_task <- data.frame(
  V(s641_task)$name,
  indegree_task,
  outdegree_task,
  incloseness_task,
  outcloseness_task,
  betweenness_task,
  eigen_task
)
central_task

# In-degree: 22, 18 and 17
central_task[order(-central_task$indegree_task), ]

# Outdegree: 22, 18 and 17
central_task[order(-central_task$outdegree_task), ]

# Incloseness: 22, 18 and 17
central_task[order(-central_task$incloseness_task), ]

# Outcloseness: 22, 18 and 17
central_task[order(-central_task$outcloseness_task), ]

# Eigenvector: 22, 18 and 17
central_task[order(-central_task$eigen_task), ]

# Look at barplots: one per measure, in the order listed here.
invisible(lapply(
  c(
    "indegree_task",
    "outdegree_task",
    "incloseness_task",
    "outcloseness_task",
    "betweenness_task",
    "eigen_task"
  ),
  function(measure) {
    barplot(
      central_task[[measure]],
      names.arg = central_task$V.s641_task..name
    )
  }
))

# Question #3 - What can we say about the task actors if we compare
# the bar plots? Who seems to run the show in task affairs? Who seems
# to bridge task conversations?

###
# 6. TASK/SOCIAL CORRELATIONS
###

# Note that in order to do this, we need to either have no missing
# data or use pairwise complete observations.
#
# It would be nice if the centrality functions padded N/A or zero
# data for the isolates, because then the dimensions of the two
# matrices would be compatible. But right now we have 19 nodes for
# social interaction and 20 nodes for task interaction. So first
# we have to do some hacky R stuff to bring the two tables to a
# common set of nodes.
# First, we'll extract the node names from the SSL data and convert
# them to numbers so we can match with the TSL data. Then we'll repeat
# for TSL.
#
# The conversion goes through as.character() so that it works whether
# the name column is a factor or a plain character vector. (The older
# as.numeric(levels(x))[x] idiom only works for factors: on a character
# column levels() is NULL and every id silently becomes NA.)
connectednodes_social <- as.numeric(
  as.character(central_social$V.s641_social..name)
)
connectednodes_task <- as.numeric(
  as.character(central_task$V.s641_task..name)
)

# Check that we did this correctly: SSL should have 19 nodes, and
# TSL should have 20 nodes.
length(connectednodes_social)
length(connectednodes_task)

# Find the actors that appear in both graphs (18 in total), then use
# columns 2 through 7 of each table to create the correlation matrix.
#
# match() looks up each shared node in each table, so row i of the
# social subset and row i of the task subset always describe the same
# actor, regardless of how the two tables happen to be ordered.
shared_nodes <- intersect(connectednodes_social, connectednodes_task)
cor(
  central_social[match(shared_nodes, connectednodes_social), 2:7],
  central_task[match(shared_nodes, connectednodes_task), 2:7]
)

# INTERPRETATION:
#
# eigen_task is correlated with betweenness_social (rho=0.83) and
# outdegree (rho=0.82), possibly because those who are
# important in talk on tasks also serve as bridges for talk on
# social issues and have many outbound ties.
#
# indegree_task and betweenness_social (rho=0.88), and
# outdegree_task and betweenness_social (rho=0.88) are correlated,
# possibly because the more indegree and outdegree ties a node has
# with respect to task talk, the more it serves as a bridge on
# social talk.
#
# incloseness_task and incloseness_social (rho=0.86) are
# correlated, meaning that those who sit on shortest paths based
# on inbound ties are similar for both social talk and task
# talk, which seems to make sense given the betweenness
# correlations with network importance and degree between task and
# social talk. More interpretations are possible as well.

# Question #4 - What can we infer about s641 from these results?
# What sort of substantive story can we derive from it?