User Tools

Site Tools


sna_eg_stanford:lab06

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
sna_eg_stanford:lab06 [2019/11/29 09:14] – created hkimscilsna_eg_stanford:lab06 [2019/12/11 10:40] (current) – [no pdf] hkimscil
Line 39: Line 39:
 library(NetCluster) library(NetCluster)
  
 +# install.packages(c("sna", "NetCluster"))
 ### ###
 #2. LOADING AND FORMATTING DATA #2. LOADING AND FORMATTING DATA
Line 53: Line 54:
  
 # Create sub-graphs based on edge attributes # Create sub-graphs based on edge attributes
-m182_friend <- delete.edges(m182_full, E(m182_full)[E(m182_full)$friend_tie==0])+m182_friend <- delete_edges(m182_full, E(m182_full)[E(m182_full)$friend_tie==0])
 summary(m182_friend) summary(m182_friend)
 +m182_friend[]
  
-m182_social <- delete.edges(m182_full, E(m182_full)[E(m182_full)$social_tie==0])+m182_social <- delete_edges(m182_full, E(m182_full)[E(m182_full)$social_tie==0])
 summary(m182_social) summary(m182_social)
 +m182_social[]
  
-m182_task <- delete.edges(m182_full, E(m182_full)[E(m182_full)$task_tie==0])+m182_task <- delete_edges(m182_full, E(m182_full)[E(m182_full)$task_tie==0])
 summary(m182_task) summary(m182_task)
 +m182_task[]
  
 # Look at the plots for each sub-graph # Look at the plots for each sub-graph
Line 97: Line 101:
 # concatenate it with its transpose in order to capture both  # concatenate it with its transpose in order to capture both 
 # incoming and outgoing task interactions. # incoming and outgoing task interactions.
-m182_task_matrix_col_to_row <- t(m182_task_matrix_row_to_col)+m182_task_matrix_col_to_row <- t(as.matrix(m182_task_matrix_row_to_col))
 m182_task_matrix_col_to_row m182_task_matrix_col_to_row
  
Line 111: Line 115:
 m182_social_matrix_row_to_col_bin m182_social_matrix_row_to_col_bin
  
-m182_social_matrix_col_to_row <- t(m182_social_matrix_row_to_col)+m182_social_matrix_col_to_row <- t(as.matrix(m182_social_matrix_row_to_col))
 m182_social_matrix_col_to_row m182_social_matrix_col_to_row
  
Line 124: Line 128:
 # generate an n x n correlation matrix that shows the degree of # generate an n x n correlation matrix that shows the degree of
 # structural equivalence of each actor in the network.  # structural equivalence of each actor in the network. 
-m182_task_social_cors <- cor(m182_task_social_matrix)+m182_task_social_cors <- cor(as.matrix(m182_task_social_matrix))
 m182_task_social_cors m182_task_social_cors
  
Line 258: Line 262:
  
 # Task valued # Task valued
-task_mean <- mean(m182_task_matrix_row_to_col)+task_mean <- mean(as.matrix(m182_task_matrix_row_to_col))
 task_mean task_mean
  
-task_valued_blockmodel <- blockmodel(m182_task_matrix_row_to_col, clusters)+task_valued_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col), clusters)
 task_valued_blockmodel task_valued_blockmodel
  
Line 268: Line 272:
 task_density task_density
  
-task_binary_blockmodel <- blockmodel(m182_task_matrix_row_to_col_bin, clusters)+task_binary_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col_bin), clusters)
 task_binary_blockmodel task_binary_blockmodel
  
  
 # Social valued # Social valued
-social_mean <- mean(m182_social_matrix_row_to_col)+social_mean <- mean(as.matrix(m182_social_matrix_row_to_col))
 social_mean social_mean
  
-social_valued_blockmodel <- blockmodel(m182_social_matrix_row_to_col, clusters)+social_valued_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col), clusters)
 social_valued_blockmodel social_valued_blockmodel
  
Line 283: Line 287:
 social_density social_density
  
-social_binary_blockmodel <- blockmodel(m182_social_matrix_row_to_col_bin, clusters)+social_binary_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col_bin), clusters)
 social_binary_blockmodel social_binary_blockmodel
  
Line 304: Line 308:
 # 4. HIERARCHICAL CLUSTERING ON TRIAD CENSUS # 4. HIERARCHICAL CLUSTERING ON TRIAD CENSUS
 ### ###
 +
 +# triads package is outdated. Not available for update r versions.
 +# - hkim
  
 # Another way to think about roles within a network is by looking  # Another way to think about roles within a network is by looking 
Line 436: Line 443:
 # optimal number of components, available via the nFactors # optimal number of components, available via the nFactors
 # package.  # package. 
-ev <- eigen(cor(m182_task_social_matrix)) # get eigenvalues+ev <- eigen(cor(as.matrix(m182_task_social_matrix))) # get eigenvalues
 ap <- parallel(subject=nrow(m182_task_social_matrix), ap <- parallel(subject=nrow(m182_task_social_matrix),
  var=ncol(m182_task_social_matrix),  var=ncol(m182_task_social_matrix),
Line 460: Line 467:
 # using the number of factors determined above (note this may not # using the number of factors determined above (note this may not
 # be the same number as you get): # be the same number as you get):
-pca_m182_task_social = principal(m182_task_social_matrix, nfactors=5, rotate="varimax"+pca_m182_task_social = principal(as.matrix(m182_task_social_matrix), nfactors=5, rotate="varimax"
  
 # Let's take a look at the results in the R terminal: # Let's take a look at the results in the R terminal:
Line 497: Line 504:
 plot(m182_task_triad_hclust, main = "Triads") plot(m182_task_triad_hclust, main = "Triads")
 dev.off() dev.off()
 +
 +
 +
 +#####################
 +# Questions:
 +# (4) How do the results across blockmodel techniques differ? 
 +# Why might you use one over the other? Why might you want to 
 +# run more than one in your analyses?
 +#####################
 +</code>
 +
 +
 +====== Lab 06: Step 4 deleted ======
 +<code>
 +##########################################################################
 +# You may cite these labs as follows: McFarland, Daniel, Solomon Messing,
 +# Mike Nowak, and Sean Westwood. 2010. "Social Network Analysis          
 +# Labs in R." Stanford University.                                       
 +##########################################################################
 + 
 + 
 +##########################################################################
 +# LAB 6 - Blockmodeling Lab                                              
 +# The point of this lab is to introduce students to blockmodeling 
 +# techniques that call for a metric of structural equivalence, a method
 +# and rationale for the selection of the number of positions, and then 
 +# a means of summary representation (mean cutoff and reduced graph 
 +# presentation). Students will be shown how to identify positions using
 +# correlation as a metric of structural equivalence (euclidean distance 
 +# is used in earlier lab), and they will be taught how to identify more
 +# isomorphic notions of role-position using the triad census. Last, the 
 +# lab calls upon the user to compare positional techniques and come up 
 +# with a rationale for why they settle on one over another.  
 +##########################################################################
 + 
 +
 +
 +
 +# NOTE: if you have trouble because some packages are not installed, 
 +# see lab 1 for instructions on how to install all necessary packages.
 +
 +###
 +#1. SETUP
 +###
 +
 +library(igraph)
 +library(sna)
 +library(triads)
 +library(psych)
 +library(nFactors)
 +library(NetCluster)
 +
 +###
 +#2. LOADING AND FORMATTING DATA
 +###
 +
 +data(studentnets.M182, package = "NetData")
 +
 +# Reduce to non-zero edges and build a graph object
 +m182_full_nonzero_edges <- subset(m182_full_data_frame, (friend_tie > 0 | social_tie > 0 | task_tie > 0))
 +head(m182_full_nonzero_edges)
 +
 +m182_full <- graph.data.frame(m182_full_nonzero_edges) 
 +summary(m182_full)
 +
 +# Create sub-graphs based on edge attributes
 +m182_friend <- delete_edges(m182_full, E(m182_full)[E(m182_full)$friend_tie==0])
 +summary(m182_friend)
 +
 +m182_social <- delete_edges(m182_full, E(m182_full)[E(m182_full)$social_tie==0])
 +summary(m182_social)
 +
 +m182_task <- delete_edges(m182_full, E(m182_full)[E(m182_full)$task_tie==0])
 +summary(m182_task)
 +
 +# Look at the plots for each sub-graph
 +pdf("6.1_m182_studentnet_friend_social_task_plots.pdf", width = 10)
 +par(mfrow = c(1,3))
 +
 +friend_layout <- layout.fruchterman.reingold(m182_friend)
 +plot(m182_friend, layout=friend_layout, main = "friend", edge.arrow.size=.5)
 +
 +social_layout <- layout.fruchterman.reingold(m182_social)
 +plot(m182_social, layout=social_layout, main = "social", edge.arrow.size=.5)
 +
 +task_layout <- layout.fruchterman.reingold(m182_task)
 +plot(m182_task, layout=task_layout, main = "task", edge.arrow.size=.5)
 +dev.off()
 +
 +###
 +# 3. HIERARCHICAL CLUSTERING ON SOCIAL & TASK TIES
 +###
 +
 +# We'll use the "task" and "social" sub-graphs together as the
 +# basis for our structural equivalence methods. First, we'll use
 +# the task graph to generate an adjacency matrix.
 +#
 +# This matrix represents task interactions directed FROM the 
 +# row individual TO the column individual. 
 +m182_task_matrix_row_to_col <- get.adjacency(m182_task, attr='task_tie')
 +m182_task_matrix_row_to_col
 +
 +# To operate on a binary graph, simply leave off the "attr" 
 +# parameter:
 +m182_task_matrix_row_to_col_bin <- get.adjacency(m182_task)
 +m182_task_matrix_row_to_col_bin
 +
 +# For this lab, we'll use the valued graph. The next step is to 
 +# concatenate it with its transpose in order to capture both 
 +# incoming and outgoing task interactions.
 +m182_task_matrix_col_to_row <- t(as.matrix(m182_task_matrix_row_to_col))
 +m182_task_matrix_col_to_row
 +
 +m182_task_matrix <- rbind(m182_task_matrix_row_to_col, m182_task_matrix_col_to_row)
 +m182_task_matrix
 +
 +# Next, we'll use the same procedure to add social-interaction
 +# information.
 +m182_social_matrix_row_to_col <- get.adjacency(m182_social, attr='social_tie')
 +m182_social_matrix_row_to_col
 +
 +m182_social_matrix_row_to_col_bin <- get.adjacency(m182_social)
 +m182_social_matrix_row_to_col_bin
 +
 +m182_social_matrix_col_to_row <- t(as.matrix(m182_social_matrix_row_to_col))
 +m182_social_matrix_col_to_row
 +
 +m182_social_matrix <- rbind(m182_social_matrix_row_to_col, m182_social_matrix_col_to_row)
 +m182_social_matrix
 +
 +m182_task_social_matrix <- rbind(m182_task_matrix, m182_social_matrix)
 +m182_task_social_matrix
 +
 +# Now we have a single 4n x n matrix that represents both in- and
 +# out-directed task and social communication. From this, we can
 +# generate an n x n correlation matrix that shows the degree of
 +# structural equivalence of each actor in the network. 
 +m182_task_social_cors <- cor(as.matrix(m182_task_social_matrix))
 +m182_task_social_cors
 +
 +# To use correlation values in hierarchical NetCluster, they must 
 +# first be coerced into a "dissimilarity structure" using dist().
 +# We subtract the values from 1 so that they are all greater than 
 +# or equal to 0; thus, highly dissimilar (i.e., negatively 
 +# correlated) actors have higher values.
 +dissimilarity <- 1 - m182_task_social_cors
 +m182_task_social_dist <- as.dist(dissimilarity)
 +m182_task_social_dist
 +
 +# Note that it is also possible to use dist() directly on the 
 +# matrix. However, since cor() looks at associations between 
 +# columns and dist() looks at associations between rows, it is
 +# necessary to transpose the matrix first.
 +#
 +# A variety of distance metrics are available; Euclidean 
 +# is the default.
 +#m182_task_social_dist <- dist(t(m182_task_social_matrix))
 +#m182_task_social_dist
 +
 +# hclust() performs a hierarchical agglomerative NetCluster 
 +# operation based on the values in the dissimilarity matrix 
 +# yielded by as.dist() above. The standard visualization is a 
 +# dendrogram. By default, hclust() agglomerates clusters via a
 +# "complete linkakage" algorithm, determining cluster proximity
 +# by looking at the distance of the two points across clusters
 +# that are farthest away from one another. This can be changed via
 +# the "method" parameter.
 +
 +pdf("6.2_m182_studentnet_social_hclust.pdf")
 +m182_task_social_hclust <- hclust(m182_task_social_dist)
 +plot(m182_task_social_hclust)
 +dev.off()
 +
 +# cutree() allows us to use the output of hclust() to set
 +# different numbers of clusters and assign vertices to clusters
 +# as appropriate. For example:
 +cutree(m182_task_social_hclust, k=2)
 +
 +# Now we'll try to figure out the number of clusters that best 
 +# describes the underlying data. To do this, we'll loop through
 +# all of the possible numbers of clusters (1 through n, where n is
 +# the number of actors in the network). For each solution
 +# corresponding to a given number of clusters, we'll use cutree()
 +# to assign the vertices to their respective clusters 
 +# corresponding to that solution.
 +#
 +# From this, we can generate a matrix of within- and between-
 +# cluster correlations. Thus, when there is one cluster for each 
 +# vertex in the network, the cell values will be identical to the
 +# observed correlation matrix, and when there is one cluster for 
 +# the whole network, the values will all be equal to the average
 +# correlation across the observed matrix.
 +#
 +# We can then correlate each by-cluster matrix with the observed
 +# correlation matrix to see how well the by-cluster matrix fits
 +# the data. We'll store the correlation for each number of
 +# clusters in a vector, which we can then plot.
 +
 +# First, we initialize a vector for storing the correlations and 
 +# set a variable for our number of vertices.
 +clustered_observed_cors = vector()
 +num_vertices = length(V(m182_task))
 +
 +# Next, we loop through the different possible cluster 
 +# configurations, produce matrices of within- and between-
 +# cluster correlations, and correlate these by-cluster matrices
 +# with the observed correlation matrix.
 +
 +pdf("6.3_m182_studentnet_task_social_clustered_observed_corrs.pdf")
 +clustered_observed_cors <-clustConfigurations(num_vertices,m182_task_social_hclust,m182_task_social_cors)
 +clustered_observed_cors
 +plot(clustered_observed_cors$correlations)
 +dev.off()
 +
 +clustered_observed_cors$correlations
 +# From a visual inspection of the correlation matrix, we can 
 +# decide on the proper number of clusters in this network. 
 +# For this network, we'll use 4. (Note that the 1-cluster 
 +# solution doesn't appear on the plot because its correlation 
 +# with the observed correlation matrix is undefined.)
 +num_clusters = 4
 +clusters <- cutree(m182_task_social_hclust, k = num_clusters)
 +clusters
 +
 +cluster_cor_mat <- clusterCorr(m182_task_social_cors,
 +                                            clusters)
 +cluster_cor_mat
 +
 +# Let's look at the correlation between this cluster configuration 
 +# and the observed correlation matrix. This should match the 
 +# corresponding value from clustered_observed_cors above.
 +gcor(cluster_cor_mat, m182_task_social_cors)
 +
 +
 +#####################
 +# Questions:
 +# (1) What rationale do you have for selecting the number of 
 +# clusters / positions that you do?
 +#####################
 +  
 +
 +
 +### NOTE ON DEDUCTIVE CLUSTERING
 +
 +# It's pretty straightforward, using the code above, to explore
 +# your own deductive NetCluster. Simply supply your own cluster
 +# vector, where the elements in the vector are in the same order
 +# as the vertices in the matrix, and the values represent the
 +# cluster to which each vertex belongs. 
 +#
 +# For example, if you believed that actors 2, 7, and 8 formed one
 +# group, actor 16 former another group, and everyone else formed 
 +# a third group, you could represent this as follows:
 +deductive_clusters = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1,
 +                       1, 3)
 +
 +# You could then examine the fitness of this cluster configuration
 +# as follows:
 +deductive_cluster_cor_mat <- generate_cluster_cor_mat(
 +  m182_task_social_cors,
 +  deductive_clusters)
 +deductive_cluster_cor_mat
 +gcor(deductive_cluster_cor_mat, m182_task_social_cors)
 +
 +### END NOTE ON DEDUCTIVE CLUSTERING
 +
 +# Now we'll use the 4-cluster solution to generate blockmodels, 
 +# using the raw tie data from the underlying task and social 
 +# networks.
 +
 +# Task valued
 +task_mean <- mean(as.matrix(m182_task_matrix_row_to_col))
 +task_mean
 +
 +task_valued_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col), clusters)
 +task_valued_blockmodel
 +
 +# Task binary
 +task_density <- graph.density(m182_task)
 +task_density
 +
 +task_binary_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col_bin), clusters)
 +task_binary_blockmodel
 +
 +
 +# Social valued
 +social_mean <- mean(as.matrix(m182_social_matrix_row_to_col))
 +social_mean
 +
 +social_valued_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col), clusters)
 +social_valued_blockmodel
 +
 +# Social binary
 +social_density <- graph.density(m182_social)
 +social_density
 +
 +social_binary_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col_bin), clusters)
 +social_binary_blockmodel
 +
 +# We can also permute the network to examine the within- and 
 +# between-cluster correlations. 
 +
 +cluster_cor_mat_per <- permute_matrix(clusters, cluster_cor_mat)
 +cluster_cor_mat_per
 +
 +
 +#####################
 +# Questions:
 +# (2) What is the story you get from viewing these clusters, 
 +# and their within and between cluster densities on task and 
 +# social interaction? What can you say about M182 from this?
 +#####################
 +
 +#
 +# 4. deleted.
 +#
 +
 +#####################
 +# Questions:
 +# (3) What does clustering of the triadic census afford us? 
 +# What roles do you see? Redo the initial blockmodel analysis
 +# without social interaction (only task) and then compare to 
 +# this solution. Do they differ? 
 +#
 +# Extra credit: Try running the triad census on task AND 
 +# social interaction separately and then correlating persons. 
 +# What result do you get? Is it different from our initial 
 +# blockmodel result? Show your code.  
 +######################
 +
 +
 +
 +###
 +# 5. FACTOR ANALYSIS
 +###
 +
 +# Note that although we are conducting a principal components
 +# analysis (PCA), which is technically not exactly the same as
 +# factor analysis, we will use the term "factor" to describe the
 +# individual components in our PCA. 
 +
 +# PCA is often used in network analysis as a form of detecting 
 +# individuals global positioning. We say "global" because these
 +# clusters aren't defined on local cohesion but from the overall 
 +# pattern of ties individuals have with all others (structural 
 +# equivalence). Identifying the first two largest components that
 +# organize the variance in tie patterns is one way of doing this.
 +
 +# We'll analyze the 4n x n matrix generated above.
 +
 +# First, we want to determine the ideal number of components
 +# (factors) to extract. We'll do this by examining the eigenvalues
 +# in a scree plot and examining how each number of factors stacks
 +# up to a few proposed non-graphical solutions to selecting the
 +# optimal number of components, available via the nFactors
 +# package. 
 +ev <- eigen(cor(as.matrix(m182_task_social_matrix))) # get eigenvalues
 +ap <- parallel(subject=nrow(m182_task_social_matrix),
 + var=ncol(m182_task_social_matrix),
 + rep=100,cent=.05)
 +nS <- nScree(ev$values, ap$eigen$qevpea)
 +
 +pdf("6.6_m182_studentnet_task_social_pca_scree.pdf")
 +plotnScree(nS) 
 +
 +# To draw a line across the graph where eigenvalues are = 1,
 +# use the following code:
 +plotnScree(nS) 
 +abline(h=1)
 +dev.off()
 +
 +
 +# For more information on this procedure, please see 
 +# the references provided in the parallel() documentation
 +# (type "?parallel" in the R command line with the package
 +# loaded).
 +
 +# Now we'll run a principal components analysis on the matrix,
 +# using the number of factors determined above (note this may not
 +# be the same number as you get):
 +pca_m182_task_social = principal(as.matrix(m182_task_social_matrix), nfactors=5, rotate="varimax"
 +
 +# Let's take a look at the results in the R terminal:
 +pca_m182_task_social 
 +
 +# You can see the standardized loadings for each factor for each
 +# node. Note that R sometimes puts the factors in a funky order
 +# (e.g. RC1, RC2, RC5, RC4, RC3) but all of the factors are there.
 +# You can see that the SS loadings, proportion of variance
 +# explained and cumulative variance explained is provided below. A
 +# Chi Square test of the factors and various other statistics are
 +# provided below. 
 +
 +# Note that the eigenvalues can be accessed via the following
 +# command:
 +pca_m182_task_social$values
 +
 +# Now we will use the factor loadings to cluster and compare that
 +# to our other NetCluster techniques, using dendrograms.
 +
 +# Take the distance based on Euclidian Distance
 +m182_task_factor_dist = dist(pca_m182_task_social$loadings)
 +
 +# And cluster
 +m182_task_factor_hclust <- hclust(m182_task_factor_dist)
 +
 +pdf("6.7_m182_studentnet_task_social_pca_hclust.pdf")
 +plot(m182_task_factor_hclust)
 +dev.off()
 +
 +# And compare to NetCluster based on correlations and triads:
 +pdf("6.8_m182_task_cluster_by_correlation_PCA_Triads.pdf")
 +par(mfrow = c(1,3))
 +plot(m182_task_social_hclust, main = "Correlation")
 +plot(m182_task_factor_hclust, main = "PCA")
 +plot(m182_task_triad_hclust, main = "Triads")
 +dev.off()
 +
 +
 +
 +#####################
 +# Questions:
 +# (4) How do the results across blockmodel techniques differ? 
 +# Why might you use one over the other? Why might you want to 
 +# run more than one in your analyses?
 +#####################
 +</code>
 +
 +====== no pdf ======
 +<code>
 +##########################################################################
 +# You may cite these labs as follows: McFarland, Daniel, Solomon Messing,
 +# Mike Nowak, and Sean Westwood. 2010. "Social Network Analysis          
 +# Labs in R." Stanford University.                                       
 +##########################################################################
 + 
 + 
 +##########################################################################
 +# LAB 6 - Blockmodeling Lab                                              
 +# The point of this lab is to introduce students to blockmodeling 
 +# techniques that call for a metric of structural equivalence, a method
 +# and rationale for the selection of the number of positions, and then 
 +# a means of summary representation (mean cutoff and reduced graph 
 +# presentation). Students will be shown how to identify positions using
 +# correlation as a metric of structural equivalence (euclidean distance 
 +# is used in earlier lab), and they will be taught how to identify more
 +# isomorphic notions of role-position using the triad census. Last, the 
 +# lab calls upon the user to compare positional techniques and come up 
 +# with a rationale for why they settle on one over another.  
 +##########################################################################
 + 
 +
 +
 +
 +# NOTE: if you have trouble because some packages are not installed, 
 +# see lab 1 for instructions on how to install all necessary packages.
 +
 +###
 +#1. SETUP
 +###
 +
 +library(igraph)
 +library(sna)
 +library(triads)
 +library(psych)
 +library(nFactors)
 +library(NetCluster)
 +
 +###
 +#2. LOADING AND FORMATTING DATA
 +###
 +
 +data(studentnets.M182, package = "NetData")
 +
 +# Reduce to non-zero edges and build a graph object
 +m182_full_nonzero_edges <- subset(m182_full_data_frame, (friend_tie > 0 | social_tie > 0 | task_tie > 0))
 +head(m182_full_nonzero_edges)
 +
 +m182_full <- graph.data.frame(m182_full_nonzero_edges) 
 +summary(m182_full)
 +
 +# Create sub-graphs based on edge attributes
 +m182_friend <- delete_edges(m182_full, E(m182_full)[E(m182_full)$friend_tie==0])
 +summary(m182_friend)
 +
 +m182_social <- delete_edges(m182_full, E(m182_full)[E(m182_full)$social_tie==0])
 +summary(m182_social)
 +
 +m182_task <- delete_edges(m182_full, E(m182_full)[E(m182_full)$task_tie==0])
 +summary(m182_task)
 +
 +# Look at the plots for each sub-graph
 +# pdf("6.1_m182_studentnet_friend_social_task_plots.pdf", width = 10)
 +par(mfrow = c(1,3))
 +
 +friend_layout <- layout.fruchterman.reingold(m182_friend)
 +plot(m182_friend, layout=friend_layout, main = "friend", edge.arrow.size=.5)
 +
 +social_layout <- layout.fruchterman.reingold(m182_social)
 +plot(m182_social, layout=social_layout, main = "social", edge.arrow.size=.5)
 +
 +task_layout <- layout.fruchterman.reingold(m182_task)
 +plot(m182_task, layout=task_layout, main = "task", edge.arrow.size=.5)
 +# dev.off()
 +# back to normal window pannel (no partition)
 +par(mfrow = c(1,1))
 +
 +
 +###
 +# 3. HIERARCHICAL CLUSTERING ON SOCIAL & TASK TIES
 +###
 +
 +# We'll use the "task" and "social" sub-graphs together as the
 +# basis for our structural equivalence methods. First, we'll use
 +# the task graph to generate an adjacency matrix.
 +#
 +# This matrix represents task interactions directed FROM the 
 +# row individual TO the column individual. 
 +m182_task_matrix_row_to_col <- get.adjacency(m182_task, attr='task_tie')
 +m182_task_matrix_row_to_col
 +
 +# To operate on a binary graph, simply leave off the "attr" 
 +# parameter:
 +m182_task_matrix_row_to_col_bin <- get.adjacency(m182_task)
 +m182_task_matrix_row_to_col_bin
 +
 +# For this lab, we'll use the valued graph. The next step is to 
 +# concatenate it with its transpose in order to capture both 
 +# incoming and outgoing task interactions.
 +m182_task_matrix_col_to_row <- t(as.matrix(m182_task_matrix_row_to_col))
 +m182_task_matrix_col_to_row
 +
 +m182_task_matrix <- rbind(m182_task_matrix_row_to_col, m182_task_matrix_col_to_row)
 +m182_task_matrix
 +
 +# Next, we'll use the same procedure to add social-interaction
 +# information.
 +m182_social_matrix_row_to_col <- get.adjacency(m182_social, attr='social_tie')
 +m182_social_matrix_row_to_col
 +
 +m182_social_matrix_row_to_col_bin <- get.adjacency(m182_social)
 +m182_social_matrix_row_to_col_bin
 +
 +m182_social_matrix_col_to_row <- t(as.matrix(m182_social_matrix_row_to_col))
 +m182_social_matrix_col_to_row
 +
 +m182_social_matrix <- rbind(m182_social_matrix_row_to_col, m182_social_matrix_col_to_row)
 +m182_social_matrix
 +
 +m182_task_social_matrix <- rbind(m182_task_matrix, m182_social_matrix)
 +m182_task_social_matrix
 +
 +# Now we have a single 4n x n matrix that represents both in- and
 +# out-directed task and social communication. From this, we can
 +# generate an n x n correlation matrix that shows the degree of
 +# structural equivalence of each actor in the network. 
 +m182_task_social_cors <- cor(as.matrix(m182_task_social_matrix))
 +m182_task_social_cors
 +
 +# To use correlation values in hierarchical NetCluster, they must 
 +# first be coerced into a "dissimilarity structure" using dist().
 +# We subtract the values from 1 so that they are all greater than 
 +# or equal to 0; thus, highly dissimilar (i.e., negatively 
 +# correlated) actors have higher values.
 +dissimilarity <- 1 - m182_task_social_cors
 +m182_task_social_dist <- as.dist(dissimilarity)
 +m182_task_social_dist
 +
 +# Note that it is also possible to use dist() directly on the 
 +# matrix. However, since cor() looks at associations between 
 +# columns and dist() looks at associations between rows, it is
 +# necessary to transpose the matrix first.
 +#
 +# A variety of distance metrics are available; Euclidean 
 +# is the default.
 +#m182_task_social_dist <- dist(t(m182_task_social_matrix))
 +#m182_task_social_dist
 +
 +# hclust() performs a hierarchical agglomerative NetCluster 
 +# operation based on the values in the dissimilarity matrix 
 +# yielded by as.dist() above. The standard visualization is a 
 +# dendrogram. By default, hclust() agglomerates clusters via a
 +# "complete linkakage" algorithm, determining cluster proximity
 +# by looking at the distance of the two points across clusters
 +# that are farthest away from one another. This can be changed via
 +# the "method" parameter.
 +
 +# pdf("6.2_m182_studentnet_social_hclust.pdf")
 +m182_task_social_hclust <- hclust(m182_task_social_dist)
 +plot(m182_task_social_hclust)
 +# dev.off()
 +
 +# cutree() allows us to use the output of hclust() to set
 +# different numbers of clusters and assign vertices to clusters
 +# as appropriate. For example:
 +cutree(m182_task_social_hclust, k=2)
 +
 +# Now we'll try to figure out the number of clusters that best 
 +# describes the underlying data. To do this, we'll loop through
 +# all of the possible numbers of clusters (1 through n, where n is
 +# the number of actors in the network). For each solution
 +# corresponding to a given number of clusters, we'll use cutree()
 +# to assign the vertices to their respective clusters 
 +# corresponding to that solution.
 +#
 +# From this, we can generate a matrix of within- and between-
 +# cluster correlations. Thus, when there is one cluster for each 
 +# vertex in the network, the cell values will be identical to the
 +# observed correlation matrix, and when there is one cluster for 
 +# the whole network, the values will all be equal to the average
 +# correlation across the observed matrix.
 +#
 +# We can then correlate each by-cluster matrix with the observed
 +# correlation matrix to see how well the by-cluster matrix fits
 +# the data. We'll store the correlation for each number of
 +# clusters in a vector, which we can then plot.
 +
 +# First, we initialize a vector for storing the correlations and 
 +# set a variable for our number of vertices.
 +clustered_observed_cors = vector()
 +num_vertices = length(V(m182_task))
 +
 +# Next, we loop through the different possible cluster 
 +# configurations, produce matrices of within- and between-
 +# cluster correlations, and correlate these by-cluster matrices
 +# with the observed correlation matrix.
 +
 +# pdf("6.3_m182_studentnet_task_social_clustered_observed_corrs.pdf")
 +clustered_observed_cors <-clustConfigurations(num_vertices,m182_task_social_hclust,m182_task_social_cors)
 +clustered_observed_cors
 +plot(clustered_observed_cors$correlations)
 +# dev.off()
 +
 +clustered_observed_cors$correlations
 +# From a visual inspection of the correlation matrix, we can 
 +# decide on the proper number of clusters in this network. 
 +# For this network, we'll use 4. (Note that the 1-cluster 
 +# solution doesn't appear on the plot because its correlation 
 +# with the observed correlation matrix is undefined.)
 +num_clusters = 4
 +clusters <- cutree(m182_task_social_hclust, k = num_clusters)
 +clusters
 +
 +cluster_cor_mat <- clusterCorr(m182_task_social_cors,
 +                                            clusters)
 +cluster_cor_mat
 +
 +# Let's look at the correlation between this cluster configuration 
 +# and the observed correlation matrix. This should match the 
 +# corresponding value from clustered_observed_cors above.
 +gcor(cluster_cor_mat, m182_task_social_cors)
 +
 +
 +#####################
 +# Questions:
 +# (1) What rationale do you have for selecting the number of 
 +# clusters / positions that you do?
 +#####################
 +  
 +
 +
 +### NOTE ON DEDUCTIVE CLUSTERING
 +
 +# It's pretty straightforward, using the code above, to explore
 +# your own deductive NetCluster. Simply supply your own cluster
 +# vector, where the elements in the vector are in the same order
 +# as the vertices in the matrix, and the values represent the
 +# cluster to which each vertex belongs. 
 +#
 +# For example, if you believed that actors 2, 7, and 8 formed one
 +# group, actor 16 former another group, and everyone else formed 
 +# a third group, you could represent this as follows:
 +deductive_clusters = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1,
 +                       1, 3)
 +
 +# You could then examine the fitness of this cluster configuration
 +# as follows:
 +deductive_cluster_cor_mat <- generate_cluster_cor_mat(
 +  m182_task_social_cors,
 +  deductive_clusters)
 +deductive_cluster_cor_mat
 +gcor(deductive_cluster_cor_mat, m182_task_social_cors)
 +
 +### END NOTE ON DEDUCTIVE CLUSTERING
 +
 +# Now we'll use the 4-cluster solution to generate blockmodels, 
 +# using the raw tie data from the underlying task and social 
 +# networks.
 +
 +# Task valued
 +task_mean <- mean(as.matrix(m182_task_matrix_row_to_col))
 +task_mean
 +
 +task_valued_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col), clusters)
 +task_valued_blockmodel
 +
 +# Task binary
 +task_density <- graph.density(m182_task)
 +task_density
 +
 +task_binary_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col_bin), clusters)
 +task_binary_blockmodel
 +
 +
 +# Social valued
 +social_mean <- mean(as.matrix(m182_social_matrix_row_to_col))
 +social_mean
 +
 +social_valued_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col), clusters)
 +social_valued_blockmodel
 +
 +# Social binary
 +social_density <- graph.density(m182_social)
 +social_density
 +
 +social_binary_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col_bin), clusters)
 +social_binary_blockmodel
 +
 +# We can also permute the network to examine the within- and 
 +# between-cluster correlations. 
 +
 +cluster_cor_mat_per <- permute_matrix(clusters, cluster_cor_mat)
 +cluster_cor_mat_per
 +
 +
 +#####################
 +# Questions:
 +# (2) What is the story you get from viewing these clusters, 
 +# and their within and between cluster densities on task and 
 +# social interaction? What can you say about M182 from this?
 +#####################
 +
 +#
 +# 4. deleted.
 +#
 +
 +#####################
 +# Questions:
 +# (3) What does clustering of the triadic census afford us? 
 +# What roles do you see? Redo the initial blockmodel analysis
 +# without social interaction (only task) and then compare to 
 +# this solution. Do they differ? 
 +#
 +# Extra credit: Try running the triad census on task AND 
 +# social interaction separately and then correlating persons. 
 +# What result do you get? Is it different from our initial 
 +# blockmodel result? Show your code.  
 +######################
 +
 +
 +
 +###
 +# 5. FACTOR ANALYSIS
 +###
 +
 +# Note that although we are conducting a principal components
 +# analysis (PCA), which is technically not exactly the same as
 +# factor analysis, we will use the term "factor" to describe the
 +# individual components in our PCA. 
 +
 +# PCA is often used in network analysis as a form of detecting 
 +# individuals global positioning. We say "global" because these
 +# clusters aren't defined on local cohesion but from the overall 
 +# pattern of ties individuals have with all others (structural 
 +# equivalence). Identifying the first two largest components that
 +# organize the variance in tie patterns is one way of doing this.
 +
 +# We'll analyze the 4n x n matrix generated above.
 +
 +# First, we want to determine the ideal number of components
 +# (factors) to extract. We'll do this by examining the eigenvalues
 +# in a scree plot and examining how each number of factors stacks
 +# up to a few proposed non-graphical solutions to selecting the
 +# optimal number of components, available via the nFactors
 +# package. 
 +ev <- eigen(cor(as.matrix(m182_task_social_matrix))) # get eigenvalues
 +ap <- parallel(subject=nrow(m182_task_social_matrix),
 + var=ncol(m182_task_social_matrix),
 + rep=100,cent=.05)
 +nS <- nScree(ev$values, ap$eigen$qevpea)
 +
 +# pdf("6.6_m182_studentnet_task_social_pca_scree.pdf")
 +plotnScree(nS) 
 +
 +# To draw a line across the graph where eigenvalues are = 1,
 +# use the following code:
 +plotnScree(nS) 
 +abline(h=1)
 +# dev.off()
 +
 +
 +# For more information on this procedure, please see 
 +# the references provided in the parallel() documentation
 +# (type "?parallel" in the R command line with the package
 +# loaded).
 +
 +# Now we'll run a principal components analysis on the matrix,
 +# using the number of factors determined above (note this may not
 +# be the same number as you get):
 +pca_m182_task_social = principal(as.matrix(m182_task_social_matrix), nfactors=5, rotate="varimax"
 +
 +# Let's take a look at the results in the R terminal:
 +pca_m182_task_social 
 +
 +# You can see the standardized loadings for each factor for each
 +# node. Note that R sometimes puts the factors in a funky order
 +# (e.g. RC1, RC2, RC5, RC4, RC3) but all of the factors are there.
 +# You can see that the SS loadings, proportion of variance
 +# explained and cumulative variance explained is provided below. A
 +# Chi Square test of the factors and various other statistics are
 +# provided below. 
 +
 +# Note that the eigenvalues can be accessed via the following
 +# command:
 +pca_m182_task_social$values
 +
 +# Now we will use the factor loadings to cluster and compare that
 +# to our other NetCluster techniques, using dendrograms.
 +
 +# Take the distance based on Euclidian Distance
 +m182_task_factor_dist = dist(pca_m182_task_social$loadings)
 +
 +# And cluster
 +m182_task_factor_hclust <- hclust(m182_task_factor_dist)
 +
 +# pdf("6.7_m182_studentnet_task_social_pca_hclust.pdf")
 +plot(m182_task_factor_hclust)
 +# dev.off()
 +
 +# And compare to NetCluster based on correlations and triads:
 +# pdf("6.8_m182_task_cluster_by_correlation_PCA_Triads.pdf")
 +par(mfrow = c(1,2))
 +plot(m182_task_social_hclust, main = "Correlation")
 +plot(m182_task_factor_hclust, main = "PCA")
 +# the below one that didn't work
 +# plot(m182_task_triad_hclust, main = "Triads")
 +# dev.off()
  
  
sna_eg_stanford/lab06.1574986453.txt.gz · Last modified: 2019/11/29 09:14 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki