sna_eg_stanford:lab06

Differences

This shows you the differences between two versions of the page.

sna_eg_stanford:lab06 [2019/12/03 15:16] hkimscil → sna_eg_stanford:lab06 [2019/12/04 11:15] hkimscil
Line 39:
 library(NetCluster)
 
+# install.packages(c("sna", "NetCluster"))
 ###
 #2. LOADING AND FORMATTING DATA
Line 55 → Line 56:
 m182_friend <- delete_edges(m182_full, E(m182_full)[E(m182_full)$friend_tie==0])
 summary(m182_friend)
+m182_friend[]
 
 m182_social <- delete_edges(m182_full, E(m182_full)[E(m182_full)$social_tie==0])
 summary(m182_social)
+m182_social[]
 
 m182_task <- delete_edges(m182_full, E(m182_full)[E(m182_full)$task_tie==0])
 summary(m182_task)
+m182_task[]
 
 # Look at the plots for each sub-graph
Line 927 → Line 931:
 #####################
 </code>
 +
 +====== no pdf ======
 +<code>
 +##########################################################################
 +# You may cite these labs as follows: McFarland, Daniel, Solomon Messing,
 +# Mike Nowak, and Sean Westwood. 2010. "Social Network Analysis          
 +# Labs in R." Stanford University.                                       
 +##########################################################################
 + 
 + 
 +##########################################################################
 +# LAB 6 - Blockmodeling Lab                                              
 +# The point of this lab is to introduce students to blockmodeling 
 +# techniques that call for a metric of structural equivalence, a method
 +# and rationale for the selection of the number of positions, and then 
 +# a means of summary representation (mean cutoff and reduced graph 
 +# presentation). Students will be shown how to identify positions using
+# correlation as a metric of structural equivalence (Euclidean distance 
+# is used in an earlier lab), and they will be taught how to identify more
 +# isomorphic notions of role-position using the triad census. Last, the 
 +# lab calls upon the user to compare positional techniques and come up 
 +# with a rationale for why they settle on one over another.  
 +##########################################################################
 + 
 +
 +
 +
 +# NOTE: if you have trouble because some packages are not installed, 
 +# see lab 1 for instructions on how to install all necessary packages.
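+
+# If any of these packages are missing, most can be installed from CRAN,
+# e.g. (added note; adjust the package list to what your setup needs):
+# install.packages(c("igraph", "sna", "psych", "nFactors", "NetCluster", "NetData"))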
 +
 +###
 +#1. SETUP
 +###
 +
 +library(igraph)
 +library(sna)
 +library(triads)
 +library(psych)
 +library(nFactors)
 +library(NetCluster)
 +
 +###
 +#2. LOADING AND FORMATTING DATA
 +###
 +
 +data(studentnets.M182, package = "NetData")
 +
 +# Reduce to non-zero edges and build a graph object
 +m182_full_nonzero_edges <- subset(m182_full_data_frame, (friend_tie > 0 | social_tie > 0 | task_tie > 0))
 +head(m182_full_nonzero_edges)
 +
 +m182_full <- graph.data.frame(m182_full_nonzero_edges) 
 +summary(m182_full)
 +
 +# Create sub-graphs based on edge attributes
 +m182_friend <- delete_edges(m182_full, E(m182_full)[E(m182_full)$friend_tie==0])
 +summary(m182_friend)
 +
 +m182_social <- delete_edges(m182_full, E(m182_full)[E(m182_full)$social_tie==0])
 +summary(m182_social)
 +
 +m182_task <- delete_edges(m182_full, E(m182_full)[E(m182_full)$task_tie==0])
 +summary(m182_task)
 +
 +# Look at the plots for each sub-graph
 +# pdf("6.1_m182_studentnet_friend_social_task_plots.pdf", width = 10)
 +par(mfrow = c(1,3))
 +
 +friend_layout <- layout.fruchterman.reingold(m182_friend)
 +plot(m182_friend, layout=friend_layout, main = "friend", edge.arrow.size=.5)
 +
 +social_layout <- layout.fruchterman.reingold(m182_social)
 +plot(m182_social, layout=social_layout, main = "social", edge.arrow.size=.5)
 +
 +task_layout <- layout.fruchterman.reingold(m182_task)
 +plot(m182_task, layout=task_layout, main = "task", edge.arrow.size=.5)
 +# dev.off()
 +
 +###
 +# 3. HIERARCHICAL CLUSTERING ON SOCIAL & TASK TIES
 +###
 +
 +# We'll use the "task" and "social" sub-graphs together as the
 +# basis for our structural equivalence methods. First, we'll use
 +# the task graph to generate an adjacency matrix.
 +#
 +# This matrix represents task interactions directed FROM the 
 +# row individual TO the column individual. 
 +m182_task_matrix_row_to_col <- get.adjacency(m182_task, attr='task_tie')
 +m182_task_matrix_row_to_col
 +
 +# To operate on a binary graph, simply leave off the "attr" 
 +# parameter:
 +m182_task_matrix_row_to_col_bin <- get.adjacency(m182_task)
 +m182_task_matrix_row_to_col_bin
 +
 +# For this lab, we'll use the valued graph. The next step is to 
 +# concatenate it with its transpose in order to capture both 
 +# incoming and outgoing task interactions.
 +m182_task_matrix_col_to_row <- t(as.matrix(m182_task_matrix_row_to_col))
 +m182_task_matrix_col_to_row
 +
 +m182_task_matrix <- rbind(m182_task_matrix_row_to_col, m182_task_matrix_col_to_row)
 +m182_task_matrix
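+
+# Added toy illustration (not part of the original lab): in rbind(M, t(M)),
+# column j stacks j's incoming ties (column j of M) on top of j's
+# outgoing ties (row j of M), so each column is one actor's full profile.
+toy <- matrix(c(0, 1, 0,
+                2, 0, 0,
+                0, 3, 0), nrow = 3, byrow = TRUE)
+rbind(toy, t(toy))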
 +
 +# Next, we'll use the same procedure to add social-interaction
 +# information.
 +m182_social_matrix_row_to_col <- get.adjacency(m182_social, attr='social_tie')
 +m182_social_matrix_row_to_col
 +
 +m182_social_matrix_row_to_col_bin <- get.adjacency(m182_social)
 +m182_social_matrix_row_to_col_bin
 +
 +m182_social_matrix_col_to_row <- t(as.matrix(m182_social_matrix_row_to_col))
 +m182_social_matrix_col_to_row
 +
 +m182_social_matrix <- rbind(m182_social_matrix_row_to_col, m182_social_matrix_col_to_row)
 +m182_social_matrix
 +
 +m182_task_social_matrix <- rbind(m182_task_matrix, m182_social_matrix)
 +m182_task_social_matrix
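+
+# Quick sanity check (added): the stacked matrix should have 4n rows and
+# n columns (task incoming, task outgoing, social incoming, social
+# outgoing, each n x n).
+dim(m182_task_social_matrix)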
 +
 +# Now we have a single 4n x n matrix that represents both in- and
 +# out-directed task and social communication. From this, we can
 +# generate an n x n correlation matrix that shows the degree of
 +# structural equivalence of each actor in the network. 
 +m182_task_social_cors <- cor(as.matrix(m182_task_social_matrix))
 +m182_task_social_cors
 +
+# To use correlation values in hierarchical clustering, they must 
 +# first be coerced into a "dissimilarity structure" using dist().
 +# We subtract the values from 1 so that they are all greater than 
 +# or equal to 0; thus, highly dissimilar (i.e., negatively 
 +# correlated) actors have higher values.
 +dissimilarity <- 1 - m182_task_social_cors
 +m182_task_social_dist <- as.dist(dissimilarity)
 +m182_task_social_dist
 +
 +# Note that it is also possible to use dist() directly on the 
 +# matrix. However, since cor() looks at associations between 
 +# columns and dist() looks at associations between rows, it is
 +# necessary to transpose the matrix first.
 +#
 +# A variety of distance metrics are available; Euclidean 
 +# is the default.
 +#m182_task_social_dist <- dist(t(m182_task_social_matrix))
 +#m182_task_social_dist
 +
+# hclust() performs a hierarchical agglomerative clustering 
 +# operation based on the values in the dissimilarity matrix 
 +# yielded by as.dist() above. The standard visualization is a 
 +# dendrogram. By default, hclust() agglomerates clusters via a
+# "complete linkage" algorithm, determining cluster proximity
 +# by looking at the distance of the two points across clusters
 +# that are farthest away from one another. This can be changed via
 +# the "method" parameter.
 +
 +# pdf("6.2_m182_studentnet_social_hclust.pdf")
 +m182_task_social_hclust <- hclust(m182_task_social_dist)
 +plot(m182_task_social_hclust)
 +# dev.off()
 +
 +# cutree() allows us to use the output of hclust() to set
 +# different numbers of clusters and assign vertices to clusters
 +# as appropriate. For example:
 +cutree(m182_task_social_hclust, k=2)
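+
+# cutree() also accepts a vector of cluster counts; each column of the
+# result is the membership vector for one value of k (added example):
+cutree(m182_task_social_hclust, k = 2:4)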
 +
 +# Now we'll try to figure out the number of clusters that best 
 +# describes the underlying data. To do this, we'll loop through
 +# all of the possible numbers of clusters (1 through n, where n is
 +# the number of actors in the network). For each solution
 +# corresponding to a given number of clusters, we'll use cutree()
 +# to assign the vertices to their respective clusters 
 +# corresponding to that solution.
 +#
 +# From this, we can generate a matrix of within- and between-
 +# cluster correlations. Thus, when there is one cluster for each 
 +# vertex in the network, the cell values will be identical to the
 +# observed correlation matrix, and when there is one cluster for 
 +# the whole network, the values will all be equal to the average
 +# correlation across the observed matrix.
 +#
 +# We can then correlate each by-cluster matrix with the observed
 +# correlation matrix to see how well the by-cluster matrix fits
 +# the data. We'll store the correlation for each number of
 +# clusters in a vector, which we can then plot.
 +
 +# First, we initialize a vector for storing the correlations and 
 +# set a variable for our number of vertices.
 +clustered_observed_cors = vector()
 +num_vertices = length(V(m182_task))
 +
 +# Next, we loop through the different possible cluster 
 +# configurations, produce matrices of within- and between-
 +# cluster correlations, and correlate these by-cluster matrices
 +# with the observed correlation matrix.
 +
 +# pdf("6.3_m182_studentnet_task_social_clustered_observed_corrs.pdf")
 +clustered_observed_cors <-clustConfigurations(num_vertices,m182_task_social_hclust,m182_task_social_cors)
 +clustered_observed_cors
 +plot(clustered_observed_cors$correlations)
 +# dev.off()
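+
+# The following loop is a minimal sketch (added; not in the original lab)
+# of the idea behind clustConfigurations(), built from clusterCorr() and
+# gcor() as they are used elsewhere in this lab. The 1-cluster solution is
+# skipped because its correlation with the observed matrix is undefined.
+manual_cors <- rep(NA, num_vertices)
+for (k in 2:num_vertices) {
+  k_clusters <- cutree(m182_task_social_hclust, k = k)
+  k_cluster_cor_mat <- clusterCorr(m182_task_social_cors, k_clusters)
+  manual_cors[k] <- gcor(k_cluster_cor_mat, m182_task_social_cors)
+}
+manual_cors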
 +
 +clustered_observed_cors$correlations
+# From a visual inspection of these correlations (and the plot above), 
+# we can decide on the proper number of clusters in this network. 
 +# For this network, we'll use 4. (Note that the 1-cluster 
 +# solution doesn't appear on the plot because its correlation 
 +# with the observed correlation matrix is undefined.)
 +num_clusters = 4
 +clusters <- cutree(m182_task_social_hclust, k = num_clusters)
 +clusters
 +
 +cluster_cor_mat <- clusterCorr(m182_task_social_cors,
 +                                            clusters)
 +cluster_cor_mat
 +
 +# Let's look at the correlation between this cluster configuration 
 +# and the observed correlation matrix. This should match the 
 +# corresponding value from clustered_observed_cors above.
 +gcor(cluster_cor_mat, m182_task_social_cors)
 +
 +
 +#####################
 +# Questions:
 +# (1) What rationale do you have for selecting the number of 
 +# clusters / positions that you do?
 +#####################
 +  
 +
 +
 +### NOTE ON DEDUCTIVE CLUSTERING
 +
 +# It's pretty straightforward, using the code above, to explore
+# your own deductive clustering. Simply supply your own cluster
 +# vector, where the elements in the vector are in the same order
 +# as the vertices in the matrix, and the values represent the
 +# cluster to which each vertex belongs. 
 +#
 +# For example, if you believed that actors 2, 7, and 8 formed one
+# group, actor 16 formed another group, and everyone else formed 
 +# a third group, you could represent this as follows:
 +deductive_clusters = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1,
 +                       1, 3)
 +
 +# You could then examine the fitness of this cluster configuration
 +# as follows:
+deductive_cluster_cor_mat <- clusterCorr(
+  m182_task_social_cors,
+  deductive_clusters)
 +deductive_cluster_cor_mat
 +gcor(deductive_cluster_cor_mat, m182_task_social_cors)
 +
 +### END NOTE ON DEDUCTIVE CLUSTERING
 +
 +# Now we'll use the 4-cluster solution to generate blockmodels, 
 +# using the raw tie data from the underlying task and social 
 +# networks.
 +
 +# Task valued
 +task_mean <- mean(as.matrix(m182_task_matrix_row_to_col))
 +task_mean
 +
 +task_valued_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col), clusters)
 +task_valued_blockmodel
 +
 +# Task binary
 +task_density <- graph.density(m182_task)
 +task_density
 +
 +task_binary_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col_bin), clusters)
 +task_binary_blockmodel
 +
 +
 +# Social valued
 +social_mean <- mean(as.matrix(m182_social_matrix_row_to_col))
 +social_mean
 +
 +social_valued_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col), clusters)
 +social_valued_blockmodel
 +
 +# Social binary
 +social_density <- graph.density(m182_social)
 +social_density
 +
 +social_binary_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col_bin), clusters)
 +social_binary_blockmodel
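+
+# Added sketch (not part of the original lab) of the "mean cutoff and
+# reduced graph presentation" mentioned in the lab header: block means
+# above the overall mean become ties between positions. This assumes the
+# blockmodel objects expose their block densities/means as $block.model
+# (as sna::blockmodel objects do).
+task_image <- task_valued_blockmodel$block.model > task_mean
+task_image[is.na(task_image)] <- FALSE
+task_image
+
+social_image <- social_valued_blockmodel$block.model > social_mean
+social_image[is.na(social_image)] <- FALSE
+social_image
+
+# Reduced graphs, with one node per position
+par(mfrow = c(1, 2))
+plot(graph.adjacency(task_image * 1, mode = "directed"),
+     main = "task (reduced)", edge.arrow.size = .5)
+plot(graph.adjacency(social_image * 1, mode = "directed"),
+     main = "social (reduced)", edge.arrow.size = .5)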
 +
 +# We can also permute the network to examine the within- and 
 +# between-cluster correlations. 
 +
 +cluster_cor_mat_per <- permute_matrix(clusters, cluster_cor_mat)
 +cluster_cor_mat_per
 +
 +
 +#####################
 +# Questions:
 +# (2) What is the story you get from viewing these clusters, 
 +# and their within and between cluster densities on task and 
 +# social interaction? What can you say about M182 from this?
 +#####################
 +
+#
+# 4. TRIAD CENSUS (deleted from this page; a substitute sketch follows)
+#
 +
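+# Section 4 (the triad census) was removed from this page, but the
+# "Triads" dendrogram in section 5 still expects m182_task_triad_hclust.
+# What follows is a minimal substitute sketch, NOT the original lab code:
+# for each actor, count the 16 triad types (igraph's triad census) over
+# all triads that actor belongs to in the task network, then cluster
+# actors on the correlations between these triad profiles.
+n_task <- vcount(m182_task)
+triad_profiles <- matrix(0, nrow = 16, ncol = n_task)
+for (i in 1:n_task) {
+  other_pairs <- combn(setdiff(1:n_task, i), 2)
+  for (p in 1:ncol(other_pairs)) {
+    triad <- induced.subgraph(m182_task, c(i, other_pairs[, p]))
+    triad_profiles[, i] <- triad_profiles[, i] + igraph::triad.census(triad)
+  }
+}
+colnames(triad_profiles) <- V(m182_task)$name
+
+m182_task_triad_cors <- cor(triad_profiles)
+m182_task_triad_dist <- as.dist(1 - m182_task_triad_cors)
+m182_task_triad_hclust <- hclust(m182_task_triad_dist)
+plot(m182_task_triad_hclust)
+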
 +#####################
 +# Questions:
 +# (3) What does clustering of the triadic census afford us? 
 +# What roles do you see? Redo the initial blockmodel analysis
 +# without social interaction (only task) and then compare to 
 +# this solution. Do they differ? 
 +#
 +# Extra credit: Try running the triad census on task AND 
 +# social interaction separately and then correlating persons. 
 +# What result do you get? Is it different from our initial 
 +# blockmodel result? Show your code.  
 +######################
 +
 +
 +
 +###
 +# 5. FACTOR ANALYSIS
 +###
 +
 +# Note that although we are conducting a principal components
 +# analysis (PCA), which is technically not exactly the same as
 +# factor analysis, we will use the term "factor" to describe the
 +# individual components in our PCA. 
 +
+# PCA is often used in network analysis as a way of detecting 
+# individuals' global positioning. We say "global" because these
+# clusters aren't defined by local cohesion but by the overall 
+# pattern of ties individuals have with all others (structural 
+# equivalence). Identifying the two largest components that
+# organize the variance in tie patterns is one way of doing this.
 +
 +# We'll analyze the 4n x n matrix generated above.
 +
 +# First, we want to determine the ideal number of components
 +# (factors) to extract. We'll do this by examining the eigenvalues
 +# in a scree plot and examining how each number of factors stacks
 +# up to a few proposed non-graphical solutions to selecting the
 +# optimal number of components, available via the nFactors
 +# package. 
 +ev <- eigen(cor(as.matrix(m182_task_social_matrix))) # get eigenvalues
 +ap <- parallel(subject=nrow(m182_task_social_matrix),
 + var=ncol(m182_task_social_matrix),
 + rep=100,cent=.05)
 +nS <- nScree(ev$values, ap$eigen$qevpea)
 +
 +# pdf("6.6_m182_studentnet_task_social_pca_scree.pdf")
 +plotnScree(nS) 
 +
 +# To draw a line across the graph where eigenvalues are = 1,
 +# use the following code:
 +plotnScree(nS) 
 +abline(h=1)
 +# dev.off()
 +
 +
 +# For more information on this procedure, please see 
 +# the references provided in the parallel() documentation
 +# (type "?parallel" in the R command line with the package
 +# loaded).
 +
 +# Now we'll run a principal components analysis on the matrix,
 +# using the number of factors determined above (note this may not
 +# be the same number as you get):
+pca_m182_task_social = principal(as.matrix(m182_task_social_matrix), nfactors=5, rotate="varimax")
 +
 +# Let's take a look at the results in the R terminal:
 +pca_m182_task_social 
 +
 +# You can see the standardized loadings for each factor for each
 +# node. Note that R sometimes puts the factors in a funky order
 +# (e.g. RC1, RC2, RC5, RC4, RC3) but all of the factors are there.
+# You can see that the SS loadings, the proportion of variance
+# explained, and the cumulative variance explained are provided 
+# below, along with a chi-square test of the factors and various 
+# other statistics. 
 +
 +# Note that the eigenvalues can be accessed via the following
 +# command:
 +pca_m182_task_social$values
 +
 +# Now we will use the factor loadings to cluster and compare that
+# to our other clustering techniques, using dendrograms.
 +
+# Compute distances between the factor loadings (Euclidean distance)
 +m182_task_factor_dist = dist(pca_m182_task_social$loadings)
 +
 +# And cluster
 +m182_task_factor_hclust <- hclust(m182_task_factor_dist)
 +
 +# pdf("6.7_m182_studentnet_task_social_pca_hclust.pdf")
 +plot(m182_task_factor_hclust)
 +# dev.off()
 +
+# And compare to the clusterings based on correlations and triads:
 +# pdf("6.8_m182_task_cluster_by_correlation_PCA_Triads.pdf")
 +par(mfrow = c(1,3))
 +plot(m182_task_social_hclust, main = "Correlation")
 +plot(m182_task_factor_hclust, main = "PCA")
 +plot(m182_task_triad_hclust, main = "Triads")
 +# dev.off()
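+
+# Added follow-up (not in the original lab): cross-tabulate the position
+# assignments from the three approaches at the 4-cluster solution used
+# above, as a numeric complement to the dendrograms.
+pca_clusters   <- cutree(m182_task_factor_hclust, k = num_clusters)
+triad_clusters <- cutree(m182_task_triad_hclust, k = num_clusters)
+table(correlation = clusters, pca = pca_clusters)
+table(correlation = clusters, triads = triad_clusters)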
 +
 +
 +
 +#####################
 +# Questions:
 +# (4) How do the results across blockmodel techniques differ? 
 +# Why might you use one over the other? Why might you want to 
 +# run more than one in your analyses?
 +#####################
 +</code>
 +