sna_eg_stanford:lab06
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revisionNext revisionBoth sides next revision | ||
sna_eg_stanford:lab06 [2019/12/03 15:16] – hkimscil | sna_eg_stanford:lab06 [2019/12/04 11:15] – hkimscil | ||
---|---|---|---|
Line 39: | Line 39: | ||
library(NetCluster) | library(NetCluster) | ||
+ | # install.packages(c(" | ||
### | ### | ||
#2. LOADING AND FORMATTING DATA | #2. LOADING AND FORMATTING DATA | ||
Line 55: | Line 56: | ||
m182_friend <- delete_edges(m182_full, | m182_friend <- delete_edges(m182_full, | ||
summary(m182_friend) | summary(m182_friend) | ||
+ | m182_friend[] | ||
m182_social <- delete_edges(m182_full, | m182_social <- delete_edges(m182_full, | ||
summary(m182_social) | summary(m182_social) | ||
+ | m182_social[] | ||
m182_task <- delete_edges(m182_full, | m182_task <- delete_edges(m182_full, | ||
summary(m182_task) | summary(m182_task) | ||
+ | m182_task[] | ||
# Look at the plots for each sub-graph | # Look at the plots for each sub-graph | ||
Line 927: | Line 931: | ||
##################### | ##################### | ||
</ | </ | ||
+ | |||
+ | ====== no pdf ====== | ||
+ | < | ||
+ | ########################################################################## | ||
+ | # You may cite these labs as follows: McFarland, Daniel, Solomon Messing, | ||
+ | # Mike Nowak, and Sean Westwood. 2010. " | ||
+ | # Labs in R." Stanford University. | ||
+ | ########################################################################## | ||
+ | |||
+ | |||
+ | ########################################################################## | ||
+ | # LAB 6 - Blockmodeling Lab | ||
+ | # The point of this lab is to introduce students to blockmodeling | ||
+ | # techniques that call for a metric of structural equivalence, | ||
+ | # and rationale for the selection of the number of positions, and then | ||
+ | # a means of summary representation (mean cutoff and reduced graph | ||
+ | # presentation). Students will be shown how to identify positions using | ||
+ | # correlation as a metric of structural equivalence (Euclidean distance | ||
+ | # is used in an earlier lab), and they will be taught how to identify more | ||
+ | # isomorphic notions of role-position using the triad census. Last, the | ||
+ | # lab calls upon the user to compare positional techniques and come up | ||
+ | # with a rationale for why they settle on one over another. | ||
+ | ########################################################################## | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | # NOTE: if you have trouble because some packages are not installed, | ||
+ | # see lab 1 for instructions on how to install all necessary packages. | ||
+ | |||
+ | ### | ||
+ | #1. SETUP | ||
+ | ### | ||
+ | |||
+ | library(igraph) | ||
+ | library(sna) | ||
+ | library(triads) | ||
+ | library(psych) | ||
+ | library(nFactors) | ||
+ | library(NetCluster) | ||
+ | |||
+ | ### | ||
+ | #2. LOADING AND FORMATTING DATA | ||
+ | ### | ||
+ | |||
+ | data(studentnets.M182, | ||
+ | |||
+ | # Reduce to non-zero edges and build a graph object | ||
+ | m182_full_nonzero_edges <- subset(m182_full_data_frame, | ||
+ | head(m182_full_nonzero_edges) | ||
+ | |||
+ | m182_full <- graph.data.frame(m182_full_nonzero_edges) | ||
+ | summary(m182_full) | ||
+ | |||
+ | # Create sub-graphs based on edge attributes | ||
+ | m182_friend <- delete_edges(m182_full, | ||
+ | summary(m182_friend) | ||
+ | |||
+ | m182_social <- delete_edges(m182_full, | ||
+ | summary(m182_social) | ||
+ | |||
+ | m182_task <- delete_edges(m182_full, | ||
+ | summary(m182_task) | ||
+ | |||
+ | # Look at the plots for each sub-graph | ||
+ | # pdf(" | ||
+ | par(mfrow = c(1,3)) | ||
+ | |||
+ | friend_layout <- layout.fruchterman.reingold(m182_friend) | ||
+ | plot(m182_friend, | ||
+ | |||
+ | social_layout <- layout.fruchterman.reingold(m182_social) | ||
+ | plot(m182_social, | ||
+ | |||
+ | task_layout <- layout.fruchterman.reingold(m182_task) | ||
+ | plot(m182_task, | ||
+ | # dev.off() | ||
+ | |||
+ | ### | ||
+ | # 3. HIERARCHICAL CLUSTERING ON SOCIAL & TASK TIES | ||
+ | ### | ||
+ | |||
+ | # We'll use the " | ||
+ | # basis for our structural equivalence methods. First, we'll use | ||
+ | # the task graph to generate an adjacency matrix. | ||
+ | # | ||
+ | # This matrix represents task interactions directed FROM the | ||
+ | # row individual TO the column individual. | ||
+ | m182_task_matrix_row_to_col <- get.adjacency(m182_task, | ||
+ | m182_task_matrix_row_to_col | ||
+ | |||
+ | # To operate on a binary graph, simply leave off the " | ||
+ | # parameter: | ||
+ | m182_task_matrix_row_to_col_bin <- get.adjacency(m182_task) | ||
+ | m182_task_matrix_row_to_col_bin | ||
+ | |||
+ | # For this lab, we'll use the valued graph. The next step is to | ||
+ | # concatenate it with its transpose in order to capture both | ||
+ | # incoming and outgoing task interactions. | ||
+ | m182_task_matrix_col_to_row <- t(as.matrix(m182_task_matrix_row_to_col)) | ||
+ | m182_task_matrix_col_to_row | ||
+ | |||
+ | m182_task_matrix <- rbind(m182_task_matrix_row_to_col, | ||
+ | m182_task_matrix | ||
+ | |||
+ | # Next, we'll use the same procedure to add social-interaction | ||
+ | # information. | ||
+ | m182_social_matrix_row_to_col <- get.adjacency(m182_social, | ||
+ | m182_social_matrix_row_to_col | ||
+ | |||
+ | m182_social_matrix_row_to_col_bin <- get.adjacency(m182_social) | ||
+ | m182_social_matrix_row_to_col_bin | ||
+ | |||
+ | m182_social_matrix_col_to_row <- t(as.matrix(m182_social_matrix_row_to_col)) | ||
+ | m182_social_matrix_col_to_row | ||
+ | |||
+ | m182_social_matrix <- rbind(m182_social_matrix_row_to_col, | ||
+ | m182_social_matrix | ||
+ | |||
+ | m182_task_social_matrix <- rbind(m182_task_matrix, | ||
+ | m182_task_social_matrix | ||
+ | |||
+ | # Now we have a single 4n x n matrix that represents both in- and | ||
+ | # out-directed task and social communication. From this, we can | ||
+ | # generate an n x n correlation matrix that shows the degree of | ||
+ | # structural equivalence of each actor in the network. | ||
+ | m182_task_social_cors <- cor(as.matrix(m182_task_social_matrix)) | ||
+ | m182_task_social_cors | ||
+ | |||
+ | # To use correlation values in hierarchical NetCluster, they must | ||
+ | # first be coerced into a " | ||
+ | # We subtract the values from 1 so that they are all greater than | ||
+ | # or equal to 0; thus, highly dissimilar (i.e., negatively | ||
+ | # correlated) actors have higher values. | ||
+ | dissimilarity <- 1 - m182_task_social_cors | ||
+ | m182_task_social_dist <- as.dist(dissimilarity) | ||
+ | m182_task_social_dist | ||
+ | |||
+ | # Note that it is also possible to use dist() directly on the | ||
+ | # matrix. However, since cor() looks at associations between | ||
+ | # columns and dist() looks at associations between rows, it is | ||
+ | # necessary to transpose the matrix first. | ||
+ | # | ||
+ | # A variety of distance metrics are available; Euclidean | ||
+ | # is the default. | ||
+ | # | ||
+ | # | ||
+ | |||
+ | # hclust() performs a hierarchical agglomerative NetCluster | ||
+ | # operation based on the values in the dissimilarity matrix | ||
+ | # yielded by as.dist() above. The standard visualization is a | ||
+ | # dendrogram. By default, hclust() agglomerates clusters via a | ||
+ | # " | ||
+ | # by looking at the distance of the two points across clusters | ||
+ | # that are farthest away from one another. This can be changed via | ||
+ | # the " | ||
+ | |||
+ | # pdf(" | ||
+ | m182_task_social_hclust <- hclust(m182_task_social_dist) | ||
+ | plot(m182_task_social_hclust) | ||
+ | # dev.off() | ||
+ | |||
+ | # cutree() allows us to use the output of hclust() to set | ||
+ | # different numbers of clusters and assign vertices to clusters | ||
+ | # as appropriate. For example: | ||
+ | cutree(m182_task_social_hclust, | ||
+ | |||
+ | # Now we'll try to figure out the number of clusters that best | ||
+ | # describes the underlying data. To do this, we'll loop through | ||
+ | # all of the possible numbers of clusters (1 through n, where n is | ||
+ | # the number of actors in the network). For each solution | ||
+ | # corresponding to a given number of clusters, we'll use cutree() | ||
+ | # to assign the vertices to their respective clusters | ||
+ | # corresponding to that solution. | ||
+ | # | ||
+ | # From this, we can generate a matrix of within- and between- | ||
+ | # cluster correlations. Thus, when there is one cluster for each | ||
+ | # vertex in the network, the cell values will be identical to the | ||
+ | # observed correlation matrix, and when there is one cluster for | ||
+ | # the whole network, the values will all be equal to the average | ||
+ | # correlation across the observed matrix. | ||
+ | # | ||
+ | # We can then correlate each by-cluster matrix with the observed | ||
+ | # correlation matrix to see how well the by-cluster matrix fits | ||
+ | # the data. We'll store the correlation for each number of | ||
+ | # clusters in a vector, which we can then plot. | ||
+ | |||
+ | # First, we initialize a vector for storing the correlations and | ||
+ | # set a variable for our number of vertices. | ||
+ | clustered_observed_cors = vector() | ||
+ | num_vertices = length(V(m182_task)) | ||
+ | |||
+ | # Next, we loop through the different possible cluster | ||
+ | # configurations, | ||
+ | # cluster correlations, | ||
+ | # with the observed correlation matrix. | ||
+ | |||
+ | # pdf(" | ||
+ | clustered_observed_cors < | ||
+ | clustered_observed_cors | ||
+ | plot(clustered_observed_cors$correlations) | ||
+ | # dev.off() | ||
+ | |||
+ | clustered_observed_cors$correlations | ||
+ | # From a visual inspection of the correlation matrix, we can | ||
+ | # decide on the proper number of clusters in this network. | ||
+ | # For this network, we'll use 4. (Note that the 1-cluster | ||
+ | # solution doesn' | ||
+ | # with the observed correlation matrix is undefined.) | ||
+ | num_clusters = 4 | ||
+ | clusters <- cutree(m182_task_social_hclust, | ||
+ | clusters | ||
+ | |||
+ | cluster_cor_mat <- clusterCorr(m182_task_social_cors, | ||
+ | clusters) | ||
+ | cluster_cor_mat | ||
+ | |||
+ | # Let's look at the correlation between this cluster configuration | ||
+ | # and the observed correlation matrix. This should match the | ||
+ | # corresponding value from clustered_observed_cors above. | ||
+ | gcor(cluster_cor_mat, | ||
+ | |||
+ | |||
+ | ##################### | ||
+ | # Questions: | ||
+ | # (1) What rationale do you have for selecting the number of | ||
+ | # clusters / positions that you do? | ||
+ | ##################### | ||
+ | | ||
+ | |||
+ | |||
+ | ### NOTE ON DEDUCTIVE CLUSTERING | ||
+ | |||
+ | # It's pretty straightforward, | ||
+ | # your own deductive NetCluster. Simply supply your own cluster | ||
+ | # vector, where the elements in the vector are in the same order | ||
+ | # as the vertices in the matrix, and the values represent the | ||
+ | # cluster to which each vertex belongs. | ||
+ | # | ||
+ | # For example, if you believed that actors 2, 7, and 8 formed one | ||
+ | # group, actor 16 formed another group, and everyone else formed | ||
+ | # a third group, you could represent this as follows: | ||
+ | deductive_clusters = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, | ||
+ | 1, 3) | ||
+ | |||
+ | # You could then examine the fitness of this cluster configuration | ||
+ | # as follows: | ||
+ | deductive_cluster_cor_mat <- generate_cluster_cor_mat( | ||
+ | m182_task_social_cors, | ||
+ | deductive_clusters) | ||
+ | deductive_cluster_cor_mat | ||
+ | gcor(deductive_cluster_cor_mat, | ||
+ | |||
+ | ### END NOTE ON DEDUCTIVE CLUSTERING | ||
+ | |||
+ | # Now we'll use the 4-cluster solution to generate blockmodels, | ||
+ | # using the raw tie data from the underlying task and social | ||
+ | # networks. | ||
+ | |||
+ | # Task valued | ||
+ | task_mean <- mean(as.matrix(m182_task_matrix_row_to_col)) | ||
+ | task_mean | ||
+ | |||
+ | task_valued_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col), | ||
+ | task_valued_blockmodel | ||
+ | |||
+ | # Task binary | ||
+ | task_density <- graph.density(m182_task) | ||
+ | task_density | ||
+ | |||
+ | task_binary_blockmodel <- blockmodel(as.matrix(m182_task_matrix_row_to_col_bin), | ||
+ | task_binary_blockmodel | ||
+ | |||
+ | |||
+ | # Social valued | ||
+ | social_mean <- mean(as.matrix(m182_social_matrix_row_to_col)) | ||
+ | social_mean | ||
+ | |||
+ | social_valued_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col), | ||
+ | social_valued_blockmodel | ||
+ | |||
+ | # Social binary | ||
+ | social_density <- graph.density(m182_social) | ||
+ | social_density | ||
+ | |||
+ | social_binary_blockmodel <- blockmodel(as.matrix(m182_social_matrix_row_to_col_bin), | ||
+ | social_binary_blockmodel | ||
+ | |||
+ | # We can also permute the network to examine the within- and | ||
+ | # between-cluster correlations. | ||
+ | |||
+ | cluster_cor_mat_per <- permute_matrix(clusters, | ||
+ | cluster_cor_mat_per | ||
+ | |||
+ | |||
+ | ##################### | ||
+ | # Questions: | ||
+ | # (2) What is the story you get from viewing these clusters, | ||
+ | # and their within and between cluster densities on task and | ||
+ | # social interaction? | ||
+ | ##################### | ||
+ | |||
+ | # | ||
+ | # 4. deleted. | ||
+ | # | ||
+ | |||
+ | ##################### | ||
+ | # Questions: | ||
+ | # (3) What does clustering of the triadic census afford us? | ||
+ | # What roles do you see? Redo the initial blockmodel analysis | ||
+ | # without social interaction (only task) and then compare to | ||
+ | # this solution. Do they differ? | ||
+ | # | ||
+ | # Extra credit: Try running the triad census on task AND | ||
+ | # social interaction separately and then correlating persons. | ||
+ | # What result do you get? Is it different from our initial | ||
+ | # blockmodel result? Show your code. | ||
+ | ###################### | ||
+ | |||
+ | |||
+ | |||
+ | ### | ||
+ | # 5. FACTOR ANALYSIS | ||
+ | ### | ||
+ | |||
+ | # Note that although we are conducting a principal components | ||
+ | # analysis (PCA), which is technically not exactly the same as | ||
+ | # factor analysis, we will use the term " | ||
+ | # individual components in our PCA. | ||
+ | |||
+ | # PCA is often used in network analysis as a form of detecting | ||
+ | # individuals' global positioning. We say " | ||
+ | # clusters aren't defined on local cohesion but from the overall | ||
+ | # pattern of ties individuals have with all others (structural | ||
+ | # equivalence). Identifying the first two largest components that | ||
+ | # organize the variance in tie patterns is one way of doing this. | ||
+ | |||
+ | # We'll analyze the 4n x n matrix generated above. | ||
+ | |||
+ | # First, we want to determine the ideal number of components | ||
+ | # (factors) to extract. We'll do this by examining the eigenvalues | ||
+ | # in a scree plot and examining how each number of factors stacks | ||
+ | # up to a few proposed non-graphical solutions to selecting the | ||
+ | # optimal number of components, available via the nFactors | ||
+ | # package. | ||
+ | ev <- eigen(cor(as.matrix(m182_task_social_matrix))) # get eigenvalues | ||
+ | ap <- parallel(subject=nrow(m182_task_social_matrix), | ||
+ | var=ncol(m182_task_social_matrix), | ||
+ | rep=100, | ||
+ | nS <- nScree(ev$values, | ||
+ | |||
+ | # pdf(" | ||
+ | plotnScree(nS) | ||
+ | |||
+ | # To draw a line across the graph where eigenvalues are = 1, | ||
+ | # use the following code: | ||
+ | plotnScree(nS) | ||
+ | abline(h=1) | ||
+ | # dev.off() | ||
+ | |||
+ | |||
+ | # For more information on this procedure, please see | ||
+ | # the references provided in the parallel() documentation | ||
+ | # (type "? | ||
+ | # loaded). | ||
+ | |||
+ | # Now we'll run a principal components analysis on the matrix, | ||
+ | # using the number of factors determined above (note this may not | ||
+ | # be the same number as you get): | ||
+ | pca_m182_task_social = principal(as.matrix(m182_task_social_matrix), | ||
+ | |||
+ | # Let's take a look at the results in the R terminal: | ||
+ | pca_m182_task_social | ||
+ | |||
+ | # You can see the standardized loadings for each factor for each | ||
+ | # node. Note that R sometimes puts the factors in a funky order | ||
+ | # (e.g. RC1, RC2, RC5, RC4, RC3) but all of the factors are there. | ||
+ | # You can see that the SS loadings, proportion of variance | ||
+ | # explained and cumulative variance explained is provided below. A | ||
+ | # Chi Square test of the factors and various other statistics are | ||
+ | # provided below. | ||
+ | |||
+ | # Note that the eigenvalues can be accessed via the following | ||
+ | # command: | ||
+ | pca_m182_task_social$values | ||
+ | |||
+ | # Now we will use the factor loadings to cluster and compare that | ||
+ | # to our other NetCluster techniques, using dendrograms. | ||
+ | |||
+ | # Take the distance based on Euclidean Distance | ||
+ | m182_task_factor_dist = dist(pca_m182_task_social$loadings) | ||
+ | |||
+ | # And cluster | ||
+ | m182_task_factor_hclust <- hclust(m182_task_factor_dist) | ||
+ | |||
+ | # pdf(" | ||
+ | plot(m182_task_factor_hclust) | ||
+ | # dev.off() | ||
+ | |||
+ | # And compare to NetCluster based on correlations and triads: | ||
+ | # pdf(" | ||
+ | par(mfrow = c(1,3)) | ||
+ | plot(m182_task_social_hclust, | ||
+ | plot(m182_task_factor_hclust, | ||
+ | plot(m182_task_triad_hclust, | ||
+ | # dev.off() | ||
+ | |||
+ | |||
+ | |||
+ | ##################### | ||
+ | # Questions: | ||
+ | # (4) How do the results across blockmodel techniques differ? | ||
+ | # Why might you use one over the other? Why might you want to | ||
+ | # run more than one in your analyses? | ||
+ | ##################### | ||
+ | </ | ||
+ |
sna_eg_stanford/lab06.txt · Last modified: 2019/12/11 10:40 by hkimscil