Krackhardt Datasets
The Krackhardt dataset in the NetData package
install.packages("NetData")
library(NetData)
data(package="NetData")
data(kracknets, package = "NetData")
head(krack_full_data_frame)
> head(krack_full_data_frame)
  ego alter advice_tie friendship_tie reports_to_tie
1   1     1          0              0              0
2   1     2          1              1              1
3   1     3          0              0              0
4   1     4          1              1              0
5   1     5          0              0              0
6   1     6          0              0              0
> 
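# The steps below also rely on several other packages: igraph (graph building,
# adjacency matrices, plotting), sna (blockmodel, gcor), psych (principal), and
# nFactors (parallel, nScree, plotnScree). The clustering helpers
# clustConfigurations() and clusterCorr() are assumed here to come from the
# NetCluster package; generate_cluster_cor_mat() and permute_matrix() come from
# the lab's accompanying helper code, which is not shown on this page.
# install.packages(c("igraph", "sna", "psych", "nFactors", "NetCluster"))
library(igraph)
library(sna)
library(psych)
library(nFactors)
library(NetCluster)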
krack_full_nonzero_edges <- subset(krack_full_data_frame, (friendship_tie > 0 | advice_tie > 0 | reports_to_tie > 0))
head(krack_full_nonzero_edges)
> krack_full_nonzero_edges <- subset(krack_full_data_frame, (friendship_tie > 0 | advice_tie > 0 | reports_to_tie > 0))
> head(krack_full_nonzero_edges)
   ego alter advice_tie friendship_tie reports_to_tie
2    1     2          1              1              1
4    1     4          1              1              0
8    1     8          1              1              0
12   1    12          0              1              0
16   1    16          1              1              0
18   1    18          1              0              0
> 
krack_full <- graph.data.frame(krack_full_nonzero_edges)
summary(krack_full)
> krack_full <- graph.data.frame(krack_full_nonzero_edges)
> summary(krack_full)
IGRAPH 750f8b3 DN-- 21 232 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
> 
krack_friend <- delete.edges(krack_full, E(krack_full)[E(krack_full)$friendship_tie==0])
summary(krack_friend)

krack_advice <- delete.edges(krack_full, E(krack_full)[E(krack_full)$advice_tie==0])
summary(krack_advice)

krack_reports_to <- delete.edges(krack_full, E(krack_full)[E(krack_full)$reports_to_tie==0])
summary(krack_reports_to)
> krack_friend <- delete.edges(krack_full, E(krack_full)[E(krack_full)$friendship_tie==0])
> summary(krack_friend)
IGRAPH 51e7962 DN-- 21 102 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
> 
> krack_advice <- delete.edges(krack_full, E(krack_full)[E(krack_full)$advice_tie==0])
> summary(krack_advice)
IGRAPH 51f2910 DN-- 21 190 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
> 
> krack_reports_to <- delete.edges(krack_full, E(krack_full)[E(krack_full)$reports_to_tie==0])
> summary(krack_reports_to)
IGRAPH 51fe4f9 DN-- 21 232 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
> 
par(mfrow = c(1,3))

krack_friend_layout <- layout.fruchterman.reingold(krack_friend)
plot(krack_friend, layout=krack_friend_layout, main = "friend", edge.arrow.size=.5)

krack_advice_layout <- layout.fruchterman.reingold(krack_advice)
plot(krack_advice, layout=krack_advice_layout, main = "advice", edge.arrow.size=.5)

krack_reports_to_layout <- layout.fruchterman.reingold(krack_reports_to)
plot(krack_reports_to, layout=krack_reports_to_layout, main = "reports to", edge.arrow.size=.5)

par(mfrow = c(1,1))
# We'll use the "reports-to" and "advice" sub-graphs together as the
# basis for our structural equivalence methods. First, we'll use
# the reports-to graph to generate an adjacency matrix.
#
# This matrix represents reports-to ties directed FROM the
# row individual TO the column individual.
krack_reports_to_matrix_row_to_col <- get.adjacency(krack_reports_to, attr='reports_to_tie')
krack_reports_to_matrix_row_to_col

# To operate on a binary graph, simply leave off the "attr"
# parameter:
krack_reports_to_matrix_row_to_col_bin <- get.adjacency(krack_reports_to)
krack_reports_to_matrix_row_to_col_bin

# For this lab, we'll use the valued graph. The next step is to
# concatenate it with its transpose in order to capture both
# incoming and outgoing reports-to ties.
krack_reports_to_matrix_col_to_row <- t(as.matrix(krack_reports_to_matrix_row_to_col))
krack_reports_to_matrix_col_to_row

krack_reports_to_matrix <- rbind(krack_reports_to_matrix_row_to_col, krack_reports_to_matrix_col_to_row)
krack_reports_to_matrix
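# A quick sanity check of the direction convention (illustrative, assuming the
# vertex names match the ego/alter IDs from the data frame): the reports-to
# tie 1 -> 2 seen in krack_full_data_frame should appear in row "1", column "2".
as.matrix(krack_reports_to_matrix_row_to_col)["1", "2"]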
# Next, we'll use the same procedure to add the advice-tie
# information.
krack_advice_matrix_row_to_col <- get.adjacency(krack_advice, attr='advice_tie')
krack_advice_matrix_row_to_col

krack_advice_matrix_row_to_col_bin <- get.adjacency(krack_advice)
krack_advice_matrix_row_to_col_bin

krack_advice_matrix_col_to_row <- t(as.matrix(krack_advice_matrix_row_to_col))
krack_advice_matrix_col_to_row

krack_advice_matrix <- rbind(krack_advice_matrix_row_to_col, krack_advice_matrix_col_to_row)
krack_advice_matrix

krack_reports_to_advice_matrix <- rbind(krack_reports_to_matrix, krack_advice_matrix)
krack_reports_to_advice_matrix
# Now we have a single 4n x n matrix that represents both in- and
# out-directed reports-to and advice ties. From this, we can
# generate an n x n correlation matrix that shows the degree of
# structural equivalence of each actor in the network.
krack_reports_to_advice_cors <- cor(as.matrix(krack_reports_to_advice_matrix))
krack_reports_to_advice_cors
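# Quick dimension check (n = 21 actors): the stacked matrix should be 4n x n
# and the correlation matrix n x n.
dim(krack_reports_to_advice_matrix)    # expected 84 x 21
dim(krack_reports_to_advice_cors)      # expected 21 x 21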
# To use correlation values in hierarchical clustering, they must
# first be coerced into a "dissimilarity structure" using dist().
# We subtract the values from 1 so that they are all greater than
# or equal to 0; thus, highly dissimilar (i.e., negatively
# correlated) actors have higher values.
dissimilarity <- 1 - krack_reports_to_advice_cors
krack_reports_to_dist <- as.dist(dissimilarity)
krack_reports_to_dist
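# Because correlations lie in [-1, 1], all dissimilarities should fall
# in [0, 2]; a quick check (not part of the original lab):
range(krack_reports_to_dist)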
# Note that it is also possible to use dist() directly on the
# matrix. However, since cor() looks at associations between
# columns and dist() looks at associations between rows, it is
# necessary to transpose the matrix first.
#
# A variety of distance metrics are available; Euclidean
# is the default.
# krack_reports_to_advice_dist <- dist(t(as.matrix(krack_reports_to_advice_matrix)))
# krack_reports_to_advice_dist
# hclust() performs a hierarchical agglomerative clustering
# operation based on the values in the dissimilarity matrix
# yielded by as.dist() above. The standard visualization is a
# dendrogram. By default, hclust() agglomerates clusters via a
# "complete linkakage" algorithm, determining cluster proximity
# by looking at the distance of the two points across clusters
# that are farthest away from one another. This can be changed via
# the "method" parameter.
krack_reports_to_advice_hclust <- hclust(krack_reports_to_dist)
plot(krack_reports_to_advice_hclust)
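# An optional illustration (not part of the original lab): other linkage
# criteria can be supplied via the "method" argument, e.g. average linkage.
krack_reports_to_advice_hclust_avg <- hclust(krack_reports_to_dist, method = "average")
plot(krack_reports_to_advice_hclust_avg, main = "average linkage")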
# cutree() allows us to use the output of hclust() to set
# different numbers of clusters and assign vertices to clusters
# as appropriate. For example:
cutree(krack_reports_to_advice_hclust, k=2)
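# A small illustrative follow-up: tabulate cluster sizes for a few candidate
# values of k.
table(cutree(krack_reports_to_advice_hclust, k = 2))
table(cutree(krack_reports_to_advice_hclust, k = 4))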
# Now we'll try to figure out the number of clusters that best
# describes the underlying data. To do this, we'll loop through
# all of the possible numbers of clusters (1 through n, where n is
# the number of actors in the network). For each solution
# corresponding to a given number of clusters, we'll use cutree()
# to assign the vertices to their respective clusters
# corresponding to that solution.
#
# From this, we can generate a matrix of within- and between-
# cluster correlations. Thus, when there is one cluster for each
# vertex in the network, the cell values will be identical to the
# observed correlation matrix, and when there is one cluster for
# the whole network, the values will all be equal to the average
# correlation across the observed matrix.
#
# We can then correlate each by-cluster matrix with the observed
# correlation matrix to see how well the by-cluster matrix fits
# the data. We'll store the correlation for each number of
# clusters in a vector, which we can then plot.
# First, we initialize a vector for storing the correlations and
# set a variable for our number of vertices.
clustered_observed_cors = vector()
num_vertices = length(V(krack_reports_to))
# Next, we loop through the different possible cluster
# configurations, produce matrices of within- and between-
# cluster correlations, and correlate these by-cluster matrices
# with the observed correlation matrix.
# pdf("6.3_m182_studentnet_task_social_clustered_observed_corrs.pdf")
clustered_observed_cors <- clustConfigurations(num_vertices, krack_reports_to_advice_hclust, krack_reports_to_advice_cors)
clustered_observed_cors
plot(clustered_observed_cors$correlations)
# dev.off()
clustered_observed_cors$correlations
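# For reference, a minimal sketch of the loop described above, assuming
# clusterCorr() (used later on this page) and sna's gcor() are available;
# clustConfigurations() wraps essentially this logic. We start at k = 2
# because the correlation for the 1-cluster solution is undefined.
manual_cors <- vector()
for (i in 2:num_vertices) {
  clusters_i <- cutree(krack_reports_to_advice_hclust, k = i)
  cluster_cor_mat_i <- clusterCorr(krack_reports_to_advice_cors, clusters_i)
  manual_cors[i] <- gcor(cluster_cor_mat_i, krack_reports_to_advice_cors)
}
plot(manual_cors, type = "b", xlab = "number of clusters",
     ylab = "correlation with observed matrix")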
# From a visual inspection of the correlation matrix, we can
# decide on the proper number of clusters in this network.
# For this network, we'll use 4. (Note that the 1-cluster
# solution doesn't appear on the plot because its correlation
# with the observed correlation matrix is undefined.)
num_clusters = 4
clusters <- cutree(krack_reports_to_advice_hclust, k = num_clusters)
clusters
cluster_cor_mat <- clusterCorr(krack_reports_to_advice_cors, clusters)
cluster_cor_mat
# Let's look at the correlation between this cluster configuration
# and the observed correlation matrix. This should match the
# corresponding value from clustered_observed_cors above.
gcor(cluster_cor_mat, krack_reports_to_advice_cors)
#####################
# Questions:
# (1) What rationale do you have for selecting the number of
# clusters / positions that you do?
#####################
### NOTE ON DEDUCTIVE CLUSTERING
# It's pretty straightforward, using the code above, to explore
# your own deductive clustering. Simply supply your own cluster
# vector, where the elements in the vector are in the same order
# as the vertices in the matrix, and the values represent the
# cluster to which each vertex belongs.
#
# For example, if you believed that actors 2, 7, and 8 formed one
# group, actor 16 formed another group, and everyone else formed
# a third group, you could represent this as follows (one entry
# per actor, in vertex order, for all 21 actors):
deductive_clusters = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1,
                       1, 3, 1, 1, 1, 1, 1)
# You could then examine the fitness of this cluster configuration
# as follows:
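# (generate_cluster_cor_mat() comes from the lab's accompanying helper code;
# it appears to serve the same purpose as clusterCorr() used above.)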
deductive_cluster_cor_mat <- generate_cluster_cor_mat(krack_reports_to_advice_cors, deductive_clusters)
deductive_cluster_cor_mat
gcor(deductive_cluster_cor_mat, krack_reports_to_advice_cors)
### END NOTE ON DEDUCTIVE CLUSTERING
# Now we'll use the 4-cluster solution to generate blockmodels,
# using the raw tie data from the underlying reports-to and advice
# networks.
# Reports-to, valued
reports_to_mean <- mean(as.matrix(krack_reports_to_matrix_row_to_col))
reports_to_mean

reports_to_valued_blockmodel <- blockmodel(as.matrix(krack_reports_to_matrix_row_to_col), clusters)
reports_to_valued_blockmodel

# Reports-to, binary
reports_to_density <- graph.density(krack_reports_to)
reports_to_density

reports_to_binary_blockmodel <- blockmodel(as.matrix(krack_reports_to_matrix_row_to_col_bin), clusters)
reports_to_binary_blockmodel

# Advice, valued
advice_mean <- mean(as.matrix(krack_advice_matrix_row_to_col))
advice_mean

advice_valued_blockmodel <- blockmodel(as.matrix(krack_advice_matrix_row_to_col), clusters)
advice_valued_blockmodel

# Advice, binary
advice_density <- graph.density(krack_advice)
advice_density

advice_binary_blockmodel <- blockmodel(as.matrix(krack_advice_matrix_row_to_col_bin), clusters)
advice_binary_blockmodel
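# A hedged sketch (not from the original lab): computing within- and between-
# block densities by hand from the binary reports-to matrix, to make explicit
# what blockmodel() summarizes. Self-ties on the diagonal are excluded.
reports_to_bin <- as.matrix(krack_reports_to_matrix_row_to_col_bin)
block_density <- function(mat, memb, r, c) {
  sub <- mat[memb == r, memb == c, drop = FALSE]
  if (r == c) diag(sub) <- NA      # drop the diagonal within a block
  mean(sub, na.rm = TRUE)
}
round(outer(1:num_clusters, 1:num_clusters,
            Vectorize(function(r, c) block_density(reports_to_bin, clusters, r, c))), 2)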
# We can also permute the network to examine the within- and
# between-cluster correlations.
cluster_cor_mat_per <- permute_matrix(clusters, cluster_cor_mat)
cluster_cor_mat_per
#####################
# Questions:
# (2) What is the story you get from viewing these clusters,
# and their within- and between-cluster densities on the reports-to
# and advice ties? What can you say about the Krackhardt managers from this?
#####################
#####################
# Questions:
# (3) What does clustering of the triadic census afford us?
# What roles do you see? Redo the initial blockmodel analysis
# without the advice ties (reports-to only) and then compare to
# this solution. Do they differ?
#
# Extra credit: Try running the triad census on the reports-to AND
# advice ties separately and then correlating persons.
# What result do you get? Is it different from our initial
# blockmodel result? Show your code.
######################
###
# 5. FACTOR ANALYSIS
###
# Note that although we are conducting a principal components
# analysis (PCA), which is technically not exactly the same as
# factor analysis, we will use the term "factor" to describe the
# individual components in our PCA.
# PCA is often used in network analysis as a way of detecting
# individuals' global positioning. We say "global" because these
# clusters aren't defined by local cohesion but by the overall
# pattern of ties individuals have with all others (structural
# equivalence). Identifying the first two largest components that
# organize the variance in tie patterns is one way of doing this.
# We'll analyze the 4n x n matrix generated above.
# First, we want to determine the ideal number of components
# (factors) to extract. We'll do this by examining the eigenvalues
# in a scree plot and examining how each number of factors stacks
# up to a few proposed non-graphical solutions to selecting the
# optimal number of components, available via the nFactors
# package.
ev <- eigen(cor(as.matrix(krack_reports_to_advice_matrix))) # get eigenvalues
ap <- parallel(subject=nrow(krack_reports_to_advice_matrix),
               var=ncol(krack_reports_to_advice_matrix),
               rep=100, cent=.05)
nS <- nScree(ev$values, ap$eigen$qevpea)
# pdf("6.6_m182_studentnet_task_social_pca_scree.pdf")
plotnScree(nS)
# To draw a line across the graph where eigenvalues are = 1,
# use the following code:
plotnScree(nS)
abline(h=1)
# dev.off()
# For more information on this procedure, please see
# the references provided in the parallel() documentation
# (type "?parallel" in the R command line with the package
# loaded).
# Now we'll run a principal components analysis on the matrix,
# using the number of factors determined above (note this may not
# be the same number as you get):
pca_krack_reports_to_advice = principal(as.matrix(krack_reports_to_advice_matrix), nfactors=5, rotate="varimax")
# Let's take a look at the results in the R terminal:
pca_krack_reports_to_advice
# You can see the standardized loadings for each factor for each
# node. Note that R sometimes puts the factors in a funky order
# (e.g. RC1, RC2, RC5, RC4, RC3), but all of the factors are there.
# Below the loadings you can see the SS loadings, the proportion of
# variance explained, and the cumulative variance explained, along
# with a chi-square test of the factors and various other statistics.
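# To inspect the loadings as a plain matrix (an illustrative step, not part
# of the original lab):
loadings_matrix <- unclass(pca_krack_reports_to_advice$loadings)
round(loadings_matrix, 2)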
# Note that the eigenvalues can be accessed via the following
# command:
pca_krack_reports_to_advice$values
# Now we will use the factor loadings to cluster and compare that
# to our other clustering techniques, using dendrograms.
# Take the distance between loadings, based on Euclidean distance
krack_reports_to_factor_dist = dist(pca_krack_reports_to_advice$loadings)
# And cluster
krack_reports_to_factor_hclust <- hclust(krack_reports_to_factor_dist)
# pdf("6.7_m182_studentnet_task_social_pca_hclust.pdf")
plot(krack_reports_to_factor_hclust)
# dev.off()
# And compare to the clustering based on correlations:
# pdf("6.8_m182_task_cluster_by_correlation_PCA_Triads.pdf")
par(mfrow = c(1,2))
plot(krack_reports_to_advice_hclust, main = "Correlation")
plot(krack_reports_to_factor_hclust, main = "PCA")
# plot(m182_task_triad_hclust, main = "Triads")
par(mfrow = c(1,1))
# dev.off()
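# A further illustrative comparison: cross-tabulate the two 4-cluster
# solutions from cutree() to see how closely they agree.
table(correlation = cutree(krack_reports_to_advice_hclust, k = 4),
      pca = cutree(krack_reports_to_factor_hclust, k = 4))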
#####################
# Questions:
# (4) How do the results across blockmodel techniques differ?
# Why might you use one over the other? Why might you want to
# run more than one in your analyses?
#####################
