This is an old revision of the document!

Krackhardt Datasets

Krackhardt dataset in the NetData package

# Install NetData only when it is not already available, so that re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("NetData", quietly = TRUE)) {
  install.packages("NetData")
}

# Attach every package whose functions are called further down:
#   NetData    - the Krackhardt data ("kracknets")
#   sna        - gcor()
#   NetCluster - clustConfigurations(), clusterCorr()
#   igraph     - graph.data.frame(), delete.edges(), E(), V(),
#                get.adjacency(), layout.fruchterman.reingold()
# igraph is attached last on purpose: if another attached package exports a
# function with the same name (delete.edges() is a likely candidate), the
# igraph version is the one found first.
library(NetData)
library(sna)
library(NetCluster)
library(igraph)

# List the datasets shipped with NetData, then load the Krackhardt data. The
# rest of the script relies on this making krack_full_data_frame available.
data(package = "NetData")
data(kracknets, package = "NetData")
head(krack_full_data_frame)

> head(krack_full_data_frame)
  ego alter advice_tie friendship_tie reports_to_tie
1   1     1          0              0              0
2   1     2          1              1              1
3   1     3          0              0              0
4   1     4          1              1              0
5   1     5          0              0              0
6   1     6          0              0              0
>

# Keep only the ego/alter pairs connected by at least one of the three
# relations; rows in which every tie column is 0 are dropped.
krack_full_nonzero_edges <- subset(
  krack_full_data_frame,
  advice_tie > 0 | friendship_tie > 0 | reports_to_tie > 0
)
head(krack_full_nonzero_edges)

> krack_full_nonzero_edges <- subset(krack_full_data_frame, (friendship_tie > 0 | advice_tie > 0 | reports_to_tie > 0))
> head(krack_full_nonzero_edges)
   ego alter advice_tie friendship_tie reports_to_tie
2    1     2          1              1              1
4    1     4          1              1              0
8    1     8          1              1              0
12   1    12          0              1              0
16   1    16          1              1              0
18   1    18          1              0              0
>

# Turn the edge list into a directed graph. The ego and alter columns give
# the endpoints; the three tie columns are carried along as edge attributes.
krack_full <- graph.data.frame(d = krack_full_nonzero_edges, directed = TRUE)
summary(krack_full)

> krack_full <- graph.data.frame(krack_full_nonzero_edges) 
> summary(krack_full)
IGRAPH 750f8b3 DN-- 21 232 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
>

# Derive one graph per relation from the full graph by deleting every edge
# whose value for that relation is 0. All vertices are kept in each graph.

# Friendship ties only.
krack_friend <- delete.edges(
  krack_full,
  which(E(krack_full)$friendship_tie == 0)
)
summary(krack_friend)

# Advice ties only.
krack_advice <- delete.edges(
  krack_full,
  which(E(krack_full)$advice_tie == 0)
)
summary(krack_advice)

# Reports-to ties only.
krack_reports_to <- delete.edges(
  krack_full,
  which(E(krack_full)$reports_to_tie == 0)
)
summary(krack_reports_to)

> krack_friend <- delete.edges(krack_full, E(krack_full)[E(krack_full)$friendship_tie==0])
> summary(krack_friend)
IGRAPH 51e7962 DN-- 21 102 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
> 
> krack_advice <- delete.edges(krack_full, E(krack_full)[E(krack_full)$advice_tie==0])
> summary(krack_advice)
IGRAPH 51f2910 DN-- 21 190 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
> 
> krack_reports_to <- delete.edges(krack_full, E(krack_full)[E(krack_full)$reports_to_tie==0])
> summary(krack_reports_to)
IGRAPH 51fe4f9 DN-- 21 232 -- 
+ attr: name (v/c), advice_tie (e/n), friendship_tie (e/n), reports_to_tie (e/n)
>

# Draw the three networks side by side in a 1 x 3 grid. Each network gets
# its own Fruchterman-Reingold layout, computed just before it is plotted.
par(mfrow = c(1, 3))

krack_friend_layout <- layout.fruchterman.reingold(krack_friend)
plot(
  krack_friend,
  layout = krack_friend_layout,
  main = "friend",
  edge.arrow.size = 0.5
)

krack_advice_layout <- layout.fruchterman.reingold(krack_advice)
plot(
  krack_advice,
  layout = krack_advice_layout,
  main = "advice",
  edge.arrow.size = 0.5
)

krack_reports_to_layout <- layout.fruchterman.reingold(krack_reports_to)
plot(
  krack_reports_to,
  layout = krack_reports_to_layout,
  main = "reports to",
  edge.arrow.size = 0.5
)

# Switch back to one plot per page.
par(mfrow = c(1, 1))

# The structural equivalence analysis below is based on the reports-to and
# advice graphs together. Start with the reports-to graph and turn it into
# an adjacency matrix filled with the reports_to_tie values.
#
# Cell [i, j] describes the tie directed FROM row actor i TO column actor j.
krack_reports_to_matrix_row_to_col <- get.adjacency(
  krack_reports_to,
  attr = "reports_to_tie"
)
krack_reports_to_matrix_row_to_col

# Leaving off "attr" gives the unvalued (binary) version of the same matrix.
krack_reports_to_matrix_row_to_col_bin <- get.adjacency(krack_reports_to)
krack_reports_to_matrix_row_to_col_bin

# The valued matrix is the one used from here on. Transposing it reverses
# the direction of every tie, so stacking the matrix on top of its transpose
# records both the incoming and the outgoing ties of each column actor.
krack_reports_to_matrix_col_to_row <- t(
  as.matrix(krack_reports_to_matrix_row_to_col)
)
krack_reports_to_matrix_col_to_row

krack_reports_to_matrix <- rbind(
  krack_reports_to_matrix_row_to_col,
  krack_reports_to_matrix_col_to_row
)
krack_reports_to_matrix

# Repeat exactly the same steps for the advice graph.
krack_advice_matrix_row_to_col <- get.adjacency(
  krack_advice,
  attr = "advice_tie"
)
krack_advice_matrix_row_to_col

krack_advice_matrix_row_to_col_bin <- get.adjacency(krack_advice)
krack_advice_matrix_row_to_col_bin

krack_advice_matrix_col_to_row <- t(
  as.matrix(krack_advice_matrix_row_to_col)
)
krack_advice_matrix_col_to_row

krack_advice_matrix <- rbind(
  krack_advice_matrix_row_to_col,
  krack_advice_matrix_col_to_row
)
krack_advice_matrix

# Finally stack the reports-to block on top of the advice block.
krack_reports_to_advice_matrix <- rbind(
  krack_reports_to_matrix,
  krack_advice_matrix
)
krack_reports_to_advice_matrix

# The stacked matrix has 4n rows and n columns: each column holds one
# actor's incoming and outgoing reports-to and advice ties. Correlating the
# columns with each other yields an n x n matrix that expresses how
# structurally equivalent every pair of actors is.
krack_reports_to_advice_cors <- cor(
  as.matrix(krack_reports_to_advice_matrix)
)
krack_reports_to_advice_cors

# Hierarchical clustering works on dissimilarities, not on correlations.
# Subtracting each correlation from 1 gives values that are all greater
# than or equal to 0, with highly dissimilar (i.e. negatively correlated)
# actors receiving the largest values. as.dist() then coerces that square
# matrix into the "dissimilarity structure" expected by hclust().
dissimilarity <- 1 - krack_reports_to_advice_cors
krack_reports_to_dist <- as.dist(dissimilarity)
krack_reports_to_dist
 
# Note that it is also possible to use dist() directly on the 
# matrix. However, since cor() looks at associations between 
# columns and dist() looks at associations between rows, it is
# necessary to transpose the matrix first.
#
# A variety of distance metrics are available; Euclidean 
# is the default.
#krack_reports_to_advice_dist <- dist(t(as.matrix(krack_reports_to_advice_matrix)))
#krack_reports_to_advice_dist
 
# hclust() performs hierarchical agglomerative clustering on the
# dissimilarities produced by as.dist() above; the usual way to look at the
# result is a dendrogram. Unless the "method" parameter says otherwise,
# hclust() merges clusters by "complete linkage": the proximity of two
# clusters is judged by the distance between their two farthest-apart
# members.

krack_reports_to_advice_hclust <- hclust(krack_reports_to_dist)
plot(krack_reports_to_advice_hclust)

# cutree() takes the hclust() result and a requested number of clusters and
# assigns every vertex to one of them. For example, a two-cluster solution:
cutree(krack_reports_to_advice_hclust, k = 2)
 
# Next: how many clusters describe the data best? The idea is to go through
# every possible number of clusters (1 through n, with n the number of
# actors) and, for each one, let cutree() assign the vertices to clusters.
#
# Each such solution gives a matrix of within- and between-cluster
# correlations. With one cluster per vertex that matrix equals the observed
# correlation matrix; with a single cluster for the whole network every
# cell equals the average correlation of the observed matrix.
#
# Correlating each by-cluster matrix with the observed correlation matrix
# shows how well that solution fits the data. The fit for every number of
# clusters is collected so that it can be plotted.

# Start with an empty vector for the correlations and record the number of
# vertices in the network.
clustered_observed_cors <- vector()
num_vertices <- length(V(krack_reports_to))

# clustConfigurations() is handed the number of vertices, the hclust()
# result and the observed correlation matrix, and carries out the procedure
# described above. The per-solution correlations are read from the
# "correlations" element of its result.

# pdf("krack_reports_to_advice_clustered_observed_corrs.pdf")
clustered_observed_cors <- clustConfigurations(
  num_vertices,
  krack_reports_to_advice_hclust,
  krack_reports_to_advice_cors
)
clustered_observed_cors
plot(clustered_observed_cors$correlations)
# dev.off()

clustered_observed_cors$correlations
# Inspecting the results above lets us settle on a suitable number of
# clusters; for this network we go with 4. (The 1-cluster solution does not
# show up on the plot because its correlation with the observed correlation
# matrix is undefined.)
num_clusters <- 4
clusters <- cutree(krack_reports_to_advice_hclust, k = num_clusters)
clusters

# Build the by-cluster correlation matrix for the chosen solution.
cluster_cor_mat <- clusterCorr(krack_reports_to_advice_cors, clusters)
cluster_cor_mat
 
# Correlate the chosen cluster configuration with the observed correlation
# matrix. The value should agree with the entry for the same number of
# clusters in clustered_observed_cors above.
gcor(dat = cluster_cor_mat, dat2 = krack_reports_to_advice_cors)

code