sna_eg_stanford:lab02
| + | ====== no pdf ====== | ||
| + | < | ||
| + | #################################################################### | ||
| + | # LAB 2: Methodological beginnings - Density, Reciprocity, | ||
| + | # Transitivity, | ||
| + | #################################################################### | ||
| + | |||
| + | |||
| + | # NOTE: if you have trouble because some packages are not installed, | ||
| + | # see lab 1 for instructions on how to install all necessary packages. | ||
| + | # Also see Lab 1 for prior functions. | ||
| + | |||
| + | |||
| + | ############################################################## | ||
| + | # | ||
| + | # Lab 2 | ||
| + | # | ||
| + | # The purpose of this lab is to acquire basic cohesion | ||
| + | # metrics of density, reciprocity, | ||
| + | # and transitivity. In addition, we'll develop triadic | ||
| + | # analyses and a measure of ego-network heterogenity. | ||
| + | # | ||
| + | ############################################################## | ||
| + | |||
| + | |||
| + | |||
| + | ### | ||
| + | # 1. SET UP SESSION | ||
| + | ### | ||
| + | install.packages(" | ||
| + | |||
| + | library(igraph) | ||
| + | library(NetData) | ||
| + | |||
| + | |||
| + | ### | ||
| + | # 2. LOAD DATA | ||
| + | ### | ||
| + | |||
| + | # We would ordinarily need to follow the same proceedure we did for the Krackhardt data | ||
| + | # as we did in lab 1; see that lab for detail. | ||
| + | |||
| + | data(kracknets, | ||
| + | |||
| + | # Reduce to non-zero edges and build a graph object | ||
| + | krack_full_nonzero_edges <- subset(krack_full_data_frame, | ||
| + | head(krack_full_nonzero_edges) | ||
| + | |||
| + | krack_full <- graph.data.frame(krack_full_nonzero_edges) | ||
| + | summary(krack_full) | ||
| + | |||
| + | # Set vertex attributes | ||
| + | for (i in V(krack_full)) { | ||
| + | for (j in names(attributes)) { | ||
| + | krack_full <- set.vertex.attribute(krack_full, | ||
| + | } | ||
| + | } | ||
| + | summary(krack_full) | ||
| + | |||
| + | # Create sub-graphs based on edge attributes | ||
| + | krack_advice <- delete.edges(krack_full, | ||
| + | summary(krack_advice) | ||
| + | |||
| + | krack_friendship <- delete.edges(krack_full, | ||
| + | summary(krack_friendship) | ||
| + | |||
| + | krack_reports_to <- delete.edges(krack_full, | ||
| + | summary(krack_reports_to) | ||
| + | |||
| + | |||
| + | ### | ||
| + | # 3. NODE-LEVEL STATISTICS | ||
| + | ### | ||
| + | |||
| + | # Compute the indegree and outdegree for each node, first in the | ||
| + | # full graph (accounting for all tie types) and then in each | ||
| + | # tie-specific sub-graph. | ||
| + | deg_full_in <- degree(krack_full, | ||
| + | deg_full_out <- degree(krack_full, | ||
| + | deg_full_in | ||
| + | deg_full_out | ||
| + | |||
| + | deg_advice_in <- degree(krack_advice, | ||
| + | deg_advice_out <- degree(krack_advice, | ||
| + | deg_advice_in | ||
| + | deg_advice_out | ||
| + | |||
| + | deg_friendship_in <- degree(krack_friendship, | ||
| + | deg_friendship_out <- degree(krack_friendship, | ||
| + | deg_friendship_in | ||
| + | deg_friendship_out | ||
| + | |||
| + | deg_reports_to_in <- degree(krack_reports_to, | ||
| + | deg_reports_to_out <- degree(krack_reports_to, | ||
| + | deg_reports_to_in | ||
| + | deg_reports_to_out | ||
| + | |||
| + | # Reachability can only be computed on one vertex at a time. To | ||
| + | # get graph-wide statistics, change the value of " | ||
| + | # manually or write a for loop. (Remember that, unlike R objects, | ||
| + | # igraph objects are numbered from 0.) | ||
| + | |||
| + | reachability <- function(g, m) { | ||
| + | reach_mat = matrix(nrow = vcount(g), | ||
| + | ncol = vcount(g)) | ||
| + | for (i in 1: | ||
| + | reach_mat[i, | ||
| + | this_node_reach <- subcomponent(g, | ||
| + | |||
| + | for (j in 1: | ||
| + | alter = this_node_reach[j] | ||
| + | reach_mat[i, | ||
| + | } | ||
| + | } | ||
| + | return(reach_mat) | ||
| + | } | ||
| + | |||
| + | reach_full_in <- reachability(krack_full, | ||
| + | reach_full_out <- reachability(krack_full, | ||
| + | reach_full_in | ||
| + | reach_full_out | ||
| + | |||
| + | reach_advice_in <- reachability(krack_advice, | ||
| + | reach_advice_out <- reachability(krack_advice, | ||
| + | reach_advice_in | ||
| + | reach_advice_out | ||
| + | |||
| + | reach_friendship_in <- reachability(krack_friendship, | ||
| + | reach_friendship_out <- reachability(krack_friendship, | ||
| + | reach_friendship_in | ||
| + | reach_friendship_out | ||
| + | |||
| + | reach_reports_to_in <- reachability(krack_reports_to, | ||
| + | reach_reports_to_out <- reachability(krack_reports_to, | ||
| + | reach_reports_to_in | ||
| + | reach_reports_to_out | ||
| + | |||
| + | |||
| + | # Often we want to know path distances between individuals in a network. | ||
| + | # This is often done by calculating geodesics, or shortest paths between | ||
| + | # each ij pair. One can symmetrize the data to do this (see lab 1), or | ||
| + | # calculate it for outward and inward ties separately. Averaging geodesics | ||
| + | # for the entire network provides an average distance or sort of cohesiveness | ||
| + | # score. Dichotomizing distances reveals reach, and an average of reach for | ||
| + | # a network reveals what percent of a network is connected in some way. | ||
| + | |||
| + | # Compute shortest paths between each pair of nodes. | ||
| + | sp_full_in <- shortest.paths(krack_full, | ||
| + | sp_full_out <- shortest.paths(krack_full, | ||
| + | sp_full_in | ||
| + | sp_full_out | ||
| + | |||
| + | sp_advice_in <- shortest.paths(krack_advice, | ||
| + | sp_advice_out <- shortest.paths(krack_advice, | ||
| + | sp_advice_in | ||
| + | sp_advice_out | ||
| + | |||
| + | sp_friendship_in <- shortest.paths(krack_friendship, | ||
| + | sp_friendship_out <- shortest.paths(krack_friendship, | ||
| + | sp_friendship_in | ||
| + | sp_friendship_out | ||
| + | |||
| + | sp_reports_to_in <- shortest.paths(krack_reports_to, | ||
| + | sp_reports_to_out <- shortest.paths(krack_reports_to, | ||
| + | sp_reports_to_in | ||
| + | sp_reports_to_out | ||
| + | |||
| + | |||
| + | # Assemble node-level stats into single data frame for export as CSV. | ||
| + | |||
| + | # First, we have to compute average values by node for reachability and | ||
| + | # shortest path. (We don't have to do this for degree because it is | ||
| + | # already expressed as a node-level value.) | ||
| + | reach_full_in_vec <- vector() | ||
| + | reach_full_out_vec <- vector() | ||
| + | reach_advice_in_vec <- vector() | ||
| + | reach_advice_out_vec <- vector() | ||
| + | reach_friendship_in_vec <- vector() | ||
| + | reach_friendship_out_vec <- vector() | ||
| + | reach_reports_to_in_vec <- vector() | ||
| + | reach_reports_to_out_vec <- vector() | ||
| + | |||
| + | sp_full_in_vec <- vector() | ||
| + | sp_full_out_vec <- vector() | ||
| + | sp_advice_in_vec <- vector() | ||
| + | sp_advice_out_vec <- vector() | ||
| + | sp_friendship_in_vec <- vector() | ||
| + | sp_friendship_out_vec <- vector() | ||
| + | sp_reports_to_in_vec <- vector() | ||
| + | sp_reports_to_out_vec <- vector() | ||
| + | |||
| + | for (i in 1: | ||
| + | reach_full_in_vec[i] <- mean(reach_full_in[i, | ||
| + | reach_full_out_vec[i] <- mean(reach_full_out[i, | ||
| + | reach_advice_in_vec[i] <- mean(reach_advice_in[i, | ||
| + | reach_advice_out_vec[i] <- mean(reach_advice_out[i, | ||
| + | reach_friendship_in_vec[i] <- mean(reach_friendship_in[i, | ||
| + | reach_friendship_out_vec[i] <- mean(reach_friendship_out[i, | ||
| + | reach_reports_to_in_vec[i] <- mean(reach_reports_to_in[i, | ||
| + | reach_reports_to_out_vec[i] <- mean(reach_reports_to_out[i, | ||
| + | |||
| + | sp_full_in_vec[i] <- mean(sp_full_in[i, | ||
| + | sp_full_out_vec[i] <- mean(sp_full_out[i, | ||
| + | sp_advice_in_vec[i] <- mean(sp_advice_in[i, | ||
| + | sp_advice_out_vec[i] <- mean(sp_advice_out[i, | ||
| + | sp_friendship_in_vec[i] <- mean(sp_friendship_in[i, | ||
| + | sp_friendship_out_vec[i] <- mean(sp_friendship_out[i, | ||
| + | sp_reports_to_in_vec[i] <- mean(sp_reports_to_in[i, | ||
| + | sp_reports_to_out_vec[i] <- mean(sp_reports_to_out[i, | ||
| + | } | ||
| + | |||
| + | # Next, we assemble all of the vectors of node-levelvalues into a | ||
| + | # single data frame, which we can export as a CSV to our working | ||
| + | # directory. | ||
| + | node_stats_df <- cbind(deg_full_in, | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | |||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | |||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | |||
| + | write.csv(node_stats_df, | ||
| + | |||
| + | # Question #1 - What do these statistics tell us about | ||
| + | # each network and its individuals in general? | ||
| + | |||
| + | ### | ||
| + | # 3. NETWORK-LEVEL STATISTICS | ||
| + | ### | ||
| + | |||
| + | # Many initial analyses of networks begin with distances and reach, | ||
| + | # and then move towards global summary statistics of the network. | ||
| + | # | ||
| + | # As a reminder, entering a question mark followed by a function | ||
| + | # name (e.g., ? | ||
| + | # This can be helpful to understand how, exactly, stats are calculated. | ||
| + | |||
| + | # Degree | ||
| + | mean(deg_full_in) | ||
| + | sd(deg_full_in) | ||
| + | mean(deg_full_out) | ||
| + | sd(deg_full_out) | ||
| + | |||
| + | mean(deg_advice_in) | ||
| + | sd(deg_advice_in) | ||
| + | mean(deg_advice_out) | ||
| + | sd(deg_advice_out) | ||
| + | |||
| + | mean(deg_friendship_in) | ||
| + | sd(deg_friendship_in) | ||
| + | mean(deg_friendship_out) | ||
| + | sd(deg_friendship_out) | ||
| + | |||
| + | mean(deg_reports_to_in) | ||
| + | sd(deg_reports_to_in) | ||
| + | mean(deg_reports_to_out) | ||
| + | sd(deg_reports_to_out) | ||
| + | |||
| + | |||
| + | # Shortest paths | ||
| + | # ***Why do in and out come up with the same results? | ||
| + | # In and out shortest paths are simply transposes of one another; | ||
| + | # thus, when we compute statistics across the whole network they have to be the same. | ||
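# A quick added check of the transpose relationship noted above
# (illustration only, not part of the original lab):
all(sp_full_in == t(sp_full_out))   # should return TRUE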
| + | |||
| + | mean(sp_full_in[which(sp_full_in != Inf)]) | ||
| + | sd(sp_full_in[which(sp_full_in != Inf)]) | ||
| + | mean(sp_full_out[which(sp_full_out != Inf)]) | ||
| + | sd(sp_full_out[which(sp_full_out != Inf)]) | ||
| + | |||
| + | mean(sp_advice_in[which(sp_advice_in != Inf)]) | ||
| + | sd(sp_advice_in[which(sp_advice_in != Inf)]) | ||
| + | mean(sp_advice_out[which(sp_advice_out != Inf)]) | ||
| + | sd(sp_advice_out[which(sp_advice_out != Inf)]) | ||
| + | |||
| + | mean(sp_friendship_in[which(sp_friendship_in != Inf)]) | ||
| + | sd(sp_friendship_in[which(sp_friendship_in != Inf)]) | ||
| + | mean(sp_friendship_out[which(sp_friendship_out != Inf)]) | ||
| + | sd(sp_friendship_out[which(sp_friendship_out != Inf)]) | ||
| + | |||
| + | mean(sp_reports_to_in[which(sp_reports_to_in != Inf)]) | ||
| + | sd(sp_reports_to_in[which(sp_reports_to_in != Inf)]) | ||
| + | mean(sp_reports_to_out[which(sp_reports_to_out != Inf)]) | ||
| + | sd(sp_reports_to_out[which(sp_reports_to_out != Inf)]) | ||
| + | |||
| + | # Reachability | ||
| + | mean(reach_full_in[which(reach_full_in != Inf)]) | ||
| + | sd(reach_full_in[which(reach_full_in != Inf)]) | ||
| + | mean(reach_full_out[which(reach_full_out != Inf)]) | ||
| + | sd(reach_full_out[which(reach_full_out != Inf)]) | ||
| + | |||
| + | mean(reach_advice_in[which(reach_advice_in != Inf)]) | ||
| + | sd(reach_advice_in[which(reach_advice_in != Inf)]) | ||
| + | mean(reach_advice_out[which(reach_advice_out != Inf)]) | ||
| + | sd(reach_advice_out[which(reach_advice_out != Inf)]) | ||
| + | |||
| + | mean(reach_friendship_in[which(reach_friendship_in != Inf)]) | ||
| + | sd(reach_friendship_in[which(reach_friendship_in != Inf)]) | ||
| + | mean(reach_friendship_out[which(reach_friendship_out != Inf)]) | ||
| + | sd(reach_friendship_out[which(reach_friendship_out != Inf)]) | ||
| + | |||
| + | mean(reach_reports_to_in[which(reach_reports_to_in != Inf)]) | ||
| + | sd(reach_reports_to_in[which(reach_reports_to_in != Inf)]) | ||
| + | mean(reach_reports_to_out[which(reach_reports_to_out != Inf)]) | ||
| + | sd(reach_reports_to_out[which(reach_reports_to_out != Inf)]) | ||
| + | |||
| + | # Density | ||
| + | graph.density(krack_full) | ||
| + | graph.density(krack_advice) | ||
| + | graph.density(krack_friendship) | ||
| + | graph.density(krack_reports_to) | ||
| + | |||
| + | # Reciprocity | ||
| + | reciprocity(krack_full) | ||
| + | reciprocity(krack_advice) | ||
| + | reciprocity(krack_friendship) | ||
| + | reciprocity(krack_reports_to) | ||
| + | |||
| + | # Transitivity | ||
| + | transitivity(krack_full) | ||
| + | transitivity(krack_advice) | ||
| + | transitivity(krack_friendship) | ||
| + | transitivity(krack_reports_to) | ||
| + | |||
# Triad census. Here we'll first build a vector of labels for
# the different triad types. Then we'll combine this vector
# with the triad censuses for the different networks, which
# we'll export as a CSV.

census_labels = c('003',
                  '012',
                  '102',
                  '021D',
                  '021U',
                  '021C',
                  '111D',
                  '111U',
                  '030T',
                  '030C',
                  '201',
                  '120D',
                  '120U',
                  '120C',
                  '210',
                  '300')
tc_full <- triad.census(krack_full)
tc_advice <- triad.census(krack_advice)
tc_friendship <- triad.census(krack_friendship)
tc_reports_to <- triad.census(krack_reports_to)

triad_df <- data.frame(census_labels,
                       tc_full,
                       tc_advice,
                       tc_friendship,
                       tc_reports_to)
triad_df

# To export any of these vectors to a CSV for use in another program,
# simply use the write.csv() command:
write.csv(triad_df, 'krack_triads.csv')

# Question #2 - (a) How do the three networks differ on network statistics?
# (b) What does the triad census tell us? Can you calculate the likelihood of
# any triad's occurrence? See the reading
# on triads. Calculate the degree of clustering and hierarchy in Excel.
# What do we learn from that?
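
# A minimal added sketch (not from the original lab) of one way to do the
# clustering / hierarchy calculation in R rather than Excel. It reuses the
# triad-type weighting vectors defined later on this page (clust.mask and
# hier.mask in the Tau-statistic section); treat that choice of triad types
# as an assumption carried over from that code.
clust.mask <- rep(0, 16); clust.mask[c(1, 3, 16)] <- 1
hier.mask  <- rep(1, 16); hier.mask[c(6:8, 10, 11)] <- 0
sum(tc_advice * clust.mask) / sum(tc_advice)   # proportion of "clustered" triad types
sum(tc_advice * hier.mask)  / sum(tc_advice)   # proportion of "hierarchical" triad types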
| + | </ | ||
| + | |||
| + | ====== omitted ====== | ||
| + | < | ||
| + | ### | ||
| + | # 4. HETEROGENEITY | ||
| + | ### | ||
| + | |||
| + | # Miller and McPherson write about processes of homophily and | ||
| + | # here we take a brief look at one version of this issue. | ||
| + | # In particular, we look at the extent to which each actor' | ||
| + | # " | ||
| + | |||
| + | # We'll use a statistic called the IQV, or Index of Qualitative | ||
| + | # Variation. This is just an implementation of Blau's Index of | ||
| + | # Heterogeneity (known to economists as the Herfindahl-Hirschman | ||
| + | # index), normalized so that perfect heterogeneity (i.e., equal | ||
| + | # distribution across categories) equals 1. | ||
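
# A small added worked example (not from the original lab): Blau's index and
# the IQV for a hypothetical ego whose 4 alters fall into departments with
# counts 2, 1, 1 out of num_levels = 5 possible departments.
p <- c(2, 1, 1, 0, 0) / 4      # category proportions among the alters
blau <- 1 - sum(p^2)           # Blau / Herfindahl-style heterogeneity = 0.625
iqv  <- blau / (1 - 1/5)       # normalized so equal spread across 5 levels gives 1
iqv                            # 0.78125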
| + | |||
| + | # NOTE that this code only works with categorical variables that | ||
| + | # have been numerically coded to integer values that ascend | ||
| + | # sequentially from 0; you may have to recode your data to get this | ||
| + | # to work properly. | ||
| + | # We are interested in many of the attributes of nodes. | ||
| + | # time and to make our lives better we are going to create a function | ||
| + | # that will provide an IQV statistic for any network and for | ||
| + | # any categorical variable. | ||
| + | # create code that is both reusable and easier to edit. | ||
| + | |||
| + | # Functions have names and receive arguments. | ||
| + | # anytime you call table() you are calling the table function. | ||
| + | # We could write code to duplicate the table function for each | ||
| + | # of our variables, but it is faster to write a single general tool | ||
| + | # that will provide frequencies for any variable. If I have | ||
| + | # a dataframe with the variable gender and I want to see the | ||
| + | # split of males and females I would pass the argument | ||
| + | # " | ||
| + | # similar model here. Understanding each step is less important | ||
| + | # than understanding the usefulness and power of functions. | ||
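
# A tiny added illustration of the point above ("df" and its gender column
# are hypothetical, not part of the Krackhardt data):
df <- data.frame(gender = c("F", "M", "F", "F", "M"))
table(df$gender)   # frequency of each category: F = 3, M = 2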
| + | |||
| + | get_iqvs <- function(graph, | ||
| + | |||
| + | #we have now defined a function, get_iqvs, that will take the | ||
| + | # graph " | ||
| + | # variable " | ||
| + | #variables graph or attribute they correspond to the graph and | ||
| + | # variable we passed (provided) to the function | ||
| + | |||
| + | mat <- get.adjacency(graph) | ||
| + | |||
| + | # To make this function work on a wide variety of variables we | ||
| + | # find out how many coded levels (unique responses) exist for | ||
| + | # the attribute variable programatically | ||
| + | |||
| + | attr_levels = get.vertex.attribute(graph, | ||
| + | | ||
| + | | ||
| + | |||
| + | num_levels = length(unique(attr_levels)) | ||
| + | iqvs = rep(0, nrow(mat)) | ||
| + | |||
| + | # Now that we know how many levels exist we want to loop | ||
| + | # (go through) each actor in the network. Loops iterate through | ||
| + | # each value in a range. | ||
| + | # in the range of egos starting at the first and ending at the | ||
| + | # last. The function nrow provides the number of rows in an | ||
| + | # object and the ":" | ||
| + | # the curly braces of the for loop ego will represent exactly | ||
| + | # one value between 1 and the number of rows in the graph | ||
| + | # object, iterating by one during each execution of the loop. | ||
| + | |||
| + | for (ego in 1: | ||
| + | |||
| + | # initialize actor-specific variables | ||
| + | alter_attr_counts = rep(0, num_levels) | ||
| + | num_alters_this_ego = 0 | ||
| + | sq_fraction_sum = 0 | ||
| + | |||
| + | # For each ego we want to check each tied alter for the same | ||
| + | # level on the variable attribute as the ego. | ||
| + | |||
| + | for (alter in 1: | ||
| + | |||
| + | # only examine alters that are actually tied to ego | ||
| + | if (mat[ego, alter] == 1) { | ||
| + | |||
| + | num_alters_this_ego = num_alters_this_ego + 1 | ||
| + | |||
| + | # get the alter' | ||
| + | alter_attr = get.vertex.attribute(graph, | ||
| + | attribute, (alter - 1)) | ||
| + | |||
| + | # increment the count of alters with this level | ||
| + | # of the attribute by 1 | ||
| + | alter_attr_counts[alter_attr + 1] = | ||
| + | alter_attr_counts[alter_attr + 1] + 1 | ||
| + | } | ||
| + | } | ||
| + | |||
| + | # now that we're done looping through all of the alters, | ||
| + | # get the squared fraction for each level of the attribute | ||
| + | # out of the total number of attributes | ||
| + | for (i in 1: | ||
| + | attr_fraction = alter_attr_counts[i] / | ||
| + | num_alters_this_ego | ||
| + | sq_fraction_sum = sq_fraction_sum + attr_fraction ^ 2 | ||
| + | } | ||
| + | |||
| + | # now we can compute the ego's blau index... | ||
| + | blau_index = 1 - sq_fraction_sum | ||
| + | |||
| + | # and the ego's IQV, which is just a normalized blau index | ||
| + | iqvs[ego] = blau_index / (1 - (1 / num_levels)) | ||
| + | } | ||
| + | |||
| + | # The final part of a function returns the calculated value. | ||
| + | # So if we called get_iqvs(testgraph, | ||
| + | # provide the iqvs for gender in the test graph. | ||
| + | # intersted in race we could simply change the function call | ||
| + | # to get_iqvs(testgraph, | ||
| + | # code again for different variables. | ||
| + | |||
| + | return(iqvs) | ||
| + | } | ||
| + | |||
| + | |||
| + | |||
| + | # For this data set, we'll look at homophily across departments, | ||
| + | # which is already coded 0-4, so no recoding is needed. | ||
| + | |||
| + | advice_iqvs <- get_iqvs(krack_advice, | ||
| + | advice_iqvs | ||
| + | |||
| + | friendship_iqvs <- get_iqvs(krack_friendship, | ||
| + | friendship_iqvs | ||
| + | |||
| + | reports_to_iqvs <- get_iqvs(krack_reports_to, | ||
| + | reports_to_iqvs | ||
| + | |||
| + | # Question #3 - What does the herfindahl index reveal about | ||
| + | # attribute sorting in networks? What does it mean for each network? | ||
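
# An added follow-up sketch (not from the original lab): a quick way to
# compare average ego-network heterogeneity across the three networks.
# NaN values (egos with no alters in a given network) are dropped.
mean(advice_iqvs, na.rm = TRUE)
mean(friendship_iqvs, na.rm = TRUE)
mean(reports_to_iqvs, na.rm = TRUE)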
| + | |||
| + | |||
| + | ##### | ||
| + | # Extra-credit: | ||
| + | # of homophily or segregation in a network? How might we code that in R? | ||
| + | ##### | ||
| + | |||
| + | ##### | ||
| + | # Tau statistic (code by Sam Pimentel) | ||
| + | ##### | ||
| + | |||
| + | |||
| + | #R code for generating random graphs: | ||
| + | #requires packages ergm, intergraph | ||
| + | |||
| + | #set up weighting vectors for clustering and hierarchy | ||
| + | clust.mask <- rep(0,16) | ||
| + | clust.mask[c(1, | ||
| + | hier.mask <- rep(1,16) | ||
| + | hier.mask[c(6: | ||
| + | |||
| + | #compute triad count and triad proportion for a given weighting vector | ||
| + | mask.stat <- function(my.graph, | ||
| + | n.nodes <- vcount(my.graph) | ||
| + | n.edges <- ecount(my.graph) | ||
| + | #set probability of edge formation in random graph to proportion of possible edges present in original | ||
| + | p.edge <- n.edges/ | ||
| + | r.graph <- as.network(n.nodes, | ||
| + | ## r.igraph <- as.igraph(r.graph) | ||
| + | r.igraph <- asIgraph(r.graph) | ||
| + | ################################## | ||
| + | tc.graph <- triad.census(r.igraph) | ||
| + | clust <- sum(tc.graph*my.mask) | ||
| + | clust.norm <- clust/ | ||
| + | return(c(clust, | ||
| + | } | ||
| + | |||
| + | #build 100 random graphs and compute their clustering and hierarchy measurements to create an empirical null distribution | ||
| + | emp.distro <- function(this.graph){ | ||
| + | clust <- matrix(rep(0, | ||
| + | hier <- matrix(rep(0, | ||
| + | for(i in c(1:100)){ | ||
| + | | ||
| + | | ||
| + | } | ||
| + | my.mat <- rbind(clust, | ||
| + | rownames(my.mat) <- c(" | ||
| + | return(my.mat) | ||
| + | } | ||
| + | |||
| + | #fix randomization if desired so results are replicable | ||
| + | # | ||
| + | #compute empirical distributions for each network | ||
| + | hc_advice <- emp.distro(krack_advice) | ||
| + | hc_friend <- emp.distro(krack_friendship) | ||
| + | hc_report <- emp.distro(krack_reports_to) | ||
| + | |||
| + | #find empirical p-value | ||
| + | get.p <- function(val, | ||
| + | { | ||
| + | distro.n <- sort(distro) | ||
| + | distro.n <- distro.n - median(distro.n) | ||
| + | val.n <- val - median(distro.n) | ||
| + | p.val <- sum(abs(distro.n) > abs(val.n))/ | ||
| + | return(p.val) | ||
| + | } | ||
| + | get.p(198, hc_full[1, | ||
| + | get.p(194, hc_advice[1, | ||
| + | get.p(525, hc_friend[1, | ||
| + | get.p(1003, hc_report[1, | ||
| + | get.p(979, hc_full[3, | ||
| + | get.p(1047, hc_advice[3, | ||
| + | get.p(1135, hc_friend[3, | ||
| + | get.p(1314, hc_report[3, | ||
| + | |||
| + | # | ||
| + | |||
| + | #clustering | ||
| + | c(sort(hc_advice[1, | ||
| + | c(sort(hc_friend[1, | ||
| + | c(sort(hc_report[1, | ||
| + | |||
| + | #hierarchy | ||
| + | c(sort(hc_advice[3, | ||
| + | c(sort(hc_friend[3, | ||
| + | c(sort(hc_report[3, | ||
| + | |||
| + | </ | ||
| + | |||