sna_eg_stanford:lab01
Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
sna_eg_stanford:lab01 [2019/11/29 09:10] – created hkimscil | sna_eg_stanford:lab01 [2022/11/28 22:26] (current) – hkimscil | ||
---|---|---|---|
Line 82: | Line 82: | ||
# directly by referencing the URL in the read.table() function, | # directly by referencing the URL in the read.table() function, | ||
# as follows: | # as follows: | ||
- | advice_data_frame <- read.table(' | + | # advice_data_frame <- read.table(' |
- | friendship_data_frame <- read.table(' | + | # friendship_data_frame <- read.table(' |
- | reports_to_data_frame <- read.table(' | + | # reports_to_data_frame <- read.table(' |
- | + | ||
+ | advice_data_frame <- read.table(' | ||
+ | friendship_data_frame <- read.table(' | ||
+ | reports_to_data_frame <- read.table(' | ||
+ | |||
+ | |||
+ | |||
# If the files you want to work with are on your local machine, | # If the files you want to work with are on your local machine, | ||
# the easiest way to access them is to first set your working | # the easiest way to access them is to first set your working | ||
Line 114: | Line 121: | ||
# header=T, which tells R that the first row of data contains | # header=T, which tells R that the first row of data contains | ||
# column names. | # column names. | ||
- | attributes <- read.csv(' | + | # attributes <- read.csv(' |
+ | attributes <- read.csv(' | ||
attributes | attributes | ||
- | + | ||
# Other commands may be used to load data from files in different | # Other commands may be used to load data from files in different | ||
# formats. read.delim() is a general function for loading any | # formats. read.delim() is a general function for loading any | ||
Line 303: | Line 312: | ||
# example: setwd("/ | # example: setwd("/ | ||
- | setwd("" | + | # setwd("" |
| | ||
# First, let's plot the network with all possible ties. | # First, let's plot the network with all possible ties. | ||
pdf(" | pdf(" | ||
- | plot(krack_full) | + | plot(krack_full, main=" |
dev.off() | dev.off() | ||
Line 319: | Line 328: | ||
summary(krack_advice_only) | summary(krack_advice_only) | ||
pdf(" | pdf(" | ||
- | plot(krack_advice_only) | + | plot(krack_advice_only, main=" |
dev.off() | dev.off() | ||
Line 328: | Line 337: | ||
summary(krack_friendship_only) | summary(krack_friendship_only) | ||
pdf(" | pdf(" | ||
- | plot(krack_friendship_only) | + | plot(krack_friendship_only, main=" |
dev.off() | dev.off() | ||
Line 337: | Line 346: | ||
summary(krack_reports_to_only) | summary(krack_reports_to_only) | ||
pdf(" | pdf(" | ||
- | plot(krack_reports_to_only) | + | plot(krack_reports_to_only, main=" |
dev.off() | dev.off() | ||
Line 451: | Line 460: | ||
write.graph(krack_full, | write.graph(krack_full, | ||
</ | </ | ||
+ | |||
+ | ====== no pdf ====== | ||
+ | < | ||
+ | ########################################################################## | ||
+ | # You may cite these labs as follows: McFarland, Daniel, Solomon Messing, | ||
+ | # Mike Nowak, and Sean Westwood. 2010. " | ||
+ | # Labs in R." Stanford University. | ||
+ | ########################################################################## | ||
+ | |||
+ | |||
+ | ########################################################################## | ||
+ | # LAB 1 - Introductory Lab | ||
+ | # The point of this lab is to introduce students to the packages of | ||
+ | # SNA and Igraph, to cover some basic R commands, to load and manage | ||
+ | # data, to generate graph visualizations, and to export the data for | ||
+ | # use elsewhere. | ||
+ | ########################################################################## | ||
+ | |||
+ | ### | ||
+ | # 0. R BASICS | ||
+ | ### | ||
+ | |||
+ | # Any line starting with # is a " | ||
+ | # R. Any other line is treated as a command. Run commands by | ||
+ | # copying and pasting them into the R Console. | ||
+ | # | ||
+ | # If (when) you get confused, a good place to start is with R's | ||
+ | # built-in help functionality. R offers detailed help files for | ||
+ | # each function and each package. To access help type ?[function | ||
+ | # or package name] in the console. For example, for help on the | ||
+ | # " | ||
+ | ?sum | ||
+ | |||
+ | # To install all packages needed for Social Network Analysis | ||
+ | # Labs in R, uncomment and run the following code: | ||
+ | |||
+ | # | ||
+ | |||
+ | # You only need to run this once per computer! | ||
+ | |||
+ | # To load the packages, you need to call the "library" | ||
+ | # command. Note that you need to do this each session; packages | ||
+ | # don't load automatically by default (though you can set this | ||
+ | # as a preference if you'd like). | ||
+ | |||
+ | # For this lab, we will use the " | ||
+ | # A manual is available at | ||
+ | # http:// | ||
+ | library(igraph) | ||
+ | |||
+ | # Sometimes, different packages overlap in functionality and | ||
+ | # cause unexpected behavior when both are loaded simultaneously. | ||
+ | # If you ever want to remove an existing library, use the | ||
+ | # "detach" command: | ||
+ | # | ||
+ | # detach(package: | ||
+ | |||
+ | # IMPORTANT NOTE: Unlike in most languages, R objects are numbered | ||
+ | # from 1 instead of 0, so if you want the first element in a | ||
+ | # vector, you would reference it by vector_name[1]. HOWEVER, | ||
+ | # igraph objects were numbered starting from 0 in igraph versions | ||
+ | # before 0.6; since igraph 0.6, vertex and edge IDs also start | ||
+ | # from 1. This can lead to lots of confusion when reading older | ||
+ | # code, since it's not always obvious at first which objects are | ||
+ | # native to R and which belong to igraph. | ||
+ | |||
+ | |||
+ | ### | ||
+ | # 1. LOADING DATA | ||
+ | ### | ||
+ | |||
+ | # The <- operator sets a variable equal to something. In this case, | ||
+ | # we will set a number of basic R data structures, called " | ||
+ | # frames," | ||
+ | # | ||
+ | # read.table() is the most common R command for loading data from | ||
+ | # files in which values are in tabular format. The function loads | ||
+ | # the table into a data frame object, which is the basic data type | ||
+ | # for most operations in R. By default, R assumes that the table | ||
+ | # has no header and is delimited by any white space; these | ||
+ | # settings are fine for our purposes here. | ||
+ | # | ||
+ | # One handy aspect of R is that you can read in data from a URL | ||
+ | # directly by referencing the URL in the read.table() function, | ||
+ | # as follows: | ||
+ | advice_data_frame <- read.table(' | ||
+ | friendship_data_frame <- read.table(' | ||
+ | reports_to_data_frame <- read.table(' | ||
+ | |||
+ | # If the files you want to work with are on your local machine, | ||
+ | # the easiest way to access them is to first set your working | ||
+ | # directory via the setwd() command, and then reference the | ||
+ | # files by name: | ||
+ | # | ||
+ | # setwd(' | ||
+ | # your_data_frame <- read.table(' | ||
+ | |||
+ | # Note that when you set a variable equal to something, if all | ||
+ | # goes well R will not provide any feedback. To see the data we | ||
+ | # just loaded, it's necessary to call the variables directly. | ||
+ | advice_data_frame | ||
+ | |||
+ | # Since this is a bit long, we can see just the top six rows via | ||
+ | # head()... | ||
+ | head(friendship_data_frame) | ||
+ | |||
+ | # ... or the bottom six rows via tail(). | ||
+ | tail(reports_to_data_frame) | ||
+ | |||
+ | # To view your data in a spreadsheet-like window, use the command 'fix()'. | ||
+ | # fix(reports_to_data_frame) | ||
+ | |||
+ | # The attribute data for this lab is in a comma-separated-value | ||
+ | # (CSV) file. read.csv() loads a CSV file into a data frame | ||
+ | # object. In this case, we do have a header row, so we set | ||
+ | # header=T, which tells R that the first row of data contains | ||
+ | # column names. | ||
+ | attributes <- read.csv(' | ||
+ | attributes | ||
+ | |||
+ | # Other commands may be used to load data from files in different | ||
+ | # formats. read.delim() is a general function for loading any | ||
+ | # delimited text file. The default is tab-delimited, | ||
+ | # be overridden by setting the " | ||
+ | # | ||
+ | # f <- read.delim(" | ||
+ | # f <- read.delim(" | ||
+ | # | ||
+ | # The ' | ||
+ | # custom data types, such as SPSS files via read.spss() and | ||
+ | # STATA files via read.dta(). | ||
+ | |||
+ | # When data files are part of an R package you can read them as | ||
+ | # follows: | ||
+ | # | ||
+ | # data(kracknets, | ||
+ | |||
+ | # In the future, we will load data this way. However, it is useful | ||
+ | # to get a sense of how things often must be done in R. | ||
+ | |||
+ | |||
+ | ### | ||
+ | # 2. LOADING GRAPHS | ||
+ | ### | ||
+ | |||
+ | # For convenience, | ||
+ | # imported data frames. c() is a common generic R function that | ||
+ | # combines its arguments into a single vector. | ||
+ | colnames(advice_data_frame) <- c(' | ||
+ | head(advice_data_frame) | ||
+ | |||
+ | colnames(friendship_data_frame) <- c(' | ||
+ | head(friendship_data_frame) | ||
+ | |||
+ | colnames(reports_to_data_frame) <- c(' | ||
+ | head(reports_to_data_frame) | ||
+ | |||
+ | # Take a look at each data frame using the 'fix()' function. You will | ||
+ | # need to close each fix window before R will evaluate the next line of code. | ||
+ | |||
+ | # fix(advice_data_frame) | ||
+ | # fix(friendship_data_frame) | ||
+ | # fix(reports_to_data_frame) | ||
+ | |||
+ | # Before we merge these data, we need to make sure 'ego' and 'alter' are the | ||
+ | # same across data sets. We can compare each row using the == syntax. | ||
+ | # The command below should return TRUE for every row if all ego rows | ||
+ | # are the same for advice and friendship: | ||
+ | advice_data_frame$ego == friendship_data_frame$ego | ||
+ | |||
+ | # That's a lot of output to sort through. Instead, we can just have R return | ||
+ | # which row entries are not equal using the syntax below: | ||
+ | which(advice_data_frame$ego != friendship_data_frame$ego) | ||
+ | |||
+ | # Repeat for other variables | ||
+ | which(advice_data_frame$alter != friendship_data_frame$alter) | ||
+ | which(reports_to_data_frame$alter != friendship_data_frame$alter) | ||
+ | which(reports_to_data_frame$ego != friendship_data_frame$ego) | ||
+ | |||
+ | # Now that we've verified they are all the same, we can combine them into | ||
+ | # a single data frame. | ||
+ | krack_full_data_frame <- cbind(advice_data_frame, | ||
+ | friendship_data_frame$friendship_tie, | ||
+ | reports_to_data_frame$reports_to_tie) | ||
+ | head(krack_full_data_frame) | ||
+ | |||
+ | # Notice that the last two variable names are now | ||
+ | # " | ||
+ | # and " | ||
+ | # That's a little long. We can rename them | ||
+ | # as follows: | ||
+ | |||
+ | names(krack_full_data_frame)[4: | ||
+ | " | ||
+ | head(krack_full_data_frame) | ||
+ | |||
+ | # Another way to build the data frame is to use R' | ||
+ | # data.frame syntax from the start: | ||
+ | krack_full_data_frame <- data.frame(ego = advice_data_frame[, | ||
+ | alter = advice_data_frame[, | ||
+ | advice_tie = advice_data_frame[, | ||
+ | friendship_tie = friendship_data_frame[, | ||
+ | reports_to_tie = reports_to_data_frame[, | ||
+ | head(krack_full_data_frame) | ||
+ | |||
+ | |||
+ | # Now let's move on to some data processing. | ||
+ | |||
+ | # Reduce to non-zero edges so that the edge list only contains | ||
+ | # actual ties of some type. | ||
+ | krack_full_nonzero_edges <- subset(krack_full_data_frame, | ||
+ | (advice_tie > 0 | friendship_tie > 0 | reports_to_tie > 0)) | ||
+ | head(krack_full_nonzero_edges) | ||
+ | |||
+ | # Now we can import our data into a " | ||
+ | # graph.data.frame() function. Coercing the data into a graph | ||
+ | # object is what allows us to perform network-analysis techniques. | ||
+ | krack_full <- graph.data.frame(krack_full_nonzero_edges) | ||
+ | summary(krack_full) | ||
+ | |||
+ | # By default, graph.data.frame() treats the first two columns of | ||
+ | # a data frame as an edge list and any remaining columns as | ||
+ | # edge attributes. Thus, the 232 edges appearing in the summary() | ||
+ | # output refer to the 232 pairs of vertices that are joined by | ||
+ | # *any type* of tie. The tie types themselves are listed as edge | ||
+ | # attributes. | ||
+ | |||
+ | # To get a vector of edges for a specific type of tie, use the | ||
+ | # get.edge.attribute() function. | ||
+ | get.edge.attribute(krack_full, | ||
+ | get.edge.attribute(krack_full, | ||
+ | get.edge.attribute(krack_full, | ||
+ | |||
+ | # If you would like to symmetrize the network, making all | ||
+ | # asymmetric ties symmetric, use the as.undirected() | ||
+ | # function: | ||
+ | krack_full_symmetrized <- as.undirected(krack_full, | ||
+ | summary(krack_full_symmetrized) | ||
+ | |||
+ | |||
+ | |||
+ | ### | ||
+ | # 3. ADDING VERTEX ATTRIBUTES TO A GRAPH OBJECT | ||
+ | ### | ||
+ | |||
+ | # One way to add the attributes to your graph object is to iterate | ||
+ | # through each attribute and each vertex. This means that we will | ||
+ | # add one attribute at a time to each vertex in the network. | ||
+ | # | ||
+ | # V(krack_full) returns a list of the IDs of each vertex in the | ||
+ | # graph. names(attributes) returns a list of the column names in | ||
+ | # the attributes table. The double-for loop tells R to repeat the | ||
+ | # code between the brackets once for each attribute and once for | ||
+ | # each vertex. | ||
+ | for (i in V(krack_full)) { | ||
+ | for (j in names(attributes)) { | ||
+ | krack_full <- set.vertex.attribute(krack_full, | ||
+ | | ||
+ | index = i, | ||
+ | | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # A shorter way is to just read in attribute names when you | ||
+ | # create the graph object: | ||
+ | |||
+ | # First create a vector of vertex labels, in this case 1:n | ||
+ | attributes = cbind(1: | ||
+ | |||
+ | krack_full <- graph.data.frame(d = krack_full_nonzero_edges, | ||
+ | | ||
+ | |||
+ | # Note that we now have ' | ||
+ | # listed alongside ' | ||
+ | summary(krack_full) | ||
+ | |||
+ | # We can see a list of the values for a given attribute for all of | ||
+ | # the actors in the network. | ||
+ | get.vertex.attribute(krack_full, | ||
+ | get.vertex.attribute(krack_full, | ||
+ | get.vertex.attribute(krack_full, | ||
+ | get.vertex.attribute(krack_full, | ||
+ | |||
+ | |||
+ | ### | ||
+ | # 4. VISUALIZE THE NETWORKS | ||
+ | ### | ||
+ | |||
+ | # We can use R's general-purpose plot() method to generate custom | ||
+ | # visualizations of the network. | ||
+ | |||
+ | # R only lets us look at one plot at a time. To make our work easier | ||
+ | # we will save our plots as PDF files. | ||
+ | # the code between the PDF function and " | ||
+ | |||
+ | # In order to save PDF files we must tell R where to put them. We do | ||
+ | # this with the setwd() command. Put the full path to the | ||
+ | # folder where you will output the files here. | ||
+ | |||
+ | # In OS X you can get this information by selecting the folder, right | ||
+ | # clicking and selecting "Get Info." | ||
+ | |||
+ | # In Windows you can get this information by selecting the folder, right | ||
+ | # clicking and selecting " | ||
+ | # " | ||
+ | |||
+ | # example: setwd("/ | ||
+ | setwd("" | ||
+ | | ||
+ | # First, let's plot the network with all possible ties. | ||
+ | # pdf(" | ||
+ | plot(krack_full, | ||
+ | # dev.off() | ||
+ | |||
+ | # This is a bit of a jumble, so let's look at the networks for | ||
+ | # single edge types. | ||
+ | |||
+ | # advice only | ||
+ | krack_advice_only <- delete.edges(krack_full, | ||
+ | E(krack_full)[get.edge.attribute(krack_full, | ||
+ | name = " | ||
+ | summary(krack_advice_only) | ||
+ | |||
+ | # pdf(" | ||
+ | plot(krack_advice_only, | ||
+ | # dev.off() | ||
+ | |||
+ | # friendship only | ||
+ | krack_friendship_only <- delete.edges(krack_full, | ||
+ | E(krack_full)[get.edge.attribute(krack_full, | ||
+ | name = " | ||
+ | summary(krack_friendship_only) | ||
+ | # pdf(" | ||
+ | plot(krack_friendship_only, | ||
+ | # dev.off() | ||
+ | |||
+ | # reports-to only | ||
+ | krack_reports_to_only <- delete.edges(krack_full, | ||
+ | E(krack_full)[get.edge.attribute(krack_full, | ||
+ | name = " | ||
+ | summary(krack_reports_to_only) | ||
+ | # pdf(" | ||
+ | plot(krack_reports_to_only, | ||
+ | # dev.off() | ||
+ | |||
+ | # Still kind of messy, so let's clean things up a bit. For | ||
+ | # simplicity, we'll focus on reports_to ties for now. | ||
+ | |||
+ | # First, we can optimize the layout by applying the layout | ||
+ | # algorithm to the specific set of ties we care about. Here | ||
+ | # we'll use Fruchterman-Reingold; | ||
+ | # described in the igraph help page for " | ||
+ | # can be accessed by entering ?layout. | ||
+ | |||
+ | reports_to_layout <- layout.fruchterman.reingold(krack_reports_to_only) | ||
+ | # pdf(" | ||
+ | plot(krack_reports_to_only, | ||
+ | | ||
+ | # dev.off() | ||
+ | |||
+ | # Now let's color-code vertices by department and clean up the | ||
+ | # plot by removing vertex labels and shrinking the arrow size. | ||
+ | dept_vertex_colors = get.vertex.attribute(krack_full," | ||
+ | colors = c(' | ||
+ | dept_vertex_colors[dept_vertex_colors == 0] = colors[1] | ||
+ | dept_vertex_colors[dept_vertex_colors == 1] = colors[2] | ||
+ | dept_vertex_colors[dept_vertex_colors == 2] = colors[3] | ||
+ | dept_vertex_colors[dept_vertex_colors == 3] = colors[4] | ||
+ | dept_vertex_colors[dept_vertex_colors == 4] = colors[5] | ||
+ | |||
+ | # pdf(" | ||
+ | plot(krack_reports_to_only, | ||
+ | layout=reports_to_layout, | ||
+ | vertex.color=dept_vertex_colors, | ||
+ | vertex.label=NA, | ||
+ | edge.arrow.size=.5, | ||
+ | main=" | ||
+ | # dev.off() | ||
+ | # Now let's set the vertex size by tenure. | ||
+ | tenure_vertex_sizes = get.vertex.attribute(krack_full," | ||
+ | |||
+ | # pdf(" | ||
+ | plot(krack_reports_to_only, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | # dev.off() | ||
+ | |||
+ | # Now let's incorporate additional tie types. We'll use the | ||
+ | # layout generated by the reports-to ties but overlay the | ||
+ | # advice and friendship ties in red and blue. | ||
+ | |||
+ | tie_type_colors = c(rgb(1, | ||
+ | E(krack_full)$color[ E(krack_full)$advice_tie==1 ] = tie_type_colors[1] | ||
+ | E(krack_full)$color[ E(krack_full)$friendship_tie==1 ] = tie_type_colors[2] | ||
+ | E(krack_full)$color[ E(krack_full)$reports_to_tie==1 ] = tie_type_colors[3] | ||
+ | E(krack_full)$arrow.size=.5 | ||
+ | V(krack_full)$color = dept_vertex_colors | ||
+ | V(krack_full)$frame = dept_vertex_colors | ||
+ | |||
+ | # pdf(" | ||
+ | plot(krack_full, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | |||
+ | |||
+ | # Add a legend. Note that the plot window must be open for this to | ||
+ | # work. | ||
+ | legend(1, | ||
+ | 1.25, | ||
+ | | ||
+ | ' | ||
+ | ' | ||
+ | col = tie_type_colors, | ||
+ | | ||
+ | cex = .7) | ||
+ | # dev.off() | ||
+ | |||
+ | # Another option for visualizing different network ties relative | ||
+ | # to one another is to overlay the edges from one tie type on the | ||
+ | # structure generated by another tie type. Here we can use the | ||
+ | # reports-to layout but show the friendship ties: | ||
+ | |||
+ | # pdf(" | ||
+ | plot(krack_friendship_only, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | # dev.off() | ||
+ | |||
+ | |||
+ | ### | ||
+ | # 5. EXPORT THE NETWORK | ||
+ | ### | ||
+ | |||
+ | # The write.graph() function exports a graph object in various | ||
+ | # formats readable by other programs. There is no explicit | ||
+ | # option for a UCINET data type, but you can export the graph | ||
+ | # as a Pajek object by setting the ' | ||
+ | # Note that the file will appear in whichever directory is set | ||
+ | # as the default in R's preferences, | ||
+ | # changed this via setwd(). | ||
+ | write.graph(krack_full, | ||
+ | |||
+ | # For a more general file type (e.g., importable to Excel), | ||
+ | # use the " | ||
+ | # write the attributes; only the ties are maintained. | ||
+ | write.graph(krack_full, | ||
+ | </ | ||
+ |
sna_eg_stanford/lab01.1574986255.txt.gz · Last modified: 2019/11/29 09:10 by hkimscil