User Tools

Site Tools


sna_eg_stanford:lab01

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
sna_eg_stanford:lab01 [2019/11/29 09:10] – created hkimscilsna_eg_stanford:lab01 [2022/11/28 22:26] (current) hkimscil
Line 82: Line 82:
 # directly by referencing the URL in the read.table() function, # directly by referencing the URL in the read.table() function,
 # as follows:  # as follows: 
-advice_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Advice.txt'+advice_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Advice.txt'
-friendship_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Friendship.txt'+friendship_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Friendship.txt'
-reports_to_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-ReportsTo.txt'+reports_to_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-ReportsTo.txt') 
- + 
 +advice_data_frame <- read.table('https://www.stat.cmu.edu/~brian/780/stanford%20social%20network%20labs/00%20data/Krack-High-Tec-edgelist-Advice.txt'
 +friendship_data_frame <- read.table('https://www.stat.cmu.edu/~brian/780/stanford%20social%20network%20labs/00%20data/Krack-High-Tec-edgelist-Friendship.txt'
 +reports_to_data_frame <- read.table('https://www.stat.cmu.edu/~brian/780/stanford%20social%20network%20labs/00%20data/Krack-High-Tec-edgelist-ReportsTo.txt'
 + 
 + 
 + 
 # If the files you want to work with are on your local machine,  # If the files you want to work with are on your local machine, 
 # the easiest way to access them is to first set your working  # the easiest way to access them is to first set your working 
Line 114: Line 121:
 # header=T, which tells R that the first row of data contains # header=T, which tells R that the first row of data contains
 # column names. # column names.
-attributes <- read.csv('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-Attributes.csv', header=T)+attributes <- read.csv('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-Attributes.csv', header=T) 
 +attributes <- read.csv('https://www.stat.cmu.edu/~brian/780/stanford%20social%20network%20labs/00%20data/Krack-High-Tec-Attributes.csv', header=T)
 attributes attributes
- + 
 # Other commands may be used to load data from files in different  # Other commands may be used to load data from files in different 
 # formats. read.delim() is a general function for loading any # formats. read.delim() is a general function for loading any
Line 303: Line 312:
  
 # example: setwd("/Users/seanwestwood/Desktop/lab_1") # example: setwd("/Users/seanwestwood/Desktop/lab_1")
-setwd("")+setwd("")
      
 # First, let's plot the network with all possible ties. # First, let's plot the network with all possible ties.
 pdf("1.1_Krackhardt_Full.pdf") pdf("1.1_Krackhardt_Full.pdf")
-plot(krack_full)+plot(krack_full, main="krack_full")
 dev.off() dev.off()
    
Line 319: Line 328:
 summary(krack_advice_only) summary(krack_advice_only)
 pdf("1.2_Krackhardt_Advice.pdf") pdf("1.2_Krackhardt_Advice.pdf")
-plot(krack_advice_only)+plot(krack_advice_only, main="krack advice only")
 dev.off() dev.off()
    
Line 328: Line 337:
 summary(krack_friendship_only) summary(krack_friendship_only)
 pdf("1.3_Krackhardt_Friendship.pdf") pdf("1.3_Krackhardt_Friendship.pdf")
-plot(krack_friendship_only)+plot(krack_friendship_only, main="krack_friendship_only")
 dev.off() dev.off()
  
Line 337: Line 346:
 summary(krack_reports_to_only) summary(krack_reports_to_only)
 pdf("1.4_Krackhardt_Reports.pdf") pdf("1.4_Krackhardt_Reports.pdf")
-plot(krack_reports_to_only)+plot(krack_reports_to_only, main="krack_reports_to_only")
 dev.off() dev.off()
    
Line 451: Line 460:
 write.graph(krack_full, file='krack_full.txt', format="edgelist") write.graph(krack_full, file='krack_full.txt', format="edgelist")
 </code> </code>
 +
 +====== no pdf ======
 +<code>
 +##########################################################################
 +# You may cite these labs as follows: McFarland, Daniel, Solomon Messing,
 +# Mike Nowak, and Sean Westwood. 2010. "Social Network Analysis          
 +# Labs in R." Stanford University.                                       
 +##########################################################################
 + 
 + 
 +##########################################################################
 +# LAB 1 - Introductory Lab                                              
 +# The point of this lab is to introduce students to the packages of          
 +# SNA and Igraph, to cover some basic R commands, to load and manage      
 +# data, to generate graph visualizations, and to export the data for 
 +# use elsewhere.                   
 +##########################################################################
 + 
 +###
 +# 0. R BASICS 
 +###
 + 
 +# Any line starting with # is a "comment" line and is ignored by
 +# R. Any other line is treated as a command. Run commands by 
 +# copying and pasting them into the R Console.
 +#
 +# If (when) you get confused, a good place to start is with R's
 +# built-in help functionality. R offers detailed help files for
 +# each function and each package. To access help type ?[function
 +# or package name] in the console. For example, for help on the
 +# "sum" function, type:
 +?sum
 + 
 +# To install all packages needed for Social Network Analysis 
 +# Labs in R, uncomment and run the following code:
 + 
 +#source("http://sna.stanford.edu/setup.R")
 +
 +# You only need to run this once per computer!
 + 
 +# To load the installed packages, you need to call the "library"
 +# command. Note that you need to do this each session; packages
 +# don't load automatically by default (though you can set this 
 +# as a preference if you'd like).
 + 
 +# For this lab, we will use the "igraph" package.
 +# A manual is available at 
 +# http://cran.r-project.org/web/packages/igraph/igraph.pdf.
 +library(igraph) 
 + 
 +# Sometimes, different packages overlap in functionality and 
 +# cause unexpected behavior when both are loaded simultaneously.
 +# If you ever want to remove an existing library, use the 
 +# "detach" command:
 +#
 +# detach(package:igraph)
 + 
 +# IMPORTANT NOTE: Unlike in most languages, R objects are numbered
 +# from 1 instead of 0, so if you want the first element in a
 +# vector, you would reference it by vector_name[1]. Very old igraph
 +# releases (before 0.6) numbered vertices and edges from 0, and code
 +# written for them may still add 1 when indexing. Current igraph
 +# numbers vertices and edges from 1, like the rest of R.
 + 
 + 
 +###
 +# 1. LOADING DATA
 +###
 + 
 +# The <- operator sets a variable equal to something. In this case,
 +# we will set a number of basic R data structures, called "data 
 +# frames," to hold the contents of the files we will open. 
 +#
 +# read.table() is the most common R command for loading data from
 +# files in which values are in tabular format. The function loads
 +# the table into a data frame object, which is the basic data type
 +# for most operations in R. By default, R assumes that the table
 +# has no header and is delimited by any white space; these
 +# settings are fine for our purposes here.
 +#
 +# One handy aspect of R is that you can read in data from a URL 
 +# directly by referencing the URL in the read.table() function,
 +# as follows: 
 +advice_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Advice.txt')
 +friendship_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Friendship.txt')
 +reports_to_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-ReportsTo.txt')
 + 
 +# If the files you want to work with are on your local machine, 
 +# the easiest way to access them is to first set your working 
 +# directory via the setwd() command, and then reference the 
 +# files by name:
 +#
 +# setwd('path/to/your_directory')
 +# your_data_frame <- read.table('your_file_name')
 + 
 +# Note that when you set a variable equal to something, if all 
 +# goes well R will not provide any feedback. To see the data we
 +# just loaded, it's necessary to call the variables directly.
 +advice_data_frame
 + 
 +# Since this is a bit long, we can see just the top six rows via
 +# head()...
 +head(friendship_data_frame)
 + 
 +# ... or the bottom six rows via tail().
 +tail(reports_to_data_frame)
 +
 +# To view your data in a spreadsheet-like window, use the command 'fix()'
 +# fix(reports_to_data_frame)
 +
 +# The attribute data for this lab is in a comma-separated-value
 +# (CSV) file. read.csv() loads a CSV file into a data frame
 +# object. In this case, we do have a header row, so we set
 +# header=T, which tells R that the first row of data contains
 +# column names.
 +attributes <- read.csv('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-Attributes.csv', header=T)
 +attributes
 + 
 +# Other commands may be used to load data from files in different 
 +# formats. read.delim() is a general function for loading any
 +# delimited text file. The default is tab-delimited, but this can 
 +# be overridden by setting the "sep" parameter. For example:
 +#
 +#     f <- read.delim("tab_delimited_file.txt")
 +#     f <- read.delim("colon_delimited_file.txt", sep=':')
 +#
 +# The 'foreign' package will allow you to read a few other 
 +# custom data types, such as SPSS files via read.spss() and 
 +# STATA files via read.dta().
 + 
 +# When data files are part of an R package you can read them as 
 +# follows:
 +#
 +# data(kracknets, package = "NetData")
 + 
 +# In the future, we will load data this way. However, it is useful 
 +# to get a sense of how things often must be done in R.
 + 
 + 
 +###
 +# 2. LOADING GRAPHS
 +###
 + 
 +# For convenience, we can assign column names to our newly 
 +# imported data frames. c() is a common generic R function that 
 +# combines its arguments into a single vector.
 +colnames(advice_data_frame) <- c('ego', 'alter', 'advice_tie')
 +head(advice_data_frame)
 + 
 +colnames(friendship_data_frame) <- c('ego', 'alter', 'friendship_tie')
 +head(friendship_data_frame)
 + 
 +colnames(reports_to_data_frame) <- c('ego', 'alter', 'reports_to_tie')
 +head(reports_to_data_frame)
 + 
 +# Take a look at each data frame using the 'fix()' function. Note that you'll 
 +# need to close each fix window before R will evaluate the next line of code.
 +
 +# fix(advice_data_frame)
 +# fix(friendship_data_frame)
 +# fix(reports_to_data_frame)
 + 
 +# Before we merge these data, we need to make sure 'ego' and 'alter' are the
 +# same across data sets. We can compare each row using the == syntax. 
 +# The command below should return TRUE for every row if all ego rows
 +# are the same for advice and friendship:
 +advice_data_frame$ego == friendship_data_frame$ego
 + 
 +# That's a lot of output to sort through. Instead, we can just have R return 
 +# which row entries are not equal using the syntax below:
 +which(advice_data_frame$ego != friendship_data_frame$ego)
 + 
 +# Repeat for other variables
 +which(advice_data_frame$alter != friendship_data_frame$alter)
 +which(reports_to_data_frame$alter != friendship_data_frame$alter)
 +which(reports_to_data_frame$ego != friendship_data_frame$ego)
 + 
 +# Now that we've verified they are all the same, we can combine them into 
 +# a single data frame. 
 +krack_full_data_frame <- cbind(advice_data_frame, 
 + friendship_data_frame$friendship_tie, 
 + reports_to_data_frame$reports_to_tie)
 +head(krack_full_data_frame)
 + 
 +# Notice that the last two variable names are now 
 +# "reports_to_data_frame$reports_to_tie"
 +# and "friendship_data_frame$friendship_tie"
 +# That's a little long. We can rename them
 +# as follows:
 +
 +names(krack_full_data_frame)[4:5] <- c("friendship_tie", 
 + "reports_to_tie")  
 +head(krack_full_data_frame)
 + 
 +# Another way to build the data frame is to use R's
 +# data.frame syntax from the start:
 +krack_full_data_frame <- data.frame(ego = advice_data_frame[,1],
 + alter = advice_data_frame[,2],
 + advice_tie = advice_data_frame[,3],
 + friendship_tie = friendship_data_frame[,3], 
 + reports_to_tie = reports_to_data_frame[,3])
 +head(krack_full_data_frame)
 + 
 + 
 +# Now let's move on to some data processing.
 + 
 +# Reduce to non-zero edges so that the edge list only contains
 +# actual ties of some type.
 +krack_full_nonzero_edges <- subset(krack_full_data_frame, 
 + (advice_tie > 0 | friendship_tie > 0 | reports_to_tie > 0))
 +head(krack_full_nonzero_edges)
 + 
 +# Now we can import our data into a "graph" object using igraph's
 +# graph.data.frame() function. Coercing the data into a graph
 +# object is what allows us to perform network-analysis techniques.
 +krack_full <- graph.data.frame(krack_full_nonzero_edges) 
 +summary(krack_full)
 + 
 +# By default, graph.data.frame() treats the first two columns of 
 +# a data frame as an edge list and any remaining columns as 
 +# edge attributes. Thus, the 232 edges appearing in the summary()
 +# output refer to the 232 pairs of vertices that are joined by 
 +# *any type* of tie. The tie types themselves are listed as edge 
 +# attributes.
 + 
 +# To get a vector of edges for a specific type of tie, use the 
 +# get.edge.attribute() function.
 +get.edge.attribute(krack_full, 'advice_tie')
 +get.edge.attribute(krack_full, 'friendship_tie')
 +get.edge.attribute(krack_full, 'reports_to_tie')
 + 
 +# If you would like to symmetrize the network, making all 
 +# asymmetric ties symmetric, use the as.undirected()
 +# function: 
 +krack_full_symmetrized <- as.undirected(krack_full, mode='collapse')
 +summary(krack_full_symmetrized)
 + 
 + 
 + 
 +###
 +# 3. ADDING VERTEX ATTRIBUTES TO A GRAPH OBJECT
 +###
 + 
 +# One way to add the attributes to your graph object is to iterate
 +# through each attribute and each vertex. This means that we will
 +# add one attribute at a time to each vertex in the network.
 +#
 +# V(krack_full) returns a list of the IDs of each vertex in the 
 +# graph. names(attributes) returns a list of the column names in
 +# the attributes table. The double-for loop tells R to repeat the
 +# code between the brackets once for each attribute and once for
 +# each vertex.
 +for (i in V(krack_full)) {
 +    for (j in names(attributes)) {
 +        krack_full <- set.vertex.attribute(krack_full, 
 +                                           j, 
 +                                           index = i, 
 +                                           attributes[i + 1, j])
 +    }
 +}
 + 
 +# A shorter way is to just read in attribute names when you
 +# create the graph object:
 + 
 +# First create a vector of vertex labels, in this case 1:n
 +attributes = cbind(1:length(attributes[,1]), attributes)
 + 
 +krack_full <- graph.data.frame(d = krack_full_nonzero_edges, 
 +                               vertices = attributes) 
 + 
 +# Note that we now have 'AGE,' 'TENURE,' 'LEVEL,' and 'DEPT'
 +# listed alongside 'name' as vertex attributes.
 +summary(krack_full)
 + 
 +# We can see a list of the values for a given attribute for all of
 +# the actors in the network.
 +get.vertex.attribute(krack_full, 'AGE')
 +get.vertex.attribute(krack_full, 'TENURE')
 +get.vertex.attribute(krack_full, 'LEVEL')
 +get.vertex.attribute(krack_full, 'DEPT')
 + 
 + 
 +###
 +# 4. VISUALIZE THE NETWORKS
 +###
 + 
 +# We can use R's general-purpose plot() method to generate custom
 +# visualizations of the network.
 +
 +# R only lets us look at one plot at a time.  To make our work easier
 +# we will save our plots as PDF files.  To just create a plot, execute 
 +# the code between the PDF function and "dev.off()".
 +
 +# In order to save PDF files we must tell R where to put them.  We do
 +# this with the setwd() command.  You must put the full path to the
 +# folder where you will output the files here.
 +
 +# In OS X you can get this information by selecting the folder, right
 +# clicking and selecting "Get Info."  The path is listed under "Where."
 +
 +# In Windows you can get this information by selecting the folder, right
 +# clicking and selecting "Properties."  The path information is listed 
 +# under "Location."
 +
 +# example: setwd("/Users/seanwestwood/Desktop/lab_1")
 +setwd("")
 +  
 +# First, let's plot the network with all possible ties.
 +# pdf("1.1_Krackhardt_Full.pdf")
 +plot(krack_full, main="krack_full")
 +# dev.off()
 + 
 +# This is a bit of a jumble, so let's look at the networks for
 +# single edge types.
 + 
 +# advice only
 +krack_advice_only <- delete.edges(krack_full, 
 +    E(krack_full)[get.edge.attribute(krack_full,
 +    name = "advice_tie") == 0])
 +summary(krack_advice_only)
 +
 +# pdf("1.2_Krackhardt_Advice.pdf")
 +plot(krack_advice_only, main="krack_advice_only")
 +# dev.off()
 + 
 +# friendship only
 +krack_friendship_only <- delete.edges(krack_full, 
 +    E(krack_full)[get.edge.attribute(krack_full, 
 +    name = "friendship_tie") == 0])
 +summary(krack_friendship_only)
 +# pdf("1.3_Krackhardt_Friendship.pdf")
 +plot(krack_friendship_only, main="krack_friendship_only")
 +# dev.off()
 +
 +# reports-to only
 +krack_reports_to_only <- delete.edges(krack_full, 
 +    E(krack_full)[get.edge.attribute(krack_full, 
 +    name = "reports_to_tie") == 0])
 +summary(krack_reports_to_only)
 +# pdf("1.4_Krackhardt_Reports.pdf")
 +plot(krack_reports_to_only, main="krack_reports_to_only")
 +# dev.off()
 + 
 +# Still kind of messy, so let's clean things up a bit. For 
 +# simplicity, we'll focus on reports_to ties for now.
 + 
 +# First, we can optimize the layout by applying the layout 
 +# algorithm to the specific set of ties we care about. Here 
 +# we'll use Fruchterman-Reingold; other options are 
 +# described in the igraph help page for "layout," which 
 +# can be accessed by entering ?layout.
 +
 +reports_to_layout <- layout.fruchterman.reingold(krack_reports_to_only)
 +# pdf("1.5_Krackhardt_Reports_Fruchterman_Reingold.pdf")
 +plot(krack_reports_to_only, 
 +     layout=reports_to_layout, main="krack_reports_to_only")
 +# dev.off()
 + 
 +# Now let's color-code vertices by department and clean up the 
 +# plot by removing vertex labels and shrinking the arrow size. 
 +dept_vertex_colors = get.vertex.attribute(krack_full,"DEPT")
 +colors = c('Black', 'Red', 'Blue', 'Yellow', 'Green')
 +dept_vertex_colors[dept_vertex_colors == 0] = colors[1]
 +dept_vertex_colors[dept_vertex_colors == 1] = colors[2]
 +dept_vertex_colors[dept_vertex_colors == 2] = colors[3]
 +dept_vertex_colors[dept_vertex_colors == 3] = colors[4] 
 +dept_vertex_colors[dept_vertex_colors == 4] = colors[5]
 +
 +# pdf("1.6_Krackhardt_Reports_Color.pdf")
 +plot(krack_reports_to_only, 
 +    layout=reports_to_layout, 
 +    vertex.color=dept_vertex_colors, 
 +    vertex.label=NA, 
 +    edge.arrow.size=.5, 
 +    main="krack_reports_to_only")
 +# dev.off() 
 +# Now let's set the vertex size by tenure.
 +tenure_vertex_sizes = get.vertex.attribute(krack_full,"TENURE")
 +
 +# pdf("1.7_Krackhardt_Reports_Vertex_Size.pdf")
 +plot(krack_reports_to_only, 
 +     layout=reports_to_layout, 
 +     vertex.color=dept_vertex_colors, 
 +     vertex.label=NA, 
 +     edge.arrow.size=.5,
 +     vertex.size=tenure_vertex_sizes, main="krack_reports_to_only")
 +# dev.off() 
 + 
 +# Now let's incorporate additional tie types. We'll use the 
 +# layout generated by the reports-to ties but overlay the 
 +# advice and friendship ties in red and blue.
 +
 +tie_type_colors = c(rgb(1,0,0,.5), rgb(0,0,1,.5), rgb(0,0,0,.5))
 +E(krack_full)$color[ E(krack_full)$advice_tie==1 ] = tie_type_colors[1]
 +E(krack_full)$color[ E(krack_full)$friendship_tie==1 ] = tie_type_colors[2]
 +E(krack_full)$color[ E(krack_full)$reports_to_tie==1 ] = tie_type_colors[3]
 +E(krack_full)$arrow.size=.5 
 +V(krack_full)$color = dept_vertex_colors
 +V(krack_full)$frame = dept_vertex_colors
 +
 +# pdf("1.8_Krackhardt_Overlayed_Ties.pdf")
 +plot(krack_full, 
 +     layout=reports_to_layout, 
 +     vertex.color=dept_vertex_colors, 
 +     vertex.label=NA, 
 +     edge.arrow.size=.5,
 +     vertex.size=tenure_vertex_sizes, main="krack_full")
 + 
 + 
 +# Add a legend. Note that the plot window must be open for this to 
 +# work.
 +legend(1, 
 +       1.25,
 +       legend = c('Advice', 
 +                  'Friendship',
 +                  'Reports To'), 
 +       col = tie_type_colors, 
 +       lty=1,
 +       cex = .7)
 +# dev.off() 
 + 
 +# Another option for visualizing different network ties relative 
 +# to one another is to overlay the edges from one tie type on the 
 +# structure generated by another tie type. Here we can use the
 +# reports-to layout but show the friendship ties:
 +
 +# pdf("1.9_Krackhardt_Overlayed_Structure.pdf")
 +plot(krack_friendship_only, 
 +     layout=reports_to_layout, 
 +     vertex.color=dept_vertex_colors, 
 +     vertex.label=NA, 
 +     edge.arrow.size=.5,
 +     vertex.size=tenure_vertex_sizes, 
 +     main='Krackhardt High-Tech Managers')
 +# dev.off() 
 + 
 + 
 +###
 +# 5. EXPORT THE NETWORK
 +###
 + 
 +# The write.graph() function exports a graph object in various
 +# formats readable by other programs. There is no explicit
 +# option for a UCINET data type, but you can export the graph
 +# as a Pajek object by setting the 'format' parameter to 'pajek.'
 +# Note that the file will appear in whichever directory is set 
 +# as the default in R's preferences, unless you previously 
 +# changed this via setwd().
 +write.graph(krack_full, file='krack_full.dl', format="pajek")
 + 
 +# For a more general file type (e.g., importable to Excel),
 +# use the "edgelist" format. Note that neither of these will
 +# write the attributes; only the ties are maintained.
 +write.graph(krack_full, file='krack_full.txt', format="edgelist")
 +</code>
 +
sna_eg_stanford/lab01.1574986255.txt.gz · Last modified: 2019/11/29 09:10 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki