This is an old revision of the document!

Lab 01

##########################################################################
# You may cite these labs as follows: McFarland, Daniel, Solomon Messing,
# Mike Nowak, and Sean Westwood. 2010. "Social Network Analysis          
# Labs in R." Stanford University.                                       
##########################################################################
 
 
##########################################################################
# LAB 1 - Introductory Lab                                              
# The point of this lab is to introduce students to the packages of          
# SNA and Igraph, to cover some basic R commands, to load and manage      
# data, to generate graph visualizations, and to export the data for 
# use elsewhere.                   
##########################################################################
 
###
# 0. R BASICS 
###
 
# Any line starting with # is a "comment" line and is ignored by
# R. Any other line is treated as a command. Run commands by 
# copying and pasting them into the R Console.
#
# If (when) you get confused, a good place to start is with R's
# built-in help functionality. R offers detailed help files for
# each function and each package. To access help type ?[function
# or package name] in the console. For example, for help on the
# "sum" function, type:
?sum
 
# To install all packages need for Social Network Analysis 
# Labs in R, uncomment and run the following code:
 
#source("http://sna.stanford.edu/setup.R")

# You only need to run this once per computer!
 
# To load the packages from , you need to call the "library"
# command. Note that you need to do this each session; packages
# don't load automatically by default (though you can set this 
# as a preference if you'd like).
 
# For this lab, we will use the "igraph" package.
# A manual is available at 
# http://cran.r-project.org/web/packages/igraph/igraph.pdf.
library(igraph) 
 
# Sometimes, different packages overlap in functionality and 
# cause unexpected behavior when both are loaded simultaneously.
# If you ever want to remove an existing library, use the 
# "detach" command:
#
# detach(package:igraph)
 
# IMPORTANT NOTE: Unlike in most languages, R objects are numbered
# from 1 instead of 0, so if you want the first element in a
# vector, you would reference it by vector_name[1]. HOWEVER,
# igraph objects are numbered starting from 0. This can lead to 
# lots of confusion, since it's not always obvious at first which 
# objects are native to R and which belong to igraph. 
 
 
###
# 1. LOADING DATA
###
 
# The <- operator sets a variable equal to something. In this case,
# we will set a number of basic R data structures, called "data 
# frames," to hold the contents of the files we will open. 
#
# read.table() is the most common R command for loading data from
# files in which values are in tabular format. The function loads
# the table into a data frame object, which is the basic data type
# for most operations in R. By default, R assumes that the table
# has no header and is delimited by any white space; these
# settings are fine for our purposes here.
#
# One handy aspect of R is that you can read in data from a URL 
# directly by referencing the URL in the read.table() function,
# as follows: 
advice_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Advice.txt')
friendship_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Friendship.txt')
reports_to_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-ReportsTo.txt')
 
# If the files you want to work with are on your local machine, 
# the easiest way to access them is to first set your working 
# directory via the setwd() command, and then reference the 
# files by name:
#
# setwd('path/to/your_directory')
# your_data_frame <- read.table('your_file_name')
 
# Note that when you set a variable equal to something, if all 
# goes well R will not provide any feedback. To see the data we
# just loaded, it's necessary to call the variables directly.
advice_data_frame
 
# Since this is a bit long, we can see just the top six rows via
# head()...
head(friendship_data_frame)
 
# ... or the bottom six rows via tail().
tail(reports_to_data_frame)

# To view your data in a spreadsheet-like window, use the command 'fix()'. 
fix(reports_to_data_frame)

# The attribute data for this lab is in a comma-separated-value
# (CSV) file. read.csv() loads a CSV file into a data frame
# object. In this case, we do have a header row, so we set
# header=T, which tells R that the first row of data contains
# column names.
attributes <- read.csv('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-Attributes.csv', header=T)
attributes
 
# Other commands may be used to load data from files in different 
# formats. read.delim() is a general function for loading any
# delimited text file. The default is tab-delimited, but this can 
# be overridden by setting the "sep" parameter. For example:
#
#     f <- read.delim("tab_delimited_file.txt")
#     f <- read.delim("colon_delimited_file.txt", sep=':')
#
# The 'foreign' package will allow you to read a few other 
# custom data types, such as SPSS files via read.spss() and 
# STATA files via read.dta().
 
# When data files are part of an R package you can read them as 
# follows:
#
# data(kracknets, package = "NetData")
 
# In the future, we will load data this way. However, it is useful 
# to get a sense of how things often must be done in R.
 
 
###
# 2.2. LOADING GRAPHS
###
 
# For convenience, we can assign column names to our newly 
# imported data frames. c() is a common generic R function that 
# combines its arguments into a single vector.
colnames(advice_data_frame) <- c('ego', 'alter', 'advice_tie')
head(advice_data_frame)
 
colnames(friendship_data_frame) <- c('ego', 'alter', 'friendship_tie')
head(friendship_data_frame)
 
colnames(reports_to_data_frame) <- c('ego', 'alter', 'reports_to_tie')
head(reports_to_data_frame)
 
# Take a look at each data frame using the 'fix()" function. Note that you'll 
# need to close each fix window before R will evaluate the next line of code.
fix(advice_data_frame)
fix(friendship_data_frame)
fix(reports_to_data_frame)
 
# Before we merge these data, we need to make sure 'ego' and 'alter' are the
# same across data sets. We can compare each row using the == syntax. 
# The command below should return TRUE for every row if all ego rows
# are the same for advice and friendship:
advice_data_frame$ego == friendship_data_frame$ego
 
# That's a lot of output to sort through. Instead, we can just have R return 
# which row entries are not equal using the syntax below:
which(advice_data_frame$ego != friendship_data_frame$ego)
 
# Repeat for other variables
which(advice_data_frame$alter != friendship_data_frame$alter)
which(reports_to_data_frame$alter != friendship_data_frame$alter)
which(reports_to_data_frame$ego != friendship_data_frame$ego)
 
# Now that we've verified they are all the same, we can combine them into 
# a single data frame. 
krack_full_data_frame <- cbind(advice_data_frame, 
	friendship_data_frame$friendship_tie, 
	reports_to_data_frame$reports_to_tie)
head(krack_full_data_frame)
 
# Notice that the last two variable names are now 
# "reports_to_data_frame$reports_to_tie"
# and "friendship_data_frame$friendship_tie". 
# That's a little long. We can rename them
# as follows:

names(krack_full_data_frame)[4:5] <- c("friendship_tie", 
	"reports_to_tie")  
head(krack_full_data_frame)
 
# Another way to build the data frame is to use R's 
# data.frame syntax from the start:
krack_full_data_frame <- data.frame(ego = advice_data_frame[,1],
	alter = advice_data_frame[,2],
	advice_tie = advice_data_frame[,3],
	friendship_tie = friendship_data_frame[,3], 
	reports_to_tie = reports_to_data_frame[,3])
head(krack_full_data_frame)
 
 
# Now let's move on to some data processing.
 
# Reduce to non-zero edges so that the edge list only contains
# actual ties of some type.
krack_full_nonzero_edges <- subset(krack_full_data_frame, 
	(advice_tie > 0 | friendship_tie > 0 | reports_to_tie > 0))
head(krack_full_nonzero_edges)
 
# Now we can import our data into a "graph" object using igraph's 
# graph.data.frame() function. Coercing the data into a graph
# object is what allows us to perform network-analysis techniques.
krack_full <- graph.data.frame(krack_full_nonzero_edges) 
summary(krack_full)
 
# By default, graph.data.frame() treats the first two columns of 
# a data frame as an edge list and any remaining columns as 
# edge attributes. Thus, the 232 edges appearing in the summary()
# output refer to the 232 pairs of vertices that are joined by 
# *any type* of tie. The tie types themselves are listed as edge 
# attributes.
 
# To get a vector of edges for a specific type of tie, use the 
# get.edge.attribute() function.
get.edge.attribute(krack_full, 'advice_tie')
get.edge.attribute(krack_full, 'friendship_tie')
get.edge.attribute(krack_full, 'reports_to_tie')
 
# If you would like to symmetrize the network, making all 
# asymmetric ties symmetric, use the as.undirected()
# function: 
krack_full_symmetrized <- as.undirected(krack_full, mode='collapse')
summary(krack_full_symmetrized)
 
 
 
###
# 3. ADDING VERTEX ATTRIBUTES TO A GRAPH OBJECT
###
 
# One way to add the attributes to your graph object is to iterate
# through each attribute and each vertex. This means that we will
# add one attribute at a time to each vertex in the network.
#
# V(krack_full) returns a list of the IDs of each vertex in the 
# graph. names(attributes) returns a list of the column names in
# the attributes table. The double-for loop tells R to repeat the
# code between the brackets once for each attribute and once for
# each vertex.
for (i in V(krack_full)) {
    for (j in names(attributes)) {
        krack_full <- set.vertex.attribute(krack_full, 
                                           j, 
                                           index = i, 
                                           attributes[i + 1, j])
    }
}
 
# A shorter way is to just read in attribute names when you
# create the graph object:
 
# First create a vector of vertex labels, in this case 1:n
attributes = cbind(1:length(attributes[,1]), attributes)
 
krack_full <- graph.data.frame(d = krack_full_nonzero_edges, 
                               vertices = attributes) 
 
# Note that we now have 'AGE,' 'TENURE,' 'LEVEL,' and 'DEPT'
# listed alongside 'name' as vertex attributes.
summary(krack_full)
 
# We can see a list of the values for a given attribute for all of
# the actors in the network.
get.vertex.attribute(krack_full, 'AGE')
get.vertex.attribute(krack_full, 'TENURE')
get.vertex.attribute(krack_full, 'LEVEL')
get.vertex.attribute(krack_full, 'DEPT')
 
 
###
# 4. VISUALIZE THE NETWORKS
###
 
# We can use R's general-purpose plot() method to generate custom
# visualizations of the network.

# R only lets us look at one plot at a time.  To make our work easier
# we will save our plots as PDF files.  To jus create a plot execute 
# the code between the PDF function and "dev.off()".

# In order to save PDF files we must tell R where to put them.  We do
# this with the setwd() command.  You must put the full path to the
# folder where you will output the files here.

# In OS X you can get this information by selecting the folder, right
# clicking and selecting "Get Info."  The path is listed under "Where."

# In Windows you can get this information by selecting the folder, right
# clicking and selecting "Properties."  The path information is listed 
# "location".

# example: setwd("/Users/seanwestwood/Desktop/lab_1")
# setwd("")
  
# First, let's plot the network with all possible ties.
pdf("1.1_Krackhardt_Full.pdf")
plot(krack_full, main="krack_full")
dev.off()
 
# This is a bit of a jumble, so let's look at the networks for
# single edge types.
 
# advice only
krack_advice_only <- delete.edges(krack_full, 
    E(krack_full)[get.edge.attribute(krack_full,
    name = "advice_tie") == 0])
summary(krack_advice_only)
pdf("1.2_Krackhardt_Advice.pdf")
plot(krack_advice_only, main="krack advice only")
dev.off()
 
# friendship only
krack_friendship_only <- delete.edges(krack_full, 
    E(krack_full)[get.edge.attribute(krack_full, 
    name = "friendship_tie") == 0])
summary(krack_friendship_only)
pdf("1.3_Krackhardt_Friendship.pdf")
plot(krack_friendship_only, main="krack_friendship_only")
dev.off()

# reports-to only
krack_reports_to_only <- delete.edges(krack_full, 
    E(krack_full)[get.edge.attribute(krack_full, 
    name = "reports_to_tie") == 0])
summary(krack_reports_to_only)
pdf("1.4_Krackhardt_Reports.pdf")
plot(krack_reports_to_only, main="krack_reports_to_only")
dev.off()
 
# Still kind of messy, so let's clean things up a bit. For 
# simplicity, we'll focus on reports_to ties for now.
 
# First, we can optimize the layout by applying the layout 
# algorithm to the specific set of ties we care about. Here 
# we'll use Fruchterman-Rheingold; other options are 
# described in the igraph help page for "layout," which 
# can be accessed by entering ?layout.

reports_to_layout <- layout.fruchterman.reingold(krack_reports_to_only)
pdf("1.5_Krackhardt_Reports_Fruchterman_Reingold.pdf")
plot(krack_reports_to_only, 
     layout=reports_to_layout)
dev.off()
 
# Now let's color-code vertices by department and clean up the 
# plot by removing vertex labels and shrinking the arrow size. 
dept_vertex_colors = get.vertex.attribute(krack_full,"DEPT")
colors = c('Black', 'Red', 'Blue', 'Yellow', 'Green')
dept_vertex_colors[dept_vertex_colors == 0] = colors[1]
dept_vertex_colors[dept_vertex_colors == 1] = colors[2]
dept_vertex_colors[dept_vertex_colors == 2] = colors[3]
dept_vertex_colors[dept_vertex_colors == 3] = colors[4] 
dept_vertex_colors[dept_vertex_colors == 4] = colors[5]

pdf("1.6_Krackhardt_Reports_Color.pdf") 
plot(krack_reports_to_only, 
    layout=reports_to_layout, 
    vertex.color=dept_vertex_colors, 
    vertex.label=NA, 
    edge.arrow.size=.5)
dev.off() 
# Now let's set the vertex size by tenure.
tenure_vertex_sizes = get.vertex.attribute(krack_full,"TENURE")

pdf("1.7_Krackhardt_Reports_Vertex_Size.pdf") 
plot(krack_reports_to_only, 
     layout=reports_to_layout, 
     vertex.color=dept_vertex_colors, 
     vertex.label=NA, 
     edge.arrow.size=.5,
     vertex.size=tenure_vertex_sizes)
dev.off() 
 
# Now let's incorporate additional tie types. We'll use the 
# layout generated by the reports-to ties but overlay the 
# advice and friendship ties in red and blue.

tie_type_colors = c(rgb(1,0,0,.5), rgb(0,0,1,.5), rgb(0,0,0,.5))
E(krack_full)$color[ E(krack_full)$advice_tie==1 ] = tie_type_colors[1]
E(krack_full)$color[ E(krack_full)$friendship_tie==1 ] = tie_type_colors[2]
E(krack_full)$color[ E(krack_full)$reports_to_tie==1 ] = tie_type_colors[3]
E(krack_full)$arrow.size=.5 
V(krack_full)$color = dept_vertex_colors
V(krack_full)$frame = dept_vertex_colors

pdf("1.8_Krackhardt_Overlayed_Ties.pdf")
plot(krack_full, 
     layout=reports_to_layout, 
     vertex.color=dept_vertex_colors, 
     vertex.label=NA, 
     edge.arrow.size=.5,
     vertex.size=tenure_vertex_sizes)
 
 
# Add a legend. Note that the plot window must be open for this to 
# work.
legend(1, 
       1.25,
       legend = c('Advice', 
                  'Friendship',
                  'Reports To'), 
       col = tie_type_colors, 
       lty=1,
       cex = .7)
dev.off() 
 
# Another option for visualizing different network ties relative 
# to one another is to overlay the edges from one tie type on the 
# structure generated by another tie type. Here we can use the
# reports-to layout but show the friendship ties:

pdf("1.9_Krackhardt_Overlayed_Structure.pdf")
plot(krack_friendship_only, 
     layout=reports_to_layout, 
     vertex.color=dept_vertex_colors, 
     vertex.label=NA, 
     edge.arrow.size=.5,
     vertex.size=tenure_vertex_sizes, 
     main='Krackhardt High-Tech Managers')
dev.off() 
 
 
###
# 5. EXPORT THE NETWORK
###
 
# The write.graph() function exports a graph object in various
# formats readable by other programs. There is no explicit
# option for a UCINET data type, but you can export the graph
# as a Pajek object by setting the 'format' parameter to 'pajek.'
# Note that the file will appear in whichever directory is set 
# as the default in R's preferences, unless you previously 
# changed this via setwd().
write.graph(krack_full, file='krack_full.dl', format="pajek")
 
# For a more general file type (e.g., importable to Excel),
# use the "edgelist" format. Note that neither of these will
# write the attributes; only the ties are maintained.
write.graph(krack_full, file='krack_full.txt', format="edgelist")

no pdf

##########################################################################
# You may cite these labs as follows: McFarland, Daniel, Solomon Messing,
# Mike Nowak, and Sean Westwood. 2010. "Social Network Analysis          
# Labs in R." Stanford University.                                       
##########################################################################
 
 
##########################################################################
# LAB 1 - Introductory Lab                                              
# The point of this lab is to introduce students to the packages of          
# SNA and Igraph, to cover some basic R commands, to load and manage      
# data, to generate graph visualizations, and to export the data for 
# use elsewhere.                   
##########################################################################
 
###
# 0. R BASICS 
###
 
# Any line starting with # is a "comment" line and is ignored by
# R. Any other line is treated as a command. Run commands by 
# copying and pasting them into the R Console.
#
# If (when) you get confused, a good place to start is with R's
# built-in help functionality. R offers detailed help files for
# each function and each package. To access help type ?[function
# or package name] in the console. For example, for help on the
# "sum" function, type:
?sum
 
# To install all packages need for Social Network Analysis 
# Labs in R, uncomment and run the following code:
 
#source("http://sna.stanford.edu/setup.R")

# You only need to run this once per computer!
 
# To load the packages from , you need to call the "library"
# command. Note that you need to do this each session; packages
# don't load automatically by default (though you can set this 
# as a preference if you'd like).
 
# For this lab, we will use the "igraph" package.
# A manual is available at 
# http://cran.r-project.org/web/packages/igraph/igraph.pdf.
library(igraph) 
 
# Sometimes, different packages overlap in functionality and 
# cause unexpected behavior when both are loaded simultaneously.
# If you ever want to remove an existing library, use the 
# "detach" command:
#
# detach(package:igraph)
 
# IMPORTANT NOTE: Unlike in most languages, R objects are numbered
# from 1 instead of 0, so if you want the first element in a
# vector, you would reference it by vector_name[1]. HOWEVER,
# igraph objects are numbered starting from 0. This can lead to 
# lots of confusion, since it's not always obvious at first which 
# objects are native to R and which belong to igraph. 
 
 
###
# 1. LOADING DATA
###
 
# The <- operator sets a variable equal to something. In this case,
# we will set a number of basic R data structures, called "data 
# frames," to hold the contents of the files we will open. 
#
# read.table() is the most common R command for loading data from
# files in which values are in tabular format. The function loads
# the table into a data frame object, which is the basic data type
# for most operations in R. By default, R assumes that the table
# has no header and is delimited by any white space; these
# settings are fine for our purposes here.
#
# One handy aspect of R is that you can read in data from a URL 
# directly by referencing the URL in the read.table() function,
# as follows: 
advice_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Advice.txt')
friendship_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-Friendship.txt')
reports_to_data_frame <- read.table('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-edgelist-ReportsTo.txt')
 
# If the files you want to work with are on your local machine, 
# the easiest way to access them is to first set your working 
# directory via the setwd() command, and then reference the 
# files by name:
#
# setwd('path/to/your_directory')
# your_data_frame <- read.table('your_file_name')
 
# Note that when you set a variable equal to something, if all 
# goes well R will not provide any feedback. To see the data we
# just loaded, it's necessary to call the variables directly.
advice_data_frame
 
# Since this is a bit long, we can see just the top six rows via
# head()...
head(friendship_data_frame)
 
# ... or the bottom six rows via tail().
tail(reports_to_data_frame)

# To view your data in a spreadsheet-like window, use the command 'fix()'. 
# fix(reports_to_data_frame)

# The attribute data for this lab is in a comma-separated-value
# (CSV) file. read.csv() loads a CSV file into a data frame
# object. In this case, we do have a header row, so we set
# header=T, which tells R that the first row of data contains
# column names.
attributes <- read.csv('http://sna.stanford.edu/sna_R_labs/data/Krack-High-Tec-Attributes.csv', header=T)
attributes
 
# Other commands may be used to load data from files in different 
# formats. read.delim() is a general function for loading any
# delimited text file. The default is tab-delimited, but this can 
# be overridden by setting the "sep" parameter. For example:
#
#     f <- read.delim("tab_delimited_file.txt")
#     f <- read.delim("colon_delimited_file.txt", sep=':')
#
# The 'foreign' package will allow you to read a few other 
# custom data types, such as SPSS files via read.spss() and 
# STATA files via read.dta().
 
# When data files are part of an R package you can read them as 
# follows:
#
# data(kracknets, package = "NetData")
 
# In the future, we will load data this way. However, it is useful 
# to get a sense of how things often must be done in R.
 
 
###
# 2.2. LOADING GRAPHS
###
 
# For convenience, we can assign column names to our newly 
# imported data frames. c() is a common generic R function that 
# combines its arguments into a single vector.
colnames(advice_data_frame) <- c('ego', 'alter', 'advice_tie')
head(advice_data_frame)
 
colnames(friendship_data_frame) <- c('ego', 'alter', 'friendship_tie')
head(friendship_data_frame)
 
colnames(reports_to_data_frame) <- c('ego', 'alter', 'reports_to_tie')
head(reports_to_data_frame)
 
# Take a look at each data frame using the 'fix()" function. Note that you'll 
# need to close each fix window before R will evaluate the next line of code.

# fix(advice_data_frame)
# fix(friendship_data_frame)
# fix(reports_to_data_frame)
 
# Before we merge these data, we need to make sure 'ego' and 'alter' are the
# same across data sets. We can compare each row using the == syntax. 
# The command below should return TRUE for every row if all ego rows
# are the same for advice and friendship:
advice_data_frame$ego == friendship_data_frame$ego
 
# That's a lot of output to sort through. Instead, we can just have R return 
# which row entries are not equal using the syntax below:
which(advice_data_frame$ego != friendship_data_frame$ego)
 
# Repeat for other variables
which(advice_data_frame$alter != friendship_data_frame$alter)
which(reports_to_data_frame$alter != friendship_data_frame$alter)
which(reports_to_data_frame$ego != friendship_data_frame$ego)
 
# Now that we've verified they are all the same, we can combine them into 
# a single data frame. 
krack_full_data_frame <- cbind(advice_data_frame, 
	friendship_data_frame$friendship_tie, 
	reports_to_data_frame$reports_to_tie)
head(krack_full_data_frame)
 
# Notice that the last two variable names are now 
# "reports_to_data_frame$reports_to_tie"
# and "friendship_data_frame$friendship_tie". 
# That's a little long. We can rename them
# as follows:

names(krack_full_data_frame)[4:5] <- c("friendship_tie", 
	"reports_to_tie")  
head(krack_full_data_frame)
 
# Another way to build the data frame is to use R's 
# data.frame syntax from the start:
krack_full_data_frame <- data.frame(ego = advice_data_frame[,1],
	alter = advice_data_frame[,2],
	advice_tie = advice_data_frame[,3],
	friendship_tie = friendship_data_frame[,3], 
	reports_to_tie = reports_to_data_frame[,3])
head(krack_full_data_frame)
 
 
# Now let's move on to some data processing.
 
# Reduce to non-zero edges so that the edge list only contains
# actual ties of some type.
krack_full_nonzero_edges <- subset(krack_full_data_frame, 
	(advice_tie > 0 | friendship_tie > 0 | reports_to_tie > 0))
head(krack_full_nonzero_edges)
 
# Now we can import our data into a "graph" object using igraph's 
# graph.data.frame() function. Coercing the data into a graph
# object is what allows us to perform network-analysis techniques.
krack_full <- graph.data.frame(krack_full_nonzero_edges) 
summary(krack_full)
 
# By default, graph.data.frame() treats the first two columns of 
# a data frame as an edge list and any remaining columns as 
# edge attributes. Thus, the 232 edges appearing in the summary()
# output refer to the 232 pairs of vertices that are joined by 
# *any type* of tie. The tie types themselves are listed as edge 
# attributes.
 
# To get a vector of edges for a specific type of tie, use the 
# get.edge.attribute() function.
get.edge.attribute(krack_full, 'advice_tie')
get.edge.attribute(krack_full, 'friendship_tie')
get.edge.attribute(krack_full, 'reports_to_tie')
 
# If you would like to symmetrize the network, making all 
# asymmetric ties symmetric, use the as.undirected()
# function: 
krack_full_symmetrized <- as.undirected(krack_full, mode='collapse')
summary(krack_full_symmetrized)
 
 
 
###
# 3. ADDING VERTEX ATTRIBUTES TO A GRAPH OBJECT
###
 
# One way to add the attributes to your graph object is to iterate
# through each attribute and each vertex. This means that we will
# add one attribute at a time to each vertex in the network.
#
# V(krack_full) returns a list of the IDs of each vertex in the 
# graph. names(attributes) returns a list of the column names in
# the attributes table. The double-for loop tells R to repeat the
# code between the brackets once for each attribute and once for
# each vertex.
for (i in V(krack_full)) {
    for (j in names(attributes)) {
        krack_full <- set.vertex.attribute(krack_full, 
                                           j, 
                                           index = i, 
                                           attributes[i + 1, j])
    }
}
 
# A shorter way is to just read in attribute names when you
# create the graph object:
 
# First create a vector of vertex labels, in this case 1:n
attributes = cbind(1:length(attributes[,1]), attributes)
 
krack_full <- graph.data.frame(d = krack_full_nonzero_edges, 
                               vertices = attributes) 
 
# Note that we now have 'AGE,' 'TENURE,' 'LEVEL,' and 'DEPT'
# listed alongside 'name' as vertex attributes.
summary(krack_full)
 
# We can see a list of the values for a given attribute for all of
# the actors in the network.
get.vertex.attribute(krack_full, 'AGE')
get.vertex.attribute(krack_full, 'TENURE')
get.vertex.attribute(krack_full, 'LEVEL')
get.vertex.attribute(krack_full, 'DEPT')
 
 
###
# 4. VISUALIZE THE NETWORKS
###
 
# We can use R's general-purpose plot() method to generate custom
# visualizations of the network.

# R only lets us look at one plot at a time.  To make our work easier
# we will save our plots as PDF files.  To jus create a plot execute 
# the code between the PDF function and "dev.off()".

# In order to save PDF files we must tell R where to put them.  We do
# this with the setwd() command.  You must put the full path to the
# folder where you will output the files here.

# In OS X you can get this information by selecting the folder, right
# clicking and selecting "Get Info."  The path is listed under "Where."

# In Windows you can get this information by selecting the folder, right
# clicking and selecting "Properties."  The path information is listed 
# "location".

# example: setwd("/Users/seanwestwood/Desktop/lab_1")
setwd("")
  
# First, let's plot the network with all possible ties.
# pdf("1.1_Krackhardt_Full.pdf")
plot(krack_full, main="krack_full")
# dev.off()
 
# This is a bit of a jumble, so let's look at the networks for
# single edge types.
 
# advice only
krack_advice_only <- delete.edges(krack_full, 
    E(krack_full)[get.edge.attribute(krack_full,
    name = "advice_tie") == 0])
summary(krack_advice_only)

# pdf("1.2_Krackhardt_Advice.pdf")
plot(krack_advice_only, main="krack_advice_only")
# dev.off()
 
# friendship only
krack_friendship_only <- delete.edges(krack_full, 
    E(krack_full)[get.edge.attribute(krack_full, 
    name = "friendship_tie") == 0])
summary(krack_friendship_only)
# pdf("1.3_Krackhardt_Friendship.pdf")
plot(krack_friendship_only, main="krack_friendship_only")
# dev.off()

# reports-to only
krack_reports_to_only <- delete.edges(krack_full, 
    E(krack_full)[get.edge.attribute(krack_full, 
    name = "reports_to_tie") == 0])
summary(krack_reports_to_only)
# pdf("1.4_Krackhardt_Reports.pdf")
plot(krack_reports_to_only, main="krack_reports_to_only")
# dev.off()
 
# Still kind of messy, so let's clean things up a bit. For 
# simplicity, we'll focus on reports_to ties for now.
 
# First, we can optimize the layout by applying the layout 
# algorithm to the specific set of ties we care about. Here 
# we'll use Fruchterman-Rheingold; other options are 
# described in the igraph help page for "layout," which 
# can be accessed by entering ?layout.

reports_to_layout <- layout.fruchterman.reingold(krack_reports_to_only)
# pdf("1.5_Krackhardt_Reports_Fruchterman_Reingold.pdf")
plot(krack_reports_to_only, 
     layout=reports_to_layout, main="krack_reports_to_only")
# dev.off()
 
# Now let's color-code vertices by department and clean up the 
# plot by removing vertex labels and shrinking the arrow size. 
dept_vertex_colors = get.vertex.attribute(krack_full,"DEPT")
colors = c('Black', 'Red', 'Blue', 'Yellow', 'Green')
dept_vertex_colors[dept_vertex_colors == 0] = colors[1]
dept_vertex_colors[dept_vertex_colors == 1] = colors[2]
dept_vertex_colors[dept_vertex_colors == 2] = colors[3]
dept_vertex_colors[dept_vertex_colors == 3] = colors[4] 
dept_vertex_colors[dept_vertex_colors == 4] = colors[5]

# pdf("1.6_Krackhardt_Reports_Color.pdf") 
plot(krack_reports_to_only, 
    layout=reports_to_layout, 
    vertex.color=dept_vertex_colors, 
    vertex.label=NA, 
    edge.arrow.size=.5, 
    main="krack_reports_to_only")
# dev.off() 
# Now let's set the vertex size by tenure.
tenure_vertex_sizes = get.vertex.attribute(krack_full,"TENURE")

# pdf("1.7_Krackhardt_Reports_Vertex_Size.pdf") 
plot(krack_reports_to_only, 
     layout=reports_to_layout, 
     vertex.color=dept_vertex_colors, 
     vertex.label=NA, 
     edge.arrow.size=.5,
     vertex.size=tenure_vertex_sizes, main="krack_reports_to_only")
# dev.off() 
 
# Now let's incorporate additional tie types. We'll use the 
# layout generated by the reports-to ties but overlay the 
# advice and friendship ties in red and blue.

tie_type_colors = c(rgb(1,0,0,.5), rgb(0,0,1,.5), rgb(0,0,0,.5))
E(krack_full)$color[ E(krack_full)$advice_tie==1 ] = tie_type_colors[1]
E(krack_full)$color[ E(krack_full)$friendship_tie==1 ] = tie_type_colors[2]
E(krack_full)$color[ E(krack_full)$reports_to_tie==1 ] = tie_type_colors[3]
E(krack_full)$arrow.size=.5 
V(krack_full)$color = dept_vertex_colors
V(krack_full)$frame = dept_vertex_colors

# pdf("1.8_Krackhardt_Overlayed_Ties.pdf")
plot(krack_full, 
     layout=reports_to_layout, 
     vertex.color=dept_vertex_colors, 
     vertex.label=NA, 
     edge.arrow.size=.5,
     vertex.size=tenure_vertex_sizes, main="krack_full")
 
 
# Add a legend. Note that the plot window must be open for this to 
# work.
legend(1, 
       1.25,
       legend = c('Advice', 
                  'Friendship',
                  'Reports To'), 
       col = tie_type_colors, 
       lty=1,
       cex = .7)
# dev.off() 
 
# Another option for visualizing different network ties relative 
# to one another is to overlay the edges from one tie type on the 
# structure generated by another tie type. Here we can use the
# reports-to layout but show the friendship ties:

# pdf("1.9_Krackhardt_Overlayed_Structure.pdf")
plot(krack_friendship_only, 
     layout=reports_to_layout, 
     vertex.color=dept_vertex_colors, 
     vertex.label=NA, 
     edge.arrow.size=.5,
     vertex.size=tenure_vertex_sizes, 
     main='Krackhardt High-Tech Managers')
# dev.off() 
 
 
###
# 5. EXPORT THE NETWORK
###
 
# The write.graph() function exports a graph object in various
# formats readable by other programs. There is no explicit
# option for a UCINET data type, but you can export the graph
# as a Pajek object by setting the 'format' parameter to 'pajek.'
# Note that the file will appear in whichever directory is set 
# as the default in R's preferences, unless you previously 
# changed this via setwd().
write.graph(krack_full, file='krack_full.dl', format="pajek")
 
# For a more general file type (e.g., importable to Excel),
# use the "edgelist" format. Note that neither of these will
# write the attributes; only the ties are maintained.
write.graph(krack_full, file='krack_full.txt', format="edgelist")