r:social_network_analysis_tutorial
                This is an old revision of the document!
Table of Contents
T1.
Dataset
- star-wars-network-edges.csv
"source","target","weight"
"C-3PO","R2-D2",17
"LUKE","R2-D2",13
"OBI-WAN","R2-D2",6
"LEIA","R2-D2",5
"HAN","R2-D2",5
"CHEWBACCA","R2-D2",3
"DODONNA","R2-D2",1
"CHEWBACCA","OBI-WAN",7
"C-3PO","CHEWBACCA",5
"CHEWBACCA","LUKE",16
"CHEWBACCA","HAN",19
"CHEWBACCA","LEIA",11
"CHEWBACCA","DARTH VADER",1
"CHEWBACCA","DODONNA",1
"CAMIE","LUKE",2
"BIGGS","CAMIE",2
"BIGGS","LUKE",4
"DARTH VADER","LEIA",1
"BERU","LUKE",3
"BERU","OWEN",3
"BERU","C-3PO",2
"LUKE","OWEN",3
"C-3PO","LUKE",18
"C-3PO","OWEN",2
"C-3PO","LEIA",6
"LEIA","LUKE",17
"BERU","LEIA",1
"LUKE","OBI-WAN",19
"C-3PO","OBI-WAN",6
"LEIA","OBI-WAN",1
"MOTTI","TARKIN",2
"DARTH VADER","MOTTI",1
"DARTH VADER","TARKIN",7
"HAN","OBI-WAN",9
"HAN","LUKE",26
"GREEDO","HAN",1
"HAN","JABBA",1
"C-3PO","HAN",6
"LEIA","MOTTI",1
"LEIA","TARKIN",1
"HAN","LEIA",13
"DARTH VADER","OBI-WAN",1
"DODONNA","GOLD LEADER",1
"DODONNA","WEDGE",1
"DODONNA","LUKE",1
"GOLD LEADER","WEDGE",1
"GOLD LEADER","LUKE",1
"LUKE","WEDGE",2
"BIGGS","LEIA",1
"LEIA","RED LEADER",1
"LUKE","RED LEADER",3
"BIGGS","RED LEADER",3
"BIGGS","C-3PO",1
"C-3PO","RED LEADER",1
"RED LEADER","WEDGE",3
"GOLD LEADER","RED LEADER",1
"BIGGS","WEDGE",2
"RED LEADER","RED TEN",1
"BIGGS","GOLD LEADER",1
"LUKE","RED TEN",1
- star-wars-network-nodes.csv
"name","id"
"R2-D2",0
"CHEWBACCA",1
"C-3PO",2
"LUKE",3
"DARTH VADER",4
"CAMIE",5
"BIGGS",6
"LEIA",7
"BERU",8
"OWEN",9
"OBI-WAN",10
"MOTTI",11
"TARKIN",12
"HAN",13
"GREEDO",14
"JABBA",15
"DODONNA",16
"GOLD LEADER",17
"WEDGE",18
"RED LEADER",19
"RED TEN",20
"GOLD FIVE",21
Analysis
# Edge list: one row per pair of characters (source, target) plus a weight.
t.e <- read.csv(
  file = "http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=0",
  sep = ","
)
head(t.e)

# Node list: one row per character (name) plus a numeric id.
t.n <- read.csv(
  file = "http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=1",
  sep = ","
)
head(t.n)
> t.e <- read.csv("http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=0", sep = ",")
> head(t.e)
     source target weight
1     C-3PO  R2-D2     17
2      LUKE  R2-D2     13
3   OBI-WAN  R2-D2      6
4      LEIA  R2-D2      5
5       HAN  R2-D2      5
6 CHEWBACCA  R2-D2      3
>
> t.n <- read.csv("http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=1", sep = ",")
> head(t.n)
         name id
1       R2-D2  0
2   CHEWBACCA  1
3       C-3PO  2
4        LUKE  3
5 DARTH VADER  4
6       CAMIE  5
> 
# Load igraph and build an undirected graph from the edge list (t.e),
# using the node table (t.n) to supply the vertices and their attributes.
library(igraph)
g <- graph_from_data_frame(d = t.e, vertices = t.n, directed = FALSE)
g # print the graph summary
> library(igraph)
> g <- graph_from_data_frame(d=t.e, vertices=t.n, directed=FALSE)
> g
IGRAPH 9b91a04 UNW- 22 60 --
+ attr: name (v/c), id (v/n), weight (e/n)
+ edges from 9b91a04 (vertex names):
 [1] R2-D2      --C-3PO       R2-D2      --LUKE        R2-D2      --OBI-WAN
 [4] R2-D2      --LEIA        R2-D2      --HAN         R2-D2      --CHEWBACCA
 [7] R2-D2      --DODONNA     CHEWBACCA  --OBI-WAN     CHEWBACCA  --C-3PO
[10] CHEWBACCA  --LUKE        CHEWBACCA  --HAN         CHEWBACCA  --LEIA
[13] CHEWBACCA  --DARTH VADER CHEWBACCA  --DODONNA     LUKE       --CAMIE
[16] CAMIE      --BIGGS       LUKE       --BIGGS       DARTH VADER--LEIA
[19] LUKE       --BERU        BERU       --OWEN        C-3PO      --BERU
[22] LUKE       --OWEN        C-3PO      --LUKE        C-3PO      --OWEN
+ ... omitted several edges
>
IGRAPH 9b91a04 UNW- 22 60 -- 
+ attr: name (v/c), id (v/n), weight (e/n)
+ edges from 9b91a04 (vertex names):
Vertices = Nodes
IGRAPH ID(#)
U: Undirected
N: Named graph
W: Weighted graph
name (v/c): name is a vertex (node) attribute of type character
id (v/n): id is a vertex (node) attribute of type numeric
weight (e/n): weight is an edge attribute of type numeric
# Inspect the vertices; the console transcript that follows runs all three.
V(g) # print nodes
V(g)$name # print name attributes in nodes
V(g)$id # id attributes of the nodes
> V(g) # print nodes
+ 22/22 vertices, named, from 9b91a04:
 [1] R2-D2       CHEWBACCA   C-3PO       LUKE        DARTH VADER CAMIE
 [7] BIGGS       LEIA        BERU        OWEN        OBI-WAN     MOTTI
[13] TARKIN      HAN         GREEDO      JABBA       DODONNA     GOLD LEADER
[19] WEDGE       RED LEADER  RED TEN     GOLD FIVE
> V(g)$name # print name attributes in nodes
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] "CAMIE"       "BIGGS"       "LEIA"        "BERU"        "OWEN"
[11] "OBI-WAN"     "MOTTI"       "TARKIN"      "HAN"         "GREEDO"
[16] "JABBA"       "DODONNA"     "GOLD LEADER" "WEDGE"       "RED LEADER"
[21] "RED TEN"     "GOLD FIVE"
> V(g)$id # id attributes of the nodes
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21
>
vertex_attr(g) # all attributes of the nodes, as a list with one element per attribute (name and id at this point)
> vertex_attr(g) # all attributes of the nodes
$name
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] "CAMIE"       "BIGGS"       "LEIA"        "BERU"        "OWEN"
[11] "OBI-WAN"     "MOTTI"       "TARKIN"      "HAN"         "GREEDO"
[16] "JABBA"       "DODONNA"     "GOLD LEADER" "WEDGE"       "RED LEADER"
[21] "RED TEN"     "GOLD FIVE"
$id
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21
>
E(g) # print the edge sequence; each edge is shown as a pair of vertex names
> E(g) # edges
+ 60/60 edges from 9b91a04 (vertex names):
 [1] R2-D2      --C-3PO       R2-D2      --LUKE        R2-D2      --OBI-WAN
 [4] R2-D2      --LEIA        R2-D2      --HAN         R2-D2      --CHEWBACCA
 [7] R2-D2      --DODONNA     CHEWBACCA  --OBI-WAN     CHEWBACCA  --C-3PO
[10] CHEWBACCA  --LUKE        CHEWBACCA  --HAN         CHEWBACCA  --LEIA
[13] CHEWBACCA  --DARTH VADER CHEWBACCA  --DODONNA     LUKE       --CAMIE
[16] CAMIE      --BIGGS       LUKE       --BIGGS       DARTH VADER--LEIA
[19] LUKE       --BERU        BERU       --OWEN        C-3PO      --BERU
[22] LUKE       --OWEN        C-3PO      --LUKE        C-3PO      --OWEN
[25] C-3PO      --LEIA        LUKE       --LEIA        LEIA       --BERU
[28] LUKE       --OBI-WAN     C-3PO      --OBI-WAN     LEIA       --OBI-WAN
+ ... omitted several edges
>
E(g)$weight # print the weight attribute of every edge (one value per edge)
> E(g)$weight # print weight attributes in edge
 [1] 17 13  6  5  5  3  1  7  5 16 19 11  1  1  2  2  4  1  3  3  2  3 18  2  6 17
[27]  1 19  6  1  2  1  7  9 26  1  1  6  1  1 13  1  1  1  1  1  1  2  1  1  3  3
[53]  1  1  3  1  2  1  1  1
>
edge_attr(g) # all attributes of the edges, as a list (here only weight)
> edge_attr(g) # all attributes of the edges
$weight
 [1] 17 13  6  5  5  3  1  7  5 16 19 11  1  1  2  2  4  1  3  3  2  3 18  2  6 17
[27]  1 19  6  1  2  1  7  9 26  1  1  6  1  1 13  1  1  1  1  1  1  2  1  1  3  3
[53]  1  1  3  1  2  1  1  1
g[] # adjacency matrix; cells hold the edge weight between two nodes, "." means no edge
> g[] # adjacency matrix
22 x 22 sparse Matrix of class "dgCMatrix"
   [[ suppressing 22 column names ‘R2-D2’, ‘CHEWBACCA’, ‘C-3PO’ ... ]]
## column ids omitted
R2-D2       . 3 17 13 . . . 5 . . 6 . . 5 . . 1 . . . . .
CHEWBACCA   3 . 5 16 1 . . 11 . . 7 . . 19 . . 1 . . . . .
C-3PO       17 5 . 18 . . 1 6 2 2 6 . . 6 . . . . . 1 . .
LUKE        13 16 18 . . 2 4 17 3 3 19 . . 26 . . 1 1 2 3 1 .
DARTH VADER . 1 . . . . . 1 . . 1 1 7 . . . . . . . . .
CAMIE       . . . 2 . . 2 . . . . . . . . . . . . . . .
BIGGS       . . 1 4 . 2 . 1 . . . . . . . . . 1 2 3 . .
LEIA        5 11 6 17 1 . 1 . 1 . 1 1 1 13 . . . . . 1 . .
BERU        . . 2 3 . . . 1 . 3 . . . . . . . . . . . .
OWEN        . . 2 3 . . . . 3 . . . . . . . . . . . . .
OBI-WAN     6 7 6 19 1 . . 1 . . . . . 9 . . . . . . . .
MOTTI       . . . . 1 . . 1 . . . . 2 . . . . . . . . .
TARKIN      . . . . 7 . . 1 . . . 2 . . . . . . . . . .
HAN         5 19 6 26 . . . 13 . . 9 . . . 1 1 . . . . . .
GREEDO      . . . . . . . . . . . . . 1 . . . . . . . .
JABBA       . . . . . . . . . . . . . 1 . . . . . . . .
DODONNA     1 1 . 1 . . . . . . . . . . . . . 1 1 . . .
GOLD LEADER . . . 1 . . 1 . . . . . . . . . 1 . 1 1 . .
WEDGE       . . . 2 . . 2 . . . . . . . . . 1 1 . 3 . .
RED LEADER  . . 1 3 . . 3 1 . . . . . . . . . 1 3 . 1 .
RED TEN     . . . 1 . . . . . . . . . . . . . . . 1 . .
GOLD FIVE   . . . . . . . . . . . . . . . . . . . . . .
# Each statement needs its own line; on a single line the second call
# becomes part of the first call's comment and never runs.
g[1, ] # first row of adjacency matrix
data.frame(g[1, ]) # same row as a one-column data frame (easier to read)
> g[1,] # first row of adjacency matrix
      R2-D2   CHEWBACCA       C-3PO        LUKE DARTH VADER       CAMIE       BIGGS        LEIA 
          0           3          17          13           0           0           0           5 
       BERU        OWEN     OBI-WAN       MOTTI      TARKIN         HAN      GREEDO       JABBA 
          0           0           6           0           0           5           0           0 
    DODONNA GOLD LEADER       WEDGE  RED LEADER     RED TEN   GOLD FIVE 
          1           0           0           0           0           0 
> 
> data.frame(g[1,])
            g.1...
R2-D2            0
CHEWBACCA        3
C-3PO           17
LUKE            13
DARTH VADER      0
CAMIE            0
BIGGS            0
LEIA             5
BERU             0
OWEN             0
OBI-WAN          6
MOTTI            0
TARKIN           0
HAN              5
GREEDO           0
JABBA            0
DODONNA          1
GOLD LEADER      0
WEDGE            0
RED LEADER       0
RED TEN          0
GOLD FIVE        0
Vis
# 1) Default plot of the graph.
plot(g)

# 2) Same plot with the figure margins removed so the graph fills the device.
par(mar = c(0, 0, 0, 0))
plot(g)

# 3) Styled plot.
par(mar = c(0, 0, 0, 0))
plot(g,
     vertex.color = "grey", # change color of nodes
     vertex.label.color = "black", # change color of labels
     vertex.label.cex = .75, # change size of labels to 75% of original size
     edge.curved = .15, # add a 15% curve to the edges
     edge.color = "grey20") # change edge color to dark grey
# Strength = sum of the weights of the edges attached to each node.
strength(g)
# data.frame() (rather than as.data.frame()) yields the "strength.g." column
# header shown in the console transcript that follows.
data.frame(strength(g))
> strength(g)
      R2-D2   CHEWBACCA       C-3PO        LUKE DARTH VADER       CAMIE       BIGGS 
         50          63          64         129          11           4          14 
       LEIA        BERU        OWEN     OBI-WAN       MOTTI      TARKIN         HAN 
         59           9           8          49           4          10          80 
     GREEDO       JABBA     DODONNA GOLD LEADER       WEDGE  RED LEADER     RED TEN 
          1           1           5           5           9          13           2 
  GOLD FIVE 
          0 
> data.frame(strength(g))
            strength.g.
R2-D2                50
CHEWBACCA            63
C-3PO                64
LUKE                129
DARTH VADER          11
CAMIE                 4
BIGGS                14
LEIA                 59
BERU                  9
OWEN                  8
OBI-WAN              49
MOTTI                 4
TARKIN               10
HAN                  80
GREEDO                1
JABBA                 1
DODONNA               5
GOLD LEADER           5
WEDGE                 9
RED LEADER           13
RED TEN               2
GOLD FIVE             0
> 
# First attempt: node size = raw strength (sizes range from 0 to 129 here).
V(g)$size <- strength(g)
par(mar = c(0, 0, 0, 0))
plot(g)

# taking the log to improve it
# log(0) is -Inf, so an isolated node (GOLD FIVE, strength 0) would get a
# non-finite size. Clamping strength at 1 gives such nodes the base size 3;
# every connected node here has strength >= 1, so its size is unchanged.
V(g)$size <- log(pmax(strength(g), 1)) * 4 + 3
par(mar = c(0, 0, 0, 0))
plot(g)

plot(g,
     vertex.color = "grey", # change color of nodes
     vertex.label.color = "black", # change color of labels
     vertex.label.cex = .75, # change size of labels to 75% of original size
     edge.curved = .25, # add a 25% curve to the edges
     edge.color = "grey20") # change edge color to dark grey
# Show a label only for nodes whose strength is at least 10;
# every other node gets NA, i.e. no label.
V(g)$label <- ifelse(
  strength(g) >= 10,
  V(g)$name,
  NA
)

# Redraw without margins, using the same styling as before.
par(mar = c(0, 0, 0, 0))
plot(
  g,
  edge.color = "grey20",        # dark grey edges
  edge.curved = .25,            # 25% curve on the edges
  vertex.color = "grey",        # grey nodes
  vertex.label.cex = .75,       # labels at 75% of the default size
  vertex.label.color = "black"  # black labels
)
t.n
Using ifelse
t.n # print the full node table (columns name and id)
> t.n
          name id
1        R2-D2  0
2    CHEWBACCA  1
3        C-3PO  2
4         LUKE  3
5  DARTH VADER  4
6        CAMIE  5
7        BIGGS  6
8         LEIA  7
9         BERU  8
10        OWEN  9
11     OBI-WAN 10
12       MOTTI 11
13      TARKIN 12
14         HAN 13
15      GREEDO 14
16       JABBA 15
17     DODONNA 16
18 GOLD LEADER 17
19       WEDGE 18
20  RED LEADER 19
21     RED TEN 20
22   GOLD FIVE 21
> 
t.n$name=="R2-D2" # element-wise comparison: TRUE only where name is exactly "R2-D2"
> t.n$name=="R2-D2"
 [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
>
as.data.frame(t.n$name=="R2-D2") # the same logical vector shown as a one-column data frame
> as.data.frame(t.n$name=="R2-D2")
   t.n$name == "R2-D2"
1                 TRUE
2                FALSE
3                FALSE
4                FALSE
5                FALSE
6                FALSE
7                FALSE
8                FALSE
9                FALSE
10               FALSE
11               FALSE
12               FALSE
13               FALSE
14               FALSE
15               FALSE
16               FALSE
17               FALSE
18               FALSE
19               FALSE
20               FALSE
21               FALSE
22               FALSE
>
# Put the node table and the comparison result side by side.
# Each statement must be on its own line to parse.
tmp.a <- as.data.frame(t.n)
tmp.b <- as.data.frame(t.n$name == "R2-D2")
tmp <- data.frame(tmp.a, tmp.b)
tmp
> tmp.a <- as.data.frame(t.n)
> tmp.b <- as.data.frame(t.n$name=="R2-D2")
> tmp <- data.frame(tmp.a, tmp.b)
> tmp
          name id t.n.name.....R2.D2.
1        R2-D2  0                TRUE
2    CHEWBACCA  1               FALSE
3        C-3PO  2               FALSE
4         LUKE  3               FALSE
5  DARTH VADER  4               FALSE
6        CAMIE  5               FALSE
7        BIGGS  6               FALSE
8         LEIA  7               FALSE
9         BERU  8               FALSE
10        OWEN  9               FALSE
11     OBI-WAN 10               FALSE
12       MOTTI 11               FALSE
13      TARKIN 12               FALSE
14         HAN 13               FALSE
15      GREEDO 14               FALSE
16       JABBA 15               FALSE
17     DODONNA 16               FALSE
18 GOLD LEADER 17               FALSE
19       WEDGE 18               FALSE
20  RED LEADER 19               FALSE
21     RED TEN 20               FALSE
22   GOLD FIVE 21               FALSE
ifelse(t.n$name=="R2-D2", "yes", "no") # element by element: "yes" where the test is TRUE, "no" otherwise
> ifelse(t.n$name=="R2-D2", "yes", "no")
 [1] "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"
[13] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"
>
ifelse(grepl("R", t.n$name), "yes", "no") # grepl() is TRUE for every name that contains "R"
> ifelse(grepl("R", t.n$name), "yes", "no")
 [1] "yes" "no"  "no"  "no"  "yes" "no"  "no"  "no"  "yes" "no"  "no"  "no" 
[13] "yes" "no"  "yes" "no"  "no"  "yes" "no"  "yes" "yes" "no" 
> 
# Show each name next to its yes/no flag for containing "R".
# The column headers in the printed result are generated from these expressions.
tmp.a <- data.frame(t.n$name)
tmp.b <- data.frame(ifelse(grepl("R", t.n$name), "yes", "no"))
data.frame(tmp.a, tmp.b)
> tmp.a <- data.frame(t.n$name)
> tmp.b <- data.frame(ifelse(grepl("R", t.n$name), "yes", "no"))
> data.frame(tmp.a, tmp.b)
      t.n.name ifelse.grepl..R...t.n.name....yes....no..
1        R2-D2                                       yes
2    CHEWBACCA                                        no
3        C-3PO                                        no
4         LUKE                                        no
5  DARTH VADER                                       yes
6        CAMIE                                        no
7        BIGGS                                        no
8         LEIA                                        no
9         BERU                                       yes
10        OWEN                                        no
11     OBI-WAN                                        no
12       MOTTI                                        no
13      TARKIN                                       yes
14         HAN                                        no
15      GREEDO                                       yes
16       JABBA                                        no
17     DODONNA                                        no
18 GOLD LEADER                                       yes
19       WEDGE                                        no
20  RED LEADER                                       yes
21     RED TEN                                       yes
22   GOLD FIVE                                        no
> 
Coloring nodes
# Group the characters by allegiance.
light_side <- c(
  "R2-D2", "CHEWBACCA", "C-3PO", "LUKE", "CAMIE", "BIGGS",
  "LEIA", "BERU", "OWEN", "OBI-WAN", "HAN", "DODONNA",
  "GOLD LEADER", "WEDGE", "RED LEADER", "RED TEN", "GOLD FIVE"
)
dark_side <- c("DARTH VADER", "MOTTI", "TARKIN")
other <- c("GREEDO", "JABBA")

# Store a colour per node as a vertex attribute: grey20 for "other",
# gold for the light side, red for the dark side, NA for anything unlisted.
V(g)$color <- ifelse(
  V(g)$name %in% other,
  "grey20",
  ifelse(
    V(g)$name %in% light_side,
    "gold",
    ifelse(V(g)$name %in% dark_side, "red", NA)
  )
)
vertex_attr(g)
> dark_side <- c("DARTH VADER", "MOTTI", "TARKIN")
> light_side <- c("R2-D2", "CHEWBACCA", "C-3PO", "LUKE", "CAMIE", "BIGGS",
+                 "LEIA", "BERU", "OWEN", "OBI-WAN", "HAN", "DODONNA",
+                 "GOLD LEADER", "WEDGE", "RED LEADER", "RED TEN", "GOLD FIVE")
> other <- c("GREEDO", "JABBA")
> # node we'll create a new color variable as a node property
> V(g)$color <- NA
> V(g)$color[V(g)$name %in% dark_side] <- "red"
> V(g)$color[V(g)$name %in% light_side] <- "gold"
> V(g)$color[V(g)$name %in% other] <- "grey20"
> vertex_attr(g)
$name
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] "CAMIE"       "BIGGS"       "LEIA"        "BERU"        "OWEN"       
[11] "OBI-WAN"     "MOTTI"       "TARKIN"      "HAN"         "GREEDO"     
[16] "JABBA"       "DODONNA"     "GOLD LEADER" "WEDGE"       "RED LEADER" 
[21] "RED TEN"     "GOLD FIVE"  
$id
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21
$size
 [1] 18.648092 19.572539 19.635532 22.439250 12.591581  8.545177 13.556229
 [8] 19.310150 11.788898 11.317766 18.567281  8.545177 12.210340 20.528107
[15]  3.000000  3.000000  9.437752  9.437752 11.788898 13.259797  5.772589
[22]      -Inf
$label
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] NA            "BIGGS"       "LEIA"        NA            NA           
[11] "OBI-WAN"     NA            "TARKIN"      "HAN"         NA           
[16] NA            NA            NA            NA            "RED LEADER" 
[21] NA            NA           
$color
 [1] "gold"   "gold"   "gold"   "gold"   "red"    "gold"   "gold"   "gold"  
 [9] "gold"   "gold"   "gold"   "red"    "red"    "gold"   "grey20" "grey20"
[17] "gold"   "gold"   "gold"   "gold"   "gold"   "gold"  
> 
# Redraw the graph without margins, now that the color attribute is set.
par(mar = c(0, 0, 0, 0))
plot(g)
%in%
# what does %in% do?
# It tests whether the left-hand value occurs among the right-hand values.
1 %in% c(1, 2, 3, 4) # TRUE: 1 is one of the values
1 %in% c(2, 3, 4) # FALSE: 1 is not among them
> 1 %in% c(1,2,3,4)
[1] TRUE
> 1 %in% c(2,3,4)
[1] FALSE
>
r/social_network_analysis_tutorial.1574810201.txt.gz · Last modified:  by hkimscil
                
                







