User Tools

Site Tools


r:social_network_analysis_tutorial

This is an old revision of the document!


T1.

Dataset

star-wars-network-edges.csv
"source","target","weight"
"C-3PO","R2-D2",17
"LUKE","R2-D2",13
"OBI-WAN","R2-D2",6
"LEIA","R2-D2",5
"HAN","R2-D2",5
"CHEWBACCA","R2-D2",3
"DODONNA","R2-D2",1
"CHEWBACCA","OBI-WAN",7
"C-3PO","CHEWBACCA",5
"CHEWBACCA","LUKE",16
"CHEWBACCA","HAN",19
"CHEWBACCA","LEIA",11
"CHEWBACCA","DARTH VADER",1
"CHEWBACCA","DODONNA",1
"CAMIE","LUKE",2
"BIGGS","CAMIE",2
"BIGGS","LUKE",4
"DARTH VADER","LEIA",1
"BERU","LUKE",3
"BERU","OWEN",3
"BERU","C-3PO",2
"LUKE","OWEN",3
"C-3PO","LUKE",18
"C-3PO","OWEN",2
"C-3PO","LEIA",6
"LEIA","LUKE",17
"BERU","LEIA",1
"LUKE","OBI-WAN",19
"C-3PO","OBI-WAN",6
"LEIA","OBI-WAN",1
"MOTTI","TARKIN",2
"DARTH VADER","MOTTI",1
"DARTH VADER","TARKIN",7
"HAN","OBI-WAN",9
"HAN","LUKE",26
"GREEDO","HAN",1
"HAN","JABBA",1
"C-3PO","HAN",6
"LEIA","MOTTI",1
"LEIA","TARKIN",1
"HAN","LEIA",13
"DARTH VADER","OBI-WAN",1
"DODONNA","GOLD LEADER",1
"DODONNA","WEDGE",1
"DODONNA","LUKE",1
"GOLD LEADER","WEDGE",1
"GOLD LEADER","LUKE",1
"LUKE","WEDGE",2
"BIGGS","LEIA",1
"LEIA","RED LEADER",1
"LUKE","RED LEADER",3
"BIGGS","RED LEADER",3
"BIGGS","C-3PO",1
"C-3PO","RED LEADER",1
"RED LEADER","WEDGE",3
"GOLD LEADER","RED LEADER",1
"BIGGS","WEDGE",2
"RED LEADER","RED TEN",1
"BIGGS","GOLD LEADER",1
"LUKE","RED TEN",1
star-wars-network-nodes.csv
"name","id"
"R2-D2",0
"CHEWBACCA",1
"C-3PO",2
"LUKE",3
"DARTH VADER",4
"CAMIE",5
"BIGGS",6
"LEIA",7
"BERU",8
"OWEN",9
"OBI-WAN",10
"MOTTI",11
"TARKIN",12
"HAN",13
"GREEDO",14
"JABBA",15
"DODONNA",16
"GOLD LEADER",17
"WEDGE",18
"RED LEADER",19
"RED TEN",20
"GOLD FIVE",21

Analysis

# Read the edge list (columns: source, target, weight) from this page's
# exported code block 0, then preview the first rows.
t.e <- read.csv("http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=0", sep = ",")
head(t.e)
# Read the node list (columns: name, id) from exported code block 1,
# then preview the first rows.
t.n <- read.csv("http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=1", sep = ",")
head(t.n)
> t.e <- read.csv("http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=0", sep = ",")
> head(t.e)
     source target weight
1     C-3PO  R2-D2     17
2      LUKE  R2-D2     13
3   OBI-WAN  R2-D2      6
4      LEIA  R2-D2      5
5       HAN  R2-D2      5
6 CHEWBACCA  R2-D2      3
>
> t.n <- read.csv("http://commres.net/wiki/_export/code/r/social_network_analysis_tutorial?codeblock=1", sep = ",")
> head(t.n)
         name id
1       R2-D2  0
2   CHEWBACCA  1
3       C-3PO  2
4        LUKE  3
5 DARTH VADER  4
6       CAMIE  5
> 
# igraph supplies the graph object and the network functions used below.
library(igraph)
# Build an undirected graph: rows of t.e become edges and rows of t.n become
# vertices; the remaining columns (weight, id) are kept as edge/vertex attributes.
g <- graph_from_data_frame(d=t.e, vertices=t.n, directed=FALSE)
# Print the graph summary (type flags, vertex/edge counts, attributes, edges).
g
> library(igraph)
> g <- graph_from_data_frame(d=t.e, vertices=t.n, directed=FALSE)
> g
IGRAPH 9b91a04 UNW- 22 60 -- 
+ attr: name (v/c), id (v/n), weight (e/n)
+ edges from 9b91a04 (vertex names):
 [1] R2-D2      --C-3PO       R2-D2      --LUKE        R2-D2      --OBI-WAN    
 [4] R2-D2      --LEIA        R2-D2      --HAN         R2-D2      --CHEWBACCA  
 [7] R2-D2      --DODONNA     CHEWBACCA  --OBI-WAN     CHEWBACCA  --C-3PO      
[10] CHEWBACCA  --LUKE        CHEWBACCA  --HAN         CHEWBACCA  --LEIA       
[13] CHEWBACCA  --DARTH VADER CHEWBACCA  --DODONNA     LUKE       --CAMIE      
[16] CAMIE      --BIGGS       LUKE       --BIGGS       DARTH VADER--LEIA       
[19] LUKE       --BERU        BERU       --OWEN        C-3PO      --BERU       
[22] LUKE       --OWEN        C-3PO      --LUKE        C-3PO      --OWEN       
+ ... omitted several edges
> 

IGRAPH 9b91a04 UNW- 22 60 --
+ attr: name (v/c), id (v/n), weight (e/n)
+ edges from 9b91a04 (vertex names):

Vertices = Nodes

IGRAPH ID(#)
U: Undirected
N: Named graph
W: Weighted graph
name (v/c): name is a vertex (node) attribute, character
id (v/n): id is a vertex (node) attribute, numeric
weight (e/n): weight is an edge attribute, numeric

V(g)        # print nodes
> V(g)        # print nodes
+ 22/22 vertices, named, from 9b91a04:
 [1] R2-D2       CHEWBACCA   C-3PO       LUKE        DARTH VADER CAMIE      
 [7] BIGGS       LEIA        BERU        OWEN        OBI-WAN     MOTTI      
[13] TARKIN      HAN         GREEDO      JABBA       DODONNA     GOLD LEADER
[19] WEDGE       RED LEADER  RED TEN     GOLD FIVE  
> V(g)$name     # print name attributes in nodes
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] "CAMIE"       "BIGGS"       "LEIA"        "BERU"        "OWEN"       
[11] "OBI-WAN"     "MOTTI"       "TARKIN"      "HAN"         "GREEDO"     
[16] "JABBA"       "DODONNA"     "GOLD LEADER" "WEDGE"       "RED LEADER" 
[21] "RED TEN"     "GOLD FIVE"  
> V(g)$id       # id attributes of the nodes
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21
> 
vertex_attr(g) # all attributes of the nodes
> vertex_attr(g) # all attributes of the nodes
$name
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] "CAMIE"       "BIGGS"       "LEIA"        "BERU"        "OWEN"       
[11] "OBI-WAN"     "MOTTI"       "TARKIN"      "HAN"         "GREEDO"     
[16] "JABBA"       "DODONNA"     "GOLD LEADER" "WEDGE"       "RED LEADER" 
[21] "RED TEN"     "GOLD FIVE"  

$id
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21

> 
E(g) # print edges
> E(g) # edges
+ 60/60 edges from 9b91a04 (vertex names):
 [1] R2-D2      --C-3PO       R2-D2      --LUKE        R2-D2      --OBI-WAN    
 [4] R2-D2      --LEIA        R2-D2      --HAN         R2-D2      --CHEWBACCA  
 [7] R2-D2      --DODONNA     CHEWBACCA  --OBI-WAN     CHEWBACCA  --C-3PO      
[10] CHEWBACCA  --LUKE        CHEWBACCA  --HAN         CHEWBACCA  --LEIA       
[13] CHEWBACCA  --DARTH VADER CHEWBACCA  --DODONNA     LUKE       --CAMIE      
[16] CAMIE      --BIGGS       LUKE       --BIGGS       DARTH VADER--LEIA       
[19] LUKE       --BERU        BERU       --OWEN        C-3PO      --BERU       
[22] LUKE       --OWEN        C-3PO      --LUKE        C-3PO      --OWEN       
[25] C-3PO      --LEIA        LUKE       --LEIA        LEIA       --BERU       
[28] LUKE       --OBI-WAN     C-3PO      --OBI-WAN     LEIA       --OBI-WAN    
+ ... omitted several edges
> 
E(g)$weight   # print weight attributes in edge
> E(g)$weight   # print weight attributes in edge
 [1] 17 13  6  5  5  3  1  7  5 16 19 11  1  1  2  2  4  1  3  3  2  3 18  2  6 17
[27]  1 19  6  1  2  1  7  9 26  1  1  6  1  1 13  1  1  1  1  1  1  2  1  1  3  3
[53]  1  1  3  1  2  1  1  1
> 
edge_attr(g) # all attributes of the edges
> edge_attr(g) # all attributes of the edges
$weight
 [1] 17 13  6  5  5  3  1  7  5 16 19 11  1  1  2  2  4  1  3  3  2  3 18  2  6 17
[27]  1 19  6  1  2  1  7  9 26  1  1  6  1  1 13  1  1  1  1  1  1  2  1  1  3  3
[53]  1  1  3  1  2  1  1  1
g[] # adjacency matrix
> g[] # adjacency matrix
22 x 22 sparse Matrix of class "dgCMatrix"
   [[ suppressing 22 column names ‘R2-D2’, ‘CHEWBACCA’, ‘C-3PO’ ... ]] ## column names omitted
R2-D2        .  3 17 13 . . .  5 . .  6 . .  5 . . 1 . . . . .
CHEWBACCA    3  .  5 16 1 . . 11 . .  7 . . 19 . . 1 . . . . .
C-3PO       17  5  . 18 . . 1  6 2 2  6 . .  6 . . . . . 1 . .
LUKE        13 16 18  . . 2 4 17 3 3 19 . . 26 . . 1 1 2 3 1 .
DARTH VADER  .  1  .  . . . .  1 . .  1 1 7  . . . . . . . . .
CAMIE        .  .  .  2 . . 2  . . .  . . .  . . . . . . . . .
BIGGS        .  .  1  4 . 2 .  1 . .  . . .  . . . . 1 2 3 . .
LEIA         5 11  6 17 1 . 1  . 1 .  1 1 1 13 . . . . . 1 . .
BERU         .  .  2  3 . . .  1 . 3  . . .  . . . . . . . . .
OWEN         .  .  2  3 . . .  . 3 .  . . .  . . . . . . . . .
OBI-WAN      6  7  6 19 1 . .  1 . .  . . .  9 . . . . . . . .
MOTTI        .  .  .  . 1 . .  1 . .  . . 2  . . . . . . . . .
TARKIN       .  .  .  . 7 . .  1 . .  . 2 .  . . . . . . . . .
HAN          5 19  6 26 . . . 13 . .  9 . .  . 1 1 . . . . . .
GREEDO       .  .  .  . . . .  . . .  . . .  1 . . . . . . . .
JABBA        .  .  .  . . . .  . . .  . . .  1 . . . . . . . .
DODONNA      1  1  .  1 . . .  . . .  . . .  . . . . 1 1 . . .
GOLD LEADER  .  .  .  1 . . 1  . . .  . . .  . . . 1 . 1 1 . .
WEDGE        .  .  .  2 . . 2  . . .  . . .  . . . 1 1 . 3 . .
RED LEADER   .  .  1  3 . . 3  1 . .  . . .  . . . . 1 3 . 1 .
RED TEN      .  .  .  1 . . .  . . .  . . .  . . . . . . 1 . .
GOLD FIVE    .  .  .  . . . .  . . .  . . .  . . . . . . . . .
g[1,] # first row of adjacency matrix
data.frame(g[1,]) # easy on eyes
> g[1,] # first row of adjacency matrix
      R2-D2   CHEWBACCA       C-3PO        LUKE DARTH VADER       CAMIE       BIGGS        LEIA 
          0           3          17          13           0           0           0           5 
       BERU        OWEN     OBI-WAN       MOTTI      TARKIN         HAN      GREEDO       JABBA 
          0           0           6           0           0           5           0           0 
    DODONNA GOLD LEADER       WEDGE  RED LEADER     RED TEN   GOLD FIVE 
          1           0           0           0           0           0 
> 
> data.frame(g[1,])
            g.1...
R2-D2            0
CHEWBACCA        3
C-3PO           17
LUKE            13
DARTH VADER      0
CAMIE            0
BIGGS            0
LEIA             5
BERU             0
OWEN             0
OBI-WAN          6
MOTTI            0
TARKIN           0
HAN              5
GREEDO           0
JABBA            0
DODONNA          1
GOLD LEADER      0
WEDGE            0
RED LEADER       0
RED TEN          0
GOLD FIVE        0

Vis

# Plot the graph with the default settings.
plot(g)

# Same plot, with all four figure margins set to zero.
par(mar = rep(0, 4))
plot(g)

# Zero margins again, this time overriding node, label and edge styling.
par(mar = rep(0, 4))
plot(
  g,
  vertex.color = "grey",         # node fill colour
  vertex.label.color = "black",  # label text colour
  vertex.label.cex = 0.75,       # labels at 75% of their original size
  edge.curved = 0.15,            # give the edges a 15% curve
  edge.color = "grey20"          # dark grey edges
)

# Strength (weighted degree): for each vertex, the sum of the weights of the
# edges attached to it.
strength(g)
# Same values as a one-column table. data.frame() is used (not as.data.frame())
# because it produces the `strength.g.` column name shown in the printed output;
# as.data.frame() would label the column `strength(g)` instead.
data.frame(strength(g))
> strength(g)
      R2-D2   CHEWBACCA       C-3PO        LUKE DARTH VADER       CAMIE       BIGGS 
         50          63          64         129          11           4          14 
       LEIA        BERU        OWEN     OBI-WAN       MOTTI      TARKIN         HAN 
         59           9           8          49           4          10          80 
     GREEDO       JABBA     DODONNA GOLD LEADER       WEDGE  RED LEADER     RED TEN 
          1           1           5           5           9          13           2 
  GOLD FIVE 
          0 
> data.frame(strength(g))
            strength.g.
R2-D2                50
CHEWBACCA            63
C-3PO                64
LUKE                129
DARTH VADER          11
CAMIE                 4
BIGGS                14
LEIA                 59
BERU                  9
OWEN                  8
OBI-WAN              49
MOTTI                 4
TARKIN               10
HAN                  80
GREEDO                1
JABBA                 1
DODONNA               5
GOLD LEADER           5
WEDGE                 9
RED LEADER           13
RED TEN               2
GOLD FIVE             0
> 
# First attempt: use the raw strength as the node size. Strength ranges from
# 0 to 129 in this network, so the sizes differ enormously.
V(g)$size <- strength(g)
par(mar=c(0,0,0,0))
plot(g)

# Take the log to compress that range. A vertex with no edges has strength 0,
# and log(0) is -Inf, which is not a usable node size; clamping the log at 0
# gives such a vertex the same minimum size (3) as a vertex of strength 1.
# Every vertex with strength >= 1 keeps exactly the size it had before.
V(g)$size <- pmax(log(strength(g)), 0) * 4 + 3
par(mar=c(0,0,0,0))
plot(g)

# Redraw with custom node, label and edge styling.
plot(
  g,
  vertex.color = "grey",         # node fill colour
  vertex.label.color = "black",  # label text colour
  vertex.label.cex = 0.75,       # labels at 75% of their original size
  edge.curved = 0.25,            # give the edges a 25% curve
  edge.color = "grey20"          # dark grey edges
)

# Keep a label only for vertices whose strength is at least 10; the rest get NA.
V(g)$label <- ifelse(strength(g) >= 10, V(g)$name, NA)
par(mar = rep(0, 4))
plot(
  g,
  vertex.color = "grey",         # node fill colour
  vertex.label.color = "black",  # label text colour
  vertex.label.cex = 0.75,       # labels at 75% of their original size
  edge.curved = 0.25,            # give the edges a 25% curve
  edge.color = "grey20"          # dark grey edges
)

t.n

Using ifelse

t.n
> t.n
          name id
1        R2-D2  0
2    CHEWBACCA  1
3        C-3PO  2
4         LUKE  3
5  DARTH VADER  4
6        CAMIE  5
7        BIGGS  6
8         LEIA  7
9         BERU  8
10        OWEN  9
11     OBI-WAN 10
12       MOTTI 11
13      TARKIN 12
14         HAN 13
15      GREEDO 14
16       JABBA 15
17     DODONNA 16
18 GOLD LEADER 17
19       WEDGE 18
20  RED LEADER 19
21     RED TEN 20
22   GOLD FIVE 21
> 
t.n$name=="R2-D2"
> t.n$name=="R2-D2"
 [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> 
as.data.frame(t.n$name=="R2-D2")
> as.data.frame(t.n$name=="R2-D2")
   t.n$name == "R2-D2"
1                 TRUE
2                FALSE
3                FALSE
4                FALSE
5                FALSE
6                FALSE
7                FALSE
8                FALSE
9                FALSE
10               FALSE
11               FALSE
12               FALSE
13               FALSE
14               FALSE
15               FALSE
16               FALSE
17               FALSE
18               FALSE
19               FALSE
20               FALSE
21               FALSE
22               FALSE
> 
tmp.a <- as.data.frame(t.n)
tmp.b <- as.data.frame(t.n$name=="R2-D2")
tmp <- data.frame(tmp.a, tmp.b)
tmp
> tmp.a <- as.data.frame(t.n)
> tmp.b <- as.data.frame(t.n$name=="R2-D2")
> tmp <- data.frame(tmp.a, tmp.b)
> tmp
          name id t.n.name.....R2.D2.
1        R2-D2  0                TRUE
2    CHEWBACCA  1               FALSE
3        C-3PO  2               FALSE
4         LUKE  3               FALSE
5  DARTH VADER  4               FALSE
6        CAMIE  5               FALSE
7        BIGGS  6               FALSE
8         LEIA  7               FALSE
9         BERU  8               FALSE
10        OWEN  9               FALSE
11     OBI-WAN 10               FALSE
12       MOTTI 11               FALSE
13      TARKIN 12               FALSE
14         HAN 13               FALSE
15      GREEDO 14               FALSE
16       JABBA 15               FALSE
17     DODONNA 16               FALSE
18 GOLD LEADER 17               FALSE
19       WEDGE 18               FALSE
20  RED LEADER 19               FALSE
21     RED TEN 20               FALSE
22   GOLD FIVE 21               FALSE
ifelse(t.n$name=="R2-D2", "yes", "no")
> ifelse(t.n$name=="R2-D2", "yes", "no")
 [1] "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[13] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
> 
ifelse(grepl("R", t.n$name), "yes", "no") # grep function 
> ifelse(grepl("R", t.n$name), "yes", "no")
 [1] "yes" "no"  "no"  "no"  "yes" "no"  "no"  "no"  "yes" "no"  "no"  "no" 
[13] "yes" "no"  "yes" "no"  "no"  "yes" "no"  "yes" "yes" "no" 
> 
tmp.a <- data.frame(t.n$name)
tmp.b <- data.frame(ifelse(grepl("R", t.n$name), "yes", "no"))
data.frame(tmp.a, tmp.b)
> tmp.a <- data.frame(t.n$name)
> tmp.b <- data.frame(ifelse(grepl("R", t.n$name), "yes", "no"))
> data.frame(tmp.a, tmp.b)
      t.n.name ifelse.grepl..R...t.n.name....yes....no..
1        R2-D2                                       yes
2    CHEWBACCA                                        no
3        C-3PO                                        no
4         LUKE                                        no
5  DARTH VADER                                       yes
6        CAMIE                                        no
7        BIGGS                                        no
8         LEIA                                        no
9         BERU                                       yes
10        OWEN                                        no
11     OBI-WAN                                        no
12       MOTTI                                        no
13      TARKIN                                       yes
14         HAN                                        no
15      GREEDO                                       yes
16       JABBA                                        no
17     DODONNA                                        no
18 GOLD LEADER                                       yes
19       WEDGE                                        no
20  RED LEADER                                       yes
21     RED TEN                                       yes
22   GOLD FIVE                                        no
> 

Coloring nodes

# Three groups of character names; each group gets its own node colour below.
dark_side <- c("DARTH VADER", "MOTTI", "TARKIN")
light_side <- c("R2-D2", "CHEWBACCA", "C-3PO", "LUKE", "CAMIE", "BIGGS",
                "LEIA", "BERU", "OWEN", "OBI-WAN", "HAN", "DODONNA",
                "GOLD LEADER", "WEDGE", "RED LEADER", "RED TEN", "GOLD FIVE")
other <- c("GREEDO", "JABBA")
# now we'll create a new color variable as a node property
# (start with NA for every vertex, then fill in one group at a time)
V(g)$color <- NA
# `V(g)$name %in% group` is TRUE for the vertices whose name is in that group,
# so each assignment colours only the members of one group.
V(g)$color[V(g)$name %in% dark_side] <- "red"
V(g)$color[V(g)$name %in% light_side] <- "gold"
V(g)$color[V(g)$name %in% other] <- "grey20"
# List all vertex attributes to check the new color attribute.
vertex_attr(g)
> dark_side <- c("DARTH VADER", "MOTTI", "TARKIN")
> light_side <- c("R2-D2", "CHEWBACCA", "C-3PO", "LUKE", "CAMIE", "BIGGS",
+                 "LEIA", "BERU", "OWEN", "OBI-WAN", "HAN", "DODONNA",
+                 "GOLD LEADER", "WEDGE", "RED LEADER", "RED TEN", "GOLD FIVE")
> other <- c("GREEDO", "JABBA")
> # node we'll create a new color variable as a node property
> V(g)$color <- NA
> V(g)$color[V(g)$name %in% dark_side] <- "red"
> V(g)$color[V(g)$name %in% light_side] <- "gold"
> V(g)$color[V(g)$name %in% other] <- "grey20"
> vertex_attr(g)
$name
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] "CAMIE"       "BIGGS"       "LEIA"        "BERU"        "OWEN"       
[11] "OBI-WAN"     "MOTTI"       "TARKIN"      "HAN"         "GREEDO"     
[16] "JABBA"       "DODONNA"     "GOLD LEADER" "WEDGE"       "RED LEADER" 
[21] "RED TEN"     "GOLD FIVE"  

$id
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21

$size
 [1] 18.648092 19.572539 19.635532 22.439250 12.591581  8.545177 13.556229
 [8] 19.310150 11.788898 11.317766 18.567281  8.545177 12.210340 20.528107
[15]  3.000000  3.000000  9.437752  9.437752 11.788898 13.259797  5.772589
[22]      -Inf

$label
 [1] "R2-D2"       "CHEWBACCA"   "C-3PO"       "LUKE"        "DARTH VADER"
 [6] NA            "BIGGS"       "LEIA"        NA            NA           
[11] "OBI-WAN"     NA            "TARKIN"      "HAN"         NA           
[16] NA            NA            NA            NA            "RED LEADER" 
[21] NA            NA           

$color
 [1] "gold"   "gold"   "gold"   "gold"   "red"    "gold"   "gold"   "gold"  
 [9] "gold"   "gold"   "gold"   "red"    "red"    "gold"   "grey20" "grey20"
[17] "gold"   "gold"   "gold"   "gold"   "gold"   "gold"  

> 
par(mar=c(0,0,0,0))
plot(g)

%in%

# what does %in% do?
1 %in% c(1,2,3,4)
1 %in% c(2,3,4)
> 1 %in% c(1,2,3,4)
[1] TRUE
> 1 %in% c(2,3,4)
[1] FALSE
> 
# Three groups of character names; each group gets its own node colour below.
dark_side <- c("DARTH VADER", "MOTTI", "TARKIN")
light_side <- c("R2-D2", "CHEWBACCA", "C-3PO", "LUKE", "CAMIE", "BIGGS",
                "LEIA", "BERU", "OWEN", "OBI-WAN", "HAN", "DODONNA",
                "GOLD LEADER", "WEDGE", "RED LEADER", "RED TEN", "GOLD FIVE")
other <- c("GREEDO", "JABBA")
# now we'll create a new color variable as a node property
V(g)$color <- NA # create an attribute (column) named color on V(g)
V(g)$color[V(g)$name %in% dark_side] <- "red" 
# Where V(g)$name is one of the names in dark_side, write "red" into the color column.
V(g)$color[V(g)$name %in% light_side] <- "gold"
V(g)$color[V(g)$name %in% other] <- "grey20"
# List all vertex attributes to check the new color attribute.
vertex_attr(g)
r/social_network_analysis_tutorial.1574810344.txt.gz · Last modified: 2019/11/27 08:19 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki