sna_and_clustering
This is an old revision of the document!
SNA (social network analysis) and clustering
in R
#
library(igraph)
# Load the edge list; its `first` and `second` columns become the two
# endpoints of every edge.
data <- read.csv(
  "http://commres.net/wiki/_media/r/socialnetworkdata.csv",
  header = TRUE
)
head(data)
str(data)
y <- data.frame(data$first, data$second)
head(y)

# Build a directed graph from the two-column edge list.
# net <- graph.data.frame(y, directed = TRUE)
net <- graph_from_data_frame(y, directed = TRUE)
head(net)
net

# Vertices of the graph
V(net) # vertex in net data
who.net <- V(net) # 52/52 vertices
data.frame(who.net)

# Edges of the graph
E(net) # edge info in net data
rel.net <- E(net)
rel.net # output 290/290 edges
# 52 by 290 data set

# Store the degree of each vertex as a vertex attribute so the plotting code
# can scale vertex sizes by it.
V(net)$degree <- degree(net)
degree.net <- data.frame(degree(net))
V(net)$degree
str(who.net)
who.net$name
hist(V(net)$degree)
# Basic plot of the network. The seed is set once here, before the first plot.
set.seed(222)
plot(net,
  vertex.color = "lightblue",
  # Was misspelled `vertext.size`, so it was not applied as the vertex size.
  vertex.size = 2,
  edge.arrow.size = 0.1,
  vertex.label.cex = 0.8
)

# The next three plots scale each vertex by its degree and differ only in the
# layout algorithm. One colour is generated per vertex instead of relying on a
# hard-coded vertex count.
# Layout: Fruchterman-Reingold (formerly layout.fruchterman.reingold)
plot(net,
  vertex.color = rainbow(vcount(net)),
  vertex.size = V(net)$degree * 0.3,
  edge.arrow.size = 0.1,
  layout = layout_with_fr
)

# Layout: graphopt (formerly layout.graphopt)
plot(net,
  vertex.color = rainbow(vcount(net)),
  vertex.size = V(net)$degree * 0.8,
  edge.arrow.size = 0.1,
  layout = layout_with_graphopt
)

# Layout: Kamada-Kawai (formerly layout.kamada.kawai)
plot(net,
  vertex.color = rainbow(vcount(net)),
  vertex.size = V(net)$degree * 0.4,
  edge.arrow.size = 0.1,
  layout = layout_with_kk
)
# Centrality measures for every vertex
betweenness(net)
degree(net)
closeness(net)
sort(closeness(net))

# HITS hub and authority scores. They are computed once and reused; the
# separate hub_score() call was dropped because hits_scores() already reports
# the hub scores.
hits.net <- hits_scores(net)
hits.net
hs <- hits.net$hub # outlinks
as <- hits.net$authority # inlinks
hs
as

# Plot with vertex size proportional to the hub score. The same seed is used
# before both plots.
set.seed(123)
plot(net,
  vertex.size = hs * 30,
  main = "Hubs",
  vertex.color = rainbow(vcount(net)),
  edge.arrow.size = 0.1,
  layout = layout_with_kk
)

# Plot with vertex size proportional to the authority score.
set.seed(123)
plot(net,
  vertex.size = as * 30,
  main = "Authorities",
  vertex.color = rainbow(vcount(net)),
  edge.arrow.size = 0.1,
  layout = layout_with_kk
)
# Rebuild the graph as undirected for community detection.
net <- graph_from_data_frame(y, directed = FALSE)

# Communities found by edge betweenness
cnet <- cluster_edge_betweenness(net)

# Other community detection functions that can be tried:
# cluster_fast_greedy(net)
# cluster_fluid_communities(net, 2)
# cluster_infomap(net)
# cluster_label_prop()
# cluster_spinglass()
# cluster.distribution()
# components(net)

# Two separate infomap runs, each plotted below so they can be compared.
infomap <- cluster_infomap(net)
c.infomap <- cluster_infomap(net)

plot(cnet,
  net,
  vertex.size = 10,
  vertex.label.cex = 0.8
)
plot(infomap,
  net,
  vertex.size = 10,
  vertex.label.cex = 0.8
)
# Previously this repeated the `infomap` plot and left `c.infomap` unused.
plot(c.infomap,
  net,
  vertex.size = 10,
  vertex.label.cex = 0.8
)
# clustering in R
# clusterdata.csv
orgs <- read.csv(
  "http://commres.net/wiki/_media/r/clusterdata.csv",
  header = TRUE
)
str(orgs)
head(orgs)

# Scatterplot matrix of columns 2 to 9, then Fuel_Cost against Sales with each
# point labelled by its Company value.
pairs(orgs[2:9])
plot(Fuel_Cost ~ Sales, data = orgs)
with(orgs, text(Fuel_Cost ~ Sales,
  labels = orgs$Company,
  pos = 4
))

# normalization (standardization)
z <- orgs[, -1] # remove the first column
# Column-wise mean and standard deviation, taken per column of the data frame
# rather than through apply(), which first coerces the frame to a matrix.
means <- colMeans(z)
sds <- vapply(z, sd, numeric(1))
nor <- scale(z, center = means, scale = sds)

# Pairwise distances between the standardized rows (dist() default method)
distance <- dist(nor)
distance

# Hierarchical clustering with hclust()'s default linkage
orgs.hclust <- hclust(distance)
plot(orgs.hclust)
plot(orgs.hclust,
  labels = orgs$Company,
  main = "Default from hclust"
)
# hang = -1 draws all leaf labels on the same baseline
plot(orgs.hclust,
  hang = -1,
  labels = orgs$Company,
  main = "Default from hclust"
)

# Same distances, average linkage
orgs.hclust.average <- hclust(distance, method = "average")
plot(orgs.hclust.average,
  hang = -1,
  labels = orgs$Company,
  main = "hclust average"
)

# Cut the default tree into 3 and into 5 clusters and count members
member.by.3 <- cutree(orgs.hclust, 3)
table(member.by.3)
member.by.5 <- cutree(orgs.hclust, 5)
table(member.by.5)

# Per-cluster mean and sd of every standardized variable (3-cluster solution)
aggregate(nor, list(member.by.3), mean)
aggregate(nor, list(member.by.3), sd)
# Elbow plot: total within-group sum of squares for 1..max.k clusters.
# kmeans() starts from random centres, so the seed is fixed to make the curve
# reproducible. max.k is capped below the number of rows so kmeans() is never
# asked for as many centres as there are observations.
set.seed(123)
max.k <- min(20, nrow(nor) - 1)
wss <- numeric(max.k)
# For a single cluster the within-group SS is the total sum of squares.
wss[1] <- (nrow(nor) - 1) * sum(apply(nor, 2, var))
for (k in seq_len(max.k)[-1]) {
  # tot.withinss equals sum(withinss)
  wss[k] <- kmeans(nor, centers = k)$tot.withinss
}
plot(seq_len(max.k), wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)

# kmeans clustering
set.seed(123)
kc <- kmeans(nor, 3)
kc
# Install the cluster package only when it is missing, instead of
# reinstalling it on every run of the script.
if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
library(cluster)

ot <- nor
datadistshortset <- dist(ot, method = "euclidean")
hc1 <- hclust(datadistshortset, method = "complete")

# `datadistshortset` is already a dissimilarity object, so pam() must be told
# to use it as such. With diss = FALSE the distance matrix was treated as a
# table of raw observations.
pamvshortset <- pam(datadistshortset, 4, diss = TRUE)

# 2-D cluster map of the 4-cluster PAM solution
clusplot(pamvshortset,
  shade = FALSE,
  labels = 2,
  col.clus = "blue",
  col.p = "red",
  span = FALSE,
  main = "Cluster Mapping",
  cex = 1.2
)
sna_and_clustering.1732157267.txt.gz · Last modified: by hkimscil
