Class page
multivariate statistics in R
network analysis in R
https://lagunita.stanford.edu
Network Analysis in R using igraph package – from Datacamp
Marketing analysis in r statistics from Datacamp
Introduction to R and others
using theories 연구문제와 가설 and making hypotheses
Installing R
Some basics
You should be knowledgeable about building research questions and hypotheses. However, we will deal with the issue in class. Please read the two and 커뮤니케이션_연구문제_제기와_가설 individually. The materials will be on quizzes.
Navigating software
Assignment for all
Group assignment
ga04.making.hypothesis 가설 연습 ajoubb
probability
General Statistics
> mdata <- split(mtcars$mpg, mtcars$am)
> mdata
$`0`
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
$`1`
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> stack(mdata)
values ind
1 21.4 0
2 18.7 0
3 18.1 0
4 14.3 0
5 24.4 0
6 22.8 0
7 19.2 0
8 17.8 0
9 16.4 0
10 17.3 0
11 15.2 0
12 10.4 0
13 10.4 0
14 14.7 0
15 21.5 0
16 15.5 0
17 15.2 0
18 13.3 0
19 19.2 0
20 21.0 1
21 21.0 1
22 22.8 1
23 32.4 1
24 30.4 1
25 33.9 1
26 27.3 1
27 26.0 1
28 30.4 1
29 15.8 1
30 19.7 1
31 15.0 1
32 21.4 1
> mdata
$`0`
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
$`1`
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> t.test(mpg~am, data=mtcars)
Welch Two Sample t-test
data: mpg by am
t = -3.7671, df = 18.332, p-value = 0.001374
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-11.280194 -3.209684
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
> t.test(mpg~am, data=mtcars, var.equal=T)
Two Sample t-test
data: mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.84837 -3.64151
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
> m1 <- mdata[[1]]
> m2 <- mdata[[2]]
> m1
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
> m2
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> m1.var <- var(m1)
> m2.var <- var(m2)
> m1.n <- length(m1)
> m2.n <- length(m2)
> m1.df <- length(m1)-1
> m2.df <- length(m2)-1
> m1.ss <- m1.var*m1.df
> m2.ss <- m2.var*m2.df
> m1.ss
[1] 264.5874
> m2.ss
[1] 456.3092
> m12.ss <- m1.ss+m2.ss
> m12.ss
[1] 720.8966
> m12.df <- m1.df+m2.df
> pv <- m12.ss/m12.df
> pv
[1] 24.02989
> pv/m1.n
[1] 1.264731
> pv/m2.n
[1] 1.848453
> m.se <- sqrt((pv/m1.n)+(pv/m2.n))
> m.se
[1] 1.764422
> m1.m <- mean(m1)
> m2.m <- mean(m2)
> m.tvalue <- (m1.m-m2.m)/m.se
> m.tvalue
[1] -4.106127
> t.test(mpg~am, data=mtcars, var.equal=T)
Two Sample t-test
data: mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.84837 -3.64151
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
#' Per-group descriptive statistics for a one-way layout
#'
#' @param x Vector of observations; `mean()` and `var()` are applied to it
#'   within each group.
#' @param y Grouping vector passed to `tapply()` as the index; must have the
#'   same length as `x`.
#' @return A matrix with one column per group and five rows, in this order:
#'   `meani` (group mean), `vari` (group variance), `ni` (group size),
#'   `dfi` (`ni - 1`) and `ssi` (sum of squares, `vari * dfi`).
stats4each <- function(x, y) {
  meani <- tapply(x, y, mean)
  vari <- tapply(x, y, var)
  ni <- tapply(x, y, length)
  # Derive df and SS from the sizes and variances already computed instead of
  # running tapply() again for each of them.
  dfi <- ni - 1
  ssi <- vari * dfi
  # rbind() takes the row names from the argument names, so callers that index
  # rows by position (4 = df, 5 = SS) or by name keep working.
  rbind(meani, vari, ni, dfi, ssi)
}
library(MASS)
# One-way ANOVA computed by hand, then checked against aov().
#
# Other data sets that work with the same steps (use one of these lines in
# place of the three assignments below to try it):
#   tempd <- iris;   x <- tempd$Species; y <- tempd$Sepal.Width
#   tempd <- mtcars; x <- tempd$gear;    y <- tempd$mpg
tempd <- mtcars
x <- tempd$am
y <- tempd$mpg

# The grouping variable must be a factor so nlevels() and aov() treat it as
# categorical.
x <- factor(x)
dfbetween <- nlevels(x) - 1

# Rows of the stats4each() result: mean, variance, n, df, SS (one column per
# group).
stats <- stats4each(y, x)
stats

# Sums of squares: within = sum of the per-group SS (row 5); total comes from
# the overall variance; between is what is left.
sswithin <- sum(stats[5, ])
sstotal <- var(y) * (length(y) - 1)
ssbetween <- sstotal - sswithin
round(sswithin, 2)
round(ssbetween, 2)
round(sstotal, 2)

# Degrees of freedom: within = sum of the per-group df (row 4).
dfwithin <- sum(stats[4, ])
dftotal <- length(y) - 1
dfwithin
dfbetween
dftotal

# Mean squares.
mswithin <- sswithin / dfwithin
msbetween <- ssbetween / dfbetween
mstotal <- sstotal / dftotal
round(mswithin, 2)
round(msbetween, 2)
round(mstotal, 2)

# Keep the F ratio at full precision and round only for display: feeding a
# rounded F into pf() would give a p-value that disagrees with aov().
fval <- msbetween / mswithin
round(fval, 2)
siglevel <- pf(q = fval, df1 = dfbetween, df2 = dfwithin, lower.tail = FALSE)
siglevel

# Reference result. With the data sets used here, tempd has no columns named
# x or y, so the formula picks up the vectors defined above.
mod <- aov(y ~ x, data = tempd)
summary(mod)
# Pearson correlation between mpg and hp in mtcars, computed three ways.
# The two columns are extracted explicitly instead of attach()-ing the data
# frame, so the search path is left alone and no existing object is masked.
mpg <- mtcars$mpg
hp <- mtcars$hp
cor(mpg, hp)

# 1) Covariance divided by the product of the standard deviations.
mycor <- cov(mpg, hp) / (sd(mpg) * sd(hp))
mycor

# 2) Sum of cross-products divided by the square root of the product of the
#    two sums of squares.
sp <- cov(mpg, hp) * (length(hp) - 1)
ssx <- var(mpg) * (length(mpg) - 1)
ssy <- var(hp) * (length(hp) - 1)
mycor2 <- sp / sqrt(ssx * ssy)
mycor2

# Compare with a numeric tolerance: the three routes use different arithmetic,
# so exact == on doubles can report FALSE for values that agree to rounding.
isTRUE(all.equal(mycor2, mycor))
isTRUE(all.equal(mycor, cor(mpg, hp)))
isTRUE(all.equal(mycor2, cor(mpg, hp)))
correlation
regression
multiple regression
Mid-term period
correlation
regression
multiple regression
Partial and semipartial correlation
using dummy variables
Statistical Regression Methods
Sequential Regression
social network analysis
social network analysis tutorial
sna in r
Stanford University egs.
Quiz 2 (on Friday, Dec. 6th) covers:
Some R outputs will be used to ask about the related concepts and ideas (listed above).
Group Presentation
Final-term covers:
correlation
regression
multiple regression
partial and semipartial correlation
using dummy variables
factor analysis
social network analysis
sna tutorial
sna in r
SNA e.g. lab 06
Some R outputs will be used to ask about the related concepts and ideas (listed above).