====== Lecturer's note: MA, 2019 ======
In class
small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406)
medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692)
big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814)
dframe <- data.frame(small, medium, big)
fib <- c(0,1,1,2,3,5,8,13,21,34)
a1 <- c(1,1,2,3,5,8,13,21)
a2 <- c(2,4,5,3,6,9,18,25)
- select all elements that exceeds +- 1/2 sd from the mean of the column, medium in dframe (dframe%%$%%medium).
- choose only odd numbers from the variable fib.
- combine a1 and a2 into a data frame, dframe2
- name a1 and a2 to x and y in dfram2
1
> mmed <- mean(dframe$medium)
> smed <- sd(dframe$medium)
> smed <- .5 * smed
> lowmed <- mmed - smed
> himed <- mmed + smed
> hi <- dframe$medium > himed
> low <- dframe$medium < lowmed
> hi
> low
> res <- subset(dframe$medium, low|hi)
> res
[1] 10.526448 11.427756 8.533180 10.058465 7.949692
>
2
> is.odd <- fib%%2==1
> is.odd
[1] FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE
> fib[is.odd]
[1] 1 1 3 5 13 21
>
3,4
> a1 <- c(1,1,2,3,5,8,13,21)
> a2 <- c(2,4,5,3,6,9,18,25)
> x <- data.frame(a1, a2)
> x
a1 a2
1 1 2
2 1 4
3 2 5
4 3 3
5 5 6
6 8 9
7 13 18
8 21 25
> colnames(x) <- c("x", "y")
> x
x y
1 1 2
2 1 4
3 2 5
4 3 3
5 5 6
6 8 9
7 13 18
8 21 25
>
> colnames(x)[1] <- c("k")
====== t.test: mtcars ======
> mdata <- split(mtcars$mpg, mtcars$am)
> mdata
$`0`
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
$`1`
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> stack(mdata)
values ind
1 21.4 0
2 18.7 0
3 18.1 0
4 14.3 0
5 24.4 0
6 22.8 0
7 19.2 0
8 17.8 0
9 16.4 0
10 17.3 0
11 15.2 0
12 10.4 0
13 10.4 0
14 14.7 0
15 21.5 0
16 15.5 0
17 15.2 0
18 13.3 0
19 19.2 0
20 21.0 1
21 21.0 1
22 22.8 1
23 32.4 1
24 30.4 1
25 33.9 1
26 27.3 1
27 26.0 1
28 30.4 1
29 15.8 1
30 19.7 1
31 15.0 1
32 21.4 1
> mdata
$`0`
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
$`1`
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> t.test(mpg~am, data=mtcars)
Welch Two Sample t-test
data: mpg by am
t = -3.7671, df = 18.332, p-value = 0.001374
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-11.280194 -3.209684
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
> t.test(mpg~am, data=mtcars, var.equal=T)
Two Sample t-test
data: mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.84837 -3.64151
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
> m1 <- mdata[[1]]
> m2 <- mdata[[2]]
> m1
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
> m2
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> m1.var <- var(m1)
> m2.var <- var(m2)
> m1.n <- length(m1)
> m2.n <- length(m2)
> m1.df <- length(m1)-1
> m2.df <- length(m2)-1
> m1.ss <- m1.var*m1.df
> m2.ss <- m2.var*m2.df
> m1.ss
[1] 264.5874
> m2.ss
[1] 456.3092
> m12.ss <- m1.ss+m2.ss
> m12.ss
[1] 720.8966
> m12.df <- m1.df+m2.df
> pv <- m12.ss/m12.df
> pv
[1] 24.02989
> pv/m1.n
[1] 1.264731
> pv/m2.n
[1] 1.848453
> m.se <- sqrt((pv/m1.n)+(pv/m2.n))
> m.se
[1] 1.764422
> m1.m <- mean(m1)
> m2.m <- mean(m2)
> m.tvalue <- (m1.m-m2.m)/m.se
> m.tvalue
[1] -4.106127
> t.test(mpg~am, data=mtcars, var.equal=T)
Two Sample t-test
data: mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.84837 -3.64151
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
====== anova: mtcars ======
stats4each = function(x,y) {
meani <- tapply(x,y,mean)
vari <- tapply(x,y,var)
ni <- tapply(x,y,length)
dfi <- tapply(x,y,length)-1
ssi <- tapply(x,y,var)*(tapply(x,y,length)-1)
out <- rbind(meani,vari,ni,dfi,ssi)
return(out)
}
library(MASS)
tempd <- iris
x <- tempd$Species
y <- tempd$Sepal.Width
tempd <- mtcars
x <- tempd$gear
y <- tempd$mpg
tempd <- mtcars
x <- tempd$am
y <- tempd$mpg
x <- factor(x)
dfbetween <- nlevels(x)-1
stats <- stats4each(y, x)
stats
sswithin <- sum(stats[5,])
sstotal <- var(y)*(length(y)-1)
ssbetween <- sstotal-sswithin
round(sswithin,2)
round(ssbetween,2)
round(sstotal,2)
dfwithin <- sum(stats[4,])
dftotal <- length(y)-1
dfwithin
dfbetween
dftotal
mswithin <- sswithin / dfwithin
msbetween <- ssbetween / dfbetween
mstotal <- sstotal / dftotal
round(mswithin,2)
round(msbetween,2)
round(mstotal,2)
fval <- round(msbetween/mswithin,2)
fval
siglevel <- pf(q=fval, df1=dfbetween, df2=dfwithin, lower.tail=FALSE)
siglevel
mod <- aov(y~x, data=tempd)
summary(mod)
====== cor ======
attach(mtcars)
cor(mpg, hp)
mycor <- cov(mpg,hp)/(sd(mpg)*sd(hp))
mycor
sp <- cov(mpg,hp)*(length(mtcars$hp)-1)
ssx <- var(mpg)*(length(mtcars$mpg)-1)
ssy <- var(hp)*(length(mtcars$hp)-1)
mycor2 <- sp/sqrt(ssx*ssy)
mycor2
mycor2 == mycor
mycor == cor(mpg,hp)
mycor2 == cor(mpg,hp)