c:ma:2019:lecturer
Table of Contents
Lecturer's note: MA, 2019
In class
small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406) medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692) big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814) dframe <- data.frame(small, medium, big)
fib <- c(0,1,1,2,3,5,8,13,21,34)
a1 <- c(1,1,2,3,5,8,13,21) a2 <- c(2,4,5,3,6,9,18,25)
- select all elements that exceeds +- 1/2 sd from the mean of the column, medium in dframe (dframe$medium).
- choose only odd numbers from the variable fib.
- combine a1 and a2 into a data frame, dframe2
- name a1 and a2 to x and y in dfram2
1
> mmed <- mean(dframe$medium) > smed <- sd(dframe$medium) > smed <- .5 * smed > lowmed <- mmed - smed > himed <- mmed + smed > hi <- dframe$medium > himed > low <- dframe$medium < lowmed > hi > low > res <- subset(dframe$medium, low|hi) > res [1] 10.526448 11.427756 8.533180 10.058465 7.949692 >
2
> is.odd <- fib%%2==1 > is.odd [1] FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE > fib[is.odd] [1] 1 1 3 5 13 21 >
3,4
> a1 <- c(1,1,2,3,5,8,13,21) > a2 <- c(2,4,5,3,6,9,18,25) > x <- data.frame(a1, a2) > x a1 a2 1 1 2 2 1 4 3 2 5 4 3 3 5 5 6 6 8 9 7 13 18 8 21 25
> colnames(x) <- c("x", "y")
> x
x y
1 1 2
2 1 4
3 2 5
4 3 3
5 5 6
6 8 9
7 13 18
8 21 25
>
> colnames(x)[1] <- c("k")
t.test: mtcars
> mdata <- split(mtcars$mpg, mtcars$am)
> mdata
$`0`
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
$`1`
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> stack(mdata)
values ind
1 21.4 0
2 18.7 0
3 18.1 0
4 14.3 0
5 24.4 0
6 22.8 0
7 19.2 0
8 17.8 0
9 16.4 0
10 17.3 0
11 15.2 0
12 10.4 0
13 10.4 0
14 14.7 0
15 21.5 0
16 15.5 0
17 15.2 0
18 13.3 0
19 19.2 0
20 21.0 1
21 21.0 1
22 22.8 1
23 32.4 1
24 30.4 1
25 33.9 1
26 27.3 1
27 26.0 1
28 30.4 1
29 15.8 1
30 19.7 1
31 15.0 1
32 21.4 1
> mdata
$`0`
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
$`1`
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> t.test(mpg~am, data=mtcars)
Welch Two Sample t-test
data: mpg by am
t = -3.7671, df = 18.332, p-value = 0.001374
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-11.280194 -3.209684
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
> t.test(mpg~am, data=mtcars, var.equal=T)
Two Sample t-test
data: mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.84837 -3.64151
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
> m1 <- mdata[[1]]
> m2 <- mdata[[2]]
> m1
[1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
> m2
[1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> m1.var <- var(m1)
> m2.var <- var(m2)
> m1.n <- length(m1)
> m2.n <- length(m2)
> m1.df <- length(m1)-1
> m2.df <- length(m2)-1
> m1.ss <- m1.var*m1.df
> m2.ss <- m2.var*m2.df
> m1.ss
[1] 264.5874
> m2.ss
[1] 456.3092
> m12.ss <- m1.ss+m2.ss
> m12.ss
[1] 720.8966
> m12.df <- m1.df+m2.df
> pv <- m12.ss/m12.df
> pv
[1] 24.02989
> pv/m1.n
[1] 1.264731
> pv/m2.n
[1] 1.848453
> m.se <- sqrt((pv/m1.n)+(pv/m2.n))
> m.se
[1] 1.764422
> m1.m <- mean(m1)
> m2.m <- mean(m2)
> m.tvalue <- (m1.m-m2.m)/m.se
> m.tvalue
[1] -4.106127
> t.test(mpg~am, data=mtcars, var.equal=T)
Two Sample t-test
data: mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.84837 -3.64151
sample estimates:
mean in group 0 mean in group 1
17.14737 24.39231
anova: mtcars
stats4each = function(x,y) {
meani <- tapply(x,y,mean)
vari <- tapply(x,y,var)
ni <- tapply(x,y,length)
dfi <- tapply(x,y,length)-1
ssi <- tapply(x,y,var)*(tapply(x,y,length)-1)
out <- rbind(meani,vari,ni,dfi,ssi)
return(out)
}
library(MASS)
tempd <- iris
x <- tempd$Species
y <- tempd$Sepal.Width
tempd <- mtcars
x <- tempd$gear
y <- tempd$mpg
tempd <- mtcars
x <- tempd$am
y <- tempd$mpg
x <- factor(x)
dfbetween <- nlevels(x)-1
stats <- stats4each(y, x)
stats
sswithin <- sum(stats[5,])
sstotal <- var(y)*(length(y)-1)
ssbetween <- sstotal-sswithin
round(sswithin,2)
round(ssbetween,2)
round(sstotal,2)
dfwithin <- sum(stats[4,])
dftotal <- length(y)-1
dfwithin
dfbetween
dftotal
mswithin <- sswithin / dfwithin
msbetween <- ssbetween / dfbetween
mstotal <- sstotal / dftotal
round(mswithin,2)
round(msbetween,2)
round(mstotal,2)
fval <- round(msbetween/mswithin,2)
fval
siglevel <- pf(q=fval, df1=dfbetween, df2=dfwithin, lower.tail=FALSE)
siglevel
mod <- aov(y~x, data=tempd)
summary(mod)
cor
attach(mtcars) cor(mpg, hp) mycor <- cov(mpg,hp)/(sd(mpg)*sd(hp)) mycor sp <- cov(mpg,hp)*(length(mtcars$hp)-1) ssx <- var(mpg)*(length(mtcars$mpg)-1) ssy <- var(hp)*(length(mtcars$hp)-1) mycor2 <- sp/sqrt(ssx*ssy) mycor2 mycor2 == mycor mycor == cor(mpg,hp) mycor2 == cor(mpg,hp)
c/ma/2019/lecturer.txt · Last modified: by hkimscil
