====== Lecturer's note: MA, 2019 ====== In class

small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406)
medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692)
big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814)

dframe <- data.frame(small, medium, big)

fib <- c(0,1,1,2,3,5,8,13,21,34)

a1 <- c(1,1,2,3,5,8,13,21)
a2 <- c(2,4,5,3,6,9,18,25)

- select all elements that exceeds +- 1/2 sd from the mean of the column, medium in dframe (dframe%%$%%medium). - choose only odd numbers from the variable fib. - combine a1 and a2 into a data frame, dframe2 - name a1 and a2 to x and y in dfram2 1


> mmed <- mean(dframe$medium)
> smed <- sd(dframe$medium)
> smed <- .5 * smed
> lowmed <- mmed - smed
> himed <- mmed + smed
> hi <- dframe$medium > himed
> low <- dframe$medium < lowmed
> hi
> low
> res <- subset(dframe$medium, low|hi)
> res
[1] 10.526448 11.427756  8.533180 10.058465  7.949692
>


> is.odd <- fib%%2==1
> is.odd
 [1] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
> fib[is.odd]
[1]  1  1  3  5 13 21
>

3,4


> a1 <- c(1,1,2,3,5,8,13,21)
> a2 <- c(2,4,5,3,6,9,18,25)
> x <- data.frame(a1, a2)
> x
  a1 a2
1  1  2
2  1  4
3  2  5
4  3  3
5  5  6
6  8  9
7 13 18
8 21 25


> colnames(x) <- c("x", "y")
> x
   x  y
1  1  2
2  1  4
3  2  5
4  3  3
5  5  6
6  8  9
7 13 18
8 21 25
> 
> colnames(x)[1] <- c("k")

====== t.test: mtcars ======


> mdata <- split(mtcars$mpg, mtcars$am)
> mdata
$`0`
 [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2

$`1`
 [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4

> stack(mdata)
   values ind
1    21.4   0
2    18.7   0
3    18.1   0
4    14.3   0
5    24.4   0
6    22.8   0
7    19.2   0
8    17.8   0
9    16.4   0
10   17.3   0
11   15.2   0
12   10.4   0
13   10.4   0
14   14.7   0
15   21.5   0
16   15.5   0
17   15.2   0
18   13.3   0
19   19.2   0
20   21.0   1
21   21.0   1
22   22.8   1
23   32.4   1
24   30.4   1
25   33.9   1
26   27.3   1
27   26.0   1
28   30.4   1
29   15.8   1
30   19.7   1
31   15.0   1
32   21.4   1
> mdata
$`0`
 [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2

$`1`
 [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4

> t.test(mpg~am, data=mtcars)

	Welch Two Sample t-test

data:  mpg by am
t = -3.7671, df = 18.332, p-value = 0.001374
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -11.280194  -3.209684
sample estimates:
mean in group 0 mean in group 1 
       17.14737        24.39231 

> t.test(mpg~am, data=mtcars, var.equal=T)

	Two Sample t-test

data:  mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -10.84837  -3.64151
sample estimates:
mean in group 0 mean in group 1 
       17.14737        24.39231 

> m1 <- mdata[[1]]
> m2 <- mdata[[2]]
> m1
 [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
[13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2
> m2
 [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0
[13] 21.4
> m1.var <- var(m1)
> m2.var <- var(m2)
> m1.n <- length(m1)
> m2.n <- length(m2)
> m1.df <- length(m1)-1
> m2.df <- length(m2)-1
> m1.ss <- m1.var*m1.df
> m2.ss <- m2.var*m2.df
> m1.ss
[1] 264.5874
> m2.ss
[1] 456.3092
> m12.ss <- m1.ss+m2.ss
> m12.ss
[1] 720.8966
> m12.df <- m1.df+m2.df
> pv <- m12.ss/m12.df
> pv
[1] 24.02989
> pv/m1.n
[1] 1.264731
> pv/m2.n
[1] 1.848453
> m.se <- sqrt((pv/m1.n)+(pv/m2.n))
> m.se
[1] 1.764422
> m1.m <- mean(m1)
> m2.m <- mean(m2)
> m.tvalue <- (m1.m-m2.m)/m.se
> m.tvalue
[1] -4.106127


> t.test(mpg~am, data=mtcars, var.equal=T)

	Two Sample t-test

data:  mpg by am
t = -4.1061, df = 30, p-value = 0.000285
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -10.84837  -3.64151
sample estimates:
mean in group 0 mean in group 1 
       17.14737        24.39231

====== anova: mtcars ======


stats4each = function(x,y) {
   meani <- tapply(x,y,mean)
   vari <- tapply(x,y,var)
   ni <- tapply(x,y,length)
   dfi <- tapply(x,y,length)-1
   ssi <- tapply(x,y,var)*(tapply(x,y,length)-1)
   out <- rbind(meani,vari,ni,dfi,ssi)
   
   return(out)  
}

library(MASS)

tempd <- iris
x <- tempd$Species
y <- tempd$Sepal.Width

tempd <- mtcars
x <- tempd$gear
y <- tempd$mpg

tempd <- mtcars
x <- tempd$am
y <- tempd$mpg


x <- factor(x)
dfbetween <- nlevels(x)-1

stats <- stats4each(y, x)
stats 

sswithin <- sum(stats[5,])
sstotal <- var(y)*(length(y)-1)
ssbetween <- sstotal-sswithin

round(sswithin,2)
round(ssbetween,2)
round(sstotal,2)

dfwithin <- sum(stats[4,])
dftotal <- length(y)-1

dfwithin
dfbetween
dftotal

mswithin <- sswithin / dfwithin
msbetween <- ssbetween / dfbetween
mstotal <- sstotal / dftotal

round(mswithin,2)
round(msbetween,2)
round(mstotal,2)

fval <- round(msbetween/mswithin,2)
fval
siglevel <- pf(q=fval, df1=dfbetween, df2=dfwithin, lower.tail=FALSE)
siglevel

mod <- aov(y~x, data=tempd)
summary(mod)

====== cor ======


attach(mtcars)
cor(mpg, hp)

mycor <- cov(mpg,hp)/(sd(mpg)*sd(hp))
mycor

sp <- cov(mpg,hp)*(length(mtcars$hp)-1)
ssx <- var(mpg)*(length(mtcars$mpg)-1)
ssy <- var(hp)*(length(mtcars$hp)-1)

mycor2 <- sp/sqrt(ssx*ssy)
mycor2

mycor2 == mycor
mycor == cor(mpg,hp)
mycor2 == cor(mpg,hp)