====== Lecturer's note: MA, 2019 ====== In class small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406) medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692) big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814) dframe <- data.frame(small, medium, big) fib <- c(0,1,1,2,3,5,8,13,21,34) a1 <- c(1,1,2,3,5,8,13,21) a2 <- c(2,4,5,3,6,9,18,25) - select all elements that exceeds +- 1/2 sd from the mean of the column, medium in dframe (dframe%%$%%medium). - choose only odd numbers from the variable fib. - combine a1 and a2 into a data frame, dframe2 - name a1 and a2 to x and y in dfram2 1 > mmed <- mean(dframe$medium) > smed <- sd(dframe$medium) > smed <- .5 * smed > lowmed <- mmed - smed > himed <- mmed + smed > hi <- dframe$medium > himed > low <- dframe$medium < lowmed > hi > low > res <- subset(dframe$medium, low|hi) > res [1] 10.526448 11.427756 8.533180 10.058465 7.949692 > 2 > is.odd <- fib%%2==1 > is.odd [1] FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE > fib[is.odd] [1] 1 1 3 5 13 21 > 3,4 > a1 <- c(1,1,2,3,5,8,13,21) > a2 <- c(2,4,5,3,6,9,18,25) > x <- data.frame(a1, a2) > x a1 a2 1 1 2 2 1 4 3 2 5 4 3 3 5 5 6 6 8 9 7 13 18 8 21 25 > colnames(x) <- c("x", "y") > x x y 1 1 2 2 1 4 3 2 5 4 3 3 5 5 6 6 8 9 7 13 18 8 21 25 > > colnames(x)[1] <- c("k") ====== t.test: mtcars ====== > mdata <- split(mtcars$mpg, mtcars$am) > mdata $`0` [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 [13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2 $`1` [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 [13] 21.4 > stack(mdata) values ind 1 21.4 0 2 18.7 0 3 18.1 0 4 14.3 0 5 24.4 0 6 22.8 0 7 19.2 0 8 17.8 0 9 16.4 0 10 17.3 0 11 15.2 0 12 10.4 0 13 10.4 0 14 14.7 0 15 21.5 0 16 15.5 0 17 15.2 0 18 13.3 0 19 19.2 0 20 21.0 1 21 21.0 1 22 22.8 1 23 32.4 1 24 30.4 1 25 33.9 1 26 27.3 1 27 26.0 1 28 30.4 1 29 15.8 1 30 19.7 1 31 15.0 1 32 21.4 1 > mdata $`0` [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 [13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2 $`1` [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 [13] 21.4 > t.test(mpg~am, data=mtcars) Welch Two Sample t-test data: mpg by am t = -3.7671, df = 18.332, p-value = 0.001374 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -11.280194 -3.209684 sample estimates: mean in group 0 mean in group 1 17.14737 24.39231 > t.test(mpg~am, data=mtcars, var.equal=T) Two Sample t-test data: mpg by am t = -4.1061, df = 30, p-value = 0.000285 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -10.84837 -3.64151 sample estimates: mean in group 0 mean in group 1 17.14737 24.39231 > m1 <- mdata[[1]] > m2 <- mdata[[2]] > m1 [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 [13] 10.4 14.7 21.5 15.5 15.2 13.3 19.2 > m2 [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 [13] 21.4 > m1.var <- var(m1) > m2.var <- var(m2) > m1.n <- length(m1) > m2.n <- length(m2) > m1.df <- length(m1)-1 > m2.df <- length(m2)-1 > m1.ss <- m1.var*m1.df > m2.ss <- m2.var*m2.df > m1.ss [1] 264.5874 > m2.ss [1] 456.3092 > m12.ss <- m1.ss+m2.ss > m12.ss [1] 720.8966 > m12.df <- m1.df+m2.df > pv <- m12.ss/m12.df > pv [1] 24.02989 > pv/m1.n [1] 1.264731 > pv/m2.n [1] 1.848453 > m.se <- sqrt((pv/m1.n)+(pv/m2.n)) > m.se [1] 1.764422 > m1.m <- mean(m1) > m2.m <- mean(m2) > m.tvalue <- (m1.m-m2.m)/m.se > m.tvalue [1] -4.106127 > t.test(mpg~am, data=mtcars, var.equal=T) Two Sample t-test data: mpg by am t = -4.1061, df = 30, p-value = 0.000285 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -10.84837 -3.64151 sample estimates: mean in group 0 mean in group 1 17.14737 24.39231 ====== anova: mtcars ====== stats4each = function(x,y) { meani <- tapply(x,y,mean) vari <- tapply(x,y,var) ni <- tapply(x,y,length) dfi <- tapply(x,y,length)-1 ssi <- tapply(x,y,var)*(tapply(x,y,length)-1) out <- rbind(meani,vari,ni,dfi,ssi) return(out) } library(MASS) tempd <- iris x <- tempd$Species y <- tempd$Sepal.Width tempd <- mtcars x <- tempd$gear y <- tempd$mpg tempd <- mtcars x <- tempd$am y <- tempd$mpg x <- factor(x) dfbetween <- nlevels(x)-1 stats <- stats4each(y, x) stats sswithin <- sum(stats[5,]) sstotal <- var(y)*(length(y)-1) ssbetween <- sstotal-sswithin round(sswithin,2) round(ssbetween,2) round(sstotal,2) dfwithin <- sum(stats[4,]) dftotal <- length(y)-1 dfwithin dfbetween dftotal mswithin <- sswithin / dfwithin msbetween <- ssbetween / dfbetween mstotal <- sstotal / dftotal round(mswithin,2) round(msbetween,2) round(mstotal,2) fval <- round(msbetween/mswithin,2) fval siglevel <- pf(q=fval, df1=dfbetween, df2=dfwithin, lower.tail=FALSE) siglevel mod <- aov(y~x, data=tempd) summary(mod) ====== cor ====== attach(mtcars) cor(mpg, hp) mycor <- cov(mpg,hp)/(sd(mpg)*sd(hp)) mycor sp <- cov(mpg,hp)*(length(mtcars$hp)-1) ssx <- var(mpg)*(length(mtcars$mpg)-1) ssy <- var(hp)*(length(mtcars$hp)-1) mycor2 <- sp/sqrt(ssx*ssy) mycor2 mycor2 == mycor mycor == cor(mpg,hp) mycor2 == cor(mpg,hp)