You should also read the pages on correlation, regression, multiple regression, and the derivation of a and b in a simple regression.

Covariance

Variance and Covariance
\begin{eqnarray}
\text{Var}(X) = \text{Cov}(X, X) & = & \frac{SS}{df} \nonumber \\
& = & \frac{\sum{(\text{error})^2}}{n-1} \nonumber \\
& = & \frac{\sum_{i=1}^{n} (X_i-\overline{X})(X_i-\overline{X})}{n-1} \\
\text{Cov}(X, Y) & = & \frac{SP}{df} \nonumber \\
& = & \frac{\sum{(\text{product of errors from each mean})}}{n-1} \nonumber \\
& = & \frac{\sum_{i=1}^{n} (X_i-\overline{X})(Y_i-\overline{Y})}{n-1}
\end{eqnarray}

Note that

  • the covariance can be negative.
  • the covariance is affected by the unit of measurement.
    • e.g., Y = investment in units of 100 million KRW vs. Y = the same investment in KRW
    • the covariance value changes even though nothing substantive has changed.
  • To fix this,
    • divide the covariance by the unit of measurement (the sd) of each of x and y, as in the sketch below.
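
A minimal sketch (not part of the lecture code; the numbers are made up) showing that rescaling the measurement unit changes the covariance but not the correlation:

x  <- c(2, 4, 6, 8, 10)           # hypothetical predictor
y1 <- c(1.2, 2.1, 2.8, 4.4, 4.9)  # investment in 100-million-KRW units
y2 <- y1 * 1e8                    # the same investment recorded in KRW
cov(x, y1)                        # covariance in the original unit
cov(x, y2)                        # 1e8 times larger: unit-dependent
cor(x, y1)                        # correlation is unit-free
cor(x, y2)                        # identical to cor(x, y1)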

Correlation

\begin{eqnarray}
\text{correlation, } r & = & \frac{\text{Cov}(X,Y)}{sd(X) \times sd(Y)} \\
& = & \frac{SP_{XY}}{\sqrt{SS_X \times SS_Y}} \nonumber
\end{eqnarray}

F-test (ANOVA)

As a reminder:
\begin{eqnarray*}
\text{F} & = & \frac{\text{MS}_\text{between group}}{\text{MS}_\text{within group}} \\
& = & \frac{\text{Difference due to the treatment (IV)}}{\text{Difference due to random error}} \\
& = & \frac{\text{Difference explained by the IV}}{\text{Difference that remains random}} \\
\text{R}^{2} & = & \frac{\text{SS}_\text{between}}{\text{SS}_\text{total}}
\end{eqnarray*}
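
A minimal one-way ANOVA sketch (not part of the lecture code; the groups and numbers are made up) to recall where F = MS between / MS within comes from:

set.seed(7)
g <- gl(3, 10, labels = c("a", "b", "c"))                # 3 groups of 10
score <- rnorm(30, mean = rep(c(10, 12, 15), each = 10), sd = 2)
summary(aov(score ~ g))   # F value = Mean Sq of g / Mean Sq of Residuals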

Check in R

# data prep
rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) } # forces the sample to have exactly the given mean and sd
set.seed(191)
n.x <- 40 
m.x <- 100
s.x <- 10
x <- rnorm2(n.x, m.x, s.x)
x

# y has a linear relationship with x
a <- 2
b <- -10
n.y <- n.x 
re <- rnorm(n.y, 0, 20)
y <- (a*x) + b + re
y

# covariance x and y
cov(x,y)
sd(x)
sd(y)
# correlation coefficient 
cov(x,y)/(sd(x)*sd(y))
cor(x,y)

n.x
df.x <- n.x-1
df.y <- df.x
sp.xy <- cov(x,y) * df.x  # or, equivalently:
m.y <- mean(y)
sp.xy2 <- sum((x-m.x)*(y-m.y))
sp.xy == sp.xy2

ss.x <- var(x) * df.x
ss.y <- var(y) * df.y
sp.xy/sqrt(ss.x*ss.y)
cor(x,y)
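
# An aside (not in the original run): `==` on floating-point values can
# fail on tiny rounding differences; all.equal() is the safer comparison.
# as.numeric() drops the 1x1 matrix wrapper that cov() returns.
all.equal(as.numeric(sp.xy), sp.xy2)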

Check in R: output

> 
> # data prep
> rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) } 
> set.seed(191)
> n.x <- 40 
> m.x <- 100
> s.x <- 10
> x <- rnorm2(n.x, m.x, s.x)
> x
           [,1]
 [1,]  99.35817
 [2,] 107.72169
 [3,] 111.04574
 [4,] 100.65968
 [5,] 103.85350
 [6,] 122.91296
 [7,] 102.07927
 [8,]  96.92756
 [9,] 115.96195
[10,]  94.24901
[11,]  97.30033
[12,] 110.23739
[13,] 114.73077
[14,] 102.28326
[15,] 112.89297
[16,]  96.12475
[17,] 100.23641
[18,]  88.18567
[19,]  94.75283
[20,] 110.71928
[21,] 105.81396
[22,]  99.89274
[23,] 104.14215
[24,] 103.82149
[25,]  97.85588
[26,] 104.26732
[27,]  90.79210
[28,]  86.64193
[29,]  91.08626
[30,]  77.77150
[31,] 116.52461
[32,]  99.52034
[33,]  85.34661
[34,]  96.14854
[35,] 100.53925
[36,]  95.45190
[37,]  77.35589
[38,] 100.05830
[39,]  92.06457
[40,]  92.67148
attr(,"scaled:center")
[1] -0.08680858
attr(,"scaled:scale")
[1] 1.039345
> 
> # y has a linear relationship with x
> a <- 2
> b <- -10
> n.y <- n.x 
> re <- rnorm(n.y, 0, 20)
> y <- (a*x) + b + re
> y
          [,1]
 [1,] 175.7367
 [2,] 200.0202
 [3,] 241.0439
 [4,] 166.0401
 [5,] 195.5682
 [6,] 210.8411
 [7,] 181.9459
 [8,] 193.8544
 [9,] 225.4417
[10,] 182.5276
[11,] 194.5656
[12,] 232.4421
[13,] 255.3070
[14,] 209.0313
[15,] 219.1602
[16,] 181.7819
[17,] 214.3124
[18,] 187.2961
[19,] 207.3581
[20,] 235.0318
[21,] 199.2244
[22,] 208.1031
[23,] 182.2575
[24,] 201.4683
[25,] 169.6124
[26,] 173.7941
[27,] 161.0332
[28,] 152.1292
[29,] 153.4874
[30,] 147.7652
[31,] 189.2685
[32,] 188.5691
[33,] 155.5620
[34,] 172.8731
[35,] 184.9119
[36,] 183.3182
[37,] 119.8266
[38,] 162.6349
[39,] 170.0872
[40,] 150.7684
attr(,"scaled:center")
[1] -0.08680858
attr(,"scaled:scale")
[1] 1.039345
> cov(x,y)
         [,1]
[1,] 222.3867
> sd(x)
[1] 10
> sd(y)
[1] 28.35979
> cov(x,y)/(sd(x)*sd(y))
          [,1]
[1,] 0.7841619
> cor(x,y)
          [,1]
[1,] 0.7841619
> 
> n.x
[1] 40
> df.x <- n.x-1
> df.y <- df.x
> sp.xy <- cov(x,y) * df.x
> ss.x <- var(x) * df.x
> ss.y <- var(y) * df.y
> sp.xy/sqrt(ss.x*ss.y)
          [,1]
[1,] 0.7841619
> cor(x,y)
          [,1]
[1,] 0.7841619
> 
> 

code

rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) } 
set.seed(191)
n.x <- 40
m.x <- 100
s.x <- 10
x <- rnorm2(n.x, m.x, s.x)
x
ss.x <- sum((m.x - x)^2)
df.x <- n.x - 1
ss.x/df.x
var(x)

# linear relationship
a <- 2
b <- -10
n.y <- n.x 
re <- rnorm(n.y, 0, 20)
y <- (a*x) + b + re
y

m.y <- mean(y)
df.y <- n.y - 1
ss.y <- sum((y-m.y)^2)
ss.y/df.y
var(y)

#
prod <- (x-m.x)*(y-m.y)
sp <- sum(prod)
sp/df.y # covariance value
cov(x,y) # the covariance function

s.x <- sd(x) # standard deviation of x
s.y <- sd(y) # and of y
(sp/df.y)/(s.x*s.y) # correlation value
cor(x,y) # the correlation function

df.y 
df.x

cov(x,y)/(s.x*s.y)
# r is the covariance divided by the product of the two sds
(sp/df.y)/sqrt((ss.x/df.x)*(ss.y/df.y)) 
# canceling the n-1 in the numerator and denominator:
sp/sqrt(ss.x*ss.y) 
cor(x,y)

# graph with ggplot2
df <- data.frame(x,y)
library(ggplot2)
ggplot(data=df, aes(x,y)) +
  geom_point(color="blue", size=1.5, pch=1) +
  geom_hline(aes(yintercept=mean(y)), size=1, color="darkgreen") +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth", color="red", size=1)

# note that the red line is y.hat = a + bx,
# where the sum of squared errors, sum((y - y.hat)^2), is minimized

lm.df <- lm(y~x, data = df)
summary(lm.df)
a <- summary(lm.df)$coefficients[1]
b <- summary(lm.df)$coefficients[2]
a
b
# how to derive a and b 
# how to derive coefficient values 
# see wiki doc at 
# http://commres.net/wiki/deriviation_of_a_and_b_in_a_simple_regression
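# In brief (a sketch of that derivation): choose a and b to minimize
# SS.res = sum((y - a - b*x)^2). Setting the partial derivatives to zero:
#   d/da: -2 * sum(y - a - b*x) = 0       ->  a = m.y - b * m.x
#   d/db: -2 * sum(x * (y - a - b*x)) = 0 ->  b = SP / SS.x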
b2 <- sp/ss.x
b3 <- cov(x,y)/var(x,x) # b2 == b3
# since m.y = a + (b*m.x),
a2 <- m.y - (b2*m.x)

b3
b2
b

a2
a

#
# reg, res, total error 
# 
y.hat <- a+(b*x)
y.hat2 <- predict(lm.df)
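# An aside (not in the original run): the hand computation matches predict();
# as.numeric() strips names/attributes so all.equal() compares only values.
all.equal(as.numeric(y.hat), as.numeric(y.hat2))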
head(y)
head(y.hat) 
m.y

tmp <- data.frame(x, y, y.hat)
head(tmp)
plot(x,y) # the original data
plot(x,y.hat) # the regression-line predictions
plot(x, rep(m.y, length(x))) # the mean as the prediction line (tmp has no m.y column)

error <- y - y.hat
explained <- y.hat - m.y
tmp2 <- data.frame(tmp, explained, error)
head(tmp2)

head(y.hat == explained + m.y)
head(y == explained + error + m.y)

# note the below: x and error are uncorrelated (r = 0 up to rounding),
# while x and explained are perfectly correlated (r = 1)
plot(error)
hist(error)
plot(x,error)
cor(x,error)
################
plot(x,explained)
cor(x,explained)
plot(x,(explained+m.y))
cor(x,(explained+m.y))

head(explained+error)
plot(x,(explained+error))
head(explained+error+m.y == y)
plot(x,(explained+error+m.y))

cor(x,(explained+error))
cor(x,(explained+error+m.y))
cor(x,y)
# see this also
round(cor(tmp2),3)
###############

# sum of square values 
ss.res <- sum(error^2)
ss.reg <- sum(explained^2)
ss.y
ss.res
ss.reg
ss.res + ss.reg
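# An aside (not in the original run): the total SS partitions exactly,
# SS.total = SS.reg + SS.res; all.equal() avoids a floating-point `==`.
all.equal(ss.y, ss.res + ss.reg)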

# degrees of freedom values
k <- 2 # number of parameters estimated (a and b)
df.reg <- k - 1 # only the slope counts toward the regression df
df.res <- n.x - df.reg - 1
ms.res <- ss.res/df.res
ms.reg <- ss.reg/df.reg

ss.reg
df.reg
ms.reg 

ss.res
df.res
ms.res

# r square 
ss.y
ss.reg
r.sq <- ss.reg/ss.y
r.sq
# asking whether the r.sq value above is large enough amounts to
# asking whether ss.reg is large enough, i.e., whether the IV, x,
# contributed enough to explaining y. that is tested with an F-test:
# F = MS.bet / MS.within
# for regression, F = MS.reg / MS.res
f.cal <- ms.reg/ms.res
f.cal
pf(f.cal, df.reg, df.res, lower.tail = F)
# check anova test 
anova(lm.df)
summary(lm.df)

# standard error for the b coefficient
# the b coefficient is an estimate:
# the b obtained from these n.y data points estimates the
# population b, which should lie within a range around the
# estimate defined by its standard error.

res <- resid(lm.df)
data.frame(head(res))
data.frame(head(error))
se.b <- sqrt(ms.res/ss.x)
se.b
# as x increases by 1 unit, y is predicted to increase by b, give or take about 2*se.b (a rough 95% CI)
b + c(-2*se.b, 2*se.b) 
b

# to be exact 
t.crit <- qt(.975, df.res)
t.crit
b + c(-t.crit*se.b, t.crit*se.b)
b
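
# An aside (not in the original run): R's built-in confint() on the lm
# object gives the same interval for the x (slope) row.
confint(lm.df, level = .95)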

t.cal <- lm.df$coefficients[2]/se.b
t.cal


pt(t.cal, df.res, lower.tail = F)*2
pf(f.cal, df.reg, df.res, lower.tail = F)

# see also: for a simple regression, t^2 = F
t.cal
t.cal^2
f.cal

code output

> rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) } 
> set.seed(191)
> n.x <- 40
> m.x <- 100
> s.x <- 10
> x <- rnorm2(n.x, m.x, s.x)
> x
           [,1]
 [1,]  99.35817
 [2,] 107.72169
 [3,] 111.04574
 [4,] 100.65968
 [5,] 103.85350
 [6,] 122.91296
 [7,] 102.07927
 [8,]  96.92756
 [9,] 115.96195
[10,]  94.24901
[11,]  97.30033
[12,] 110.23739
[13,] 114.73077
[14,] 102.28326
[15,] 112.89297
[16,]  96.12475
[17,] 100.23641
[18,]  88.18567
[19,]  94.75283
[20,] 110.71928
[21,] 105.81396
[22,]  99.89274
[23,] 104.14215
[24,] 103.82149
[25,]  97.85588
[26,] 104.26732
[27,]  90.79210
[28,]  86.64193
[29,]  91.08626
[30,]  77.77150
[31,] 116.52461
[32,]  99.52034
[33,]  85.34661
[34,]  96.14854
[35,] 100.53925
[36,]  95.45190
[37,]  77.35589
[38,] 100.05830
[39,]  92.06457
[40,]  92.67148
attr(,"scaled:center")
[1] -0.08680858
attr(,"scaled:scale")
[1] 1.039345
> ss.x <- sum((m.x - x)^2)
> df.x <- n.x - 1
> ss.x/df.x
[1] 100
> var(x)
     [,1]
[1,]  100
> 
> # linear relationship
> a <- 2
> b <- -10
> n.y <- n.x 
> re <- rnorm(n.y, 0, 20)
> y <- (a*x) + b + re
> y
          [,1]
 [1,] 175.7367
 [2,] 200.0202
 [3,] 241.0439
 [4,] 166.0401
 [5,] 195.5682
 [6,] 210.8411
 [7,] 181.9459
 [8,] 193.8544
 [9,] 225.4417
[10,] 182.5276
[11,] 194.5656
[12,] 232.4421
[13,] 255.3070
[14,] 209.0313
[15,] 219.1602
[16,] 181.7819
[17,] 214.3124
[18,] 187.2961
[19,] 207.3581
[20,] 235.0318
[21,] 199.2244
[22,] 208.1031
[23,] 182.2575
[24,] 201.4683
[25,] 169.6124
[26,] 173.7941
[27,] 161.0332
[28,] 152.1292
[29,] 153.4874
[30,] 147.7652
[31,] 189.2685
[32,] 188.5691
[33,] 155.5620
[34,] 172.8731
[35,] 184.9119
[36,] 183.3182
[37,] 119.8266
[38,] 162.6349
[39,] 170.0872
[40,] 150.7684
attr(,"scaled:center")
[1] -0.08680858
attr(,"scaled:scale")
[1] 1.039345
> 
> m.y <- mean(y)
> df.y <- n.y - 1
> ss.y <- sum((y-m.y)^2)
> ss.y/df.y
[1] 804.2779
> var(y)
         [,1]
[1,] 804.2779
> 
> #
> prod <- (x-m.x)*(y-m.y)
> sp <- sum(prod)
 
> sp/df.y # covariance value
[1] 222.3867
> cov(x,y) # the covariance function
         [,1]
[1,] 222.3867
> 
> s.x <- sd(x) # standard deviation of x
> s.y <- sd(y) # and of y
> (sp/df.y)/(s.x*s.y) # correlation value
[1] 0.7841619
> cor(x,y) # the correlation function
          [,1]
[1,] 0.7841619
> 
> df.y 
[1] 39
> df.x
[1] 39
> 
> cov(x,y)/(s.x*s.y)
          [,1]
[1,] 0.7841619
> # r is the covariance divided by the product of the two sds
> (sp/df.y)/sqrt((ss.x/df.x)*(ss.y/df.y)) 
[1] 0.7841619
> # canceling the n-1 in the numerator and denominator:
> sp/sqrt(ss.x*ss.y) 
[1] 0.7841619
> cor(x,y)
          [,1]
[1,] 0.7841619
> 
> # graph with ggplot2
> df <- data.frame(x,y)
> library(ggplot2)
> ggplot(data=df, aes(x,y)) +
+   geom_point(color="blue", size=1.5, pch=1) +
+   geom_hline(aes(yintercept=mean(y)), size=1, color="darkgreen") +
+   stat_smooth(method = "lm",
+               formula = y ~ x,
+               geom = "smooth", color="red", size=1)
> 
> # note that the red line is y.hat = a + bx,
> # where the sum of squared errors, sum((y - y.hat)^2), is minimized
> 
> lm.df <- lm(y~x, data = df)
> summary(lm.df)

Call:
lm(formula = y ~ x, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-35.880 -11.932  -0.458  12.199  34.148 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -33.9867    28.6879  -1.185    0.243    
x             2.2239     0.2855   7.790 2.16e-09 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 17.83 on 38 degrees of freedom
Multiple R-squared:  0.6149,	Adjusted R-squared:  0.6048 
F-statistic: 60.68 on 1 and 38 DF,  p-value: 2.157e-09

> a <- summary(lm.df)$coefficients[1]
> b <- summary(lm.df)$coefficients[2]
> a
[1] -33.98667
> b
[1] 2.223867
> # how to derive a and b 
> # how to derive coefficient values 
> # see wiki doc at 
> # http://commres.net/wiki/deriviation_of_a_and_b_in_a_simple_regression
> b2 <- sp/ss.x
> b3 <- cov(x,y)/var(x,x) # b2 == b3
> # since m.y = a + (b*m.x),
> a2 <- m.y - (b2*m.x)
> 
> b3
         [,1]
[1,] 2.223867
> b2
[1] 2.223867
> b
[1] 2.223867
> 
> a2
[1] -33.98667
> a
[1] -33.98667
> 
> #
> # reg, res, total error 
> # 
> y.hat <- a+(b*x)
> y.hat2 <- predict(lm.df)
> head(y)
         [,1]
[1,] 175.7367
[2,] 200.0202
[3,] 241.0439
[4,] 166.0401
[5,] 195.5682
[6,] 210.8411
> head(y.hat) 
         [,1]
[1,] 186.9727
[2,] 205.5720
[3,] 212.9643
[4,] 189.8671
[5,] 196.9697
[6,] 239.3554
> m.y
[1] 188.4
> 
> tmp <- data.frame(x, y, y.hat)
> head(tmp)
          x        y    y.hat
1  99.35817 175.7367 186.9727
2 107.72169 200.0202 205.5720
3 111.04574 241.0439 212.9643
4 100.65968 166.0401 189.8671
5 103.85350 195.5682 196.9697
6 122.91296 210.8411 239.3554
> plot(x,y) # the original data
> plot(x,y.hat) # the regression-line predictions
> plot(x, rep(m.y, length(x))) # the mean as the prediction line
> 
> error <- y - y.hat
> explained <- y.hat - m.y
> tmp2 <- data.frame(tmp, explained, error)
> head(tmp2)
          x        y    y.hat explained      error
1  99.35817 175.7367 186.9727 -1.427352 -11.235956
2 107.72169 200.0202 205.5720 17.172003  -5.551819
3 111.04574 241.0439 212.9643 24.564266  28.079646
4 100.65968 166.0401 189.8671  1.467035 -23.826982
5 103.85350 195.5682 196.9697  8.569671  -1.401474
6 122.91296 210.8411 239.3554 50.955365 -28.514250
> 
> head(y.hat == explained + m.y)
     [,1]
[1,] TRUE
[2,] TRUE
[3,] TRUE
[4,] TRUE
[5,] TRUE
[6,] TRUE
> head(y == explained + error + m.y)
     [,1]
[1,] TRUE
[2,] TRUE
[3,] TRUE
[4,] TRUE
[5,] TRUE
[6,] TRUE
> 
> # note the below: x and error are uncorrelated (r = 0 up to rounding),
> # while x and explained are perfectly correlated (r = 1)
> plot(error)
> hist(error)
> plot(x,error)
> cor(x,error)
             [,1]
[1,] 7.981058e-17
> ################
> plot(x,explained)
> cor(x,explained)
     [,1]
[1,]    1
> plot(x,(explained+m.y))
> cor(x,(explained+m.y))
     [,1]
[1,]    1
> 
> head(explained+error)
           [,1]
[1,] -12.663308
[2,]  11.620185
[3,]  52.643911
[4,] -22.359948
[5,]   7.168197
[6,]  22.441115
> plot(x,(explained+error))
> head(explained+error+m.y == y)
     [,1]
[1,] TRUE
[2,] TRUE
[3,] TRUE
[4,] TRUE
[5,] TRUE
[6,] TRUE
> plot(x,(explained+error+m.y))
> 
> cor(x,(explained+error))
          [,1]
[1,] 0.7841619
> cor(x,(explained+error+m.y))
          [,1]
[1,] 0.7841619
> cor(x,y)
          [,1]
[1,] 0.7841619
> # see this also
> round(cor(tmp2),3)
              x     y y.hat explained error
x         1.000 0.784 1.000     1.000 0.000
y         0.784 1.000 0.784     0.784 0.621
y.hat     1.000 0.784 1.000     1.000 0.000
explained 1.000 0.784 1.000     1.000 0.000
error     0.000 0.621 0.000     0.000 1.000
> ###############
> 
> # sum of square values 
> ss.res <- sum(error^2)
> ss.reg <- sum(explained^2)
> ss.y
[1] 31366.84
> ss.res
[1] 12079.06
> ss.reg
[1] 19287.78
> ss.res + ss.reg
[1] 31366.84
> 
> # degrees of freedom values
> k <- 2 # number of parameters estimated (a and b)
> df.reg <- k - 1 # only the slope counts toward the regression df
> df.res <- n.x - df.reg - 1
> ms.res <- ss.res/df.res
> ms.reg <- ss.reg/df.reg
> 
> ss.reg
[1] 19287.78
> df.reg
[1] 1
> ms.reg 
[1] 19287.78
> 
> ss.res
[1] 12079.06
> df.res
[1] 38
> ms.res
[1] 317.87
> 
> # r square 
> ss.y
[1] 31366.84
> ss.reg
[1] 19287.78
> r.sq <- ss.reg/ss.y
> r.sq
[1] 0.6149098
> # asking whether the r.sq value above is large enough amounts to
> # asking whether ss.reg is large enough, i.e., whether the IV, x,
> # contributed enough to explaining y. that is tested with an F-test:
> # F = MS.bet / MS.within
> # for regression, F = MS.reg / MS.res
> f.cal <- ms.reg/ms.res
> f.cal
[1] 60.67819
> pf(f.cal, df.reg, df.res, lower.tail = F)
[1] 2.157284e-09
> # check anova test 
> anova(lm.df)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value    Pr(>F)    
x          1  19288 19287.8  60.678 2.157e-09 ***
Residuals 38  12079   317.9                      
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> summary(lm.df)

Call:
lm(formula = y ~ x, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-35.880 -11.932  -0.458  12.199  34.148 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -33.9867    28.6879  -1.185    0.243    
x             2.2239     0.2855   7.790 2.16e-09 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 17.83 on 38 degrees of freedom
Multiple R-squared:  0.6149,	Adjusted R-squared:  0.6048 
F-statistic: 60.68 on 1 and 38 DF,  p-value: 2.157e-09

> 
> # standard error for the b coefficient
> # the b coefficient is an estimate:
> # the b obtained from these n.y data points estimates the
> # population b, which should lie within a range around the
> # estimate defined by its standard error.
> 
> res <- resid(lm.df)
> data.frame(head(res))
   head.res.
1 -11.235956
2  -5.551819
3  28.079646
4 -23.826982
5  -1.401474
6 -28.514250
> data.frame(head(error))
  head.error.
1  -11.235956
2   -5.551819
3   28.079646
4  -23.826982
5   -1.401474
6  -28.514250
> se.b <- sqrt(ms.res/ss.x)
> se.b
[1] 0.285491
> # as x increases by 1 unit, y is predicted to increase by b, give or take about 2*se.b (a rough 95% CI)
> b + c(-2*se.b, 2*se.b) 
[1] 1.652885 2.794849
> b
[1] 2.223867
> 
> # to be exact 
> t.crit <- qt(.975, df.res)
> t.crit
[1] 2.024394
> b + c(-t.crit*se.b, t.crit*se.b)
[1] 1.645921 2.801813
> b
[1] 2.223867
> 
> t.cal <- lm.df$coefficients[2]/se.b
> t.cal
       x 
7.789621 
> 
> 
> pt(t.cal, df.res, lower.tail = F)*2
           x 
2.157284e-09 
> pf(f.cal, df.reg, df.res, lower.tail = F)
[1] 2.157284e-09
> 
> # see also: for a simple regression, t^2 = F
> t.cal
       x 
7.789621 
> t.cal^2
       x 
60.67819 
> f.cal
[1] 60.67819
>
>





