====== Code ======
You need to install some packages first:
<code>
install.packages("ggplot2")
install.packages("
install.packages("
</code>
<code>
################
rnorm2 <- function(n, mean, sd) { mean + sd * scale(rnorm(n)) }
n <- 16
set.seed(101)

points(rand.00,

b <- 170 / 265
b <- round(b,1)
b
y <- b * x + rand.00

lm.y.x <- lm(y ~ x, data = df)
summary(lm.y.x)

###########################
# double check with this
###########################
str(lm.y.x)
y.res <- lm.y.x$residuals
y.pre <- lm.y.x$fitted.values
y.exp <- y.pre - m.y
# plot each piece against x
plot(x, y.res)
plot(x, y.pre)
plot(x, y.exp)
###########################
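# A quick numerical check (an added sketch): the three pieces above
# should satisfy SS(total) = SS(explained) + SS(residual).
sum((y - m.y)^2)              # total sum of squares
sum(y.exp^2) + sum(y.res^2)   # explained + residual
all.equal(sum((y - m.y)^2), sum(y.exp^2) + sum(y.res^2))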
lm.y.x$coefficients
intercept <- lm.y.x$coefficients[1]

b
a
# we got these from the above
slope
intercept
summary(lm.y.x)
lm.y.x$coefficients
# str(lm.y.x)
r.square <- ss.reg / ss.y
r.square
sqrt(r.square)
cor(x,y)
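# Note (an added sketch): sqrt(r.square) recovers only the magnitude
# of r. With a negative slope, cor(x,y) would be negative while
# sqrt(r.square) stays positive; a sign-safe version:
sign(slope) * sqrt(r.square)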
df.tot <- df.y

res <- lm.y.x$residuals
reg <- lm.y.x$fitted.values-m.y

# The above can be derived as follows
y.hat <- intercept + (slope * x)
y.hat
head(y.hat)
head(lm.y.x$fitted.values)
y.pre <- y.hat
y.res <- y - y.pre
y.res
head(res)
head(y.res)

y.reg <- y.pre - m.y
head(reg)
head(y.reg)
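# A stricter check than head() (an added sketch): the hand-derived
# vectors should match lm()'s exactly. Here y.res and y.reg are
# n x 1 matrices (x and y were generated via scale()), so flatten
# them before comparing.
all.equal(as.vector(y.res), unname(res))
all.equal(as.vector(y.reg), unname(reg))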
########################################
# This is essentially a perfect linear fit

summary(lm.res.x)
summary(lm.y.x)
########################################
# Now let's get back to the lm output
# this is to evaluate the significance of
# the slope of x (b)
summary(lm.y.x)

# the slope, we already got this
b <- slope
# intercept
a <- intercept

# the real data look like this
plot(x,y)
abline(h = mean(y), lwd=1.5, col="red")
abline(lm(y ~ x), lwd=1.5, col="blue")

# blue line is the regression line
# this slope is an estimate of the real
# (population) slope rather than just
# the sample we are using right now
library(ggplot2)

ggplot(data = df, aes(x,y)) +
  geom_point() +
  geom_smooth(method="lm")

# the real slope should reside within
# the violet area. The area is estimated
# with the standard error of the slope
# (regression coefficient)
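# The same band in base R (an added sketch): predict() with
# interval = "confidence" returns fit, lwr, and upr columns for the
# 95% confidence interval around the regression line.
head(predict(lm.y.x, interval = "confidence"))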

# in words:
# the se for b comes from summary(lm.y.x)

summary(lm.y.x)
se.b <- 0.2071   # Std. Error for x, read off the summary output
ci.se.b <- 1.96 * se.b
# this is the lowest estimate
b - ci.se.b
# the highest estimate
b + ci.se.b
# calculated estimate
b
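# R computes this interval directly (an added sketch); confint()
# uses the exact t quantile with df = n-2 instead of 1.96, so its
# bounds differ slightly from the hand calculation above.
confint(lm.y.x, level = 0.95)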

# how to get se for b?
# https://
# see also http://

ss.res
ss.x
# we didn't get ss.x above
ss.x <- sum((x-m.x)^2)
ss.x

sqrt( (ss.res/(n-2)) / ss.x )
# the above is the same as the below
# (note the parentheses: ss.res/(n-2)*ss.x would multiply by ss.x)
sqrt( ss.res / ((n-2)*ss.x) )
sqrt( 1/(n-2) * (ss.res/ss.x) )
se.b <- sqrt( 1/(n-2) * (ss.res/ss.x) )
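# Cross-check (an added sketch): the coefficient standard errors are
# the square roots of the diagonal of the coefficient
# variance-covariance matrix.
sqrt(diag(vcov(lm.y.x)))
se.b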

# now
summary(lm.y.x)
# if b is 0, b does not do anything (null hypothesis)
# To test whether b is working, we do a t-test:
# t.calculated = (b (current coefficient) - 0 (not working)) / se,
# then we test whether this value is significant with
# pt(t.b.cal, df=n-2)

t.b.cal <- (b - 0)/se.b
t.b.cal
# k = num of predictor variables
k <- 1
df.t <- n-k-1

2*pt(t.b.cal, df.t, lower.tail = F)
pf(t.b.cal^2, 1, df.t, lower.tail = F)
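# The same t and p values can be read straight off the fitted model:
# row "x" of the coefficient table holds the slope's statistics.
coef(summary(lm.y.x))["x", ]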
</code>
====== Output ======
<code>
> ################
> rnorm2 <- function(n, mean, sd) { mean + sd * scale(rnorm(n)) }
> n <- 16
> 
> set.seed(101)

> points(rand.00,
> 
> b <- 170 / 265
> b <- round(b,1)
> b
[1] 0.6
> y <- b * x + rand.00
> 
> head(df)
         x        y
1 256.4615 144.9723
2 273.7812 167.6050
3 249.5828 141.2775
4 267.1155 135.1836
5 269.0162 161.7509
6 286.0342 183.7182
> 
> # plot method 0

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.8818    54.9507  -0.962    0.352   
x             0.7996     0.2071   3.862  0.00173 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:  0.5158,	Adjusted R-squared:  0.4812 
F-statistic: 14.91 on 1 and 14 DF,  p-value: 0.001727
> 
> ###########################
> # double check with this
> ###########################
> str(lm.y.x)
List of 12
 $ coefficients : Named num [1:2] -52.9 0.8
  ..- attr(*, "names")= chr [1:2] "(Intercept)" "x"
 $ residuals    : Named num [1:16]
  ..- attr(*, "names")= chr [1:16] "1" "2" "3" "4" ...
 $ effects      : Named num [1:16]
  ..- attr(*, "names")= chr [1:16] "(Intercept)" "x" "" "" ...
 $ rank         : int 2
 $ fitted.values: Named num [1:16]
  ..- attr(*, "names")= chr [1:16] "1" "2" "3" "4" ...
 $ assign       : int [1:2] 0 1
 $ qr           :List of 5
  ..$ qr   : num [1:16, 1:2] -4 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:16] "1" "2" "3" "4" ...
  .. .. ..$ : chr [1:2] "(Intercept)" "x"
  .. ..- attr(*, "assign")= int [1:2] 0 1
  ..$ qraux: num [1:2] 1.25 1.18
  ..$ pivot: int [1:2] 1 2
  ..$ tol  : num 1e-07
  ..$ rank : int 2
  ..- attr(*, "class")= chr "qr"
 $ df.residual  : int 14
 $ xlevels      : Named list()
 $ call         : language lm(formula = y ~ x, data = df)
 $ terms        :Classes 'terms', 'formula'  language y ~ x
  .. ..- attr(*, "variables")= language list(y, x)
  .. ..- attr(*, "factors")= int [1:2, 1] 0 1
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:2] "y" "x"
  .. .. .. ..$ : chr "x"
  .. ..- attr(*, "term.labels")= chr "x"
  .. ..- attr(*, "order")= int 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. ..- attr(*, "predvars")= language list(y, x)
  .. ..- attr(*, "dataClasses")= Named chr [1:2] "nmatrix.1" "nmatrix.1"
  .. .. ..- attr(*, "names")= chr [1:2] "y" "x"
 $ model        :'data.frame':	16 obs. of  2 variables:
  ..$ y: num [1:16] 145 168 141 135 162 ...
  ..$ x: num [1:16] 256 274 250 267 269 ...
  ..- attr(*, "terms")=Classes 'terms', 'formula'  language y ~ x
  .. .. ..- attr(*, "variables")= language list(y, x)
  .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:2] "y" "x"
  .. .. .. .. ..$ : chr "x"
  .. .. ..- attr(*, "term.labels")= chr "x"
  .. .. ..- attr(*, "order")= int 1
  .. .. ..- attr(*, "intercept")= int 1
  .. .. ..- attr(*, "response")= int 1
  .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. .. ..- attr(*, "predvars")= language list(y, x)
  .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "nmatrix.1" "nmatrix.1"
  .. .. .. ..- attr(*, "names")= chr [1:2] "y" "x"
 - attr(*, "class")= chr "lm"
> y.res <- lm.y.x$residuals
> y.pre <- lm.y.x$fitted.values
> y.exp <- y.pre - m.y
> plot(x, y.res)
> plot(x, y.pre)
> plot(x, y.exp)
> ###########################
> lm.y.x$coefficients
(Intercept)           x 
-52.8817788   0.7995539 
> intercept <- lm.y.x$coefficients[1]
> slope <- lm.y.x$coefficients[2]
> intercept
(Intercept) 
  -52.88178 
> slope
        x 
0.7995539 
> 
> # library(ggplot2)

> ss.y <- sum((y-mean(y))^2)
> ss.y
[1] 4183.193
> df.y <- length(y)-1
> df.y
[1] 15
> ss.y/df.y
[1] 278.8795
> var(y)
         [,1]
[1,] 278.8795
> 
> m.x <- mean(x)

> df.tot <- n-1
> sp/df.tot
[1] 179.8996
> cov.xy <- cov(x,y)
> 

> r.xy
          [,1]
[1,] 0.7181756
> cor(x,y)
          [,1]
[1,] 0.7181756
> cor.test(x,y)

data:  x and y
t = 3.8616, df = 14, p-value = 0.001727
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.3454526
sample estimates:
      cor 
0.7181756 
> 

> b
          [,1]
[1,] 0.7995539
> a
          [,1]
[1,] -52.88178
> # we got these from the above
> slope
        x 
0.7995539 
> intercept
(Intercept) 
  -52.88178 
> 
> 

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.8818    54.9507  -0.962    0.352   
x             0.7996     0.2071   3.862  0.00173 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:  0.5158,	Adjusted R-squared:  0.4812 
F-statistic: 14.91 on 1 and 14 DF,  p-value: 0.001727
> lm.y.x$coefficients
(Intercept)           x 
-52.8817788   0.7995539 
> # str(lm.y.x)
> 

> y <- y
> m.y
[1] 159
> 
> res <- y - y.pred
> reg <- y.pred - m.y
> ss.y
[1] 4183.193
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)
> ss.y
[1] 4183.193
> ss.res
[1] 2025.602
> ss.reg
[1] 2157.592
> ss.res + ss.reg
[1] 4183.193
> 
> r.square <- ss.reg / ss.y
> r.square
[1] 0.5157762
> sqrt(r.square)
[1] 0.7181756
> cor(x,y)
          [,1]
[1,] 0.7181756
> 
> df.tot <- df.y

[1] 1
> df.res
[1] 14
> df.tot
[1] 15
> 
> ss.tot <- ss.y
> ss.reg
[1] 2157.592
> ss.res
[1] 2025.602
> ss.tot
[1] 4183.193
> 
> ms.reg <- ss.reg / df.reg
> ms.res <- ss.res / df.res
> ms.reg
[1] 2157.592
> ms.res
[1] 144.6858
> 
> f.cal <- ms.reg/ms.res
> f.cal
[1] 14.91225
> 
> p.val <- pf(f.cal, df.reg, df.res, lower.tail = F)
> p.val
[1] 0.001727459
> anova(lm.y.x)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value   Pr(>F)   
x          1 2157.6 2157.59  14.912 0.001727 **
Residuals 14 2025.6  144.69                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> res <- lm.y.x$residuals
> reg <- lm.y.x$fitted.values-m.y
> 
> # The above can be derived as follows
> y.hat <- intercept + (slope * x)
> y.hat
          [,1]
 [1,] 152.1730
 [2,] 166.0210
 [3,] 146.6731
 [4,] 160.6914
 [5,] 162.2112
 [6,] 175.8179
 [7,] 167.0666
 [8,] 155.5354
 [9,] 171.7678
[10,] 153.7931
[11,] 165.6110
[12,] 144.7831
[13,] 179.8185
[14,] 134.1906
[15,] 153.5815
[16,] 154.2648
attr(,"scaled:center")
[1] 0.1070574
attr(,"scaled:scale")
[1] 0.7608404
> head(y.hat)
         [,1]
[1,] 152.1730
[2,] 166.0210
[3,] 146.6731
[4,] 160.6914
[5,] 162.2112
[6,] 175.8179
> head(lm.y.x$fitted.values)
       1        2        3        4        5        6 
152.1730 166.0210 146.6731 160.6914 162.2112 175.8179 
> y.pre <- y.hat
> y.res <- y - y.pre
> y.res
             [,1]
 [1,]  -7.2007773
 [2,]   1.5840208
 [3,]  -5.3956693
 [4,] -25.5078178
 [5,]  -0.4602376
 [6,]   7.9002773
 [7,]
 [8,] -16.3176727
 [9,]
[10,] -15.1613471
[11,]
[12,]
[13,]
[14,]  15.4541201
[15,]  15.9625789
[16,]
attr(,"scaled:center")
[1] 0.1070574
attr(,"scaled:scale")
[1] 0.7608404
> head(res)
          1           2           3           4           5           6 
 -7.2007773   1.5840208  -5.3956693 -25.5078178  -0.4602376   7.9002773 
> head(y.res)
            [,1]
[1,]  -7.2007773
[2,]   1.5840208
[3,]  -5.3956693
[4,] -25.5078178
[5,]  -0.4602376
[6,]   7.9002773
> 
> y.reg <- y.pre - m.y
> head(reg)

> head(y.reg)
           [,1]
[1,]  -6.826963
[2,]
[3,] -12.326872
[4,]
[5,]
[6,]  16.817938
> ########################################
> 
> # This is essentially a perfect linear fit

Residuals:
       Min         1Q     Median         3Q        Max 
-1.216e-14 -9.993e-15 -2.381e-15  7.912e-15

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.119e+02
x
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error:
Multiple R-squared:      1,	Adjusted R-squared:      1 
F-statistic:
Warning message:
In summary.lm(lm.reg.x) :
  essentially perfect fit: summary may be unreliable
> 
> # The relationship between residuals and

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.956e-14  5.495e+01
x

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:
F-statistic:
> summary(lm.y.x)

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.8818    54.9507  -0.962    0.352   
x             0.7996     0.2071   3.862  0.00173 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:  0.5158,	Adjusted R-squared:  0.4812 
F-statistic: 14.91 on 1 and 14 DF,  p-value: 0.001727
> 
> ########################################
> # Now let's get back to the lm output
> # this is to evaluate the significance of
> # the slope of x (b)
> summary(lm.y.x)

Call:
lm(formula = y ~ x, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.8818    54.9507  -0.962    0.352   
x             0.7996     0.2071   3.862  0.00173 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:  0.5158,	Adjusted R-squared:  0.4812 
F-statistic: 14.91 on 1 and 14 DF,  p-value: 0.001727

> 
> # the slope, we already got this
> b <- slope
> # intercept
> a <- intercept
> 
> # the real data look like this
> plot(x,y)
> abline(h = mean(y), lwd=1.5, col="red")
> abline(lm(y ~ x), lwd=1.5, col="blue")
> 
> # blue line is the regression line
> # this slope is an estimate of the real
> # (population) slope rather than just
> # the sample we are using right now
> library(ggplot2)
> 
> ggplot(data = df, aes(x,y)) +
+   geom_point() +
+   geom_smooth(method="lm")
`geom_smooth()` using formula = 'y ~ x'
> 
> # the real slope should reside within
> # the violet area. The area is estimated
> # with the standard error of the slope
> # (regression coefficient)
> 
> # in words:
> # the se for b comes from summary(lm.y.x)
> 
> summary(lm.y.x)

Call:
lm(formula = y ~ x, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.8818    54.9507  -0.962    0.352   
x             0.7996     0.2071   3.862  0.00173 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:  0.5158,	Adjusted R-squared:  0.4812 
F-statistic: 14.91 on 1 and 14 DF,  p-value: 0.001727

> se.b <- 0.2071
> ci.se.b <- 1.96 * se.b
> # this is the lowest estimate
> b - ci.se.b
        x 
0.3936379 
> # the highest estimate
> b + ci.se.b
      x 
1.20547 
> # calculated estimate
> b
        x 
0.7995539 
> 
> # how to get se for b?
> # https://
> # see also http://
> 
> ss.res
[1] 2025.602
> ss.x
[1] 3375
> # we didn't get ss.x above
> ss.x <- sum((x-m.x)^2)
> ss.x
[1] 3375
> 
> sqrt( (ss.res/(n-2)) / ss.x )
[1] 0.2070504
> # the above is the same as the below
> sqrt( ss.res / ((n-2)*ss.x) )
[1] 0.2070504
> sqrt( 1/(n-2) * (ss.res/ss.x) )
[1] 0.2070504
> se.b <- sqrt( 1/(n-2) * (ss.res/ss.x) )
> 
> # now
> summary(lm.y.x)

Call:
lm(formula = y ~ x, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-25.508  -5.847   2.617   7.595  15.963 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.8818    54.9507  -0.962    0.352   
x             0.7996     0.2071   3.862  0.00173 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 12.03 on 14 degrees of freedom
Multiple R-squared:  0.5158,	Adjusted R-squared:  0.4812 
F-statistic: 14.91 on 1 and 14 DF,  p-value: 0.001727

> # if b is 0, b does not do anything (null hypothesis)
> # To test whether b is working, we do a t-test:
> # t.calculated = (b (current coefficient) - 0 (not working)) / se,
> # then we test whether this value is significant with
> # pt(t.b.cal, df=n-2)
> 
> t.b.cal <- (b - 0)/se.b
> t.b.cal
       x 
3.861639 
> # k = num of predictor variables
> k <- 1
> df.t <- n-k-1
> 
> 2*pt(t.b.cal, df.t, lower.tail = F)
          x 
0.001727459 
> pf(t.b.cal^2, 1, df.t, lower.tail = F)
          x 
0.001727459 
> 
</code>
====== Graphics output ======
(The figures here are the plots produced by the session above: the scatter of the generated data, x plotted against the residual, predicted, and explained values, the base-R scatter with the mean line and regression line, and the ggplot2 scatter with the fitted line and its confidence band.)