c:ms:2026:schedule:w10.lecture.note
This is an old revision of the document!
- derivation of a and b in a simple regression (simple regression 에서 a 와 b 구하기)
rs01
rm(list=ls())
rnorm2 <- function(n,mean,sd) {
mean + sd * scale(rnorm(n))
}
ss <- function(x) {
sum((x-mean(x))^2)
}
sp <- function(x, y) {
sum((x-mean(x))*(y-mean(y)))
}
set.seed(1)
n <- 40
x <- rnorm(n, 40, 4)
a <- 10
b <- 5
y <- a + rnorm(n, 1, 35) + b * x
dat <- data.frame(x, y)
# library(ggplot2)
# ggplot(datb, aes(x = xb, y = yb)) +
# geom_point() + # Adds scatter points
# geom_smooth(method = "lm", se = TRUE)
cor.test(dat$x, dat$y)
m.lma <- lm(y ~ x, data=dat)
summary(m.lma)
plot(dat)
abline(m.lma)
text(x=mean(x)+2*sd(x), y=mean(y)+2*sd(y), pos=1, col='black', labels = paste("r =", round(cor(x, y),5)))
abline(v=mean(x), col='blue')
abline(h=mean(y), col='red')
text(x=mean(x), y=mean(y)-sd(y), pos=4, col='blue', labels = paste("mean(x)\n=", round(mean(x),5)))
text(x=mean(x)-sd(x), y=mean(y)+sd(y), pos=4, col='red', labels = paste("mean(y)\n=", round(mean(y),5)))
cov(dat)
var(dat)
cov(dat$x, dat$y)
cor(dat$x, dat$y)
cor.test(dat$x, dat$y)
sp.xy <- sp(x, y)
sp.xy
df.xy <- length(x)-1
sp.xy/df.xy
cov(dat)
ss.x <- ss(x)
ss.y <- ss(y)
r.cal <- sp(x,y)/sqrt(ss.x*ss.y)
r.cal
# commres.net
ss.x <- ss(x)
b.cal <- sp.xy / ss.x
a.cal <- mean(y) - b.cal * mean(x)
a.cal
b.cal
m.lma
summary(m.lma)
y.hat <- a.cal + b.cal * x
y.hat2 <- predict(m.lma, newdata=dat)
data.frame(y.hat)
data.frame(y.hat2)
y.m <- mean(y)
y.obs <- y
y.obs
y.m
sum((y.obs-y.m)^2)
ss(y)
res <- y.obs - y.hat
res
m.lma$residuals
reg <- y.hat - y.m
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.tot <- ss(y)
ss.tot
ss.res
ss.reg
ss.reg+ss.res
df.tot <- length(dat$y)-1
df.reg <- 2 - 1
df.res <- df.tot - df.reg
ms.tot <- ss.tot/df.tot
var(y)
ms.tot
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
ms.reg
ms.res
f.cal <- ms.reg / ms.res
p.val <- pf(f.cal, df.reg, df.res, lower.tail = F)
f.cal
p.val
r.sq <- ss.reg / ss.tot
r.sq
a.cal
b.cal
# residual standard error
summary(m.lma)
sigma(m.lma)
rse <- sqrt(ss.res / (n-2))
rse
# standard error for b
r.square <- summary(m.lma)$r.square
(r.cal)^2
r.square
1-r.square
sqrt((1-r.square)/(n-2))
cor.se <- sqrt((ss.res/ss.tot)/(n-2)) # se for residual
cor.se
t.r <- r.cal/cor.se
p.r <- pt(t.r, n-2, lower.tail = F)*2
cat(t.r, p.r)
cor.test(x,y)
# standard error for b1 (regression slope)
res.se <- sqrt(ss.res/(n-2)) # residual se (se for regression)
res.se
ss.x
bse <- res.se/sqrt(ss.x)
bse
b.cal
t.b <- b.cal/bse
p.b <- pt(t.b, n-2, lower.tail = F)*2
cat(t.b, p.b)
##################
############################
# Install and load MASS package if you haven't already
# install.packages("MASS")
library(MASS)
# 1. Define inputs
means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
# allowance, iq, cgpa
sds <- c(8, 15, .5) # Desired standard deviations
corr_matrix <- matrix(c(1, 0.05, 0.4,
0.05, 1, 0.5,
0.4, 0.5, 1),
nrow = 3) # Target correlation matrix
# 2. Create Covariance Matrix
# Create diagonal matrix of SDs
sd_diag <- diag(sds)
sd_diag
# Calculate covariance matrix
cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
# 3. Simulate data
set.seed(123) # For reproducibility
n_samples <- 100
nb <- n_samples
simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
# Convert to data frame for easier use
simulated_df <- as.data.frame(simulated_data)
# colnames(simulated_df) <- c("all", "iq", "cgpa")
colnames(simulated_df) <- c("V1", "V2", "V3")
# Verify correlations (should be close to target)
cor(simulated_df)
head(simulated_df)
str(simulated_df)
sdf <- simulated_df
colnames(sdf) <- c("all", "iq", "cgpa")
# Reproduce the two-predictor regression coefficients from lm() using
# the SS/SP building blocks defined at the top of the script.
lm.3.21 <- lm(cgpa~all+iq, data=sdf)
summary(lm.3.21)
# attach() is used here only for lecture brevity; prefer with()/sdf$col.
attach(sdf)
sp.31 <- sp(cgpa, all)   # SP between cgpa (y) and all (x1)
sp.32 <- sp(cgpa, iq)    # SP between cgpa (y) and iq (x2)
cov(sdf)
cov.31 <- cov(cgpa, all)
cov.32 <- cov(cgpa, iq)
df.tot <- nb-1
sp.31/df.tot             # = cov(cgpa, all)
sp.32/df.tot             # = cov(cgpa, iq)
ss.1 <- ss(all)
ss.2 <- ss(iq)
ss.3 <- ss(cgpa)
sp.12 <- sp(all, iq)
sp.21 <- sp(iq, all)     # SP is symmetric, so identical to sp.12
# Normal-equation solution for two predictors:
#   b1 = (SS2*SP31 - SP12*SP32) / (SS1*SS2 - SP12^2)
#   b2 = (SS1*SP32 - SP12*SP31) / (SS1*SS2 - SP12^2)
# (Bug fix: the original b1 <- sp.31/sp.12 divided SP(y,x1) by SP(x1,x2),
# which is no slope formula, and b2 <- sp.32/ss.2 was only the simple
# slope; neither matched the lm() output shown below.)
den <- ss.1*ss.2 - sp.12^2
b1 <- (ss.2*sp.31 - sp.12*sp.32) / den
b1
b2 <- (ss.1*sp.32 - sp.12*sp.31) / den
b2
# b1 and b2 now reproduce the "all" and "iq" coefficients in the summary.
summary(lm.3.21)
################
# cov 를 구하는 방식에 따르면 그래프의 내용으로
# cor 값이 왜 양수인지를 설명할 수 있다
means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
sds <- c(8, 15, .5) # Desired standard deviations
corr_matrix <- matrix(c(1, 0.05, 0.9,
0.05, 1, 0.01,
0.9, 0.01, 1),
nrow = 3) # Target correlation matrix
# 2. Create Covariance Matrix
# Create diagonal matrix of SDs
sd_diag <- diag(sds)
sd_diag
# Calculate covariance matrix
cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
cov_matrix
# 3. Simulate data
set.seed(123) # For reproducibility
n_samples <- 100
nb <- n_samples
simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
sdat <- data.frame(simulated_data)
sdat
cor.test(sdat$X1, sdat$X2)
plot(sdat$X1, sdat$X2)
abline(v=mean(sdat$X1), col='blue')
abline(h=mean(sdat$X2), col='red')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="1 (-)", pos=1, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="2 (+)", pos=2, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="3 (-)", pos=3, col='blue')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="4 (+)", pos=4, col='blue')
cor(sdat$X1, sdat$X2)
plot(sdat$X1, sdat$X3)
abline(v=mean(sdat$X1), col='blue')
abline(h=mean(sdat$X3), col='red')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="1 (-)", pos=1, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="2 (+)", pos=2, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="3 (-)", pos=3, col='blue')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="4 (+)", pos=4, col='blue')
cor(sdat$X1, sdat$X3)
ro01
> rm(list=ls())
>
> rnorm2 <- function(n,mean,sd) {
+ mean + sd * scale(rnorm(n))
+ }
> ss <- function(x) {
+ sum((x-mean(x))^2)
+ }
> sp <- function(x, y) {
+ sum((x-mean(x))*(y-mean(y)))
+ }
>
> set.seed(1)
> n <- 40
> x <- rnorm(n, 40, 4)
> a <- 10
> b <- 5
> y <- a + rnorm(n, 1, 35) + b * x
> dat <- data.frame(x, y)
>
> # library(ggplot2)
> # ggplot(datb, aes(x = xb, y = yb)) +
> # geom_point() + # Adds scatter points
> # geom_smooth(method = "lm", se = TRUE)
>
> cor.test(dat$x, dat$y)
Pearson's product-moment correlation
data: dat$x and dat$y
t = 4.9195, df = 38, p-value = 1.707e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3875647 0.7831105
sample estimates:
cor
0.6237667
> m.lma <- lm(y ~ x, data=dat)
> summary(m.lma)
Call:
lm(formula = y ~ x, data = dat)
Residuals:
Min 1Q Median 3Q Max
-65.304 -22.086 -2.542 14.342 72.912
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -69.434 58.453 -1.188 0.242
x 7.097 1.443 4.920 1.71e-05 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 31.95 on 38 degrees of freedom
Multiple R-squared: 0.3891, Adjusted R-squared: 0.373
F-statistic: 24.2 on 1 and 38 DF, p-value: 1.707e-05
>
> plot(dat)
> abline(m.lma)
> text(x=mean(x)+2*sd(x), y=mean(y)+2*sd(y), pos=1, col='black', labels = paste("r =", round(cor(x, y),5)))
> abline(v=mean(x), col='blue')
> abline(h=mean(y), col='red')
> text(x=mean(x), y=mean(y)-sd(y), pos=4, col='blue', labels = paste("mean(x)\n=", round(mean(x),5)))
> text(x=mean(x)-sd(x), y=mean(y)+sd(y), pos=4, col='red', labels = paste("mean(y)\n=", round(mean(y),5)))
>
> cov(dat)
x y
x 12.57885 89.26937
y 89.26937 1628.24418
> var(dat)
x y
x 12.57885 89.26937
y 89.26937 1628.24418
> cov(dat$x, dat$y)
[1] 89.26937
> cor(dat$x, dat$y)
[1] 0.6237667
> cor.test(dat$x, dat$y)
Pearson's product-moment correlation
data: dat$x and dat$y
t = 4.9195, df = 38, p-value = 1.707e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3875647 0.7831105
sample estimates:
cor
0.6237667
>
> sp.xy <- sp(x, y)
> sp.xy
[1] 3481.506
> df.xy <- length(x)-1
> sp.xy/df.xy
[1] 89.26937
> cov(dat)
x y
x 12.57885 89.26937
y 89.26937 1628.24418
> ss.x <- ss(x)
> ss.y <- ss(y)
> r.cal <- sp(x,y)/sqrt(ss.x*ss.y)
> r.cal
[1] 0.6237667
>
> # commres.net
> ss.x <- ss(x)
> b.cal <- sp.xy / ss.x
> a.cal <- mean(y) - b.cal * mean(x)
> a.cal
[1] -69.43374
> b.cal
[1] 7.096781
> m.lma
Call:
lm(formula = y ~ x, data = dat)
Coefficients:
(Intercept) x
-69.434 7.097
> summary(m.lma)
Call:
lm(formula = y ~ x, data = dat)
Residuals:
Min 1Q Median 3Q Max
-65.304 -22.086 -2.542 14.342 72.912
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -69.434 58.453 -1.188 0.242
x 7.097 1.443 4.920 1.71e-05 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 31.95 on 38 degrees of freedom
Multiple R-squared: 0.3891, Adjusted R-squared: 0.373
F-statistic: 24.2 on 1 and 38 DF, p-value: 1.707e-05
>
> y.hat <- a.cal + b.cal * x
> y.hat2 <- predict(m.lma, newdata=dat)
> data.frame(y.hat)
y.hat
1 196.6543
2 219.6506
3 190.7164
4 259.7229
5 223.7913
6 191.1468
7 228.2742
8 235.3964
9 230.7823
10 205.7684
11 257.3526
12 225.5040
13 196.8023
14 151.5685
15 246.3711
16 213.1620
17 213.9779
18 241.2303
19 237.7496
20 231.2967
21 240.5246
22 236.6401
23 216.5542
24 157.9655
25 232.0326
26 212.8442
27 210.0149
28 172.6871
29 200.8642
30 226.3017
31 253.0065
32 211.5197
33 225.4424
34 212.9101
35 175.3467
36 202.6570
37 203.2447
38 212.7538
39 245.6641
40 236.1019
> data.frame(y.hat2)
y.hat2
1 196.6543
2 219.6506
3 190.7164
4 259.7229
5 223.7913
6 191.1468
7 228.2742
8 235.3964
9 230.7823
10 205.7684
11 257.3526
12 225.5040
13 196.8023
14 151.5685
15 246.3711
16 213.1620
17 213.9779
18 241.2303
19 237.7496
20 231.2967
21 240.5246
22 236.6401
23 216.5542
24 157.9655
25 232.0326
26 212.8442
27 210.0149
28 172.6871
29 200.8642
30 226.3017
31 253.0065
32 211.5197
33 225.4424
34 212.9101
35 175.3467
36 202.6570
37 203.2447
38 212.7538
39 245.6641
40 236.1019
>
> y.m <- mean(y)
> y.obs <- y
>
> y.obs
[1] 192.7126 205.8052 218.6811 262.3888 193.4837
[6] 169.8283 233.5089 252.6651 218.5835 235.7310
[11] 255.1693 197.3759 210.5144 127.1783 283.6544
[16] 279.4153 197.8234 193.3320 247.3646 218.1511
[21] 313.4362 225.2693 236.6322 172.1930 197.3820
[26] 216.4852 144.7105 232.8794 206.8009 295.4002
[31] 254.8164 184.0961 240.1289 177.2305 139.5816
[36] 212.9007 187.5990 209.8524 235.6025 205.6303
> y.m
[1] 217.0499
>
> sum((y.obs-y.m)^2)
[1] 63501.52
> ss(y)
[1] 63501.52
>
> res <- y.obs - y.hat
> res
[1] -3.941684 -13.845403 27.964735 2.665889
[5] -30.307576 -21.318465 5.234736 17.268727
[9] -12.198772 29.962596 -2.183295 -28.128092
[13] 13.712107 -24.390249 37.283390 66.253356
[17] -16.154466 -47.898288 9.614998 -13.145540
[21] 72.911540 -11.370779 20.077988 14.227511
[25] -34.650622 3.640985 -65.304380 60.192299
[29] 5.936666 69.098576 1.809915 -27.423536
[33] 14.686468 -35.679652 -35.765104 10.243725
[37] -15.645761 -2.901348 -10.061608 -30.471587
> m.lma$residuals
1 2 3 4 5
-3.941684 -13.845403 27.964735 2.665889 -30.307576
6 7 8 9 10
-21.318465 5.234736 17.268727 -12.198772 29.962596
11 12 13 14 15
-2.183295 -28.128092 13.712107 -24.390249 37.283390
16 17 18 19 20
66.253356 -16.154466 -47.898288 9.614998 -13.145540
21 22 23 24 25
72.911540 -11.370779 20.077988 14.227511 -34.650622
26 27 28 29 30
3.640985 -65.304380 60.192299 5.936666 69.098576
31 32 33 34 35
1.809915 -27.423536 14.686468 -35.679652 -35.765104
36 37 38 39 40
10.243725 -15.645761 -2.901348 -10.061608 -30.471587
>
> reg <- y.hat - y.m
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)
> ss.tot <- ss(y)
> ss.tot
[1] 63501.52
> ss.res
[1] 38794.04
> ss.reg
[1] 24707.48
> ss.reg+ss.res
[1] 63501.52
>
> df.tot <- length(dat$y)-1
> df.reg <- 2 - 1
> df.res <- df.tot - df.reg
>
> ms.tot <- ss.tot/df.tot
> var(y)
[1] 1628.244
> ms.tot
[1] 1628.244
>
> ms.reg <- ss.reg / df.reg
> ms.res <- ss.res / df.res
> ms.reg
[1] 24707.48
> ms.res
[1] 1020.896
>
>
> f.cal <- ms.reg / ms.res
> p.val <- pf(f.cal, df.reg, df.res, lower.tail = F)
> f.cal
[1] 24.20177
> p.val
[1] 1.706524e-05
>
> r.sq <- ss.reg / ss.tot
> r.sq
[1] 0.3890849
>
> a.cal
[1] -69.43374
> b.cal
[1] 7.096781
>
> # residual standard error
> summary(m.lma)
Call:
lm(formula = y ~ x, data = dat)
Residuals:
Min 1Q Median 3Q Max
-65.304 -22.086 -2.542 14.342 72.912
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -69.434 58.453 -1.188 0.242
x 7.097 1.443 4.920 1.71e-05 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 31.95 on 38 degrees of freedom
Multiple R-squared: 0.3891, Adjusted R-squared: 0.373
F-statistic: 24.2 on 1 and 38 DF, p-value: 1.707e-05
> sigma(m.lma)
[1] 31.95146
> rse <- sqrt(ss.res / (n-2))
> rse
[1] 31.95146
>
> # standard error for b
> r.square <- summary(m.lma)$r.square
> (r.cal)^2
[1] 0.3890849
> r.square
[1] 0.3890849
> 1-r.square
[1] 0.6109151
> sqrt((1-r.square)/(n-2))
[1] 0.126794
> cor.se <- sqrt((ss.res/ss.tot)/(n-2)) # se for residual
> cor.se
[1] 0.126794
>
> t.r <- r.cal/cor.se
> p.r <- pt(t.r, n-2, lower.tail = F)*2
> cat(t.r, p.r)
4.919529 1.706524e-05> cor.test(x,y)
Pearson's product-moment correlation
data: x and y
t = 4.9195, df = 38, p-value = 1.707e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3875647 0.7831105
sample estimates:
cor
0.6237667
>
> # standard error for b1 (regression slope)
> res.se <- sqrt(ss.res/(n-2)) # residual se (se for regression)
> res.se
[1] 31.95146
> ss.x
[1] 490.5753
> bse <- res.se/sqrt(ss.x)
> bse
[1] 1.442573
> b.cal
[1] 7.096781
> t.b <- b.cal/bse
> p.b <- pt(t.b, n-2, lower.tail = F)*2
> cat(t.b, p.b)
4.919529 1.706524e-05>
> ##################
> ############################
> # Install and load MASS package if you haven't already
> # install.packages("MASS")
> library(MASS)
>
> # 1. Define inputs
> means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
> # allowance, iq, cgpa
> sds <- c(8, 15, .5) # Desired standard deviations
> corr_matrix <- matrix(c(1, 0.05, 0.4,
+ 0.05, 1, 0.5,
+ 0.4, 0.5, 1),
+ nrow = 3) # Target correlation matrix
>
> # 2. Create Covariance Matrix
> # Create diagonal matrix of SDs
> sd_diag <- diag(sds)
> sd_diag
[,1] [,2] [,3]
[1,] 8 0 0.0
[2,] 0 15 0.0
[3,] 0 0 0.5
>
> # Calculate covariance matrix
> cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
>
> # 3. Simulate data
> set.seed(123) # For reproducibility
> n_samples <- 100
> nb <- n_samples
> simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
>
> # Convert to data frame for easier use
> simulated_df <- as.data.frame(simulated_data)
> # colnames(simulated_df) <- c("all", "iq", "cgpa")
> colnames(simulated_df) <- c("V1", "V2", "V3")
>
> # Verify correlations (should be close to target)
> cor(simulated_df)
V1 V2 V3
V1 1.00000000 0.09388135 0.4325705
V2 0.09388135 1.00000000 0.5655917
V3 0.43257052 0.56559168 1.0000000
> head(simulated_df)
V1 V2 V3
1 34.62462 108.60573 3.869495
2 42.16696 103.36635 3.617166
3 37.16023 76.70009 2.455708
4 37.18199 99.04391 3.130241
5 32.33711 98.35040 2.631378
6 38.68389 74.29577 2.370633
> str(simulated_df)
'data.frame': 100 obs. of 3 variables:
$ V1: num 34.6 42.2 37.2 37.2 32.3 ...
$ V2: num 108.6 103.4 76.7 99 98.4 ...
$ V3: num 3.87 3.62 2.46 3.13 2.63 ...
>
> sdf <- simulated_df
> colnames(sdf) <- c("all", "iq", "cgpa")
>
> lm.3.21 <- lm(cgpa~all+iq, data=sdf)
> summary(lm.3.21)
Call:
lm(formula = cgpa ~ all + iq, data = sdf)
Residuals:
Min 1Q Median 3Q Max
-0.73106 -0.25793 -0.04869 0.24249 0.81092
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.11516 0.31708 0.363 0.717
all 0.02479 0.00483 5.133 1.47e-06 ***
iq 0.01946 0.00274 7.101 2.06e-10 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3712 on 97 degrees of freedom
Multiple R-squared: 0.4652, Adjusted R-squared: 0.4541
F-statistic: 42.18 on 2 and 97 DF, p-value: 6.582e-14
> attach(sdf)
The following objects are masked from sdf (pos = 6):
all, cgpa, iq
> sp.31 <- sp(cgpa, all)
> sp.32 <- sp(cgpa, iq)
> cov(sdf)
all iq cgpa
all 60.197853 9.962597 1.6863568
iq 9.962597 187.070586 3.8869397
cgpa 1.686357 3.886940 0.2524667
> cov.31 <- cov(cgpa, all)
> cov.32 <- cov(cgpa, iq)
> df.tot <- nb-1
> sp.31/df.tot
[1] 1.686357
> sp.32/df.tot
[1] 3.88694
>
> ss.1 <- ss(all)
> ss.2 <- ss(iq)
> ss.3 <- ss(cgpa)
> sp.12 <- sp(all, iq)
> sp.21 <- sp(iq, all)
>
> b1 <- sp.31/(sp.12)
> b1
[1] 0.1692688
> b2 <- sp.32/ss.2
> b2
[1] 0.02077793
> summary(lm.3.21)
Call:
lm(formula = cgpa ~ all + iq, data = sdf)
Residuals:
Min 1Q Median 3Q Max
-0.73106 -0.25793 -0.04869 0.24249 0.81092
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.11516 0.31708 0.363 0.717
all 0.02479 0.00483 5.133 1.47e-06 ***
iq 0.01946 0.00274 7.101 2.06e-10 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3712 on 97 degrees of freedom
Multiple R-squared: 0.4652, Adjusted R-squared: 0.4541
F-statistic: 42.18 on 2 and 97 DF, p-value: 6.582e-14
>
>
> ################
> # cov 를 구하는 방식에 따르면 그래프의 내용으로
> # cor 값이 왜 양수인지를 설명할 수 있다
> means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
> sds <- c(8, 15, .5) # Desired standard deviations
> corr_matrix <- matrix(c(1, 0.05, 0.9,
+ 0.05, 1, 0.01,
+ 0.9, 0.01, 1),
+ nrow = 3) # Target correlation matrix
>
> # 2. Create Covariance Matrix
> # Create diagonal matrix of SDs
> sd_diag <- diag(sds)
> sd_diag
[,1] [,2] [,3]
[1,] 8 0 0.0
[2,] 0 15 0.0
[3,] 0 0 0.5
>
> # Calculate covariance matrix
> cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
> cov_matrix
[,1] [,2] [,3]
[1,] 64.0 6.000 3.600
[2,] 6.0 225.000 0.075
[3,] 3.6 0.075 0.250
>
> # 3. Simulate data
> set.seed(123) # For reproducibility
> n_samples <- 100
> nb <- n_samples
> simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
> sdat <- data.frame(simulated_data)
> sdat
X1 X2 X3
1 34.61675 108.61744 3.163956
2 42.16256 103.37587 3.403127
3 37.16409 76.69732 2.809737
4 37.18044 99.04614 2.960093
5 32.33849 98.34414 2.479809
6 38.68873 74.29237 2.852685
7 33.48833 93.32099 2.469239
8 27.40274 119.46838 2.137528
9 37.32901 110.41433 3.195801
10 47.58356 106.41012 3.408479
11 34.72343 81.81363 2.749595
12 44.64794 94.42300 3.321618
13 26.84967 94.47137 2.532521
14 39.50108 98.35649 2.861675
15 44.46764 108.18116 3.026789
16 41.38501 73.11239 3.473628
17 40.57072 92.50215 2.945131
18 35.99392 129.68393 2.582215
19 32.84239 89.73427 2.339755
20 32.10668 107.39491 2.267057
21 41.54219 115.97905 2.943581
22 32.55288 103.55109 2.710066
23 36.64452 115.53330 3.033661
24 38.35466 111.00760 3.048028
25 55.06838 108.82495 3.760541
26 35.73830 125.48944 2.742768
27 41.41926 87.36544 2.941776
28 40.54528 97.67647 2.877667
29 32.94869 117.35515 2.774157
30 38.74316 81.21744 2.730479
31 51.26617 93.17492 4.068185
32 43.76910 104.29082 3.187936
33 39.82657 86.56354 3.052558
34 36.14692 86.95613 2.637517
35 23.16233 88.28957 1.939267
36 48.66014 89.33543 3.214822
37 28.03630 92.12748 2.294712
38 45.93459 100.70840 3.424908
39 55.40245 104.02051 3.934461
40 28.69906 106.13542 2.185648
41 45.99815 110.20952 3.155028
42 38.02978 103.19604 2.776037
43 28.14191 119.44543 2.633423
44 26.71491 67.92251 2.041198
45 26.54660 82.36068 2.222969
46 36.36706 117.00178 3.188458
47 28.56071 106.47705 2.325290
48 45.76711 106.79340 3.021906
49 56.33243 87.67770 3.791184
50 29.76951 101.63341 2.526536
51 46.14966 95.96646 3.269975
52 46.16013 100.19912 3.225144
53 42.67928 100.54399 3.075765
54 31.18730 79.77510 2.546294
55 39.15327 103.42185 3.295496
56 36.91655 77.34084 2.833382
57 45.34460 123.05946 3.509233
58 36.69359 91.34351 2.960676
59 47.72894 97.85179 3.413750
60 36.90880 96.87259 2.496307
61 48.19550 93.99313 3.356063
62 31.91356 107.84546 2.428309
63 30.12886 105.37214 2.447209
64 66.41797 114.31148 4.755703
65 37.24379 116.19833 3.323727
66 42.19160 95.35961 3.465238
67 44.83146 93.08871 3.251667
68 36.13105 99.34861 2.400962
69 43.61453 86.01480 3.135781
70 41.79859 69.14501 3.156951
71 38.54505 107.42833 3.092811
72 41.79883 134.61153 3.270027
73 39.15813 84.92720 3.118826
74 57.39914 110.00218 3.666512
75 34.45761 110.53892 2.859638
76 30.68617 84.94549 2.395306
77 40.45846 104.25955 3.058844
78 43.15856 118.21479 3.173014
79 43.37719 97.15119 3.286828
80 36.41929 102.21937 2.800805
81 31.53130 100.22956 2.159844
82 49.85667 93.84622 3.723130
83 37.41190 105.66295 2.931257
84 32.73620 90.59375 2.543617
85 38.23606 103.37699 2.922223
86 38.23953 95.08296 2.935583
87 48.24248 83.22042 3.532548
88 40.41318 93.44876 3.387432
89 46.20239 104.66356 3.296775
90 35.37183 82.91990 2.795476
91 41.14230 85.03686 3.335768
92 37.08973 91.87255 3.074384
93 40.60752 96.39187 3.287286
94 33.21227 109.68303 2.480376
95 28.75489 79.98481 2.824217
96 56.27310 108.40781 3.922468
97 43.54966 67.01818 3.644380
98 29.17490 77.38738 2.122459
99 35.25397 103.71669 2.732525
100 31.09722 115.74638 2.750965
> cor.test(sdat$X1, sdat$X2)
Pearson's product-moment correlation
data: sdat$X1 and sdat$X2
t = 0.93346, df = 98, p-value = 0.3529
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1044669 0.2850397
sample estimates:
cor
0.09387751
> plot(sdat$X1, sdat$X2)
> abline(v=mean(sdat$X1), col='blue')
> abline(h=mean(sdat$X2), col='red')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="1 (-)", pos=1, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="2 (+)", pos=2, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="3 (-)", pos=3, col='blue')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="4 (+)", pos=4, col='blue')
> cor(sdat$X1, sdat$X2)
[1] 0.09387751
>
> plot(sdat$X1, sdat$X3)
> abline(v=mean(sdat$X1), col='blue')
> abline(h=mean(sdat$X3), col='red')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="1 (-)", pos=1, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="2 (+)", pos=2, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="3 (-)", pos=3, col='blue')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="4 (+)", pos=4, col='blue')
> cor(sdat$X1, sdat$X3)
[1] 0.9079756
>
c/ms/2026/schedule/w10.lecture.note.1778025733.txt.gz · Last modified: by hkimscil
