User Tools

Site Tools


c:ms:2026:schedule:w10.lecture.note

This is an old revision of the document!


rs01

# Clear the workspace before the demo starts.
# NOTE(review): rm(list = ls()) is discouraged in shared/sourced scripts;
# kept here because this is an interactive lecture script.
rm(list=ls())

# Draw n normal deviates and rescale them so the SAMPLE mean and SAMPLE
# standard deviation are exactly `mean` and `sd`.
# Returns an n x 1 matrix, because scale() returns a matrix.
rnorm2 <- function(n,mean,sd) { 
  z <- scale(rnorm(n))  # standardized draws: sample mean 0, sample sd 1
  mean + sd * z
}
# Sum of squares: total squared deviation of x around its mean.
ss <- function(x) {
  dev <- x - mean(x)
  sum(dev * dev)
}
# Sum of cross-products: sum of products of the deviations of x and y
# from their respective means (the numerator of the covariance).
sp <- function(x, y) {
  dx <- x - mean(x)
  dy <- y - mean(y)
  sum(dx * dy)
}

# Simulate data with a known linear relationship: y = a + b*x + noise
set.seed(1)                        # reproducibility
n <- 40                            # sample size
x <- rnorm(n, 40, 4)               # predictor: mean 40, sd 4
a <- 10                            # true intercept
b <- 5                             # true slope
y <- a + rnorm(n, 1, 35) + b * x   # noise term: mean 1, sd 35
dat <- data.frame(x, y)

# library(ggplot2)
# ggplot(dat, aes(x = x, y = y)) +
#  geom_point() +                          # Adds scatter points
#  geom_smooth(method = "lm", se = TRUE) 

# Correlation test, regression fit, and annotated scatterplot
cor.test(dat$x, dat$y)        # H0: true correlation equals 0
m.lma <- lm(y ~ x, data=dat)  # simple linear regression of y on x
summary(m.lma)

plot(dat)
abline(m.lma)                 # fitted regression line
# annotate the plot with the correlation coefficient
text(x=mean(x)+2*sd(x), y=mean(y)+2*sd(y), pos=1, col='black', labels = paste("r =", round(cor(x, y),5)))
abline(v=mean(x), col='blue') # vertical line at mean(x)
abline(h=mean(y), col='red')  # horizontal line at mean(y)
text(x=mean(x), y=mean(y)-sd(y), pos=4, col='blue', labels = paste("mean(x)\n=", round(mean(x),5)))
text(x=mean(x)-sd(x), y=mean(y)+sd(y), pos=4, col='red', labels = paste("mean(y)\n=", round(mean(y),5)))

# Covariance and correlation: built-in functions vs hand computation
cov(dat)                 # variance-covariance matrix
var(dat)                 # identical to cov(dat) on a data frame
cov(dat$x, dat$y)
cor(dat$x, dat$y)
cor.test(dat$x, dat$y)

sp.xy <- sp(x, y)        # sum of cross-products of x and y
sp.xy
df.xy <- length(x)-1     # degrees of freedom, n - 1
sp.xy/df.xy              # covariance = SP / (n - 1)
cov(dat)
ss.x <- ss(x)
ss.y <- ss(y)
r.cal <- sp(x,y)/sqrt(ss.x*ss.y)  # r = SP(x,y) / sqrt(SS(x)*SS(y))
r.cal

# commres.net
# Least-squares estimates by hand:
#   b = SP(x,y) / SS(x),  a = mean(y) - b * mean(x)
ss.x <- ss(x)
b.cal <- sp.xy / ss.x
a.cal <- mean(y) - b.cal * mean(x)
a.cal
b.cal
m.lma              # coefficients from lm() should match a.cal and b.cal
summary(m.lma)

# Predicted values: by hand vs predict()
y.hat <- a.cal + b.cal * x
y.hat2 <- predict(m.lma, newdata=dat)
data.frame(y.hat) 
data.frame(y.hat2)

# Decompose the total variation in y into regression and residual parts
y.m <- mean(y)       # grand mean of y
y.obs <- y           # observed values

y.obs
y.m

sum((y.obs-y.m)^2)   # total sum of squares, computed directly
ss(y)                # same value via the helper

res <- y.obs - y.hat # residuals: observed minus predicted
res
m.lma$residuals      # identical to res

reg <- y.hat - y.m   # regression component: predicted minus mean
ss.res <- sum(res^2) # SS residual (error)
ss.reg <- sum(reg^2) # SS regression (model)
ss.tot <- ss(y)      # SS total
ss.tot
ss.res
ss.reg
ss.reg+ss.res        # SS.reg + SS.res == SS.tot

# Degrees of freedom: total = n - 1; regression = (# parameters) - 1 = 1
df.tot <- length(dat$y)-1
df.reg <- 2 - 1
df.res <- df.tot - df.reg

# Mean squares and the F test, computed by hand
ms.tot <- ss.tot/df.tot   # mean square total == sample variance of y
var(y)
ms.tot

ms.reg <- ss.reg / df.reg # mean square for the regression (model)
ms.res <- ss.res / df.res # mean square for the residuals (error)
ms.reg
ms.res

# F statistic and its p-value from the upper tail of the F distribution.
# (was lower.tail = F; use the literal FALSE -- F is a reassignable variable)
f.cal <- ms.reg / ms.res
p.val <- pf(f.cal, df.reg, df.res, lower.tail = FALSE)
f.cal
p.val

# R-squared: proportion of total variation explained by the regression
r.sq <- ss.reg / ss.tot
r.sq

a.cal
b.cal

# Residual standard error: sqrt(SS.res / (n - 2)), the value reported by
# summary() and sigma()
summary(m.lma)
sigma(m.lma)
rse <- sqrt(ss.res / (n-2))
rse

# Standard error and t test for the correlation coefficient r
# (this section tests r; the slope b is tested further below)
r.square <- summary(m.lma)$r.square
(r.cal)^2        # r^2 computed by hand -- identical to summary()'s value
r.square
1-r.square       # unexplained proportion of variance
sqrt((1-r.square)/(n-2))
cor.se <- sqrt((ss.res/ss.tot)/(n-2)) # standard error of r
cor.se

# Two-sided p-value: abs() keeps the doubling valid even when t < 0.
# (was lower.tail = F; use the literal FALSE -- F is reassignable)
t.r <- r.cal/cor.se
p.r <- pt(abs(t.r), n-2, lower.tail = FALSE)*2
cat(t.r, p.r)
cor.test(x,y)    # should reproduce t.r and p.r above

# standard error for b1 (regression slope)
res.se <- sqrt(ss.res/(n-2)) # residual standard error (same as rse above)
res.se
ss.x
bse <- res.se/sqrt(ss.x)     # SE(b1) = RSE / sqrt(SS.x)
bse
b.cal
t.b <- b.cal/bse
p.b <- pt(abs(t.b), n-2, lower.tail = FALSE)*2  # two-sided p for the slope
cat(t.b, p.b)

##################
############################
# Simulate three correlated variables (allowance, IQ, CGPA) from a
# multivariate normal distribution with a chosen correlation structure.
# Install and load MASS package if you haven't already
# install.packages("MASS")
library(MASS)

# 1. Define inputs
means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
# allowance, iq, cgpa
sds <- c(8, 15, .5)    # Desired standard deviations
corr_matrix <- matrix(c(1, 0.05, 0.4,
                        0.05, 1, 0.5,
                        0.4, 0.5, 1),
                      nrow = 3) # Target correlation matrix

# 2. Create Covariance Matrix
# Create diagonal matrix of SDs
sd_diag <- diag(sds)
sd_diag

# Calculate covariance matrix
# Sigma = D %*% R %*% D, where D = diag(sds) and R is the correlation matrix
cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag

# 3. Simulate data 
set.seed(123) # For reproducibility
n_samples <- 100
nb <- n_samples          # kept as nb for the by-hand computations below
simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)

# Convert to data frame for easier use
simulated_df <- as.data.frame(simulated_data)
# colnames(simulated_df) <- c("all", "iq", "cgpa")
colnames(simulated_df) <- c("V1", "V2", "V3")

# Verify correlations (should be close to target)
cor(simulated_df)
head(simulated_df)
str(simulated_df)

sdf <- simulated_df 
colnames(sdf) <- c("all", "iq", "cgpa")

# Multiple regression of cgpa on all(owance) and iq, with the slope
# coefficients reproduced by hand from sums of squares / cross-products.
lm.3.21 <- lm(cgpa~all+iq, data=sdf)
summary(lm.3.21)

# Use the data frame columns directly instead of attach(): attach() pollutes
# the search path and is a common source of name-masking bugs.
sp.31 <- sp(sdf$cgpa, sdf$all)   # SP(y, x1)
sp.32 <- sp(sdf$cgpa, sdf$iq)    # SP(y, x2)
cov(sdf)
cov.31 <- cov(sdf$cgpa, sdf$all)
cov.32 <- cov(sdf$cgpa, sdf$iq)
df.tot <- nb-1
sp.31/df.tot                     # equals cov.31
sp.32/df.tot                     # equals cov.32

ss.1 <- ss(sdf$all)
ss.2 <- ss(sdf$iq)
ss.3 <- ss(sdf$cgpa)
sp.12 <- sp(sdf$all, sdf$iq)
sp.21 <- sp(sdf$iq, sdf$all)     # identical to sp.12 (SP is symmetric)

# Partial (multiple-regression) slopes from the two-predictor normal
# equations. The original computed b1 <- sp.31/sp.12 (not a valid
# estimator) and b2 <- sp.32/ss.2 (the SIMPLE-regression slope), so
# neither matched the lm() estimates. The least-squares solution is:
#   b1 = (SP31*SS2 - SP32*SP12) / (SS1*SS2 - SP12^2)
#   b2 = (SP32*SS1 - SP31*SP12) / (SS1*SS2 - SP12^2)
den <- ss.1 * ss.2 - sp.12^2
b1 <- (sp.31 * ss.2 - sp.32 * sp.12) / den
b1
b2 <- (sp.32 * ss.1 - sp.31 * sp.12) / den
b2
summary(lm.3.21)                 # b1 and b2 now match the lm() estimates


################
# Given how cov is computed (the sign of the deviation cross-products in
# each quadrant around the means), the scatterplot can explain why the
# cor value is positive.
means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
sds <- c(8, 15, .5)    # Desired standard deviations
corr_matrix <- matrix(c(1, 0.05, 0.9,
                        0.05, 1, 0.01,
                        0.9, 0.01, 1),
                      nrow = 3) # Target correlation matrix

# 2. Create Covariance Matrix
# Create diagonal matrix of SDs
sd_diag <- diag(sds)
sd_diag

# Calculate covariance matrix
cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
cov_matrix

# 3. Simulate data 
set.seed(123) # For reproducibility
n_samples <- 100
nb <- n_samples
simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
sdat <- data.frame(simulated_data)
sdat
# X1 vs X2: near-zero target correlation (0.05)
cor.test(sdat$X1, sdat$X2)
plot(sdat$X1, sdat$X2)
abline(v=mean(sdat$X1), col='blue')
abline(h=mean(sdat$X2), col='red')
# Label the four quadrants with the sign of (x - mean(x)) * (y - mean(y))
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="1 (-)", pos=1, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="2 (+)", pos=2, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="3 (-)", pos=3, col='blue')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="4 (+)", pos=4, col='blue')
cor(sdat$X1, sdat$X2)

# X1 vs X3: strong positive target correlation (0.9)
plot(sdat$X1, sdat$X3)
abline(v=mean(sdat$X1), col='blue')
abline(h=mean(sdat$X3), col='red')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="1 (-)", pos=1, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="2 (+)", pos=2, col='blue')
text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="3 (-)", pos=3, col='blue')
text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="4 (+)", pos=4, col='blue')
cor(sdat$X1, sdat$X3)

ro01

> rm(list=ls())
> 
> rnorm2 <- function(n,mean,sd) { 
+   mean + sd * scale(rnorm(n)) 
+ }
> ss <- function(x) {
+   sum((x-mean(x))^2)
+ }
> sp <- function(x, y) {
+   sum((x-mean(x))*(y-mean(y)))
+ }
> 
> set.seed(1)
> n <- 40
> x <- rnorm(n, 40, 4)
> a <- 10
> b <- 5
> y <- a + rnorm(n, 1, 35) + b * x
> dat <- data.frame(x, y)
> 
> # library(ggplot2)
> # ggplot(datb, aes(x = xb, y = yb)) +
> #  geom_point() +                          # Adds scatter points
> #  geom_smooth(method = "lm", se = TRUE) 
> 
> cor.test(dat$x, dat$y)

	Pearson's product-moment correlation

data:  dat$x and dat$y
t = 4.9195, df = 38, p-value = 1.707e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.3875647 0.7831105
sample estimates:
      cor 
0.6237667 

> m.lma <- lm(y ~ x, data=dat)
> summary(m.lma)

Call:
lm(formula = y ~ x, data = dat)

Residuals:
    Min      1Q  Median      3Q     Max 
-65.304 -22.086  -2.542  14.342  72.912 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -69.434     58.453  -1.188    0.242    
x              7.097      1.443   4.920 1.71e-05 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 31.95 on 38 degrees of freedom
Multiple R-squared:  0.3891,	Adjusted R-squared:  0.373 
F-statistic:  24.2 on 1 and 38 DF,  p-value: 1.707e-05

> 
> plot(dat)
> abline(m.lma)
> text(x=mean(x)+2*sd(x), y=mean(y)+2*sd(y), pos=1, col='black', labels = paste("r =", round(cor(x, y),5)))
> abline(v=mean(x), col='blue')
> abline(h=mean(y), col='red')
> text(x=mean(x), y=mean(y)-sd(y), pos=4, col='blue', labels = paste("mean(x)\n=", round(mean(x),5)))
> text(x=mean(x)-sd(x), y=mean(y)+sd(y), pos=4, col='red', labels = paste("mean(y)\n=", round(mean(y),5)))
> 
> cov(dat)
         x          y
x 12.57885   89.26937
y 89.26937 1628.24418
> var(dat)
         x          y
x 12.57885   89.26937
y 89.26937 1628.24418
> cov(dat$x, dat$y)
[1] 89.26937
> cor(dat$x, dat$y)
[1] 0.6237667
> cor.test(dat$x, dat$y)

	Pearson's product-moment correlation

data:  dat$x and dat$y
t = 4.9195, df = 38, p-value = 1.707e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.3875647 0.7831105
sample estimates:
      cor 
0.6237667 

> 
> sp.xy <- sp(x, y)
> sp.xy
[1] 3481.506
> df.xy <- length(x)-1
> sp.xy/df.xy
[1] 89.26937
> cov(dat)
         x          y
x 12.57885   89.26937
y 89.26937 1628.24418
> ss.x <- ss(x)
> ss.y <- ss(y)
> r.cal <- sp(x,y)/sqrt(ss.x*ss.y)
> r.cal
[1] 0.6237667
> 
> # commres.net
> ss.x <- ss(x)
> b.cal <- sp.xy / ss.x
> a.cal <- mean(y) - b.cal * mean(x)
> a.cal
[1] -69.43374
> b.cal
[1] 7.096781
> m.lma

Call:
lm(formula = y ~ x, data = dat)

Coefficients:
(Intercept)            x  
    -69.434        7.097  

> summary(m.lma)

Call:
lm(formula = y ~ x, data = dat)

Residuals:
    Min      1Q  Median      3Q     Max 
-65.304 -22.086  -2.542  14.342  72.912 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -69.434     58.453  -1.188    0.242    
x              7.097      1.443   4.920 1.71e-05 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 31.95 on 38 degrees of freedom
Multiple R-squared:  0.3891,	Adjusted R-squared:  0.373 
F-statistic:  24.2 on 1 and 38 DF,  p-value: 1.707e-05

> 
> y.hat <- a.cal + b.cal * x
> y.hat2 <- predict(m.lma, newdata=dat)
> data.frame(y.hat) 
      y.hat
1  196.6543
2  219.6506
3  190.7164
4  259.7229
5  223.7913
6  191.1468
7  228.2742
8  235.3964
9  230.7823
10 205.7684
11 257.3526
12 225.5040
13 196.8023
14 151.5685
15 246.3711
16 213.1620
17 213.9779
18 241.2303
19 237.7496
20 231.2967
21 240.5246
22 236.6401
23 216.5542
24 157.9655
25 232.0326
26 212.8442
27 210.0149
28 172.6871
29 200.8642
30 226.3017
31 253.0065
32 211.5197
33 225.4424
34 212.9101
35 175.3467
36 202.6570
37 203.2447
38 212.7538
39 245.6641
40 236.1019
> data.frame(y.hat2)
     y.hat2
1  196.6543
2  219.6506
3  190.7164
4  259.7229
5  223.7913
6  191.1468
7  228.2742
8  235.3964
9  230.7823
10 205.7684
11 257.3526
12 225.5040
13 196.8023
14 151.5685
15 246.3711
16 213.1620
17 213.9779
18 241.2303
19 237.7496
20 231.2967
21 240.5246
22 236.6401
23 216.5542
24 157.9655
25 232.0326
26 212.8442
27 210.0149
28 172.6871
29 200.8642
30 226.3017
31 253.0065
32 211.5197
33 225.4424
34 212.9101
35 175.3467
36 202.6570
37 203.2447
38 212.7538
39 245.6641
40 236.1019
> 
> y.m <- mean(y)
> y.obs <- y
> 
> y.obs
 [1] 192.7126 205.8052 218.6811 262.3888 193.4837
 [6] 169.8283 233.5089 252.6651 218.5835 235.7310
[11] 255.1693 197.3759 210.5144 127.1783 283.6544
[16] 279.4153 197.8234 193.3320 247.3646 218.1511
[21] 313.4362 225.2693 236.6322 172.1930 197.3820
[26] 216.4852 144.7105 232.8794 206.8009 295.4002
[31] 254.8164 184.0961 240.1289 177.2305 139.5816
[36] 212.9007 187.5990 209.8524 235.6025 205.6303
> y.m
[1] 217.0499
> 
> sum((y.obs-y.m)^2)
[1] 63501.52
> ss(y)
[1] 63501.52
> 
> res <- y.obs - y.hat
> res
 [1]  -3.941684 -13.845403  27.964735   2.665889
 [5] -30.307576 -21.318465   5.234736  17.268727
 [9] -12.198772  29.962596  -2.183295 -28.128092
[13]  13.712107 -24.390249  37.283390  66.253356
[17] -16.154466 -47.898288   9.614998 -13.145540
[21]  72.911540 -11.370779  20.077988  14.227511
[25] -34.650622   3.640985 -65.304380  60.192299
[29]   5.936666  69.098576   1.809915 -27.423536
[33]  14.686468 -35.679652 -35.765104  10.243725
[37] -15.645761  -2.901348 -10.061608 -30.471587
> m.lma$residuals
         1          2          3          4          5 
 -3.941684 -13.845403  27.964735   2.665889 -30.307576 
         6          7          8          9         10 
-21.318465   5.234736  17.268727 -12.198772  29.962596 
        11         12         13         14         15 
 -2.183295 -28.128092  13.712107 -24.390249  37.283390 
        16         17         18         19         20 
 66.253356 -16.154466 -47.898288   9.614998 -13.145540 
        21         22         23         24         25 
 72.911540 -11.370779  20.077988  14.227511 -34.650622 
        26         27         28         29         30 
  3.640985 -65.304380  60.192299   5.936666  69.098576 
        31         32         33         34         35 
  1.809915 -27.423536  14.686468 -35.679652 -35.765104 
        36         37         38         39         40 
 10.243725 -15.645761  -2.901348 -10.061608 -30.471587 
> 
> reg <- y.hat - y.m
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)
> ss.tot <- ss(y)
> ss.tot
[1] 63501.52
> ss.res
[1] 38794.04
> ss.reg
[1] 24707.48
> ss.reg+ss.res
[1] 63501.52
> 
> df.tot <- length(dat$y)-1
> df.reg <- 2 - 1
> df.res <- df.tot - df.reg
> 
> ms.tot <- ss.tot/df.tot
> var(y)
[1] 1628.244
> ms.tot
[1] 1628.244
> 
> ms.reg <- ss.reg / df.reg
> ms.res <- ss.res / df.res
> ms.reg
[1] 24707.48
> ms.res
[1] 1020.896
> 
> 
> f.cal <- ms.reg / ms.res
> p.val <- pf(f.cal, df.reg, df.res, lower.tail = F)
> f.cal
[1] 24.20177
> p.val
[1] 1.706524e-05
> 
> r.sq <- ss.reg / ss.tot
> r.sq
[1] 0.3890849
> 
> a.cal
[1] -69.43374
> b.cal
[1] 7.096781
> 
> # residual standard error 
> summary(m.lma)

Call:
lm(formula = y ~ x, data = dat)

Residuals:
    Min      1Q  Median      3Q     Max 
-65.304 -22.086  -2.542  14.342  72.912 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -69.434     58.453  -1.188    0.242    
x              7.097      1.443   4.920 1.71e-05 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 31.95 on 38 degrees of freedom
Multiple R-squared:  0.3891,	Adjusted R-squared:  0.373 
F-statistic:  24.2 on 1 and 38 DF,  p-value: 1.707e-05

> sigma(m.lma)
[1] 31.95146
> rse <- sqrt(ss.res / (n-2))
> rse 
[1] 31.95146
> 
> # standard error for b 
> r.square <- summary(m.lma)$r.square
> (r.cal)^2
[1] 0.3890849
> r.square
[1] 0.3890849
> 1-r.square
[1] 0.6109151
> sqrt((1-r.square)/(n-2))
[1] 0.126794
> cor.se <- sqrt((ss.res/ss.tot)/(n-2)) # se for residual
> cor.se
[1] 0.126794
> 
> t.r <- r.cal/cor.se
> p.r <- pt(t.r, n-2, lower.tail = F)*2
> cat(t.r, p.r)
4.919529 1.706524e-05> cor.test(x,y)

	Pearson's product-moment correlation

data:  x and y
t = 4.9195, df = 38, p-value = 1.707e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.3875647 0.7831105
sample estimates:
      cor 
0.6237667 

> 
> # standard error for b1 (regression slope)
> res.se <- sqrt(ss.res/(n-2)) # residual se (se for regression)
> res.se
[1] 31.95146
> ss.x
[1] 490.5753
> bse <- res.se/sqrt(ss.x)
> bse
[1] 1.442573
> b.cal
[1] 7.096781
> t.b <- b.cal/bse 
> p.b <- pt(t.b, n-2, lower.tail = F)*2
> cat(t.b, p.b)
4.919529 1.706524e-05> 
> ##################
> ############################
> # Install and load MASS package if you haven't already
> # install.packages("MASS")
> library(MASS)
> 
> # 1. Define inputs
> means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
> # allowance, iq, cgpa
> sds <- c(8, 15, .5)    # Desired standard deviations
> corr_matrix <- matrix(c(1, 0.05, 0.4,
+                         0.05, 1, 0.5,
+                         0.4, 0.5, 1),
+                       nrow = 3) # Target correlation matrix
> 
> # 2. Create Covariance Matrix
> # Create diagonal matrix of SDs
> sd_diag <- diag(sds)
> sd_diag
     [,1] [,2] [,3]
[1,]    8    0  0.0
[2,]    0   15  0.0
[3,]    0    0  0.5
> 
> # Calculate covariance matrix
> cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
> 
> # 3. Simulate data 
> set.seed(123) # For reproducibility
> n_samples <- 100
> nb <- n_samples
> simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
> 
> # Convert to data frame for easier use
> simulated_df <- as.data.frame(simulated_data)
> # colnames(simulated_df) <- c("all", "iq", "cgpa")
> colnames(simulated_df) <- c("V1", "V2", "V3")
> 
> # Verify correlations (should be close to target)
> cor(simulated_df)
           V1         V2        V3
V1 1.00000000 0.09388135 0.4325705
V2 0.09388135 1.00000000 0.5655917
V3 0.43257052 0.56559168 1.0000000
> head(simulated_df)
        V1        V2       V3
1 34.62462 108.60573 3.869495
2 42.16696 103.36635 3.617166
3 37.16023  76.70009 2.455708
4 37.18199  99.04391 3.130241
5 32.33711  98.35040 2.631378
6 38.68389  74.29577 2.370633
> str(simulated_df)
'data.frame':	100 obs. of  3 variables:
 $ V1: num  34.6 42.2 37.2 37.2 32.3 ...
 $ V2: num  108.6 103.4 76.7 99 98.4 ...
 $ V3: num  3.87 3.62 2.46 3.13 2.63 ...
> 
> sdf <- simulated_df 
> colnames(sdf) <- c("all", "iq", "cgpa")
> 
> lm.3.21 <- lm(cgpa~all+iq, data=sdf)
> summary(lm.3.21)

Call:
lm(formula = cgpa ~ all + iq, data = sdf)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73106 -0.25793 -0.04869  0.24249  0.81092 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.11516    0.31708   0.363    0.717    
all          0.02479    0.00483   5.133 1.47e-06 ***
iq           0.01946    0.00274   7.101 2.06e-10 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3712 on 97 degrees of freedom
Multiple R-squared:  0.4652,	Adjusted R-squared:  0.4541 
F-statistic: 42.18 on 2 and 97 DF,  p-value: 6.582e-14

> attach(sdf)  
The following objects are masked from sdf (pos = 6):

    all, cgpa, iq
> sp.31 <- sp(cgpa, all)
> sp.32 <- sp(cgpa, iq)
> cov(sdf)
           all         iq      cgpa
all  60.197853   9.962597 1.6863568
iq    9.962597 187.070586 3.8869397
cgpa  1.686357   3.886940 0.2524667
> cov.31 <- cov(cgpa, all)
> cov.32 <- cov(cgpa, iq)
> df.tot <- nb-1
> sp.31/df.tot
[1] 1.686357
> sp.32/df.tot
[1] 3.88694
> 
> ss.1 <- ss(all)
> ss.2 <- ss(iq)
> ss.3 <- ss(cgpa)
> sp.12 <- sp(all, iq)
> sp.21 <- sp(iq, all)
> 
> b1 <- sp.31/(sp.12)
> b1
[1] 0.1692688
> b2 <- sp.32/ss.2
> b2
[1] 0.02077793
> summary(lm.3.21)

Call:
lm(formula = cgpa ~ all + iq, data = sdf)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73106 -0.25793 -0.04869  0.24249  0.81092 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.11516    0.31708   0.363    0.717    
all          0.02479    0.00483   5.133 1.47e-06 ***
iq           0.01946    0.00274   7.101 2.06e-10 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3712 on 97 degrees of freedom
Multiple R-squared:  0.4652,	Adjusted R-squared:  0.4541 
F-statistic: 42.18 on 2 and 97 DF,  p-value: 6.582e-14

> 
> 
> ################
> # cov 를 구하는 방식에 따르면 그래프의 내용으로 
> # cor 값이 왜 양수인지를 설명할 수 있다
> means <- c(40, 100, 3.0) # Desired means for Var1, Var2, Var3
> sds <- c(8, 15, .5)    # Desired standard deviations
> corr_matrix <- matrix(c(1, 0.05, 0.9,
+                         0.05, 1, 0.01,
+                         0.9, 0.01, 1),
+                       nrow = 3) # Target correlation matrix
> 
> # 2. Create Covariance Matrix
> # Create diagonal matrix of SDs
> sd_diag <- diag(sds)
> sd_diag
     [,1] [,2] [,3]
[1,]    8    0  0.0
[2,]    0   15  0.0
[3,]    0    0  0.5
> 
> # Calculate covariance matrix
> cov_matrix <- sd_diag %*% corr_matrix %*% sd_diag
> cov_matrix
     [,1]    [,2]  [,3]
[1,] 64.0   6.000 3.600
[2,]  6.0 225.000 0.075
[3,]  3.6   0.075 0.250
> 
> # 3. Simulate data 
> set.seed(123) # For reproducibility
> n_samples <- 100
> nb <- n_samples
> simulated_data <- mvrnorm(n = n_samples, mu = means, Sigma = cov_matrix)
> sdat <- data.frame(simulated_data)
> sdat
          X1        X2       X3
1   34.61675 108.61744 3.163956
2   42.16256 103.37587 3.403127
3   37.16409  76.69732 2.809737
4   37.18044  99.04614 2.960093
5   32.33849  98.34414 2.479809
6   38.68873  74.29237 2.852685
7   33.48833  93.32099 2.469239
8   27.40274 119.46838 2.137528
9   37.32901 110.41433 3.195801
10  47.58356 106.41012 3.408479
11  34.72343  81.81363 2.749595
12  44.64794  94.42300 3.321618
13  26.84967  94.47137 2.532521
14  39.50108  98.35649 2.861675
15  44.46764 108.18116 3.026789
16  41.38501  73.11239 3.473628
17  40.57072  92.50215 2.945131
18  35.99392 129.68393 2.582215
19  32.84239  89.73427 2.339755
20  32.10668 107.39491 2.267057
21  41.54219 115.97905 2.943581
22  32.55288 103.55109 2.710066
23  36.64452 115.53330 3.033661
24  38.35466 111.00760 3.048028
25  55.06838 108.82495 3.760541
26  35.73830 125.48944 2.742768
27  41.41926  87.36544 2.941776
28  40.54528  97.67647 2.877667
29  32.94869 117.35515 2.774157
30  38.74316  81.21744 2.730479
31  51.26617  93.17492 4.068185
32  43.76910 104.29082 3.187936
33  39.82657  86.56354 3.052558
34  36.14692  86.95613 2.637517
35  23.16233  88.28957 1.939267
36  48.66014  89.33543 3.214822
37  28.03630  92.12748 2.294712
38  45.93459 100.70840 3.424908
39  55.40245 104.02051 3.934461
40  28.69906 106.13542 2.185648
41  45.99815 110.20952 3.155028
42  38.02978 103.19604 2.776037
43  28.14191 119.44543 2.633423
44  26.71491  67.92251 2.041198
45  26.54660  82.36068 2.222969
46  36.36706 117.00178 3.188458
47  28.56071 106.47705 2.325290
48  45.76711 106.79340 3.021906
49  56.33243  87.67770 3.791184
50  29.76951 101.63341 2.526536
51  46.14966  95.96646 3.269975
52  46.16013 100.19912 3.225144
53  42.67928 100.54399 3.075765
54  31.18730  79.77510 2.546294
55  39.15327 103.42185 3.295496
56  36.91655  77.34084 2.833382
57  45.34460 123.05946 3.509233
58  36.69359  91.34351 2.960676
59  47.72894  97.85179 3.413750
60  36.90880  96.87259 2.496307
61  48.19550  93.99313 3.356063
62  31.91356 107.84546 2.428309
63  30.12886 105.37214 2.447209
64  66.41797 114.31148 4.755703
65  37.24379 116.19833 3.323727
66  42.19160  95.35961 3.465238
67  44.83146  93.08871 3.251667
68  36.13105  99.34861 2.400962
69  43.61453  86.01480 3.135781
70  41.79859  69.14501 3.156951
71  38.54505 107.42833 3.092811
72  41.79883 134.61153 3.270027
73  39.15813  84.92720 3.118826
74  57.39914 110.00218 3.666512
75  34.45761 110.53892 2.859638
76  30.68617  84.94549 2.395306
77  40.45846 104.25955 3.058844
78  43.15856 118.21479 3.173014
79  43.37719  97.15119 3.286828
80  36.41929 102.21937 2.800805
81  31.53130 100.22956 2.159844
82  49.85667  93.84622 3.723130
83  37.41190 105.66295 2.931257
84  32.73620  90.59375 2.543617
85  38.23606 103.37699 2.922223
86  38.23953  95.08296 2.935583
87  48.24248  83.22042 3.532548
88  40.41318  93.44876 3.387432
89  46.20239 104.66356 3.296775
90  35.37183  82.91990 2.795476
91  41.14230  85.03686 3.335768
92  37.08973  91.87255 3.074384
93  40.60752  96.39187 3.287286
94  33.21227 109.68303 2.480376
95  28.75489  79.98481 2.824217
96  56.27310 108.40781 3.922468
97  43.54966  67.01818 3.644380
98  29.17490  77.38738 2.122459
99  35.25397 103.71669 2.732525
100 31.09722 115.74638 2.750965
> cor.test(sdat$X1, sdat$X2)

	Pearson's product-moment correlation

data:  sdat$X1 and sdat$X2
t = 0.93346, df = 98, p-value = 0.3529
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.1044669  0.2850397
sample estimates:
       cor 
0.09387751 

> plot(sdat$X1, sdat$X2)
> abline(v=mean(sdat$X1), col='blue')
> abline(h=mean(sdat$X2), col='red')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="1 (-)", pos=1, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)+sd(sdat$X2), labels="2 (+)", pos=2, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="3 (-)", pos=3, col='blue')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X2)-sd(sdat$X2), labels="4 (+)", pos=4, col='blue')
> cor(sdat$X1, sdat$X2)
[1] 0.09387751
> 
> plot(sdat$X1, sdat$X3)
> abline(v=mean(sdat$X1), col='blue')
> abline(h=mean(sdat$X3), col='red')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="1 (-)", pos=1, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)+sd(sdat$X3), labels="2 (+)", pos=2, col='blue')
> text(x=mean(sdat$X1)+sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="3 (-)", pos=3, col='blue')
> text(x=mean(sdat$X1)-sd(sdat$X1), y=mean(sdat$X3)-sd(sdat$X3), labels="4 (+)", pos=4, col='blue')
> cor(sdat$X1, sdat$X3)
[1] 0.9079756
> 
c/ms/2026/schedule/w10.lecture.note.1778025733.txt.gz · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki