Differences

This shows you the differences between two versions of the page.

--- interaction_effects_in_regression_analysis [2017/11/30 12:34] – [Ex. 2] hkimscil
+++ interaction_effects_in_regression_analysis [2025/06/16 13:00] (current) – [E.g.2] hkimscil
@@ Line 6: / Line 6: @@
 ====== E.g. 1 One category and one continuous ======
 Data 만들기
+<code>
+x<-runif(50,0,10)
+f1<-gl(n=2,k=25,labels=c("Low","High"))
+modmat<-model.matrix(~x*f1,data.frame(f1=f1,x=x))
+coeff<-c(1,3,-2,1.5)
+y<-rnorm(n=50,mean=modmat%*%coeff,sd=0.5)
+dat<-data.frame(y=y,f1=f1,x=x)
+dat
+mod <- lm(y~x*f1)
+summary(mod)
+library(ggplot2)
+library(jtools) # in case not loading, install.packages("jtools")
+library(interactions)
+interact_plot(mod, pred = "x", modx = "f1", plot.points = TRUE)
+</code>
 <code>> x<-runif(50,0,10)
 > f1<-gl(n=2,k=25,labels=c("Low","High"))
@@ Line 15: / Line 36: @@
 작물의 무게가 온도와 토양의 질소에 영향을 받을까? 라는 연구문제에서 추출된 데이터. 작물무게에 대한 질소함유량의 영향과 온도의 영향, 그리고 그 두 변인의 상호작용효과에 대해서 알고 싶다.
-<code>> dat
+<code>> head(dat)
             y   f1           x
   21.8693480  Low 7.128280776
@@ Line 23: / Line 44: @@
    7.1786120  Low 1.734487077
   20.9093423  Low 6.526126855
-  25.8828775  Low 8.139958519
+</code>
-   5.3130186  Low 1.629746817
-   6.7978354  Low 1.820823189
-20.2008030  Low 6.450495571
-  5.6307366  Low 1.438263836
-24.5176666  Low 7.829027227
-  2.9836344  Low 0.602117125
-24.9027322  Low 8.016709497
-  7.1347831  Low 2.223906002
-21.0629404  Low 6.658385394
-  4.9330174  Low 1.169058718
-23.3587524  Low 7.323669230
-11.5338189  Low 3.576785533
-28.2193423  Low 9.245026903
-  5.9288641  Low 1.655559973
-  4.4854811  Low 1.199908606
-18.5213789  Low 5.978340823
-  3.5410098  Low 0.717360801
-14.1031612  Low 4.403464922
-  4.2389757 High 1.241753022
-23.2650544 High 5.428188895
-24.0330453 High 5.586834035
-16.6518724 High 3.913568701
-  4.2317570 High 1.012214390
-26.1115118 High 5.969692939
-13.1004694 High 3.023571330
-39.8678989 High 9.080975151
-16.2227452 High 3.806260177
-24.8683087 High 5.562874263
-43.4090915 High 9.818060326
--0.2444546 High 0.007114746
-11.1822149 High 2.640837491
-43.5794651 High 9.933570819
-25.3860623 High 5.960662568
-26.5072704 High 6.139642103
-30.6778013 High 6.968976315
-27.4982726 High 6.315991175
-16.3318687 High 3.883612270
-37.8328875 High 8.564988615
-12.6958950 High 2.722899497
-32.9697332 High 7.371928061
-39.6831930 High 9.026269785
-41.7586348 High 9.542581048
-15.9704872 High 3.644464843</code>
 <code>> mod <- lm(y~x*f1)
@@ Line 86: / Line 64: @@
 Residual standard error: 0.4842 on 46 degrees of freedom
 Multiple R-squared:  0.9983,    Adjusted R-squared:  0.9981
-F-statistic:  8792 on 3 and 46 DF,  p-value: < 2.2e-16</code>
+F-statistic:  8792 on 3 and 46 DF,  p-value: < 2.2e-16
+</code>
+<code>
+regression formula:
+y hat ~ 1.35817 + 2.95059*x + -2.63301*f1High + 1.59598*x:f1High
+x: 질소
+f1: High | Low
+</code>
+  * when f1High = 0,
+    * ''y hat ~ 1.35817 + 2.95059*x''
+    * x변인(질소)이 0인 상태일 때의 작물의 무게가 1.3g정도 된다는 것을 말한다.
+    * 다음의 x는 x변인(질소)의 단위가 1씩 증가할 때마다 작물의 무게는 2.9씩 증가한다는 것을 말한다. (일반적인 regression line 해석)
+  * when f1High = 1,
+    * ''y hat ~ 1.35817 + 2.95059*x + -2.63301*(1) + 1.59598*x:(1)''
+    * ''y hat ~ 1.35817 + -2.63301*(1) + 2.95059*x + 1.59598*x:(1)''
+    * ''y hat ~ -1.27484 + 4.54657x''
+    * x:f1High는 온도가 High일 경우에 x의 영향력이 1.59 더 많다는 것을 말한다 (1.59598*x:(1)).
+    * 즉, x의 기울기(slope)는 온도에 따라서 변하는데, 온도가 높은 상태일 경우에 2.95 + 1.59 = 4.54임을 말한다.
+    * 또한 온도가 높은 상태일 때의 절편은 1.35817 + -2.63301 = -1.27484 가 된다.
-coefficient 해석
-  - 우선 f1High 라는 변인은 High 상태(온도가 높은 상태)를 의미. 따라서 default 값(제어된 값)은 온도가 낮은 상태 (low).
-  - 절편 = 온도가 Low인 상태(제어상태)이며 x변인(질소)이 0인 상태일 때의 작물의 무게가 1.3g정도 된다는 것을 말한다.
-  - 다음의 x는 x변인(질소)의 단위가 1씩 증가할 때마다 작물의 무게는 2.9씩 증가한다는 것을 말한다. (일반적인 regression line 해석)
-  - 다음 x:f1High는 온도가 High일경우에 x의 영향력이 1.59 더 많다는 것을 말한다. 즉, x의 기울기(slope)는 온도에 따라서 변하는데, 온도가 높은 상태일 경우에 2.90 + 1.59 = 4.54임을 말한다.
 <code>library(ggplot2)
 library(jtools) # in case not loading, install.packages("jtools")
@@ Line 105: / Line 96: @@
 ====== Two category variables ======
-<code>> set.seed(12)
+<code>
+> set.seed(12)
 > f1<-gl(n=2,k=30,labels=c("Low","High"))
 > f2<-as.factor(rep(c("A","B","C"),times=20))
@@ Line 201: / Line 193: @@
 </code>
-  - 온도: Low, High
+  - 온도: f1High, f1Low
   - 질소: A, B, C (Low, Medium, High)
   - 각각 Low가 control인 상태 (아웃풋에 High와 Medium, High에 대한 정보가 출력)
 이 때 coefficients를 이용해서 모델을 해석해보면
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+|        | f2A  |  f2B | f2C |
+| f1Low | <wrap>f1High = 0,
+f2B = 0,
+f2C = 0
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+''y hat ~ 0.97849 + 3.00306*(0) + -1.97878*(0) + -4.00206*(0) + 0.98924*(0):(0) + -1.16620*(0):(0)''
+''y hat ~ 0.97849''
+</wrap>     | <wrap>f1High = 0,
+f2A = 0,
+f2B = 1,
+f2C = 0
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+''y hat ~ 0.97849 + 3.00306*(0) + -1.97878*(1) + -4.00206*(0) + 0.98924*(0):(1) + -1.16620*(0):(0)''
+''y hat ~ 0.97849 + -1.97878*(1)''
+</wrap>    | <wrap>f1High = 0,
+f2A = 0,
+f2B = 0,
+f2C = 1
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+''y hat ~ 0.97849 + 3.00306*(0) + -1.97878*(0) + -4.00206*(1) + 0.98924*(0):(0) + -1.16620*(0):(1)''
+''y hat ~ 0.97849 + -4.00206*(1)''
+</wrap>  |
+| f1High  | <wrap>f1Low = 0,
+f1High = 1,
+f2B = 0,
+f2C = 0
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+''y hat ~ 0.97849 + 3.00306*(1) + -1.97878*(0) + -4.00206*(0) + 0.98924*(1):(0) + -1.16620*(1):(0)''
+''y hat ~ 0.97849 + 3.00306*(1)''
+''y hat ~ 3.98155''
+</wrap>  | <wrap>f1Low = 0,
+f1High = 1,
+f2A = 0,
+f2B = 1,
+f2C = 0
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+''y hat ~ 0.97849 + 3.00306*(1) + -1.97878*(1) + -4.00206*(0) + 0.98924*(1):(1) + -1.16620*(1):(0)''
+''y hat ~ 0.97849 + 3.00306*(1) + -1.97878*(1) + 0.98924*(1):(1)''
+''y hat ~ 2.99201''
+</wrap> | <wrap>f1Low = 0,
+f1High = 1,
+f2A = 0,
+f2B = 0,
+f2C = 1
+''y hat ~ 0.97849 + 3.00306*f1High + -1.97878*f2B + -4.00206*f2C + 0.98924*f1High:f2B + -1.16620*f1High:f2C''
+''y hat ~ 0.97849 + 3.00306*(1) + -1.97878*(0) + -4.00206*(1) + 0.98924*(1):(0) + -1.16620*(1):(1)''
+''y hat ~ 0.97849 + 3.00306*(1) + -4.00206*(1) + -1.16620*(1):(1)''
+''y hat ~ -1.18671''
+</wrap>  |
+|        | f2A  | f2B  | f2C  |
+| f1Low  | <wrap>
+''y hat ~ 0.97849 ''
+</wrap>     | <wrap>
+''y hat ~ -1.00029''
+</wrap>    | <wrap>
+''y hat ~ -3.02357''
+</wrap>  |
+| f1High  | <wrap>
+''y hat ~ 3.98155''
+</wrap>  | <wrap>
+''y hat ~ 2.99201''
+</wrap> | <wrap>
+''y hat ~ -1.18671''
+</wrap>  |
   - 우선 f1High, f2B, f2C 가 나타나는 것은 f1Low, f2A가 default임을 의미 (즉, 온도가 낮고 질소함유량이 낮은 상태).
@@ Line 215: / Line 273: @@
   - f1High:f2C : 질소가 High이고 온도도 High인 상태 -1.16 감소한다.
-<code>interact_plot(mod2, pred = "f1", modx = "f2")</code>
+<code>
-{{:r:interaction.effects.2.jpeg}}
+> interact_plot(mod2, pred = "f1", modx = "f2")
+</code>
+{{:pasted:20250616-072703.png?400}}
+<code>
+> interact_plot(mod2, pred = "f2", modx = "f1")
+</code>
+{{:pasted:20250616-072946.png?400}}
+:r:interaction.effects.2.jpeg
 ====== Two continuous variables ======
 <code># third case interaction between two continuous variables
@@ Line 301: / Line 366: @@
 Residual standard error: 0.3264 on 46 degrees of freedom
 Multiple R-squared:      1,    Adjusted R-squared:      1
-F-statistic: 4.661e+05 on 3 and 46 DF,  p-value: < 2.2e-16</code>
+F-statistic: 4.661e+05 on 3 and 46 DF,  p-value: < 2.2e-16
+</code>
+<code>
+y hat ~  0.965921 + 1.995115*x1 + -0.993288*x2 + 1.499595*x1*x2
+y hat ~  0.965921 + 1.995115*x1 + (-0.993288 + 1.499595*x1)*x2
+y hat ~  0.965921 + (1.995115 + 1.499595*x2)*x1 + -0.993288*x2
+</code>
   - (Intercept): at 0°C and with nitrogen concentration of 0 mg/g일때의 작물의 바이오매스량이 0.96 mg/g
-  - x1: nitrogen concentration of 0mg/g인 상태에서 1°C 증가할 때 마다 작물의 바이오매스량이 2 mg/g 증가
+  - x1:
-  - x2: temperature of 0°C 상태에서 nitrogen concentration 이 1 mg/g 증가할 때 마다 작물의 바이오매스량이 ~1 mg/g 정도씩 감소
+    - nitrogen concentration of 0 mg/g인 상태에서
-  - x1:x2 : 질소량이 1씩 증가할 때 마다, 온도의 영향력은 1.5식 증가한다. 예를 들면 질소량이 0일 경우, 온도와 작물 간의 기울기는 약 2인데, 질소의 양이 1 증가하고 온도가 1 증가하면 기울기는 2 + 1.5 = 3.5가 된다.
+    - 온도 x1이 1°C 증가할 때 마다
+    - 작물의 바이오매스량이 약 2 mg/g 증가
+  - x2:
+    - temperature of 0°C 상태에서
+    - x2인 nitrogen concentration 이 1 mg/g 증가할 때 마다
+    - 작물의 바이오매스량이 ~1 mg/g 정도씩 감소
+  - (위의 마지막 식에서) x1:x2 = x1*x2 : 질소량이 1씩 증가할 때 마다, 온도의 영향력은 1.5씩 증가한다. 예를 들면 질소량이 0일 경우, 온도와 작물 간의 기울기는 약 2인데, 질소의 양이 1 증가하고 온도가 1 증가하면 기울기는 2 + 1.5 = 3.5가 된다.
+  - <WRAP box><code>
+# 0.97 = 1 로 보면
+x2=1: 0.97 + *3.5 x1 + -1 (1=x2)
+       0 + 3.5 x1
+x2=2: 0.97 + *5.0 x1 + -1 (2=x2)
+      -1 + 5.0 x1
+x2=3: 0.97 + *6.5 x1 + -1 (3=x2)
+      -2 + 6.5 x1
+x2=4: 0.97 + *8.0 x1 + -1 (4=x2)
+      -3 + 8.0 x1
+x2=5: 0.97 + *9.5 x1 + -1 (5=x2)
+      -4 + 9.5 x1
+</code>
+<code>
+*(1.995115 + 1.499595*x2):
+x2=1: 3.495 ~ *3.5
+x2=2: 4.994 ~ *5.0
+x2=3: 6.494 ~ *6.5
+x2=4: 7.994 ~ *8.0
+x2=5: 9.493 ~ *9.5
+</code>
+</WRAP>
 {{https://biologyforfun.files.wordpress.com/2014/04/interaction31.png}}
@@ Line 318: / Line 420: @@
 ====== E.g.2  ======
 {{:r:states.rds}}
-Download the data file to c:/Rstatistics first. Then
+<code>
-do
+# states.data <- readRDS("c:/Rstatistics/dataSets/states.rds")
-<code>states.data <- readRDS("c:/Rstatistics/dataSets/states.rds") </code>
+states.data <- readRDS(url("http://commres.net/wiki/_media/r/states.rds"))
+</code>
 Or, read the above data file directly
 <code>
 z <- gzcon(url("http://commres.net/wiki/_media/r/states.rds"))
-data <- readRDS(z)
+states.data <- readRDS(z)
 head(states.data,5)
@@ Line 368: / Line 471: @@
 </code>
 <code>
-> data.info <- data.frame(attributes(data)[c("names", "var.labels")])
+> data.info <- data.frame(attributes(states.data)[c("names", "var.labels")])
 > # attributes(data) reveals various attributes of the data file,
 > # which contains variable names and labels.
@@ Line 494: / Line 597: @@
 >
 </code>
+<code>
+y hat = 1048 + -0.004 expense + -3.809 percent + 0.00025 expense:percent
+y hat = 1048 + -0.004 x1 + -3.809 x2 + 0.00025 x1x2
+y hat = 1048 + (-0.004 + 0.00025 x2) x1 + -3.809 x2
+</code>
+<code>
+> mm1 <- mean(states.data$percent) - sd(states.data$percent)
+> mp1 <- mean(states.data$percent) + sd(states.data$percent)
+> mp2 <- mean(states.data$percent) + (2*sd(states.data$percent))
+> mo0 <- mean(states.data$percent)
+> mm1
+[1] 9.571891
+> mp1
+[1] 61.95752
+> mp2
+[1] 88.15033
+> mo0
+[1] 35.76471
+</code>
+<code>
+> k <- c(mm1, mo0, mp1, mp2)
+> k
+[1]  9.571891 35.764706 61.957520 88.150335
+> ic <- 1048 + (-3.809 * k)
+> ic
+[1] 1011.5407  911.7722  812.0038  712.2354
+> x1.slope <- -0.004 + 0.00025 * k
+> x1.slope
+[1] -0.001607027  0.004941176  0.011489380  0.018037584
+>
+</code>
+<code>
+y hat when mm1 ~ 1011.5407 + -0.001607027 x1
+y hat when mo0 ~ 911.7722  +  0.004941176 x1
+y hat when mp1 ~ 812.0038  +  0.011489380 x1
+y hat when mp2 ~ 712.2354  +  0.018037584 x1
+</code>
+  * 위의 y hat은 (회귀식) 각각 percent 가 mean-1sd, mean, mean+1sd, 그리고 mean+2sd 일 경우의 식이 된다.
+  * x1 은 expense 이다.
+  * x2 는 10, 36, 62, 88% 의 응시율이다
+  * 10% 일 때는 expense가 한단위 증가할 때 마다 csat점수는 -0.0016의 기울기를 갖는다
+  * 36% 일 경우는 0.005 점 증가율을 갖는다
+  * 62% 일 경우는 0.011 점 증가율을 갖는다
+  * 88% 일 경우는 0.018 점 증가율을 갖는다
 <code>library(ggplot2)
 library(jtools) # in case not loading, install.packages("jtools")
+library(interactions) # interact_plot requires this package now
 </code>
@@ Line 505: / Line 661: @@
 {{:r:twoIV.model.jpeg}}
+{{:pasted:20230606-230743.png}}
+{{:pasted:20230606-230804.png}}
+===== Analysis again =====
+<code>
+attach(states.data)
+n.1 <- lm(csat~expense)
+n.2 <- lm(csat~percent)
+n.12 <- lm(csat~expense+percent)
+n.12i <- lm(csat~expense*percent)
+n.1i <- lm(csat~expense+expense:percent)
+n.2i <- lm(csat~percent+expense:percent)
+n.2i.temp <- lm(csat~percent+percent:expense)
+n.i <- lm(csat~expense:percent)
+s.n.1 <- summary(n.1)
+s.n.2 <- summary(n.2)
+s.n.12 <- summary(n.12)
+s.n.12i <- summary(n.12i)
+s.n.1i <- summary(n.1i)
+s.n.2i <- summary(n.2i)
+s.n.2i.temp <- summary(n.2i.temp)
+s.n.i <- summary(n.i)
+s.n.12i
+# y hat ~ 1048 + -0.003917 x1 + -3.809 x2 + 0.000249 x1 x2
+# y hat ~ 1048 + -0.003917 x1 + (-3.809 + 0.000249 x1) x2
+# x2를 (percent를) 중심으로 보기
+e.mm1 <- mean(expense)-sd(expense)
+e.m0 <- mean(expense)
+e.mp1 <- mean(expense)+sd(expense)
+e.mp2 <- mean(expense)+(2*sd(expense))
+# x1의 case가 4가지 (holding constants)
+k <- c(e.mm1, e.m0, e.mp1, e.mp2)
+ic <- 1048 + (-0.003917*k)
+slp <- -3.809 + (0.000249*k)
+ic
+slp
+# y hat ~ 1032.979 - 2.85 x2
+# y hat ~ 1027.491 - 2.51 x2
+# y hat ~ 1022.002 - 2.16 x2
+# y hat ~ 1016.514 - 1.81 x2
+interact_plot(n.12i,
+       pred = "percent",
+       modx = "expense",
+       modx.values = k)
+# or
+mne <- min(expense)
+mxe <- max(expense)
+kk <- seq(mne, mxe, by = 2000)
+interact_plot(n.12i,
+       pred = "percent",
+       modx = "expense",
+       modx.values = kk)
+</code>
+<code>
+> n.1 <- lm(csat~expense)
+> n.2 <- lm(csat~percent)
+> n.12 <- lm(csat~expense+percent)
+> n.12i <- lm(csat~expense*percent)
+> n.1i <- lm(csat~expense+expense:percent)
+> n.2i <- lm(csat~percent+expense:percent)
+> n.2i.temp <- lm(csat~percent+percent:expense)
+> n.i <- lm(csat~expense:percent)
+>
+> s.n.1 <- summary(n.1)
+> s.n.2 <- summary(n.2)
+> s.n.12 <- summary(n.12)
+> s.n.12i <- summary(n.12i)
+> s.n.1i <- summary(n.1i)
+> s.n.2i <- summary(n.2i)
+> s.n.2i.temp <- summary(n.2i.temp)
+> s.n.i <- summary(n.i)
+>
+> s.n.12i
+Call:
+lm(formula = csat ~ expense * percent)
+Residuals:
+   Min     1Q Median     3Q    Max
+-65.36 -19.61  -3.05  17.53  76.18
+Coefficients:
+                 Estimate Std. Error t value Pr(>|t|)
+(Intercept)      1.05e+03   3.56e+01   29.41  < 2e-16 ***
+expense         -3.92e-03   7.76e-03   -0.51    0.616
+percent         -3.81e+00   7.04e-01   -5.41  2.1e-06 ***
+expense:percent  2.49e-04   1.31e-04    1.90    0.063 .
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 30.8 on 47 degrees of freedom
+Multiple R-squared:  0.801,	Adjusted R-squared:  0.788
+F-statistic: 63.1 on 3 and 47 DF,  p-value: <2e-16
+>
+> # y hat ~ 1048 + -0.003917 x1 + -3.809 x2 + 0.000249 x1 x2
+> # y hat ~ 1048 + -0.003917 x1 + (-3.809 + 0.000249 x1) x2
+> # x2를 (percent를) 중심으로 보기
+>
+> e.mm1 <- mean(expense)-sd(expense)
+> e.m0 <- mean(expense)
+> e.mp1 <- mean(expense)+sd(expense)
+> e.mp2 <- mean(expense)+(2*sd(expense))
+>
+> # x1의 case가 4가지 (holding constants)
+> k <- c(e.mm1, e.m0, e.mp1, e.mp2)
+> ic <- 1048 + (-0.003917*k)
+> slp <- -3.809 + (0.000249*k)
+> ic
+[1] 1033 1027 1022 1017
+> slp
+[1] -2.854 -2.505 -2.156 -1.807
+>
+> # y hat ~ 1032.979 - 2.85 x2
+> # y hat ~ 1027.491 - 2.51 x2
+> # y hat ~ 1022.002 - 2.16 x2
+> # y hat ~ 1016.514 - 1.81 x2
+>
+> interact_plot(n.12i,
++               pred = "percent",
++               modx = "expense",
++               modx.values = k)
+> # or
+> mne <- min(expense)
+> mxe <- max(expense)
+> kk <- seq(mne, mxe, by = 2000)
+> interact_plot(n.12i,
++               pred = "percent",
++               modx = "expense",
++               modx.values = kk)
+>
+</code>
+아래 그림에서처럼 대학지원 퍼센티지가 높아지면 성적이 떨어지는 경향을 보이는데, 이 경향은 해당 주가 얼마나 sat교육에 돈을 투자하는가에 따라서 달라진다. 많이 투자하는 경우에는 지원율이 높아져도 성적이 떨어지는 정도가 적게 투자하는 경우보다 현저히 낮다.
+{{:pasted:20230607-004047.png}}
+===== 언제 interaction effect를 분석에 넣는가? =====
+interaction effects가 significant할 때에 넣는다
+significant하지 않을 때에는 additive model을 (+사인 모델) 사용한다.
 ===== One categorical IV =====
@@ Line 613: / Line 917: @@
 [5] "Murder"     "HS Grad"    "Frost"      "Area"
 </code>
+아래의 경우 interaction effect는 중요한 의미를 갖는다. additive model에서는 murder가 중요한 역할을 하지 않지만, interactive model에서는 Illiteracy와 결합하여 중요한 역할을 하는 것으로 해석될 수 있다.
-<code>fiti <- lm(Income ~ Illiteracy * Murder, data = as.data.frame(state.x77))
+<code>
+fit <- lm(Income ~ Illiteracy + Murder, data = as.data.frame(state.x77))
+fiti <- lm(Income ~ Illiteracy * Murder, data = as.data.frame(state.x77))
+summary(fit)
 summary(fiti)
+</code>
+<code>
+> fit <- lm(Income ~ Illiteracy + Murder, data = as.data.frame(state.x77))
+> fiti <- lm(Income ~ Illiteracy * Murder, data = as.data.frame(state.x77))
+> summary(fit)
+Call:
+lm(formula = Income ~ Illiteracy + Murder, data = as.data.frame(state.x77))
+Residuals:
+   Min     1Q Median     3Q    Max
+-880.9 -397.3  -51.3  333.1 1960.7
+Coefficients:
+            Estimate Std. Error t value Pr(>|t|)
+(Intercept)   4890.5      187.6   26.06   <2e-16 ***
+Illiteracy    -548.7      184.6   -2.97   0.0046 **
+Murder          25.4       30.5    0.83   0.4089
+---
+Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Residual standard error: 560 on 47 degrees of freedom
+Multiple R-squared:  0.203,	Adjusted R-squared:  0.169
+F-statistic: 5.98 on 2 and 47 DF,  p-value: 0.00486
+> summary(fiti)
 Call:
 lm(formula = Income ~ Illiteracy * Murder, data = as.data.frame(state.x77))
 Residuals:
-    Min      1Q  Median      3Q     Max
+   Min     1Q Median     3Q    Max
--955.20 -325.99   10.66  299.96 1892.12
+-955.2 -326.0   10.7  300.0 1892.1
 Coefficients:
                   Estimate Std. Error t value Pr(>|t|)
-(Intercept)        3822.61     405.33   9.431 2.54e-12 ***
+(Intercept)         3822.6      405.3    9.43  2.5e-12 ***
-Illiteracy          617.34     434.85   1.420  0.16245
+Illiteracy           617.3      434.9    1.42   0.1624
-Murder              146.82      50.33   2.917  0.00544 **
+Murder               146.8       50.3    2.92   0.0054 **
-Illiteracy:Murder  -117.10      40.13  -2.918  0.00544 **
+Illiteracy:Murder   -117.1       40.1   -2.92   0.0054 **
 ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 520.1 on 46 degrees of freedom
+Residual standard error: 520 on 46 degrees of freedom
-Multiple R-squared:  0.3273,	Adjusted R-squared:  0.2834
+Multiple R-squared:  0.327,	Adjusted R-squared:  0.283
-F-statistic: 7.461 on 3 and 46 DF,  p-value: 0.000359
+F-statistic: 7.46 on 3 and 46 DF,  p-value: 0.000359
 >
 </code>
 <code>> interact_plot(fiti, pred = "Illiteracy", modx = "Murder")</code>
@@ Line 645: / Line 978: @@
 {{:r:state.x77.points.jpeg?600}}
-<code>fitiris <- lm(Petal.Length ~ Petal.Width * Species, data = iris)
-interact_plot(fitiris, pred = "Petal.Width", modx = "Species")</code>
-{{:r:fitiris.jpeg?600}}
+====== Eg. 4 ======
+<code>
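+library(tidyverse) # for the %>% pipe
+library(caret)     # for createDataPartition(); install.packages("caret") if missing
+# Load the data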
+data("marketing", package = "datarium")
+set.seed(123)
+training.samples <- marketing$sales %>%
+    createDataPartition(p = 0.8, list = FALSE)
+train.data  <- marketing[training.samples, ]
+test.data <- marketing[-training.samples, ]
+model2 <- lm(sales ~ youtube + facebook + youtube:facebook,
+             data = train.data)
+# Or simply, use this:
+model2 <- lm(sales ~ youtube*facebook, data = train.data)
+# Summarize the model
+summary(model2)
+interact_plot(model2, pred="youtube", modx="facebook")
+</code>
 ====== Ex. ======
 Use Cars93 dataset.
@@ Line 662: / Line 1012: @@
 Interpret what has been found.
-====== Ex. 2 ======
+====== Ex. 3 ======
 <code>library(foreign)
 library(msm)