sequential_regression
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
sequential_regression [2018/11/09 08:12] – [Enter] hkimscil | sequential_regression [2022/05/22 21:50] (current) – hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== Sequential or Hierarchical regression ====== | ||
+ | 연구자가 판단하여 독립변인들 중 필요한 것들을 묶어서 단계(stage)별로 투입하고 분석하는 것을 말한다. Stepwise regression은 이러한 변인 투입 과정을 연구자의 판단 대신 컴퓨터의 계산 기준에 따라 자동으로 수행한다. | ||
====== 데이터 ====== | ====== 데이터 ====== | ||
^ DATA for regression analysis | ^ DATA for regression analysis | ||
Line 55: | Line 57: | ||
x1sq - x1psq ~= x2sq - x2psq | x1sq - x1psq ~= x2sq - x2psq | ||
0.311211 ~= 0.311583 | 0.311211 ~= 0.311583 | ||
+ | |||
+ | R에서 보는 예는 아래를 참조 | ||
+ | |||
</ | </ | ||
====== Seq. ====== | ====== Seq. ====== | ||
Line 66: | Line 71: | ||
| c. Dependent Variable: 통장갯수 | | c. Dependent Variable: 통장갯수 | ||
- | 증가한 r< | + | 증가한 r< |
| ANOVAa | | ANOVAa | ||
Line 93: | Line 98: | ||
http:// | http:// | ||
https:// | https:// | ||
+ | |||
+ | ====== r ====== | ||
+ | < | ||
+ | datavar <- read.csv(" | ||
+ | datavar | ||
+ | m1 <- lm(bankaccount~income+famnum, | ||
+ | summary(m1) | ||
+ | library(ppcor) | ||
+ | spcor(datavar) | ||
+ | pcor(datavar) | ||
+ | |||
+ | </ | ||
+ | < | ||
+ | > datavar <- read.csv(" | ||
+ | > datavar | ||
+ | | ||
+ | 1 6 220 5 | ||
+ | 2 5 190 6 | ||
+ | 3 7 260 3 | ||
+ | 4 7 200 4 | ||
+ | 5 8 330 2 | ||
+ | 6 | ||
+ | 7 8 210 3 | ||
+ | 8 | ||
+ | 9 9 320 1 | ||
+ | 10 | ||
+ | > m1 <- lm(bankaccount~income+famnum, | ||
+ | > summary(m1) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = bankaccount ~ income + famnum, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -1.2173 -0.5779 -0.1515 | ||
+ | |||
+ | Coefficients: | ||
+ | | ||
+ | (Intercept) | ||
+ | income | ||
+ | famnum | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 0.9301 on 7 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > library(ppcor) | ||
+ | > spcor(datavar) | ||
+ | $estimate | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | |||
+ | $p.value | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | |||
+ | $statistic | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | |||
+ | $n | ||
+ | [1] 10 | ||
+ | |||
+ | $gp | ||
+ | [1] 1 | ||
+ | |||
+ | $method | ||
+ | [1] " | ||
+ | |||
+ | > pcor(datavar) | ||
+ | $estimate | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | |||
+ | $p.value | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | |||
+ | $statistic | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | |||
+ | $n | ||
+ | [1] 10 | ||
+ | |||
+ | $gp | ||
+ | [1] 1 | ||
+ | |||
+ | $method | ||
+ | [1] " | ||
+ | |||
+ | ## zero-order correlation | ||
+ | > cor(datavar) | ||
+ | bankaccount | ||
+ | bankaccount | ||
+ | income | ||
+ | famnum | ||
+ | > | ||
+ | |||
+ | </ | ||
+ | semipartial (part): spcor() | ||
+ | |||
+ | | | bankaccount | ||
+ | | bankaccount | ||
+ | | income | ||
+ | | famnum | ||
+ | |||
+ | |||
+ | |||
+ | | | bankaccount | ||
+ | | bankaccount | ||
+ | | income | ||
+ | | famnum | ||
+ | |||
+ | |||
+ | | | bankaccount | ||
+ | | bankaccount | ||
+ | | income | ||
+ | | famnum | ||
+ | |||
+ | < | ||
+ | sp.b.i <- 0.5646726 ## (1) | ||
+ | c.b.i <- 0.7944312 ## (3) | ||
+ | |||
+ | sp.b.f <- -0.4086619 ## (2) | ||
+ | c.b.f <- -0.6922935 ## (4) | ||
+ | |||
+ | c.b.i.sq <- c.b.i^2 ## (3)^2 | ||
+ | sp.b.i.sq <- sp.b.i^2 ## (1)^2 | ||
+ | c.b.i.sq - sp.b.i.sq | ||
+ | |||
+ | c.b.f.sq <- c.b.f^2 ## (4)^2 | ||
+ | sp.b.f.sq <- sp.b.f^2 ## (2)^2 | ||
+ | c.b.f.sq - sp.b.f.sq | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > sp.b.i <- 0.5646726 | ||
+ | > c.b.i <- 0.7944312 | ||
+ | > | ||
+ | > sp.b.f <- -0.4086619 | ||
+ | > c.b.f <- -0.6922935 | ||
+ | > | ||
+ | > c.b.i.sq <- c.b.i^2 ## (3)^2 | ||
+ | > sp.b.i.sq <- sp.b.i^2 | ||
+ | > | ||
+ | > c.b.i.sq - sp.b.i.sq | ||
+ | [1] 0.3123 | ||
+ | > | ||
+ | > c.b.f.sq <- c.b.f^2 ## (4)^2 | ||
+ | > sp.b.f.sq <- sp.b.f^2 | ||
+ | > | ||
+ | > c.b.f.sq - sp.b.f.sq | ||
+ | [1] 0.3123 | ||
+ | </ | ||
+ | |||
+ | 0.3123이 두 독립변인이 DV에 공통으로 (공히) 미치는 영향력의 분량, 즉 두 독립변인이 함께 설명하는 DV 분산의 비율이다. | ||
+ | |||
+ | < | ||
+ | pcor.test(datavar$bankaccount, | ||
+ | pcor.test(datavar$bankaccount, | ||
+ | |||
+ | spcor.test(datavar$bankaccount, | ||
+ | spcor.test(datavar$bankaccount, | ||
+ | </ | ||
+ | . . . | ||
+ | < | ||
+ | > pcor.test(datavar$bankaccount, | ||
+ | | ||
+ | 1 0.7825112 0.01267595 | ||
+ | > pcor.test(datavar$bankaccount, | ||
+ | | ||
+ | 1 -0.672856 0.04702022 -2.406425 10 1 pearson | ||
+ | > | ||
+ | > spcor.test(datavar$bankaccount, | ||
+ | | ||
+ | 1 0.5646726 0.113182 | ||
+ | > spcor.test(datavar$bankaccount, | ||
+ | estimate | ||
+ | 1 -0.4086619 0.2748117 -1.184655 10 1 pearson | ||
+ | > | ||
+ | > | ||
+ | |||
+ | </ | ||
+ | |||
+ | ====== e.g. 3. College enrollment at the University of New Mexico ====== | ||
+ | < | ||
+ | > datavar <- read.csv(" | ||
+ | > str(datavar) | ||
+ | ' | ||
+ | $ YEAR : int 1 2 3 4 5 6 7 8 9 10 ... | ||
+ | $ ROLL : int 5501 5945 6629 7556 8716 9369 9920 10167 11084 12504 ... | ||
+ | $ UNEM : num 8.1 7 7.3 7.5 7 6.4 6.5 6.4 6.3 7.7 ... | ||
+ | $ HGRAD: int 9552 9680 9731 11666 14675 15265 15484 15723 16501 16890 ... | ||
+ | $ INC : int 1923 1961 1979 2030 2112 2192 2235 2351 2411 2475 ... | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | onePredictorModel <- lm(ROLL ~ UNEM, data = datavar) | ||
+ | twoPredictorModel <- lm(ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | threePredictorModel <- lm(ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | summary(twoPredictorModel) | ||
+ | summary(threePredictorModel) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -7640.0 -1046.5 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | UNEM 1133.8 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 3049 on 27 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -2102.2 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) -8.256e+03 | ||
+ | UNEM | ||
+ | HGRAD 9.423e-01 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1313 on 26 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > </ | ||
+ | < | ||
+ | > summary(threePredictorModel) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -1148.84 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) -9.153e+03 | ||
+ | UNEM | ||
+ | HGRAD 4.065e-01 | ||
+ | INC 4.275e+00 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 670.4 on 25 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | </ | ||
+ | |||
+ | < | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Model 1: ROLL ~ UNEM | ||
+ | Model 2: ROLL ~ UNEM + HGRAD | ||
+ | Model 3: ROLL ~ UNEM + HGRAD + INC | ||
+ | Res.Df | ||
+ | 1 27 251084710 | ||
+ | 2 | ||
+ | 3 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | ====== e.g. 4. Happiness ====== | ||
+ | {{: | ||
+ | |||
+ | < | ||
+ | # Import data (simulated data for this example) | ||
+ | # myData <- read.csv(' | ||
+ | myData <- read.csv(" | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > str(myData) | ||
+ | ' | ||
+ | $ happiness: int 5 5 6 4 3 5 5 5 4 4 ... | ||
+ | $ age : int 24 28 25 26 20 25 24 24 26 26 ... | ||
+ | $ gender | ||
+ | $ friends | ||
+ | $ pets : int 3 1 0 2 0 0 5 2 1 4 ... | ||
+ | > myData$gender <- factor(myData$gender) | ||
+ | > str(myData) | ||
+ | ' | ||
+ | $ happiness: int 5 5 6 4 3 5 5 5 4 4 ... | ||
+ | $ age : int 24 28 25 26 20 25 24 24 26 26 ... | ||
+ | $ gender | ||
+ | $ friends | ||
+ | $ pets : int 3 1 0 2 0 0 5 2 1 4 ... | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > m0 <- lm(happiness ~ 1, data = myData) | ||
+ | > anova(m0) | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Response: happiness | ||
+ | Df Sum Sq Mean Sq F value Pr(>F) | ||
+ | Residuals 99 240.84 | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # 불필요하지만 위의 분석이 variance와 | ||
+ | # 같은 것이라는 것을 아래처럼 확인한다. | ||
+ | > attach(myData) | ||
+ | The following objects are masked from myData (pos = 3): | ||
+ | |||
+ | age, friends, gender, happiness, pets | ||
+ | |||
+ | > var(happiness) | ||
+ | [1] 2.432727 | ||
+ | > length(happiness) | ||
+ | [1] 100 | ||
+ | > df.happiness <- length(happiness) - 1 | ||
+ | > df.happiness # degrees of freedom | ||
+ | [1] 99 | ||
+ | > ss.happiness <- var(happiness)* df.happiness # sum of square (ss) value for happiness variable | ||
+ | > ss.happiness | ||
+ | [1] 240.84 | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > m1 <- lm(happiness ~ age + gender, data=myData) | ||
+ | > summary(m1) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.6688 -1.0094 -0.1472 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.553 on 97 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # m1은 이미 위에서 실행 | ||
+ | > m2 <- lm(happiness ~ age + gender + friends, data=myData) | ||
+ | > m3 <- lm(happiness ~ age + gender + friends + pets, data = myData) # Model 3: Adding pets variable | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > anova(m1, m2, m3) | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Model 1: happiness ~ age + gender | ||
+ | Model 2: happiness ~ age + gender + friends | ||
+ | Model 3: happiness ~ age + gender + friends + pets | ||
+ | Res.Df | ||
+ | 1 97 233.97 | ||
+ | 2 96 209.27 | ||
+ | 3 95 193.42 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > summary(m1) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.6688 -1.0094 -0.1472 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.553 on 97 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(m2) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender + friends, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.5758 -1.0204 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | friends | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.476 on 96 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > summary(m3) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = happiness ~ age + gender + friends + pets, data = myData) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -3.0556 -1.0183 -0.1109 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | age | ||
+ | genderMale | ||
+ | friends | ||
+ | pets | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1.427 on 95 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | Report in research paper | ||
+ | {{: | ||
+ | {{: | ||
+ | |||
+ | ====== e.g. 5: Stock Market ====== | ||
+ | see [[: | ||
+ | |||
+ | ====== e.g. 6: SWISS ====== | ||
+ | |||
sequential_regression.txt · Last modified: 2022/05/22 21:50 by hkimscil