Differences

This shows you the differences between two versions of the page.

--- factor_analysis [2019/09/14 16:46] – [etc.] hkimscil
+++ factor_analysis [2019/11/01 09:14] – [E.g. 2] hkimscil
@@ Line 424: / Line 424: @@
 {{:r:dataset_exploratoryFactorAnalysis.csv}}
+<code>
+# Load the example data set. It is stored as my.data because every later
+# statement (na.omit, head, factanal) refers to the object by that name.
+my.data <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv")
+# If the data has NAs, it is better to omit them:
+my.data <- na.omit(my.data)
+head(my.data)
+</code>
+<code>
+> my.data <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv")
+> # if data has NAs, it is better to omit them:
+> my.data <- na.omit(my.data)
+> head(my.data)
+  BIO GEO CHEM ALG CALC STAT
+1   1   1    1   1    1    1
+2   4   4    3   4    4    4
+3   2   1    3   4    1    1
+4   2   3    2   4    4    3
+5   3   1    2   2    3    4
+6   1   1    1   4    4    4
+</code>
+<code>
+n.factors <- 2  # number of factors passed to factanal() below
+fit <- factanal(my.data,
+                n.factors,                # number of factors to extract
+                scores=c("regression"),  # also compute factor scores (regression method)
+                rotation="none")  # no rotation is applied to the solution
+print(fit, digits=2, cutoff=.3, sort=TRUE)  # 2 digits; loadings below .3 are left blank; variables sorted by factor
+</code>
+<code>
+> n.factors <- 2
+>
+> fit <- factanal(my.data,
++                 n.factors,                # number of factors to extract
++                 scores=c("regression"),
++                 rotation="none")
+>
+> print(fit, digits=2, cutoff=.3, sort=TRUE)
+Call:
+factanal(x = my.data, factors = n.factors, scores = c("regression"),     rotation = "none")
+Uniquenesses:
+ BIO  GEO CHEM  ALG CALC STAT
+0.25 0.37 0.25 0.37 0.05 0.71
+Loadings:
+     Factor1 Factor2
+ALG   0.78
+CALC  0.97
+STAT  0.53
+BIO   0.30    0.81
+GEO           0.74
+CHEM          0.84
+               Factor1 Factor2
+SS loadings       2.06    1.93
+Proportion Var    0.34    0.32
+Cumulative Var    0.34    0.66
+Test of the hypothesis that 2 factors are sufficient.
+The chi square statistic is 2.94 on 4 degrees of freedom.
+The p-value is 0.568
+>
+</code>
+<code>head(fit$scores)  # first rows of the factor scores stored in fit (requested via scores= in factanal)
+</code>
+<code>
+> head(fit$scores)
+     Factor1     Factor2
+1 -1.9089514 -0.52366961
+2  0.9564370  0.89249862
+3 -1.5797564  0.33784901
+4  0.7909078 -0.28205710
+5 -0.1127541 -0.03603192
+6  0.6901869 -1.31361815
+>
+</code>
+another solution
 <code>
 //Exploratory Factor Analysis Example
@@ Line 433: / Line 516: @@
 > data <- read.csv("dataset_EFA.csv")
 > data <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv")
+> data <- read.csv("https://raw.githubusercontent.com/manirath/BigData/master/dataset_EFA.csv")
 > #display the data (warning: large output - only the first 10 rows are shown here)
@@ Line 1151: / Line 1235: @@
 {{:r:secu_com_finance_2007.csv}}
+<code>
+Sys.setlocale("LC_ALL","Korean")  # NOTE(review): "Korean" looks like a Windows-style locale name; confirm on other platforms
+secu_com_finance_2007 <- read.csv("http://commres.net/wiki/_media/r/secu_com_finance_2007.csv")
+secu_com_finance_2007
+# V1 : net income to total capital ratio
+# V2 : net income to equity ratio
+# V3 : equity ratio
+# V4 : debt ratio
+# V5 : equity turnover ratio
+# standardization: add a scale()d copy of each variable as V*_s
+secu_com_finance_2007 <- transform(secu_com_finance_2007,
+    V1_s = scale(V1),
+    V2_s = scale(V2),
+    V3_s = scale(V3),
+    V4_s = scale(V4),
+    V5_s = scale(V5))
+# reverse the direction of the debt ratio (V4_s) via max(V4_s) - V4_s
+secu_com_finance_2007 <- transform(secu_com_finance_2007, V4_s2 = max(V4_s) - V4_s)
+# variable selection
+secu_com_finance_2007_2 <- secu_com_finance_2007[,c("company", "V1_s", "V2_s", "V3_s", "V4_s2", "V5_s")]
+# Correlation analysis
+cor(secu_com_finance_2007_2[,-1])
+round(cor(secu_com_finance_2007_2[,-1]), digits=3) # rounded to 3 digits
+# Scatter plot matrix
+plot(secu_com_finance_2007_2[,-1])
+# Scree Plot
+plot(prcomp(secu_com_finance_2007_2[,c(2:6)]), type="l", sub = "Scree Plot")
+</code>
+<code>
+# factor analysis (maximum likelihood factor analysis)
+# rotation = "varimax"
+secu_factanal <- factanal(secu_com_finance_2007_2[,2:6],
+    factors = 2,
+    rotation = "varimax", # "varimax", "promax", "none"
+    scores="regression") # "regression", "Bartlett"
+print(secu_factanal)
+</code>
+<code>
+print(secu_factanal$loadings, cutoff=0) # display all loadings (cutoff=0 suppresses none)
+# factor scores plotting
+secu_factanal$scores
+plot(secu_factanal$scores, main="Biplot of the first 2 factors")
+# label each observation with its company name (rownames mapping)
+text(secu_factanal$scores[,1], secu_factanal$scores[,2],
+   labels = secu_com_finance_2007$company,
+   cex = 0.7, pos = 3, col = "blue")
+# factor loadings plotting
+points(secu_factanal$loadings, pch=19, col = "red")
+text(secu_factanal$loadings[,1], secu_factanal$loadings[,2],
+   labels = rownames(secu_factanal$loadings),
+   cex = 0.8, pos = 3, col = "red")
+# plotting lines between (0,0) and the factor loadings of each variable;
+# segments() is vectorized, so a single call draws one line per row of the
+# loadings matrix instead of hard-coding the number of variables
+segments(0, 0, secu_factanal$loadings[,1], secu_factanal$loadings[,2])
+</code>
+====== e.g ======
+http://www.di.fc.ul.pt/~jpn/r/factoranalysis/factoranalysis.html
 ====== etc.  ======
 <del>see http://geog.uoregon.edu/bartlein/courses/geog495/lec16.html</del>