User Tools

Site Tools


factor_analysis

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
Next revisionBoth sides next revision
factor_analysis [2019/09/14 16:46] – [etc.] hkimscilfactor_analysis [2019/11/01 09:14] – [E.g. 2] hkimscil
Line 424: Line 424:
 {{:r:dataset_exploratoryFactorAnalysis.csv}} {{:r:dataset_exploratoryFactorAnalysis.csv}}
  
 +<code>
 +mydata <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv")
 +# if data as NAs, it is better to omit them:
 +my.data <- na.omit(my.data)
 +head(my.data)
 +</code>
 +<code>
 +> my.data <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv")
 +> # if the data has NAs, it is better to omit them:
 +> my.data <- na.omit(my.data)
 +> head(my.data)
 +  BIO GEO CHEM ALG CALC STAT
 +1        1      1    1
 +2        3      4    4
 +3        3      1    1
 +4        2      4    3
 +5        2      3    4
 +6        1      4    4
 +</code>
 +
 +<code>
 +n.factors <- 2   
 +
 +fit <- factanal(my.data, 
 +                n.factors,                # number of factors to extract
 +                scores=c("regression"),
 +                rotation="none")
 +
 +print(fit, digits=2, cutoff=.3, sort=TRUE)
 +</code>
 +<code>
 +> n.factors <- 2   
 +
 +> fit <- factanal(my.data, 
 ++                 n.factors,                # number of factors to extract
 ++                 scores=c("regression"),
 ++                 rotation="none")
 +
 +> print(fit, digits=2, cutoff=.3, sort=TRUE)
 +
 +Call:
 +factanal(x = my.data, factors = n.factors, scores = c("regression"),     rotation = "none")
 +
 +Uniquenesses:
 + BIO  GEO CHEM  ALG CALC STAT 
 +0.25 0.37 0.25 0.37 0.05 0.71 
 +
 +Loadings:
 +     Factor1 Factor2
 +ALG   0.78          
 +CALC  0.97          
 +STAT  0.53          
 +BIO   0.30    0.81  
 +GEO           0.74  
 +CHEM          0.84  
 +
 +               Factor1 Factor2
 +SS loadings       2.06    1.93
 +Proportion Var    0.34    0.32
 +Cumulative Var    0.34    0.66
 +
 +Test of the hypothesis that 2 factors are sufficient.
 +The chi square statistic is 2.94 on 4 degrees of freedom.
 +The p-value is 0.568 
 +
 +</code>
 +
 +<code>head(fit$scores)
 +</code>
 +
 +<code>
 +> head(fit$scores)
 +     Factor1     Factor2
 +1 -1.9089514 -0.52366961
 +2  0.9564370  0.89249862
 +3 -1.5797564  0.33784901
 +4  0.7909078 -0.28205710
 +5 -0.1127541 -0.03603192
 +6  0.6901869 -1.31361815
 +
 +</code>
 +
 +another solution
 <code> <code>
 //Exploratory Factor Analysis Example //Exploratory Factor Analysis Example
Line 433: Line 516:
 > data <- read.csv("dataset_EFA.csv") > data <- read.csv("dataset_EFA.csv")
 > data <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv") > data <- read.csv("http://commres.net/wiki/_media/r/dataset_exploratoryfactoranalysis.csv")
 +> data <- read.csv("https://raw.githubusercontent.com/manirath/BigData/master/dataset_EFA.csv")
  
 > #display the data (warning: large output - only the first 10 rows are shown here) > #display the data (warning: large output - only the first 10 rows are shown here)
Line 1151: Line 1235:
 {{:r:secu_com_finance_2007.csv}} {{:r:secu_com_finance_2007.csv}}
  
 +<code>
 +Sys.setlocale("LC_ALL","Korean")
 +secu_com_finance_2007 <- read.csv("http://commres.net/wiki/_media/r/secu_com_finance_2007.csv")
 +secu_com_finance_2007
 +
 +# V1 : 총자본순이익율
 +# V2 : 자기자본순이익율
 +# V3 : 자기자본비율
 +# V4 : 부채비율
 +# V5 : 자기자본회전율
 +
 +# 표준화 변환 (standardization)
 +secu_com_finance_2007 <- transform(secu_com_finance_2007, 
 +    V1_s = scale(V1), 
 +    V2_s = scale(V2), 
 +    V3_s = scale(V3), 
 +    V4_s = scale(V4),
 +    V5_s = scale(V5))
 +
 +# 부채비율(V4_s)을 방향(max(V4_s)-V4_s) 변환
 +secu_com_finance_2007 <- transform(secu_com_finance_2007, V4_s2 = max(V4_s) - V4_s)
 +
 +# variable selection
 +secu_com_finance_2007_2 <- secu_com_finance_2007[,c("company", "V1_s", "V2_s", "V3_s", "V4_s2", "V5_s")]
 + 
 +# Correlation analysis
 +cor(secu_com_finance_2007_2[,-1])
 +
 +round(cor(secu_com_finance_2007_2[,-1]), digits=3) # 반올림
 +
 +# Scatter plot matrix
 +plot(secu_com_finance_2007_2[,-1])
 +
 +# Scree Plot
 +plot(prcomp(secu_com_finance_2007_2[,c(2:6)]), type="l", sub = "Scree Plot")
 +
 +</code>
 +
 +<code>
 +# 요인분석(maximum likelihood factor analysis)
 +# rotation = "varimax"
 +secu_factanal <- factanal(secu_com_finance_2007_2[,2:6], 
 +    factors = 2, 
 +    rotation = "varimax", # "varimax", "promax", "none" 
 +    scores="regression") # "regression", "Bartlett"
 +print(secu_factanal)
 +
 +</code>
 +
 +<code>
 +print(secu_factanal$loadings, cutoff=0) # display every loadings
 +
 +# factor scores plotting
 +secu_factanal$scores
 +
 +plot(secu_factanal$scores, main="Biplot of the first 2 factors")
 +
 +# 관측치별 이름 매핑(rownames mapping)
 +text(secu_factanal$scores[,1], secu_factanal$scores[,2], 
 +   labels = secu_com_finance_2007$company, 
 +   cex = 0.7, pos = 3, col = "blue")
 +
 +# factor loadings plotting
 +points(secu_factanal$loadings, pch=19, col = "red")
 +
 +
 +text(secu_factanal$loadings[,1], secu_factanal$loadings[,2], 
 +   labels = rownames(secu_factanal$loadings), 
 +   cex = 0.8, pos = 3, col = "red")
 +
 +# plotting lines between (0,0) and (factor loadings by Var.)
 +segments(0,0,secu_factanal$loadings[1,1], secu_factanal$loadings[1,2])
 +segments(0,0,secu_factanal$loadings[2,1], secu_factanal$loadings[2,2])
 +segments(0,0,secu_factanal$loadings[3,1], secu_factanal$loadings[3,2])
 +segments(0,0,secu_factanal$loadings[4,1], secu_factanal$loadings[4,2])
 +segments(0,0,secu_factanal$loadings[5,1], secu_factanal$loadings[5,2])
 +
 +
 +
 +</code>
 +====== e.g ======
 +http://www.di.fc.ul.pt/~jpn/r/factoranalysis/factoranalysis.html
 ====== etc.  ====== ====== etc.  ======
 <del>see http://geog.uoregon.edu/bartlein/courses/geog495/lec16.html</del> <del>see http://geog.uoregon.edu/bartlein/courses/geog495/lec16.html</del>
factor_analysis.txt · Last modified: 2023/11/06 02:53 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki