Subset and summarize the output of factor analyses

Subset and summarize the results of Principal Component Analysis (PCA), Correspondence Analysis (CA), Multiple Correspondence Analysis (MCA), Factor Analysis of Mixed Data (FAMD), Multiple Factor Analysis (MFA) and Hierarchical Multiple Factor Analysis (HMFA) functions from several packages.

facto_summarize(X, element, node.level = 1, group.names,
  result = c("coord", "cos2", "contrib"), axes = 1:2, select = NULL)

Arguments

X	an object of class PCA, CA, MCA, FAMD, MFA and HMFA [FactoMineR]; prcomp and princomp [stats]; dudi, pca, coa and acm [ade4]; ca [ca package]; expoOutput [ExPosition].
element	the element to subset from the output. Possible values are "row" or "col" for CA; "var" or "ind" for PCA and MCA; "mca.cor" for MCA; "quanti.var", "quali.var", "group" or "ind" for FAMD, MFA and HMFA.
node.level	a single number indicating the HMFA node level.
group.names	a vector containing the names of the groups (by default NULL, in which case the groups are named group.1, group.2 and so on).
result	the result to be extracted for the element. Possible values are any combination of c("cos2", "contrib", "coord").
axes	a numeric vector specifying the axes of interest. Default values are 1:2 for axes 1 and 2.
select	a selection of variables. Allowed values are NULL or a list containing the arguments name, cos2 or contrib. Default is list(name = NULL, cos2 = NULL, contrib = NULL). name: a character vector containing the names of the variables to be selected. cos2: if cos2 is in [0, 1], e.g. 0.6, then variables with a cos2 > 0.6 are selected; if cos2 > 1, e.g. 5, then the top 5 variables with the highest cos2 are selected. contrib: if contrib > 1, e.g. 5, then the top 5 variables with the highest contributions are selected.

Value

A data frame containing the (total) coord, cos2 and the contribution for the axes.

Details

If length(axes) > 1, then the columns contrib and cos2 correspond to the total contributions and total cos2 of the axes. In this case, the column coord is calculated as x^2 + y^2 + ..., where x, y, ... are the coordinates of the points on the specified axes.

References

http://www.sthda.com/english/

Examples

# Principal component analysis
# +++++++++++++++++++++++++++++
data(decathlon2)
decathlon2.active <- decathlon2[1:23, 1:10]
res.pca <- prcomp(decathlon2.active,  scale = TRUE)

# Summarize variables on axes 1:2
facto_summarize(res.pca, "var", axes = 1:2)[,-1]
#>                     Dim.1       Dim.2     coord      cos2   contrib
#> X100m        -0.850625692  0.17939806 0.7557477 0.7557477 12.674495
#> Long.jump     0.794180641 -0.28085695 0.7096035 0.7096035 11.900620
#> Shot.put      0.733912733 -0.08540412 0.5459218 0.5459218  9.155546
#> High.jump     0.610083985  0.46521415 0.5886267 0.5886267  9.871741
#> X400m        -0.701603377 -0.29017826 0.5764507 0.5764507  9.667541
#> X110m.hurdle -0.764125197  0.02474081 0.5844994 0.5844994  9.802524
#> Discus        0.743209016 -0.04966086 0.5548258 0.5548258  9.304874
#> Pole.vault   -0.217268042 -0.80745110 0.6991827 0.6991827 11.725854
#> Javeline      0.428226639 -0.38610928 0.3324584 0.3324584  5.575594
#> X1500m        0.004278487 -0.78448019 0.6154275 0.6154275 10.321212
# Select the top 5 contributing variables
facto_summarize(res.pca, "var", axes = 1:2,
           select = list(contrib = 5))[,-1]
#>                   Dim.1      Dim.2     coord      cos2   contrib
#> X100m      -0.850625692  0.1793981 0.7557477 0.7557477 12.674495
#> Long.jump   0.794180641 -0.2808570 0.7096035 0.7096035 11.900620
#> Pole.vault -0.217268042 -0.8074511 0.6991827 0.6991827 11.725854
#> X1500m      0.004278487 -0.7844802 0.6154275 0.6154275 10.321212
#> High.jump   0.610083985  0.4652142 0.5886267 0.5886267  9.871741
# Select variables with cos2 >= 0.6
facto_summarize(res.pca, "var", axes = 1:2,
           select = list(cos2 = 0.6))[,-1]
#>                   Dim.1      Dim.2     coord      cos2  contrib
#> X100m      -0.850625692  0.1793981 0.7557477 0.7557477 12.67450
#> Long.jump   0.794180641 -0.2808570 0.7096035 0.7096035 11.90062
#> Pole.vault -0.217268042 -0.8074511 0.6991827 0.6991827 11.72585
#> X1500m      0.004278487 -0.7844802 0.6154275 0.6154275 10.32121
# Select by names
facto_summarize(res.pca, "var", axes = 1:2,
     select = list(name = c("X100m", "Discus", "Javeline")))[,-1]
#>               Dim.1       Dim.2     coord      cos2   contrib
#> X100m    -0.8506257  0.17939806 0.7557477 0.7557477 12.674495
#> Discus    0.7432090 -0.04966086 0.5548258 0.5548258  9.304874
#> Javeline  0.4282266 -0.38610928 0.3324584 0.3324584  5.575594

# Summarize individuals on axes 1:2
facto_summarize(res.pca, "ind", axes = 1:2)[,-1]
#>                  Dim.1      Dim.2      coord      cos2    contrib
#> SEBRLE       0.1912074 -1.5541282  2.4518746 0.5050034  1.7878218
#> CLAY         0.7901217 -2.4204156  6.4827039 0.5057178  4.7269626
#> BERNARD     -1.3292592 -1.6118687  4.3650507 0.4871654  3.1828434
#> YURKOV      -0.8694134  0.4328779  0.9432630 0.1199355  0.6877946
#> ZSIVOCZKY   -0.1057450  2.0233632  4.1051806 0.5779938  2.9933552
#> McMULLEN     0.1185550  0.9916237  0.9973729 0.1543704  0.7272497
#> MARTINEAU   -2.3923532  1.2849234  7.3743818 0.5205607  5.3771432
#> HERNU       -1.8910497 -1.1784614  4.9648401 0.5543447  3.6201890
#> BARRAS      -1.7744575  0.4125321  3.3188820 0.6495490  2.4200135
#> NOOL        -2.7770058  1.5726757 10.1850700 0.6469840  7.4265994
#> BOURGUIGNON -4.4137335 -1.2635770 21.0776704 0.9301572 15.3691054
#> Sebrle       3.4514485 -1.2169193 13.3933893 0.7593400  9.7659944
#> Clay         3.3162243 -1.6232908 13.6324164 0.8523470  9.9402847
#> Karpov       4.0703560  0.7983510 17.2051623 0.8138146 12.5454070
#> Macey        1.8484623  2.0638828  7.6764252 0.8165181  5.5973828
#> Warners      1.3873514 -0.2819083  2.0042163 0.2662078  1.4614049
#> Zsivoczky    0.4715533  0.9267436  1.0812163 0.2190667  0.7883854
#> Hernu        0.2763118  1.1657260  1.4352654 0.4666709  1.0465457
#> Bernard      1.3672590  1.4780354  4.0539857 0.6274807  2.9560256
#> Schwarzl    -0.7102777 -0.6584251  0.9380181 0.2170229  0.6839702
#> Pogorelov   -0.2143524 -0.8610557  0.7873639 0.1337231  0.5741185
#> Schoenbeck  -0.4953166 -1.3000530  1.9354762 0.5291161  1.4112821
#> Barras      -0.3158867  0.8193681  0.7711485 0.1466237  0.5622947

# Correspondence Analysis
# ++++++++++++++++++++++++++
# Install and load FactoMineR to compute CA
# install.packages("FactoMineR")
library("FactoMineR")
data("housetasks")
res.ca <- CA(housetasks, graph = FALSE)
# Summarize row variables on axes 1:2
facto_summarize(res.ca, "row", axes = 1:2)[,-1]
#>                 Dim.1      Dim.2     coord      cos2    contrib
#> Laundry    -0.9918368  0.4953220 1.2290841 0.9245395 12.5556235
#> Main_meal  -0.8755855  0.4901092 1.0068569 0.9739621  8.9413526
#> Dinner     -0.6925740  0.3081043 0.5745869 0.9303433  3.6018327
#> Breakfeast -0.5086002  0.4528038 0.4637054 0.9051733  3.7680288
#> Tidying    -0.3938084 -0.4343444 0.3437401 0.9748275  2.4340753
#> Dishes     -0.1889641 -0.4419662 0.2310416 0.7642703  1.5153488
#> Shopping   -0.1176813 -0.4033171 0.1765136 0.8113088  1.2294289
#> Official    0.2266324  0.2536132 0.1156819 0.1194711  0.6445857
#> Driving     0.7417696  0.6534143 0.9771724 0.7672477  7.8836989
#> Finances    0.2707669 -0.6178684 0.4550760 0.9973464  2.9847396
#> Insurance   0.6470759 -0.4737832 0.6431778 0.8848140  5.1890738
#> Repairs     1.5287787  0.8642647 3.0841176 0.9326072 29.5364916
#> Holidays    0.2524863 -1.4350066 2.1229933 0.9921522 19.7157199
# Summarize column variables on axes 1:2
facto_summarize(res.ca, "col", axes = 1:2)[,-1]
#>                   Dim.1      Dim.2      coord      cos2   contrib
#> Wife        -0.83762154  0.3652207 0.83499601 0.9543242 29.079014
#> Alternating -0.06218462  0.2915938 0.08889388 0.1098815  1.310538
#> Husband      1.16091847  0.6019199 1.71003929 0.9795683 37.815957
#> Jointly      0.14942609 -1.0265791 1.07619274 0.9979998 31.794491

# Multiple Correspondence Analysis
# +++++++++++++++++++++++++++++++++
library(FactoMineR)
data(poison)
res.mca <- MCA(poison, quanti.sup = 1:2,
              quali.sup = 3:4, graph=FALSE)
# Summarize variables on axes 1:2
res <- facto_summarize(res.mca, "var", axes = 1:2)
head(res)
#>              name      Dim.1       Dim.2      coord      cos2   contrib
#> Nausea_n Nausea_n  0.2673909  0.12139029 0.08623348 0.3090033  1.319847
#> Nausea_y Nausea_y -0.9581506 -0.43498187 1.10726185 0.3090033  4.729453
#> Vomit_n   Vomit_n  0.4790279 -0.40919465 0.39690803 0.5953620  4.662117
#> Vomit_y   Vomit_y -0.7185419  0.61379197 0.89304306 0.5953620  6.993176
#> Abdo_n     Abdo_n  1.3180221 -0.03574501 1.73845988 0.8457372 11.138239
#> Abdo_y     Abdo_y -0.6411999  0.01738946 0.41143974 0.8457372  5.418603
# Summarize individuals on axes 1:2
res <- facto_summarize(res.mca, "ind", axes = 1:2)
head(res)
#>   name      Dim.1       Dim.2     coord       cos2   contrib
#> 1    1 -0.4525811 -0.26415072 0.2746052 0.46457063 1.0751792
#> 2    2  0.8361700 -0.03193457 0.7002000 0.55670644 2.7415373
#> 3    3 -0.4481892  0.13538726 0.2192032 0.59815656 0.8582602
#> 4    4  0.8803694 -0.08536230 0.7823370 0.75476958 3.0631335
#> 5    5 -0.4481892  0.13538726 0.2192032 0.59815656 0.8582602
#> 6    6 -0.3594324 -0.43604390 0.3193260 0.06143111 1.2502771

# Multiple factor Analysis
# +++++++++++++++++++++++++++++++++
library(FactoMineR)
data(poison)
res.mfa <- MFA(poison, group=c(2,2,5,6), type=c("s","n","n","n"),
               name.group=c("desc","desc2","symptom","eat"),
               num.group.sup=1:2, graph=FALSE)
# Summarize categorical variables on axes 1:2
res <- facto_summarize(res.mfa, "quali.var", axes = 1:2)
head(res)
#>              name      Dim.1        Dim.2     coord      cos2   contrib
#> Nausea_n Nausea_n  0.2995559 -0.008263233 0.0898020 0.5679634 0.6601587
#> Nausea_y Nausea_y -1.0734086  0.029609918 1.1530827 0.5679634 2.3655688
#> Vomit_n   Vomit_n  0.4923055 -0.335833158 0.3551486 0.7818420 2.4633487
#> Vomit_y   Vomit_y -0.7384582  0.503749737 0.7990843 0.7818420 3.6950230
#> Abdo_n     Abdo_n  1.4594717 -0.253368918 2.1942534 0.9439283 6.8916745
#> Abdo_y     Abdo_y -0.7100132  0.123260555 0.5193120 0.9439283 3.3527065
# Summarize individuals on axes 1:2
res <- facto_summarize(res.mfa, "ind", axes = 1:2)
head(res)
#>   name      Dim.1       Dim.2     coord       cos2   contrib
#> 1    1 -0.8980293 -0.19968268 0.8463298 0.37413632 0.6692610
#> 2    2  1.6550439 -0.41095346 2.9080530 0.42074506 2.2996311
#> 3    3 -0.8673037  0.09906989 0.7620305 0.51312594 0.6025988
#> 4    4  1.7839172 -0.56856945 3.5056316 0.66185891 2.7721845
#> 5    5 -0.8673037  0.09906989 0.7620305 0.51312594 0.6025988
#> 6    6 -1.1229099 -1.07440938 2.4152822 0.06519297 1.9099576

Arguments

Value

Details

References

Examples

Contents

Author