Hu and Bentler (1999) simulation

Author

Phil Chalmers

Simulation from:

Hu, L. & Bentler, P. M. (1999). Cutoff criteria for fit indices in covariance structure analysis: Conventional criteria versus new alternatives. Structural Equation Modeling, 6, 1-31

User-defined functions

User-defined helper function for the generate-analyse steps to generate appropriate syntax for lavaan, as well as defined fixed objects re-used throughout the simulation (could be placed in an external .R file and sourced in prior to running runSimulation() instead).

# L' matrix
simple <- complex <- matrix(c(.7, .7, .75, .8, .8, numeric(15),
                              .7, .7, .75, .8, .8, numeric(15),
                              .7, .7, .75, .8, .8), nrow=3, byrow=TRUE)
complex[c(3,11,27)] <- c(.7, .7, .7)

# residual variances
simple.resids <- complex.resids <- 1 - colSums(simple^2)
complex.resids[c(1,4,9)] <- c(.51, .36, .36)

# latent correlations
fcor <- matrix(1,3,3)
fcor[lower.tri(fcor)] <- fcor[upper.tri(fcor)] <- c(.5, .4, .3)

# variable distribution properties
kurtosis.common_2.3 <- c(-1, 2, 5)
kurtosis.unique_2.4 <- c(-1,.5,2.5,4.5,6.5,-1,1,3,5,7,
                         -.5,1.5,3.5,5.5,7.5)
Ztransform <- function(n=1) sqrt(rchisq(n, df=5)) / sqrt(3)


# organize all into fixed_object list
fo <- list(simple=simple, simple.resids=simple.resids,
           complex=complex, complex.resids=complex.resids,
           fcor=fcor, kurtosis.unique_2.4=kurtosis.unique_2.4,
           kurtosis.common_2.3=kurtosis.common_2.3,
           Ztransform=Ztransform)

rm(list=setdiff(ls(), "fo"))


#' Generate lavaan syntax for Hu and Bentler (1999) simulation
#'
#' @param model simple/complex
#' @param fit_model true/mis1/mis2
#' @examples
#' cat(genLavaanSyntax('simple', 'true'))
#' cat(genLavaanSyntax('complex', 'mis2'))
#'
genLavaanSyntax <- function(model, fit_model){
    ret <- if(model == 'simple'){
        add <- switch(fit_model,
                      true = "",
                      mis1 = "\nf1 ~~ 0*f2",
                      mis2 = "\nf1 ~~ 0*f2\nf1 ~~ 0*f3")
        sprintf("
        f1 =~ NA*V1 + V2 + V3 + V4 + 0.8*V5
        f2 =~ NA*V6 + V7 + V8 + V9 + 0.8*V10
        f3 =~ NA*V11 + V12 + V13 + V14 + 0.8*V15%s", add)
    } else if(model == 'complex'){
        # complex
        add <- switch(fit_model,
                      true = c(' + V1', '+ V9'),
                      mis1 = c('', '+ V9'),
                      mis2 = c('', ''))
        sprintf("
        f1 =~ NA*V1 + V2 + V3 + V4 + 0.8*V5
        f2 =~ NA*V6 + V7 + V8 + V9 + 0.8*V10%s
        f3 =~ NA*V11 + V12 + V13 + V14 + 0.8*V15 + V4%s", add[1], add[2])
    } else stop('model type not included')
    ret
}

Simulation code

The follow implements the data generation conditions and subsequent analyses using the lavaan packages ML criteria. Note that in the summarise step only the general behaviour of the fit statistics is reported rather the cut-values. However, use of reSummarise() is demonstrated in the following section to obtain the cut-off criteria decided by the front-end user.

library(SimDesign)

Design <- createDesign(model=c('simple', 'complex'),
                       gen_condition=c('normal', paste0('nonnormal', 1:3),
                                       'elliptical',
                                       paste0('nonnormal_dep', 1:2)),
                       N = c(150, 250, 500, 1000, 2500, 5000),
                       fit_model=c('true', 'mis1', 'mis2'))
# source('HuBentler1999_extras.R')  # source instead of defining in same document
# str(fo)

#-------------------------------------------------------------------

Generate <- function(condition, fixed_objects = NULL) {
    Attach(condition)
    L <- with(fixed_objects, if(model == 'simple') simple else complex)
    sigma.e <- diag(with(fixed_objects,
                    if(model == 'simple') simple.resids else complex.resids))
    sigma <- fixed_objects$fcor
    Z <- ifelse(gen_condition %in% c(paste0('nonnormal_dep', 1:2), 'elliptical'),
                fixed_objects$Ztransform(), 1)

    # theta
    theta <- if(gen_condition %in% c('normal', 'nonnormal3', 'nonnormal_dep1')){
        rmvnorm(N, sigma=sigma) / Z
    } else if(gen_condition %in% c('nonnormal1', 'nonnormal2', 'nonnormal_dep2')){
        rValeMaurelli(N, sigma = sigma, kurt=fixed_objects$kurtosis.common_2.3) / Z
    } else if(gen_condition == 'elliptical'){
        Sigma <- diag(18)
        Sigma[1:3, 1:3] <- sigma
        tmp <- rtelliptical(N, mu=numeric(18),
                            lower=rep(-Inf, 18), upper=rep(Inf, 18),
                            Sigma=Sigma, dist = 'Normal') / Z
        epsilon <- tmp[,4:18]
        tmp[,1:3]
    }

    # epsilon
    epsilon <- if(gen_condition == 'normal'){
        rmvnorm(N, sigma=sigma.e) / Z
    } else if(gen_condition == 'elliptical'){
        epsilon
    } else {
        rValeMaurelli(N, sigma=sigma.e,
                      kurt=fixed_objects$kurtosis.unique_2.4) / Z
    }

    # X = L * theta + e
    dat <- as.data.frame(t(t(L) %*% t(theta)) + epsilon)
    dat
}

Analyse <- function(condition, dat, fixed_objects = NULL) {
    Attach(condition)
    syntax <- genLavaanSyntax(model=model, fit_model=fit_model)
    mod <- sem(syntax, data=dat)
    if(!lavInspect(mod, 'converged')) stop('Model did not converge')
    ret <- fitMeasures(mod)
    ret
}

Summarise <- function(condition, results, fixed_objects = NULL) {
    ret <- c(mean=colMeans(results), sd=apply(results, 2, sd))
    ret
}

#-------------------------------------------------------------------

res <- runSimulation(design=Design, replications=1000, parallel=TRUE,
                     debug='none', store_results=TRUE,
                     generate=Generate, fixed_objects=fo,
                     analyse=Analyse, summarise=Summarise,
                     packages=c("relliptical", "lavaan"),
                     filename='HuBentler1999')
res

# A tibble: 252 × 103
   model   gen_condition     N fit_model mean.npar mean.fmin mean.chisq mean.df
   <chr>   <chr>         <dbl> <chr>         <dbl>     <dbl>      <dbl>   <dbl>
 1 simple  normal          150 true             33   0.30457     91.370      87
 2 complex normal          150 true             36   0.56044    168.13       84
 3 simple  nonnormal1      150 true             33   0.30722     92.166      87
 4 complex nonnormal1      150 true             36   0.57259    171.78       84
 5 simple  nonnormal2      150 true             33   0.30296     90.889      87
 6 complex nonnormal2      150 true             36   0.57394    172.18       84
 7 simple  nonnormal3      150 true             33   0.30719     92.156      87
 8 complex nonnormal3      150 true             36   0.56485    169.45       84
 9 simple  elliptical      150 true             33   0.30405     91.216      87
10 complex elliptical      150 true             36   0.37740    113.22       84
# ℹ 242 more rows
# ℹ 95 more variables: mean.pvalue <dbl>, mean.baseline.chisq <dbl>,
#   mean.baseline.df <dbl>, mean.baseline.pvalue <dbl>, mean.cfi <dbl>,
#   mean.tli <dbl>, mean.nnfi <dbl>, mean.rfi <dbl>, mean.nfi <dbl>,
#   mean.pnfi <dbl>, mean.ifi <dbl>, mean.rni <dbl>, mean.logl <dbl>,
#   mean.unrestricted.logl <dbl>, mean.aic <dbl>, mean.bic <dbl>,
#   mean.ntotal <dbl>, mean.bic2 <dbl>, mean.rmsea <dbl>, …

Specific cut-offs

Above simulation does not contain the specific cut-offs utilized by the authors (e.g., CFI > .9 or RMSEA < .08). However, because the results were stored in the final object via store_results = TRUE these can be obtained by resummarising the results. Doing so just requires applying the reSummarise() function.

# new summarise function for obtaining specific cut-offs
Summarise <- function(condition, results, fixed_objects = NULL) {
   cut1 <- colMeans(results[,c('cfi', 'tli', 'nnfi', 'gfi', 'agfi', 'mfi')] > .90)
   cut2 <- colMeans(results[,c('rmsea', 'srmr')] < .08)
   ret <- c(cut90=cut1, cut08=cut2)
   ret
}

newres <- reSummarise(Summarise, results=res)
newres

# A tibble: 252 × 12
   model  gen_condition     N fit_model cut90.cfi cut90.tli cut90.nnfi cut90.gfi
   <chr>  <chr>         <dbl> <chr>         <dbl>     <dbl>      <dbl>     <dbl>
 1 simple normal          150 true          1         1          1         0.996
 2 compl… normal          150 true          0.963     0.777      0.777     0.031
 3 simple nonnormal1      150 true          1         1          1         0.994
 4 compl… nonnormal1      150 true          0.946     0.726      0.726     0.016
 5 simple nonnormal2      150 true          1         1          1         0.998
 6 compl… nonnormal2      150 true          0.932     0.708      0.708     0.019
 7 simple nonnormal3      150 true          1         1          1         0.992
 8 compl… nonnormal3      150 true          0.95      0.775      0.775     0.017
 9 simple elliptical      150 true          0.994     0.983      0.983     0.994
10 compl… elliptical      150 true          0.955     0.859      0.859     0.777
# ℹ 242 more rows
# ℹ 4 more variables: cut90.agfi <dbl>, cut90.mfi <dbl>, cut08.rmsea <dbl>,
#   cut08.srmr <dbl>

Some brief analyses of the results given the data generation conditions.

library(dplyr)

newres %>% group_by(fit_model, model) %>% 
    summarise(cfi=mean(cut90.cfi), rmsea=mean(cut08.rmsea)) -> tab1

`summarise()` has grouped output by 'fit_model'. You can override using the
`.groups` argument.

knitr::kable(tab1)

fit_model	model	cfi	rmsea
mis1	complex	0.957	0.206
mis1	simple	0.997	0.999
mis2	complex	0.059	0.135
mis2	simple	0.991	0.997
true	complex	0.990	0.516
true	simple	1.000	1.000

newres %>% group_by(fit_model, model, gen_condition) %>% 
    summarise(cfi=mean(cut90.cfi), rmsea=mean(cut08.rmsea)) -> tab2

`summarise()` has grouped output by 'fit_model', 'model'. You can override
using the `.groups` argument.

knitr::kable(tab2)

fit_model	model	gen_condition	cfi	rmsea
mis1	complex	elliptical	0.978	0.998
mis1	complex	nonnormal1	0.945	0.067
mis1	complex	nonnormal2	0.946	0.061
mis1	complex	nonnormal3	0.960	0.083
mis1	complex	nonnormal_dep1	0.964	0.082
mis1	complex	nonnormal_dep2	0.946	0.062
mis1	complex	normal	0.960	0.088
mis1	simple	elliptical	0.977	1.000
mis1	simple	nonnormal1	1.000	0.999
mis1	simple	nonnormal2	1.000	1.000
mis1	simple	nonnormal3	1.000	0.999
mis1	simple	nonnormal_dep1	1.000	0.999
mis1	simple	nonnormal_dep2	1.000	0.999
mis1	simple	normal	1.000	0.999
mis2	complex	elliptical	0.362	0.946
mis2	complex	nonnormal1	0.009	0.000
mis2	complex	nonnormal2	0.007	0.000
mis2	complex	nonnormal3	0.010	0.000
mis2	complex	nonnormal_dep1	0.010	0.000
mis2	complex	nonnormal_dep2	0.009	0.000
mis2	complex	normal	0.008	0.000
mis2	simple	elliptical	0.939	0.999
mis2	simple	nonnormal1	1.000	0.996
mis2	simple	nonnormal2	1.000	0.998
mis2	simple	nonnormal3	1.000	0.995
mis2	simple	nonnormal_dep1	1.000	0.995
mis2	simple	nonnormal_dep2	1.000	0.998
mis2	simple	normal	1.000	0.996
true	complex	elliptical	0.992	0.999
true	complex	nonnormal1	0.989	0.332
true	complex	nonnormal2	0.987	0.330
true	complex	nonnormal3	0.991	0.529
true	complex	nonnormal_dep1	0.992	0.539
true	complex	nonnormal_dep2	0.988	0.336
true	complex	normal	0.992	0.546
true	simple	elliptical	0.999	1.000
true	simple	nonnormal1	1.000	1.000
true	simple	nonnormal2	1.000	1.000
true	simple	nonnormal3	1.000	1.000
true	simple	nonnormal_dep1	1.000	1.000
true	simple	nonnormal_dep2	1.000	1.000
true	simple	normal	1.000	1.000

Same as above, but now with more strict cutoffs (CFI >.95, RMSEA < .05).

# new summarise function for obtaining specific cut-offs
Summarise <- function(condition, results, fixed_objects = NULL) {
   cut1 <- colMeans(results[,c('cfi', 'tli', 'nnfi', 'gfi', 'agfi', 'mfi')] > .95)
   cut2 <- colMeans(results[,c('rmsea', 'srmr')] < .05)
   ret <- c(cut95=cut1, cut05=cut2)
   ret
}

newres2 <- reSummarise(Summarise, results=res)
newres2

# A tibble: 252 × 12
   model  gen_condition     N fit_model cut95.cfi cut95.tli cut95.nnfi cut95.gfi
   <chr>  <chr>         <dbl> <chr>         <dbl>     <dbl>      <dbl>     <dbl>
 1 simple normal          150 true          0.998     0.99       0.99      0.004
 2 compl… normal          150 true          0.195     0.084      0.084     0    
 3 simple nonnormal1      150 true          0.998     0.994      0.994     0.009
 4 compl… nonnormal1      150 true          0.156     0.058      0.058     0    
 5 simple nonnormal2      150 true          1         0.997      0.997     0.009
 6 compl… nonnormal2      150 true          0.153     0.059      0.059     0    
 7 simple nonnormal3      150 true          0.998     0.992      0.992     0.009
 8 compl… nonnormal3      150 true          0.186     0.065      0.065     0    
 9 simple elliptical      150 true          0.901     0.866      0.866     0.009
10 compl… elliptical      150 true          0.525     0.397      0.397     0.001
# ℹ 242 more rows
# ℹ 4 more variables: cut95.agfi <dbl>, cut95.mfi <dbl>, cut05.rmsea <dbl>,
#   cut05.srmr <dbl>

newres2 %>% group_by(fit_model, model) %>% 
    summarise(cfi=mean(cut95.cfi), rmsea=mean(cut05.rmsea)) -> tab3

`summarise()` has grouped output by 'fit_model'. You can override using the
`.groups` argument.

knitr::kable(tab3)

fit_model	model	cfi	rmsea
mis1	complex	0.040	0.048
mis1	simple	0.900	0.708
mis2	complex	0.001	0.002
mis2	simple	0.770	0.200
true	complex	0.157	0.117
true	simple	0.997	0.995

newres2 %>% group_by(fit_model, model, gen_condition) %>% 
    summarise(cfi=mean(cut95.cfi), rmsea=mean(cut05.rmsea)) -> tab4

`summarise()` has grouped output by 'fit_model', 'model'. You can override
using the `.groups` argument.

knitr::kable(tab4)

fit_model	model	gen_condition	cfi	rmsea
mis1	complex	elliptical	0.215	0.334
mis1	complex	nonnormal1	0.009	0.000
mis1	complex	nonnormal2	0.008	0.000
mis1	complex	nonnormal3	0.012	0.000
mis1	complex	nonnormal_dep1	0.013	0.000
mis1	complex	nonnormal_dep2	0.010	0.000
mis1	complex	normal	0.011	0.000
mis1	simple	elliptical	0.495	0.909
mis1	simple	nonnormal1	0.969	0.746
mis1	simple	nonnormal2	0.971	0.744
mis1	simple	nonnormal3	0.966	0.596
mis1	simple	nonnormal_dep1	0.966	0.599
mis1	simple	nonnormal_dep2	0.974	0.753
mis1	simple	normal	0.962	0.606
mis2	complex	elliptical	0.009	0.017
mis2	complex	nonnormal1	0.000	0.000
mis2	complex	nonnormal2	0.000	0.000
mis2	complex	nonnormal3	0.000	0.000
mis2	complex	nonnormal_dep1	0.000	0.000
mis2	complex	nonnormal_dep2	0.000	0.000
mis2	complex	normal	0.000	0.000
mis2	simple	elliptical	0.086	0.702
mis2	simple	nonnormal1	0.905	0.143
mis2	simple	nonnormal2	0.906	0.142
mis2	simple	nonnormal3	0.870	0.091
mis2	simple	nonnormal_dep1	0.865	0.086
mis2	simple	nonnormal_dep2	0.909	0.148
mis2	simple	normal	0.853	0.086
true	complex	elliptical	0.745	0.807
true	complex	nonnormal1	0.047	0.001
true	complex	nonnormal2	0.044	0.001
true	complex	nonnormal3	0.068	0.002
true	complex	nonnormal_dep1	0.076	0.002
true	complex	nonnormal_dep2	0.044	0.001
true	complex	normal	0.075	0.002
true	simple	elliptical	0.983	0.994
true	simple	nonnormal1	1.000	0.996
true	simple	nonnormal2	1.000	0.995
true	simple	nonnormal3	1.000	0.995
true	simple	nonnormal_dep1	1.000	0.996
true	simple	nonnormal_dep2	1.000	0.995
true	simple	normal	1.000	0.995