Hu and Bentler (1999) simulation

Author

Phil Chalmers

Simulation from:

User-defined functions

User-defined helper function for the generate-analyse steps to generate appropriate syntax for lavaan, as well as defined fixed objects re-used throughout the simulation (could be placed in an external .R file and sourced in prior to running runSimulation() instead).

# L' matrix
simple <- complex <- matrix(c(.7, .7, .75, .8, .8, numeric(15),
                              .7, .7, .75, .8, .8, numeric(15),
                              .7, .7, .75, .8, .8), nrow=3, byrow=TRUE)
complex[c(3,11,27)] <- c(.7, .7, .7)

# residual variances
simple.resids <- complex.resids <- 1 - colSums(simple^2)
complex.resids[c(1,4,9)] <- c(.51, .36, .36)

# latent correlations
fcor <- matrix(1,3,3)
fcor[lower.tri(fcor)] <- fcor[upper.tri(fcor)] <- c(.5, .4, .3)

# variable distribution properties
kurtosis.common_2.3 <- c(-1, 2, 5)
kurtosis.unique_2.4 <- c(-1,.5,2.5,4.5,6.5,-1,1,3,5,7,
                         -.5,1.5,3.5,5.5,7.5)
Ztransform <- function(n=1) sqrt(rchisq(n, df=5)) / sqrt(3)


# organize all into fixed_object list
fo <- list(simple=simple, simple.resids=simple.resids,
           complex=complex, complex.resids=complex.resids,
           fcor=fcor, kurtosis.unique_2.4=kurtosis.unique_2.4,
           kurtosis.common_2.3=kurtosis.common_2.3,
           Ztransform=Ztransform)

rm(list=setdiff(ls(), "fo"))


#' Generate lavaan syntax for Hu and Bentler (1999) simulation
#'
#' @param model simple/complex
#' @param fit_model true/mis1/mis2
#' @examples
#' cat(genLavaanSyntax('simple', 'true'))
#' cat(genLavaanSyntax('complex', 'mis2'))
#'
genLavaanSyntax <- function(model, fit_model){
    ret <- if(model == 'simple'){
        add <- switch(fit_model,
                      true = "",
                      mis1 = "\nf1 ~~ 0*f2",
                      mis2 = "\nf1 ~~ 0*f2\nf1 ~~ 0*f3")
        sprintf("
        f1 =~ NA*V1 + V2 + V3 + V4 + 0.8*V5
        f2 =~ NA*V6 + V7 + V8 + V9 + 0.8*V10
        f3 =~ NA*V11 + V12 + V13 + V14 + 0.8*V15%s", add)
    } else if(model == 'complex'){
        # complex
        add <- switch(fit_model,
                      true = c(' + V1', '+ V9'),
                      mis1 = c('', '+ V9'),
                      mis2 = c('', ''))
        sprintf("
        f1 =~ NA*V1 + V2 + V3 + V4 + 0.8*V5
        f2 =~ NA*V6 + V7 + V8 + V9 + 0.8*V10%s
        f3 =~ NA*V11 + V12 + V13 + V14 + 0.8*V15 + V4%s", add[1], add[2])
    } else stop('model type not included')
    ret
}

Simulation code

The follow implements the data generation conditions and subsequent analyses using the lavaan packages ML criteria. Note that in the summarise step only the general behaviour of the fit statistics is reported rather the cut-values. However, use of reSummarise() is demonstrated in the following section to obtain the cut-off criteria decided by the front-end user.

library(SimDesign)

Design <- createDesign(model=c('simple', 'complex'),
                       gen_condition=c('normal', paste0('nonnormal', 1:3),
                                       'elliptical',
                                       paste0('nonnormal_dep', 1:2)),
                       N = c(150, 250, 500, 1000, 2500, 5000),
                       fit_model=c('true', 'mis1', 'mis2'))
# source('HuBentler1999_extras.R')  # source instead of defining in same document
# str(fo)

#-------------------------------------------------------------------

Generate <- function(condition, fixed_objects = NULL) {
    Attach(condition)
    L <- with(fixed_objects, if(model == 'simple') simple else complex)
    sigma.e <- diag(with(fixed_objects,
                    if(model == 'simple') simple.resids else complex.resids))
    sigma <- fixed_objects$fcor
    Z <- ifelse(gen_condition %in% c(paste0('nonnormal_dep', 1:2), 'elliptical'),
                fixed_objects$Ztransform(), 1)

    # theta
    theta <- if(gen_condition %in% c('normal', 'nonnormal3', 'nonnormal_dep1')){
        rmvnorm(N, sigma=sigma) / Z
    } else if(gen_condition %in% c('nonnormal1', 'nonnormal2', 'nonnormal_dep2')){
        rValeMaurelli(N, sigma = sigma, kurt=fixed_objects$kurtosis.common_2.3) / Z
    } else if(gen_condition == 'elliptical'){
        Sigma <- diag(18)
        Sigma[1:3, 1:3] <- sigma
        tmp <- rtelliptical(N, mu=numeric(18),
                            lower=rep(-Inf, 18), upper=rep(Inf, 18),
                            Sigma=Sigma, dist = 'Normal') / Z
        epsilon <- tmp[,4:18]
        tmp[,1:3]
    }

    # epsilon
    epsilon <- if(gen_condition == 'normal'){
        rmvnorm(N, sigma=sigma.e) / Z
    } else if(gen_condition == 'elliptical'){
        epsilon
    } else {
        rValeMaurelli(N, sigma=sigma.e,
                      kurt=fixed_objects$kurtosis.unique_2.4) / Z
    }

    # X = L * theta + e
    dat <- as.data.frame(t(t(L) %*% t(theta)) + epsilon)
    dat
}

Analyse <- function(condition, dat, fixed_objects = NULL) {
    Attach(condition)
    syntax <- genLavaanSyntax(model=model, fit_model=fit_model)
    mod <- sem(syntax, data=dat)
    if(!lavInspect(mod, 'converged')) stop('Model did not converge')
    ret <- fitMeasures(mod)
    ret
}

Summarise <- function(condition, results, fixed_objects = NULL) {
    ret <- c(mean=colMeans(results), sd=apply(results, 2, sd))
    ret
}

#-------------------------------------------------------------------

res <- runSimulation(design=Design, replications=1000, parallel=TRUE,
                     debug='none', store_results=TRUE,
                     generate=Generate, fixed_objects=fo,
                     analyse=Analyse, summarise=Summarise,
                     packages=c("relliptical", "lavaan"),
                     filename='HuBentler1999')
res
# A tibble: 252 × 103
   model   gen_condition     N fit_model mean.npar mean.fmin mean.chisq mean.df
   <chr>   <chr>         <dbl> <chr>         <dbl>     <dbl>      <dbl>   <dbl>
 1 simple  normal          150 true             33   0.30457     91.370      87
 2 complex normal          150 true             36   0.56044    168.13       84
 3 simple  nonnormal1      150 true             33   0.30722     92.166      87
 4 complex nonnormal1      150 true             36   0.57259    171.78       84
 5 simple  nonnormal2      150 true             33   0.30296     90.889      87
 6 complex nonnormal2      150 true             36   0.57394    172.18       84
 7 simple  nonnormal3      150 true             33   0.30719     92.156      87
 8 complex nonnormal3      150 true             36   0.56485    169.45       84
 9 simple  elliptical      150 true             33   0.30405     91.216      87
10 complex elliptical      150 true             36   0.37740    113.22       84
# ℹ 242 more rows
# ℹ 95 more variables: mean.pvalue <dbl>, mean.baseline.chisq <dbl>,
#   mean.baseline.df <dbl>, mean.baseline.pvalue <dbl>, mean.cfi <dbl>,
#   mean.tli <dbl>, mean.nnfi <dbl>, mean.rfi <dbl>, mean.nfi <dbl>,
#   mean.pnfi <dbl>, mean.ifi <dbl>, mean.rni <dbl>, mean.logl <dbl>,
#   mean.unrestricted.logl <dbl>, mean.aic <dbl>, mean.bic <dbl>,
#   mean.ntotal <dbl>, mean.bic2 <dbl>, mean.rmsea <dbl>, …

Specific cut-offs

Above simulation does not contain the specific cut-offs utilized by the authors (e.g., CFI > .9 or RMSEA < .08). However, because the results were stored in the final object via store_results = TRUE these can be obtained by resummarising the results. Doing so just requires applying the reSummarise() function.

# new summarise function for obtaining specific cut-offs
Summarise <- function(condition, results, fixed_objects = NULL) {
   cut1 <- colMeans(results[,c('cfi', 'tli', 'nnfi', 'gfi', 'agfi', 'mfi')] > .90)
   cut2 <- colMeans(results[,c('rmsea', 'srmr')] < .08)
   ret <- c(cut90=cut1, cut08=cut2)
   ret
}

newres <- reSummarise(Summarise, results=res)
newres
# A tibble: 252 × 12
   model  gen_condition     N fit_model cut90.cfi cut90.tli cut90.nnfi cut90.gfi
   <chr>  <chr>         <dbl> <chr>         <dbl>     <dbl>      <dbl>     <dbl>
 1 simple normal          150 true          1         1          1         0.996
 2 compl… normal          150 true          0.963     0.777      0.777     0.031
 3 simple nonnormal1      150 true          1         1          1         0.994
 4 compl… nonnormal1      150 true          0.946     0.726      0.726     0.016
 5 simple nonnormal2      150 true          1         1          1         0.998
 6 compl… nonnormal2      150 true          0.932     0.708      0.708     0.019
 7 simple nonnormal3      150 true          1         1          1         0.992
 8 compl… nonnormal3      150 true          0.95      0.775      0.775     0.017
 9 simple elliptical      150 true          0.994     0.983      0.983     0.994
10 compl… elliptical      150 true          0.955     0.859      0.859     0.777
# ℹ 242 more rows
# ℹ 4 more variables: cut90.agfi <dbl>, cut90.mfi <dbl>, cut08.rmsea <dbl>,
#   cut08.srmr <dbl>

Some brief analyses of the results given the data generation conditions.

library(dplyr)

newres %>% group_by(fit_model, model) %>% 
    summarise(cfi=mean(cut90.cfi), rmsea=mean(cut08.rmsea)) -> tab1
`summarise()` has grouped output by 'fit_model'. You can override using the
`.groups` argument.
knitr::kable(tab1)
fit_model model cfi rmsea
mis1 complex 0.957 0.206
mis1 simple 0.997 0.999
mis2 complex 0.059 0.135
mis2 simple 0.991 0.997
true complex 0.990 0.516
true simple 1.000 1.000
newres %>% group_by(fit_model, model, gen_condition) %>% 
    summarise(cfi=mean(cut90.cfi), rmsea=mean(cut08.rmsea)) -> tab2
`summarise()` has grouped output by 'fit_model', 'model'. You can override
using the `.groups` argument.
knitr::kable(tab2)
fit_model model gen_condition cfi rmsea
mis1 complex elliptical 0.978 0.998
mis1 complex nonnormal1 0.945 0.067
mis1 complex nonnormal2 0.946 0.061
mis1 complex nonnormal3 0.960 0.083
mis1 complex nonnormal_dep1 0.964 0.082
mis1 complex nonnormal_dep2 0.946 0.062
mis1 complex normal 0.960 0.088
mis1 simple elliptical 0.977 1.000
mis1 simple nonnormal1 1.000 0.999
mis1 simple nonnormal2 1.000 1.000
mis1 simple nonnormal3 1.000 0.999
mis1 simple nonnormal_dep1 1.000 0.999
mis1 simple nonnormal_dep2 1.000 0.999
mis1 simple normal 1.000 0.999
mis2 complex elliptical 0.362 0.946
mis2 complex nonnormal1 0.009 0.000
mis2 complex nonnormal2 0.007 0.000
mis2 complex nonnormal3 0.010 0.000
mis2 complex nonnormal_dep1 0.010 0.000
mis2 complex nonnormal_dep2 0.009 0.000
mis2 complex normal 0.008 0.000
mis2 simple elliptical 0.939 0.999
mis2 simple nonnormal1 1.000 0.996
mis2 simple nonnormal2 1.000 0.998
mis2 simple nonnormal3 1.000 0.995
mis2 simple nonnormal_dep1 1.000 0.995
mis2 simple nonnormal_dep2 1.000 0.998
mis2 simple normal 1.000 0.996
true complex elliptical 0.992 0.999
true complex nonnormal1 0.989 0.332
true complex nonnormal2 0.987 0.330
true complex nonnormal3 0.991 0.529
true complex nonnormal_dep1 0.992 0.539
true complex nonnormal_dep2 0.988 0.336
true complex normal 0.992 0.546
true simple elliptical 0.999 1.000
true simple nonnormal1 1.000 1.000
true simple nonnormal2 1.000 1.000
true simple nonnormal3 1.000 1.000
true simple nonnormal_dep1 1.000 1.000
true simple nonnormal_dep2 1.000 1.000
true simple normal 1.000 1.000

Same as above, but now with more strict cutoffs (CFI >.95, RMSEA < .05).

# new summarise function for obtaining specific cut-offs
Summarise <- function(condition, results, fixed_objects = NULL) {
   cut1 <- colMeans(results[,c('cfi', 'tli', 'nnfi', 'gfi', 'agfi', 'mfi')] > .95)
   cut2 <- colMeans(results[,c('rmsea', 'srmr')] < .05)
   ret <- c(cut95=cut1, cut05=cut2)
   ret
}

newres2 <- reSummarise(Summarise, results=res)
newres2
# A tibble: 252 × 12
   model  gen_condition     N fit_model cut95.cfi cut95.tli cut95.nnfi cut95.gfi
   <chr>  <chr>         <dbl> <chr>         <dbl>     <dbl>      <dbl>     <dbl>
 1 simple normal          150 true          0.998     0.99       0.99      0.004
 2 compl… normal          150 true          0.195     0.084      0.084     0    
 3 simple nonnormal1      150 true          0.998     0.994      0.994     0.009
 4 compl… nonnormal1      150 true          0.156     0.058      0.058     0    
 5 simple nonnormal2      150 true          1         0.997      0.997     0.009
 6 compl… nonnormal2      150 true          0.153     0.059      0.059     0    
 7 simple nonnormal3      150 true          0.998     0.992      0.992     0.009
 8 compl… nonnormal3      150 true          0.186     0.065      0.065     0    
 9 simple elliptical      150 true          0.901     0.866      0.866     0.009
10 compl… elliptical      150 true          0.525     0.397      0.397     0.001
# ℹ 242 more rows
# ℹ 4 more variables: cut95.agfi <dbl>, cut95.mfi <dbl>, cut05.rmsea <dbl>,
#   cut05.srmr <dbl>
newres2 %>% group_by(fit_model, model) %>% 
    summarise(cfi=mean(cut95.cfi), rmsea=mean(cut05.rmsea)) -> tab3
`summarise()` has grouped output by 'fit_model'. You can override using the
`.groups` argument.
knitr::kable(tab3)
fit_model model cfi rmsea
mis1 complex 0.040 0.048
mis1 simple 0.900 0.708
mis2 complex 0.001 0.002
mis2 simple 0.770 0.200
true complex 0.157 0.117
true simple 0.997 0.995
newres2 %>% group_by(fit_model, model, gen_condition) %>% 
    summarise(cfi=mean(cut95.cfi), rmsea=mean(cut05.rmsea)) -> tab4
`summarise()` has grouped output by 'fit_model', 'model'. You can override
using the `.groups` argument.
knitr::kable(tab4)
fit_model model gen_condition cfi rmsea
mis1 complex elliptical 0.215 0.334
mis1 complex nonnormal1 0.009 0.000
mis1 complex nonnormal2 0.008 0.000
mis1 complex nonnormal3 0.012 0.000
mis1 complex nonnormal_dep1 0.013 0.000
mis1 complex nonnormal_dep2 0.010 0.000
mis1 complex normal 0.011 0.000
mis1 simple elliptical 0.495 0.909
mis1 simple nonnormal1 0.969 0.746
mis1 simple nonnormal2 0.971 0.744
mis1 simple nonnormal3 0.966 0.596
mis1 simple nonnormal_dep1 0.966 0.599
mis1 simple nonnormal_dep2 0.974 0.753
mis1 simple normal 0.962 0.606
mis2 complex elliptical 0.009 0.017
mis2 complex nonnormal1 0.000 0.000
mis2 complex nonnormal2 0.000 0.000
mis2 complex nonnormal3 0.000 0.000
mis2 complex nonnormal_dep1 0.000 0.000
mis2 complex nonnormal_dep2 0.000 0.000
mis2 complex normal 0.000 0.000
mis2 simple elliptical 0.086 0.702
mis2 simple nonnormal1 0.905 0.143
mis2 simple nonnormal2 0.906 0.142
mis2 simple nonnormal3 0.870 0.091
mis2 simple nonnormal_dep1 0.865 0.086
mis2 simple nonnormal_dep2 0.909 0.148
mis2 simple normal 0.853 0.086
true complex elliptical 0.745 0.807
true complex nonnormal1 0.047 0.001
true complex nonnormal2 0.044 0.001
true complex nonnormal3 0.068 0.002
true complex nonnormal_dep1 0.076 0.002
true complex nonnormal_dep2 0.044 0.001
true complex normal 0.075 0.002
true simple elliptical 0.983 0.994
true simple nonnormal1 1.000 0.996
true simple nonnormal2 1.000 0.995
true simple nonnormal3 1.000 0.995
true simple nonnormal_dep1 1.000 0.996
true simple nonnormal_dep2 1.000 0.995
true simple normal 1.000 0.995