Flora and Curran (2004) simulation

Author

Phil Chalmers

Simulation from:

Flora, D. B. & Curran, P. J. (2004). An Empirical Evaluation of Alternative Methods of Estimation for Confirmatory Factor Analysis With Ordinal Data. Psychological Methods, 9, 466-491

User-defined functions

User-defined helper function for the generate-analyse steps to generate appropriate syntax for lavaan (could be placed in an external .R file and sourced in prior to running runSimulation() instead).

#' @param J number of variables
#' @param analyse logical; is syntax being used to data generation or analysis?
#' @examples
#' cat(genLavaanSyntax(factors=1, indicators=10))
#' cat(genLavaanSyntax(factors=2, indicators=10))
#' cat(genLavaanSyntax(factors=1, indicators=10, analyse = TRUE))
#' cat(genLavaanSyntax(factors=2, indicators=10, analyse = TRUE))
#'
genLavaanSyntax <- function(factors, indicators, analyse = FALSE){
    ret <- if(factors == 1){
        if(analyse){
            paste0(paste0('f1 =~ NA*x1 + ', paste0(paste0('x', 2:indicators),
                                                   collapse=' + ')),
                   '\nf1 ~~ 1*f1')
        } else {
            paste0(paste0('f1 =~ ', paste0(rep(.7, indicators), 
                                           paste0('*x', 1:indicators),
                                           collapse=' + '), ' \n'),
                   paste0(sprintf('x%s ~~ 0.51*x%s', 1:indicators, 1:indicators), 
                          collapse=' \n'))
        }
    } else if(factors == 2){
        if(analyse){
            paste0(paste0('f1 =~ NA*x1 + ',
                          paste0(paste0('x', 2:indicators), collapse=' + ')),
                   paste0(sprintf('\nf2 =~ NA*x%s + ', indicators+1),
                          paste0(paste0('x', 2:indicators + indicators), collapse=' + ')),
                   '\nf1 ~~ 1*f1 \nf2 ~~ 1*f2 \nf1 ~~ f2')
        } else {
            paste0(paste0('f1 =~ ', paste0(rep(.7, indicators), 
                                           paste0('*x', 1:indicators),
                                           collapse=' + '), ' \n'),
                   paste0('f2 =~ ', paste0(rep(.7, indicators), 
                                           paste0('*x', 1:indicators + indicators),
                                           collapse=' + '), ' \n'),
                   'f1 ~~ .3*f2 \n',
                   paste0(sprintf('x%s ~~ 0.51*x%s', 1:indicators, 1:indicators), 
                          collapse=' \n'))
        }
    } else stop('factors input is incorrect')
    ret
}

Simulation code

library(SimDesign)

Design <- createDesign(N = c(100, 200, 500, 1000),
                       categories = c(2, 5),
                       skewness_kurtosis = list(c(0, 0), c(.75, 1.75), c(.75, 3.75),
                                                c(1.25, 1.75), c(1.25, 3.75)),
                       factors = c(1, 2),
                       indicators = c(5, 10),
                       estimator = c('WLSMV', 'WLS'),
                       # remove poorly converging combinations 
                       subset = !(estimator == 'WLS' & N %in% c(100, 200) &
                                      factors == 2 & indicators == 10))

######################################################

Generate <- function(condition, fixed_objects = NULL) {
    Attach(condition)
    syntax <- genLavaanSyntax(factors=factors, indicators=indicators)
    cdat <- simulateData(syntax, model.type='cfa', sample.nobs=N,
                         skewness=skewness_kurtosis[1L],
                         kurtosis=skewness_kurtosis[2L])
    tau <- if(categories == 5)
        c(-1.645, -0.643, 0.643, 1.645) else 0
    # data generation fix described in Flora's (2002) unpublished dissertation
    if(categories == 5 && all(skewness_kurtosis == c(1.25, 1.75)))
        tau[1] <- -1.125
    dat <- apply(cdat, 2, function(x, tau){
        dat <- numeric(length(x))
        for(i in 1:length(tau))
            dat[x > tau[i]] <- i
        dat
    }, tau=tau)
    # throw error if number of categories not correct
    if(!all(apply(dat, 2, function(x) length(unique(x))) == categories))
        stop('Number of categories generated is incorrect')
    dat
}

Analyse <- function(condition, dat, fixed_objects = NULL) {
    Attach(condition)
    syntax <- genLavaanSyntax(factors=factors, indicators=indicators, analyse=TRUE)
    mod <- cfa(syntax, dat, ordered=colnames(dat), estimator=estimator)

    # check that model and coefficients are reasonable
    if(!lavInspect(mod, 'converged')) stop('Model did not converge')
    pick_lambdas <- matrix(TRUE, indicators*factors, factors)
    if(factors == 2)
        pick_lambdas[(indicators+1):(indicators*3)] <- FALSE
    cfs <- lavInspect(mod, what="std")$lambda[pick_lambdas]
    if(any(cfs > 1 | cfs < -1))
        stop('Model contains Heywood cases')
    if(factors > 2 && abs(lavInspect(mod, what="std")$psi[2,1]) >= 1)
        stop('Latent variable psi matrix not positive definite')

    # extract desired results
    fit <- fitMeasures(mod)
    ses <- lavInspect(mod, what="se")$lambda[pick_lambdas]
    fitstats <- if(estimator == 'WLS') fit[c('chisq', 'df', 'pvalue')]
    else if(estimator == 'WLSMV') fit[c('chisq.scaled', 'df.scaled', 'pvalue.scaled')]
    names(fitstats) <- c('chisq', 'df', 'pvalue')
    phi21 <- if(factors == 2)
        lavInspect(mod, what="std")$psi[1,2] else NULL

    ret <- c(fitstats, mean_ses=mean(ses), lambda=cfs, phi21=phi21)
    ret
}

Summarise <- function(condition, results, fixed_objects = NULL) {
    # model parameters
    lambdas <- results[ , grepl('lambda', colnames(results))]
    pool_mean_lambdas <- mean(apply(lambdas, 2, mean))     # Equation 10
    pool_SD_lambdas <- sqrt(mean(apply(lambdas, 2, var)))  # Equation 11
    RB_phi21 <- if(condition$factors == 2)
        bias(results$phi21, parameter=.3, type='relative', percent=TRUE) else NULL
    mean_se <- mean(results$mean_ses)

    # goodness-of-fit
    edr_05 <- EDR(results$pvalue, alpha = .05)
    mean_X2 <- mean(results$chisq)
    sd_X2 <- sd(results$chisq)
    RB_X2 <- bias(results$chisq, parameter=results$df, type='relative',
                  percent=TRUE, unname=TRUE)

    ret <- c(mean_X2=mean_X2, sd_X2=sd_X2, edr_05=edr_05,
             pool_mean_lambdas=pool_mean_lambdas,
             pool_SD_lambdas=pool_SD_lambdas, mean_se=mean_se,
             RB_X2=RB_X2, RB_phi21=RB_phi21)
    ret
}

######################################################

# run simulation
res <- runSimulation(design=Design, replications=500, generate=Generate,
                     analyse=Analyse, summarise=Summarise, packages='lavaan', 
                     parallel=TRUE, max_errors=100, save_results=TRUE,
                     filename='FloraCurran2004')
res

# A tibble: 300 × 20
       N categories skewness_kurtosis factors indicators estimator mean_X2
   <dbl>      <dbl> <list>              <dbl>      <dbl> <chr>       <dbl>
 1   100          2 <dbl [2]>               1          5 WLSMV      4.7961
 2   200          2 <dbl [2]>               1          5 WLSMV      4.8579
 3   500          2 <dbl [2]>               1          5 WLSMV      5.0052
 4  1000          2 <dbl [2]>               1          5 WLSMV      5.0903
 5   100          5 <dbl [2]>               1          5 WLSMV      4.8931
 6   200          5 <dbl [2]>               1          5 WLSMV      4.9868
 7   500          5 <dbl [2]>               1          5 WLSMV      5.1779
 8  1000          5 <dbl [2]>               1          5 WLSMV      5.1632
 9   100          2 <dbl [2]>               1          5 WLSMV      4.7838
10   200          2 <dbl [2]>               1          5 WLSMV      4.9057
# ℹ 290 more rows
# ℹ 13 more variables: sd_X2 <dbl>, edr_05 <dbl>, pool_mean_lambdas <dbl>,
#   pool_SD_lambdas <dbl>, mean_se <dbl>, RB_X2 <dbl>, RB_phi21 <dbl>,
#   REPLICATIONS <dbl>, SIM_TIME <chr>, COMPLETED <chr>, SEED <int>,
#   ERRORS <int>, WARNINGS <int>

Extras

Recreate Table 5.

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

# separate s and k values in skewness_kurtosis, create % variable, 
#    select only 5 categories, rename columns to match
res %>% mutate(s = sapply(res$skewness_kurtosis, function(x) x[1]),
               k = sapply(res$skewness_kurtosis, function(x) x[2]),
               "% reject" = edr_05 * 100) %>% 
    filter(categories == 5) %>% 
    rename(M='mean_X2', SD='sd_X2', RB='RB_X2') -> res_5

# construct Table 5 information from Flora and Curran (2004)
res_5 %>%
    filter(indicators == 10, factors == 1, estimator == 'WLS') %>%  
    select(N, s, k, M, SD, RB, "% reject") -> WLS
res_5 %>% 
    filter(indicators == 10, factors == 1, estimator == 'WLSMV') %>%  
    select(N, s, k, RB, "% reject") -> WLSMV
full_join(WLS, WLSMV, by = c("N", "s", "k"), suffix=c('', '.DWLS')) %>% 
    arrange(N, s, k) -> tab5
pander::pander(tab5)

N	s	k	M	SD	RB	% reject	RB.DWLS	% reject.DWLS
100	0	0	58.9	19.87	68.28	62.6	4.445	5.4
100	0.75	1.75	58.57	19.78	67.34	62.4	7.908	6.2
100	0.75	3.75	59.28	19.95	69.38	65.4	5.044	7
100	1.25	1.75	65.5	19.34	87.15	78.6	12.24	9
100	1.25	3.75	57.7	18.52	64.87	60.8	4.526	5.6
200	0	0	44.02	11.96	25.76	26.6	2.3	5.4
200	0.75	1.75	45.44	12.91	29.82	32.2	1.372	5.2
200	0.75	3.75	44.99	12.68	28.53	29.4	3.738	7.4
200	1.25	1.75	49.07	13.61	40.2	41.6	7.238	7.8
200	1.25	3.75	45.41	12.46	29.73	32.8	4.783	7.8
500	0	0	38.93	9.75	11.22	13.8	0.7476	6.2
500	0.75	1.75	39.3	9.575	12.29	14.2	0.6126	5.8
500	0.75	3.75	38.65	9.413	10.42	12.2	1.136	4.2
500	1.25	1.75	42.11	10.93	20.32	24	6.473	7.2
500	1.25	3.75	38.39	10.15	9.694	13	0.8003	5.2
1000	0	0	37.17	9.303	6.195	10.6	1.289	6.2
1000	0.75	1.75	37.29	9.378	6.537	9	0.9196	5.2
1000	0.75	3.75	36.29	8.943	3.676	7.8	0.6644	6.4
1000	1.25	1.75	39.66	10.71	13.3	16.6	6.984	8.6
1000	1.25	3.75	36.76	8.834	5.017	8.4	1.826	5

Recreate Figure 6.

library(ggplot2)

# generate summary data for Figure 6 in Flora and Curran (2004)
res_5 %>%
  filter(indicators == 5) %>%
  mutate(TypeI = edr_05 * 100,
         est_fact = factor(estimator):factor(factors)) %>%
  group_by(N, est_fact) %>%
  summarise(mean_TypeI = mean(TypeI)) -> fig6dat

`summarise()` has grouped output by 'N'. You can override using the `.groups`
argument.

labels <- c("Model 1 (full WLS estimation)", "Model 3 (full WLS estimation)",
            "Model 1 (robust WLS estimation)", "Model 3 (robust WLS estimation)")

# draw graphic using the ggplot2 package
ggplot(fig6dat,
       aes(x=factor(N), y=mean_TypeI, linetype=est_fact,
           shape=est_fact, group=est_fact)) +
  geom_line() + geom_point(size=4) +
  geom_hline(yintercept=5, colour='red', linetype='dashed') +
  xlab("Sample Size") + ylab("Type I Error Rate") +
  scale_linetype_discrete(labels=labels) +
  scale_shape_discrete(labels=labels) +
  ylim(0, 80) + theme_bw() +
  theme(legend.title=element_blank(),
        legend.position = c(.8, .7),
        axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank())

Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.