#' @param J number of variables
#' @param analyse logical; is syntax being used to data generation or analysis?
#' @examples
#' cat(genLavaanSyntax(factors=1, indicators=10))
#' cat(genLavaanSyntax(factors=2, indicators=10))
#' cat(genLavaanSyntax(factors=1, indicators=10, analyse = TRUE))
#' cat(genLavaanSyntax(factors=2, indicators=10, analyse = TRUE))
#'
genLavaanSyntax <- function(factors, indicators, analyse = FALSE){
ret <- if(factors == 1){
if(analyse){
paste0(paste0('f1 =~ NA*x1 + ', paste0(paste0('x', 2:indicators),
collapse=' + ')),
'\nf1 ~~ 1*f1')
} else {
paste0(paste0('f1 =~ ', paste0(rep(.7, indicators),
paste0('*x', 1:indicators),
collapse=' + '), ' \n'),
paste0(sprintf('x%s ~~ 0.51*x%s', 1:indicators, 1:indicators),
collapse=' \n'))
}
} else if(factors == 2){
if(analyse){
paste0(paste0('f1 =~ NA*x1 + ',
paste0(paste0('x', 2:indicators), collapse=' + ')),
paste0(sprintf('\nf2 =~ NA*x%s + ', indicators+1),
paste0(paste0('x', 2:indicators + indicators), collapse=' + ')),
'\nf1 ~~ 1*f1 \nf2 ~~ 1*f2 \nf1 ~~ f2')
} else {
paste0(paste0('f1 =~ ', paste0(rep(.7, indicators),
paste0('*x', 1:indicators),
collapse=' + '), ' \n'),
paste0('f2 =~ ', paste0(rep(.7, indicators),
paste0('*x', 1:indicators + indicators),
collapse=' + '), ' \n'),
'f1 ~~ .3*f2 \n',
paste0(sprintf('x%s ~~ 0.51*x%s', 1:indicators, 1:indicators),
collapse=' \n'))
}
} else stop('factors input is incorrect')
ret
}Flora and Curran (2004) simulation
Simulation from:
- Flora, D. B. & Curran, P. J. (2004). An Empirical Evaluation of Alternative Methods of Estimation for Confirmatory Factor Analysis With Ordinal Data. Psychological Methods, 9, 466-491
User-defined functions
User-defined helper function for the generate-analyse steps to generate appropriate syntax for lavaan (could be placed in an external .R file and sourced in prior to running runSimulation() instead).
Simulation code
library(SimDesign)
Design <- createDesign(N = c(100, 200, 500, 1000),
categories = c(2, 5),
skewness_kurtosis = list(c(0, 0), c(.75, 1.75), c(.75, 3.75),
c(1.25, 1.75), c(1.25, 3.75)),
factors = c(1, 2),
indicators = c(5, 10),
estimator = c('WLSMV', 'WLS'),
# remove poorly converging combinations
subset = !(estimator == 'WLS' & N %in% c(100, 200) &
factors == 2 & indicators == 10))
######################################################
Generate <- function(condition, fixed_objects = NULL) {
Attach(condition)
syntax <- genLavaanSyntax(factors=factors, indicators=indicators)
cdat <- simulateData(syntax, model.type='cfa', sample.nobs=N,
skewness=skewness_kurtosis[1L],
kurtosis=skewness_kurtosis[2L])
tau <- if(categories == 5)
c(-1.645, -0.643, 0.643, 1.645) else 0
# data generation fix described in Flora's (2002) unpublished dissertation
if(categories == 5 && all(skewness_kurtosis == c(1.25, 1.75)))
tau[1] <- -1.125
dat <- apply(cdat, 2, function(x, tau){
dat <- numeric(length(x))
for(i in 1:length(tau))
dat[x > tau[i]] <- i
dat
}, tau=tau)
# throw error if number of categories not correct
if(!all(apply(dat, 2, function(x) length(unique(x))) == categories))
stop('Number of categories generated is incorrect')
dat
}
Analyse <- function(condition, dat, fixed_objects = NULL) {
Attach(condition)
syntax <- genLavaanSyntax(factors=factors, indicators=indicators, analyse=TRUE)
mod <- cfa(syntax, dat, ordered=colnames(dat), estimator=estimator)
# check that model and coefficients are reasonable
if(!lavInspect(mod, 'converged')) stop('Model did not converge')
pick_lambdas <- matrix(TRUE, indicators*factors, factors)
if(factors == 2)
pick_lambdas[(indicators+1):(indicators*3)] <- FALSE
cfs <- lavInspect(mod, what="std")$lambda[pick_lambdas]
if(any(cfs > 1 | cfs < -1))
stop('Model contains Heywood cases')
if(factors > 2 && abs(lavInspect(mod, what="std")$psi[2,1]) >= 1)
stop('Latent variable psi matrix not positive definite')
# extract desired results
fit <- fitMeasures(mod)
ses <- lavInspect(mod, what="se")$lambda[pick_lambdas]
fitstats <- if(estimator == 'WLS') fit[c('chisq', 'df', 'pvalue')]
else if(estimator == 'WLSMV') fit[c('chisq.scaled', 'df.scaled', 'pvalue.scaled')]
names(fitstats) <- c('chisq', 'df', 'pvalue')
phi21 <- if(factors == 2)
lavInspect(mod, what="std")$psi[1,2] else NULL
ret <- c(fitstats, mean_ses=mean(ses), lambda=cfs, phi21=phi21)
ret
}
Summarise <- function(condition, results, fixed_objects = NULL) {
# model parameters
lambdas <- results[ , grepl('lambda', colnames(results))]
pool_mean_lambdas <- mean(apply(lambdas, 2, mean)) # Equation 10
pool_SD_lambdas <- sqrt(mean(apply(lambdas, 2, var))) # Equation 11
RB_phi21 <- if(condition$factors == 2)
bias(results$phi21, parameter=.3, type='relative', percent=TRUE) else NULL
mean_se <- mean(results$mean_ses)
# goodness-of-fit
edr_05 <- EDR(results$pvalue, alpha = .05)
mean_X2 <- mean(results$chisq)
sd_X2 <- sd(results$chisq)
RB_X2 <- bias(results$chisq, parameter=results$df, type='relative',
percent=TRUE, unname=TRUE)
ret <- c(mean_X2=mean_X2, sd_X2=sd_X2, edr_05=edr_05,
pool_mean_lambdas=pool_mean_lambdas,
pool_SD_lambdas=pool_SD_lambdas, mean_se=mean_se,
RB_X2=RB_X2, RB_phi21=RB_phi21)
ret
}
######################################################
# run simulation
res <- runSimulation(design=Design, replications=500, generate=Generate,
analyse=Analyse, summarise=Summarise, packages='lavaan',
parallel=TRUE, max_errors=100, save_results=TRUE,
filename='FloraCurran2004')
res# A tibble: 300 × 20
N categories skewness_kurtosis factors indicators estimator mean_X2
<dbl> <dbl> <list> <dbl> <dbl> <chr> <dbl>
1 100 2 <dbl [2]> 1 5 WLSMV 4.7961
2 200 2 <dbl [2]> 1 5 WLSMV 4.8579
3 500 2 <dbl [2]> 1 5 WLSMV 5.0052
4 1000 2 <dbl [2]> 1 5 WLSMV 5.0903
5 100 5 <dbl [2]> 1 5 WLSMV 4.8931
6 200 5 <dbl [2]> 1 5 WLSMV 4.9868
7 500 5 <dbl [2]> 1 5 WLSMV 5.1779
8 1000 5 <dbl [2]> 1 5 WLSMV 5.1632
9 100 2 <dbl [2]> 1 5 WLSMV 4.7838
10 200 2 <dbl [2]> 1 5 WLSMV 4.9057
# ℹ 290 more rows
# ℹ 13 more variables: sd_X2 <dbl>, edr_05 <dbl>, pool_mean_lambdas <dbl>,
# pool_SD_lambdas <dbl>, mean_se <dbl>, RB_X2 <dbl>, RB_phi21 <dbl>,
# REPLICATIONS <dbl>, SIM_TIME <chr>, COMPLETED <chr>, SEED <int>,
# ERRORS <int>, WARNINGS <int>
Extras
Recreate Table 5.
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# separate s and k values in skewness_kurtosis, create % variable,
# select only 5 categories, rename columns to match
res %>% mutate(s = sapply(res$skewness_kurtosis, function(x) x[1]),
k = sapply(res$skewness_kurtosis, function(x) x[2]),
"% reject" = edr_05 * 100) %>%
filter(categories == 5) %>%
rename(M='mean_X2', SD='sd_X2', RB='RB_X2') -> res_5
# construct Table 5 information from Flora and Curran (2004)
res_5 %>%
filter(indicators == 10, factors == 1, estimator == 'WLS') %>%
select(N, s, k, M, SD, RB, "% reject") -> WLS
res_5 %>%
filter(indicators == 10, factors == 1, estimator == 'WLSMV') %>%
select(N, s, k, RB, "% reject") -> WLSMV
full_join(WLS, WLSMV, by = c("N", "s", "k"), suffix=c('', '.DWLS')) %>%
arrange(N, s, k) -> tab5
pander::pander(tab5)| N | s | k | M | SD | RB | % reject | RB.DWLS | % reject.DWLS |
|---|---|---|---|---|---|---|---|---|
| 100 | 0 | 0 | 58.9 | 19.87 | 68.28 | 62.6 | 4.445 | 5.4 |
| 100 | 0.75 | 1.75 | 58.57 | 19.78 | 67.34 | 62.4 | 7.908 | 6.2 |
| 100 | 0.75 | 3.75 | 59.28 | 19.95 | 69.38 | 65.4 | 5.044 | 7 |
| 100 | 1.25 | 1.75 | 65.5 | 19.34 | 87.15 | 78.6 | 12.24 | 9 |
| 100 | 1.25 | 3.75 | 57.7 | 18.52 | 64.87 | 60.8 | 4.526 | 5.6 |
| 200 | 0 | 0 | 44.02 | 11.96 | 25.76 | 26.6 | 2.3 | 5.4 |
| 200 | 0.75 | 1.75 | 45.44 | 12.91 | 29.82 | 32.2 | 1.372 | 5.2 |
| 200 | 0.75 | 3.75 | 44.99 | 12.68 | 28.53 | 29.4 | 3.738 | 7.4 |
| 200 | 1.25 | 1.75 | 49.07 | 13.61 | 40.2 | 41.6 | 7.238 | 7.8 |
| 200 | 1.25 | 3.75 | 45.41 | 12.46 | 29.73 | 32.8 | 4.783 | 7.8 |
| 500 | 0 | 0 | 38.93 | 9.75 | 11.22 | 13.8 | 0.7476 | 6.2 |
| 500 | 0.75 | 1.75 | 39.3 | 9.575 | 12.29 | 14.2 | 0.6126 | 5.8 |
| 500 | 0.75 | 3.75 | 38.65 | 9.413 | 10.42 | 12.2 | 1.136 | 4.2 |
| 500 | 1.25 | 1.75 | 42.11 | 10.93 | 20.32 | 24 | 6.473 | 7.2 |
| 500 | 1.25 | 3.75 | 38.39 | 10.15 | 9.694 | 13 | 0.8003 | 5.2 |
| 1000 | 0 | 0 | 37.17 | 9.303 | 6.195 | 10.6 | 1.289 | 6.2 |
| 1000 | 0.75 | 1.75 | 37.29 | 9.378 | 6.537 | 9 | 0.9196 | 5.2 |
| 1000 | 0.75 | 3.75 | 36.29 | 8.943 | 3.676 | 7.8 | 0.6644 | 6.4 |
| 1000 | 1.25 | 1.75 | 39.66 | 10.71 | 13.3 | 16.6 | 6.984 | 8.6 |
| 1000 | 1.25 | 3.75 | 36.76 | 8.834 | 5.017 | 8.4 | 1.826 | 5 |
Recreate Figure 6.
library(ggplot2)
# generate summary data for Figure 6 in Flora and Curran (2004)
res_5 %>%
filter(indicators == 5) %>%
mutate(TypeI = edr_05 * 100,
est_fact = factor(estimator):factor(factors)) %>%
group_by(N, est_fact) %>%
summarise(mean_TypeI = mean(TypeI)) -> fig6dat`summarise()` has grouped output by 'N'. You can override using the `.groups`
argument.
labels <- c("Model 1 (full WLS estimation)", "Model 3 (full WLS estimation)",
"Model 1 (robust WLS estimation)", "Model 3 (robust WLS estimation)")
# draw graphic using the ggplot2 package
ggplot(fig6dat,
aes(x=factor(N), y=mean_TypeI, linetype=est_fact,
shape=est_fact, group=est_fact)) +
geom_line() + geom_point(size=4) +
geom_hline(yintercept=5, colour='red', linetype='dashed') +
xlab("Sample Size") + ylab("Type I Error Rate") +
scale_linetype_discrete(labels=labels) +
scale_shape_discrete(labels=labels) +
ylim(0, 80) + theme_bw() +
theme(legend.title=element_blank(),
legend.position = c(.8, .7),
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.