# SimDesign::SimFunctions(nAnalyse=2)
library(SimDesign)
Design <- createDesign(nitems = c(20,40),
ability_mean = c(0,-1,1),
sample_size = c(500,1000),
model = c('1PL','2PL','3PL'),
estimator=c('MML')) # add 'MCMC' when supported
Design
# source in helper functions + fixed_objects definition
source("KangCohen2007_extras.R")
#-------------------------------------------------------------------
Generate <- function(condition, fixed_objects = NULL) {
Attach(condition)
# get population generating parameters
theta <- matrix(rnorm(sample_size, mean=ability_mean))
pars <- get_parameters(condition, fixed_objects)
# simulate response data (via mirt)
dat <- with(pars,
simdata(sample_size, a=a, d=d, guess=c,
Theta = theta, itemtype = '3PL'))
dat
}
Analyse.MML <- function(condition, dat, fixed_objects = NULL) {
Attach(condition)
AnalyseIf(estimator == "MML")
# MML estimation
fit <- sprintf("Theta = 1-%i
START = (1-%i, a1, 1.0)
FIXED = (1-%i, a1)", nitems, nitems, nitems)
mod1PL <- mirt(dat, model=fit, itemtype='2PL', verbose = FALSE)
mod2PL <- mirt(dat, model=1, itemtype='2PL', verbose = FALSE)
mod3PL <- mirt(dat, model=1, itemtype='3PL', verbose = FALSE)
stopifnot(extract.mirt(mod1PL, 'converged'))
stopifnot(extract.mirt(mod2PL, 'converged'))
stopifnot(extract.mirt(mod3PL, 'converged'))
# just return model comparison information (main purpose of the paper)
ret <- as.data.frame(c(M1PL=anova(mod1PL)[c('AIC', 'BIC')],
M2PL=anova(mod2PL)[c('AIC', 'BIC')],
M3PL=anova(mod3PL)[c('AIC', 'BIC')],
LR12=anova(mod1PL, mod2PL, verbose=FALSE)[2, c('X2', 'p')],
LR23=anova(mod2PL, mod3PL, verbose=FALSE)[2, c('X2', 'p')]))
ret
}
Analyse.MCMC <- function(condition, dat, fixed_objects = NULL) {
Attach(condition)
AnalyseIf(estimator == "MCMC")
# TODO
}
Summarise <- function(condition, results, fixed_objects = NULL) {
nms <- colnames(results)
means <- colMeans(results[,!grepl(".p\\b", nms)])
sds <- apply(results[,!grepl(".p\\b", nms)], 2, sd)
BIC <- best_info(results[,grepl(".BIC", nms)])
ret <- c(means=means, sds=sds, BIC)
if(condition$estimator == 'MML'){
AIC <- best_info(results[,grepl(".AIC\\b", nms)])
ps <- results[,grepl(".p\\b", nms)] < .05
ps[ps[,2], 1] <- FALSE ## if 3PLM test sig, 1PL vs 2PL doesn't matter
LR <- colMeans(cbind(ifelse(rowSums(ps) == 0, TRUE, FALSE), ps))
names(LR) <- c('M1PL.LR', 'M2PL.LR', 'M3PL.LR')
ret <- c(ret, AIC, LR)
}
ret
}
#-------------------------------------------------------------------
res <- runSimulation(design=Design, replications=500, generate=Generate,
analyse=list(Analyse.MML, Analyse.MCMC), # unnamed list to avoid extra labels
summarise=Summarise, fixed_objects=fo,
packages='mirt', parallel=TRUE, max_errors = Inf,
filename='KangCohen')
resKang and Cohen (2007) simulation 2
Simulation from:
- Kang T, Cohen AS. IRT Model Selection Methods for Dichotomous Items. Applied Psychological Measurement. 2007;31(4):331-358. doi:10.1177/0146621606292213
- https://journals.sagepub.com/doi/10.1177/0146621606292213
However, this simulation is only a partial one as I was mainly interested in replicating the IRT estimation results that utilized modal estimation rather than a complete MCMC form. Plus, the article reports the DIC information criteria for the Bayesian models, which these days is generally regarded as sub-optimal. Moreover, computation of a partial Bayes factor could have been replaced with a Bayes factor computation via bridge-sampling methods.
Simulation code
The following code uses TODO placeholders in case users are interested in investigating the MCMC results themselves in the context of this code-base.
# A tibble: 36 × 36
nitems ability_mean sample_size model estimator means.M1PL.AIC means.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
1 20 0 500 1PL MML 11752. 11836.
2 40 0 500 1PL MML 22827. 22996.
3 20 -1 500 1PL MML 10904. 10988.
4 40 -1 500 1PL MML 20718. 20886.
5 20 1 500 1PL MML 10516. 10600.
6 40 1 500 1PL MML 20943. 21112.
7 20 0 1000 1PL MML 23495. 23593.
8 40 0 1000 1PL MML 45625. 45821.
9 20 -1 1000 1PL MML 21806. 21905.
10 40 -1 1000 1PL MML 41344. 41540.
# ℹ 26 more rows
# ℹ 29 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
# means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
# means.LR23.X2 <dbl>, sds.M1PL.AIC <dbl>, sds.M1PL.BIC <dbl>,
# sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>, sds.M3PL.AIC <dbl>,
# sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>, sds.LR23.X2 <dbl>, M1PL.BIC <dbl>,
# M2PL.BIC <dbl>, M3PL.BIC <dbl>, M1PL.AIC <dbl>, M2PL.AIC <dbl>, …
Replication of several Tables and Figures
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# information criteria means/sds
Table6_means <- res %>% filter(model == "1PL") %>%
select(nitems:sample_size, estimator, means.M1PL.AIC:means.LR23.X2)
Table6_sds <- res %>% filter(model == "1PL") %>%
select(nitems:sample_size, estimator, sds.M1PL.AIC:sds.LR23.X2)
Table6_means# A tibble: 12 × 12
nitems ability_mean sample_size estimator means.M1PL.AIC means.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 20 0 500 MML 11752. 11836.
2 40 0 500 MML 22827. 22996.
3 20 -1 500 MML 10904. 10988.
4 40 -1 500 MML 20718. 20886.
5 20 1 500 MML 10516. 10600.
6 40 1 500 MML 20943. 21112.
7 20 0 1000 MML 23495. 23593.
8 40 0 1000 MML 45625. 45821.
9 20 -1 1000 MML 21806. 21905.
10 40 -1 1000 MML 41344. 41540.
11 20 1 1000 MML 21029. 21128.
12 40 1 1000 MML 41834. 42030.
# ℹ 6 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
# means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
# means.LR23.X2 <dbl>
Table6_sds# A tibble: 12 × 12
nitems ability_mean sample_size estimator sds.M1PL.AIC sds.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 20 0 500 MML 89.760 89.760
2 40 0 500 MML 171.49 171.49
3 20 -1 500 MML 128.20 128.20
4 40 -1 500 MML 242.41 242.41
5 20 1 500 MML 140.10 140.10
6 40 1 500 MML 211.36 211.36
7 20 0 1000 MML 126.89 126.89
8 40 0 1000 MML 236.82 236.82
9 20 -1 1000 MML 169.64 169.64
10 40 -1 1000 MML 330.16 330.16
11 20 1 1000 MML 191.37 191.37
12 40 1 1000 MML 330.26 330.26
# ℹ 6 more variables: sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>,
# sds.M3PL.AIC <dbl>, sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>,
# sds.LR23.X2 <dbl>
Table7_means <- res %>% filter(model == "2PL") %>%
select(nitems:sample_size, estimator, means.M1PL.AIC:means.LR23.X2)
Table7_sds <- res %>% filter(model == "2PL") %>%
select(nitems:sample_size, estimator, sds.M1PL.AIC:sds.LR23.X2)
Table7_means# A tibble: 12 × 12
nitems ability_mean sample_size estimator means.M1PL.AIC means.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 20 0 500 MML 11639. 11724.
2 40 0 500 MML 22464. 22632.
3 20 -1 500 MML 10552. 10636.
4 40 -1 500 MML 20040. 20209.
5 20 1 500 MML 10468. 10553.
6 40 1 500 MML 20677. 20846.
7 20 0 1000 MML 23244. 23342.
8 40 0 1000 MML 44924. 45120.
9 20 -1 1000 MML 21090. 21188.
10 40 -1 1000 MML 40082. 40279.
11 20 1 1000 MML 20918. 21016.
12 40 1 1000 MML 41319. 41515.
# ℹ 6 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
# means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
# means.LR23.X2 <dbl>
Table7_sds# A tibble: 12 × 12
nitems ability_mean sample_size estimator sds.M1PL.AIC sds.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 20 0 500 MML 86.271 86.271
2 40 0 500 MML 151.07 151.07
3 20 -1 500 MML 128.38 128.38
4 40 -1 500 MML 221.59 221.59
5 20 1 500 MML 124.54 124.54
6 40 1 500 MML 219.77 219.77
7 20 0 1000 MML 131.34 131.34
8 40 0 1000 MML 215.62 215.62
9 20 -1 1000 MML 172.37 172.37
10 40 -1 1000 MML 318.69 318.69
11 20 1 1000 MML 196.67 196.67
12 40 1 1000 MML 304.49 304.49
# ℹ 6 more variables: sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>,
# sds.M3PL.AIC <dbl>, sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>,
# sds.LR23.X2 <dbl>
Table8_means <- res %>% filter(model == "3PL") %>%
select(nitems:sample_size, estimator, means.M1PL.AIC:means.LR23.X2)
Table8_sds <- res %>% filter(model == "3PL") %>%
select(nitems:sample_size, estimator, sds.M1PL.AIC:sds.LR23.X2)
Table8_means# A tibble: 12 × 12
nitems ability_mean sample_size estimator means.M1PL.AIC means.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 20 0 500 MML 11090. 11174.
2 40 0 500 MML 22297. 22465.
3 20 -1 500 MML 12674. 12758.
4 40 -1 500 MML 25272. 25441.
5 20 1 500 MML 8540.0 8624.3
6 40 1 500 MML 17425. 17594.
7 20 0 1000 MML 22268. 22366.
8 40 0 1000 MML 44570. 44766.
9 20 -1 1000 MML 25422. 25521.
10 40 -1 1000 MML 50583. 50779.
11 20 1 1000 MML 17146. 17244.
12 40 1 1000 MML 34812. 35008.
# ℹ 6 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
# means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
# means.LR23.X2 <dbl>
Table8_sds# A tibble: 12 × 12
nitems ability_mean sample_size estimator sds.M1PL.AIC sds.M1PL.BIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 20 0 500 MML 281.58 281.58
2 40 0 500 MML 413.93 413.93
3 20 -1 500 MML 246.03 246.03
4 40 -1 500 MML 336.87 336.87
5 20 1 500 MML 303.05 303.05
6 40 1 500 MML 420.76 420.76
7 20 0 1000 MML 486.20 486.20
8 40 0 1000 MML 762.52 762.52
9 20 -1 1000 MML 444.38 444.38
10 40 -1 1000 MML 663.71 663.71
11 20 1 1000 MML 568.87 568.87
12 40 1 1000 MML 795.56 795.56
# ℹ 6 more variables: sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>,
# sds.M3PL.AIC <dbl>, sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>,
# sds.LR23.X2 <dbl>
##################################
## proportion of competing models selected (more general measure than nose count)
Table9 <- res %>% filter(estimator == "MML") %>%
select(nitems:model, M1PL.BIC:M3PL.LR) %>%
arrange(nitems, sample_size, ability_mean, model)
Table9# A tibble: 36 × 13
nitems ability_mean sample_size model M1PL.BIC M2PL.BIC M3PL.BIC M1PL.AIC
<dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 20 -1 500 1PL 1 0 0 0.992
2 20 -1 500 2PL 0.018 0.982 0 0
3 20 -1 500 3PL 0 1 0 0
4 20 0 500 1PL 1 0 0 0.994
5 20 0 500 2PL 0 1 0 0
6 20 0 500 3PL 0.268 0.732 0 0
7 20 1 500 1PL 1 0 0 0.996
8 20 1 500 2PL 0.004 0.996 0 0
9 20 1 500 3PL 0.786 0.214 0 0
10 20 -1 1000 1PL 1 0 0 0.998
# ℹ 26 more rows
# ℹ 5 more variables: M2PL.AIC <dbl>, M3PL.AIC <dbl>, M1PL.LR <dbl>,
# M2PL.LR <dbl>, M3PL.LR <dbl>
##################################
# graphics
library(tidyr)
long <- Table9 %>%
pivot_longer(M1PL.BIC:M3PL.LR, names_to='fit_stat') %>%
separate(fit_stat, c('fit', 'stat')) %>%
mutate(stat=factor(stat), fit=factor(fit), model=factor(model))
long# A tibble: 324 × 7
nitems ability_mean sample_size model fit stat value
<dbl> <dbl> <dbl> <fct> <fct> <fct> <dbl>
1 20 -1 500 1PL M1PL BIC 1
2 20 -1 500 1PL M2PL BIC 0
3 20 -1 500 1PL M3PL BIC 0
4 20 -1 500 1PL M1PL AIC 0.992
5 20 -1 500 1PL M2PL AIC 0.008
6 20 -1 500 1PL M3PL AIC 0
7 20 -1 500 1PL M1PL LR 0.93
8 20 -1 500 1PL M2PL LR 0.068
9 20 -1 500 1PL M3PL LR 0.002
10 20 -1 500 2PL M1PL BIC 0.018
# ℹ 314 more rows
library(ggplot2)
# unnormalized
# ggplot(long, aes(x=fit, y=value, fill=stat)) +
# geom_bar(stat = "identity", position = 'dodge') +
# facet_grid(model ~ nitems)
# normalized
sub1 <- long %>% group_by(fit, stat, model, nitems) %>%
summarise(mvalue = mean(value)) %>% ungroup %>%
group_by(model, stat, nitems) %>% mutate(value = mvalue / sum(mvalue)) `summarise()` has grouped output by 'fit', 'stat', 'model'. You can override
using the `.groups` argument.
(fig1 <- ggplot(sub1, aes(x=fit, y=value, fill=stat)) +
geom_bar(stat = "identity", position = 'dodge') +
facet_grid(model ~ nitems)) +
ggtitle("Figure 1", subtitle = "Model Selection Proportions by Test Length")# unnormalized
# ggplot(long, aes(x=fit, y=value, fill=stat)) +
# geom_bar(stat = "identity", position = 'dodge') +
# facet_grid(model ~ sample_size)
# normalized
sub2 <- long %>% group_by(fit, stat, model, sample_size) %>%
summarise(mvalue = mean(value)) %>% ungroup %>%
group_by(model, stat, sample_size) %>% mutate(value = mvalue / sum(mvalue)) `summarise()` has grouped output by 'fit', 'stat', 'model'. You can override
using the `.groups` argument.
ggplot(sub2, aes(x=fit, y=value, fill=stat)) +
geom_bar(stat = "identity", position = 'dodge') +
facet_grid(model ~ sample_size) +
ggtitle("Figure 2", subtitle = "Model Selection Proportions by Sample Size")# unnormalized
# ggplot(long, aes(x=fit, y=value, fill=stat)) +
# geom_bar(stat = "identity", position = 'dodge') +
# facet_grid(model ~ ability_mean)
# normalized
sub3 <- long %>% group_by(fit, stat, model, ability_mean) %>%
summarise(mvalue = mean(value)) %>% ungroup %>%
group_by(model, stat, ability_mean) %>% mutate(value = mvalue / sum(mvalue)) `summarise()` has grouped output by 'fit', 'stat', 'model'. You can override
using the `.groups` argument.
ggplot(sub3, aes(x=fit, y=value, fill=stat)) +
geom_bar(stat = "identity", position = 'dodge') +
facet_grid(model ~ ability_mean) +
ggtitle("Figure 3", subtitle = "Model Selection Proportions by Ability Distribution")Replication comments
The results generally replicate well using the mirt software instead of BILOG-MG, at least for the 1PL and 2PL models. For the 3PL models, it seems that mirt was better able to determine whether the true model was in fact from a population generated with 3PL models when using the likelihood ratio approach, as well as the AIC criteria (see the bottom rows of the above 3 figures in comparison to the original figures in Kang and Cohen, 2007). Similar patterns appear for BIC as well, though it’s not entirely clear whether this is due to Monte Carlo sampling error or not (original paper used \(R=50\) replication per cell, whereas this simulation used \(R=500\)).
In any event, it’s unclear why exactly this is the case (perhaps it’s due to mirt’s logit transformed version of the pseudo-guessing parameter during estimation?) but in any case it seems that correctly selecting the 3PL model using the likelihood-ratio test may not be as poor as the author’s originally observed. As such, I’d be more inclined to recommend using the likelihood ratio statistics or AIC when selecting between competing models,
Note that the above replication does not collapse the \(N(-1,1)\) and \(N(1,1)\) ability distribution results into a single object as these distributions have very different behaviour in the model comparison results (as they should for the 3PL model, as higher ability individuals would be less influenced by guessing, so the pseudo-guessing parameter would have notably more sampling variability). It’s unclear why the authors chose to collapse these categories, but in any event based on the above results alone I would advise against doing so as the marginal results may be misleading.