Kang and Cohen (2007) simulation 2

Author

Phil Chalmers

Simulation from:

However, this simulation is only a partial one as I was mainly interested in replicating the IRT estimation results that utilized modal estimation rather than a complete MCMC form. Plus, the article reports the DIC information criteria for the Bayesian models, which these days is generally regarded as sub-optimal. Moreover, computation of a partial Bayes factor could have been replaced with a Bayes factor computation via bridge-sampling methods.

Simulation code

The following code uses TODO placeholders in case users are interested in investigating the MCMC results themselves in the context of this code-base.

# SimDesign::SimFunctions(nAnalyse=2)
library(SimDesign)

Design <- createDesign(nitems = c(20,40),
                       ability_mean = c(0,-1,1),
                       sample_size = c(500,1000),
                       model = c('1PL','2PL','3PL'),
                       estimator=c('MML')) # add 'MCMC' when supported
Design

# source in helper functions + fixed_objects definition
source("KangCohen2007_extras.R")

#-------------------------------------------------------------------

Generate <- function(condition, fixed_objects = NULL) {
    Attach(condition)
    
    # get population generating parameters
    theta <- matrix(rnorm(sample_size, mean=ability_mean))
    pars <- get_parameters(condition, fixed_objects)
    
    # simulate response data (via mirt)
    dat <- with(pars, 
                simdata(sample_size, a=a, d=d, guess=c, 
                        Theta = theta, itemtype = '3PL'))
    dat
}

Analyse.MML <- function(condition, dat, fixed_objects = NULL) {
    Attach(condition)
    AnalyseIf(estimator == "MML")
    
    # MML estimation
    fit <- sprintf("Theta = 1-%i
                   START = (1-%i, a1, 1.0)
                   FIXED = (1-%i, a1)", nitems, nitems, nitems)
    mod1PL <- mirt(dat, model=fit, itemtype='2PL', verbose = FALSE)
    mod2PL <- mirt(dat, model=1, itemtype='2PL', verbose = FALSE)
    mod3PL <- mirt(dat, model=1, itemtype='3PL', verbose = FALSE)
      
    stopifnot(extract.mirt(mod1PL, 'converged'))
    stopifnot(extract.mirt(mod2PL, 'converged'))
    stopifnot(extract.mirt(mod3PL, 'converged'))
    
    # just return model comparison information (main purpose of the paper)
    ret <- as.data.frame(c(M1PL=anova(mod1PL)[c('AIC', 'BIC')], 
                           M2PL=anova(mod2PL)[c('AIC', 'BIC')],
                           M3PL=anova(mod3PL)[c('AIC', 'BIC')],
                           LR12=anova(mod1PL, mod2PL, verbose=FALSE)[2, c('X2', 'p')],
                           LR23=anova(mod2PL, mod3PL, verbose=FALSE)[2, c('X2', 'p')]))
    ret
}

Analyse.MCMC <- function(condition, dat, fixed_objects = NULL) {
    Attach(condition)
    AnalyseIf(estimator == "MCMC")
    
    # TODO
    
}

Summarise <- function(condition, results, fixed_objects = NULL) {
    nms <- colnames(results)
    means <- colMeans(results[,!grepl(".p\\b", nms)])
    sds <- apply(results[,!grepl(".p\\b", nms)], 2, sd)
    BIC <- best_info(results[,grepl(".BIC", nms)])
    ret <- c(means=means, sds=sds, BIC)
    if(condition$estimator == 'MML'){
        AIC <- best_info(results[,grepl(".AIC\\b", nms)])
        ps <- results[,grepl(".p\\b", nms)] < .05
        ps[ps[,2], 1] <- FALSE     ## if 3PLM test sig, 1PL vs 2PL doesn't matter
        LR <- colMeans(cbind(ifelse(rowSums(ps) == 0, TRUE, FALSE), ps))
        names(LR) <- c('M1PL.LR', 'M2PL.LR', 'M3PL.LR')
        ret <- c(ret, AIC, LR)
    }
    ret
}

#-------------------------------------------------------------------

res <- runSimulation(design=Design, replications=500, generate=Generate, 
                     analyse=list(Analyse.MML, Analyse.MCMC), # unnamed list to avoid extra labels
                     summarise=Summarise, fixed_objects=fo,
                     packages='mirt', parallel=TRUE, max_errors = Inf, 
                     filename='KangCohen')
res
# A tibble: 36 × 36
   nitems ability_mean sample_size model estimator means.M1PL.AIC means.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr> <chr>              <dbl>          <dbl>
 1     20            0         500 1PL   MML               11752.         11836.
 2     40            0         500 1PL   MML               22827.         22996.
 3     20           -1         500 1PL   MML               10904.         10988.
 4     40           -1         500 1PL   MML               20718.         20886.
 5     20            1         500 1PL   MML               10516.         10600.
 6     40            1         500 1PL   MML               20943.         21112.
 7     20            0        1000 1PL   MML               23495.         23593.
 8     40            0        1000 1PL   MML               45625.         45821.
 9     20           -1        1000 1PL   MML               21806.         21905.
10     40           -1        1000 1PL   MML               41344.         41540.
# ℹ 26 more rows
# ℹ 29 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
#   means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
#   means.LR23.X2 <dbl>, sds.M1PL.AIC <dbl>, sds.M1PL.BIC <dbl>,
#   sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>, sds.M3PL.AIC <dbl>,
#   sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>, sds.LR23.X2 <dbl>, M1PL.BIC <dbl>,
#   M2PL.BIC <dbl>, M3PL.BIC <dbl>, M1PL.AIC <dbl>, M2PL.AIC <dbl>, …

Replication of several Tables and Figures

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# information criteria means/sds
Table6_means <- res %>% filter(model == "1PL") %>%
    select(nitems:sample_size, estimator, means.M1PL.AIC:means.LR23.X2)
Table6_sds <- res %>% filter(model == "1PL") %>%
    select(nitems:sample_size, estimator, sds.M1PL.AIC:sds.LR23.X2)
Table6_means
# A tibble: 12 × 12
   nitems ability_mean sample_size estimator means.M1PL.AIC means.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr>              <dbl>          <dbl>
 1     20            0         500 MML               11752.         11836.
 2     40            0         500 MML               22827.         22996.
 3     20           -1         500 MML               10904.         10988.
 4     40           -1         500 MML               20718.         20886.
 5     20            1         500 MML               10516.         10600.
 6     40            1         500 MML               20943.         21112.
 7     20            0        1000 MML               23495.         23593.
 8     40            0        1000 MML               45625.         45821.
 9     20           -1        1000 MML               21806.         21905.
10     40           -1        1000 MML               41344.         41540.
11     20            1        1000 MML               21029.         21128.
12     40            1        1000 MML               41834.         42030.
# ℹ 6 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
#   means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
#   means.LR23.X2 <dbl>
Table6_sds
# A tibble: 12 × 12
   nitems ability_mean sample_size estimator sds.M1PL.AIC sds.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr>            <dbl>        <dbl>
 1     20            0         500 MML             89.760       89.760
 2     40            0         500 MML            171.49       171.49 
 3     20           -1         500 MML            128.20       128.20 
 4     40           -1         500 MML            242.41       242.41 
 5     20            1         500 MML            140.10       140.10 
 6     40            1         500 MML            211.36       211.36 
 7     20            0        1000 MML            126.89       126.89 
 8     40            0        1000 MML            236.82       236.82 
 9     20           -1        1000 MML            169.64       169.64 
10     40           -1        1000 MML            330.16       330.16 
11     20            1        1000 MML            191.37       191.37 
12     40            1        1000 MML            330.26       330.26 
# ℹ 6 more variables: sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>,
#   sds.M3PL.AIC <dbl>, sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>,
#   sds.LR23.X2 <dbl>
Table7_means <- res %>% filter(model == "2PL") %>%
    select(nitems:sample_size, estimator, means.M1PL.AIC:means.LR23.X2)
Table7_sds <- res %>% filter(model == "2PL") %>%
    select(nitems:sample_size, estimator, sds.M1PL.AIC:sds.LR23.X2)
Table7_means
# A tibble: 12 × 12
   nitems ability_mean sample_size estimator means.M1PL.AIC means.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr>              <dbl>          <dbl>
 1     20            0         500 MML               11639.         11724.
 2     40            0         500 MML               22464.         22632.
 3     20           -1         500 MML               10552.         10636.
 4     40           -1         500 MML               20040.         20209.
 5     20            1         500 MML               10468.         10553.
 6     40            1         500 MML               20677.         20846.
 7     20            0        1000 MML               23244.         23342.
 8     40            0        1000 MML               44924.         45120.
 9     20           -1        1000 MML               21090.         21188.
10     40           -1        1000 MML               40082.         40279.
11     20            1        1000 MML               20918.         21016.
12     40            1        1000 MML               41319.         41515.
# ℹ 6 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
#   means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
#   means.LR23.X2 <dbl>
Table7_sds
# A tibble: 12 × 12
   nitems ability_mean sample_size estimator sds.M1PL.AIC sds.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr>            <dbl>        <dbl>
 1     20            0         500 MML             86.271       86.271
 2     40            0         500 MML            151.07       151.07 
 3     20           -1         500 MML            128.38       128.38 
 4     40           -1         500 MML            221.59       221.59 
 5     20            1         500 MML            124.54       124.54 
 6     40            1         500 MML            219.77       219.77 
 7     20            0        1000 MML            131.34       131.34 
 8     40            0        1000 MML            215.62       215.62 
 9     20           -1        1000 MML            172.37       172.37 
10     40           -1        1000 MML            318.69       318.69 
11     20            1        1000 MML            196.67       196.67 
12     40            1        1000 MML            304.49       304.49 
# ℹ 6 more variables: sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>,
#   sds.M3PL.AIC <dbl>, sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>,
#   sds.LR23.X2 <dbl>
Table8_means <- res %>% filter(model == "3PL") %>%
    select(nitems:sample_size, estimator, means.M1PL.AIC:means.LR23.X2)
Table8_sds <- res %>% filter(model == "3PL") %>%
    select(nitems:sample_size, estimator, sds.M1PL.AIC:sds.LR23.X2)
Table8_means
# A tibble: 12 × 12
   nitems ability_mean sample_size estimator means.M1PL.AIC means.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr>              <dbl>          <dbl>
 1     20            0         500 MML              11090.         11174. 
 2     40            0         500 MML              22297.         22465. 
 3     20           -1         500 MML              12674.         12758. 
 4     40           -1         500 MML              25272.         25441. 
 5     20            1         500 MML               8540.0         8624.3
 6     40            1         500 MML              17425.         17594. 
 7     20            0        1000 MML              22268.         22366. 
 8     40            0        1000 MML              44570.         44766. 
 9     20           -1        1000 MML              25422.         25521. 
10     40           -1        1000 MML              50583.         50779. 
11     20            1        1000 MML              17146.         17244. 
12     40            1        1000 MML              34812.         35008. 
# ℹ 6 more variables: means.M2PL.AIC <dbl>, means.M2PL.BIC <dbl>,
#   means.M3PL.AIC <dbl>, means.M3PL.BIC <dbl>, means.LR12.X2 <dbl>,
#   means.LR23.X2 <dbl>
Table8_sds
# A tibble: 12 × 12
   nitems ability_mean sample_size estimator sds.M1PL.AIC sds.M1PL.BIC
    <dbl>        <dbl>       <dbl> <chr>            <dbl>        <dbl>
 1     20            0         500 MML             281.58       281.58
 2     40            0         500 MML             413.93       413.93
 3     20           -1         500 MML             246.03       246.03
 4     40           -1         500 MML             336.87       336.87
 5     20            1         500 MML             303.05       303.05
 6     40            1         500 MML             420.76       420.76
 7     20            0        1000 MML             486.20       486.20
 8     40            0        1000 MML             762.52       762.52
 9     20           -1        1000 MML             444.38       444.38
10     40           -1        1000 MML             663.71       663.71
11     20            1        1000 MML             568.87       568.87
12     40            1        1000 MML             795.56       795.56
# ℹ 6 more variables: sds.M2PL.AIC <dbl>, sds.M2PL.BIC <dbl>,
#   sds.M3PL.AIC <dbl>, sds.M3PL.BIC <dbl>, sds.LR12.X2 <dbl>,
#   sds.LR23.X2 <dbl>
##################################
## proportion of competing models selected (more general measure than nose count)
Table9 <- res %>% filter(estimator == "MML") %>%
    select(nitems:model, M1PL.BIC:M3PL.LR) %>%
    arrange(nitems, sample_size, ability_mean, model)
Table9
# A tibble: 36 × 13
   nitems ability_mean sample_size model M1PL.BIC M2PL.BIC M3PL.BIC M1PL.AIC
    <dbl>        <dbl>       <dbl> <chr>    <dbl>    <dbl>    <dbl>    <dbl>
 1     20           -1         500 1PL      1        0            0    0.992
 2     20           -1         500 2PL      0.018    0.982        0    0    
 3     20           -1         500 3PL      0        1            0    0    
 4     20            0         500 1PL      1        0            0    0.994
 5     20            0         500 2PL      0        1            0    0    
 6     20            0         500 3PL      0.268    0.732        0    0    
 7     20            1         500 1PL      1        0            0    0.996
 8     20            1         500 2PL      0.004    0.996        0    0    
 9     20            1         500 3PL      0.786    0.214        0    0    
10     20           -1        1000 1PL      1        0            0    0.998
# ℹ 26 more rows
# ℹ 5 more variables: M2PL.AIC <dbl>, M3PL.AIC <dbl>, M1PL.LR <dbl>,
#   M2PL.LR <dbl>, M3PL.LR <dbl>
##################################

# graphics
library(tidyr)
long <- Table9 %>% 
    pivot_longer(M1PL.BIC:M3PL.LR, names_to='fit_stat') %>% 
    separate(fit_stat, c('fit', 'stat')) %>% 
    mutate(stat=factor(stat), fit=factor(fit), model=factor(model))
long
# A tibble: 324 × 7
   nitems ability_mean sample_size model fit   stat  value
    <dbl>        <dbl>       <dbl> <fct> <fct> <fct> <dbl>
 1     20           -1         500 1PL   M1PL  BIC   1    
 2     20           -1         500 1PL   M2PL  BIC   0    
 3     20           -1         500 1PL   M3PL  BIC   0    
 4     20           -1         500 1PL   M1PL  AIC   0.992
 5     20           -1         500 1PL   M2PL  AIC   0.008
 6     20           -1         500 1PL   M3PL  AIC   0    
 7     20           -1         500 1PL   M1PL  LR    0.93 
 8     20           -1         500 1PL   M2PL  LR    0.068
 9     20           -1         500 1PL   M3PL  LR    0.002
10     20           -1         500 2PL   M1PL  BIC   0.018
# ℹ 314 more rows
library(ggplot2)

# unnormalized
# ggplot(long, aes(x=fit, y=value, fill=stat)) +
#     geom_bar(stat = "identity", position = 'dodge') + 
#     facet_grid(model ~ nitems)

# normalized
sub1 <- long %>% group_by(fit, stat, model, nitems) %>% 
    summarise(mvalue = mean(value)) %>% ungroup %>% 
    group_by(model, stat, nitems) %>% mutate(value = mvalue / sum(mvalue)) 
`summarise()` has grouped output by 'fit', 'stat', 'model'. You can override
using the `.groups` argument.
(fig1 <- ggplot(sub1, aes(x=fit, y=value, fill=stat)) +
        geom_bar(stat = "identity", position = 'dodge') + 
        facet_grid(model ~ nitems)) + 
    ggtitle("Figure 1", subtitle = "Model Selection Proportions by Test Length")

# unnormalized
# ggplot(long, aes(x=fit, y=value, fill=stat)) +
#         geom_bar(stat = "identity", position = 'dodge') +
#         facet_grid(model ~ sample_size)

# normalized
sub2 <- long %>% group_by(fit, stat, model, sample_size) %>% 
    summarise(mvalue = mean(value)) %>% ungroup %>% 
    group_by(model, stat, sample_size) %>% mutate(value = mvalue / sum(mvalue)) 
`summarise()` has grouped output by 'fit', 'stat', 'model'. You can override
using the `.groups` argument.
ggplot(sub2, aes(x=fit, y=value, fill=stat)) +
    geom_bar(stat = "identity", position = 'dodge') +
    facet_grid(model ~ sample_size) + 
    ggtitle("Figure 2", subtitle = "Model Selection Proportions by Sample Size")

# unnormalized
# ggplot(long, aes(x=fit, y=value, fill=stat)) +
#         geom_bar(stat = "identity", position = 'dodge') + 
#         facet_grid(model ~ ability_mean)

# normalized
sub3 <- long %>% group_by(fit, stat, model, ability_mean) %>% 
    summarise(mvalue = mean(value)) %>% ungroup %>% 
    group_by(model, stat, ability_mean) %>% mutate(value = mvalue / sum(mvalue)) 
`summarise()` has grouped output by 'fit', 'stat', 'model'. You can override
using the `.groups` argument.
ggplot(sub3, aes(x=fit, y=value, fill=stat)) +
    geom_bar(stat = "identity", position = 'dodge') +
    facet_grid(model ~ ability_mean)  + 
    ggtitle("Figure 3", subtitle = "Model Selection Proportions by Ability Distribution")

Replication comments

The results generally replicate well using the mirt software instead of BILOG-MG, at least for the 1PL and 2PL models. For the 3PL models, it seems that mirt was better able to determine whether the true model was in fact from a population generated with 3PL models when using the likelihood ratio approach, as well as the AIC criteria (see the bottom rows of the above 3 figures in comparison to the original figures in Kang and Cohen, 2007). Similar patterns appear for BIC as well, though it’s not entirely clear whether this is due to Monte Carlo sampling error or not (original paper used \(R=50\) replication per cell, whereas this simulation used \(R=500\)).

In any event, it’s unclear why exactly this is the case (perhaps it’s due to mirt’s logit transformed version of the pseudo-guessing parameter during estimation?) but in any case it seems that correctly selecting the 3PL model using the likelihood-ratio test may not be as poor as the author’s originally observed. As such, I’d be more inclined to recommend using the likelihood ratio statistics or AIC when selecting between competing models,

Note that the above replication does not collapse the \(N(-1,1)\) and \(N(1,1)\) ability distribution results into a single object as these distributions have very different behaviour in the model comparison results (as they should for the 3PL model, as higher ability individuals would be less influenced by guessing, so the pseudo-guessing parameter would have notably more sampling variability). It’s unclear why the authors chose to collapse these categories, but in any event based on the above results alone I would advise against doing so as the marginal results may be misleading.