Adapted from the sirt
package examples (rasch.mirtlc
).
library(mirt)
library(sirt)
set.seed(979)
I <- 9
N <- 5000
b <- seq( - 1.5, 1.5 , len=I)
b <- rep(b,3)
# define class locations
theta.k <- c(-3.0, -4.1, -2.8 , 1.7 , 2.3 , 1.8 ,
0.2 , 0.4 , -0.1 ,
2.6 , 0.1, -0.9, -1.1 ,-0.7 , 0.9 )
Nclasses <- 5
theta.k0 <- theta.k <- matrix( theta.k , Nclasses , 3 , byrow=TRUE )
pi.k <- c(.20,.25,.25,.10,.15)
theta <- theta.k[ rep( 1:Nclasses , round(N*pi.k) ) , ]
dimensions <- rep( 1:3 , each=I)
# simulate item responses
dat <- matrix( NA , nrow=N , ncol=I*3)
for (ii in 1:(3*I) ){
dat[,ii] <- 1 * ( runif(N) < plogis( theta[, dimensions[ii] ] - b[ ii] ) )
}
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
## Warning in runif(N) < plogis(theta[, dimensions[ii]] - b[ii]): longer object
## length is not a multiple of shorter object length
colnames(dat) <- paste0( rep( LETTERS[1:3] , each=I ) , 1:(3*I) )
# estimate model
mod1 <- rasch.mirtlc( dat , Nclasses=Nclasses , dimensions=dimensions ,progress = FALSE,
modeltype="MLC1" ,
ref.item= c(5,14,23) , glob.conv=.0005, conv1=.0005)
round(
cbind( mod1$theta.k , mod1$pi.k ) , 3 )
## [,1] [,2] [,3] [,4]
## [1,] -2.776 -3.791 -2.667 0.250
## [2,] -0.989 -0.605 0.957 0.151
## [3,] 0.332 0.418 -0.046 0.246
## [4,] 2.601 0.171 -0.854 0.101
## [5,] 1.791 2.330 1.844 0.252
cbind(
theta.k , pi.k )
## pi.k
## [1,] -3.0 -4.1 -2.8 0.20
## [2,] 1.7 2.3 1.8 0.25
## [3,] 0.2 0.4 -0.1 0.25
## [4,] 2.6 0.1 -0.9 0.10
## [5,] -1.1 -0.7 0.9 0.15
# plot class locations
plot( 1:3 , mod1$theta.k[1,] , xlim=c(1,3) , ylim=c(-5,3) , col=1 , pch=1 , type="n" ,
axes=FALSE, xlab="Dimension" , ylab="Location")
axis(1 , 1:3 ) ; axis(2) ; axis(4)
for (cc in 1:Nclasses){ # cc <- 1
lines(1:3, mod1$theta.k[cc,] , col=cc , lty=cc )
points(1:3, mod1$theta.k[cc,] , col=cc , pch =cc )
}
# estimate model with gdm function in CDM package
library(CDM)
## **********************************
## ** CDM 8.2-6 (2022-08-25 15:43:23)
## ** Cognitive Diagnostic Models **
## **********************************
# define Q-matrix
Qmatrix <- matrix(0,3*I,3)
Qmatrix[ cbind( 1:(3*I) , rep(1:3 , each=I) ) ] <- 1
set.seed(9176)
# random starting values for theta locations
theta.k <- matrix( 2*rnorm(5*3) , 5 , 3 )
colnames(theta.k) <- c("Dim1","Dim2","Dim3")
# try possibly different starting values
# estimate model in CDM
b.constraint <- cbind( c(5,14,23) , 1 , 0 )
mod2 <- CDM::gdm( dat , theta.k = theta.k , b.constraint=b.constraint, skillspace="est",
progress=FALSE, irtmodel="1PL", Qmatrix=Qmatrix)
summary(mod2)
## -----------------------------------------------------------------------------
## CDM 8.2-6 (2022-08-25 15:43:23)
##
## Date of Analysis: 2023-07-05 16:12:05
## Time difference of 6.676571 secs
## Computation Time: 6.676571
##
## General Diagnostic Model
##
## Call:
## CDM::gdm(data = dat, theta.k = theta.k, irtmodel = "1PL", Qmatrix = Qmatrix,
## skillspace = "est", b.constraint = b.constraint, progress = FALSE)
##
## 5000 Cases, 27 Items, 1 Group(s) , 3 Dimension(s)
## Saturated skill space with estimated trait grid
##
## -----------------------------------------------------------------------------
## Number of iterations= 1000
## Maximum number of iterations was reached.
##
## Deviance = 127957.13 | Log Likelihood = -63978.56
## Number of persons = 5000
## Number of estimated parameters = 43
## Number of estimated item parameters = 24
## 24 Intercepts and 0 Slopes
## 0 centered intercepts and 0 centered slopes
## Number of estimated distribution parameters = 19
##
## AIC = 128043 | penalty = 86 | AIC = -2*LL + 2*p
## AICc = 128044 | penalty = 86.76 | AICc = -2*LL + 2*p + 2*p*(p+1)/(n-p-1) (bias corrected AIC)
## BIC = 128323 | penalty = 366.24 | BIC = -2*LL + log(n)*p
## CAIC = 128366 | penalty = 409.24 | CAIC = -2*LL + [log(n)+1]*p (consistent AIC)
##
## -----------------------------------------------------------------------------
## Trait Distribution
##
## M Trait:
## Dim1 Dim2 Dim3
## Group1 -0.058 -0.356 -0.17
##
## SD Trait:
## Dim1 Dim2 Dim3
## Group1 1.94 2.323 1.715
##
## Skewness Trait:
## Dim1 Dim2 Dim3
## Group1 -0.258 -0.528 -0.383
##
## Correlations Trait:
## Group 1
## Dim1 Dim2 Dim3
## Dim1 1.000 0.909 0.726
## Dim2 0.909 1.000 0.926
## Dim3 0.726 0.926 1.000
##
##
## Estimated Skill Distribution
## theta.k.Dim1 theta.k.Dim2 theta.k.Dim3 pi.k
## 1 -2.857 -3.989 -2.740 0.24997
## 2 0.348 0.443 -0.051 0.24939
## 3 -1.010 -0.593 0.962 0.15093
## 4 2.694 0.189 -0.876 0.09866
## 5 1.816 2.395 1.869 0.25105
##
## EAP Reliability:
## Dim1 Dim2 Dim3
## [1,] 0.959 0.985 0.97
## -----------------------------------------------------------------------------
## Item Parameters
## item N M b.Cat1 a.Dim1 a.Dim2 a.Dim3 itemfit.rmsea
## 1 A1 5000 0.686 1.379 1 0 0 0.014
## 2 A2 5000 0.641 1.007 1 0 0 0.010
## 3 A3 5000 0.591 0.617 1 0 0 0.013
## 4 A4 5000 0.537 0.211 1 0 0 0.014
## 5 A5 5000 0.508 0.000 1 0 0 0.010
## 6 A6 5000 0.437 -0.508 1 0 0 0.025
## 7 A7 5000 0.385 -0.865 1 0 0 0.007
## 8 A8 5000 0.322 -1.307 1 0 0 0.012
## 9 A9 5000 0.283 -1.588 1 0 0 0.014
## 10 B10 5000 0.672 1.443 0 1 0 0.014
## 11 B11 5000 0.631 1.075 0 1 0 0.009
## 12 B12 5000 0.576 0.603 0 1 0 0.008
## 13 B13 5000 0.542 0.348 0 1 0 0.018
## 14 B14 5000 0.491 0.000 0 1 0 0.014
## 15 B15 5000 0.427 -0.467 0 1 0 0.005
## 16 B16 5000 0.372 -0.843 0 1 0 0.009
## 17 B17 5000 0.326 -1.171 0 1 0 0.009
## 18 B18 5000 0.266 -1.616 0 1 0 0.015
## 19 C19 5000 0.705 1.509 0 0 1 0.016
## 20 C20 5000 0.642 1.022 0 0 1 0.010
## 21 C21 5000 0.593 0.665 0 0 1 0.013
## 22 C22 5000 0.538 0.297 0 0 1 0.019
## 23 C23 5000 0.492 0.000 0 0 1 0.009
## 24 C24 5000 0.419 -0.465 0 0 1 0.008
## 25 C25 5000 0.359 -0.844 0 0 1 0.009
## 26 C26 5000 0.303 -1.204 0 0 1 0.011
## 27 C27 5000 0.249 -1.574 0 0 1 0.018
##
## Mean of RMSEA item fit: 0.012
#------
# estimate model with MultiLCIRT package
library(MultiLCIRT)
## Error in library(MultiLCIRT): there is no package called 'MultiLCIRT'
# aggregated dataset in order to input only unique response patterns
out <- MultiLCIRT::aggr_data( as.matrix(dat) , fort=TRUE)
## Error in loadNamespace(x): there is no package called 'MultiLCIRT'
S <- out$data_dis
## Error in eval(expr, envir, enclos): object 'out' not found
yv <- out$freq
## Error in eval(expr, envir, enclos): object 'out' not found
# define matrix to allocate each item to one dimension
multi1 <- matrix( 1:(3*I) , nrow=3 , byrow=TRUE )
# define reference items in item-dimension allocation matrix
multi1[ 1 , c(1,5) ] <- c(5,1)
multi1[ 2 , c(10,14) - 9 ] <- c(14,9)
multi1[ 3 , c(19,23) - 18 ] <- c(23,19)
# Rasch model with 5 latent classes (random start: start=1)
mod3 <- MultiLCIRT::est_multi_poly(S,yv,k=5,start=1,link=1,multi=multi1,tol=10^-5 ,
output=TRUE , disp=TRUE , fort=TRUE)
## Error in loadNamespace(x): there is no package called 'MultiLCIRT'
# estimated location points and class probabilities in MultiLCIRT
cbind( t( mod3$Th ) , mod3$piv )
## Error in t(mod3$Th): object 'mod3' not found
# compare results with rasch.mirtlc
cbind( mod1$theta.k , mod1$pi.k )
## [,1] [,2] [,3] [,4]
## [1,] -2.7759840 -3.7907986 -2.66677341 0.2501500
## [2,] -0.9891769 -0.6050983 0.95692485 0.1506892
## [3,] 0.3315768 0.4175680 -0.04566194 0.2458049
## [4,] 2.6005153 0.1707981 -0.85368888 0.1011480
## [5,] 1.7910290 2.3303750 1.84441426 0.2522079
# simulated data parameters
cbind( theta.k , pi.k )
## Dim1 Dim2 Dim3 pi.k
## [1,] -1.47036160 -1.6476895 -1.3486075 0.20
## [2,] 1.64299895 -0.6714749 2.3263009 0.25
## [3,] -5.16325156 -1.7710264 -0.3443295 0.25
## [4,] 0.07923444 1.0165802 -2.5669845 0.10
## [5,] 1.34947782 1.5344728 1.0521664 0.15
#----
# estimate model with cutomized input in mirt
library(mirt)
#-- define Theta design matrix for 5 classes
Theta <- diag(5)
Theta <- cbind( Theta , Theta , Theta )
r1 <- rownames(Theta) <- paste0("C",1:5)
colnames(Theta) <- c( paste0(r1 , "D1") , paste0(r1 , "D2") , paste0(r1 , "D3") )
Theta
## C1D1 C2D1 C3D1 C4D1 C5D1 C1D2 C2D2 C3D2 C4D2 C5D2 C1D3 C2D3 C3D3 C4D3 C5D3
## C1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0
## C2 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0
## C3 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0
## C4 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0
## C5 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1
I <- ncol(dat) # I = 27
mirtmodel <- mirt::mirt.model("
C1D1 = 1-9
C2D1 = 1-9
C3D1 = 1-9
C4D1 = 1-9
C5D1 = 1-9
C1D2 = 10-18
C2D2 = 10-18
C3D2 = 10-18
C4D2 = 10-18
C5D2 = 10-18
C1D3 = 19-27
C2D3 = 19-27
C3D3 = 19-27
C4D3 = 19-27
C5D3 = 19-27
CONSTRAIN = (1-9,a1),(1-9,a2),(1-9,a3),(1-9,a4),(1-9,a5),
(10-18,a6),(10-18,a7),(10-18,a8),(10-18,a9),(10-18,a10),
(19-27,a11),(19-27,a12),(19-27,a13),(19-27,a14),(19-27,a15)
")
#-- get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel , pars = "values")
#-- redefine initial parameter values
# set all d parameters initially to zero
ind <- which( ( mod.pars$name == "d" ) )
mod.pars[ ind ,"value" ] <- 0
# fix item difficulties of reference items to zero
mod.pars[ ind[ c(5,14,23) ] , "est"] <- FALSE
mod.pars[ind,]
## group item class name parnum value lbound ubound est prior.type prior_1
## 16 all A1 dich d 16 0 -Inf Inf TRUE none NaN
## 34 all A2 dich d 34 0 -Inf Inf TRUE none NaN
## 52 all A3 dich d 52 0 -Inf Inf TRUE none NaN
## 70 all A4 dich d 70 0 -Inf Inf TRUE none NaN
## 88 all A5 dich d 88 0 -Inf Inf FALSE none NaN
## 106 all A6 dich d 106 0 -Inf Inf TRUE none NaN
## 124 all A7 dich d 124 0 -Inf Inf TRUE none NaN
## 142 all A8 dich d 142 0 -Inf Inf TRUE none NaN
## 160 all A9 dich d 160 0 -Inf Inf TRUE none NaN
## 178 all B10 dich d 178 0 -Inf Inf TRUE none NaN
## 196 all B11 dich d 196 0 -Inf Inf TRUE none NaN
## 214 all B12 dich d 214 0 -Inf Inf TRUE none NaN
## 232 all B13 dich d 232 0 -Inf Inf TRUE none NaN
## 250 all B14 dich d 250 0 -Inf Inf FALSE none NaN
## 268 all B15 dich d 268 0 -Inf Inf TRUE none NaN
## 286 all B16 dich d 286 0 -Inf Inf TRUE none NaN
## 304 all B17 dich d 304 0 -Inf Inf TRUE none NaN
## 322 all B18 dich d 322 0 -Inf Inf TRUE none NaN
## 340 all C19 dich d 340 0 -Inf Inf TRUE none NaN
## 358 all C20 dich d 358 0 -Inf Inf TRUE none NaN
## 376 all C21 dich d 376 0 -Inf Inf TRUE none NaN
## 394 all C22 dich d 394 0 -Inf Inf TRUE none NaN
## 412 all C23 dich d 412 0 -Inf Inf FALSE none NaN
## 430 all C24 dich d 430 0 -Inf Inf TRUE none NaN
## 448 all C25 dich d 448 0 -Inf Inf TRUE none NaN
## 466 all C26 dich d 466 0 -Inf Inf TRUE none NaN
## 484 all C27 dich d 484 0 -Inf Inf TRUE none NaN
## prior_2
## 16 NaN
## 34 NaN
## 52 NaN
## 70 NaN
## 88 NaN
## 106 NaN
## 124 NaN
## 142 NaN
## 160 NaN
## 178 NaN
## 196 NaN
## 214 NaN
## 232 NaN
## 250 NaN
## 268 NaN
## 286 NaN
## 304 NaN
## 322 NaN
## 340 NaN
## 358 NaN
## 376 NaN
## 394 NaN
## 412 NaN
## 430 NaN
## 448 NaN
## 466 NaN
## 484 NaN
# initial item parameters of cluster locations (a1,...,a15)
ind <- which( ( mod.pars$name %in% paste0("a", c(1,6,11) ) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- -2
ind <- which( ( mod.pars$name %in% paste0("a", c(1,6,11)+1 ) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- -1
ind <- which( ( mod.pars$name %in% paste0("a", c(1,6,11)+2 ) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- 0
ind <- which( ( mod.pars$name %in% paste0("a", c(1,6,11)+3 ) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- 1
ind <- which( ( mod.pars$name %in% paste0("a", c(1,6,11)+4 ) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- 0
#-- define prior for latent class analysis
lca_prior <- function(Theta,Etable){
TP <- nrow(Theta)
if ( is.null(Etable) ){ prior <- rep( 1/TP , TP ) }
if ( ! is.null(Etable) ){
prior <- ( rowSums(Etable[ , seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I
}
prior <- prior / sum(prior)
return(prior)
}
#-- estimate model in mirt
mod4 <- mirt::mirt(dat, mirtmodel , pars = mod.pars , verbose=FALSE ,
technical = list( customTheta=Theta , customPriorFun = lca_prior ,
MAXQUAD = 1E20) )
## EM quadrature for high dimensional models are better handled
## with the "QMCEM" or "MCEM" method
# correct number of estimated parameters
nest <- as.integer(sum(mod.pars$est) + nrow(Theta)-1 )
# extract coefficients
# source.all(pfsirt)
cmod4 <- coef(mod4, simplify=TRUE)
# estimated item difficulties
dfr <- data.frame( "sim"=b , "mirt"=-cmod4$items[,'d'] , "sirt"=mod1$item$thresh )
round( dfr , 4 )
## sim mirt sirt
## A1 -1.500 -1.3777 -1.3382
## A2 -1.125 -1.0054 -0.9774
## A3 -0.750 -0.6152 -0.6016
## A4 -0.375 -0.2094 -0.2060
## A5 0.000 0.0000 0.0000
## A6 0.375 0.5090 0.4984
## A7 0.750 0.8666 0.8504
## A8 1.125 1.3084 1.2847
## A9 1.500 1.5896 1.5620
## B10 -1.500 -1.5027 -1.4499
## B11 -1.125 -1.1001 -1.0735
## B12 -0.750 -0.6269 -0.6168
## B13 -0.375 -0.3718 -0.3662
## B14 0.000 0.0000 0.0000
## B15 0.375 0.4439 0.4399
## B16 0.750 0.8200 0.8100
## B17 1.125 1.1483 1.1332
## B18 1.500 1.5932 1.5694
## C19 -1.500 -1.5097 -1.4679
## C20 -1.125 -1.0228 -0.9983
## C21 -0.750 -0.6656 -0.6518
## C22 -0.375 -0.2977 -0.2918
## C23 0.000 0.0000 0.0000
## C24 0.375 0.4646 0.4590
## C25 0.750 0.8433 0.8334
## C26 1.125 1.2036 1.1896
## C27 1.500 1.5732 1.5548
sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] CDM_8.2-6 mvtnorm_1.2-2 sirt_3.12-66 ltm_1.2-0
## [5] polycor_0.8-1 msm_1.7 MASS_7.3-58.2 mirt_1.39.4
## [9] lattice_0.20-45 knitr_1.43
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.10 highr_0.10 compiler_4.2.2
## [4] dcurver_0.9.2 tools_4.2.2 evaluate_0.21
## [7] lifecycle_1.0.3 gtable_0.3.3 nlme_3.1-162
## [10] mgcv_1.8-41 rlang_1.1.1 Matrix_1.5-3
## [13] cli_3.6.1 rstudioapi_0.14 commonmark_1.9.0
## [16] parallel_4.2.2 TAM_4.1-4 expm_0.999-7
## [19] xfun_0.39 gridExtra_2.3 cluster_2.1.4
## [22] admisc_0.32 grid_4.2.2 glue_1.6.2
## [25] pbapply_1.7-0 survival_3.4-0 GPArotation_2023.3-1
## [28] splines_4.2.2 mime_0.12 permute_0.9-7
## [31] Deriv_4.1.3 markdown_1.7 vegan_2.6-4