mirt example for latent class analysis

Contributed by Alexander Robitzsch

The following example demonstrates how the customTheta and customPriorFun inputs can be used to estimate an unconditional latent class analysis. Results are compared to the sirt package.

library(mirt)
library(sirt)

#******
# use data.read dataset

data(data.read,package="sirt")
dat <- data.read

# define three latent classes
nclass <- 3
Theta <- diag(nclass)

# define mirt model
I <- ncol(dat)  # I = 12
mirtmodel <- nclass

# get parameters
mod.pars <- mirt(dat, model=mirtmodel, pars = "values")   

# modify parameters: only slopes refer to item-class probabilities
set.seed(9976)

# set starting values for class specific item probabilities
mod.pars[ mod.pars$name == "d" ,"value" ]  <- 0
mod.pars[ mod.pars$name == "d" ,"est" ]  <- FALSE

b1 <- qnorm( colMeans( dat ) )
mod.pars[ mod.pars$name == "a1" ,"value" ]  <- b1

# random starting values for other classes
mod.pars[ mod.pars$name %in% c("a2","a3") ,"value" ]  <- b1+runif( ncol(dat)*(nclass-1) , -1 ,1 )
head(mod.pars)

##   group item class name parnum     value lbound ubound   est prior.type prior_1
## 1   all   A1  dich   a1      1 1.0390521   -Inf    Inf  TRUE       none     NaN
## 2   all   A1  dich   a2      2 0.6981548   -Inf    Inf  TRUE       none     NaN
## 3   all   A1  dich   a3      3 0.1645603   -Inf    Inf  TRUE       none     NaN
## 4   all   A1  dich    d      4 0.0000000   -Inf    Inf FALSE       none     NaN
## 5   all   A1  dich    g      5 0.0000000      0      1 FALSE       none     NaN
## 6   all   A1  dich    u      6 1.0000000      0      1 FALSE       none     NaN
##   prior_2
## 1     NaN
## 2     NaN
## 3     NaN
## 4     NaN
## 5     NaN
## 6     NaN

#*** define prior for latent class analysis
lca_prior <- function(Theta,Etable){
  TP <- nrow(Theta)  
  if ( is.null(Etable) ){
      prior <- rep( 1/TP , TP )
  } else {  
      prior <- rowSums(Etable)
  }
  prior <- prior / sum(prior) 
  return(prior)
}

#*** estimate model
mod1 <- mirt(dat, mirtmodel , technical = list(
            customTheta=Theta , customPriorFun = lca_prior) ,
            pars = mod.pars, verbose=FALSE )

## EM cycles terminated after 500 iterations.

#*** extract item parameters
pars1 <- mod1@pars

## Error in eval(expr, envir, enclos): no slot of name "pars" for this object of class "SingleGroupClass"

dfr <- sapply( 1:I , FUN = function(ii){
      pars.ii <- pars1[[ii]]@par
      return(pars.ii)
      } )

## Error in FUN(X[[i]], ...): object 'pars1' not found

coef1 <- t(dfr)

#*** inspect estimated distribution
mod1@Prior[[1]]   #class proportions

## Error in eval(expr, envir, enclos): no slot of name "Prior" for this object of class "SingleGroupClass"

#*** extract class-specific item-probabilities
probs <- apply( coef1[ , c("a1","a2","a3") ] , 2 , plogis )

## Error in coef1[, c("a1", "a2", "a3")]: subscript out of bounds

probs

##       prob.class1 prob.class2 prob.class3
##  [1,]  0.08454814   0.8692890   0.4769186
##  [2,]  0.07800775   0.8612512   0.4696295
##  [3,]  0.07506595   0.8273935   0.4512297
##  [4,]  0.03608832   0.7977851   0.4169367
##  [5,]  0.13570005   0.7222430   0.4289715
##  [6,]  0.19234963   0.7342591   0.4633044
##  [7,]  0.17775734   0.7212482   0.4495028
##  [8,]  0.21619808   0.7689960   0.4925970
##  [9,]  0.03471106   0.7117222   0.3732166
## [10,]  0.26985287   0.9312928   0.6005728

print(mod1)

## 
## Call:
## mirt(data = dat, model = mirtmodel, pars = mod.pars, verbose = FALSE, 
##     technical = list(customTheta = Theta, customPriorFun = lca_prior))
## 
## Full-information item factor analysis with 3 factor(s).
## FAILED TO CONVERGE within 1e-04 tolerance after 500 EM iterations.
## mirt version: 1.39.4 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 3
## Latent density type: Gaussian 
## 
## Log-likelihood = -1940.367
## Estimated parameters: 33 
## AIC = 3946.733
## BIC = 4071.903; SABIC = 3967.227
## G2 (4062) = 778.77, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN

#-----   compare estimated LCA in sirt
mod2 <- sirt::rasch.mirtlc( dat , Nclasses = 3 ,seed= -30 ,  nstarts=2, progress = FALSE )   
summary(mod2)

## ---------------------------------------------------------------------------------------------------------- 
## sirt 3.12-66 (2022-05-16 12:27:54) 
## 
## Date of Analysis: 2023-07-05 16:11:55 
## Time difference of 0.04520082 secs
## Computation time: 0.04520082 
## 
## Multidimensional Item Response Latent Class Model 
## 
## Latent Class Model with 3 Classes  -  1 Group(s)
## ---------------------------------------------------------------------------------------------------------- 
## Number of iterations= 43 
## Deviance= 3814.47  | Log Likelihood= -1907.24 
## Number of persons= 328 
## Number of estimated item parameters= 36 
## Number of estimated distribution parameters= 2 
## Number of estimated parameters= 38 
## AIC= 3890.47  | penalty= 76    | AIC=-2*LL + 2*p  
## AICc= 3900.73  | penalty= 86.26    | AICc=-2*LL + 2*p + 2*p*(p+1)/(n-p-1)  (bias corrected AIC)
## BIC= 4034.61  | penalty= 220.13    | BIC=-2*LL + log(n)*p  
## CAIC= 4072.61  | penalty= 258.13   | CAIC=-2*LL + [log(n)+1]*p  (consistent AIC)
## 
## ---------------------------------------------------------------------------------------------------------- 
## Trait Distribution
## Class1 Class2 Class3 
##  0.064  0.451  0.485 
## ---------------------------------------------------------------------------------------------------------- 
## Item Parameters 
## Item Probabilities
##    Class1 Class2 Class3
## A1  0.834  0.692  1.000
## A2  0.556  0.513  0.971
## A3  0.486  0.309  0.818
## A4  0.289  0.267  0.663
## B1  0.608  0.582  0.849
## B2  0.363  0.374  0.648
## B3  0.674  0.861  0.984
## B4  0.508  0.493  0.883
## C1  0.153  0.972  1.000
## C2  0.050  0.609  0.899
## C3  0.000  0.895  0.966
## C4  0.000  0.702  0.862

par( mfrow=c(2,1))
matplot( probs , type="l" , xlab="Item" , main="mirt::mirt")
matplot( t(mod2$pjk) , type="l" , xlab="Item" , main="sirt::rasch.mirtlc" )

plot of chunk unnamed-chunk-1

par( mfrow=c(1,1))

# compare class probabilities
cbind( sort(mod2$pi.k), sort(mod1@Prior[[1]]))

## Error in sort(mod1@Prior[[1]]): no slot of name "Prior" for this object of class "SingleGroupClass"

sessionInfo()

## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] mvtnorm_1.2-2   sirt_3.12-66    ltm_1.2-0       polycor_0.8-1  
## [5] msm_1.7         MASS_7.3-58.2   mirt_1.39.4     lattice_0.20-45
## [9] knitr_1.43     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.10          highr_0.10           compiler_4.2.2      
##  [4] dcurver_0.9.2        tools_4.2.2          evaluate_0.21       
##  [7] lifecycle_1.0.3      gtable_0.3.3         nlme_3.1-162        
## [10] mgcv_1.8-41          rlang_1.1.1          Matrix_1.5-3        
## [13] cli_3.6.1            rstudioapi_0.14      commonmark_1.9.0    
## [16] parallel_4.2.2       TAM_4.1-4            expm_0.999-7        
## [19] xfun_0.39            gridExtra_2.3        cluster_2.1.4       
## [22] admisc_0.32          grid_4.2.2           glue_1.6.2          
## [25] pbapply_1.7-0        survival_3.4-0       CDM_8.2-6           
## [28] GPArotation_2023.3-1 splines_4.2.2        mime_0.12           
## [31] permute_0.9-7        Deriv_4.1.3          markdown_1.7        
## [34] vegan_2.6-4