IRT approaches to longitudinal data

IRT approaches to longitudinal data

Two approaches are demonstrated here for longitudinal data. A direct modelling approach, where a latent variable is estimated for each time point, and a linear latent growth curve approach which estimates the variability of person trajectories. The latter is limited to only 3 dimensions for any number of time points, whereas the former increases in complexity as the time points increase.

Direct estimation of latent factors for each wave

Two tier approach used in Cai 2010’s two-tier article. This example requires 4 dimensions of integration (1 for each time point + residual item factor). Below identical models are specified, however the approach used (syntax vs R containers) are presented for posterity.

library(mirt)
## Loading required package: stats4
## Loading required package: lattice
load('longitudinal-IRT.Rdata')
head(dat)
##   Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 Item_7 Item_8 Item_9 Item_10
## 1      2      0      0      0      1      0      0      1      0       0
## 2      2      0      1      0      1      0      0      2      2       0
## 3      2      0      0      0      0      0      2      0      1       0
## 4      2      1      2      2      0      2      2      1      1       2
## 5      0      1      2      0      2      0      2      0      0       2
## 6      0      1      0      0      0      2      0      1      0       2
##   Item_11 Item_12 Item_13 Item_14 Item_15 Item_16 Item_17 Item_18 Item_19
## 1       0       2       0       1       0       2       0       2       0
## 2       0       2       2       2       2       0       0       2       0
## 3       0       0       2       2       0       1       0       0       0
## 4       2       2       2       2       0       2       2       2       1
## 5       0       2       2       1       0       2       1       2       0
## 6       1       2       0       0       0       1       0       0       0
##   Item_20 Item_1.1 Item_2.1 Item_3.1 Item_4.1 Item_5.1 Item_6.1 Item_7.1
## 1       2        0        0        1        2        0        2        0
## 2       2        2        0        2        2        0        2        2
## 3       0        0        0        1        0        1        0        2
## 4       2        2        2        1        2        1        2        1
## 5       2        2        0        2        0        1        0        0
## 6       2        0        0        0        2        0        0        0
##   Item_8.1 Item_9.1 Item_10.1 Item_11.1 Item_12.1 Item_13.1 Item_14.1 Item_15.1
## 1        0        1         2         0         0         2         2         0
## 2        1        0         2         1         2         2         1         0
## 3        0        0         0         1         1         0         1         0
## 4        2        2         2         2         2         1         2         2
## 5        2        0         2         2         1         0         2         2
## 6        1        1         2         0         0         0         1         0
##   Item_16.1 Item_17.1 Item_18.1 Item_19.1 Item_20.1 Item_1.2 Item_2.2 Item_3.2
## 1         2         0         2         0         2        2        1        0
## 2         2         1         2         0         2        2        0        0
## 3         0         0         0         1         0        2        1        2
## 4         2         2         2         2         2        2        2        2
## 5         2         1         2         2         2        2        0        2
## 6         1         0         1         0         0        0        2        0
##   Item_4.2 Item_5.2 Item_6.2 Item_7.2 Item_8.2 Item_9.2 Item_10.2 Item_11.2
## 1        2        0        0        1        1        0         0         1
## 2        2        1        0        0        2        2         2         2
## 3        0        1        0        1        1        2         2         0
## 4        2        1        2        2        2        2         2         0
## 5        0        0        2        0        1        2         1         0
## 6        2        0        2        2        2        1         0         1
##   Item_12.2 Item_13.2 Item_14.2 Item_15.2 Item_16.2 Item_17.2 Item_18.2
## 1         2         0         2         0         2         0         0
## 2         0         0         2         0         2         0         2
## 3         2         1         1         0         2         2         0
## 4         2         2         2         2         2         2         2
## 5         2         2         2         0         2         2         2
## 6         2         2         2         2         2         0         0
##   Item_19.2 Item_20.2
## 1         0         0
## 2         1         0
## 3         2         2
## 4         2         2
## 5         2         2
## 6         0         2

Model constructed using R containers

model <- 'Time1 = 1-20
          Time2 = 21-40
          Time3 = 41-60
          COV = Time1*Time2*Time3, Time2*Time2, Time3*Time3
          MEAN = Time2, Time3'
itemloadings <- rep(1:20, times = 3) 

#construct constraints manually 
#obtain starting values
sv <- bfactor(dat, itemloadings, model, pars='values')
head(sv)
##   group   item  class name parnum value lbound ubound   est prior.type prior_1
## 1   all Item_1 graded   a1      1 0.851   -Inf    Inf  TRUE       none     NaN
## 2   all Item_1 graded   a2      2 0.000   -Inf    Inf FALSE       none     NaN
## 3   all Item_1 graded   a3      3 0.000   -Inf    Inf FALSE       none     NaN
## 4   all Item_1 graded   a4      4 0.851   -Inf    Inf  TRUE       none     NaN
## 5   all Item_1 graded   a5      5 0.000   -Inf    Inf FALSE       none     NaN
## 6   all Item_1 graded   a6      6 0.000   -Inf    Inf FALSE       none     NaN
##   prior_2
## 1     NaN
## 2     NaN
## 3     NaN
## 4     NaN
## 5     NaN
## 6     NaN
# set up within time constraints
wtconstr <- sv$parnum[(sv$name == 'a1' | sv$name == 'a2' | sv$name == 'a3') & sv$est]

# create constraint list
constraints <- list()
itemnames <- colnames(dat)
pick <- c(0,20,40)
for(i in 1:20){
    
    # across time item constraints
    constraints[[paste0('slope.', i)]] <- sv$parnum[sv$name == paste0('a',3+i) & sv$est]
    for(j in 1:2){
        constraints[[paste0('intercept.', i, '_', j)]] <- 
            sv$parnum[sv$name == paste0('d',j) & (sv$item %in% itemnames[pick + i]) & sv$est]
    }
    
    #across time constraints
    constraints[[paste0('Time.', i)]] <- wtconstr[pick + i]
}

#estimate model (low quadrature just as an example; not as accurate.
#                Could use as starting values for model with higher quadpts via pars=mod2values(mod))
(mod <- bfactor(dat, itemloadings, model, constrain=constraints, quadpts=7, TOL=1e-3,
                optimizer = 'nlminb'))
## 
Iteration: 1, Log-Lik: -50000.701, Max-Change: 0.69775
Iteration: 2, Log-Lik: -49989.124, Max-Change: 0.56257
Iteration: 3, Log-Lik: -49510.166, Max-Change: 0.34350
## Warning: Latent trait variance-covariance matrix became non-positive definite.
## Error: sigma matrix contains negative eigenvalues
coef(mod, simplify=TRUE)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod' not found
coef(mod)$GroupPars[,c('MEAN_2', 'MEAN_3', 'COV_11', 'COV_21', 'COV_31', 'COV_22', 
                       'COV_32', 'COV_33')]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod' not found

When computing latent trait scores it’s best to use a method that isn’t as affected by higher dimensionality, such as the MAP approach.

Model constructed using mirt’s sytax input

The following uses the mirt.model() syntax approach and provides equivalent results, so choose whatever you prefer. That said, due to the pattern and complexity it may be better to use string construction approaches, such as those available via sprintf() and the stringr package.

itemloadings <- rep(1:20, times = 3) # will be associated with slopes a4, a5, ..., a23
model <- 'Time1 = 1-20
          Time2 = 21-40
          Time3 = 41-60
          COV = Time1*Time2*Time3, Time2*Time2, Time3*Time3
          MEAN = Time2, Time3
          CONSTRAIN = (1,21,41, a1,a2,a3), (2,22,42, a1,a2,a3), (3,23,43, a1,a2,a3), 
                      (4,24,44, a1,a2,a3), (5,25,45, a1,a2,a3), (6,26,46, a1,a2,a3),
                      (7,27,47, a1,a2,a3), (8,28,48, a1,a2,a3), (9,29,49, a1,a2,a3), 
                      (10,30,50, a1,a2,a3), (11,31,51, a1,a2,a3), (12,32,52, a1,a2,a3),
                      (13,33,53, a1,a2,a3), (14,34,54, a1,a2,a3), (15,35,55, a1,a2,a3),
                      (16,36,56, a1,a2,a3), (17,37,57, a1,a2,a3), (18,38,58, a1,a2,a3),
                      (19,39,59, a1,a2,a3), (20,40,60, a1,a2,a3),     # time-slope 
                      
                      (1,21,41, a4), (2,22,42, a5), (3,23,43, a6), 
                      (4,24,44, a7), (5,25,45, a8), (6,26,46, a9),
                      (7,27,47, a10), (8,28,48, a11), (9,29,49, a12), 
                      (10,30,50, a13), (11,31,51, a14), (12,32,52, a15),
                      (13,33,53, a16), (14,34,54, a17), (15,35,55, a18),
                      (16,36,56, a19), (17,37,57, a20), (18,38,58, a21),
                      (19,39,59, a22), (20,40,60, a23),     # item-time slope 
                      
                      (1,21,41, d1), (2,22,42, d1), (3,23,43, d1), 
                      (4,24,44, d1), (5,25,45, d1), (6,26,46, d1),
                      (7,27,47, d1), (8,28,48, d1), (9,29,49, d1), 
                      (10,30,50, d1), (11,31,51, d1), (12,32,52, d1),
                      (13,33,53, d1), (14,34,54, d1), (15,35,55, d1),
                      (16,36,56, d1), (17,37,57, d1), (18,38,58, d1),
                      (19,39,59, d1), (20,40,60, d1),     # item-time intercept 1 
                      
                      (1,21,41, d2), (2,22,42, d2), (3,23,43, d2), 
                      (4,24,44, d2), (5,25,45, d2), (6,26,46, d2),
                      (7,27,47, d2), (8,28,48, d2), (9,29,49, d2), 
                      (10,30,50, d2), (11,31,51, d2), (12,32,52, d2),
                      (13,33,53, d2), (14,34,54, d2), (15,35,55, d2),
                      (16,36,56, d2), (17,37,57, d2), (18,38,58, d2),
                      (19,39,59, d2), (20,40,60, d2)     # item-time intercept 2 
                     '

#estimate model (low quadrature just as an example; not as accurate.
#                Could use as starting values for model with higher quadpts via pars=mod2values(mod))
(mod <- bfactor(dat, itemloadings, model, quadpts=7, TOL=1e-3, optimizer = 'nlminb'))

coef(mod, simplify=TRUE)
coef(mod)$GroupPars[,c('MEAN_2', 'MEAN_3', 'COV_11', 'COV_21', 'COV_31', 'COV_22', 
                       'COV_32', 'COV_33')]

Linear latent growth curve IRT

Specific factors represent residual terms per item. Requires only 3 dimensions (max 3 dimensions of integration for unidimensional models). Below identical models are specified, however the approach used (syntax vs R containers) are presented for posterity.

Model constructed using R containers

model <- mirt.model('Intercept = 1-60
                     Slope = 1-60
                     COV = Intercept*Slope, Intercept*Intercept, Slope*Slope
                     MEAN = Intercept, Slope')
itemloadings <- rep(1:20, times = 3) 

#construct constraints manually
#obtain starting values
sv <- bfactor(dat, itemloadings, model, pars='values')

# time constants for linear trend
sv$value[sv$name == 'a1'] <- 1
sv$est[sv$name == 'a1'] <- FALSE
sv$value[sv$name == 'a2'] <- rep(0:2, each=20)
sv$est[sv$name == 'a2'] <- FALSE

# create constraint list
constraints <- list()
itemnames <- colnames(dat)
pick <- c(0,20,40)
for(i in 1:20){    
    # across time item constraints
    constraints[[paste0('slope.', i)]] <- sv$parnum[sv$name == paste0('a',2+i) & sv$est]
    for(j in 1:2){
        constraints[[paste0('intercept.', i, '_', j)]] <- 
            sv$parnum[sv$name == paste0('d',j) & (sv$item %in% itemnames[pick + i]) & sv$est]
    }
}

#estimate model (low quadrature just as an example; not as accurate.
#                Could use as starting values for model with higher quadpts via pars=mod2values(mod))
mod2 <- bfactor(dat, itemloadings, model, constrain=constraints, quadpts=9, pars=sv, 
                optimizer = 'nlminb')
## 
Iteration: 1, Log-Lik: -50645.079, Max-Change: 1.20216
Iteration: 2, Log-Lik: -50167.537, Max-Change: 0.30470
Iteration: 3, Log-Lik: -50011.984, Max-Change: 0.12046
Iteration: 4, Log-Lik: -50018.821, Max-Change: 0.08804
Iteration: 5, Log-Lik: -49994.442, Max-Change: 0.02745
## Warning: Latent trait variance-covariance matrix became non-positive definite.
## Error: sigma matrix contains negative eigenvalues
coef(mod2, simplify=TRUE)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod2' not found
coef(mod2)$GroupPars[,c('MEAN_1', 'MEAN_2', 'COV_11', 'COV_21', 'COV_22')]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod2' not found

Model constructed using mirt’s sytax input

The following uses the mirt.model() syntax approach and provides equivalent results, so choose whatever you prefer. That said, due to the pattern and complexity it may be better to use string construction approaches, such as those available via sprintf() and the stringr package.

itemloadings <- rep(1:20, times = 3)  # will be associated with slopes a3, a4, ..., a22
model <- mirt.model('Intercept = 1-60
                     Slope = 1-60
                     COV = Intercept*Slope, Intercept*Intercept, Slope*Slope
                     MEAN = Intercept, Slope
                     FIXED = (1-60, a1), (1-60, a2)
                     START = (1-60, a1, 1.0), (1-20, a2, 0.0), (21-40, a2, 1.0), (41-60, a2, 2.0)
                     CONSTRAIN = (1,21,41, a3), (2,22,42, a4), (3,23,43, a5), 
                      (4,24,44, a6), (5,25,45, a7), (6,26,46, a8),
                      (7,27,47, a9), (8,28,48, a10), (9,29,49, a11), 
                      (10,30,50, a12), (11,31,51, a13), (12,32,52, a14),
                      (13,33,53, a15), (14,34,54, a16), (15,35,55, a17),
                      (16,36,56, a18), (17,37,57, a19), (18,38,58, a20),
                      (19,39,59, a21), (20,40,60, a22),     # item-time slope 
                      
                      (1,21,41, d1), (2,22,42, d1), (3,23,43, d1), 
                      (4,24,44, d1), (5,25,45, d1), (6,26,46, d1),
                      (7,27,47, d1), (8,28,48, d1), (9,29,49, d1), 
                      (10,30,50, d1), (11,31,51, d1), (12,32,52, d1),
                      (13,33,53, d1), (14,34,54, d1), (15,35,55, d1),
                      (16,36,56, d1), (17,37,57, d1), (18,38,58, d1),
                      (19,39,59, d1), (20,40,60, d1),     # item-time intercept 1 
                      
                      (1,21,41, d2), (2,22,42, d2), (3,23,43, d2), 
                      (4,24,44, d2), (5,25,45, d2), (6,26,46, d2),
                      (7,27,47, d2), (8,28,48, d2), (9,29,49, d2), 
                      (10,30,50, d2), (11,31,51, d2), (12,32,52, d2),
                      (13,33,53, d2), (14,34,54, d2), (15,35,55, d2),
                      (16,36,56, d2), (17,37,57, d2), (18,38,58, d2),
                      (19,39,59, d2), (20,40,60, d2)     # item-time intercept 2 
                    ')

mod2 <- bfactor(dat, itemloadings, model, quadpts=9, optimizer = 'nlminb')

coef(mod2, simplify=TRUE)

coef(mod2)$GroupPars[,c('MEAN_1', 'MEAN_2', 'COV_11', 'COV_21', 'COV_22')]