Two approaches are demonstrated here for longitudinal data. A direct modelling approach, where a latent variable is estimated for each time point, and a linear latent growth curve approach which estimates the variability of person trajectories. The latter is limited to only 3 dimensions for any number of time points, whereas the former increases in complexity as the time points increase.
Two tier approach used in Cai 2010’s two-tier article. This example requires 4 dimensions of integration (1 for each time point + residual item factor). Below identical models are specified, however the approach used (syntax vs R containers) are presented for posterity.
library(mirt)
## Loading required package: stats4
## Loading required package: lattice
load('longitudinal-IRT.Rdata')
head(dat)
## Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 Item_7 Item_8 Item_9 Item_10
## 1 2 0 0 0 1 0 0 1 0 0
## 2 2 0 1 0 1 0 0 2 2 0
## 3 2 0 0 0 0 0 2 0 1 0
## 4 2 1 2 2 0 2 2 1 1 2
## 5 0 1 2 0 2 0 2 0 0 2
## 6 0 1 0 0 0 2 0 1 0 2
## Item_11 Item_12 Item_13 Item_14 Item_15 Item_16 Item_17 Item_18 Item_19
## 1 0 2 0 1 0 2 0 2 0
## 2 0 2 2 2 2 0 0 2 0
## 3 0 0 2 2 0 1 0 0 0
## 4 2 2 2 2 0 2 2 2 1
## 5 0 2 2 1 0 2 1 2 0
## 6 1 2 0 0 0 1 0 0 0
## Item_20 Item_1.1 Item_2.1 Item_3.1 Item_4.1 Item_5.1 Item_6.1 Item_7.1
## 1 2 0 0 1 2 0 2 0
## 2 2 2 0 2 2 0 2 2
## 3 0 0 0 1 0 1 0 2
## 4 2 2 2 1 2 1 2 1
## 5 2 2 0 2 0 1 0 0
## 6 2 0 0 0 2 0 0 0
## Item_8.1 Item_9.1 Item_10.1 Item_11.1 Item_12.1 Item_13.1 Item_14.1 Item_15.1
## 1 0 1 2 0 0 2 2 0
## 2 1 0 2 1 2 2 1 0
## 3 0 0 0 1 1 0 1 0
## 4 2 2 2 2 2 1 2 2
## 5 2 0 2 2 1 0 2 2
## 6 1 1 2 0 0 0 1 0
## Item_16.1 Item_17.1 Item_18.1 Item_19.1 Item_20.1 Item_1.2 Item_2.2 Item_3.2
## 1 2 0 2 0 2 2 1 0
## 2 2 1 2 0 2 2 0 0
## 3 0 0 0 1 0 2 1 2
## 4 2 2 2 2 2 2 2 2
## 5 2 1 2 2 2 2 0 2
## 6 1 0 1 0 0 0 2 0
## Item_4.2 Item_5.2 Item_6.2 Item_7.2 Item_8.2 Item_9.2 Item_10.2 Item_11.2
## 1 2 0 0 1 1 0 0 1
## 2 2 1 0 0 2 2 2 2
## 3 0 1 0 1 1 2 2 0
## 4 2 1 2 2 2 2 2 0
## 5 0 0 2 0 1 2 1 0
## 6 2 0 2 2 2 1 0 1
## Item_12.2 Item_13.2 Item_14.2 Item_15.2 Item_16.2 Item_17.2 Item_18.2
## 1 2 0 2 0 2 0 0
## 2 0 0 2 0 2 0 2
## 3 2 1 1 0 2 2 0
## 4 2 2 2 2 2 2 2
## 5 2 2 2 0 2 2 2
## 6 2 2 2 2 2 0 0
## Item_19.2 Item_20.2
## 1 0 0
## 2 1 0
## 3 2 2
## 4 2 2
## 5 2 2
## 6 0 2
model <- 'Time1 = 1-20
Time2 = 21-40
Time3 = 41-60
COV = Time1*Time2*Time3, Time2*Time2, Time3*Time3
MEAN = Time2, Time3'
itemloadings <- rep(1:20, times = 3)
#construct constraints manually
#obtain starting values
sv <- bfactor(dat, itemloadings, model, pars='values')
head(sv)
## group item class name parnum value lbound ubound est prior.type prior_1
## 1 all Item_1 graded a1 1 0.851 -Inf Inf TRUE none NaN
## 2 all Item_1 graded a2 2 0.000 -Inf Inf FALSE none NaN
## 3 all Item_1 graded a3 3 0.000 -Inf Inf FALSE none NaN
## 4 all Item_1 graded a4 4 0.851 -Inf Inf TRUE none NaN
## 5 all Item_1 graded a5 5 0.000 -Inf Inf FALSE none NaN
## 6 all Item_1 graded a6 6 0.000 -Inf Inf FALSE none NaN
## prior_2
## 1 NaN
## 2 NaN
## 3 NaN
## 4 NaN
## 5 NaN
## 6 NaN
# set up within time constraints
wtconstr <- sv$parnum[(sv$name == 'a1' | sv$name == 'a2' | sv$name == 'a3') & sv$est]
# create constraint list
constraints <- list()
itemnames <- colnames(dat)
pick <- c(0,20,40)
for(i in 1:20){
# across time item constraints
constraints[[paste0('slope.', i)]] <- sv$parnum[sv$name == paste0('a',3+i) & sv$est]
for(j in 1:2){
constraints[[paste0('intercept.', i, '_', j)]] <-
sv$parnum[sv$name == paste0('d',j) & (sv$item %in% itemnames[pick + i]) & sv$est]
}
#across time constraints
constraints[[paste0('Time.', i)]] <- wtconstr[pick + i]
}
#estimate model (low quadrature just as an example; not as accurate.
# Could use as starting values for model with higher quadpts via pars=mod2values(mod))
(mod <- bfactor(dat, itemloadings, model, constrain=constraints, quadpts=7, TOL=1e-3,
optimizer = 'nlminb'))
##
Iteration: 1, Log-Lik: -50000.701, Max-Change: 0.69775
Iteration: 2, Log-Lik: -49989.124, Max-Change: 0.56257
Iteration: 3, Log-Lik: -49510.166, Max-Change: 0.34350
## Warning: Latent trait variance-covariance matrix became non-positive definite.
## Error: sigma matrix contains negative eigenvalues
coef(mod, simplify=TRUE)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod' not found
coef(mod)$GroupPars[,c('MEAN_2', 'MEAN_3', 'COV_11', 'COV_21', 'COV_31', 'COV_22',
'COV_32', 'COV_33')]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod' not found
When computing latent trait scores it’s best to use a method that isn’t as affected by higher dimensionality, such as the MAP approach.
The following uses the mirt.model()
syntax approach and provides equivalent results, so choose whatever you prefer. That said, due to the pattern and complexity it may be better to use string construction approaches, such as those available via sprintf()
and the stringr
package.
itemloadings <- rep(1:20, times = 3) # will be associated with slopes a4, a5, ..., a23
model <- 'Time1 = 1-20
Time2 = 21-40
Time3 = 41-60
COV = Time1*Time2*Time3, Time2*Time2, Time3*Time3
MEAN = Time2, Time3
CONSTRAIN = (1,21,41, a1,a2,a3), (2,22,42, a1,a2,a3), (3,23,43, a1,a2,a3),
(4,24,44, a1,a2,a3), (5,25,45, a1,a2,a3), (6,26,46, a1,a2,a3),
(7,27,47, a1,a2,a3), (8,28,48, a1,a2,a3), (9,29,49, a1,a2,a3),
(10,30,50, a1,a2,a3), (11,31,51, a1,a2,a3), (12,32,52, a1,a2,a3),
(13,33,53, a1,a2,a3), (14,34,54, a1,a2,a3), (15,35,55, a1,a2,a3),
(16,36,56, a1,a2,a3), (17,37,57, a1,a2,a3), (18,38,58, a1,a2,a3),
(19,39,59, a1,a2,a3), (20,40,60, a1,a2,a3), # time-slope
(1,21,41, a4), (2,22,42, a5), (3,23,43, a6),
(4,24,44, a7), (5,25,45, a8), (6,26,46, a9),
(7,27,47, a10), (8,28,48, a11), (9,29,49, a12),
(10,30,50, a13), (11,31,51, a14), (12,32,52, a15),
(13,33,53, a16), (14,34,54, a17), (15,35,55, a18),
(16,36,56, a19), (17,37,57, a20), (18,38,58, a21),
(19,39,59, a22), (20,40,60, a23), # item-time slope
(1,21,41, d1), (2,22,42, d1), (3,23,43, d1),
(4,24,44, d1), (5,25,45, d1), (6,26,46, d1),
(7,27,47, d1), (8,28,48, d1), (9,29,49, d1),
(10,30,50, d1), (11,31,51, d1), (12,32,52, d1),
(13,33,53, d1), (14,34,54, d1), (15,35,55, d1),
(16,36,56, d1), (17,37,57, d1), (18,38,58, d1),
(19,39,59, d1), (20,40,60, d1), # item-time intercept 1
(1,21,41, d2), (2,22,42, d2), (3,23,43, d2),
(4,24,44, d2), (5,25,45, d2), (6,26,46, d2),
(7,27,47, d2), (8,28,48, d2), (9,29,49, d2),
(10,30,50, d2), (11,31,51, d2), (12,32,52, d2),
(13,33,53, d2), (14,34,54, d2), (15,35,55, d2),
(16,36,56, d2), (17,37,57, d2), (18,38,58, d2),
(19,39,59, d2), (20,40,60, d2) # item-time intercept 2
'
#estimate model (low quadrature just as an example; not as accurate.
# Could use as starting values for model with higher quadpts via pars=mod2values(mod))
(mod <- bfactor(dat, itemloadings, model, quadpts=7, TOL=1e-3, optimizer = 'nlminb'))
coef(mod, simplify=TRUE)
coef(mod)$GroupPars[,c('MEAN_2', 'MEAN_3', 'COV_11', 'COV_21', 'COV_31', 'COV_22',
'COV_32', 'COV_33')]
Specific factors represent residual terms per item. Requires only 3 dimensions (max 3 dimensions of integration for unidimensional models). Below identical models are specified, however the approach used (syntax vs R containers) are presented for posterity.
model <- mirt.model('Intercept = 1-60
Slope = 1-60
COV = Intercept*Slope, Intercept*Intercept, Slope*Slope
MEAN = Intercept, Slope')
itemloadings <- rep(1:20, times = 3)
#construct constraints manually
#obtain starting values
sv <- bfactor(dat, itemloadings, model, pars='values')
# time constants for linear trend
sv$value[sv$name == 'a1'] <- 1
sv$est[sv$name == 'a1'] <- FALSE
sv$value[sv$name == 'a2'] <- rep(0:2, each=20)
sv$est[sv$name == 'a2'] <- FALSE
# create constraint list
constraints <- list()
itemnames <- colnames(dat)
pick <- c(0,20,40)
for(i in 1:20){
# across time item constraints
constraints[[paste0('slope.', i)]] <- sv$parnum[sv$name == paste0('a',2+i) & sv$est]
for(j in 1:2){
constraints[[paste0('intercept.', i, '_', j)]] <-
sv$parnum[sv$name == paste0('d',j) & (sv$item %in% itemnames[pick + i]) & sv$est]
}
}
#estimate model (low quadrature just as an example; not as accurate.
# Could use as starting values for model with higher quadpts via pars=mod2values(mod))
mod2 <- bfactor(dat, itemloadings, model, constrain=constraints, quadpts=9, pars=sv,
optimizer = 'nlminb')
##
Iteration: 1, Log-Lik: -50645.079, Max-Change: 1.20216
Iteration: 2, Log-Lik: -50167.537, Max-Change: 0.30470
Iteration: 3, Log-Lik: -50011.984, Max-Change: 0.12046
Iteration: 4, Log-Lik: -50018.821, Max-Change: 0.08804
Iteration: 5, Log-Lik: -49994.442, Max-Change: 0.02745
## Warning: Latent trait variance-covariance matrix became non-positive definite.
## Error: sigma matrix contains negative eigenvalues
coef(mod2, simplify=TRUE)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod2' not found
coef(mod2)$GroupPars[,c('MEAN_1', 'MEAN_2', 'COV_11', 'COV_21', 'COV_22')]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'coef': object 'mod2' not found
The following uses the mirt.model()
syntax approach and provides equivalent results, so choose whatever you prefer. That said, due to the pattern and complexity it may be better to use string construction approaches, such as those available via sprintf()
and the stringr
package.
itemloadings <- rep(1:20, times = 3) # will be associated with slopes a3, a4, ..., a22
model <- mirt.model('Intercept = 1-60
Slope = 1-60
COV = Intercept*Slope, Intercept*Intercept, Slope*Slope
MEAN = Intercept, Slope
FIXED = (1-60, a1), (1-60, a2)
START = (1-60, a1, 1.0), (1-20, a2, 0.0), (21-40, a2, 1.0), (41-60, a2, 2.0)
CONSTRAIN = (1,21,41, a3), (2,22,42, a4), (3,23,43, a5),
(4,24,44, a6), (5,25,45, a7), (6,26,46, a8),
(7,27,47, a9), (8,28,48, a10), (9,29,49, a11),
(10,30,50, a12), (11,31,51, a13), (12,32,52, a14),
(13,33,53, a15), (14,34,54, a16), (15,35,55, a17),
(16,36,56, a18), (17,37,57, a19), (18,38,58, a20),
(19,39,59, a21), (20,40,60, a22), # item-time slope
(1,21,41, d1), (2,22,42, d1), (3,23,43, d1),
(4,24,44, d1), (5,25,45, d1), (6,26,46, d1),
(7,27,47, d1), (8,28,48, d1), (9,29,49, d1),
(10,30,50, d1), (11,31,51, d1), (12,32,52, d1),
(13,33,53, d1), (14,34,54, d1), (15,35,55, d1),
(16,36,56, d1), (17,37,57, d1), (18,38,58, d1),
(19,39,59, d1), (20,40,60, d1), # item-time intercept 1
(1,21,41, d2), (2,22,42, d2), (3,23,43, d2),
(4,24,44, d2), (5,25,45, d2), (6,26,46, d2),
(7,27,47, d2), (8,28,48, d2), (9,29,49, d2),
(10,30,50, d2), (11,31,51, d2), (12,32,52, d2),
(13,33,53, d2), (14,34,54, d2), (15,35,55, d2),
(16,36,56, d2), (17,37,57, d2), (18,38,58, d2),
(19,39,59, d2), (20,40,60, d2) # item-time intercept 2
')
mod2 <- bfactor(dat, itemloadings, model, quadpts=9, optimizer = 'nlminb')
coef(mod2, simplify=TRUE)
coef(mod2)$GroupPars[,c('MEAN_1', 'MEAN_2', 'COV_11', 'COV_21', 'COV_22')]