| simdata {mirt} | R Documentation | 
Simulates response patterns for compensatory and noncompensatory MIRT models
from multivariate normally distributed factor (\theta) scores, or from
a user input matrix of \theta's.
simdata(
  a,
  d,
  N,
  itemtype,
  sigma = NULL,
  mu = NULL,
  guess = 0,
  upper = 1,
  nominal = NULL,
  t = NULL,
  Theta = NULL,
  gpcm_mats = list(),
  returnList = FALSE,
  model = NULL,
  equal.K = TRUE,
  which.items = NULL,
  mins = 0,
  lca_cats = NULL,
  prob.list = NULL
)
a | 
 a matrix/vector of slope parameters. If slopes are to be constrained to
zero then use   | 
d | 
 a matrix/vector of intercepts. The matrix should have as many columns as
the item with the largest number of categories, and filled empty locations
with   | 
N | 
 sample size  | 
itemtype | 
 a character vector of length  If the internal class of the object is specified instead, the inputs can
be   | 
sigma | 
 a covariance matrix of the underlying distribution. Default is
the identity matrix. Used when   | 
mu | 
 a mean vector of the underlying distribution. Default is a vector
of zeros. Used when   | 
guess | 
 a vector of guessing parameters for each item; only applicable for dichotomous items. Must be either a scalar value that will affect all of the dichotomous items, or a vector with as many values as to be simulated items  | 
upper | 
 same as   | 
nominal | 
 a matrix of specific item category slopes for nominal models.
Should be the dimensions as the intercept specification with one less column, with   | 
t | 
 matrix of t-values for the 'ggum' itemtype, where each row corresponds to a given item.
Also determines the number of categories, where   | 
Theta | 
 a user specified matrix of the underlying ability parameters,
where   | 
gpcm_mats | 
 a list of matrices specifying the scoring scheme for generalized partial
credit models (see   | 
returnList | 
 logical; return a list containing the data, item objects defined
by   | 
model | 
 a single group object, typically returned by functions such as   | 
equal.K | 
 logical; when a   | 
which.items | 
 an integer vector used to indicate which items to simulate when a
  | 
mins | 
 an integer vector (or single value to be used for each item) indicating what
the lowest category should be. If   | 
lca_cats | 
 a vector indicating how many categories each lca item should have. If not supplied then it is assumed that 2 categories should be generated for each item  | 
prob.list | 
 an optional list containing matrix/data.frames of probabilities values for each category to be simulated. This is useful when creating customized probability functions to be sampled from  | 
Returns a data matrix simulated from the parameters, or a list containing the data, item objects, and Theta matrix.
Phil Chalmers rphilip.chalmers@gmail.com
Chalmers, R., P. (2012). mirt: A Multidimensional Item Response Theory Package for the R Environment. Journal of Statistical Software, 48(6), 1-29. doi:10.18637/jss.v048.i06
Reckase, M. D. (2009). Multidimensional Item Response Theory. New York: Springer.
### Parameters from Reckase (2009), p. 153
set.seed(1234)
a <- matrix(c(
 .7471, .0250, .1428,
 .4595, .0097, .0692,
 .8613, .0067, .4040,
1.0141, .0080, .0470,
 .5521, .0204, .1482,
1.3547, .0064, .5362,
1.3761, .0861, .4676,
 .8525, .0383, .2574,
1.0113, .0055, .2024,
 .9212, .0119, .3044,
 .0026, .0119, .8036,
 .0008, .1905,1.1945,
 .0575, .0853, .7077,
 .0182, .3307,2.1414,
 .0256, .0478, .8551,
 .0246, .1496, .9348,
 .0262, .2872,1.3561,
 .0038, .2229, .8993,
 .0039, .4720, .7318,
 .0068, .0949, .6416,
 .3073, .9704, .0031,
 .1819, .4980, .0020,
 .4115,1.1136, .2008,
 .1536,1.7251, .0345,
 .1530, .6688, .0020,
 .2890,1.2419, .0220,
 .1341,1.4882, .0050,
 .0524, .4754, .0012,
 .2139, .4612, .0063,
 .1761,1.1200, .0870),30,3,byrow=TRUE)*1.702
d <- matrix(c(.1826,-.1924,-.4656,-.4336,-.4428,-.5845,-1.0403,
  .6431,.0122,.0912,.8082,-.1867,.4533,-1.8398,.4139,
  -.3004,-.1824,.5125,1.1342,.0230,.6172,-.1955,-.3668,
  -1.7590,-.2434,.4925,-.3410,.2896,.006,.0329),ncol=1)*1.702
mu <- c(-.4, -.7, .1)
sigma <- matrix(c(1.21,.297,1.232,.297,.81,.252,1.232,.252,1.96),3,3)
dataset1 <- simdata(a, d, 2000, itemtype = '2PL')
dataset2 <- simdata(a, d, 2000, itemtype = '2PL', mu = mu, sigma = sigma)
#mod <- mirt(dataset1, 3, method = 'MHRM')
#coef(mod)
## No test: 
### Unidimensional graded response model with 5 categories each
a <- matrix(rlnorm(20,.2,.3))
# for the graded model, ensure that there is enough space between the intercepts,
# otherwise closer categories will not be selected often (minimum distance of 0.3 here)
diffs <- t(apply(matrix(runif(20*4, .3, 1), 20), 1, cumsum))
diffs <- -(diffs - rowMeans(diffs))
d <- diffs + rnorm(20)
dat <- simdata(a, d, 500, itemtype = 'graded')
# mod <- mirt(dat, 1)
### An example of a mixed item, bifactor loadings pattern with correlated specific factors
a <- matrix(c(
.8,.4,NA,
.4,.4,NA,
.7,.4,NA,
.8,NA,.4,
.4,NA,.4,
.7,NA,.4),ncol=3,byrow=TRUE)
d <- matrix(c(
-1.0,NA,NA,
 1.5,NA,NA,
 0.0,NA,NA,
0.0,-1.0,1.5,  #the first 0 here is the recommended constraint for nominal
0.0,1.0,-1, #the first 0 here is the recommended constraint for gpcm
2.0,0.0,NA),ncol=3,byrow=TRUE)
nominal <- matrix(NA, nrow(d), ncol(d))
# the first 0 and last (ncat - 1) = 2 values are the recommended constraints
nominal[4, ] <- c(0,1.2,2)
sigma <- diag(3)
sigma[2,3] <- sigma[3,2] <- .25
items <- c('2PL','2PL','2PL','nominal','gpcm','graded')
dataset <- simdata(a,d,2000,items,sigma=sigma,nominal=nominal)
#mod <- bfactor(dataset, c(1,1,1,2,2,2), itemtype=c(rep('2PL', 3), 'nominal', 'gpcm','graded'))
#coef(mod)
#### Convert standardized factor loadings to slopes
F2a <- function(F, D=1.702){
    h2 <- rowSums(F^2)
    a <- (F / sqrt(1 - h2)) * D
    a
}
(F <- matrix(c(rep(.7, 5), rep(.5,5))))
##       [,1]
##  [1,]  0.7
##  [2,]  0.7
##  [3,]  0.7
##  [4,]  0.7
##  [5,]  0.7
##  [6,]  0.5
##  [7,]  0.5
##  [8,]  0.5
##  [9,]  0.5
## [10,]  0.5
(a <- F2a(F))
##            [,1]
##  [1,] 1.6682937
##  [2,] 1.6682937
##  [3,] 1.6682937
##  [4,] 1.6682937
##  [5,] 1.6682937
##  [6,] 0.9826502
##  [7,] 0.9826502
##  [8,] 0.9826502
##  [9,] 0.9826502
## [10,] 0.9826502
d <- rnorm(10)
dat <- simdata(a, d, 5000, itemtype = '2PL')
mod <- mirt(dat, 1)
coef(mod, simplify=TRUE)$items
##                a1           d g u
## Item_1  1.5912124  0.09329250 0 1
## Item_2  1.6611479  0.77704003 0 1
## Item_3  1.6100289  0.04348744 0 1
## Item_4  1.5251147 -0.39866322 0 1
## Item_5  1.7126206 -0.69888431 0 1
## Item_6  0.9352647 -1.71863199 0 1
## Item_7  0.9211469  0.73522267 0 1
## Item_8  0.9893392 -1.25501247 0 1
## Item_9  0.9999014  0.41687594 0 1
## Item_10 0.9416579 -1.00423353 0 1
summary(mod)
##            F1    h2
## Item_1  0.683 0.466
## Item_2  0.698 0.488
## Item_3  0.687 0.472
## Item_4  0.667 0.445
## Item_5  0.709 0.503
## Item_6  0.482 0.232
## Item_7  0.476 0.227
## Item_8  0.503 0.253
## Item_9  0.507 0.257
## Item_10 0.484 0.234
## 
## SS loadings:  3.577 
## Proportion Var:  0.358 
## 
## Factor correlations: 
## 
##    F1
## F1  1
mod2 <- mirt(dat, 'F1 = 1-10
                   CONSTRAIN = (1-5, a1), (6-10, a1)')
summary(mod2)
##            F1    h2
## Item_1  0.689 0.475
## Item_2  0.689 0.475
## Item_3  0.689 0.475
## Item_4  0.689 0.475
## Item_5  0.689 0.475
## Item_6  0.491 0.241
## Item_7  0.491 0.241
## Item_8  0.491 0.241
## Item_9  0.491 0.241
## Item_10 0.491 0.241
## 
## SS loadings:  3.576 
## Proportion Var:  0.358 
## 
## Factor correlations: 
## 
##    F1
## F1  1
anova(mod2, mod)
##           AIC    SABIC       HQ      BIC    logLik    X2 df   p
## mod2 58058.43 58098.51 58085.85 58136.64 -29017.22             
## mod  58068.01 58134.80 58113.69 58198.35 -29014.01 6.425  8 0.6
#### Convert classical 3PL paramerization into slope-intercept form
nitems <- 50
as <- rlnorm(nitems, .2, .2)
bs <- rnorm(nitems, 0, 1)
gs <- rbeta(nitems, 5, 17)
# convert first item (only intercepts differ in resulting transformation)
traditional2mirt(c('a'=as[1], 'b'=bs[1], 'g'=gs[1], 'u'=1), cls='3PL')
##         a1          d          g          u 
##  1.2795115 -0.1107008  0.2525144  1.0000000
# convert all difficulties to intercepts
ds <- numeric(nitems)
for(i in 1:nitems)
   ds[i] <- traditional2mirt(c('a'=as[i], 'b'=bs[i], 'g'=gs[i], 'u'=1),
                             cls='3PL')[2]
dat <- simdata(as, ds, N=5000, guess=gs, itemtype = '3PL')
# estimate with beta prior for guessing parameters
# mod <- mirt(dat, model="Theta = 1-50
#                         PRIOR = (1-50, g, expbeta, 5, 17)", itemtype = '3PL')
# coef(mod, simplify=TRUE, IRTpars=TRUE)$items
# data.frame(as, bs, gs, us=1)
#### Unidimensional nonlinear factor pattern
theta <- rnorm(2000)
Theta <- cbind(theta,theta^2)
a <- matrix(c(
.8,.4,
.4,.4,
.7,.4,
.8,NA,
.4,NA,
.7,NA),ncol=2,byrow=TRUE)
d <- matrix(rnorm(6))
itemtype <- rep('2PL',6)
nonlindata <- simdata(a=a, d=d, itemtype=itemtype, Theta=Theta)
#model <- '
#F1 = 1-6
#(F1 * F1) = 1-3'
#mod <- mirt(nonlindata, model)
#coef(mod)
#### 2PLNRM model for item 4 (with 4 categories), 2PL otherwise
a <- matrix(rlnorm(4,0,.2))
# first column of item 4 is the intercept for the correct category of 2PL model,
#    otherwise nominal model configuration
d <- matrix(c(
-1.0,NA,NA,NA,
 1.5,NA,NA,NA,
 0.0,NA,NA,NA,
 1, 0.0,-0.5,0.5),ncol=4,byrow=TRUE)
nominal <- matrix(NA, nrow(d), ncol(d))
nominal[4, ] <- c(NA,0,.5,.6)
items <- c(rep('2PL',3),'nestlogit')
dataset <- simdata(a,d,2000,items,nominal=nominal)
#mod <- mirt(dataset, 1, itemtype = c('2PL', '2PL', '2PL', '2PLNRM'), key=c(NA,NA,NA,0))
#coef(mod)
#itemplot(mod,4)
# return list of simulation parameters
listobj <- simdata(a,d,2000,items,nominal=nominal, returnList=TRUE)
str(listobj)
## List of 3
##  $ itemobjects:List of 4
##   ..$ :Formal class 'dich' [package "mirt"] with 23 slots
##   .. .. ..@ par          : num [1:4] 0.863 -1 -999 999
##   .. .. .. ..- attr(*, "na.action")= 'omit' int [1:3] 3 4 5
##   .. .. ..@ SEpar        : num(0) 
##   .. .. ..@ parnames     : chr(0) 
##   .. .. ..@ est          : logi(0) 
##   .. .. ..@ dps          :function ()  
##   .. .. ..@ dps2         :function ()  
##   .. .. ..@ constr       : logi(0) 
##   .. .. ..@ itemclass    : int(0) 
##   .. .. ..@ parnum       : num(0) 
##   .. .. ..@ nfact        : int 1
##   .. .. ..@ nfixedeffects: num(0) 
##   .. .. ..@ fixed.design : num[0 , 0 ] 
##   .. .. ..@ dat          : num[0 , 0 ] 
##   .. .. ..@ ncat         : int 2
##   .. .. ..@ gradient     : num(0) 
##   .. .. ..@ hessian      : num[0 , 0 ] 
##   .. .. ..@ itemtrace    : num[0 , 0 ] 
##   .. .. ..@ lbound       : num(0) 
##   .. .. ..@ ubound       : num(0) 
##   .. .. ..@ any.prior    : logi(0) 
##   .. .. ..@ prior.type   : int(0) 
##   .. .. ..@ prior_1      : num(0) 
##   .. .. ..@ prior_2      : num(0) 
##   ..$ :Formal class 'dich' [package "mirt"] with 23 slots
##   .. .. ..@ par          : num [1:4] 1.25 1.5 -999 999
##   .. .. .. ..- attr(*, "na.action")= 'omit' int [1:3] 3 4 5
##   .. .. ..@ SEpar        : num(0) 
##   .. .. ..@ parnames     : chr(0) 
##   .. .. ..@ est          : logi(0) 
##   .. .. ..@ dps          :function ()  
##   .. .. ..@ dps2         :function ()  
##   .. .. ..@ constr       : logi(0) 
##   .. .. ..@ itemclass    : int(0) 
##   .. .. ..@ parnum       : num(0) 
##   .. .. ..@ nfact        : int 1
##   .. .. ..@ nfixedeffects: num(0) 
##   .. .. ..@ fixed.design : num[0 , 0 ] 
##   .. .. ..@ dat          : num[0 , 0 ] 
##   .. .. ..@ ncat         : int 2
##   .. .. ..@ gradient     : num(0) 
##   .. .. ..@ hessian      : num[0 , 0 ] 
##   .. .. ..@ itemtrace    : num[0 , 0 ] 
##   .. .. ..@ lbound       : num(0) 
##   .. .. ..@ ubound       : num(0) 
##   .. .. ..@ any.prior    : logi(0) 
##   .. .. ..@ prior.type   : int(0) 
##   .. .. ..@ prior_1      : num(0) 
##   .. .. ..@ prior_2      : num(0) 
##   ..$ :Formal class 'dich' [package "mirt"] with 23 slots
##   .. .. ..@ par          : num [1:4] 1.02 0 -999 999
##   .. .. .. ..- attr(*, "na.action")= 'omit' int [1:3] 3 4 5
##   .. .. ..@ SEpar        : num(0) 
##   .. .. ..@ parnames     : chr(0) 
##   .. .. ..@ est          : logi(0) 
##   .. .. ..@ dps          :function ()  
##   .. .. ..@ dps2         :function ()  
##   .. .. ..@ constr       : logi(0) 
##   .. .. ..@ itemclass    : int(0) 
##   .. .. ..@ parnum       : num(0) 
##   .. .. ..@ nfact        : int 1
##   .. .. ..@ nfixedeffects: num(0) 
##   .. .. ..@ fixed.design : num[0 , 0 ] 
##   .. .. ..@ dat          : num[0 , 0 ] 
##   .. .. ..@ ncat         : int 2
##   .. .. ..@ gradient     : num(0) 
##   .. .. ..@ hessian      : num[0 , 0 ] 
##   .. .. ..@ itemtrace    : num[0 , 0 ] 
##   .. .. ..@ lbound       : num(0) 
##   .. .. ..@ ubound       : num(0) 
##   .. .. ..@ any.prior    : logi(0) 
##   .. .. ..@ prior.type   : int(0) 
##   .. .. ..@ prior_1      : num(0) 
##   .. .. ..@ prior_2      : num(0) 
##   ..$ :Formal class 'nestlogit' [package "mirt"] with 24 slots
##   .. .. ..@ correctcat   : int 1
##   .. .. ..@ par          : num [1:10] 0.771 1 -999 999 0 ...
##   .. .. ..@ SEpar        : num(0) 
##   .. .. ..@ parnames     : chr(0) 
##   .. .. ..@ est          : logi(0) 
##   .. .. ..@ dps          :function ()  
##   .. .. ..@ dps2         :function ()  
##   .. .. ..@ constr       : logi(0) 
##   .. .. ..@ itemclass    : int(0) 
##   .. .. ..@ parnum       : num(0) 
##   .. .. ..@ nfact        : int 1
##   .. .. ..@ nfixedeffects: num(0) 
##   .. .. ..@ fixed.design : num[0 , 0 ] 
##   .. .. ..@ dat          : num[0 , 0 ] 
##   .. .. ..@ ncat         : int 4
##   .. .. ..@ gradient     : num(0) 
##   .. .. ..@ hessian      : num[0 , 0 ] 
##   .. .. ..@ itemtrace    : num[0 , 0 ] 
##   .. .. ..@ lbound       : num(0) 
##   .. .. ..@ ubound       : num(0) 
##   .. .. ..@ any.prior    : logi(0) 
##   .. .. ..@ prior.type   : int(0) 
##   .. .. ..@ prior_1      : num(0) 
##   .. .. ..@ prior_2      : num(0) 
##  $ data       : num [1:2000, 1:4] 1 1 1 0 0 1 0 1 0 1 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:4] "Item_1" "Item_2" "Item_3" "Item_4"
##  $ Theta      : num [1:2000, 1] -0.399 1.931 1.621 -0.424 1.343 ...
# generate dataset from converged model
mod <- mirt(Science, 1, itemtype = c(rep('gpcm', 3), 'nominal'))
sim <- simdata(model=mod, N=1000)
head(sim)
##      Comfort Work Future Benefit
## [1,]       4    3      3       4
## [2,]       3    2      3       3
## [3,]       3    2      4       2
## [4,]       4    3      4       3
## [5,]       3    3      2       3
## [6,]       2    3      3       2
Theta <- matrix(rnorm(100))
sim <- simdata(model=mod, Theta=Theta)
head(sim)
##      Comfort Work Future Benefit
## [1,]       3    3      4       4
## [2,]       4    2      3       2
## [3,]       3    2      3       2
## [4,]       3    3      3       3
## [5,]       3    3      3       3
## [6,]       3    3      4       3
# alternatively, define a suitable object with functions from the mirtCAT package
# help(generate.mirt_object)
library(mirtCAT)
## Loading required package: shiny
nitems <- 50
a1 <- rlnorm(nitems, .2,.2)
d <- rnorm(nitems)
g <- rbeta(nitems, 20, 80)
pars <- data.frame(a1=a1, d=d, g=g)
head(pars)
##          a1          d         g
## 1 1.1271598 -0.6479570 0.2637693
## 2 1.3539630 -1.4832170 0.1903528
## 3 1.1640507  0.3390045 0.1782070
## 4 0.9568311 -1.3578148 0.1389409
## 5 1.0394247  0.3835373 0.2361323
## 6 1.1479152  1.3522255 0.1990177
obj <- generate.mirt_object(pars, '3PL')
dat <- simdata(N=200, model=obj)
#### 10 item GGUMs test with 4 categories each
a <- rlnorm(10, .2, .2)
b <- rnorm(10) #passed to d= input, but used as the b parameters
diffs <- t(apply(matrix(runif(10*3, .3, 1), 10), 1, cumsum))
t <- -(diffs - rowMeans(diffs))
dat <- simdata(a, b, 1000, 'ggum', t=t)
apply(dat, 2, table)
##   Item_1 Item_2 Item_3 Item_4 Item_5 Item_6 Item_7 Item_8 Item_9 Item_10
## 0    477    496    674    332    438    417    558    458    475     513
## 1    299    309    218    388    364    365    288    320    314     305
## 2    153    155     79    232    158    175    109    167    162     138
## 3     71     40     29     48     40     43     45     55     49      44
# mod <- mirt(dat, 1, 'ggum')
# coef(mod)
######
# prob.list example
# custom probability function that returns a matrix
fun <- function(a, b, theta){
    P <- 1 / (1 + exp(-a * (theta-b)))
    cbind(1-P, P)
}
set.seed(1)
theta <- matrix(rnorm(100))
prob.list <- list()
nitems <- 5
a <- rlnorm(nitems, .2, .2); b <- rnorm(nitems, 0, 1/2)
for(i in 1:nitems) prob.list[[i]] <- fun(a[i], b[i], theta)
str(prob.list)
## List of 5
##  $ : num [1:100, 1:2] 0.836 0.68 0.865 0.317 0.645 ...
##  $ : num [1:100, 1:2] 0.771 0.554 0.813 0.179 0.509 ...
##  $ : num [1:100, 1:2] 0.75 0.569 0.788 0.239 0.532 ...
##  $ : num [1:100, 1:2] 0.737 0.503 0.785 0.146 0.457 ...
##  $ : num [1:100, 1:2] 0.828 0.669 0.858 0.308 0.634 ...
dat <- simdata(prob.list=prob.list)
head(dat)
##      Item_1 Item_2 Item_3 Item_4 Item_5
## [1,]      0      0      0      1      1
## [2,]      0      0      0      1      0
## [3,]      0      0      0      0      0
## [4,]      1      1      0      0      0
## [5,]      1      1      0      0      0
## [6,]      0      1      0      1      0
# prob.list input is useful when defining custom items as well
name <- 'old2PL'
par <- c(a = .5, b = -2)
est <- c(TRUE, TRUE)
P.old2PL <- function(par,Theta, ncat){
     a <- par[1]
     b <- par[2]
     P1 <- 1 / (1 + exp(-1*a*(Theta - b)))
     cbind(1-P1, P1)
}
x <- createItem(name, par=par, est=est, P=P.old2PL)
prob.list[[1]] <- x@P(x@par, theta)
## End(No test)