Surrogate Function Approximation via the Generalized Linear Model

Given a simulation that was executed with runSimulation, potentially with the argument store_results = TRUE to store the unsummarised analysis results, fit a surrogate function approximation (SFA) model to the results and (optionally) perform a root-solving step to solve a target quantity. See Schoemann et al. (2014) for details.

SFA(
  results,
  formula,
  family = "binomial",
  b = NULL,
  design = NULL,
  CI = 0.95,
  interval = NULL,
  ...
)

# S3 method for class 'SFA'
print(x, ...)

Arguments

results: data returned from runSimulation. This can be the original results object or the extracted results stored when using store_results = TRUE included to store the analysis results.
formula: formula to specify for the regression model
family: character vector indicating the family of GLMs to use (see family)
b: (optional) Target quantity to use for root solving given the fitted surrogate function (e.g., find sample size associated with SFA implied power of .80)
design: (optional) data.frame object containing all the information relevant for the surrogate model (passed to newdata in predict) with an NA value in the variable to be solved
CI: advertised confidence interval of SFA prediction around solved target
interval: interval to be passed to uniroot if not specified then the lowest and highest values from results for the respective variable will be used
...: additional arguments to pass to glm
x: an object of class SFA

References

Chalmers, R. P., & Adkins, M. C. (2020). Writing Effective and Reliable Monte Carlo Simulations with the SimDesign Package. The Quantitative Methods for Psychology, 16(4), 248-280. doi:10.20982/tqmp.16.4.p248

Schoemann, A. M., Miller, P., Pornprasertmanit, S., and Wu, W. (2014). Using Monte Carlo simulations to determine power and sample size for planned missing designs. International Journal of Behavioral Development, SAGE Publications, 38, 471-479.

Author

Phil Chalmers rphilip.chalmers@gmail.com

Examples

if (FALSE) { # \dontrun{

# create long Design object to fit surrogate over
Design <- createDesign(N = 100:500,
                       d = .2)
Design

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions

Generate <- function(condition, fixed_objects) {
    Attach(condition)
    group1 <- rnorm(N)
    group2 <- rnorm(N, mean=d)
    dat <- data.frame(group = gl(2, N, labels=c('G1', 'G2')),
                      DV = c(group1, group2))
    dat
}

Analyse <- function(condition, dat, fixed_objects) {
    p <- c(p = t.test(DV ~ group, dat, var.equal=TRUE)$p.value)
    p
}

Summarise <- function(condition, results, fixed_objects) {
    ret <- EDR(results, alpha = .05)
    ret
}

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Estimate power over N

# Use small number of replications given range of sample sizes
## note that due to the lower replications disabling the
## RAM printing will help reduce overhead

sim <- runSimulation(design=Design, replications=10,
                     generate=Generate, analyse=Analyse,
                     summarise=Summarise, store_results=TRUE, save=FALSE,
                     progress=FALSE, control=list(print_RAM=FALSE))
sim

# total of 4010 replication
sum(sim$REPLICATIONS)

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)

# plot the observed and SFA expected values
plot(p ~ N, sim, las=1, pch=16, main='Rejection rates with R=10')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
                b=.8, design=design)
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)


################
# example with smaller range but higher precision
Design <- createDesign(N = 375:425,
                       d = .2)
Design

sim2 <- runSimulation(design=Design, replications=100,
                     generate=Generate, analyse=Analyse,
                     summarise=Summarise, store_results=TRUE, save=FALSE,
                     progress=FALSE, control=list(print_RAM=FALSE))
sim2
sum(sim2$REPLICATIONS) # more replications in total

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim2)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)

# plot the observed and SFA expected values
plot(p ~ N, sim2, las=1, pch=16, main='Rejection rates with R=100')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)

###################
# vary multiple parameters (e.g., sample size + effect size) to fit
# multi-parameter surrogate

Design <- createDesign(N = seq(from=10, to=500, by=10),
                       d = seq(from=.1, to=.5, by=.1))
Design

sim3 <- runSimulation(design=Design, replications=50,
                      generate=Generate, analyse=Analyse,
                      summarise=Summarise, store_results=TRUE, save=FALSE,
                      progress=FALSE, control=list(print_RAM=FALSE))
sim3
sum(sim3$REPLICATIONS)

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim3)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# additive effects (logit(sig) ~ N + d)
sfa0 <- SFA(sim_results, formula = sig ~ N+d)
sfa0

# multiplicative effects (logit(sig) ~ N + d + N:d)
sfa <- SFA(sim_results, formula = sig ~ N*d)
sfa

# multiplicative better fit (sample size interacts with effect size)
anova(sfa0, sfa, test = "LRT")
summary(sfa)

# plot the observed and SFA expected values
library(ggplot2)
sim3$pred <- predict(sfa, type = 'response', newdata=sim3)
ggplot(sim3, aes(N, p, color = factor(d))) +
  geom_point() + geom_line(aes(y=pred)) +
  facet_wrap(~factor(d))

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)

# root prediction where d *not* used in original data
design <- data.frame(N=NA, d=.25)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.25)

} # }

Surrogate Function Approximation via the Generalized Linear Model

Arguments

References

See also

Author

Examples