#-------------------------------------------------------------------
# Custom functions required (not defined in other R packages)
#' Jacknife variance test
#'
#' @param DV a numeric vector
#' @param group a character/factor vector containing the group membership
<- function(DV, group){
Jacknife_vartest stopifnot(length(DV) == length(group))
<- unique(group)
nms <- length(nms)
g <- s2 <- Ui. <- numeric(g)
Ns <- vector('list', g)
Uij for(i in 1:g){
<- group == nms[i]
pick <- subset(DV, pick)
sub <- length(sub)
Ns[i] <- var(sub)
s2[i] <- numeric(Ns[i])
sij2 for(j in 1:length(sub))
<- var(sub[-j])
sij2[j] <- Ns[i] * log(s2[i]) - (Ns[i]-1) * log(sij2)
Uij[[i]] <- mean(Uij[[i]])
Ui.[i]
}<- mean(do.call(c, Uij))
U.. <- sum(Ns * (Ui. - U..)^2) / (g-1)
num <- 0
den for(i in 1:g)
<- den + sum((Uij[[i]] - Ui.[i])^2) / sum(Ns-1)
den <- num/den
J <- g-1; df2 <- sum(Ns-1)
df1 <- pf(J, df1, df2, lower.tail = FALSE)
p.value data.frame(J=J, df1=df1, df2=df2, p.value=p.value, row.names="")
}
#' Layard's variance test
#'
#' @param DV a numeric vector
#' @param group a character/factor vector containing the group membership
<- function(DV, group){
Layard_vartest stopifnot(length(DV) == length(group))
<- unique(group)
nms <- table(group)
Ns <- sum(Ns)
N <- length(nms)
g <- numeric(g)
log_s2 <- numeric(N)
deviation for(i in 1:g){
<- group == nms[i]
pick <- subset(DV, pick)
sub <- mean(sub)
xbar <- sub - xbar
deviation[pick] <- log(var(sub))
log_s2[i]
}<- N * sum(deviation^4) / (sum(deviation^2)^2) - 3
gamma <- 2 + (1 - g/N) * gamma
tau2 <- sum( (Ns-1) * (log_s2 - sum((Ns-1) * log_s2)/sum(Ns-1))^2) / tau2
S <- g - 1
df <- pchisq(S, df, lower.tail=FALSE)
p.value data.frame(S=S, df=df, p.value=p.value, row.names = "")
}
Brown and Forsythe (1974) simulation
Simulation from:
- Brown, M. B. and Forsythe, A. B. (1974). Robust tests for the equality of variances. Journal of the American Statistical Association, 69(346), 364–367.
User-defined functions for analyses
Simulation code
#-------------------------------------------------------------------
# Define the design conditions
library(SimDesign)
<- createDesign(var_ratio=c(4, 2, 1, 1/2, 1/4),
Design N=c(80, 20),
groups_equal=c(TRUE, FALSE),
distribution=c('Gaussian', 't4', 'Chi4', 'Cauchy'),
# remove redundant or not-applicable rows
subset = !(groups_equal & var_ratio < 1) |
!(distribution != 'Gaussian' & var_ratio != 1))
# Brown and Forsythe use different sample sizes when groups were unequal
<- with(Design, !groups_equal & N == 80)
pick1 <- with(Design, !groups_equal & N == 20)
pick2 $N[pick1] <- 60
Design$N[pick2] <- 30
Design
Design
#-------------------------------------------------------------------
<- function(condition, fixed_objects = NULL) {
Generate Attach(condition)
<- ifelse(var_ratio <= 1, 1, sqrt(var_ratio))
sd1 <- ifelse(var_ratio <= 1, sqrt(1/var_ratio), 1)
sd2 if(groups_equal){
<- N2 <- N/2
N1 else {
} <- N * 1/3
N1 <- N - N1
N2
}<- switch(distribution,
dv Gaussian = c(rnorm(N1, sd = sd1), rnorm(N2, sd = sd2)),
t4 = c(rt(N1, df = 4), rt(N2, df = 4)),
Chi4 = c(rchisq(N1, df = 4), rchisq(N2, df = 4)),
Cauchy = c(rcauchy(N1), rcauchy(N2))
)<- data.frame(DV=dv,
dat group=c(rep('G1', N1), rep('G2', N2)))
dat
}
<- function(condition, dat, fixed_objects = NULL) {
Analyse Attach(dat)
<- var.test(DV ~ group)$p.value
F_test <- Jacknife_vartest(DV, group)$p.value
Jacknife <- Layard_vartest(DV, group)$p.value
Layard <- levene.test(DV, group=group, location = 'median')$p.value
W50 <- levene.test(DV, group=group, location = 'trim.mean',
W10 trim.alpha = .1)$p.value
<- levene.test(DV, group=group, location = 'mean')$p.value
Levene <- c(F=F_test, Jacknife=Jacknife, Layard=Layard,
ret Levene=Levene, W10=W10, W50=W50)
ret
}
<- function(condition, results, fixed_objects = NULL) {
Summarise <- EDR(results, alpha = .05)
ret
ret
}
#-------------------------------------------------------------------
# Run the MCS
<- runSimulation(design=Design, replications=1000, parallel=TRUE,
res generate=Generate, analyse=Analyse,
summarise=Summarise, packages='lawstat',
filename='BF_simulation')
res
# A tibble: 68 × 14
var_ratio N groups_equal distribution F Jacknife Layard Levene W10
<dbl> <dbl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 80 TRUE Gaussian 0.995 0.99 0.99 0.978 0.978
2 2 80 TRUE Gaussian 0.538 0.52 0.533 0.478 0.472
3 1 80 TRUE Gaussian 0.047 0.039 0.041 0.039 0.04
4 0.5 80 TRUE Gaussian 0.577 0.56 0.564 0.51 0.501
5 0.25 80 TRUE Gaussian 0.99 0.984 0.986 0.978 0.975
6 4 20 TRUE Gaussian 0.517 0.421 0.517 0.439 0.42
7 2 20 TRUE Gaussian 0.153 0.134 0.177 0.152 0.137
8 1 20 TRUE Gaussian 0.047 0.059 0.07 0.059 0.052
9 0.5 20 TRUE Gaussian 0.166 0.134 0.189 0.135 0.129
10 0.25 20 TRUE Gaussian 0.513 0.412 0.52 0.435 0.403
# ℹ 58 more rows
# ℹ 5 more variables: W50 <dbl>, REPLICATIONS <int>, SIM_TIME <chr>,
# COMPLETED <chr>, SEED <int>
Extras
# Type I errors
<- subset(res, var_ratio == 1)
TypeI TypeI
# A tibble: 16 × 14
var_ratio N groups_equal distribution F Jacknife Layard Levene W10
<dbl> <dbl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 80 TRUE Gaussian 0.047 0.039 0.041 0.039 0.04
2 1 20 TRUE Gaussian 0.047 0.059 0.07 0.059 0.052
3 1 60 FALSE Gaussian 0.056 0.059 0.063 0.055 0.053
4 1 30 FALSE Gaussian 0.049 0.044 0.059 0.053 0.05
5 1 80 TRUE t4 0.264 0.078 0.06 0.046 0.046
6 1 20 TRUE t4 0.14 0.055 0.075 0.055 0.045
7 1 60 FALSE t4 0.207 0.081 0.06 0.055 0.049
8 1 30 FALSE t4 0.173 0.066 0.069 0.058 0.049
9 1 80 TRUE Chi4 0.189 0.066 0.059 0.086 0.066
10 1 20 TRUE Chi4 0.132 0.071 0.102 0.093 0.077
11 1 60 FALSE Chi4 0.161 0.081 0.072 0.112 0.078
12 1 30 FALSE Chi4 0.162 0.098 0.101 0.109 0.079
13 1 80 TRUE Cauchy 0.782 0.194 0.295 0.134 0.025
14 1 20 TRUE Cauchy 0.633 0.175 0.394 0.176 0.042
15 1 60 FALSE Cauchy 0.766 0.2 0.303 0.165 0.027
16 1 30 FALSE Cauchy 0.666 0.204 0.349 0.177 0.051
# ℹ 5 more variables: W50 <dbl>, REPLICATIONS <int>, SIM_TIME <chr>,
# COMPLETED <chr>, SEED <int>
# Power
<- subset(res, var_ratio != 1)
Power Power
# A tibble: 52 × 14
var_ratio N groups_equal distribution F Jacknife Layard Levene W10
<dbl> <dbl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 80 TRUE Gaussian 0.995 0.99 0.99 0.978 0.978
2 2 80 TRUE Gaussian 0.538 0.52 0.533 0.478 0.472
3 0.5 80 TRUE Gaussian 0.577 0.56 0.564 0.51 0.501
4 0.25 80 TRUE Gaussian 0.99 0.984 0.986 0.978 0.975
5 4 20 TRUE Gaussian 0.517 0.421 0.517 0.439 0.42
6 2 20 TRUE Gaussian 0.153 0.134 0.177 0.152 0.137
7 0.5 20 TRUE Gaussian 0.166 0.134 0.189 0.135 0.129
8 0.25 20 TRUE Gaussian 0.513 0.412 0.52 0.435 0.403
9 4 60 FALSE Gaussian 0.92 0.903 0.876 0.895 0.89
10 2 60 FALSE Gaussian 0.43 0.389 0.371 0.36 0.353
# ℹ 42 more rows
# ℹ 5 more variables: W50 <dbl>, REPLICATIONS <int>, SIM_TIME <chr>,
# COMPLETED <chr>, SEED <int>