library(dplyr)
library(purrr)
library(ggplot2)
<- 1000000
n <- rbinom(n, 2, 0.4) %>% scale(scale=FALSE)
g1 <- rbinom(n, 2, 0.4) %>% scale(scale=FALSE)
g2 <- rbinom(n, 2, 0.4) %>% scale(scale=FALSE)
g3 <- rnorm(n)
u <- rnorm(n)
e1 <- rnorm(n)
e2 <- g1 + g2 + u + e1
bmi <- u + bmi + g3 + e2
cancer_liability <- rbinom(n, 1, plogis(cancer_liability-3)) cancer
Background
table(cancer)
summary(glm(cancer ~ bmi + u))
summary(lm(bmi ~ g1))
summary(lm(bmi ~ g1, subset=cancer==1))
summary(lm(bmi ~ g1, subset=cancer==0))
summary(lm(bmi ~ g1 * cancer))
<- function(nid, beta_u1, beta_u2, beta_x=1, beta_g1=1, beta_g2=1, beta_g3=1, p1=0.4, p2=0.4, p3=0.4, ve1=1, ve2=1, intercept=-3, sim=1, scale=c("additive", "log-additive")[1], transform=TRUE) {
sim <- c(as.list(environment())) %>% as_tibble()
out
<- rbinom(nid, 2, p1) %>% scale(scale=FALSE)
g1 <- rbinom(nid, 2, p2) %>% scale(scale=FALSE)
g2 <- rbinom(nid, 2, p3) %>% scale(scale=FALSE)
g3 <- rnorm(nid)
u <- rnorm(nid, sd=sqrt(ve1))
e1 <- rnorm(nid, sd=sqrt(ve2))
e2 if(scale=="additive") {
<- g1 * beta_g1 + g2 * beta_g2 + u * beta_u1 + e1
bmi else if(scale == "log-additive") {
} <- exp(g1 * beta_g1) * exp(g2 * beta_g2) * exp(u * beta_u1) * exp(e1)
bmi else {
} stop("scale must be additive or log-additive")
}
if(scale == "additive") {
<- intercept + u * beta_u2 + bmi * beta_x + g3 * beta_g3 + e2
cancer_liability else if(scale == "log-additive") {
} <- exp(intercept) * exp(u * beta_u2) * exp(bmi * beta_x) * exp(g3 * beta_g3) * exp(e2)
cancer_liability else {
} stop("scale must be additive or log-additive")
}
<- rbinom(nid, 1, plogis(cancer_liability))
cancer
if(transform) {
<- log(bmi)
bmi
}
<- summary(lm(bmi ~ g1))
mod1 <- summary(lm(bmi ~ g1, subset=cancer == 0))
mod2 <- summary(lm(bmi ~ g1, subset=cancer == 1))
mod3 <- summary(lm(bmi ~ g3 * cancer))
mod4
<- tibble(
o model = c(
"mod1",
"mod2",
"mod3",
"mod4_main",
"mod4_gxe"
),bhat = c(
$coef[2,1],
mod1$coef[2,1],
mod2$coef[2,1],
mod3$coef[2,1],
mod4$coef[4,1]
mod4
)
)<- bind_cols(o, out)
o
<- tibble(g1, g2, g3, u, bmi, cancer)
dat
return(list(o=o, dat=dat))
}
sim(100000, 1, 1, scale="log-additive")$dat %>% {hist(log(.$bmi))}
<- expand.grid(
param n=100000,
beta_u1 = c(0, 1),
beta_u2 = c(0, 1),
beta_g2 = c(0, 1),
beta_g3 = c(0, 1),
beta_x = c(0, 1),
sim=1:10
)
<- pmap(param, sim) %>% bind_rows() res
ggplot(res %>% filter(beta_g3 == 0 & !grepl("mod4", model)), aes(x=as.factor(beta_g2), y=bhat)) +
geom_boxplot(aes(fill=model, colour=as.factor(beta_x))) +
facet_grid(beta_u1 ~ beta_u2, labeller = label_both)
The bias in bhat_g1bmi|c is based on all ancestors of bmi when conditioning on cancer status
<- sim(100000, beta_u1=0, beta_u2=0, beta_g3=0, ve1=0, beta_g2=0)$dat
d summary(lm(g1 ~ g2, d, subset=d$cancer == 1))
summary(lm(bmi ~ g1, d, subset=d$cancer == 0))
summary(lm(bmi ~ g1, d))
<- 10000
n <- rnorm(n)
g1 <- rnorm(n)
g2 <- rnorm(n)
g3 <- rnorm(n)
e
<- g1 + g2 + g3 + e
x <- rbinom(n, 1, plogis(x)) y
sim(10000, 1, 1, scale="log-additive", transform=FALSE)[["o"]]
sim(10000, 1, 1, scale="log-additive", transform=TRUE)[["o"]]
sim(10000, 1, 1, scale="additive", transform=FALSE)[["o"]]
sim(10000, 1, 1, scale="additive", transform=TRUE)[["o"]]
log(-1)
sessionInfo()