Sandwich variance estimators to control LD leakage

Author

Gibran Hemani

Published

February 10, 2023

Background

See https://explodecomputer.github.io/lab-book/posts/2022-12-16-vqtl/ for inflation issues with vQTLs.

Can sandwich estimators help https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5943197/

``library(dplyr)``
``````
Attaching package: 'dplyr'``````
``````The following objects are masked from 'package:stats':

filter, lag``````
``````The following objects are masked from 'package:base':

intersect, setdiff, setequal, union``````
``````library(ggplot2)
library(sandwich)
set.seed(12345)

test_drm <- function(g, y, sandwich)
{
nom <- c(b="Estimate", se="Std. Error", tval="t value", pval="Pr(>|t|)")
y.i <- tapply(y, g, median, na.rm=T)
z.ij <- abs(y - y.i[g+1])
a <- lm(z.ij ~ g)
m1 <- a %>%
summary %>%
coef %>%
as_tibble() %>%
# rename(all_of(nom)) %>%
slice(2) %>%
mutate(method="drm")
if(sandwich)
{
o <- sandwich::vcovHC(a, type="HC")
m2 <- m1
m2\$`Std. Error` <- sqrt(o[2,2])
m2\$`t value` <- m2\$Estimate / m2\$`Std. Error`
m2\$`Pr(>|t|)` <- pnorm(m2\$`t value`, lower.tail=TRUE)
m2\$method <- "drm sandwich"
return(m2)
}
return(m1)
}

correlated_binomial <- function (nid, p1, p2, rho, n = 2, round = TRUE, print = FALSE)
{
p <- p1
q <- p2
a <- function(rho, p, q) {
rho * sqrt(p * q * (1 - p) * (1 - q)) + (1 - p) * (1 - q)
}
a.0 <- a(rho, p, q)
prob <- c(`(0,0)` = a.0, `(1,0)` = 1 - q - a.0, `(0,1)` = 1 -
p - a.0, `(1,1)` = a.0 + p + q - 1)
if (min(prob) < 0) {
print(prob)
stop("Error: a probability is negative.")
}
n.sim <- nid
u <- sample.int(4, n.sim * n, replace = TRUE, prob = prob)
y <- floor((u - 1)/2)
x <- 1 - u%%2
x <- colSums(matrix(x, nrow = n))
y <- colSums(matrix(y, nrow = n))
if (round) {
x <- round(x)
y <- round(y)
}
if (print) {
print(table(x, y))
print(stats::cor(x, y))
}
return(cbind(x, y))
}

gendatp <- function(n, p1, p2, p3, r1)
{
dat <- correlated_binomial(n, p1, p2, r1) %>% as_tibble()
names(dat) <- c("y1", "y2")
dat\$y3 <- rbinom(n, 1, p3)
return(dat)
}

run_simp <- function(param, i)
{
set.seed(i*10)
dat <- gendatp(param\$n[i], param\$p1[i], param\$p2[i], param\$p3[i], param\$r1[i])
x <- dat\$y1 + rnorm(nrow(dat), sd=sd(dat\$y1)/4)
mod1 <- lm(x ~ y2 + y3, dat)
mod2 <- lm(x ~ y2 + y3 + y2*y3, dat)
amod <- anova(mod1, mod2)
param\$F[i] <- amod\$P[2]
o1 <- test_drm(dat\$y1, x, param\$sandwich[i])
o2 <- test_drm(dat\$y2, x, param\$sandwich[i])
o3 <- test_drm(dat\$y3, x, param\$sandwich[i])
param\$drm1[i] <- o1\$`Pr(>|t|)`
param\$drm2[i] <- o2\$`Pr(>|t|)`
param\$drm3[i] <- o3\$`Pr(>|t|)`
return(param[i,])
}

param <- expand.grid(
sandwich=c(T,F),
p1=0.1,
p2=0.1,
p3=0.5,
p4=0.1,
n=1000,
r1=seq(0, 1, by=0.2),
sim=1:250,
r2=NA,
F=NA,
drm1=NA,
drm2=NA,
drm3=NA
)

resp <- lapply(1:nrow(param), function(x) run_simp(param, x)) %>% bind_rows()
str(resp)``````
``````'data.frame':   3000 obs. of  13 variables:
\$ sandwich: logi  TRUE FALSE TRUE FALSE TRUE FALSE ...
\$ p1      : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
\$ p2      : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
\$ p3      : num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
\$ p4      : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
\$ n       : num  1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
\$ r1      : num  0 0 0.2 0.2 0.4 0.4 0.6 0.6 0.8 0.8 ...
\$ sim     : int  1 1 1 1 1 1 1 1 1 1 ...
\$ r2      : logi  NA NA NA NA NA NA ...
\$ F       : num  0.4136 0.4151 0.0649 0.1828 0.3566 ...
\$ drm1    : num  0.8477 0.376 0.9038 0.0149 0.3456 ...
\$ drm2    : num  0.988716 0.12422 1 0.000167 1 ...
\$ drm3    : num  0.45884 0.14746 0.00898 0.5823 0.18002 ...
- attr(*, "out.attrs")=List of 2
..\$ dim     : Named int [1:13] 2 1 1 1 1 1 6 250 1 1 ...
.. ..- attr(*, "names")= chr [1:13] "sandwich" "p1" "p2" "p3" ...
..\$ dimnames:List of 13
.. ..\$ sandwich: chr [1:2] "sandwich=TRUE" "sandwich=FALSE"
.. ..\$ p1      : chr "p1=0.1"
.. ..\$ p2      : chr "p2=0.1"
.. ..\$ p3      : chr "p3=0.5"
.. ..\$ p4      : chr "p4=0.1"
.. ..\$ n       : chr "n=1000"
.. ..\$ r1      : chr [1:6] "r1=0.0" "r1=0.2" "r1=0.4" "r1=0.6" ...
.. ..\$ sim     : chr [1:250] "sim=  1" "sim=  2" "sim=  3" "sim=  4" ...
.. ..\$ r2      : chr "r2=NA"
.. ..\$ F       : chr "F=NA"
.. ..\$ drm1    : chr "drm1=NA"
.. ..\$ drm2    : chr "drm2=NA"
.. ..\$ drm3    : chr "drm3=NA"``````
``````ggplot(resp, aes(x=r1, y=-log10(drm2))) +
geom_boxplot(aes(fill=as.factor(r1))) +
scale_fill_brewer(type="seq") +
labs(y="DRM -log10 p", x="LD between tagging\nvariant and causal variant", fill="") +
facet_grid(. ~ sandwich)``````

Power

``````run_simp2 <- function(param, i)
{
set.seed(i*10)
dat <- gendatp(param\$n[i], param\$p1[i], param\$p2[i], param\$p3[i], param\$r1[i])
x <- dat\$y1 + dat\$y2 * dat\$y3 * param\$b[i] + rnorm(nrow(dat), sd=sd(dat\$y1)/4)
mod1 <- lm(x ~ y2 + y3, dat)
mod2 <- lm(x ~ y2 + y3 + y2*y3, dat)
amod <- anova(mod1, mod2)
param\$F[i] <- amod\$P[2]
o1 <- test_drm(dat\$y1, x, param\$sandwich[i])
o2 <- test_drm(dat\$y2, x, param\$sandwich[i])
o3 <- test_drm(dat\$y3, x, param\$sandwich[i])
param\$drm1[i] <- o1\$`Pr(>|t|)`
param\$drm2[i] <- o2\$`Pr(>|t|)`
param\$drm3[i] <- o3\$`Pr(>|t|)`
return(param[i,])
}

param <- expand.grid(
sandwich=c(T,F),
b=c(0, 0.1, 1),
p1=0.1,
p2=0.1,
p3=0.5,
p4=0.1,
n=1000,
r1=seq(0, 1, by=0.2),
sim=1:250,
r2=NA,
F=NA,
drm1=NA,
drm2=NA,
drm3=NA
)

resp2 <- lapply(1:nrow(param), function(x) run_simp2(param, x)) %>% bind_rows()
str(resp)``````
``````'data.frame':   3000 obs. of  13 variables:
\$ sandwich: logi  TRUE FALSE TRUE FALSE TRUE FALSE ...
\$ p1      : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
\$ p2      : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
\$ p3      : num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
\$ p4      : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
\$ n       : num  1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
\$ r1      : num  0 0 0.2 0.2 0.4 0.4 0.6 0.6 0.8 0.8 ...
\$ sim     : int  1 1 1 1 1 1 1 1 1 1 ...
\$ r2      : logi  NA NA NA NA NA NA ...
\$ F       : num  0.4136 0.4151 0.0649 0.1828 0.3566 ...
\$ drm1    : num  0.8477 0.376 0.9038 0.0149 0.3456 ...
\$ drm2    : num  0.988716 0.12422 1 0.000167 1 ...
\$ drm3    : num  0.45884 0.14746 0.00898 0.5823 0.18002 ...
- attr(*, "out.attrs")=List of 2
..\$ dim     : Named int [1:13] 2 1 1 1 1 1 6 250 1 1 ...
.. ..- attr(*, "names")= chr [1:13] "sandwich" "p1" "p2" "p3" ...
..\$ dimnames:List of 13
.. ..\$ sandwich: chr [1:2] "sandwich=TRUE" "sandwich=FALSE"
.. ..\$ p1      : chr "p1=0.1"
.. ..\$ p2      : chr "p2=0.1"
.. ..\$ p3      : chr "p3=0.5"
.. ..\$ p4      : chr "p4=0.1"
.. ..\$ n       : chr "n=1000"
.. ..\$ r1      : chr [1:6] "r1=0.0" "r1=0.2" "r1=0.4" "r1=0.6" ...
.. ..\$ sim     : chr [1:250] "sim=  1" "sim=  2" "sim=  3" "sim=  4" ...
.. ..\$ r2      : chr "r2=NA"
.. ..\$ F       : chr "F=NA"
.. ..\$ drm1    : chr "drm1=NA"
.. ..\$ drm2    : chr "drm2=NA"
.. ..\$ drm3    : chr "drm3=NA"``````
``````ggplot(resp2, aes(x=r1, y=-log10(drm2))) +
geom_boxplot(aes(fill=as.factor(r1))) +
scale_fill_brewer(type="seq") +
labs(y="DRM -log10 p", x="LD between tagging\nvariant and causal variant", fill="") +
facet_grid(b ~ sandwich)``````
``Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).``

``sessionInfo()``
``````R version 4.2.1 Patched (2022-09-06 r82817)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.6.2

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] sandwich_3.0-2 ggplot2_3.4.0  dplyr_1.0.10

loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 pillar_1.8.1       compiler_4.2.1     tools_4.2.1
[5] digest_0.6.31      jsonlite_1.8.4     evaluate_0.19      lifecycle_1.0.3
[9] tibble_3.1.8       gtable_0.3.1       lattice_0.20-45    pkgconfig_2.0.3
[13] rlang_1.0.6        DBI_1.1.3          cli_3.5.0          yaml_2.3.6
[17] xfun_0.36          fastmap_1.1.0      withr_2.5.0        stringr_1.5.0
[21] knitr_1.41         generics_0.1.3     vctrs_0.5.1        htmlwidgets_1.5.4
[25] grid_4.2.1         tidyselect_1.2.0   glue_1.6.2         R6_2.5.1
[29] fansi_1.0.3        rmarkdown_2.16     farver_2.1.1       magrittr_2.0.3
[33] scales_1.2.1       htmltools_0.5.4    assertthat_0.2.1   colorspace_2.0-3
[37] labeling_0.4.2     utf8_1.2.2         stringi_1.7.8      munsell_0.5.0
[41] zoo_1.8-11        ``````