MVMR collider – Gibran Hemani’s lab book

Background

Imaging traits are available in non-random samples.

library(simulateGP)
library(TwoSampleMR)

TwoSampleMR version 0.6.16

  [>] New authentication requirements: https://mrcieu.github.io/ieugwasr/articles/guide.html#authentication.

  [>] Major upgrades to our servers completed to improve service and stability.

  [>] We need your help to shape our emerging roadmap!

      Please take 2 minutes to give us feedback -

      https://forms.office.com/e/eSr7EFAfCG


Warning:
You are running an old version of the TwoSampleMR package.
This version:   0.6.16
Latest version: 0.6.17
Please consider updating using remotes::install_github('MRCIEU/TwoSampleMR')


Attaching package: 'TwoSampleMR'

The following objects are masked from 'package:simulateGP':

    allele_frequency, contingency, get_population_allele_frequency

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(janitor)


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

library(furrr)

Loading required package: future

library(ggplot2)

n <- 10000
nsnp <- 100
g <- make_geno(n, nsnp, 0.5)
dim(g)

[1] 10000   100

effs <- choose_effects(nsnp, 0.5)

bmi <- make_phen(effs, g)

gx <- make_geno(n, nsnp, 0.5)
effsx <- choose_effects(nsnp, 0.5)

x <- make_phen(effsx, gx)

test <- rbinom(n, 1, plogis(bmi + x + bmi*x))

table(test)

test
   0    1 
5230 4770

y <- bmi * 0.5 + rnorm(n)

summary(lm(y ~ x))


Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.2328 -0.7536 -0.0026  0.7479  4.6932 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  0.01519    0.01116   1.360   0.1737  
x           -0.01911    0.01116  -1.712   0.0869 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.116 on 9998 degrees of freedom
Multiple R-squared:  0.0002931, Adjusted R-squared:  0.0001931 
F-statistic: 2.931 on 1 and 9998 DF,  p-value: 0.08691

summary(lm(y[test==1] ~ x[test==1]))


Call:
lm(formula = y[test == 1] ~ x[test == 1])

Residuals:
    Min      1Q  Median      3Q     Max 
-3.9033 -0.7492  0.0070  0.7614  4.5905 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   0.14059    0.01724   8.156 4.40e-16 ***
x[test == 1]  0.08875    0.01784   4.973 6.81e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.118 on 4768 degrees of freedom
Multiple R-squared:  0.005161,  Adjusted R-squared:  0.004952 
F-statistic: 24.73 on 1 and 4768 DF,  p-value: 6.811e-07

gwas_x <- gwas(x, cbind(g, gx))
gwas_y <- gwas(y, cbind(g, gx))

dat <- merge_exp_out(gwas_x, gwas_y)
library(TwoSampleMR)
mr(dat, method_list="mr_ivw")

Analysing 'X' on 'Y'

  id.exposure id.outcome outcome exposure                    method nsnp
1           X          Y       Y        X Inverse variance weighted  200
            b         se      pval
1 -0.01705005 0.03722552 0.6469378

gwas_x_test <- gwas(x[test==1], cbind(g, gx)[test==1,])
gwas_y_test <- gwas(y[test==1], cbind(g, gx)[test==1,])
gwas_test <- gwas(test, cbind(g, gx))

dat_test <- merge_exp_out(gwas_x_test, gwas_y_test)
mr(dat_test, method_list="mr_ivw")

Analysing 'X' on 'Y'

  id.exposure id.outcome outcome exposure                    method nsnp
1           X          Y       Y        X Inverse variance weighted  200
          b         se         pval
1 0.1730708 0.04249289 4.642449e-05

dat_test2 <- merge_exp_out(gwas_x_test, gwas_y)
mr(dat_test2, method_list="mr_ivw")

Analysing 'X' on 'Y'

  id.exposure id.outcome outcome exposure                    method nsnp
1           X          Y       Y        X Inverse variance weighted  200
           b         se       pval
1 0.08982225 0.04301305 0.03677478

sig_index <- gwas_x_test$pval < 5e-8 | gwas_test$pval < 5e-8
summary(lm(gwas_y_test$bhat[sig_index] ~ 0 + gwas_x_test$bhat[sig_index] + gwas_test$bhat[sig_index], weight=1/gwas_y_test$se[sig_index]^2))


Call:
lm(formula = gwas_y_test$bhat[sig_index] ~ 0 + gwas_x_test$bhat[sig_index] + 
    gwas_test$bhat[sig_index], weights = 1/gwas_y_test$se[sig_index]^2)

Weighted Residuals:
    Min      1Q  Median      3Q     Max 
-1.7689 -0.5755  0.1131  0.7070  2.6460 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
gwas_x_test$bhat[sig_index] -0.33688    0.07541  -4.467 0.000127 ***
gwas_test$bhat[sig_index]    1.99453    0.30199   6.605 4.37e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.117 on 27 degrees of freedom
Multiple R-squared:  0.6492,    Adjusted R-squared:  0.6232 
F-statistic: 24.99 on 2 and 27 DF,  p-value: 7.212e-07

summary(lm(gwas_y_test$bhat[sig_index] ~ 0 + gwas_x_test$bhat[sig_index] + gwas_test$bhat[sig_index], weight=1/gwas_y_test$se[sig_index]^2))


Call:
lm(formula = gwas_y_test$bhat[sig_index] ~ 0 + gwas_x_test$bhat[sig_index] + 
    gwas_test$bhat[sig_index], weights = 1/gwas_y_test$se[sig_index]^2)

Weighted Residuals:
    Min      1Q  Median      3Q     Max 
-1.7689 -0.5755  0.1131  0.7070  2.6460 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
gwas_x_test$bhat[sig_index] -0.33688    0.07541  -4.467 0.000127 ***
gwas_test$bhat[sig_index]    1.99453    0.30199   6.605 4.37e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.117 on 27 degrees of freedom
Multiple R-squared:  0.6492,    Adjusted R-squared:  0.6232 
F-statistic: 24.99 on 2 and 27 DF,  p-value: 7.212e-07

Simulation

run_sim <- function(nsnp, n, h2_x, h2_y, h2_u, b_us, b_xs, b_ints, b_uy, sim=1) {
    args <- tibble(nsnp, n, h2_x, h2_u, b_us, b_xs, b_ints, b_uy, sim)
    gu <- make_geno(n, nsnp, 0.5)
    effsu <- choose_effects(nsnp, h2_u)
    u <- make_phen(effsu, gu)
    uxy <- rnorm(n)

    gx <- make_geno(n, nsnp, 0.5)
    effsx <- choose_effects(nsnp, h2_x)
    x <- make_phen(c(0.2, effsx), cbind(uxy, gx))

    test <- rbinom(n, 1, plogis(u * b_us + x * b_xs + u*x * b_ints))

    y <- make_phen(c(b_uy, 0.2), cbind(u, uxy))
    res <- list()
    res[[1]] <- summary(lm(y ~ x))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="obs", exp="x", out="y", sel="none") %>% slice(2)
    res[[2]] <- summary(lm(y[test==1] ~ x[test==1]))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="obs", exp="x", out="y", sel="x and y") %>% slice(2)

    gwas_x <- gwas(x, cbind(gu, gx))
    gwas_y <- gwas(y, cbind(gu, gx))
    
    sel <- gwas_x$pval < 5e-8
    res[[3]] <- summary(lm(gwas_y$bhat[sel] ~ 0 + gwas_x$bhat[sel], weight=1/gwas_y$se[sel]^2 ))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mr", exp="x", out="y", sel="none") %>% slice(1)

    gwas_x_test <- gwas(x[test==1], cbind(gu, gx)[test==1,])
    gwas_y_test <- gwas(y[test==1], cbind(gu, gx)[test==1,])

    sel <- gwas_x_test$pval < 5e-8
    res[[4]] <- summary(lm(gwas_y_test$bhat[sel] ~ 0 + gwas_x_test$bhat[sel], weight=1/gwas_y_test$se[sel]^2 ))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mr", exp="x", out="y", sel="x and y") %>% slice(1)

    res[[5]] <- summary(lm(gwas_y$bhat[sel] ~ 0 + gwas_x_test$bhat[sel], weight=1/gwas_y$se[sel]^2 ))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mr", exp="x", out="y", sel="x") %>% slice(1)

    res[[6]] <- summary(lm(gwas_y_test$bhat[sel] ~ 0 + gwas_x$bhat[sel], weight=1/gwas_y_test$se[sel]^2 ))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mr", exp="x", out="y", sel="y") %>% slice(1)

    gwas_test <- gwas(test, cbind(gu, gx))

    sel <- gwas_x_test$pval < 5e-8 | gwas_test$pval < 5e-8
    res[[7]] <- summary(lm(gwas_y_test$bhat[sel] ~ 0 + gwas_x_test$bhat[sel] + gwas_test$bhat[sel], weight=1/gwas_y_test$se[sel]^2))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mvmr", exp="x", out="y", sel="x and y") %>% slice(1)
    
    res[[8]] <- summary(lm(gwas_y$bhat[sel] ~ 0 + gwas_x_test$bhat[sel] + gwas_test$bhat[sel], weight=1/gwas_y$se[sel]^2))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mvmr", exp="x", out="y", sel="x") %>% slice(1)

    res[[9]] <- summary(lm(gwas_y_test$bhat[sel] ~ 0 + gwas_x$bhat[sel] + gwas_test$bhat[sel], weight=1/gwas_y_test$se[sel]^2))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mvmr", exp="x", out="y", sel="y") %>% slice(1)

    res[[10]] <- summary(lm(gwas_y$bhat[sel] ~ 0 + gwas_x$bhat[sel] + gwas_test$bhat[sel], weight=1/gwas_y$se[sel]^2))$coef %>% as_tibble() %>% janitor::clean_names() %>% mutate(method="mvmr", exp="x", out="y", sel="none") %>% slice(1)

    return(bind_rows(res) %>% select(method, exp, out, sel, everything()) %>% bind_cols(args, .))
}

run_sim(100, 50000, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5) %>% str

tibble [10 × 17] (S3: tbl_df/tbl/data.frame)
 $ nsnp     : num [1:10] 100 100 100 100 100 100 100 100 100 100
 $ n        : num [1:10] 50000 50000 50000 50000 50000 50000 50000 50000 50000 50000
 $ h2_x     : num [1:10] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
 $ h2_u     : num [1:10] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
 $ b_us     : num [1:10] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
 $ b_xs     : num [1:10] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
 $ b_ints   : num [1:10] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
 $ b_uy     : num [1:10] 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
 $ sim      : num [1:10] 1 1 1 1 1 1 1 1 1 1
 $ method   : chr [1:10] "obs" "obs" "mr" "mr" ...
 $ exp      : chr [1:10] "x" "x" "x" "x" ...
 $ out      : chr [1:10] "y" "y" "y" "y" ...
 $ sel      : chr [1:10] "none" "x and y" "none" "x and y" ...
 $ estimate : num [1:10] 0.044993 0.117134 -0.000159 0.069973 0.006623 ...
 $ std_error: num [1:10] 0.00447 0.00649 0.00631 0.02118 0.02399 ...
 $ t_value  : num [1:10] 10.0708 18.044 -0.0251 3.3041 0.2761 ...
 $ pr_t     : num [1:10] 7.84e-24 2.58e-72 9.80e-01 1.67e-03 7.83e-01 ...

Adjusting for the GWAS for test status makes things horribly worse

param <- expand.grid(
    n=50000, nsnp=100,
    h2_x=0.5, h2_u=0.5, b_us = 0.5, b_xs = c(0, 0.5), b_ints = c(0, 0.5), b_uy = 0.5,
    sim=1:20
)

opt <- furrr::furrr_options(seed=TRUE)
plan(multicore, workers=7)
out <- future_pmap(param, run_sim, .options=opt) %>%
    bind_rows()

ggplot(out %>% filter(b_ints==0.5), aes(x=sel, y=estimate)) +
geom_hline(yintercept=0, linetype="dotted") +
geom_violin(aes(fill=sel)) +
facet_grid(method ~ b_xs, label=label_both, scale="free_x") +
labs(y="Effect estimate", x="Which variable is GWAS'd in only tested individuals", fill="") +
scale_fill_brewer(type="qual")

sessionInfo()

R version 4.5.1 (2025-06-13)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.6.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.5.2      furrr_0.3.1        future_1.49.0      janitor_2.2.1     
[5] dplyr_1.1.4        TwoSampleMR_0.6.16 simulateGP_0.1.3  

loaded via a namespace (and not attached):
 [1] generics_0.1.3     stringi_1.8.7      listenv_0.9.1      digest_0.6.37     
 [5] magrittr_2.0.3     evaluate_1.0.3     grid_4.5.1         timechange_0.3.0  
 [9] RColorBrewer_1.1-3 fastmap_1.2.0      plyr_1.8.9         jsonlite_2.0.0    
[13] purrr_1.0.4        scales_1.4.0       codetools_0.2-20   cli_3.6.5         
[17] rlang_1.1.6        parallelly_1.44.0  withr_3.0.2        yaml_2.3.10       
[21] tools_4.5.1        parallel_4.5.1     globals_0.18.0     vctrs_0.6.5       
[25] R6_2.6.1           lifecycle_1.0.4    lubridate_1.9.4    snakecase_0.11.1  
[29] stringr_1.5.1      htmlwidgets_1.6.4  pkgconfig_2.0.3    pillar_1.10.2     
[33] gtable_0.3.6       data.table_1.17.0  glue_1.8.0         Rcpp_1.0.14       
[37] xfun_0.52          tibble_3.2.1       tidyselect_1.2.1   knitr_1.50        
[41] farver_2.1.2       htmltools_0.5.8.1  labeling_0.4.3     rmarkdown_2.29    
[45] compiler_4.5.1