Estimating environmental factor using GxE interaction

Author

Gibran Hemani

Published

January 21, 2025

Background

Is it possible to estimate the environmental factor using the GxE interaction term? In this model the genetic effect is masked unless individuals experience a particular environment. Stratifying individuals by different levels of environmental exposure will give different marginal genetic effects. Can the influence of the environment be recovered from this?

Initial simulation

library(purrr)
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
fn <- function(n, bap, bm, bi, sim=1) {
    args <- list(n=n, bap=bap, bm=bm, bi=bi, sim=sim) %>% as_tibble()
    ap <- rbinom(n, 2, 0.5) / 2
    g <- rbinom(n, 2, 0.3)

    l <- ap * bap + g * ap * bi + rnorm(n)
    copd <- rbinom(n, 1, plogis(l))
    bhat <- glm(copd ~ g * ap, family="binomial")$coef[4]
    args$bhat <- bhat
    return(args)
}
fn(100000, 0.3, 0.1, 0.5)
# A tibble: 1 × 6
       n   bap    bm    bi   sim  bhat
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 100000   0.3   0.1   0.5     1 0.439
param <- expand.grid(
    n=100000,
    bap=c(0.1, 0.3, 0.5),
    bm=c(0, 0.1),
    bi=c(0.1, 0.3, 0.5),
    sim=1:5
)
res <- pmap(param, fn)
res %>% 
    bind_rows() %>%
    ggplot(aes(bap, bhat, color=factor(bi))) +
    geom_point()

Untidy sims:

library(dplyr)
library(simulateGP)

n <- 10000
G <- correlated_binomial(n, 0.3, 0.1, 0.5)
dim(G)
cor(G)

lm(G[,1] ~ G[,2])

x <- G[,1] + rnorm(n)

lm(x ~ G[,2])

cor(G) * (sd(G[,1]) / sd(G[,2]))







n <- 100000
ap <- rbinom(n, 2, 0.5) / 2
g <- rbinom(n, 2, 0.3)

b <- 1.2

l <- ap * b + g + g * ap + rnorm(n)
copd <- rbinom(n, 1, plogis(l))

b0 <- glm(copd ~ g, family="binomial", subset=ap==0)$coef[2]
b1 <- glm(copd ~ g, family="binomial", subset=ap==1)$coef[2]
b2 <- glm(copd ~ g, family="binomial", subset=ap==2)$coef[2]

b0
b1
b2

b2 <- glm(copd ~ g * ap, family="binomial")


fn <- function(n, bap, bm, bi, sim=1) {
    args <- list(n=n, bap=bap, bm=bm, bi=bi, sim=sim) %>% as_tibble()
    ap <- rbinom(n, 2, 0.5) / 2
    g <- rbinom(n, 2, 0.3)

    l <- ap * bap + g * ap * bi + rnorm(n)
    copd <- rbinom(n, 1, plogis(l))
    bhat <- glm(copd ~ g * ap, family="binomial")$coef[4]
    args$bhat <- bhat
    return(args)
}

param <- expand.grid(
    n=100000,
    bap=c(0.1, 0.3, 0.5),
    bm=c(0, 0.1),
    bi=c(0.1, 0.3, 0.5),
    sim=1:5
)

fn(100000, 0.3, 0.1, 0.5)

library(purrr)
res <- pmap(param, fn)


library(ggplot2)

res %>% 
    bind_rows() %>%
    ggplot(aes(bap, bhat, color=factor(bi))) +
    geom_point()

sessionInfo()
R version 4.4.3 (2025-02-28)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.6.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dplyr_1.1.4   ggplot2_3.5.1 purrr_1.0.2  

loaded via a namespace (and not attached):
 [1] vctrs_0.6.5       cli_3.6.3         knitr_1.48        rlang_1.1.4      
 [5] xfun_0.48         generics_0.1.3    jsonlite_1.8.9    labeling_0.4.3   
 [9] glue_1.8.0        colorspace_2.1-1  htmltools_0.5.8.1 scales_1.3.0     
[13] fansi_1.0.6       rmarkdown_2.27    grid_4.4.3        evaluate_1.0.1   
[17] munsell_0.5.1     tibble_3.2.1      fastmap_1.2.0     yaml_2.3.10      
[21] lifecycle_1.0.4   compiler_4.4.3    htmlwidgets_1.6.4 pkgconfig_2.0.3  
[25] farver_2.1.2      digest_0.6.37     R6_2.5.1          tidyselect_1.2.1 
[29] utf8_1.2.4        pillar_1.9.0      magrittr_2.0.3    withr_3.0.2      
[33] tools_4.4.3       gtable_0.3.6