Background
If a trait is a composite of different sub-traits then is it possible to recover those sub-traits by identifying clustering variants into scores and then clustering scores?
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library (simulateGP)
n <- 100000
nclust <- 5
nsnp_per_trait <- 10
ntrait_per_clust <- 10
make_cluster <- function (n, nsnp_per_trait, ntrait_per_clust) {
G <- list ()
for (i in 1 : ntrait_per_clust) {
g <- matrix (rbinom (n * nsnp_per_trait, 2 , 0.3 ), nrow= n)
G[[i]] <- g
trait <- scale (g) %*% rnorm (nsnp_per_trait)
if (i == 1 ) {
trait_all <- trait
} else {
trait_all <- cbind (trait_all, trait)
}
}
G <- do.call (cbind, G)
return (list (G= G, T= trait_all))
}
make_disease <- function (n, nclust, nsnp_per_trait, ntrait_per_clust) {
L <- list ()
for (i in 1 : nclust) {
L[[i]] <- make_cluster (n, nsnp_per_trait, ntrait_per_clust)
}
T <- lapply (L, \(x) x$ T) %>% do.call (cbind, .)
D <- tibble (
liability = as.matrix (T) %*% rnorm (ncol (T)) %>% drop (),
disease = rbinom (n, 1 , plogis (liability))
)
G <- lapply (L, \(x) x$ G) %>% do.call (cbind, .)
return (list (G= G, T= T, D= D))
}
gwas <- function (y, g) {
(cov (y, g) / apply (g, 2 , var)) %>% drop ()
}
make_effects_matrix <- function (dat) {
o <- lapply (1 : ncol (dat$ T), \(i) {
simulateGP:: gwas (dat$ T[,i], dat$ G)$ fval
}) %>% do.call (cbind, .)
return (o)
}
dat <- make_disease (n, nclust, nsnp_per_trait, ntrait_per_clust= 1 )
effects_matrix <- make_effects_matrix (dat)
dim (effects_matrix)
dim (gwas (dat$ D$ disease, dat$ G))
effects_matrix[1 : 5 , 1 : 5 ]
o[1 : 5 , 1 : 5 ]
a <- princomp (sqrt (effects_matrix))
s <- a$ scores
s[s < 0 ] <- NA
plot (s[,1 ], s[,3 ], col= dat$ D$ disease+ 1 )
str (dat)
table (dat$ D$ disease)
R version 4.4.3 (2025-02-28)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.6.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/London
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] simulateGP_0.1.3 dplyr_1.1.4
loaded via a namespace (and not attached):
[1] digest_0.6.37 utf8_1.2.4 R6_2.5.1 fastmap_1.2.0
[5] tidyselect_1.2.1 xfun_0.48 magrittr_2.0.3 glue_1.8.0
[9] tibble_3.2.1 knitr_1.48 pkgconfig_2.0.3 htmltools_0.5.8.1
[13] rmarkdown_2.27 generics_0.1.3 lifecycle_1.0.4 cli_3.6.3
[17] fansi_1.0.6 vctrs_0.6.5 compiler_4.4.3 tools_4.4.3
[21] pillar_1.9.0 evaluate_1.0.1 yaml_2.3.10 rlang_1.1.4
[25] jsonlite_1.8.9 htmlwidgets_1.6.4