Power of sibling estimate of for rare variants

Author

Gibran Hemani

Published

November 10, 2023

Background

Ascertaining families who have rare disease can improve power of detection because rare variant is segregating within the family. What about rare variants influencing trait where families are not ascertained?

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
make_families <- function(af, nfam) {
    nsnp <- length(af)
    dads <- matrix(0, nfam, nsnp)
    mums <- matrix(0, nfam, nsnp)
    sibs1 <- matrix(0, nfam, nsnp)
    sibs2 <- matrix(0, nfam, nsnp)
    ibd <- matrix(0, nfam, nsnp)
    ibs <- matrix(0, nfam, nsnp)
    for(i in 1:nsnp)
    {
        dad1 <- rbinom(nfam, 1, af[i]) + 1
        dad2 <- (rbinom(nfam, 1, af[i]) + 1) * -1
        mum1 <- rbinom(nfam, 1, af[i]) + 1
        mum2 <- (rbinom(nfam, 1, af[i]) + 1) * -1

        dadindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        dadh <- rep(NA, nfam)
        dadh[dadindex] <- dad1[dadindex]
        dadh[!dadindex] <- dad2[!dadindex]

        mumindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        mumh <- rep(NA, nfam)
        mumh[mumindex] <- mum1[mumindex]
        mumh[!mumindex] <- mum2[!mumindex]

        sib1 <- cbind(dadh, mumh)

        dadindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        dadh <- rep(NA, nfam)
        dadh[dadindex] <- dad1[dadindex]
        dadh[!dadindex] <- dad2[!dadindex]

        mumindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        mumh <- rep(NA, nfam)
        mumh[mumindex] <- mum1[mumindex]
        mumh[!mumindex] <- mum2[!mumindex]

        sib2 <- cbind(dadh, mumh)

        ibd[,i] <- (as.numeric(sib1[,1] == sib2[,1]) + as.numeric(sib1[,2] == sib2[,2])) / 2


        sibs1[,i] <- rowSums(abs(sib1) - 1)
        sibs2[,i] <- rowSums(abs(sib2) - 1)
        dads[,i] <- dad1 - 1 + abs(dad2) - 1
        mums[,i] <- mum1 - 1 + abs(mum2) - 1

        # l[[i]] <- (sum(sib1[,1] == sib2[,1]) / nsnp + sum(sib1[,2] == sib2[,2]) / nsnp) / 2

    }

    # This may not be correct - getting some really large values
    ibs <- scale(sibs1) * scale(sibs2)

    # Just count how many alleles are in common
    ibs_unw <- abs(abs(sibs1 - sibs2) - 2) / 2
    return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2, ibd=ibd, ibs=ibs, ibs_unw=ibs_unw))
}

makePhen <- function(effs, indep, vy=1, vx=rep(1, length(effs)), my=0) {
    if(is.null(dim(indep))) indep <- cbind(indep)
    stopifnot(ncol(indep) == length(effs))
    stopifnot(length(vx) == length(effs))
    cors <- effs * vx / sqrt(vx) / sqrt(vy)
    stopifnot(sum(cors^2) <= 1)
    cors <- c(cors, sqrt(1-sum(cors^2)))
    indep <- t(t(scale(cbind(indep, rnorm(nrow(indep))))) * cors * c(vx, 1))
    y <- drop(scale(rowSums(indep)) * sqrt(vy)) + my
    return(y)
}

chooseEffects <- function(nsnp, totvar, sqrt=TRUE) {
    eff <- rnorm(nsnp)
    aeff <- abs(eff)
    sc <- sum(aeff) / totvar
    out <- eff / sc
    if(sqrt)
    {
        out <- sqrt(abs(out)) * sign(out)
    }
    return(out)
}

make_phenotypes <- function(fam, eff_gx, eff_xy, vx, vy, mx, my) {
    lapply(fam, function(g)
    {
        u <- rnorm(nrow(g))
        x <- makePhen(c(eff_gx), cbind(g), vy=vx, my=mx)
        y <- makePhen(c(eff_xy), cbind(x), vy=vy, my=my)
        return(data.frame(x=x, y=y))
    })
}

join_populations <- function(l) {
    dads <- do.call(rbind, lapply(l, function(x) x$dads))
    mums <- do.call(rbind, lapply(l, function(x) x$mums))
    sibs1 <- do.call(rbind, lapply(l, function(x) x$sibs1))
    sibs2 <- do.call(rbind, lapply(l, function(x) x$sibs2))
    ibd <- do.call(rbind, lapply(l, function(x) x$ibd))
    ibs <- do.call(rbind, lapply(l, function(x) x$ibs))
    return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2, ibd=ibd, ibs=ibs))
}

sample_populations <- function(l, n) {
    x <- nrow(l$dads)
    index <- sort(sample(1:x, n, replace=FALSE))
    l$dads <- l$dads[index,]
    l$mums <- l$mums[index,]
    l$sibs1 <- l$sibs1[index,]
    l$sibs2 <- l$sibs2[index,]
    l$ibd <- l$ibd[index,]
    l$ibs <- l$ibs[index,]
    l$ibs_unw <- l$ibs_unw[index,]
    return(l)
}

makephen <- function(b, g, vx=1) {
    pred <- b * g
    e <- rnorm(length(g), 0, sqrt(1-var(pred)))
    return(pred+e)
}

# Dynastic effects
dynastic_phen <- function(fam, eff_gx, eff_xy, eff_ux, eff_uy, eff_xu) {
    n <- nrow(fam$sibs1)
    
    # parents x

    # umums <- rnorm(n)
    # udads <- rnorm(n)
    # xmums <- makePhen(c(eff_gx, eff_ux), cbind(fam$mums, umums))
    # xdads <- makePhen(c(eff_gx, eff_ux), cbind(fam$dads, udads))

    x1 <- makephen(eff_gx, fam$sibs1)
    x2 <- makephen(eff_gx, fam$sibs2)
    l <- list()
    # l$dads <- data.frame(x=xdads)
    # l$mums <- data.frame(x=xmums)
    l$sibs1 <- data.frame(x=x1)
    l$sibs2 <- data.frame(x=x2)
    return(l)
}

sibreg <- function(g1, g2, y1, y2) {
    d <- bind_rows(
        tibble(
            y=y1, fg=(g1+g2)/2, cg=g1-fg
        ),
        tibble(
            y=y2, fg=(g1+g2)/2, cg=g2-fg
        )
    )
    m <- summary(lm(y ~ cg + fg, data=d))$coef
    tibble(bhat=m[2,1], se=m[2,2], pval=m[2,4], n=nrow(d), method="sibreg")
}

popreg <- function(y, x) {
    index <- is.finite(y) & is.finite(x)
    n <- sum(index)
    y <- y[index]
    x <- x[index]
    vx <- var(x)
    bhat <- cov(y, x) / vx
    ahat <- mean(y) - bhat * mean(x)
    # fitted <- ahat + x * bhat
    # residuals <- y - fitted
    # SSR <- sum((residuals - mean(residuals))^2)
    # SSF <- sum((fitted - mean(fitted))^2)

    rsq <- (bhat * vx)^2 / (vx * var(y))
    fval <- rsq * (n-2) / (1-rsq)
    tval <- sqrt(fval)
    se <- abs(bhat / tval)

    # Fval <- (SSF) / (SSR/(n-2))
    # pval <- pf(Fval, 1, n-2, lowe=F)
    p <- pf(fval, 1, n-2, lowe=F)
    return(tibble(
        bhat=bhat, se=se, fval=fval, pval=p, n=n, method="popreg"
    ))
}

sibreg2 <- function(fam, phen) {
    sdiffgx <- fam$sibs1[,1] - fam$sibs2[,1]
    sdiffx <- phen$sibs1$x - phen$sibs2$x
    popreg(sdiffx, sdiffgx) %>% mutate(method="sibreg")
}

reg <- function(f, p) {
    # sibreg(f$sibs1[,1], f$sibs2[,1], p$sibs1$x, p$sibs2$x)
    bind_rows(
        sibreg2(f, p),
        popreg(p$sibs1[,1], f$sibs1[,1])
    )
}


r2 <- 0.001
param <- tibble(
    af=seq(0.0001, 0.01, by=0.00005),
    vg=r2,
    varg=2*af*(1-af),
    b=sqrt(r2/varg)
)

res <- lapply(1:nrow(param), \(i) {
    f <- make_families(param$af[i], 500000)
    p <- dynastic_phen(f, param$b[i], 0, 0, 0, 0)
    r <- reg(f, p)
    r$af=param$af[i]
    return(r)
}) %>% bind_rows()
library(ggplot2)
res %>% group_by(af) %>%
    summarise(fvalratio=fval[1]/fval[2]) %>%
ggplot(., aes(x=af, y=fvalratio)) +
geom_point()

Summary

  • Power ratio does not change by allele frequency

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.4.2 dplyr_1.1.4  

loaded via a namespace (and not attached):
 [1] vctrs_0.6.4       cli_3.6.1         knitr_1.45        rlang_1.1.2      
 [5] xfun_0.41         generics_0.1.3    jsonlite_1.8.7    labeling_0.4.2   
 [9] glue_1.6.2        colorspace_2.1-0  htmltools_0.5.7   scales_1.2.1     
[13] fansi_1.0.5       rmarkdown_2.25    grid_4.3.2        munsell_0.5.0    
[17] evaluate_0.23     tibble_3.2.1      fastmap_1.1.1     yaml_2.3.7       
[21] lifecycle_1.0.4   compiler_4.3.2    htmlwidgets_1.6.3 pkgconfig_2.0.3  
[25] farver_2.1.1      digest_0.6.33     R6_2.5.1          tidyselect_1.2.0 
[29] utf8_1.2.4        pillar_1.9.0      magrittr_2.0.3    withr_2.5.2      
[33] tools_4.3.2       gtable_0.3.3