Gibran Hemani’s lab book - Power of sibling estimate of for rare variants

Background

Ascertaining families who have rare disease can improve power of detection because rare variant is segregating within the family. What about rare variants influencing trait where families are not ascertained?

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

make_families <- function(af, nfam) {
    nsnp <- length(af)
    dads <- matrix(0, nfam, nsnp)
    mums <- matrix(0, nfam, nsnp)
    sibs1 <- matrix(0, nfam, nsnp)
    sibs2 <- matrix(0, nfam, nsnp)
    ibd <- matrix(0, nfam, nsnp)
    ibs <- matrix(0, nfam, nsnp)
    for(i in 1:nsnp)
    {
        dad1 <- rbinom(nfam, 1, af[i]) + 1
        dad2 <- (rbinom(nfam, 1, af[i]) + 1) * -1
        mum1 <- rbinom(nfam, 1, af[i]) + 1
        mum2 <- (rbinom(nfam, 1, af[i]) + 1) * -1

        dadindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        dadh <- rep(NA, nfam)
        dadh[dadindex] <- dad1[dadindex]
        dadh[!dadindex] <- dad2[!dadindex]

        mumindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        mumh <- rep(NA, nfam)
        mumh[mumindex] <- mum1[mumindex]
        mumh[!mumindex] <- mum2[!mumindex]

        sib1 <- cbind(dadh, mumh)

        dadindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        dadh <- rep(NA, nfam)
        dadh[dadindex] <- dad1[dadindex]
        dadh[!dadindex] <- dad2[!dadindex]

        mumindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE)
        mumh <- rep(NA, nfam)
        mumh[mumindex] <- mum1[mumindex]
        mumh[!mumindex] <- mum2[!mumindex]

        sib2 <- cbind(dadh, mumh)

        ibd[,i] <- (as.numeric(sib1[,1] == sib2[,1]) + as.numeric(sib1[,2] == sib2[,2])) / 2


        sibs1[,i] <- rowSums(abs(sib1) - 1)
        sibs2[,i] <- rowSums(abs(sib2) - 1)
        dads[,i] <- dad1 - 1 + abs(dad2) - 1
        mums[,i] <- mum1 - 1 + abs(mum2) - 1

        # l[[i]] <- (sum(sib1[,1] == sib2[,1]) / nsnp + sum(sib1[,2] == sib2[,2]) / nsnp) / 2

    }

    # This may not be correct - getting some really large values
    ibs <- scale(sibs1) * scale(sibs2)

    # Just count how many alleles are in common
    ibs_unw <- abs(abs(sibs1 - sibs2) - 2) / 2
    return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2, ibd=ibd, ibs=ibs, ibs_unw=ibs_unw))
}

makePhen <- function(effs, indep, vy=1, vx=rep(1, length(effs)), my=0) {
    if(is.null(dim(indep))) indep <- cbind(indep)
    stopifnot(ncol(indep) == length(effs))
    stopifnot(length(vx) == length(effs))
    cors <- effs * vx / sqrt(vx) / sqrt(vy)
    stopifnot(sum(cors^2) <= 1)
    cors <- c(cors, sqrt(1-sum(cors^2)))
    indep <- t(t(scale(cbind(indep, rnorm(nrow(indep))))) * cors * c(vx, 1))
    y <- drop(scale(rowSums(indep)) * sqrt(vy)) + my
    return(y)
}

chooseEffects <- function(nsnp, totvar, sqrt=TRUE) {
    eff <- rnorm(nsnp)
    aeff <- abs(eff)
    sc <- sum(aeff) / totvar
    out <- eff / sc
    if(sqrt)
    {
        out <- sqrt(abs(out)) * sign(out)
    }
    return(out)
}

make_phenotypes <- function(fam, eff_gx, eff_xy, vx, vy, mx, my) {
    lapply(fam, function(g)
    {
        u <- rnorm(nrow(g))
        x <- makePhen(c(eff_gx), cbind(g), vy=vx, my=mx)
        y <- makePhen(c(eff_xy), cbind(x), vy=vy, my=my)
        return(data.frame(x=x, y=y))
    })
}

join_populations <- function(l) {
    dads <- do.call(rbind, lapply(l, function(x) x$dads))
    mums <- do.call(rbind, lapply(l, function(x) x$mums))
    sibs1 <- do.call(rbind, lapply(l, function(x) x$sibs1))
    sibs2 <- do.call(rbind, lapply(l, function(x) x$sibs2))
    ibd <- do.call(rbind, lapply(l, function(x) x$ibd))
    ibs <- do.call(rbind, lapply(l, function(x) x$ibs))
    return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2, ibd=ibd, ibs=ibs))
}

sample_populations <- function(l, n) {
    x <- nrow(l$dads)
    index <- sort(sample(1:x, n, replace=FALSE))
    l$dads <- l$dads[index,]
    l$mums <- l$mums[index,]
    l$sibs1 <- l$sibs1[index,]
    l$sibs2 <- l$sibs2[index,]
    l$ibd <- l$ibd[index,]
    l$ibs <- l$ibs[index,]
    l$ibs_unw <- l$ibs_unw[index,]
    return(l)
}

makephen <- function(b, g, vx=1) {
    pred <- b * g
    e <- rnorm(length(g), 0, sqrt(1-var(pred)))
    return(pred+e)
}

# Dynastic effects
dynastic_phen <- function(fam, eff_gx, eff_xy, eff_ux, eff_uy, eff_xu) {
    n <- nrow(fam$sibs1)
    
    # parents x

    # umums <- rnorm(n)
    # udads <- rnorm(n)
    # xmums <- makePhen(c(eff_gx, eff_ux), cbind(fam$mums, umums))
    # xdads <- makePhen(c(eff_gx, eff_ux), cbind(fam$dads, udads))

    x1 <- makephen(eff_gx, fam$sibs1)
    x2 <- makephen(eff_gx, fam$sibs2)
    l <- list()
    # l$dads <- data.frame(x=xdads)
    # l$mums <- data.frame(x=xmums)
    l$sibs1 <- data.frame(x=x1)
    l$sibs2 <- data.frame(x=x2)
    return(l)
}

sibreg <- function(g1, g2, y1, y2) {
    d <- bind_rows(
        tibble(
            y=y1, fg=(g1+g2)/2, cg=g1-fg
        ),
        tibble(
            y=y2, fg=(g1+g2)/2, cg=g2-fg
        )
    )
    m <- summary(lm(y ~ cg + fg, data=d))$coef
    tibble(bhat=m[2,1], se=m[2,2], pval=m[2,4], n=nrow(d), method="sibreg")
}

popreg <- function(y, x) {
    index <- is.finite(y) & is.finite(x)
    n <- sum(index)
    y <- y[index]
    x <- x[index]
    vx <- var(x)
    bhat <- cov(y, x) / vx
    ahat <- mean(y) - bhat * mean(x)
    # fitted <- ahat + x * bhat
    # residuals <- y - fitted
    # SSR <- sum((residuals - mean(residuals))^2)
    # SSF <- sum((fitted - mean(fitted))^2)

    rsq <- (bhat * vx)^2 / (vx * var(y))
    fval <- rsq * (n-2) / (1-rsq)
    tval <- sqrt(fval)
    se <- abs(bhat / tval)

    # Fval <- (SSF) / (SSR/(n-2))
    # pval <- pf(Fval, 1, n-2, lowe=F)
    p <- pf(fval, 1, n-2, lowe=F)
    return(tibble(
        bhat=bhat, se=se, fval=fval, pval=p, n=n, method="popreg"
    ))
}

sibreg2 <- function(fam, phen) {
    sdiffgx <- fam$sibs1[,1] - fam$sibs2[,1]
    sdiffx <- phen$sibs1$x - phen$sibs2$x
    popreg(sdiffx, sdiffgx) %>% mutate(method="sibreg")
}

reg <- function(f, p) {
    # sibreg(f$sibs1[,1], f$sibs2[,1], p$sibs1$x, p$sibs2$x)
    bind_rows(
        sibreg2(f, p),
        popreg(p$sibs1[,1], f$sibs1[,1])
    )
}


r2 <- 0.001
param <- tibble(
    af=seq(0.0001, 0.01, by=0.00005),
    vg=r2,
    varg=2*af*(1-af),
    b=sqrt(r2/varg)
)

res <- lapply(1:nrow(param), \(i) {
    f <- make_families(param$af[i], 500000)
    p <- dynastic_phen(f, param$b[i], 0, 0, 0, 0)
    r <- reg(f, p)
    r$af=param$af[i]
    return(r)
}) %>% bind_rows()

library(ggplot2)
res %>% group_by(af) %>%
    summarise(fvalratio=fval[1]/fval[2]) %>%
ggplot(., aes(x=af, y=fvalratio)) +
geom_point()

Summary

Power ratio does not change by allele frequency

sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.4.2 dplyr_1.1.4  

loaded via a namespace (and not attached):
 [1] vctrs_0.6.4       cli_3.6.1         knitr_1.45        rlang_1.1.2      
 [5] xfun_0.41         generics_0.1.3    jsonlite_1.8.7    labeling_0.4.2   
 [9] glue_1.6.2        colorspace_2.1-0  htmltools_0.5.7   scales_1.2.1     
[13] fansi_1.0.5       rmarkdown_2.25    grid_4.3.2        munsell_0.5.0    
[17] evaluate_0.23     tibble_3.2.1      fastmap_1.1.1     yaml_2.3.7       
[21] lifecycle_1.0.4   compiler_4.3.2    htmlwidgets_1.6.3 pkgconfig_2.0.3  
[25] farver_2.1.1      digest_0.6.33     R6_2.5.1          tidyselect_1.2.0 
[29] utf8_1.2.4        pillar_1.9.0      magrittr_2.0.3    withr_2.5.2      
[33] tools_4.3.2       gtable_0.3.3