Gibran Hemani’s lab book - Founders in population expansion

Background

After discussion with Katy Peichel about MR for natural selection in an experimental situation.

9k three-spine stickleback fish populated 10 lakes, and in three is now several hundred thousand. How many of the founders actually contribute to the gene pool after a few generations?

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(vctrs)


Attaching package: 'vctrs'

The following object is masked from 'package:dplyr':

    data_frame

founder_population <- function(n1, n2) {
    bind_rows(
        tibble(iid=paste0("f0_", 1:n1), fid=iid, sex=1),
        tibble(iid=paste0("f0_", (n1+1):n2), fid=iid, sex=2)
    ) %>% mutate(generation=0)
}

sample_nchildren <- function(n) {
    rpois(n, 10)^2
}

simulate_children <- function(founder, pdeath) {
    lastgen <- strsplit(founder$iid[1], "_")[[1]][1] %>% gsub("f", "", .) %>% as.numeric()
    thisgen <- lastgen + 1
    p <- founder %>%
        mutate(death = rbinom(n(), 1, pdeath)) %>%
        filter(death == 0)
    n <- round(nrow(p)/2)
    founder$i <- 1:nrow(founder)
    ind1 <- subset(founder, sex==1)$i %>% {sample(., n, replace=TRUE)}
    ind2 <- subset(founder, sex==2)$i %>% {sample(., n, replace=TRUE)}
    parents <- tibble(
        dad=founder$iid[ind1],
        mum=founder$iid[ind2],
        fid=paste(founder$fid[ind1], founder$fid[ind2]),
        nchildren=sample_nchildren(n)
    )
    ind <- vec_rep_each(1:n, parents$nchildren)
    children <- tibble(
        iid = paste0("f", thisgen, "_", 1:length(ind)),
        mum = parents$mum[ind],
        dad = parents$dad[ind],
        fid = parents$fid[ind],
        sex = rbinom(length(dad), 1, 0.5) + 1,
        generation=thisgen
    )
    return(children)
}

Generate founder population

a <- founder_population(4500, 4500)
b <- simulate_children(a, 0.1)
b1 <- simulate_children(b, 0.9)

dim(b)

[1] 216444      6

dim(b1)

[1] 1183435       6

Do subsequent generations

l <- list()
l[[1]] <- founder_population(4500, 4500)
for(i in 2:4) {
    l[[i]] <- simulate_children(l[[i-1]], 0.95)
    print(dim(l[[i]]))
}

[1] 11864     6
[1] 29805     6
[1] 82869     6

dat <- bind_rows(l)
dim(dat)

[1] 129040      6

tail(dat$fid)

[1] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[2] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[3] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[4] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[5] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[6] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"

head(dat)

# A tibble: 6 × 6
  iid   fid     sex generation mum   dad  
  <chr> <chr> <dbl>      <dbl> <chr> <chr>
1 f0_1  f0_1      1          0 <NA>  <NA> 
2 f0_2  f0_2      1          0 <NA>  <NA> 
3 f0_3  f0_3      1          0 <NA>  <NA> 
4 f0_4  f0_4      1          0 <NA>  <NA> 
5 f0_5  f0_5      1          0 <NA>  <NA> 
6 f0_6  f0_6      1          0 <NA>  <NA>

How many founders per generation?

nfounders_per_generation <- function(dat) {
    group_by(dat, generation) %>%
        do({
            tibble(founder = unlist(strsplit(.$fid, " ")))
        }) %>%
        ungroup() %>%
        group_by(generation, founder) %>%
            summarise(n=n())
}

counts <- nfounders_per_generation(dat)

`summarise()` has grouped output by 'generation'. You can override using the
`.groups` argument.

counts

# A tibble: 4,841 × 3
# Groups:   generation [4]
   generation founder     n
        <dbl> <chr>   <int>
 1          0 f0_1        1
 2          0 f0_10       1
 3          0 f0_100      1
 4          0 f0_1000     1
 5          0 f0_1001     1
 6          0 f0_1002     1
 7          0 f0_1003     1
 8          0 f0_1004     1
 9          0 f0_1005     1
10          0 f0_1006     1
# ℹ 4,831 more rows

table(counts$generation)


   0    1    2    3 
4501  116  112  112

hist(counts$n, breaks=100)

table(counts$n)


     1      2     16     25     36     49     64     81    100    117    121 
  4500      1      1      2     13     15     15     17     13      1     22 
   128    144    145    164    166    169    170    174    179    181    195 
     1     10      1      1      1      6      1      1      1      1      1 
   196    202    219    221    225    242    250    256    260    270    276 
     5      1      1      2      3      3      2      2      1      1      1 
   289    290    300    302    345    351    355    357    362    363    365 
     2      1      2      1      1      1      1      1      1      1      1 
   369    371    373    374    375    377    382    388    389    390    394 
     1      1      1      1      1      1      1      1      1      2      1 
   396    400    415    417    427    433    452    477    484    485    491 
     1      1      1      1      1      1      1      1      1      1      1 
   494    495    510    512    523    528    546    561    563    564    565 
     1      1      1      1      1      1      2      2      1      1      1 
   569    594    603    607    608    611    617    637    642    668    673 
     1      1      1      1      1      1      1      1      1      1      1 
   680    687    696    707    708    722    757    758    784    785    786 
     1      1      1      1      1      1      1      1      1      1      2 
   790    792    798    846    848    852    884    969    977    983    999 
     1      1      1      1      1      1      1      1      1      1      1 
  1025   1028   1036   1046   1055   1079   1082   1101   1106   1146   1221 
     1      1      1      1      1      1      1      1      1      1      1 
  1230   1242   1270   1329   1403   1443   1465   1466   1471   1507   1517 
     1      1      1      1      1      1      1      1      1      1      1 
  1518   1544   1593   1610   1714   1731   1741   1787   1798   1811   1829 
     1      1      1      1      1      1      1      1      1      1      1 
  1844   1854   1859   1866   1869   1973   1988   2029   2055   2092   2166 
     1      1      1      1      1      1      1      1      1      1      1 
  2201   2274   2285   2293   2303   2379   2466   2479   2565   2583   2657 
     1      1      1      1      1      1      1      1      1      1      1 
  2674   2752   2800   2807   2905   2921   3057   3096   3100   3190   3313 
     1      1      1      1      1      1      1      1      1      1      1 
  3381   3460   3463   3533   3535   3546   3682   3707   3931   3963   4144 
     1      1      1      1      1      1      1      1      1      1      1 
  4207   4244   4270   4347   4449   4752   4785   4826   4925   5008   5036 
     1      1      1      1      1      1      1      1      1      1      1 
  5265   5284   5310   5525   5587   5593   6014   6339   6897   7105   7300 
     1      1      1      1      1      1      1      1      1      1      1 
  7362   7376   7548   8442   8494   9081   9643  10005  29685  29925 161730 
     1      1      1      1      1      1      1      1      1      1      1 
169746 
     1

Very few original pairings are represented by generation 3

sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] vctrs_0.6.4 dplyr_1.1.4

loaded via a namespace (and not attached):
 [1] digest_0.6.33     utf8_1.2.4        R6_2.5.1          fastmap_1.1.1    
 [5] tidyselect_1.2.0  xfun_0.41         magrittr_2.0.3    glue_1.6.2       
 [9] tibble_3.2.1      knitr_1.45        pkgconfig_2.0.3   htmltools_0.5.7  
[13] rmarkdown_2.25    generics_0.1.3    lifecycle_1.0.4   cli_3.6.1        
[17] fansi_1.0.5       withr_2.5.2       compiler_4.3.2    tools_4.3.2      
[21] pillar_1.9.0      evaluate_0.23     yaml_2.3.7        rlang_1.1.2      
[25] jsonlite_1.8.7    htmlwidgets_1.6.3