Background
After discussion with Katy Peichel about MR for natural selection in an experimental situation.
9k three-spine stickleback fish populated 10 lakes, and in three is now several hundred thousand. How many of the founders actually contribute to the gene pool after a few generations?
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Attaching package: 'vctrs'
The following object is masked from 'package:dplyr':
data_frame
founder_population <- function (n1, n2) {
bind_rows (
tibble (iid= paste0 ("f0_" , 1 : n1), fid= iid, sex= 1 ),
tibble (iid= paste0 ("f0_" , (n1+ 1 ): n2), fid= iid, sex= 2 )
) %>% mutate (generation= 0 )
}
sample_nchildren <- function (n) {
rpois (n, 10 )^ 2
}
simulate_children <- function (founder, pdeath) {
lastgen <- strsplit (founder$ iid[1 ], "_" )[[1 ]][1 ] %>% gsub ("f" , "" , .) %>% as.numeric ()
thisgen <- lastgen + 1
p <- founder %>%
mutate (death = rbinom (n (), 1 , pdeath)) %>%
filter (death == 0 )
n <- round (nrow (p)/ 2 )
founder$ i <- 1 : nrow (founder)
ind1 <- subset (founder, sex== 1 )$ i %>% {sample (., n, replace= TRUE )}
ind2 <- subset (founder, sex== 2 )$ i %>% {sample (., n, replace= TRUE )}
parents <- tibble (
dad= founder$ iid[ind1],
mum= founder$ iid[ind2],
fid= paste (founder$ fid[ind1], founder$ fid[ind2]),
nchildren= sample_nchildren (n)
)
ind <- vec_rep_each (1 : n, parents$ nchildren)
children <- tibble (
iid = paste0 ("f" , thisgen, "_" , 1 : length (ind)),
mum = parents$ mum[ind],
dad = parents$ dad[ind],
fid = parents$ fid[ind],
sex = rbinom (length (dad), 1 , 0.5 ) + 1 ,
generation= thisgen
)
return (children)
}
Generate founder population
a <- founder_population (4500 , 4500 )
b <- simulate_children (a, 0.1 )
b1 <- simulate_children (b, 0.9 )
dim (b)
Do subsequent generations
l <- list ()
l[[1 ]] <- founder_population (4500 , 4500 )
for (i in 2 : 4 ) {
l[[i]] <- simulate_children (l[[i-1 ]], 0.95 )
print (dim (l[[i]]))
}
[1] 11864 6
[1] 29805 6
[1] 82869 6
dat <- bind_rows (l)
dim (dat)
[1] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[2] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[3] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[4] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[5] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
[6] "f0_1141 f0_4500 f0_122 f0_4501 f0_448 f0_4501 f0_3351 f0_4500"
# A tibble: 6 × 6
iid fid sex generation mum dad
<chr> <chr> <dbl> <dbl> <chr> <chr>
1 f0_1 f0_1 1 0 <NA> <NA>
2 f0_2 f0_2 1 0 <NA> <NA>
3 f0_3 f0_3 1 0 <NA> <NA>
4 f0_4 f0_4 1 0 <NA> <NA>
5 f0_5 f0_5 1 0 <NA> <NA>
6 f0_6 f0_6 1 0 <NA> <NA>
How many founders per generation?
nfounders_per_generation <- function (dat) {
group_by (dat, generation) %>%
do ({
tibble (founder = unlist (strsplit (.$ fid, " " )))
}) %>%
ungroup () %>%
group_by (generation, founder) %>%
summarise (n= n ())
}
counts <- nfounders_per_generation (dat)
`summarise()` has grouped output by 'generation'. You can override using the
`.groups` argument.
# A tibble: 4,841 × 3
# Groups: generation [4]
generation founder n
<dbl> <chr> <int>
1 0 f0_1 1
2 0 f0_10 1
3 0 f0_100 1
4 0 f0_1000 1
5 0 f0_1001 1
6 0 f0_1002 1
7 0 f0_1003 1
8 0 f0_1004 1
9 0 f0_1005 1
10 0 f0_1006 1
# ℹ 4,831 more rows
hist (counts$ n, breaks= 100 )
1 2 16 25 36 49 64 81 100 117 121
4500 1 1 2 13 15 15 17 13 1 22
128 144 145 164 166 169 170 174 179 181 195
1 10 1 1 1 6 1 1 1 1 1
196 202 219 221 225 242 250 256 260 270 276
5 1 1 2 3 3 2 2 1 1 1
289 290 300 302 345 351 355 357 362 363 365
2 1 2 1 1 1 1 1 1 1 1
369 371 373 374 375 377 382 388 389 390 394
1 1 1 1 1 1 1 1 1 2 1
396 400 415 417 427 433 452 477 484 485 491
1 1 1 1 1 1 1 1 1 1 1
494 495 510 512 523 528 546 561 563 564 565
1 1 1 1 1 1 2 2 1 1 1
569 594 603 607 608 611 617 637 642 668 673
1 1 1 1 1 1 1 1 1 1 1
680 687 696 707 708 722 757 758 784 785 786
1 1 1 1 1 1 1 1 1 1 2
790 792 798 846 848 852 884 969 977 983 999
1 1 1 1 1 1 1 1 1 1 1
1025 1028 1036 1046 1055 1079 1082 1101 1106 1146 1221
1 1 1 1 1 1 1 1 1 1 1
1230 1242 1270 1329 1403 1443 1465 1466 1471 1507 1517
1 1 1 1 1 1 1 1 1 1 1
1518 1544 1593 1610 1714 1731 1741 1787 1798 1811 1829
1 1 1 1 1 1 1 1 1 1 1
1844 1854 1859 1866 1869 1973 1988 2029 2055 2092 2166
1 1 1 1 1 1 1 1 1 1 1
2201 2274 2285 2293 2303 2379 2466 2479 2565 2583 2657
1 1 1 1 1 1 1 1 1 1 1
2674 2752 2800 2807 2905 2921 3057 3096 3100 3190 3313
1 1 1 1 1 1 1 1 1 1 1
3381 3460 3463 3533 3535 3546 3682 3707 3931 3963 4144
1 1 1 1 1 1 1 1 1 1 1
4207 4244 4270 4347 4449 4752 4785 4826 4925 5008 5036
1 1 1 1 1 1 1 1 1 1 1
5265 5284 5310 5525 5587 5593 6014 6339 6897 7105 7300
1 1 1 1 1 1 1 1 1 1 1
7362 7376 7548 8442 8494 9081 9643 10005 29685 29925 161730
1 1 1 1 1 1 1 1 1 1 1
169746
1
Very few original pairings are represented by generation 3
R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
time zone: Europe/London
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] vctrs_0.6.4 dplyr_1.1.4
loaded via a namespace (and not attached):
[1] digest_0.6.33 utf8_1.2.4 R6_2.5.1 fastmap_1.1.1
[5] tidyselect_1.2.0 xfun_0.41 magrittr_2.0.3 glue_1.6.2
[9] tibble_3.2.1 knitr_1.45 pkgconfig_2.0.3 htmltools_0.5.7
[13] rmarkdown_2.25 generics_0.1.3 lifecycle_1.0.4 cli_3.6.1
[17] fansi_1.0.5 withr_2.5.2 compiler_4.3.2 tools_4.3.2
[21] pillar_1.9.0 evaluate_0.23 yaml_2.3.7 rlang_1.1.2
[25] jsonlite_1.8.7 htmlwidgets_1.6.3