library(dplyr)
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>     filter, lag
#> The following objects are masked from 'package:base':
#>     intersect, setdiff, setequal, union
#' Sum the case counts found in a free-text sample description
#'
#' The description is treated as a ", "-separated list of sample components
#' (e.g. "25,453 European ancestry cases"). Components containing the word
#' 'cases' are kept; when there are none, every component is used instead.
#' The first numeric token of each kept component is extracted and the
#' values are summed.
#'
#' @param x Character string describing the sample. Only the first element
#'   of `x` is used.
#' @return A numeric scalar: the sum of the extracted counts. Components
#'   without any numeric token contribute nothing; an empty string gives 0.
fn <- function(x) {
  # Remove commas from numbers (a comma sitting directly between digits)
  no_commas <- gsub("(\\d+),(?=\\d+)", "\\1", x, perl = TRUE)
  # Split sample components based on ', '
  components <- strsplit(no_commas, ", ")[[1]]
  # Keep components that contain the word 'cases'
  selected <- grep("cases", components, value = TRUE)
  # If none, probably a continuous trait
  if (length(selected) == 0) {
    selected <- components
  }
  # Extract the first number from each selected component. vapply() keeps
  # the result numeric even when there are no components at all.
  counts <- vapply(
    selected,
    \(component) {
      tokens <- strsplit(component, " ")[[1]]
      # Non-numeric words become NA; only that coercion warning is silenced
      values <- suppressWarnings(as.numeric(tokens))
      values <- values[!is.na(values)]
      if (length(values) == 0) NA_real_ else values[[1]]
    },
    numeric(1)
  )
  sum(counts, na.rm = TRUE)
}
# Example: counts are summed over every component that mentions 'cases'
x <- "25,453 European ancestry cases, 58,113 European ancestry controls, 360 cases and controls, PMID:25056061, 6,524 cases, 24,001 controls"
fn(x)
#> [1] 32337

# Example: a description made of a single component
x <- "360 cases and controls"
fn(x)
#> [1] 360