library(PubmedPlot)
library(jsonlite)
library(dplyr)
library(XML)
# ---- Search PubMed for matching article ids (esearch) ----
# Title-only query covering both the British and American spellings.
term <- '"Mendelian randomisation" [Title] OR "Mendelian randomization" [Title]'
search_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
search_params <- list(
  db = "pubmed",
  term = term,
  retmode = "json",
  usehistory = "y",
  retmax = 20000
)

# First page of results. Fail loudly on an HTTP error instead of trying to
# parse an error page as JSON.
search_response <- httr::GET(url = search_url, query = search_params)
httr::stop_for_status(search_response, task = "query PubMed esearch")
search_content <- httr::content(search_response, "text", encoding = "UTF-8")
search_result <- jsonlite::fromJSON(search_content)
pmids <- search_result$esearchresult$idlist

# `count` is the total number of matches reported by the server; `retmax` is
# the page size echoed back in the response.
count <- as.numeric(search_result$esearchresult$count)
retmax <- as.numeric(search_result$esearchresult$retmax)
if (length(count) != 1 || is.na(count)) {
  stop("esearch response did not contain a usable result count", call. = FALSE)
}
# Not used below; retained so any later code that reads it still finds it.
remainder <- count - retmax

# Keep requesting pages until every reported id has been collected. The next
# offset is the number of ids actually received so far (not the echoed
# `retmax`), so nothing is skipped if the server returns fewer ids per page
# than were requested.
while (length(pmids) < count) {
  search_params$retstart <- length(pmids)
  search_response <- httr::GET(url = search_url, query = search_params)
  httr::stop_for_status(search_response, task = "page through PubMed esearch results")
  search_content <- httr::content(search_response, "text", encoding = "UTF-8")
  search_result <- jsonlite::fromJSON(search_content)
  page_ids <- search_result$esearchresult$idlist
  if (length(page_ids) == 0) {
    # An empty page means the server will not hand out any more ids; stop
    # here rather than looping forever.
    break
  }
  pmids <- c(pmids, page_ids)
}

# Make truncation visible instead of silently continuing with a partial set.
# NOTE(review): NCBI documents a cap on how many PubMed records esearch will
# return for a single query -- confirm against the E-utilities documentation
# if this warning fires.
if (length(pmids) < count) {
  warning(
    "Retrieved ", length(pmids), " of ", count,
    " PMIDs reported by esearch; the result set is incomplete.",
    call. = FALSE
  )
}
# length(pmids)
# ---- Fetch the records for the collected PMIDs (efetch) ----
efetch_url <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# Form fields for the POST request. All ids are sent as one comma-separated
# string; POST is used so the id list does not have to fit in a URL.
# NOTE(review): NCBI documents an upper limit on the number of records efetch
# returns per request -- confirm that very large id lists are not truncated.
efetch_params <- list(
  db = "pubmed",
  id = paste(pmids, collapse = ","),
  rettype = "abstract",
  retmode = "xml"
)

# Request the records as XML and stop on an HTTP error, so that an error page
# is never handed to the XML parser.
efetch_response <- httr::POST(url = efetch_url, body = efetch_params, encode = "form")
httr::stop_for_status(efetch_response, task = "fetch PubMed records (efetch)")
efetch_content <- httr::content(efetch_response, "text", encoding = "UTF-8")

# Parse the XML content. `asText = TRUE` states explicitly that the argument
# is the document itself, not a file name or URL.
doc <- XML::xmlParse(efetch_content, asText = TRUE)
xmltop <- XML::xmlRoot(doc)

# Interactive inspection helpers:
# xmlSize(xmltop)
# xmlName(xmltop[[1]][[1]][[1]])
# xmlValue(xmltop[[1]][[1]][["PMID"]])
# ---- Flatten every PubmedArticle node into one row, then save as JSON ----
# Each article becomes a one-row tibble; the rows are stacked into
# `pub_dates`. Fields that are absent from a record come out as NA.
pub_dates <- XML::xpathApply(doc, "//PubmedArticle", function(node) {
  # First child of the article node (presumably <MedlineCitation>; verify
  # against the PubMed XML schema) and the <Article> element inside it.
  citation <- node[[1]]
  article <- citation[["Article"]]
  journal_node <- article[["Journal"]]

  # NOTE(review): this reads the PubMedPubDate entry selected by name from
  # the record's History, whatever its status is -- confirm that this is the
  # date intended for `pub_date`.
  history_date <- node[["PubmedData"]][["History"]][["PubMedPubDate"]]
  date_string <- paste(
    XML::xmlValue(history_date[["Year"]]),
    XML::xmlValue(history_date[["Month"]]),
    XML::xmlValue(history_date[["Day"]])
  )

  # Affiliation of the first entry in the author list.
  first_author <- article[["AuthorList"]][[1]]

  dplyr::tibble(
    pmid = XML::xmlValue(citation[["PMID"]]),
    ab = XML::xmlValue(article[["Abstract"]]),
    pub_date = lubridate::ymd(date_string),
    title = XML::xmlValue(article[["ArticleTitle"]]),
    journal_issn = XML::xmlValue(journal_node[["ISSN"]]),
    journal = XML::xmlValue(journal_node[["Title"]]),
    author_affil = XML::xmlValue(first_author[["AffiliationInfo"]])
  )
}) |>
  dplyr::bind_rows()

# Write the combined table to disk as pretty-printed JSON.
jsonlite::write_json(pub_dates, path = "pubmed.json", pretty = TRUE)