Code
cat command.txt | fold -w 80 -s | awk -F ' ' 'NR==1 { print "$", $0} NR>1 { print " " $0}' | sed 's/$/\\/' | sed '$ s/.$//'
$ nextflow run pgscatalog/pgsc_calc -profile test,singularity
See the online documentation for additional explanation of the terms and data presented in this report.
cat command.txt | fold -w 80 -s | awk -F ' ' 'NR==1 { print "$", $0} NR>1 { print " " $0}' | sed 's/$/\\/' | sed '$ s/.$//'
$ nextflow run pgscatalog/pgsc_calc -profile test,singularity
2.0.0
<- jsonlite::fromJSON(params$log_scorefiles, simplifyVector = FALSE)
json_list
<- function(trait_efo, mapped) {
link_traits if (length(trait_efo) == 0) {
return("")
else {
} return(purrr::map2_chr(trait_efo, mapped, ~ stringr::str_glue('<a href="http://www.ebi.ac.uk/efo/{.x}">{.y}</a>')))
}
}
<- function(x) {
extract_traits <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_efo"))
trait_efo <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_mapped"))
mapped <- purrr::map2(trait_efo, mapped, link_traits)
trait_display <- purrr::map_chr(trait_display, ~ paste(.x, collapse = "<br />"))
mapped_trait_links <- purrr::map(x, ~ extract_chr_handle_null(.x, "trait_reported"))
reported_traits ::map2_chr(reported_traits, mapped_trait_links, ~ {
purrr::str_glue("<u>Reported trait:</u> {.x} <br /> <u>Mapped trait(s):</u> {.y}")
stringr
})
}
<- function(x, field) {
extract_chr_handle_null return(replace(x[[field]], is.null(x[[field]]), ""))
}
<- function(id, link_type) {
link_pgscatalog if (id != "") {
return(stringr::str_glue('<a href="https://www.pgscatalog.org/{link_type}/{id}">{id}</a>'))
else {
} return(id)
}
}
<- function(id, note) {
add_note if (id != "") {
return(stringr::str_glue("{id} <br /> <small>{note}</small>"))
else {
} return(id)
}
}
<- function(original_build, harmonised_build) {
annotate_genome_build return(stringr::str_glue("<u>Original build:</u> {original_build} <br /> <u>Harmonised build:</u> {harmonised_build}"))
}
# extract fields from json list
tibble(
pgs_id = map_chr(json_list, "pgs_id"),
pgs_name = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "pgs_name")),
pgp_id = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "pgp_id")),
citation = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "citation")),
trait_display = extract_traits(json_list),
genome_build = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "genome_build")),
harmonised_build = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "HmPOS_build")),
n_variants = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "variants_number")),
compatible_effect_type = map_lgl(json_list, "compatible_effect_type"),
has_complex_alleles = map_lgl(json_list, "has_complex_alleles")) %>%
# add links to pgs catalog identifiers
mutate(pgs_id = purrr::map_chr(pgs_id, ~ link_pgscatalog(.x, "score")),
pgp_id = purrr::map_chr(pgp_id, ~ link_pgscatalog(.x, "publication"))) %>%
# add notes
mutate(pgp_id = purrr::map2_chr(pgp_id, citation, ~ add_note(.x, .y)),
pgs_id = purrr::map2_chr(pgs_id, pgs_name, ~ add_note(.x, .y)),
genome_build = purrr::map2_chr(genome_build, harmonised_build, ~ annotate_genome_build(.x, .y))) %>%
# pick columns
select(pgs_id, pgp_id, trait_display, n_variants, genome_build, has_complex_alleles, compatible_effect_type) -> scorefile_metadata
cat params.txt
keep_multiallelic: false
keep_ambiguous : false
min_overlap : 0.75
%>%
log_df mutate(match_status = forcats::fct_collapse(match_status, matched = "matched", other_level = "unmatched")) %>%
group_by(sampleset, accession, match_status, score_pass) %>%
count(wt = count) %>%
group_by(sampleset, accession) %>%
mutate(percent = round(n / sum(n) * 100, 1), n_variants = sum(n)) %>%
arrange(accession, desc(percent)) %>%
::pivot_wider(names_from = match_status, values_from = c(n, percent)) %>%
tidyrreplace(is.na(.), 0) -> compat
if (!"n_unmatched" %in% colnames(compat)) {
# handle missing column if all PGS matches perfectly (e.g. no unmatched or excluded variants)
<- compat %>%
compat mutate(n_unmatched = 0)
}
%>%
compat select(sampleset, accession, n_variants, score_pass, percent_matched,
%>%
n_matched, n_unmatched) mutate(score_pass = as.logical(score_pass)) %>%
::datatable(rownames = FALSE,
DTextensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv')),
colnames = c(
"Sampleset" = "sampleset",
"Scoring file" = "accession",
"Number of variants" = "n_variants",
"Passed matching" = "score_pass",
"Match %" = "percent_matched",
"Total matched" = "n_matched",
"Total unmatched" = "n_unmatched"
%>%
)) ::formatStyle('Scoring file',
DTvalueColumns = 'Passed matching',
backgroundColor = DT::styleEqual(c(FALSE, TRUE), c('#c2a5cf', '#a6dba0')))
2 scores for 2504 samples processed.
The summary density plots show up to six scoring files
All scores can be found in the results directory, at:
cineca/score/aggregated_scores.txt.gz
Samuel A. Lambert, Benjamin Wingfield, Joel T. Gibson, Laurent Gil, Santhi Ramachandran, Florent Yvon, Shirin Saverimuttu, Emily Tinsley, Elizabeth Lewis, Scott C. Ritchie, Jingqin Wu, Rodrigo Canovas, Aoife McMahon, Laura W. Harris, Helen Parkinson, Michael Inouye. Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization. Nature Genetics (2024) | doi: 10.1038/s41588-024-01937-x
For scores from the PGS Catalog, please remember to cite the original publications from which they came (these are listed in the metadata table).
# as of 2023-12-12 only non-default licenses are recorded in the scoring file header
<- "PGS obtained from the Catalog should be cited appropriately, and used in accordance with any licensing restrictions set by the authors. See EBI Terms of Use (https://www.ebi.ac.uk/about/terms-of-use/) for additional details."
default_ebi_terms
tibble(
pgs_id = map_chr(json_list, "pgs_id"),
license_text = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "license"))) %>%
mutate(license_text = ifelse(license_text == "", default_ebi_terms, license_text)) %>%
# display license terms for files in the PGS Catalog only (with a PGS ID)
filter(startsWith(pgs_id, "PGS")) %>%
::datatable(., colnames = c(
DT"PGS ID" = "pgs_id",
"License text" = "license_text"
))