PGS Catalog Calculator (pgsc_calc) report

Author

PGS Catalog Calculator (pgsc_calc)

Published

November 27, 2024

Note

See the online documentation for additional explanation of the terms and data presented in this report.

Workflow metadata

Command

Code
# Pretty-print the recorded workflow command: wrap it at 80 columns on spaces,
# prefix the first line with "$", indent continuation lines by four spaces, and
# end every line except the last with a backslash.
fold -w 80 -s command.txt \
  | awk 'NR == 1 { print "$", $0 } NR > 1 { print "    " $0 }' \
  | sed -e 's/$/\\/' -e '$ s/.$//'
$ nextflow run pgscatalog/pgsc_calc -profile test,singularity
Tip
  • If you’re using the test profile, this report and these results are not biologically meaningful
  • The test profile is only used to check that all software is installed and working correctly
  • If you’re reading this message, then that means everything is OK and you’re ready to use your own data!

Version

2.0.0

Scoring file metadata

Scoring file summary

Code
# Parse the scoring-file log referenced by the `log_scorefiles` report
# parameter (presumably a file path -- confirm against the workflow).
# simplifyVector = FALSE keeps JSON arrays and objects as nested R lists
# instead of collapsing them into vectors or data frames.
json_list <- jsonlite::fromJSON(params$log_scorefiles, simplifyVector = FALSE)

#' Build HTML links to EFO for the mapped traits of one scoring file
#'
#' @param trait_efo EFO identifiers (list or character vector). May be empty,
#'   or `""` when the scoring file header has no `trait_efo` field (that is
#'   what `extract_chr_handle_null()` returns for a missing field).
#' @param mapped Display labels, paired element-wise with `trait_efo`.
#' @return A character vector with one entry per identifier: an `<a>` element
#'   for a non-empty identifier, the bare label for an empty identifier, or a
#'   single `""` when there are no identifiers at all.
link_traits <- function(trait_efo, mapped) {
  if (length(trait_efo) == 0) {
    return("")
  }
  purrr::map2_chr(trait_efo, mapped, ~ {
    if (identical(as.character(.x), "")) {
      # No identifier to link to: emitting the anchor anyway would produce a
      # link to the bare EFO root with whatever label is present.
      as.character(.y)
    } else {
      stringr::str_glue('<a href="http://www.ebi.ac.uk/efo/{.x}">{.y}</a>')
    }
  })
}

#' Build the HTML trait summary for every scoring file in the log
#'
#' @param x List of scoring-file log entries. Each entry is read through its
#'   `header` element for `trait_efo` and `trait_mapped`.
#' @return Character vector with one HTML snippet per entry, showing the
#'   reported trait followed by the linked mapped trait(s).
extract_traits <- function(x) {
  trait_efo <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_efo"))
  mapped <- purrr::map(x, ~ extract_chr_handle_null(.x$header, "trait_mapped"))
  trait_display <- purrr::map2(trait_efo, mapped, link_traits)
  # several mapped traits are stacked on separate lines inside the table cell
  mapped_trait_links <- purrr::map_chr(trait_display, ~ paste(.x, collapse = "<br />"))
  reported_traits <- purrr::map(x, ~ {
    # Keep the top-level lookup for entries that carry the field there, but
    # fall back to the header, where the other trait fields are read from.
    # NOTE(review): confirm which level the log actually stores this under.
    reported <- extract_chr_handle_null(.x, "trait_reported")
    if (identical(reported, "")) {
      reported <- extract_chr_handle_null(.x$header, "trait_reported")
    }
    reported
  })
  purrr::map2_chr(reported_traits, mapped_trait_links, ~ {
    stringr::str_glue("<u>Reported trait:</u> {.x} <br /> <u>Mapped trait(s):</u> {.y}")
  })
}

#' Fetch a field from a list, substituting "" when the field is absent
#'
#' @param x A list (or NULL) to read from.
#' @param field Name of the element to extract with `[[`.
#' @return `""` when `x[[field]]` is NULL; otherwise the stored value. An
#'   atomic value comes back as character, because assigning `""` into it
#'   coerces the vector even when no element is replaced. Lists stay lists.
extract_chr_handle_null <- function(x, field) {
  value <- x[[field]]
  # The subassignment fills in "" for a NULL value and is a no-op selection
  # otherwise.
  value[is.null(value)] <- ""
  value
}

#' Turn a PGS Catalog identifier into an HTML link
#'
#' @param id A single identifier string; `""` means "no identifier".
#' @param link_type Path segment placed between the site root and the
#'   identifier (the callers in this report pass "score" or "publication").
#' @return `id` unchanged when it is empty, otherwise an `<a>` element whose
#'   target and label are both built from `id`.
link_pgscatalog <- function(id, link_type) {
  # nothing to link to
  if (id == "") {
    return(id)
  }
  stringr::str_glue('<a href="https://www.pgscatalog.org/{link_type}/{id}">{id}</a>')
}

#' Append a small-print note underneath an identifier
#'
#' @param id A single (possibly already linked) identifier string; `""` means
#'   there is nothing to annotate.
#' @param note Text rendered inside `<small>` on the line below `id`.
#' @return `id` unchanged when it is empty, otherwise `id`, a line break and
#'   the note.
add_note <- function(id, note) {
  # an empty identifier stays empty rather than gaining a dangling note
  if (id == "") {
    return(id)
  }
  stringr::str_glue("{id} <br /> <small>{note}</small>")
}

#' Format the original and harmonised genome builds as one HTML snippet
#'
#' @param original_build Genome build label of the original scoring file.
#' @param harmonised_build Genome build label after harmonisation.
#' @return A glue string with both builds, separated by a line break.
annotate_genome_build <- function(original_build, harmonised_build) {
  # str_glue resolves the placeholders against this function's arguments
  build_template <- "<u>Original build:</u> {original_build} <br /> <u>Harmonised build:</u> {harmonised_build}"
  stringr::str_glue(build_template)
}

# Build the scoring-file metadata table: one row per entry in the JSON log.
scorefile_metadata <- tibble(
  pgs_id = purrr::map_chr(json_list, "pgs_id"),
  pgs_name = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "pgs_name")),
  pgp_id = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "pgp_id")),
  citation = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "citation")),
  trait_display = extract_traits(json_list),
  genome_build = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "genome_build")),
  harmonised_build = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "HmPOS_build")),
  n_variants = purrr::map_chr(json_list, ~ extract_chr_handle_null(.x$header, "variants_number")),
  compatible_effect_type = purrr::map_lgl(json_list, "compatible_effect_type"),
  has_complex_alleles = purrr::map_lgl(json_list, "has_complex_alleles")
) %>%
  mutate(
    # link each identifier to the PGS Catalog, then put its note underneath
    pgs_id = purrr::map2_chr(
      purrr::map_chr(pgs_id, link_pgscatalog, "score"),
      pgs_name,
      add_note
    ),
    pgp_id = purrr::map2_chr(
      purrr::map_chr(pgp_id, link_pgscatalog, "publication"),
      citation,
      add_note
    ),
    # show the original and harmonised builds together in one cell
    genome_build = purrr::map2_chr(genome_build, harmonised_build, annotate_genome_build)
  ) %>%
  # keep only the columns that are displayed
  select(
    pgs_id, pgp_id, trait_display, n_variants, genome_build,
    has_complex_alleles, compatible_effect_type
  )

Variant matching

Parameters

Code
# Print the variant-matching parameters saved in params.txt
cat params.txt
keep_multiallelic: false
keep_ambiguous   : false
min_overlap      : 0.75

Summary

Code
# Summarise variant matching per sampleset and scoring file: collapse every
# status other than "matched" into "unmatched", total the variant counts, turn
# them into percentages, then spread the two statuses into n_* / percent_*
# columns (combinations that never occur are filled with 0).
compat <- log_df %>%
  mutate(
    match_status = forcats::fct_collapse(
      match_status,
      matched = "matched",
      other_level = "unmatched"
    )
  ) %>%
  group_by(sampleset, accession, match_status, score_pass) %>%
  count(wt = count) %>%
  # regroup so the totals below are taken per sampleset and scoring file
  group_by(sampleset, accession) %>%
  mutate(
    percent = round(n / sum(n) * 100, 1),
    n_variants = sum(n)
  ) %>%
  arrange(accession, desc(percent)) %>%
  tidyr::pivot_wider(
    names_from = match_status,
    values_from = c(n, percent)
  ) %>%
  replace(is.na(.), 0)
Code
# If every PGS matched perfectly (no unmatched or excluded variants) the
# n_unmatched column is missing, so add it with a zero count.
if (!("n_unmatched" %in% colnames(compat))) {
  compat <- mutate(compat, n_unmatched = 0)
}

# Interactive summary table with a CSV export button; the scoring-file cell is
# coloured according to whether the score passed matching.
compat %>%
  select(
    sampleset, accession, n_variants, score_pass, percent_matched,
    n_matched, n_unmatched
  ) %>%
  mutate(score_pass = as.logical(score_pass)) %>%
  DT::datatable(
    rownames = FALSE,
    extensions = "Buttons",
    options = list(dom = "Bfrtip", buttons = "csv"),
    colnames = c(
      "Sampleset" = "sampleset",
      "Scoring file" = "accession",
      "Number of variants" = "n_variants",
      "Passed matching" = "score_pass",
      "Match %" = "percent_matched",
      "Total matched" = "n_matched",
      "Total unmatched" = "n_unmatched"
    )
  ) %>%
  DT::formatStyle(
    "Scoring file",
    valueColumns = "Passed matching",
    backgroundColor = DT::styleEqual(
      c(FALSE, TRUE),
      c("#c2a5cf", "#a6dba0")
    )
  )

Detailed log

Scores

Success
  • All requested scores were calculated successfully

2 scores for 2504 samples processed.

Score data

Density plot(s)

Note

The summary density plots show up to six scoring files

Get all scores

All scores can be found in the results directory, at:

cineca/score/aggregated_scores.txt.gz

Citation

Samuel A. Lambert, Benjamin Wingfield, Joel T. Gibson, Laurent Gil, Santhi Ramachandran, Florent Yvon, Shirin Saverimuttu, Emily Tinsley, Elizabeth Lewis, Scott C. Ritchie, Jingqin Wu, Rodrigo Canovas, Aoife McMahon, Laura W. Harris, Helen Parkinson, Michael Inouye. Enhancing the Polygenic Score Catalog with tools for score calculation and ancestry normalization. Nature Genetics (2024) | doi: 10.1038/s41588-024-01937-x

Important

For scores from the PGS Catalog, please remember to cite the original publications from which they came (these are listed in the metadata table).

Score licenses

Tip
  • Scores deposited in the PGS Catalog may have specific license terms
  • It’s important to follow the license terms when you reuse scoring files
  • Please check below for a summary of license terms
  • License terms for custom scoring files aren’t reported here; please check how the creators of the scoring file licensed their data
Code
# As of 2023-12-12 the scoring file header only records a license when it
# differs from the default, so an empty license field means the default EBI
# terms apply.
default_ebi_terms <- "PGS obtained from the Catalog should be cited appropriately, and used in accordance with any licensing restrictions set by the authors. See EBI Terms of Use (https://www.ebi.ac.uk/about/terms-of-use/) for additional details."

tibble(
  pgs_id = map_chr(json_list, "pgs_id"),
  license_text = map_chr(json_list, ~ extract_chr_handle_null(.x$header, "license"))
) %>%
  # only scoring files from the PGS Catalog (those with a PGS ID) are listed
  filter(startsWith(pgs_id, "PGS")) %>%
  # fall back to the default terms where the header has no license entry
  mutate(license_text = ifelse(license_text == "", default_ebi_terms, license_text)) %>%
  DT::datatable(
    colnames = c(
      "PGS ID" = "pgs_id",
      "License text" = "license_text"
    )
  )