/// Lookup table pairing versioned RefSeq `NC_` accessions (first element)
/// with UCSC-style chromosome names (second element).
///
/// It lists 25 entries: `chr1`–`chr22`, `chrX`, `chrY` and `chrM`. Both
/// `refseq_to_ucsc` and `ucsc_to_refseq` scan this slice linearly and compare
/// with exact string equality, so the version suffix (e.g. `.11`) is part of
/// the key.
///
/// NOTE(review): the accession versions look like the GRCh38 primary assembly
/// plus the mitochondrial reference — confirm before using this table with
/// data from another assembly.
const NC_TO_UCSC: &[(&str, &str)] = &[
("NC_000001.11", "chr1"),
("NC_000002.12", "chr2"),
("NC_000003.12", "chr3"),
("NC_000004.12", "chr4"),
("NC_000005.10", "chr5"),
("NC_000006.12", "chr6"),
("NC_000007.14", "chr7"),
("NC_000008.11", "chr8"),
("NC_000009.12", "chr9"),
("NC_000010.11", "chr10"),
("NC_000011.10", "chr11"),
("NC_000012.12", "chr12"),
("NC_000013.11", "chr13"),
("NC_000014.9", "chr14"),
("NC_000015.10", "chr15"),
("NC_000016.10", "chr16"),
("NC_000017.11", "chr17"),
("NC_000018.10", "chr18"),
("NC_000019.10", "chr19"),
("NC_000020.11", "chr20"),
("NC_000021.9", "chr21"),
("NC_000022.11", "chr22"),
("NC_000023.11", "chrX"),
("NC_000024.10", "chrY"),
("NC_012920.1", "chrM"),
];
/// Translates a RefSeq accession into its UCSC-style chromosome name.
///
/// `acc` is compared for exact equality against the RefSeq column of
/// `NC_TO_UCSC`, so the version suffix must match. The first matching entry's
/// UCSC name is returned; if no entry matches, `acc` itself is handed back
/// unchanged (this includes the empty string).
pub fn refseq_to_ucsc(acc: &str) -> &str {
    NC_TO_UCSC
        .iter()
        .find(|(refseq, _)| *refseq == acc)
        .map_or(acc, |&(_, ucsc_name)| ucsc_name)
}
/// Translates a UCSC-style chromosome name into its RefSeq accession.
///
/// This is the inverse lookup over `NC_TO_UCSC`: `ucsc` is compared for exact
/// equality against the UCSC column, and the RefSeq accession of the first
/// matching entry is returned. Names that are not in the table — including
/// the empty string — are returned unchanged.
pub fn ucsc_to_refseq(ucsc: &str) -> &str {
    match NC_TO_UCSC.iter().find(|entry| entry.1 == ucsc) {
        Some(&(accession, _)) => accession,
        None => ucsc,
    }
}
/// Reports whether `chrom` names a non-primary sequence, judged purely by its
/// prefix or suffix.
///
/// Returns `true` when the name starts with `NW_`, `NT_` or `chrUn_`, or when
/// it ends with `_alt`, `_fix` or `_random`. Everything else, including the
/// empty string, yields `false`. All comparisons are case-sensitive.
pub fn is_patch_sequence(chrom: &str) -> bool {
    const FLAGGED_PREFIXES: [&str; 3] = ["NW_", "NT_", "chrUn_"];
    const FLAGGED_SUFFIXES: [&str; 3] = ["_alt", "_fix", "_random"];

    let prefix_hit = FLAGGED_PREFIXES
        .iter()
        .any(|&prefix| chrom.starts_with(prefix));

    prefix_hit
        || FLAGGED_SUFFIXES
            .iter()
            .any(|&suffix| chrom.ends_with(suffix))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each RefSeq accession in the expected list maps to its UCSC name.
    #[test]
    fn maps_all_standard_chromosomes() {
        let cases: &[(&str, &str)] = &[
            ("NC_000001.11", "chr1"),
            ("NC_000002.12", "chr2"),
            ("NC_000003.12", "chr3"),
            ("NC_000004.12", "chr4"),
            ("NC_000005.10", "chr5"),
            ("NC_000006.12", "chr6"),
            ("NC_000007.14", "chr7"),
            ("NC_000008.11", "chr8"),
            ("NC_000009.12", "chr9"),
            ("NC_000010.11", "chr10"),
            ("NC_000011.10", "chr11"),
            ("NC_000012.12", "chr12"),
            ("NC_000013.11", "chr13"),
            ("NC_000014.9", "chr14"),
            ("NC_000015.10", "chr15"),
            ("NC_000016.10", "chr16"),
            ("NC_000017.11", "chr17"),
            ("NC_000018.10", "chr18"),
            ("NC_000019.10", "chr19"),
            ("NC_000020.11", "chr20"),
            ("NC_000021.9", "chr21"),
            ("NC_000022.11", "chr22"),
            ("NC_000023.11", "chrX"),
            ("NC_000024.10", "chrY"),
            ("NC_012920.1", "chrM"),
        ];
        for &(acc, expected_ucsc) in cases {
            let actual = refseq_to_ucsc(acc);
            assert_eq!(actual, expected_ucsc, "mapping failed for {acc}");
        }
    }

    /// `NW_`/`NT_` accessions are not in the table and must pass through.
    #[test]
    fn patch_sequences_round_trip_unchanged() {
        for accession in ["NW_025791820.1", "NT_187633.1"] {
            assert_eq!(refseq_to_ucsc(accession), accession);
        }
    }

    /// Inputs that match no table entry come back untouched.
    #[test]
    fn unknown_accession_returned_as_is() {
        for input in ["gibberish", ""] {
            assert_eq!(refseq_to_ucsc(input), input);
        }
    }

    /// Each UCSC name in the expected list maps back to its RefSeq accession.
    #[test]
    fn ucsc_to_refseq_maps_all_standard_chromosomes() {
        let cases: &[(&str, &str)] = &[
            ("chr1", "NC_000001.11"),
            ("chr2", "NC_000002.12"),
            ("chr3", "NC_000003.12"),
            ("chr4", "NC_000004.12"),
            ("chr5", "NC_000005.10"),
            ("chr6", "NC_000006.12"),
            ("chr7", "NC_000007.14"),
            ("chr8", "NC_000008.11"),
            ("chr9", "NC_000009.12"),
            ("chr10", "NC_000010.11"),
            ("chr11", "NC_000011.10"),
            ("chr12", "NC_000012.12"),
            ("chr13", "NC_000013.11"),
            ("chr14", "NC_000014.9"),
            ("chr15", "NC_000015.10"),
            ("chr16", "NC_000016.10"),
            ("chr17", "NC_000017.11"),
            ("chr18", "NC_000018.10"),
            ("chr19", "NC_000019.10"),
            ("chr20", "NC_000020.11"),
            ("chr21", "NC_000021.9"),
            ("chr22", "NC_000022.11"),
            ("chrX", "NC_000023.11"),
            ("chrY", "NC_000024.10"),
            ("chrM", "NC_012920.1"),
        ];
        for &(ucsc, expected_refseq) in cases {
            let actual = ucsc_to_refseq(ucsc);
            assert_eq!(actual, expected_refseq, "inverse mapping failed for {ucsc}");
        }
    }

    /// Names absent from the UCSC column (patch contigs, RefSeq accessions,
    /// arbitrary text, the empty string) are returned as given.
    #[test]
    fn ucsc_to_refseq_passes_through_patches_and_unknowns() {
        let untouched = [
            "chr9_KN196479v1_fix",
            "chr22_KI270879v1_alt",
            "chrUn_KI270302v1",
            "NC_000001.11",
            "NW_025791820.1",
            "gibberish",
            "",
        ];
        for name in untouched {
            assert_eq!(ucsc_to_refseq(name), name);
        }
    }

    /// UCSC -> RefSeq -> UCSC is the identity for every table entry.
    #[test]
    fn ucsc_refseq_round_trip_identity_on_primary_chromosomes() {
        for &(_, ucsc) in NC_TO_UCSC {
            let round_tripped = refseq_to_ucsc(ucsc_to_refseq(ucsc));
            assert_eq!(round_tripped, ucsc, "round-trip failed for {ucsc}");
        }
    }

    /// `NW_`/`NT_` prefixes are flagged; `NC_` accessions, primary UCSC names
    /// and the empty string are not.
    #[test]
    fn is_patch_sequence_detects_prefixes() {
        for name in ["NW_025791820.1", "NT_187633.1"] {
            assert!(is_patch_sequence(name), "expected {name:?} to be flagged");
        }
        for name in ["NC_000001.11", "chr1", ""] {
            assert!(!is_patch_sequence(name), "expected {name:?} not to be flagged");
        }
    }

    /// UCSC `_fix`/`_alt`/`_random` suffixes and the `chrUn_` prefix are
    /// flagged; primary chromosome names are not.
    #[test]
    fn is_patch_sequence_detects_ucsc_alt_fix_contigs() {
        let flagged = [
            "chr9_KN196479v1_fix",
            "chr22_KI270879v1_alt",
            "chr1_KI270706v1_random",
            "chrUn_KI270302v1",
        ];
        for name in flagged {
            assert!(is_patch_sequence(name), "expected {name:?} to be flagged");
        }
        for name in ["chr1", "chrX", "chrY", "chrM"] {
            assert!(!is_patch_sequence(name), "expected {name:?} not to be flagged");
        }
    }
}