use ferro_hgvs::parse_hgvs;
/// HGVS strings that the parser is expected to reject.
///
/// Each entry is `(hgvs_string, reason)`. The reason is a short human-readable
/// label that the tests in this file only use when printing their reports.
const INVALID_HGVS_PATTERNS: &[(&str, &str)] = &[
// Position 0 and negative genomic positions.
("NG_007485.1:g.0del", "position 0 invalid"),
("NG_012337.1:g.0_1insAAA", "position 0 invalid"),
("NG_007485.1:g.-1del", "negative genomic position"),
// Ranges whose start lies after their end.
("NG_007485.1:g.4_3del", "inverted range"),
("NG_007485.1(NM_000077.4):c.135_130insT", "inverted range"),
("NG_012337.1(NM_012459.2):c.135_130insT", "inverted range"),
(
"NG_012337.1(NM_012459.2):c.-11025_11020inv",
"inverted range",
),
// Intronic-style offsets (`+N`) used with the genomic `g.` coordinate type.
("NG_012337.1:g.7125+1G>T", "intronic offset on genomic"),
("NG_012337.1:g.5+3del", "intronic offset on genomic"),
("NG_012337.1:g.4_5+3del", "intronic offset on genomic"),
("NG_012337.1:g.5+3_7del", "intronic offset on genomic"),
(
"NG_012337.1:g.10_11insLRG_24:g.2+5",
"intronic offset on genomic in reference",
),
// Substitution whose replacement is a number rather than a base.
(
"NG_012337.1:g.20>60",
"substitution with number instead of base",
),
// Bracketed insertions containing a range with an unknown (`?`) endpoint.
(
"NG_012337.1:g.20_21ins[30_?]",
"incomplete range in insertion",
),
(
"NG_012337.1:g.20_21ins[?_40]",
"incomplete range in insertion",
),
(
"NG_012337.1:g.20_21ins[30_40;50_?]",
"incomplete range in insertion",
),
(
"NG_012337.1:g.20_21ins[30_40;?_60]",
"incomplete range in insertion",
),
// `AA[...]` suffix directly after a position range.
("NG_012337.1:g.20_21AA[30_40]", "invalid AA syntax"),
("NG_012337.1:g.20_21AA[?]", "invalid AA syntax"),
// Protein coordinates given as bare numbers, without amino-acid codes.
(
"NM_004152.3:p.205_685del",
"protein position without amino acid",
),
];
/// Extended/complex HGVS strings that this file treats as valid input.
///
/// Each entry is `(hgvs_string, description)`. The description is a category
/// label that the tests in this file only use when printing their reports.
/// Entries the parser fails on are reported as "Unsupported" / "gaps".
const EXTENDED_VALID_PATTERNS: &[(&str, &str)] = &[
// Genome-assembly prefix with a chromosome name instead of an accession.
("GRCh37(chr23):g.3250del", "assembly notation"),
("GRCh37(chrX):g.3250del", "assembly notation"),
("GRCh37(chrY):g.3250del", "assembly notation"),
("GRCh38(chr23):g.3250del", "assembly notation"),
("GRCh38(chrX):g.3250del", "assembly notation"),
("GRCh38(chrY):g.3250del", "assembly notation"),
// Bracketed, multi-part insertions and duplications.
(
"NG_012337.1(NM_012459.2):c.1_2ins[LRG_1:g.100000;AAA]",
"complex insertion with reference",
),
("NG_012337.1:g.274dup[1;A]", "complex dup notation"),
(
"NG_012337.1:g.20_21ins[(123);30_40]",
"complex insertion with ranges",
),
(
"NG_012337.1:g.20_21ins[123;30_40]",
"complex insertion with ranges",
),
(
"NG_012337.1:g.20_21ins[30_40;123]",
"complex insertion with ranges",
),
// Insertions and duplications with unknown (`?`) content or extent.
("NG_012337.1:g.20_21ins[?]", "uncertain insertion"),
("NG_012337.1:g.20_21ins[30_40;?]", "uncertain insertion"),
("NG_012337.1:g.722_742dup(731_741)", "uncertain dup range"),
("NG_012337.1:g.722_742dup(731_?)", "uncertain dup range"),
("NG_012337.1:g.722_742dup(?_731)", "uncertain dup range"),
("NG_012337.1:g.722_742dup(?)", "uncertain dup range"),
("NG_012337.1:g.722_742dup?", "uncertain dup"),
// Deletion-insertions whose inserted sequence is a reference to another position.
// NOTE(review): the first entry references `g.0`, while INVALID_HGVS_PATTERNS
// labels position 0 as invalid — confirm this entry is intended to be valid.
(
"NG_007485.1(NR_003529.3):n.10delinsNG_007485.1:g.0",
"delins with reference",
),
(
"NG_007485.1(NR_003529.3):n.10delinsNG_007485.1:g.100000",
"delins with reference",
),
];
/// Legacy/lax HGVS notations that this file counts as valid input.
///
/// Each entry is `(hgvs_string, description)`. The description is a category
/// label that the tests in this file only use when printing their reports.
/// The summary test labels this group "should normalize".
const LEGACY_NOTATION_PATTERNS: &[(&str, &str)] = &[
// Substitution with more than one reference base.
(
"NG_012337.1(NM_003002.2):c.274ATT>T",
"multi-base substitution",
),
// Inversions followed by an explicit length or sequence (coding coordinates).
(
"NG_012337.1(NM_003002.2):c.274inv3",
"inversion with length",
),
(
"NG_012337.1(NM_003002.2):c.274_279inv3",
"inversion with length",
),
(
"NG_012337.1(NM_003002.2):c.274_279invAA",
"inversion with sequence",
),
(
"NG_012337.1(NM_003002.2):c.274invAA",
"inversion with sequence",
),
// Inversions followed by an explicit length or sequence (genomic coordinates).
("NG_012337.1:g.274inv1", "inversion with length"),
("NG_012337.1:g.274invT", "inversion with sequence"),
("NG_012337.1:g.274_276inv3", "inversion with length"),
("NG_012337.1:g.274_276invTAC", "inversion with sequence"),
// Duplications followed by an explicit length or sequence.
("NG_012337.1:g.274dup1", "dup with length"),
("NG_012337.1:g.274dupT", "dup with sequence"),
// Deletions followed by an explicit length.
("NG_012337.1(NM_003002.2):c.274del1", "del with length"),
("NG_012337.1(NM_003002.2):c.274del3", "del with length"),
("NG_012337.1(NM_003002.2):c.274_275del2", "del with length"),
("NG_012337.1(NM_003002.2):c.274_279del3", "del with length"),
// Deletion-insertions that spell out the deleted sequence.
("NG_012337.1:g.7125delGACinsT", "delins with deleted seq"),
("NG_012337.1:g.7125delGinsT", "delins with deleted seq"),
];
/// Non-standard notations, described as "Invitae-specific" by the tests in this file.
///
/// Each entry is `(hgvs_string, description)`. All current entries use the
/// `copyN` suffix on a position range. The summary test reports this group
/// separately and does not include it in the overall support rate.
const NONSTANDARD_PATTERNS: &[(&str, &str)] = &[
("NM_000352.3:c.2557_2694copy2", "copy number notation"),
(
"NC_000014.8:g.88401076_88459508copy4",
"copy number notation",
),
("NM_000352.3:n.2557_2694copy2", "copy number notation"),
];
/// Prints, for every entry of `INVALID_HGVS_PATTERNS`, whether `parse_hgvs`
/// rejected it, followed by a tally and a list of the entries it accepted.
///
/// Report-only: the function contains no assertions, so an accepted pattern is
/// printed as a bug but does not fail the test.
#[test]
fn test_invalid_patterns_correctly_rejected() {
    println!("\n=== Invalid HGVS Patterns (Should Reject) ===\n");

    // Entries the parser accepted even though they are listed as invalid.
    let mut wrongly_accepted: Vec<(&str, &str)> = Vec::new();

    for &(hgvs, why) in INVALID_HGVS_PATTERNS {
        match parse_hgvs(hgvs) {
            Err(_) => println!("✓ Rejected: {} ({})", hgvs, why),
            Ok(_) => {
                wrongly_accepted.push((hgvs, why));
                println!("✗ ACCEPTED (should reject): {} ({})", hgvs, why);
            }
        }
    }

    // Every entry is either rejected or recorded above, so the rejected count
    // is the remainder.
    let rejected_count = INVALID_HGVS_PATTERNS.len() - wrongly_accepted.len();
    println!(
        "\nCorrectly rejected: {}/{}",
        rejected_count,
        INVALID_HGVS_PATTERNS.len()
    );

    if wrongly_accepted.is_empty() {
        return;
    }
    println!("\nBUGS - Incorrectly accepted invalid patterns:");
    for (hgvs, why) in &wrongly_accepted {
        println!(" - {} ({})", hgvs, why);
    }
}
/// Prints, for every entry of `EXTENDED_VALID_PATTERNS`, whether `parse_hgvs`
/// accepts it, followed by a supported/total tally.
///
/// Report-only: the function contains no assertions, so an unsupported pattern
/// is printed but does not fail the test.
#[test]
fn test_extended_valid_patterns() {
    println!("\n=== Extended/Complex Valid HGVS Patterns ===\n");
    let mut supported = 0;
    for (pattern, description) in EXTENDED_VALID_PATTERNS {
        // Unsupported entries are reported inline; nothing is collected because
        // no later step consumes the list.
        if parse_hgvs(pattern).is_ok() {
            supported += 1;
            println!("✓ Supported: {} ({})", pattern, description);
        } else {
            println!("✗ Unsupported: {} ({})", pattern, description);
        }
    }
    println!(
        "\nSupported: {}/{}",
        supported,
        EXTENDED_VALID_PATTERNS.len()
    );
}
/// Prints, for every entry of `LEGACY_NOTATION_PATTERNS`, whether `parse_hgvs`
/// accepts it, followed by a supported/total tally.
///
/// Report-only: the function contains no assertions, so an unsupported pattern
/// is printed but does not fail the test.
#[test]
fn test_legacy_notation_patterns() {
    println!("\n=== Legacy/Lax Notation Patterns ===\n");
    let mut supported = 0;
    for (pattern, description) in LEGACY_NOTATION_PATTERNS {
        // Unsupported entries are reported inline; nothing is collected because
        // no later step consumes the list.
        if parse_hgvs(pattern).is_ok() {
            supported += 1;
            println!("✓ Supported: {} ({})", pattern, description);
        } else {
            println!("✗ Unsupported: {} ({})", pattern, description);
        }
    }
    println!(
        "\nSupported: {}/{}",
        supported,
        LEGACY_NOTATION_PATTERNS.len()
    );
}
/// Prints, for every entry of `NONSTANDARD_PATTERNS`, whether `parse_hgvs`
/// accepts it, followed by a supported/total tally.
///
/// Report-only: the function contains no assertions.
#[test]
fn test_nonstandard_patterns() {
    println!("\n=== Non-standard Patterns (Invitae-specific) ===\n");

    // The predicate both reports each entry and decides whether it is counted;
    // `count` visits the entries in table order, so the output order matches.
    let supported = NONSTANDARD_PATTERNS
        .iter()
        .filter(|&&(hgvs, label)| match parse_hgvs(hgvs) {
            Ok(_) => {
                println!("✓ Supported: {} ({})", hgvs, label);
                true
            }
            Err(_) => {
                println!("✗ Unsupported: {} ({})", hgvs, label);
                false
            }
        })
        .count();

    println!("\nSupported: {}/{}", supported, NONSTANDARD_PATTERNS.len());
}
/// Prints an aggregate report over all four pattern tables in this file.
///
/// The final support rate is computed from `EXTENDED_VALID_PATTERNS` and
/// `LEGACY_NOTATION_PATTERNS` only; the invalid and non-standard tables are
/// reported separately. Report-only: the function contains no assertions.
#[test]
fn test_recalculated_support_summary() {
    /// Counts the entries of `table` whose HGVS string satisfies `keep`.
    fn tally(table: &[(&str, &str)], keep: impl Fn(&str) -> bool) -> usize {
        table.iter().filter(|&&(hgvs, _)| keep(hgvs)).count()
    }

    let rejects = |hgvs: &str| parse_hgvs(hgvs).is_err();
    let accepts = |hgvs: &str| parse_hgvs(hgvs).is_ok();

    println!("\n============================================================");
    println!(" RECALCULATED HGVS PATTERN SUPPORT SUMMARY");
    println!("============================================================\n");

    // Gather every figure before printing any of the per-group sections.
    let invalid_total = INVALID_HGVS_PATTERNS.len();
    let invalid_rejected = tally(INVALID_HGVS_PATTERNS, rejects);
    let invalid_accepted = invalid_total - invalid_rejected;

    let extended_total = EXTENDED_VALID_PATTERNS.len();
    let extended_supported = tally(EXTENDED_VALID_PATTERNS, accepts);
    let extended_unsupported = extended_total - extended_supported;

    let legacy_total = LEGACY_NOTATION_PATTERNS.len();
    let legacy_supported = tally(LEGACY_NOTATION_PATTERNS, accepts);
    let legacy_unsupported = legacy_total - legacy_supported;

    let nonstandard_supported = tally(NONSTANDARD_PATTERNS, accepts);

    println!("Invalid HGVS (error cases):");
    println!(" Correctly rejected: {}/{}", invalid_rejected, invalid_total);
    println!(" Incorrectly accepted (bugs): {}", invalid_accepted);

    println!("\nExtended Valid HGVS (complex notation):");
    println!(" Supported: {}/{}", extended_supported, extended_total);
    println!(" Unsupported (gaps): {}", extended_unsupported);

    println!("\nLegacy Notation (should normalize):");
    println!(" Supported: {}/{}", legacy_supported, legacy_total);
    println!(" Unsupported (gaps): {}", legacy_unsupported);

    println!("\nNon-standard (Invitae-specific):");
    println!(
        " Supported: {}/{}",
        nonstandard_supported,
        NONSTANDARD_PATTERNS.len()
    );

    // Overall rate covers only the two tables of valid notation.
    let total_valid = extended_total + legacy_total;
    let total_supported = extended_supported + legacy_supported;
    let mutalyzer_valid_rate = total_supported as f64 / total_valid as f64 * 100.0;

    println!("\n------------------------------------------------------------");
    println!("MUTALYZER ADJUSTED STATS (excluding invalid test cases):");
    println!(" Valid patterns tested: {}", total_valid);
    println!(" Supported: {}", total_supported);
    println!(" Unsupported: {}", total_valid - total_supported);
    println!(" Support rate: {:.1}%", mutalyzer_valid_rate);
    println!("------------------------------------------------------------");
}