use ferro_hgvs::{parse_hgvs, MultiFastaProvider, Normalizer, ReferenceProvider};
use std::path::Path;
/// Builds the reference provider from the benchmark manifest, when present.
///
/// Returns `None` if `benchmark-output/manifest.json` does not exist, or if
/// `MultiFastaProvider::from_manifest` returns an error (the error itself is
/// discarded). Callers in this file treat `None` as "skip the test".
fn get_provider() -> Option<MultiFastaProvider> {
    let manifest = Path::new("benchmark-output/manifest.json");
    if !manifest.exists() {
        return None;
    }
    MultiFastaProvider::from_manifest(manifest).ok()
}
/// Asserts that `NM_001408491.1:c.517delA` normalizes to a `c.518del` form.
///
/// Before normalizing, the test prints diagnostics: the transcript layout, the
/// transcript coordinates computed for c.517 and c.518, the bases around them,
/// and the exon(s) containing c.517. It returns early without failing when the
/// benchmark manifest or the transcript cannot be loaded.
#[test]
fn test_nm_001408491_c517dela_should_shift() {
    let Some(provider) = get_provider() else {
        eprintln!("Skipping test: benchmark-output not available");
        return;
    };
    let transcript = match provider.get_transcript("NM_001408491.1") {
        Err(e) => {
            eprintln!("Skipping test: transcript not found: {}", e);
            return;
        }
        Ok(tx) => tx,
    };

    // --- Transcript overview -------------------------------------------------
    println!("=== Transcript NM_001408491.1 ===");
    println!("Sequence length: {}", transcript.sequence.len());
    println!("CDS start (1-based): {:?}", transcript.cds_start);
    println!("CDS end: {:?}", transcript.cds_end);
    println!("Exon count: {}", transcript.exons.len());
    for (exon, ordinal) in transcript.exons.iter().zip(1usize..) {
        println!(
            " Exon {}: {} - {} (number: {})",
            ordinal, exon.start, exon.end, exon.number
        );
    }

    // --- c. position -> transcript position ----------------------------------
    // A missing CDS start falls back to 1; c.N then maps to `cds_start + N - 1`.
    let cds_start = transcript.cds_start.unwrap_or(1);
    let pos_517 = cds_start + 517 - 1;
    let pos_518 = cds_start + 518 - 1;
    println!("\nCoordinate mapping:");
    println!(" CDS start: {}", cds_start);
    println!(
        " c.517 -> tx position {} (0-based: {})",
        pos_517,
        pos_517 - 1
    );
    println!(
        " c.518 -> tx position {} (0-based: {})",
        pos_518,
        pos_518 - 1
    );

    // --- Bases around the variant --------------------------------------------
    let bases = transcript.sequence.as_bytes();
    let at_517 = (pos_517 - 1) as usize;
    let at_518 = (pos_518 - 1) as usize;
    if at_517 >= bases.len() || at_518 >= bases.len() {
        eprintln!("Position out of range!");
    } else {
        println!("\nSequence around c.517-518:");
        // Window of up to three bases on either side, clamped to the sequence.
        let window_start = at_517.saturating_sub(3);
        let window_end = bases.len().min(at_518 + 4);
        let window: String = bases[window_start..window_end]
            .iter()
            .map(|&base| base as char)
            .collect();
        println!(" ... {} ...", window);

        let (base_517, base_518) = (bases[at_517], bases[at_518]);
        println!(
            " c.517 (tx pos {}, 0-based {}): {}",
            pos_517, at_517, base_517 as char
        );
        println!(
            " c.518 (tx pos {}, 0-based {}): {}",
            pos_518, at_518, base_518 as char
        );
        if base_517 == b'A' && base_518 == b'A' {
            println!("\n CONFIRMED: Both c.517 and c.518 are 'A'");
            println!(" According to 3' rule, c.517delA should shift to c.518del");
        }
    }

    // --- Exon boundary check -------------------------------------------------
    println!("\nExon containing c.517 (tx pos {}):", pos_517);
    let containing = transcript
        .exons
        .iter()
        .filter(|exon| exon.start <= pos_517 && pos_517 <= exon.end);
    for exon in containing {
        println!(" Exon {} (tx {}-{})", exon.number, exon.start, exon.end);
        let boundary_note = if pos_518 <= exon.end {
            " c.518 is also in this exon - no boundary issues"
        } else {
            " WARNING: c.518 is NOT in this exon - boundary may prevent shift!"
        };
        println!("{}", boundary_note);
    }

    // --- Normalization -------------------------------------------------------
    let normalizer = Normalizer::new(provider);
    let variant = parse_hgvs("NM_001408491.1:c.517delA").unwrap();
    let output = normalizer.normalize(&variant).unwrap().to_string();
    println!("\n=== Normalization Result ===");
    println!("Input: NM_001408491.1:c.517delA");
    println!("Output: {}", output);
    assert!(
        output.contains("c.518del"),
        "Expected c.517delA to normalize to c.518del (3' rule), got: {}",
        output
    );
}
/// Prints the bases at c.1324 through c.1328 of `NM_033517.1`, followed by the
/// normalized form of `NM_033517.1:c.1324del`.
///
/// Exploratory: nothing is asserted, so the test only fails if parsing or
/// normalization panics via `unwrap`. It returns early without failing when the
/// benchmark manifest or the transcript cannot be loaded.
#[test]
fn test_potential_bug_deletion_shift_nm033517() {
    let Some(provider) = get_provider() else {
        eprintln!("Skipping test: benchmark-output not available");
        return;
    };
    let transcript = match provider.get_transcript("NM_033517.1") {
        Err(e) => {
            eprintln!("Skipping test: transcript not found: {}", e);
            return;
        }
        Ok(tx) => tx,
    };

    // A missing CDS start falls back to 1; c.N maps to `cds_start + N - 1`.
    let cds_start = transcript.cds_start.unwrap_or(1);
    let bases = transcript.sequence.as_bytes();
    for c_pos in 1324..=1328 {
        let tx_pos = cds_start + c_pos - 1;
        let idx = (tx_pos - 1) as usize;
        // Positions past the end of the sequence are silently skipped.
        if let Some(&base) = bases.get(idx) {
            println!(
                "c.{} (tx {}, idx {}): {}",
                c_pos, tx_pos, idx, base as char
            );
        }
    }

    let normalizer = Normalizer::new(provider);
    let variant = parse_hgvs("NM_033517.1:c.1324del").unwrap();
    let normalized = normalizer.normalize(&variant).unwrap();
    println!("\nInput: NM_033517.1:c.1324del");
    println!("Output: {}", normalized);
}
/// Prints the bases at c.2137 through c.2145 of `NM_001282424.3` and the
/// normalized form of `NM_001282424.3:c.2139_2140delinsTATGCA`.
///
/// If the normalized output contains `2140_2141`, a "potential bug" notice is
/// printed; the test deliberately does not assert on it. It returns early
/// without failing when the benchmark manifest or the transcript cannot be
/// loaded.
#[test]
fn test_potential_bug_delins_shift() {
    let Some(provider) = get_provider() else {
        eprintln!("Skipping test: benchmark-output not available");
        return;
    };
    let transcript = match provider.get_transcript("NM_001282424.3") {
        Err(e) => {
            eprintln!("Skipping test: transcript not found: {}", e);
            return;
        }
        Ok(tx) => tx,
    };

    // A missing CDS start falls back to 1; c.N maps to `cds_start + N - 1`.
    let cds_start = transcript.cds_start.unwrap_or(1);
    let bases = transcript.sequence.as_bytes();
    println!("Sequence around c.2139-2141:");
    for c_pos in 2137..=2145 {
        let tx_pos = cds_start + c_pos - 1;
        let idx = (tx_pos - 1) as usize;
        // Positions past the end of the sequence are silently skipped.
        if let Some(&base) = bases.get(idx) {
            println!(
                "c.{} (tx {}, idx {}): {}",
                c_pos, tx_pos, idx, base as char
            );
        }
    }

    let normalizer = Normalizer::new(provider);
    let variant = parse_hgvs("NM_001282424.3:c.2139_2140delinsTATGCA").unwrap();
    let normalized = normalizer.normalize(&variant).unwrap();
    println!("\nInput: NM_001282424.3:c.2139_2140delinsTATGCA");
    println!("Output: {}", normalized);

    // Report (but do not fail on) a delins whose range moved by one position.
    let shifted = normalized.to_string().contains("2140_2141");
    if shifted {
        println!("\n⚠️ POTENTIAL BUG: Ferro shifted delins from 2139_2140 to 2140_2141");
        println!(" HGVS spec says delins should use original position, not shift");
    }
}
/// Prints the bases at c.-60 through c.-40 of `NM_001394148.2` and the
/// normalized form of `NM_001394148.2:c.-56_-47dup`.
///
/// Exploratory: nothing is asserted, so the test only fails if parsing or
/// normalization panics via `unwrap`. It returns early without failing when the
/// benchmark manifest or the transcript cannot be loaded.
#[test]
fn test_5utr_duplication_shifting() {
    let Some(provider) = get_provider() else {
        eprintln!("Skipping test: benchmark-output not available");
        return;
    };
    let transcript = match provider.get_transcript("NM_001394148.2") {
        Ok(tx) => tx,
        Err(e) => {
            eprintln!("Skipping test: transcript not found: {}", e);
            return;
        }
    };

    // A missing CDS start falls back to 1.
    let cds_start = transcript.cds_start.unwrap_or(1);
    let seq = transcript.sequence.as_bytes();
    println!("CDS start: {}", cds_start);
    println!("Sequence around c.-56 to c.-45:");
    for offset in -60i64..=-40 {
        // Range-check the SIGNED position before converting to unsigned: a
        // negative value cast with `as u64` wraps to a huge positive number,
        // which a `>= 1` test on the unsigned value can never reject.
        let signed_tx_pos = cds_start as i64 + offset;
        if signed_tx_pos < 1 {
            // The 5' UTR is shorter than this offset; nothing to print.
            continue;
        }
        let tx_pos = signed_tx_pos as u64;
        let idx = (tx_pos - 1) as usize;
        // Positions past the end of the sequence are silently skipped.
        if let Some(&base) = seq.get(idx) {
            println!(
                "c.{} (tx {}, idx {}): {}",
                offset, tx_pos, idx, base as char
            );
        }
    }

    let normalizer = Normalizer::new(provider);
    let variant = parse_hgvs("NM_001394148.2:c.-56_-47dup").unwrap();
    let result = normalizer.normalize(&variant).unwrap();
    println!("\nInput: NM_001394148.2:c.-56_-47dup");
    println!("Output: {}", result);
}
/// Asserts that the loaded `NM_001408491.1` sequence has `'A'` at both c.517
/// and c.518.
///
/// The test returns early without failing when the benchmark manifest or the
/// transcript cannot be loaded, or when either position lies outside the
/// loaded sequence. Each skip is reported on stderr so that a run which
/// verified nothing is visible in the output.
#[test]
fn test_compare_with_mutalyzer_sequence() {
    let Some(provider) = get_provider() else {
        eprintln!("Skipping test: benchmark-output not available");
        return;
    };
    let transcript = match provider.get_transcript("NM_001408491.1") {
        Ok(tx) => tx,
        Err(e) => {
            eprintln!("Skipping test: transcript not found: {}", e);
            return;
        }
    };

    // A missing CDS start falls back to 1; c.N maps to `cds_start + N - 1`.
    let cds_start = transcript.cds_start.unwrap_or(1);
    let tx_pos_517 = cds_start + 517 - 1;
    let tx_pos_518 = cds_start + 518 - 1;
    let seq = transcript.sequence.as_bytes();
    let idx_517 = (tx_pos_517 - 1) as usize;
    let idx_518 = (tx_pos_518 - 1) as usize;

    // Previously an out-of-range index made the test pass without checking
    // anything and without any message; report the skip explicitly instead.
    let (Some(&raw_517), Some(&raw_518)) = (seq.get(idx_517), seq.get(idx_518)) else {
        eprintln!(
            "Skipping test: c.517/c.518 out of range (idx {} / {}, sequence length {})",
            idx_517,
            idx_518,
            seq.len()
        );
        return;
    };

    let base_517 = raw_517 as char;
    let base_518 = raw_518 as char;
    println!("cdot sequence at c.517: {}", base_517);
    println!("cdot sequence at c.518: {}", base_518);
    assert_eq!(
        base_517, 'A',
        "Expected c.517 to be 'A' per mutalyzer result"
    );
    assert_eq!(
        base_518, 'A',
        "Expected c.518 to be 'A' for 3' shift to be valid"
    );
}