use crate::coords::hgvs_pos_to_index;
use crate::error::FerroError;
use crate::error_handling::ResolvedAction;
use crate::hgvs::edit::{Base, NaEdit};
use crate::normalize::config::NormalizeConfig;
/// Outcome of checking an edit's stated reference bases against a reference sequence.
///
/// The constructors in this file produce two shapes: [`ValidationResult::ok`] sets `valid`
/// to `true` and leaves every optional field `None`, while [`ValidationResult::mismatch`]
/// sets `valid` to `false` and fills all three optional fields.
#[derive(Debug, Clone)]
pub struct ValidationResult {
/// `true` when no disagreement with the reference was detected.
pub valid: bool,
/// Human-readable description of the disagreement, if any.
pub warning: Option<String>,
/// What the reference holds at the checked location. On range errors `mismatch` is
/// given a parenthesised explanation here instead of bases.
pub actual_ref: Option<String>,
/// The reference bases as stated by the variant.
pub stated_ref: Option<String>,
}
impl ValidationResult {
pub fn ok() -> Self {
Self {
valid: true,
warning: None,
actual_ref: None,
stated_ref: None,
}
}
pub fn mismatch(stated: String, actual: String) -> Self {
Self {
valid: false,
warning: Some(format!(
"Reference mismatch: stated '{}' but actual is '{}'",
stated, actual
)),
actual_ref: Some(actual),
stated_ref: Some(stated),
}
}
}
/// Checks the reference bases stated by `edit` against `ref_seq`.
///
/// * `Substitution` — the stated reference base is compared with the base at `start`
///   (`end` is not consulted).
/// * `Deletion`, `Duplication`, `Inversion` — when the edit spells out a sequence, it is
///   compared with the reference over `start..=end`; when it does not, there is nothing
///   to check and the result is [`ValidationResult::ok`].
/// * `Delins` and every other edit kind — no check is performed and the result is always
///   [`ValidationResult::ok`].
///
/// `start` and `end` are HGVS positions; `start` is translated to a slice index with
/// `hgvs_pos_to_index`.
pub fn validate_reference(edit: &NaEdit, ref_seq: &[u8], start: u64, end: u64) -> ValidationResult {
    match edit {
        NaEdit::Substitution { reference, .. } => validate_single_base(reference, ref_seq, start),

        // The three range edits share one rule: validate only an explicitly stated sequence.
        NaEdit::Deletion { sequence, .. } => match sequence {
            Some(stated) => validate_sequence(stated.bases(), ref_seq, start, end),
            None => ValidationResult::ok(),
        },
        NaEdit::Duplication { sequence, .. } => match sequence {
            Some(stated) => validate_sequence(stated.bases(), ref_seq, start, end),
            None => ValidationResult::ok(),
        },
        NaEdit::Inversion { sequence, .. } => match sequence {
            Some(stated) => validate_sequence(stated.bases(), ref_seq, start, end),
            None => ValidationResult::ok(),
        },

        // Delins is accepted without inspecting the reference.
        NaEdit::Delins { .. } => ValidationResult::ok(),

        // Remaining edit kinds are accepted without a reference check.
        _ => ValidationResult::ok(),
    }
}
/// Compares one stated base with the reference base at HGVS position `pos`.
///
/// The comparison ignores ASCII case. A position whose index falls outside `ref_seq`
/// is reported as a mismatch whose "actual" text explains the range error; a genuine
/// base disagreement reports the reference base in upper case.
fn validate_single_base(stated: &Base, ref_seq: &[u8], pos: u64) -> ValidationResult {
    match ref_seq.get(hgvs_pos_to_index(pos)) {
        // Index is past the end of the reference.
        None => ValidationResult::mismatch(
            stated.to_char().to_string(),
            format!("(position {} out of range)", pos),
        ),
        Some(found) if found.eq_ignore_ascii_case(&stated.to_u8()) => ValidationResult::ok(),
        Some(&found) => {
            let found_upper = char::from(found).to_ascii_uppercase();
            ValidationResult::mismatch(stated.to_char().to_string(), found_upper.to_string())
        }
    }
}
/// Compares a stated run of bases with the reference over HGVS positions `start..=end`.
///
/// `start` is converted with `hgvs_pos_to_index`; `end` is used directly as the exclusive
/// upper slice bound. Checks run in this order, each failure yielding a mismatch result:
///
/// 1. `end` lies beyond the reference,
/// 2. the converted start index is greater than `end`,
/// 3. the stated sequence differs from the reference window in length or, ignoring ASCII
///    case, in content — the window is then reported in upper case.
fn validate_sequence(stated: &[Base], ref_seq: &[u8], start: u64, end: u64) -> ValidationResult {
    // Rendered lazily: only failure paths need the stated bases as text.
    let stated_text = || -> String { stated.iter().map(|base| base.to_char()).collect() };

    let first = hgvs_pos_to_index(start);
    let past_last = end as usize;

    if past_last > ref_seq.len() {
        return ValidationResult::mismatch(
            stated_text(),
            format!("(position {} out of range)", end),
        );
    }
    if first > past_last {
        return ValidationResult::mismatch(
            stated_text(),
            format!("(inverted range: start {} > end {})", start, end),
        );
    }

    let window = &ref_seq[first..past_last];

    // A length difference and a base difference are reported identically, so both are
    // folded into a single predicate.
    let agrees = stated.len() == window.len()
        && stated
            .iter()
            .zip(window)
            .all(|(base, observed)| observed.eq_ignore_ascii_case(&base.to_u8()));
    if agrees {
        return ValidationResult::ok();
    }

    let window_text: String = window
        .iter()
        .map(|&byte| (byte as char).to_ascii_uppercase())
        .collect();
    ValidationResult::mismatch(stated_text(), window_text)
}
/// Turns a validation outcome into accept-or-reject according to `config`.
///
/// A valid `result` is accepted without consulting the configuration. Otherwise the
/// action returned by `config.ref_mismatch_action()` decides:
///
/// * `Reject` — fail with the stated and actual reference text (or `"?"` where absent);
/// * `WarnCorrect` — print the result's warning, if it has one, to stderr and accept;
/// * `SilentCorrect` / `Accept` — accept without any output.
///
/// This function performs no correction of the variant itself.
///
/// # Errors
///
/// Returns [`FerroError::ReferenceMismatch`], with `variant_str` as the location, when
/// `result` is invalid and the configured action is `Reject`.
pub fn apply_validation_policy(
    result: &ValidationResult,
    config: &NormalizeConfig,
    variant_str: &str,
) -> Result<(), FerroError> {
    if result.valid {
        return Ok(());
    }

    match config.ref_mismatch_action() {
        ResolvedAction::SilentCorrect | ResolvedAction::Accept => Ok(()),
        ResolvedAction::WarnCorrect => {
            if let Some(message) = &result.warning {
                eprintln!("Warning: {} in '{}'", message, variant_str);
            }
            Ok(())
        }
        ResolvedAction::Reject => {
            // Stand-in for a side the result does not carry.
            let unknown = || "?".to_string();
            Err(FerroError::ReferenceMismatch {
                location: variant_str.to_string(),
                expected: result.stated_ref.clone().unwrap_or_else(unknown),
                found: result.actual_ref.clone().unwrap_or_else(unknown),
            })
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hgvs::edit::Sequence;
    use std::str::FromStr;

    /// Upper-case reference shared by most tests.
    const REFERENCE: &[u8] = b"ATGC";

    /// Builds a substitution edit stating `reference` as the original base.
    fn substitution(reference: Base, alternative: Base) -> NaEdit {
        NaEdit::Substitution {
            reference,
            alternative,
        }
    }

    /// Builds a deletion edit that spells out the deleted bases.
    fn deletion_stating(bases: &str) -> NaEdit {
        NaEdit::Deletion {
            sequence: Some(Sequence::from_str(bases).unwrap()),
            length: None,
        }
    }

    /// A failing result where `G` was stated but `A` was found.
    fn g_stated_a_found() -> ValidationResult {
        ValidationResult::mismatch("G".to_string(), "A".to_string())
    }

    #[test]
    fn test_validate_substitution_match() {
        let outcome = validate_reference(&substitution(Base::A, Base::G), REFERENCE, 1, 1);
        assert!(outcome.valid);
    }

    #[test]
    fn test_validate_substitution_mismatch() {
        let outcome = validate_reference(&substitution(Base::G, Base::A), REFERENCE, 1, 1);
        assert!(!outcome.valid);
        assert_eq!(outcome.stated_ref, Some("G".to_string()));
        assert_eq!(outcome.actual_ref, Some("A".to_string()));
    }

    #[test]
    fn test_validate_deletion_match() {
        let outcome = validate_reference(&deletion_stating("ATG"), REFERENCE, 1, 3);
        assert!(outcome.valid);
    }

    #[test]
    fn test_validate_deletion_mismatch() {
        let outcome = validate_reference(&deletion_stating("GGG"), REFERENCE, 1, 3);
        assert!(!outcome.valid);
        assert_eq!(outcome.stated_ref, Some("GGG".to_string()));
        assert_eq!(outcome.actual_ref, Some("ATG".to_string()));
    }

    #[test]
    fn test_validate_no_sequence() {
        // A deletion given only by length has no stated bases to compare.
        let length_only = NaEdit::Deletion {
            sequence: None,
            length: Some(3),
        };
        let outcome = validate_reference(&length_only, REFERENCE, 1, 3);
        assert!(outcome.valid);
    }

    #[test]
    fn test_validate_case_insensitive() {
        // Lower-case reference bytes must still match an upper-case stated base.
        let lowercase_reference = b"atgc";
        let outcome =
            validate_reference(&substitution(Base::A, Base::G), lowercase_reference, 1, 1);
        assert!(outcome.valid);
    }

    #[test]
    fn test_apply_policy_strict() {
        let verdict =
            apply_validation_policy(&g_stated_a_found(), &NormalizeConfig::strict(), "c.1G>T");
        assert!(verdict.is_err());
    }

    #[test]
    fn test_apply_policy_lenient() {
        let verdict =
            apply_validation_policy(&g_stated_a_found(), &NormalizeConfig::lenient(), "c.1G>T");
        assert!(verdict.is_ok());
    }

    #[test]
    fn test_apply_policy_silent() {
        let verdict =
            apply_validation_policy(&g_stated_a_found(), &NormalizeConfig::silent(), "c.1G>T");
        assert!(verdict.is_ok());
    }

    #[test]
    fn test_validate_sequence_inverted_range() {
        // start (3) after end (1) must be reported as invalid rather than panic on slicing.
        let outcome = validate_reference(&deletion_stating("ATG"), REFERENCE, 3, 1);
        assert!(!outcome.valid);
    }
}