use omicsx::protein::Protein;
use omicsx::alignment::SmithWaterman;
/// Builds a [`Protein`] from a string of single-character amino-acid codes.
///
/// Every character of `codes` is converted with
/// `omicsx::protein::AminoAcid::from_code`, and the collected residues are
/// passed to `Protein::new`.
///
/// # Panics
///
/// Panics if any character fails to convert or if `Protein::new` returns an
/// error. This is a test helper, so aborting the calling test is intended.
fn protein_from_codes(codes: &str) -> Protein {
    // Collecting into `Result<Vec<_>, _>` stops at the first failed conversion.
    let parsed: Result<Vec<_>, _> = codes
        .chars()
        .map(omicsx::protein::AminoAcid::from_code)
        .collect();
    let residues = parsed.expect("Invalid amino acid codes");
    Protein::new(residues).expect("Failed to create protein")
}
/// Aligning a 7-residue sequence against an identical copy must report the
/// query span `0..7` and zero soft clips on both sides.
#[test]
fn test_soft_clipping_perfect_match() {
    // Query and target are built from the same code string.
    let codes = "ACDEFGH";
    let query = protein_from_codes(codes);
    let target = protein_from_codes(codes);

    let aligner = SmithWaterman::new();
    let alignment = aligner.align(&query, &target).unwrap();

    // Reported span on the first sequence (the query).
    assert_eq!(alignment.start_pos1, 0, "Perfect match should start at 0");
    assert_eq!(alignment.end_pos1, 7, "Perfect match should end at full length");

    // Soft-clip counts: `.0` is the left side, `.1` the right side.
    let clips = &alignment.soft_clips;
    assert_eq!(clips.0, 0, "No left clip for perfect match");
    assert_eq!(clips.1, 0, "No right clip for perfect match");
}
/// Checks, for several query/target pairs, that the reported soft clips agree
/// with the reported query coordinates:
///
/// * `soft_clips.0 == start_pos1`
/// * `soft_clips.1 == query.len() - end_pos1`
#[test]
fn test_soft_clip_formula_invariant() {
    // (query, target) code strings. A fixed-size array suffices for a constant
    // table; no heap allocation is needed.
    let test_cases = [
        ("ACDEFGH", "ACDEFGH"),
        ("ACDEFGH", "ACDEF"),
        ("ACDEFG", "ACDEFGH"),
        ("GHACDEFG", "ACDEFGH"),
    ];
    let sw = SmithWaterman::new();

    for (query_codes, target_codes) in test_cases {
        let query = protein_from_codes(query_codes);
        let target = protein_from_codes(target_codes);
        let result = sw.align(&query, &target).unwrap();

        // Both comparisons are done in `usize` (the coordinate type), so no
        // value is narrowed before it is compared.
        let left_clip = result.soft_clips.0 as usize;
        assert_eq!(left_clip, result.start_pos1,
            "Invariant failed for query {}: left_clip ({}) != start_pos ({})",
            query_codes, result.soft_clips.0, result.start_pos1);

        // `checked_sub` reports an end position beyond the query as a readable
        // failure instead of an arithmetic-overflow panic (or a silent wrap
        // when overflow checks are disabled).
        let expected_right = query
            .len()
            .checked_sub(result.end_pos1)
            .unwrap_or_else(|| {
                panic!(
                    "Invariant failed for query {}: end_pos ({}) exceeds seq_len ({})",
                    query_codes, result.end_pos1, query.len()
                )
            });
        let right_clip = result.soft_clips.1 as usize;
        assert_eq!(right_clip, expected_right,
            "Invariant failed for query {}: right_clip ({}) != seq_len ({}) - end_pos ({})",
            query_codes, result.soft_clips.1, query.len(), result.end_pos1);
    }
}
/// Checks that left clip + aligned query span + right clip adds up to the
/// query length, and that a non-empty aligned span comes with a non-empty
/// CIGAR.
#[test]
fn test_soft_clipping_positions_consistency() {
    let query = protein_from_codes("SSSACDEFGH");
    let target = protein_from_codes("ACDEFGHTTT");
    let sw = SmithWaterman::new();
    let result = sw.align(&query, &target).unwrap();

    // `checked_sub` makes an inverted span (end < start) fail with a clear
    // message rather than an arithmetic-overflow panic or a wrapped value.
    let aligned_length = result
        .end_pos1
        .checked_sub(result.start_pos1)
        .unwrap_or_else(|| {
            panic!(
                "Position consistency: end_pos ({}) is before start_pos ({})",
                result.end_pos1, result.start_pos1
            )
        });

    // Sum in `usize` so neither the span nor the total is narrowed.
    let left_clip = result.soft_clips.0 as usize;
    let right_clip = result.soft_clips.1 as usize;
    let total = left_clip + aligned_length + right_clip;
    assert_eq!(total, query.len(),
        "Position consistency: {} + {} + {} = {} (expected {})",
        result.soft_clips.0, aligned_length, result.soft_clips.1, total, query.len());

    if aligned_length > 0 {
        assert!(!result.cigar.is_empty(),
            "CIGAR string should not be empty for aligned region");
    }
}
/// Sanity-checks the bounds of an alignment result: start <= end on both
/// sequences, end positions within the sequence lengths, and each soft clip no
/// larger than the query length.
#[test]
fn test_soft_clipping_expected_values() {
    let query = protein_from_codes("ACDEFGH");
    let target = protein_from_codes("ACDEF");
    let sw = SmithWaterman::new();
    let result = sw.align(&query, &target).unwrap();

    let query_len = query.len();
    // Widen the clips to `usize` instead of narrowing the length to `u32`, so
    // the bound check cannot be distorted by a truncating cast.
    let left_clip = result.soft_clips.0 as usize;
    let right_clip = result.soft_clips.1 as usize;

    // Span on the first sequence (query).
    assert!(result.start_pos1 <= result.end_pos1,
        "Start position must be <= end position");
    assert!(result.end_pos1 <= query_len,
        "End position must be <= query length");

    // Span on the second sequence (target).
    assert!(result.start_pos2 <= result.end_pos2,
        "Start position (seq2) must be <= end position (seq2)");
    assert!(result.end_pos2 <= target.len(),
        "End position (seq2) must be <= target length");

    // Soft-clip bounds.
    assert!(left_clip <= query_len,
        "Left soft-clip must be <= query length");
    assert!(right_clip <= query_len,
        "Right soft-clip must be <= query length");
}
/// Checks, for several query/target pairs, that
/// `soft_clips.0 + (end_pos1 - start_pos1) + soft_clips.1 == query.len()`.
#[test]
fn test_soft_clipping_mathematical_proof() {
    // NOTE(review): these inputs contain 'B' and 'X', which are not among the
    // 20 standard one-letter amino-acid codes. `protein_from_codes` panics if
    // `AminoAcid::from_code` rejects them — confirm they are accepted.
    let test_sequences = [
        ("ABCDEFGH", "CDEFGH"),
        ("ABCDEFGH", "ABCDEFG"),
        ("XXXACDEFGXXX", "ACDEFG"),
    ];
    let sw = SmithWaterman::new();

    for (query_str, target_str) in test_sequences {
        let query = protein_from_codes(query_str);
        let target = protein_from_codes(target_str);
        let result = sw.align(&query, &target).unwrap();

        // `checked_sub` makes an inverted span (end < start) fail with a clear
        // message rather than an arithmetic-overflow panic or a wrapped value.
        let aligned_region_len = result
            .end_pos1
            .checked_sub(result.start_pos1)
            .unwrap_or_else(|| {
                panic!(
                    "For query '{}': end_pos ({}) is before start_pos ({})",
                    query_str, result.end_pos1, result.start_pos1
                )
            });

        let left_clip = result.soft_clips.0 as usize;
        let right_clip = result.soft_clips.1 as usize;
        let reconstructed_len = left_clip + aligned_region_len + right_clip;
        assert_eq!(reconstructed_len, query.len(),
            "For query '{}': Formula failed. left_clip ({}) + aligned ({}) + right_clip ({}) = {} (expected {})",
            query_str, result.soft_clips.0, aligned_region_len, result.soft_clips.1,
            reconstructed_len, query.len());
    }
}