/// Validates a list of 1-based positions and returns it sorted ascending.
///
/// `field` is used as the prefix of any error message.
///
/// # Errors
///
/// Returns an error string when the list contains `0`, or when any value
/// occurs more than once. The zero check takes precedence over the
/// duplicate check.
pub fn normalize_positions(positions: Vec<u32>, field: &str) -> Result<Vec<u32>, String> {
    let mut ordered = positions;
    ordered.sort_unstable();

    // Once sorted, a zero (the smallest possible u32) can only sit at the front.
    if ordered.first() == Some(&0) {
        return Err(format!(
            "{field}: positions must be ≥ 1 (1-based); found zero"
        ));
    }

    // Equal values are neighbours after sorting, so pairing each element with
    // its successor finds the smallest repeated value first.
    let repeated = ordered
        .iter()
        .zip(ordered.iter().skip(1))
        .find(|(current, following)| current == following)
        .map(|(current, _)| *current);

    match repeated {
        Some(value) => Err(format!("{field}: duplicate position: {}", value)),
        None => Ok(ordered),
    }
}
/// Validates a list of `[start, end]` ranges and returns it sorted by start,
/// then by end.
///
/// `field` is used as the prefix of any error message.
///
/// # Errors
///
/// Checks run in this order, and the first failing one produces the error:
///
/// 1. any endpoint equal to `0`;
/// 2. any range where `start >= end` (all such ranges are listed, in input
///    order);
/// 3. any range that, after sorting, starts at or before the end of the range
///    immediately preceding it (all such neighbouring pairs are listed).
pub fn normalize_ranges(ranges: Vec<[u32; 2]>, field: &str) -> Result<Vec<[u32; 2]>, String> {
    if ranges.iter().any(|range| range.contains(&0)) {
        return Err(format!(
            "{field}: range endpoints must be ≥ 1 (1-based); found zero"
        ));
    }

    let mut malformed: Vec<[u32; 2]> = Vec::new();
    for &range in &ranges {
        let [start, end] = range;
        if start >= end {
            malformed.push(range);
        }
    }
    if !malformed.is_empty() {
        return Err(format!(
            "{field}: each range must satisfy start < end; invalid ranges: {bad:?}",
            bad = malformed
        ));
    }

    // Arrays compare lexicographically, i.e. by start and then by end.
    let mut ordered = ranges;
    ordered.sort_unstable();

    // Only neighbouring ranges in sorted order are compared.
    let mut clashes: Vec<[[u32; 2]; 2]> = Vec::new();
    for (earlier, later) in ordered.iter().zip(ordered.iter().skip(1)) {
        if later[0] <= earlier[1] {
            clashes.push([*earlier, *later]);
        }
    }

    if clashes.is_empty() {
        Ok(ordered)
    } else {
        Err(format!(
            "{field}: ranges must not overlap; overlapping pairs: {overlapping:?}",
            overlapping = clashes
        ))
    }
}
/// Upper-cases a sequence (ASCII letters only) and validates its contents.
///
/// On success the upper-cased sequence is returned.
///
/// # Errors
///
/// Returns an error string when the upper-cased sequence is shorter than two
/// bytes, or when it contains any character other than `A`-`Z` or `*`. The
/// length check runs first; the first offending character is the one reported.
pub fn normalize_sequence(sequence: &str) -> Result<String, String> {
    let normalized = sequence.to_ascii_uppercase();

    // `String::len` counts bytes; this guard only fires for 0- or 1-byte input.
    let length = normalized.len();
    if length < 2 {
        return Err(format!(
            "sequence: must be at least 2 characters; got {} (\"{}\")",
            length, normalized
        ));
    }

    let offender = normalized
        .chars()
        .find(|&symbol| !symbol.is_ascii_uppercase() && symbol != '*');

    match offender {
        Some(bad_char) => Err(format!(
            "sequence: invalid character {bad_char:?}; \
             only A-Z (standard IUPAC amino acid codes) and * (stop codon) are permitted"
        )),
        None => Ok(normalized),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Unordered positions come back in ascending order.
    #[test]
    fn positions_sorted() {
        let outcome = normalize_positions(vec![3, 1, 2], "test");
        assert_eq!(outcome, Ok(vec![1, 2, 3]));
    }

    /// A value appearing twice is rejected.
    #[test]
    fn positions_rejects_duplicates() {
        let outcome = normalize_positions(vec![3, 1, 2, 1], "test");
        assert!(matches!(outcome, Err(_)));
    }

    /// Zero is rejected as a position.
    #[test]
    fn positions_rejects_zero() {
        let outcome = normalize_positions(vec![0, 1, 2], "test");
        assert!(matches!(outcome, Err(_)));
    }

    /// Valid, disjoint ranges are returned sorted by start.
    #[test]
    fn ranges_sorted_and_valid() {
        let outcome = normalize_ranges(vec![[5, 10], [1, 3]], "test");
        assert_eq!(outcome, Ok(vec![[1, 3], [5, 10]]));
    }

    /// A range whose start equals its end is rejected.
    #[test]
    fn ranges_rejects_degenerate() {
        let outcome = normalize_ranges(vec![[3, 3]], "test");
        assert!(matches!(outcome, Err(_)));
    }

    /// Two ranges sharing positions are rejected.
    #[test]
    fn ranges_rejects_overlap() {
        let outcome = normalize_ranges(vec![[1, 5], [4, 8]], "test");
        assert!(matches!(outcome, Err(_)));
    }

    /// A range starting right after the previous one ends is accepted.
    #[test]
    fn ranges_allows_adjacent() {
        let outcome = normalize_ranges(vec![[1, 3], [4, 8]], "test");
        assert_eq!(outcome, Ok(vec![[1, 3], [4, 8]]));
    }

    /// Lower-case input is returned upper-cased.
    #[test]
    fn sequence_uppercased() {
        let outcome = normalize_sequence("maeprq");
        assert_eq!(outcome, Ok(String::from("MAEPRQ")));
    }

    /// A one-character sequence is rejected.
    #[test]
    fn sequence_rejects_short() {
        let outcome = normalize_sequence("M");
        assert!(matches!(outcome, Err(_)));
    }

    /// A digit inside the sequence is rejected.
    #[test]
    fn sequence_rejects_invalid_chars() {
        let outcome = normalize_sequence("MAEP1Q");
        assert!(matches!(outcome, Err(_)));
    }
}