use std::collections::HashMap;
use crate::error::A3Error;
use crate::normalization::{normalize_positions, normalize_ranges, normalize_sequence};
use crate::types::{
A3, A3_SCHEMA_URI, A3_VERSION, A3Index, Annotations, FlexEntry, Metadata, RegionEntry,
SiteEntry, VariantRecord,
};
pub fn validate(raw: A3) -> Result<A3, A3Error> {
let mut errors: Vec<String> = Vec::new();
if raw.schema != A3_SCHEMA_URI {
errors.push(format!(
"'$schema' must be '{A3_SCHEMA_URI}', got '{}'",
raw.schema
));
}
if raw.a3_version != A3_VERSION {
errors.push(format!(
"'a3_version' must be '{A3_VERSION}', got '{}'",
raw.a3_version
));
}
let sequence = match normalize_sequence(&raw.sequence) {
Ok(s) => s,
Err(e) => {
errors.push(e);
raw.sequence.to_uppercase()
}
};
let mut site: HashMap<String, SiteEntry> = HashMap::new();
for (name, entry) in raw.annotations.site {
if name.is_empty() {
errors.push("annotations.site: annotation name must not be empty".to_string());
continue; }
let field = format!("annotations.site.{name}");
match normalize_positions(entry.index, &field) {
Ok(index) => {
site.insert(
name,
SiteEntry {
index,
kind: entry.kind,
},
);
}
Err(e) => errors.push(e),
}
}
let mut region: HashMap<String, RegionEntry> = HashMap::new();
for (name, entry) in raw.annotations.region {
if name.is_empty() {
errors.push("annotations.region: annotation name must not be empty".to_string());
continue;
}
let field = format!("annotations.region.{name}");
match normalize_ranges(entry.index, &field) {
Ok(index) => {
region.insert(
name,
RegionEntry {
index,
kind: entry.kind,
},
);
}
Err(e) => errors.push(e),
}
}
let ptm = normalize_flex_family(raw.annotations.ptm, "ptm", &mut errors);
let processing = normalize_flex_family(raw.annotations.processing, "processing", &mut errors);
let mut variant: Vec<VariantRecord> = Vec::new();
for (i, record) in raw.annotations.variant.into_iter().enumerate() {
if record.position == 0 {
errors.push(format!(
"annotations.variant[{i}].position: must be ≥ 1 (1-based); got 0"
));
}
variant.push(record);
}
let seq_len = sequence.len() as u32;
for (name, entry) in &site {
check_positions_bounds(
&entry.index,
seq_len,
&format!("annotations.site.{name}"),
&mut errors,
);
}
for (name, entry) in ®ion {
check_ranges_bounds(
&entry.index,
seq_len,
&format!("annotations.region.{name}"),
&mut errors,
);
}
for (name, entry) in &ptm {
let field = format!("annotations.ptm.{name}");
match &entry.index {
A3Index::Positions(positions) => {
check_positions_bounds(positions, seq_len, &field, &mut errors)
}
A3Index::Ranges(ranges) => check_ranges_bounds(ranges, seq_len, &field, &mut errors),
}
}
for (name, entry) in &processing {
let field = format!("annotations.processing.{name}");
match &entry.index {
A3Index::Positions(positions) => {
check_positions_bounds(positions, seq_len, &field, &mut errors)
}
A3Index::Ranges(ranges) => check_ranges_bounds(ranges, seq_len, &field, &mut errors),
}
}
for (i, record) in variant.iter().enumerate() {
if record.position > seq_len {
errors.push(format!(
"annotations.variant[{i}].position: position {} is out of bounds \
for sequence of length {seq_len} (must be 1–{seq_len})",
record.position
));
}
}
if !errors.is_empty() {
return Err(A3Error::Validate(errors));
}
Ok(A3 {
schema: raw.schema,
a3_version: raw.a3_version,
sequence,
annotations: Annotations {
site,
region,
ptm,
processing,
variant,
},
metadata: Metadata {
uniprot_id: raw.metadata.uniprot_id,
description: raw.metadata.description,
reference: raw.metadata.reference,
organism: raw.metadata.organism,
},
})
}
/// Normalizes one positions-or-ranges annotation family (`family` is the key
/// used in error messages, e.g. `"ptm"`).
///
/// Entries with an empty name, or whose index fails normalization, are left
/// out of the returned map and a message describing the problem is appended
/// to `errors` instead. All remaining entries keep their `kind` and receive
/// the normalized index.
fn normalize_flex_family(
    entries: HashMap<String, FlexEntry>,
    family: &str,
    errors: &mut Vec<String>,
) -> HashMap<String, FlexEntry> {
    let mut normalized = HashMap::new();

    for (name, FlexEntry { index, kind }) in entries {
        if name.is_empty() {
            errors.push(format!(
                "annotations.{family}: annotation name must not be empty"
            ));
            continue;
        }

        let field = format!("annotations.{family}.{name}");

        // Dispatch on the index variant and re-wrap the normalized payload in
        // the same variant it arrived in.
        let outcome = match index {
            A3Index::Positions(positions) => {
                normalize_positions(positions, &field).map(A3Index::Positions)
            }
            A3Index::Ranges(ranges) => normalize_ranges(ranges, &field).map(A3Index::Ranges),
        };

        match outcome {
            Ok(index) => {
                normalized.insert(name, FlexEntry { index, kind });
            }
            Err(message) => errors.push(message),
        }
    }

    normalized
}
/// Appends one message to `errors` for each entry of `positions` that is
/// greater than `seq_len`, in input order. `field` prefixes every message.
///
/// Only the upper bound is examined here.
fn check_positions_bounds(positions: &[u32], seq_len: u32, field: &str, errors: &mut Vec<String>) {
    let offenders = positions
        .iter()
        .copied()
        .filter(|&candidate| candidate > seq_len);

    errors.extend(offenders.map(|pos| {
        format!(
            "{field}: position {pos} is out of bounds for sequence of \
             length {seq_len} (must be 1–{seq_len})"
        )
    }));
}
/// Appends one message to `errors` for each `[start, end]` pair in `ranges`
/// whose `end` is greater than `seq_len`, in input order. `field` prefixes
/// every message.
///
/// Only the `end` component is compared against the sequence length.
fn check_ranges_bounds(ranges: &[[u32; 2]], seq_len: u32, field: &str, errors: &mut Vec<String>) {
    let overruns = ranges
        .iter()
        .filter(|span| span[1] > seq_len)
        .map(|&[start, end]| {
            format!(
                "{field}: range [{start}, {end}] is out of bounds for sequence \
                 of length {seq_len} (must be 1–{seq_len})"
            )
        });

    errors.extend(overruns);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest document used as the starting point for every test: correct
    /// schema and version, a six-residue sequence, and default annotations
    /// and metadata.
    fn minimal_raw() -> A3 {
        A3 {
            schema: A3_SCHEMA_URI.to_string(),
            a3_version: A3_VERSION.to_string(),
            sequence: "MAEPRQ".to_string(),
            annotations: Annotations::default(),
            metadata: Metadata::default(),
        }
    }

    /// Site entry with a single position and an empty `kind`.
    fn site_at(position: u32) -> SiteEntry {
        SiteEntry {
            index: vec![position],
            kind: String::new(),
        }
    }

    #[test]
    fn valid_minimal_a3() {
        assert!(validate(minimal_raw()).is_ok());
    }

    #[test]
    fn sequence_is_uppercased() {
        let raw = A3 {
            sequence: "maeprq".to_string(),
            ..minimal_raw()
        };
        let validated = validate(raw).unwrap();
        assert_eq!(validated.sequence, "MAEPRQ");
    }

    #[test]
    fn rejects_short_sequence() {
        let raw = A3 {
            sequence: "M".to_string(),
            ..minimal_raw()
        };
        assert!(validate(raw).is_err());
    }

    #[test]
    fn rejects_out_of_bounds_site_position() {
        let mut raw = minimal_raw();
        // Position 10 lies past the end of the six-residue sequence.
        raw.annotations.site.insert("test".to_string(), site_at(10));
        assert!(matches!(validate(raw), Err(A3Error::Validate(_))));
    }

    #[test]
    fn collects_multiple_errors() {
        let mut raw = minimal_raw();
        for (name, position) in [("a", 99), ("b", 88)] {
            raw.annotations
                .site
                .insert(name.to_string(), site_at(position));
        }
        // Both bad entries must be reported, not just the first one found.
        match validate(raw) {
            Err(A3Error::Validate(errs)) => assert_eq!(errs.len(), 2),
            _ => panic!("expected Validate error with 2 messages"),
        }
    }
}