rtemis_a3/lib.rs
1//! # rtemis-a3
2//!
3//! Rust implementation of the [A3 (Amino Acid Annotation) format](https://a3.rtemis.org).
4//!
5//! A3 is a structured format for annotating amino acid sequences with site,
6//! region, post-translational modification, processing, and variant information.
7//!
8//! ## Quick start
9//!
10//! ```rust
11//! use rtemis_a3::{a3_from_json, a3_to_json};
12//!
13//! let json = r#"{
14//! "$schema": "https://schema.rtemis.org/a3/v1/schema.json",
15//! "a3_version": "1.0.0",
16//! "sequence": "MAEPRQ",
17//! "annotations": { "site": {}, "region": {}, "ptm": {}, "processing": {}, "variant": [] },
18//! "metadata": { "uniprot_id": "", "description": "", "reference": "", "organism": "" }
19//! }"#;
20//!
21//! let a3 = a3_from_json(json).unwrap();
22//! assert_eq!(a3.sequence(), "MAEPRQ");
23//! ```
24//!
25//! ## Module layout
26//!
27//! - [`error`] — `A3Error` enum
28//! - [`types`] — data model structs and enums
29//! - [`normalization`] — pure helpers: sort, deduplicate, overlap check
30//! - [`validation`] — two-stage validation (structural then contextual)
31
32pub mod error;
33pub mod normalization;
34pub mod types;
35pub mod validation;
36
37// Re-export the most commonly used items so users can write
38// `use rtemis_a3::A3` instead of `use rtemis_a3::types::A3`.
39pub use error::A3Error;
40pub use types::{
41 A3, A3_SCHEMA_URI, A3_VERSION, A3Index, Annotations, FlexEntry, Metadata, RegionEntry,
42 SiteEntry, VariantRecord,
43};
44pub use validation::validate;
45
46use serde::Serialize as _;
47
48// ---------------------------------------------------------------------------
49// Public API
50// ---------------------------------------------------------------------------
51
52/// Parse and validate an A3 JSON string.
53///
54/// This function composes two steps:
55/// 1. `serde_json::from_str` — deserialize the JSON into a raw [`A3`] struct.
56/// 2. [`validate()`] — normalize fields and enforce all A3 rules (from [`validation`]).
57///
58/// Both steps return `Result<_, A3Error>`. The `?` operator is Rust's concise
59/// way to propagate errors: if the expression on its left is `Err(e)`, the
60/// function immediately returns `Err(e.into())` (converting the error type if
61/// needed). If it is `Ok(value)`, execution continues with `value`.
62///
63/// Without `?` the first line would be:
64/// ```ignore
65/// let raw: A3 = match serde_json::from_str(text) {
66/// Ok(v) => v,
67/// Err(e) => return Err(A3Error::from(e)),
68/// };
69/// ```
70///
71/// `?` makes chains of fallible operations read almost like non-fallible code.
72pub fn a3_from_json(text: &str) -> Result<A3, A3Error> {
73 // `serde_json::from_str` returns `Result<A3, serde_json::Error>`.
74 // The `?` converts `serde_json::Error` → `A3Error::Parse` automatically
75 // because we wrote `#[from] serde_json::Error` in the error definition.
76 let raw: A3 = serde_json::from_str(text)?;
77
78 // `validate` returns `Result<A3, A3Error>` — same error type, so `?`
79 // needs no conversion here.
80 validate(raw)
81}
82
83/// Serialize a validated [`A3`] to a JSON string.
84///
85/// `indent` controls formatting:
86/// - `None` — compact, no whitespace (good for storage / wire transfer)
87/// - `Some(n)` — pretty-printed with `n` spaces per level (good for display)
88///
89/// Returns `Err(`[`A3Error::Serialize`]`)` if serde_json fails while
90/// serializing. In practice this is unreachable for well-typed A3 values.
91///
92/// `a3` is passed as `&A3` (an immutable reference) because we only need to
93/// read it, not own or modify it. The caller keeps ownership.
94pub fn a3_to_json(a3: &A3, indent: Option<usize>) -> Result<String, A3Error> {
95 match indent {
96 // Compact output — single line, no extra whitespace.
97 // `.map_err(A3Error::Serialize)` is required before `?` because
98 // `#[from]` is only on A3Error::Parse, so serde_json::Error does not
99 // auto-convert into A3Error::Serialize.
100 None => Ok(serde_json::to_string(a3).map_err(A3Error::Serialize)?),
101
102 // Pretty output with a custom indent width.
103 //
104 // `serde_json::to_string_pretty` hard-codes 2 spaces, so we use the
105 // lower-level `Serializer` + `PrettyFormatter` API to get any width.
106 Some(n) => {
107 let indent_str = " ".repeat(n);
108
109 // `PrettyFormatter::with_indent` takes a byte slice (`&[u8]`).
110 // `.as_bytes()` converts `&str` → `&[u8]` (safe for ASCII spaces).
111 let formatter = serde_json::ser::PrettyFormatter::with_indent(indent_str.as_bytes());
112
113 // Collect serialized bytes into a `Vec<u8>` (a growable byte buffer).
114 let mut buf = Vec::new();
115 let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter);
116
117 // `Serialize::serialize` is the trait method — we call it explicitly
118 // because `a3` already has `#[derive(Serialize)]` from types.rs.
119 a3.serialize(&mut ser).map_err(A3Error::Serialize)?;
120
121 // serde_json always produces valid UTF-8, so `unwrap` is safe here.
122 // `expect` is like `unwrap` but with a custom panic message if it
123 // ever fires — useful as documentation of why we believe it is safe.
124 Ok(String::from_utf8(buf).expect("serde_json always produces valid UTF-8"))
125 }
126 }
127}
128
129/// Return the amino acid character at a 1-based `position`.
130///
131/// Returns `None` if `position` is 0 or beyond the sequence length.
132///
133/// `Option<T>` is Rust's null-safe alternative to nullable values — unlike
134/// `null` in other languages, the compiler forces callers to handle both
135/// `Some(value)` and `None` before they can use the result.
136pub fn residue_at(a3: &A3, position: u32) -> Option<char> {
137 if position == 0 || position > a3.sequence.len() as u32 {
138 return None;
139 }
140
141 // Positions are 1-based; byte index is 0-based — subtract 1.
142 // The sequence is validated to be ASCII-only ([A-Z*]), so each character
143 // is exactly one byte. `.as_bytes().get(i)` is O(1), whereas
144 // `.chars().nth(i)` would be O(N) because it walks the UTF-8 string.
145 // Casting `u8 → char` is safe for ASCII values.
146 a3.sequence
147 .as_bytes()
148 .get((position - 1) as usize)
149 .map(|&b| b as char)
150}
151
152/// Return all variant records at a 1-based `position`.
153///
154/// Returns references into `a3`'s data without copying anything.
155/// The compiler infers that the returned `&VariantRecord` references live as
156/// long as the `&A3` reference passed in (lifetime elision — the `'a`
157/// annotations are implicit but enforced).
158pub fn variants_at(a3: &A3, position: u32) -> Vec<&VariantRecord> {
159 a3.annotations
160 .variant
161 .iter()
162 .filter(|v| v.position == position)
163 .collect()
164}
165
166// ---------------------------------------------------------------------------
167// Tests
168// ---------------------------------------------------------------------------
169
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal document used as the happy-path fixture: the spec requires all
    // five annotation families to be present, even when empty.
    const MINIMAL_JSON: &str = r#"{
        "$schema": "https://schema.rtemis.org/a3/v1/schema.json",
        "a3_version": "1.0.0",
        "sequence": "MAEPRQ",
        "annotations": {
            "site": {},
            "region": {},
            "ptm": {},
            "processing": {},
            "variant": []
        },
        "metadata": {
            "uniprot_id": "",
            "description": "",
            "reference": "",
            "organism": ""
        }
    }"#;

    // Parse the fixture; a failure here means the fixture itself is invalid.
    fn minimal_a3() -> A3 {
        a3_from_json(MINIMAL_JSON).unwrap()
    }

    // Shared assertion for the negative tests: `json` must not parse/validate.
    fn assert_rejected(json: &str) {
        assert!(a3_from_json(json).is_err());
    }

    #[test]
    fn round_trip() {
        // Parse → serialize → parse → serialize. The two compact JSON strings
        // are compared rather than the `A3` values themselves: if both
        // serialize identically they are semantically equal.
        let first = minimal_a3();
        let first_json = a3_to_json(&first, None).unwrap();
        let second = a3_from_json(&first_json).unwrap();
        let second_json = a3_to_json(&second, None).unwrap();
        assert_eq!(first_json, second_json);
    }

    #[test]
    fn residue_at_valid_position() {
        // "MAEPRQ" — position 1 is 'M', position 6 is 'Q'.
        let a3 = minimal_a3();
        assert_eq!(residue_at(&a3, 1), Some('M'));
        assert_eq!(residue_at(&a3, 6), Some('Q'));
    }

    #[test]
    fn residue_at_out_of_bounds() {
        // Position 0 is invalid (1-based); 99 is past the 6-residue sequence.
        let a3 = minimal_a3();
        assert_eq!(residue_at(&a3, 0), None);
        assert_eq!(residue_at(&a3, 99), None);
    }

    #[test]
    fn rejects_missing_schema() {
        // NOTE(review): `"metadata":{}` is empty here — confirm the rejection
        // is caused by the missing `$schema` and not by the metadata.
        assert_rejected(
            r#"{"a3_version":"1.0.0","sequence":"MAEPRQ","annotations":{"site":{},"region":{},"ptm":{},"processing":{},"variant":[]},"metadata":{}}"#,
        );
    }

    #[test]
    fn rejects_wrong_schema_uri() {
        assert_rejected(
            r#"{"$schema":"https://example.com/wrong","a3_version":"1.0.0","sequence":"MAEPRQ","annotations":{"site":{},"region":{},"ptm":{},"processing":{},"variant":[]},"metadata":{}}"#,
        );
    }

    #[test]
    fn rejects_missing_version() {
        assert_rejected(
            r#"{"$schema":"https://schema.rtemis.org/a3/v1/schema.json","sequence":"MAEPRQ","annotations":{"site":{},"region":{},"ptm":{},"processing":{},"variant":[]},"metadata":{}}"#,
        );
    }

    #[test]
    fn rejects_unknown_top_level_key() {
        // NOTE(review): this document also omits `annotations` and
        // `metadata` — confirm the rejection is due to the unknown `foo` key.
        assert_rejected(
            r#"{"$schema":"https://schema.rtemis.org/a3/v1/schema.json","a3_version":"1.0.0","sequence":"MAEPRQ","foo":"bar"}"#,
        );
    }

    #[test]
    fn rejects_unknown_metadata_key() {
        // NOTE(review): `annotations` is omitted here as well — confirm the
        // rejection is due to the unknown `gene` metadata key.
        assert_rejected(
            r#"{"$schema":"https://schema.rtemis.org/a3/v1/schema.json","a3_version":"1.0.0","sequence":"MAEPRQ","metadata":{"gene":"MAPT"}}"#,
        );
    }

    #[test]
    fn pretty_print_contains_newlines() {
        // Pretty output with a 2-space indent must span multiple lines.
        let pretty = a3_to_json(&minimal_a3(), Some(2)).unwrap();
        assert!(pretty.contains('\n'));
    }
}
260}