Skip to main content

rtemis_a3/
types.rs

1//! Data model for the A3 format.
2//!
3//! All struct fields are `pub(crate)` — visible within this crate for
4//! construction and validation, but invisible to external callers. Public
5//! getter methods on each type provide read-only access. This enforces the
6//! invariant that every `A3` value has passed through [`crate::validate()`]
7//! (defined in [`crate::validation`]).
8//!
9//! The hierarchy mirrors the JSON wire format exactly:
10//!
11//! ```text
12//! A3
13//!  ├── sequence:    String
14//!  ├── annotations: Annotations
15//!  │    ├── site:       HashMap<String, SiteEntry>
16//!  │    ├── region:     HashMap<String, RegionEntry>
17//!  │    ├── ptm:        HashMap<String, FlexEntry>
18//!  │    ├── processing: HashMap<String, FlexEntry>
19//!  │    └── variant:    Vec<VariantRecord>
20//!  └── metadata:    Metadata
21//! ```
22
23use std::collections::HashMap;
24
25use serde::{Deserialize, Serialize};
26
27// ---------------------------------------------------------------------------
28// Site — positions only
29// ---------------------------------------------------------------------------
30
31/// A named annotation marking individual residue positions.
32///
33/// Fields are `pub(crate)` so only code within this crate can construct or
34/// mutate a `SiteEntry`. External callers use the getter methods.
35///
36/// `#[derive(Serialize, Deserialize)]` generates JSON read/write code even for
37/// `pub(crate)` fields — the derive macros run inside the crate and have full
38/// field access.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct SiteEntry {
41    /// 1-based residue positions, sorted ascending, no duplicates.
42    pub(crate) index: Vec<u32>,
43
44    /// Optional label (e.g. `"activeSite"`). Defaults to `""` when absent.
45    ///
46    /// `#[serde(rename = "type")]` maps this field to the JSON key `"type"`.
47    /// We cannot name the Rust field `type` because that is a reserved keyword.
48    /// `#[serde(default)]` fills in `""` when the key is absent from JSON.
49    #[serde(rename = "type", default)]
50    pub(crate) kind: String,
51}
52
53impl SiteEntry {
54    /// 1-based residue positions, sorted ascending, no duplicates.
55    pub fn index(&self) -> &[u32] {
56        &self.index
57    }
58
59    /// Annotation type label. Empty string when unset.
60    pub fn kind(&self) -> &str {
61        &self.kind
62    }
63}
64
65// ---------------------------------------------------------------------------
66// Region — ranges only
67// ---------------------------------------------------------------------------
68
69/// A named annotation marking contiguous sequence spans.
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct RegionEntry {
72    /// Inclusive `[start, end]` range pairs, sorted by start position.
73    /// Each pair satisfies `start < end`; ranges do not overlap.
74    pub(crate) index: Vec<[u32; 2]>,
75
76    #[serde(rename = "type", default)]
77    pub(crate) kind: String,
78}
79
80impl RegionEntry {
81    /// Inclusive `[start, end]` range pairs, sorted by start, non-overlapping.
82    pub fn index(&self) -> &[[u32; 2]] {
83        &self.index
84    }
85
86    /// Annotation type label. Empty string when unset.
87    pub fn kind(&self) -> &str {
88        &self.kind
89    }
90}
91
92// ---------------------------------------------------------------------------
93// A3Index — positions OR ranges (used by PTM and Processing)
94// ---------------------------------------------------------------------------
95
96/// The index for PTM and Processing entries can be either positions or ranges,
97/// but never a mix of both within a single entry.
98///
99/// `enum` in Rust is a *sum type* — a value is exactly one of the listed
100/// variants. This is the idiomatic way to represent "either A or B".
101///
102/// `#[serde(untagged)]` tells serde to detect the variant by trying each one
103/// in declaration order, without requiring a discriminator field in the JSON.
104/// Serde tries `Ranges` first; if that fails it falls back to `Positions`.
105/// This works because `Vec<[u32; 2]>` (array of 2-element arrays) and
106/// `Vec<u32>` (array of integers) are structurally unambiguous in JSON.
107///
108/// Named `A3Index` to match the cross-language naming convention used across
109/// the rtemis-a3 implementations (R, Python, TypeScript, Rust).
110#[derive(Debug, Clone, Serialize, Deserialize)]
111#[serde(untagged)]
112pub enum A3Index {
113    /// Contiguous span pairs — the inner array always has exactly two elements.
114    Ranges(Vec<[u32; 2]>),
115    /// Individual residue positions.
116    Positions(Vec<u32>),
117}
118
119impl A3Index {
120    /// Returns the positions slice if this is a `Positions` variant, else `None`.
121    pub fn as_positions(&self) -> Option<&[u32]> {
122        match self {
123            A3Index::Positions(p) => Some(p),
124            A3Index::Ranges(_) => None,
125        }
126    }
127
128    /// Returns the ranges slice if this is a `Ranges` variant, else `None`.
129    pub fn as_ranges(&self) -> Option<&[[u32; 2]]> {
130        match self {
131            A3Index::Ranges(r) => Some(r),
132            A3Index::Positions(_) => None,
133        }
134    }
135}
136
137/// A named PTM or Processing annotation with a flexible index type.
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct FlexEntry {
140    pub(crate) index: A3Index,
141
142    #[serde(rename = "type", default)]
143    pub(crate) kind: String,
144}
145
146impl FlexEntry {
147    /// The index — either positions or ranges.
148    pub fn index(&self) -> &A3Index {
149        &self.index
150    }
151
152    /// Annotation type label. Empty string when unset.
153    pub fn kind(&self) -> &str {
154        &self.kind
155    }
156}
157
158// ---------------------------------------------------------------------------
159// Variant
160// ---------------------------------------------------------------------------
161
162/// A single sequence variant record.
163///
164/// The spec requires a `position` field and permits any additional
165/// JSON-compatible fields, captured by `extra`.
166///
167/// `#[serde(flatten)]` on a `HashMap` field instructs serde to absorb
168/// all JSON keys that are not explicitly named fields in this struct.
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct VariantRecord {
171    /// Required. 1-based position of the variant on the sequence.
172    pub(crate) position: u32,
173
174    /// All other fields from the variant record, preserved as-is.
175    /// `serde_json::Value` can represent any valid JSON value.
176    #[serde(flatten)]
177    pub(crate) extra: HashMap<String, serde_json::Value>,
178}
179
180impl VariantRecord {
181    /// 1-based position of the variant on the sequence.
182    pub fn position(&self) -> u32 {
183        self.position
184    }
185
186    /// All extra fields from the variant record beyond `position`.
187    pub fn extra(&self) -> &HashMap<String, serde_json::Value> {
188        &self.extra
189    }
190}
191
192// ---------------------------------------------------------------------------
193// Annotations
194// ---------------------------------------------------------------------------
195
196/// Container for all five annotation families.
197///
198/// `#[serde(default)]`              — fills all fields with empty collections
199///                                    when `annotations` is absent from JSON.
200/// `#[serde(deny_unknown_fields)]`  — any key other than the five families
201///                                    is a hard error.
202/// `#[derive(Default)]`             — generates empty-collection defaults,
203///                                    required by `#[serde(default)]`.
204#[derive(Debug, Clone, Default, Serialize, Deserialize)]
205#[serde(default, deny_unknown_fields)]
206pub struct Annotations {
207    pub(crate) site: HashMap<String, SiteEntry>,
208    pub(crate) region: HashMap<String, RegionEntry>,
209    pub(crate) ptm: HashMap<String, FlexEntry>,
210    pub(crate) processing: HashMap<String, FlexEntry>,
211    pub(crate) variant: Vec<VariantRecord>,
212}
213
214impl Annotations {
215    /// Named site annotations (individual residue positions).
216    pub fn site(&self) -> &HashMap<String, SiteEntry> {
217        &self.site
218    }
219
220    /// Named region annotations (contiguous spans).
221    pub fn region(&self) -> &HashMap<String, RegionEntry> {
222        &self.region
223    }
224
225    /// Named PTM annotations (positions or ranges).
226    pub fn ptm(&self) -> &HashMap<String, FlexEntry> {
227        &self.ptm
228    }
229
230    /// Named processing annotations (positions or ranges).
231    pub fn processing(&self) -> &HashMap<String, FlexEntry> {
232        &self.processing
233    }
234
235    /// Ordered list of variant records.
236    pub fn variant(&self) -> &[VariantRecord] {
237        &self.variant
238    }
239}
240
241// ---------------------------------------------------------------------------
242// Metadata
243// ---------------------------------------------------------------------------
244
245/// Descriptive metadata attached to the sequence.
246///
247/// All four fields default to `""`. Unknown keys are rejected by
248/// `deny_unknown_fields`.
249#[derive(Debug, Clone, Default, Serialize, Deserialize)]
250#[serde(default, deny_unknown_fields)]
251pub struct Metadata {
252    pub(crate) uniprot_id: String,
253    pub(crate) description: String,
254    pub(crate) reference: String,
255    pub(crate) organism: String,
256}
257
258impl Metadata {
259    /// UniProt accession (e.g. `"P10636"`). Empty string when unset.
260    pub fn uniprot_id(&self) -> &str {
261        &self.uniprot_id
262    }
263
264    /// Human-readable protein description. Empty string when unset.
265    pub fn description(&self) -> &str {
266        &self.description
267    }
268
269    /// Citation or URL. Empty string when unset.
270    pub fn reference(&self) -> &str {
271        &self.reference
272    }
273
274    /// Species name. Empty string when unset.
275    pub fn organism(&self) -> &str {
276        &self.organism
277    }
278}
279
280// ---------------------------------------------------------------------------
281// A3 — root type
282// ---------------------------------------------------------------------------
283
/// Value the `$schema` envelope field is expected to hold.
pub const A3_SCHEMA_URI: &str = "https://schema.rtemis.org/a3/v1/schema.json";
/// Value the `a3_version` envelope field is expected to hold.
pub const A3_VERSION: &str = "1.0.0";
288
289/// The root A3 object.
290///
291/// Fields are `pub(crate)` — only [`crate::validate()`] (in [`crate::validation`]) may construct an `A3`,
292/// guaranteeing that every value returned to external callers has passed full
293/// two-stage validation. Public getter methods provide read-only access.
294#[derive(Debug, Clone, Serialize, Deserialize)]
295#[serde(deny_unknown_fields)]
296pub struct A3 {
297    /// JSON Schema URI — must equal [`A3_SCHEMA_URI`]. Required on input.
298    #[serde(rename = "$schema")]
299    pub(crate) schema: String,
300
301    /// A3 spec version — must equal [`A3_VERSION`]. Required on input.
302    pub(crate) a3_version: String,
303
304    /// The amino acid sequence. Non-empty, ≥ 2 characters, `[A-Z*]` only.
305    /// Lowercase input is normalized to uppercase during validation.
306    pub(crate) sequence: String,
307
308    /// All annotation families. Required; use an empty object `{}` if none.
309    pub(crate) annotations: Annotations,
310
311    /// Sequence metadata. Required; use an empty object `{}` if none.
312    pub(crate) metadata: Metadata,
313}
314
315impl A3 {
316    /// JSON Schema URI.
317    pub fn schema(&self) -> &str {
318        &self.schema
319    }
320
321    /// A3 spec version string.
322    pub fn a3_version(&self) -> &str {
323        &self.a3_version
324    }
325
326    /// The amino acid sequence, normalized to uppercase.
327    pub fn sequence(&self) -> &str {
328        &self.sequence
329    }
330
331    /// All five annotation families.
332    pub fn annotations(&self) -> &Annotations {
333        &self.annotations
334    }
335
336    /// Sequence metadata.
337    pub fn metadata(&self) -> &Metadata {
338        &self.metadata
339    }
340}