rtemis_a3/types.rs
1//! Data model for the A3 format.
2//!
3//! All struct fields are `pub(crate)` — visible within this crate for
4//! construction and validation, but invisible to external callers. Public
5//! getter methods on each type provide read-only access. This enforces the
6//! invariant that every `A3` value has passed through [`crate::validate()`]
7//! (defined in [`crate::validation`]).
8//!
//! The hierarchy mirrors the top-level structure of the JSON wire format:
//!
//! ```text
//! A3
//! ├── $schema: String
//! ├── a3_version: String
//! ├── sequence: String
//! ├── annotations: Annotations
//! │   ├── site: HashMap<String, SiteEntry>
//! │   ├── region: HashMap<String, RegionEntry>
//! │   ├── ptm: HashMap<String, FlexEntry>
//! │   ├── processing: HashMap<String, FlexEntry>
//! │   └── variant: Vec<VariantRecord>
//! └── metadata: Metadata
//! ```
22
23use std::collections::HashMap;
24
25use serde::{Deserialize, Serialize};
26
27// ---------------------------------------------------------------------------
28// Site — positions only
29// ---------------------------------------------------------------------------
30
31/// A named annotation marking individual residue positions.
32///
33/// Fields are `pub(crate)` so only code within this crate can construct or
34/// mutate a `SiteEntry`. External callers use the getter methods.
35///
36/// `#[derive(Serialize, Deserialize)]` generates JSON read/write code even for
37/// `pub(crate)` fields — the derive macros run inside the crate and have full
38/// field access.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct SiteEntry {
41 /// 1-based residue positions, sorted ascending, no duplicates.
42 pub(crate) index: Vec<u32>,
43
44 /// Optional label (e.g. `"activeSite"`). Defaults to `""` when absent.
45 ///
46 /// `#[serde(rename = "type")]` maps this field to the JSON key `"type"`.
47 /// We cannot name the Rust field `type` because that is a reserved keyword.
48 /// `#[serde(default)]` fills in `""` when the key is absent from JSON.
49 #[serde(rename = "type", default)]
50 pub(crate) kind: String,
51}
52
53impl SiteEntry {
54 /// 1-based residue positions, sorted ascending, no duplicates.
55 pub fn index(&self) -> &[u32] {
56 &self.index
57 }
58
59 /// Annotation type label. Empty string when unset.
60 pub fn kind(&self) -> &str {
61 &self.kind
62 }
63}
64
65// ---------------------------------------------------------------------------
66// Region — ranges only
67// ---------------------------------------------------------------------------
68
69/// A named annotation marking contiguous sequence spans.
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct RegionEntry {
72 /// Inclusive `[start, end]` range pairs, sorted by start position.
73 /// Each pair satisfies `start < end`; ranges do not overlap.
74 pub(crate) index: Vec<[u32; 2]>,
75
76 #[serde(rename = "type", default)]
77 pub(crate) kind: String,
78}
79
80impl RegionEntry {
81 /// Inclusive `[start, end]` range pairs, sorted by start, non-overlapping.
82 pub fn index(&self) -> &[[u32; 2]] {
83 &self.index
84 }
85
86 /// Annotation type label. Empty string when unset.
87 pub fn kind(&self) -> &str {
88 &self.kind
89 }
90}
91
92// ---------------------------------------------------------------------------
93// A3Index — positions OR ranges (used by PTM and Processing)
94// ---------------------------------------------------------------------------
95
96/// The index for PTM and Processing entries can be either positions or ranges,
97/// but never a mix of both within a single entry.
98///
99/// `enum` in Rust is a *sum type* — a value is exactly one of the listed
100/// variants. This is the idiomatic way to represent "either A or B".
101///
102/// `#[serde(untagged)]` tells serde to detect the variant by trying each one
103/// in declaration order, without requiring a discriminator field in the JSON.
104/// Serde tries `Ranges` first; if that fails it falls back to `Positions`.
105/// This works because `Vec<[u32; 2]>` (array of 2-element arrays) and
106/// `Vec<u32>` (array of integers) are structurally unambiguous in JSON.
107///
108/// Named `A3Index` to match the cross-language naming convention used across
109/// the rtemis-a3 implementations (R, Python, TypeScript, Rust).
110#[derive(Debug, Clone, Serialize, Deserialize)]
111#[serde(untagged)]
112pub enum A3Index {
113 /// Contiguous span pairs — the inner array always has exactly two elements.
114 Ranges(Vec<[u32; 2]>),
115 /// Individual residue positions.
116 Positions(Vec<u32>),
117}
118
119impl A3Index {
120 /// Returns the positions slice if this is a `Positions` variant, else `None`.
121 pub fn as_positions(&self) -> Option<&[u32]> {
122 match self {
123 A3Index::Positions(p) => Some(p),
124 A3Index::Ranges(_) => None,
125 }
126 }
127
128 /// Returns the ranges slice if this is a `Ranges` variant, else `None`.
129 pub fn as_ranges(&self) -> Option<&[[u32; 2]]> {
130 match self {
131 A3Index::Ranges(r) => Some(r),
132 A3Index::Positions(_) => None,
133 }
134 }
135}
136
137/// A named PTM or Processing annotation with a flexible index type.
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct FlexEntry {
140 pub(crate) index: A3Index,
141
142 #[serde(rename = "type", default)]
143 pub(crate) kind: String,
144}
145
146impl FlexEntry {
147 /// The index — either positions or ranges.
148 pub fn index(&self) -> &A3Index {
149 &self.index
150 }
151
152 /// Annotation type label. Empty string when unset.
153 pub fn kind(&self) -> &str {
154 &self.kind
155 }
156}
157
158// ---------------------------------------------------------------------------
159// Variant
160// ---------------------------------------------------------------------------
161
162/// A single sequence variant record.
163///
164/// The spec requires a `position` field and permits any additional
165/// JSON-compatible fields, captured by `extra`.
166///
167/// `#[serde(flatten)]` on a `HashMap` field instructs serde to absorb
168/// all JSON keys that are not explicitly named fields in this struct.
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct VariantRecord {
171 /// Required. 1-based position of the variant on the sequence.
172 pub(crate) position: u32,
173
174 /// All other fields from the variant record, preserved as-is.
175 /// `serde_json::Value` can represent any valid JSON value.
176 #[serde(flatten)]
177 pub(crate) extra: HashMap<String, serde_json::Value>,
178}
179
180impl VariantRecord {
181 /// 1-based position of the variant on the sequence.
182 pub fn position(&self) -> u32 {
183 self.position
184 }
185
186 /// All extra fields from the variant record beyond `position`.
187 pub fn extra(&self) -> &HashMap<String, serde_json::Value> {
188 &self.extra
189 }
190}
191
192// ---------------------------------------------------------------------------
193// Annotations
194// ---------------------------------------------------------------------------
195
196/// Container for all five annotation families.
197///
198/// `#[serde(default)]` — fills all fields with empty collections
199/// when `annotations` is absent from JSON.
200/// `#[serde(deny_unknown_fields)]` — any key other than the five families
201/// is a hard error.
202/// `#[derive(Default)]` — generates empty-collection defaults,
203/// required by `#[serde(default)]`.
204#[derive(Debug, Clone, Default, Serialize, Deserialize)]
205#[serde(default, deny_unknown_fields)]
206pub struct Annotations {
207 pub(crate) site: HashMap<String, SiteEntry>,
208 pub(crate) region: HashMap<String, RegionEntry>,
209 pub(crate) ptm: HashMap<String, FlexEntry>,
210 pub(crate) processing: HashMap<String, FlexEntry>,
211 pub(crate) variant: Vec<VariantRecord>,
212}
213
214impl Annotations {
215 /// Named site annotations (individual residue positions).
216 pub fn site(&self) -> &HashMap<String, SiteEntry> {
217 &self.site
218 }
219
220 /// Named region annotations (contiguous spans).
221 pub fn region(&self) -> &HashMap<String, RegionEntry> {
222 &self.region
223 }
224
225 /// Named PTM annotations (positions or ranges).
226 pub fn ptm(&self) -> &HashMap<String, FlexEntry> {
227 &self.ptm
228 }
229
230 /// Named processing annotations (positions or ranges).
231 pub fn processing(&self) -> &HashMap<String, FlexEntry> {
232 &self.processing
233 }
234
235 /// Ordered list of variant records.
236 pub fn variant(&self) -> &[VariantRecord] {
237 &self.variant
238 }
239}
240
241// ---------------------------------------------------------------------------
242// Metadata
243// ---------------------------------------------------------------------------
244
245/// Descriptive metadata attached to the sequence.
246///
247/// All four fields default to `""`. Unknown keys are rejected by
248/// `deny_unknown_fields`.
249#[derive(Debug, Clone, Default, Serialize, Deserialize)]
250#[serde(default, deny_unknown_fields)]
251pub struct Metadata {
252 pub(crate) uniprot_id: String,
253 pub(crate) description: String,
254 pub(crate) reference: String,
255 pub(crate) organism: String,
256}
257
258impl Metadata {
259 /// UniProt accession (e.g. `"P10636"`). Empty string when unset.
260 pub fn uniprot_id(&self) -> &str {
261 &self.uniprot_id
262 }
263
264 /// Human-readable protein description. Empty string when unset.
265 pub fn description(&self) -> &str {
266 &self.description
267 }
268
269 /// Citation or URL. Empty string when unset.
270 pub fn reference(&self) -> &str {
271 &self.reference
272 }
273
274 /// Species name. Empty string when unset.
275 pub fn organism(&self) -> &str {
276 &self.organism
277 }
278}
279
280// ---------------------------------------------------------------------------
281// A3 — root type
282// ---------------------------------------------------------------------------
283
/// Expected value for the `$schema` envelope field of an A3 document.
///
/// The comparison against this constant is not performed in this file;
/// presumably it happens during validation (confirm in `crate::validation`).
pub const A3_SCHEMA_URI: &str = "https://schema.rtemis.org/a3/v1/schema.json";
/// Expected value for the `a3_version` envelope field of an A3 document.
///
/// As with [`A3_SCHEMA_URI`], the equality check itself lives outside this
/// file.
pub const A3_VERSION: &str = "1.0.0";
288
289/// The root A3 object.
290///
291/// Fields are `pub(crate)` — only [`crate::validate()`] (in [`crate::validation`]) may construct an `A3`,
292/// guaranteeing that every value returned to external callers has passed full
293/// two-stage validation. Public getter methods provide read-only access.
294#[derive(Debug, Clone, Serialize, Deserialize)]
295#[serde(deny_unknown_fields)]
296pub struct A3 {
297 /// JSON Schema URI — must equal [`A3_SCHEMA_URI`]. Required on input.
298 #[serde(rename = "$schema")]
299 pub(crate) schema: String,
300
301 /// A3 spec version — must equal [`A3_VERSION`]. Required on input.
302 pub(crate) a3_version: String,
303
304 /// The amino acid sequence. Non-empty, ≥ 2 characters, `[A-Z*]` only.
305 /// Lowercase input is normalized to uppercase during validation.
306 pub(crate) sequence: String,
307
308 /// All annotation families. Required; use an empty object `{}` if none.
309 pub(crate) annotations: Annotations,
310
311 /// Sequence metadata. Required; use an empty object `{}` if none.
312 pub(crate) metadata: Metadata,
313}
314
315impl A3 {
316 /// JSON Schema URI.
317 pub fn schema(&self) -> &str {
318 &self.schema
319 }
320
321 /// A3 spec version string.
322 pub fn a3_version(&self) -> &str {
323 &self.a3_version
324 }
325
326 /// The amino acid sequence, normalized to uppercase.
327 pub fn sequence(&self) -> &str {
328 &self.sequence
329 }
330
331 /// All five annotation families.
332 pub fn annotations(&self) -> &Annotations {
333 &self.annotations
334 }
335
336 /// Sequence metadata.
337 pub fn metadata(&self) -> &Metadata {
338 &self.metadata
339 }
340}