Skip to main content

cyrs_schema/
file.rs

1//! TOML schema-file loader and round-trip serialiser (spec 0002).
2//!
3//! Gated by the `file` feature so the default build does not pull in
4//! `toml` or `thiserror`. The public API is three functions:
5//!
6//! - [`load_from_toml_str`] — parse a TOML string.
7//! - [`load_from_toml_path`] — read a file and parse its contents.
8//! - [`serialise_to_toml`] — render an [`InMemorySchema`] back to TOML
9//!   for the round-trip property in spec 0002 §10.
10//!
11//! Intermediate serde-shaped structs ([`SchemaFile`], [`MetaBlock`],
12//! [`LabelEntry`], [`RelTypeEntry`], [`ParameterEntry`],
13//! [`PropertyEntry`]) convert to/from [`InMemorySchema`]. They are
14//! public so callers who want to introspect or massage the raw file
15//! shape before conversion can do so.
16
17use std::collections::BTreeSet;
18use std::path::{Path, PathBuf};
19
20use serde::{Deserialize, Serialize};
21use smol_str::SmolStr;
22use thiserror::Error;
23
24use crate::{
25    InMemorySchema, ParamDecl, PropertyDecl, PropertyType, RelDecl,
26    in_memory::{BuilderError, InMemorySchemaBuilder},
27};
28
/// The only `[meta].cyrs_schema_version` value this loader accepts, and
/// the value stamped into every serialised file (spec 0002 §8).
pub const SCHEMA_FILE_VERSION: &str = "0.1.0";
31
32// ============================================================
33// Public API
34// ============================================================
35
36/// Parse a `schema.toml` string into an [`InMemorySchema`].
37pub fn load_from_toml_str(input: &str) -> Result<InMemorySchema, SchemaLoadError> {
38    let file: SchemaFile = toml::from_str(input)?;
39    file.into_schema()
40}
41
42/// Read the file at `path` and parse it as a `schema.toml`.
43pub fn load_from_toml_path(path: &Path) -> Result<InMemorySchema, SchemaLoadError> {
44    let input = std::fs::read_to_string(path).map_err(|source| SchemaLoadError::Io {
45        path: path.to_path_buf(),
46        source,
47    })?;
48    load_from_toml_str(&input)
49}
50
51/// Render an [`InMemorySchema`] back to a TOML string.
52///
53/// The output satisfies the spec 0002 §10 round-trip property: feeding
54/// it back through [`load_from_toml_str`] yields a schema semantically
55/// equal to the input (collection ordering uses `BTreeMap` internally
56/// so the result is deterministic).
57#[must_use]
58pub fn serialise_to_toml(schema: &InMemorySchema) -> String {
59    let file = SchemaFile::from_schema(schema);
60    toml::to_string_pretty(&file).expect("SchemaFile serialises infallibly")
61}
62
63// ============================================================
64// Error taxonomy
65// ============================================================
66
67/// Errors surfaced by the TOML schema loader (spec 0002 §11).
68#[derive(Debug, Error)]
69pub enum SchemaLoadError {
70    /// The input is not well-formed TOML, or does not match the
71    /// expected shape.
72    #[error("malformed schema TOML: {0}")]
73    TomlParse(#[from] toml::de::Error),
74    /// Reading the file failed.
75    #[error("reading schema from {path}: {source}")]
76    Io {
77        /// Path we tried to read.
78        path: PathBuf,
79        /// Underlying I/O error.
80        #[source]
81        source: std::io::Error,
82    },
83    /// A relationship type references an undeclared label.
84    #[error("rel type endpoint references unknown label `{0}`")]
85    UnknownLabelRef(SmolStr),
86    /// A label name is declared twice.
87    #[error("duplicate label `{0}`")]
88    DuplicateLabel(SmolStr),
89    /// A rel type name is declared twice.
90    #[error("duplicate rel type `{0}`")]
91    DuplicateRelType(SmolStr),
92    /// A parameter name is declared twice.
93    #[error("duplicate parameter `{0}`")]
94    DuplicateParameter(SmolStr),
95    /// A `type` string lies outside the grammar in spec 0002 §4, or
96    /// `[meta].cyrs_schema_version` does not match the supported version.
97    #[error("bad type string: {0}")]
98    BadType(String),
99}
100
101impl From<BuilderError> for SchemaLoadError {
102    fn from(err: BuilderError) -> Self {
103        match err {
104            BuilderError::DuplicateLabel(n) => Self::DuplicateLabel(n),
105            BuilderError::DuplicateRelType(n) => Self::DuplicateRelType(n),
106            BuilderError::DuplicateParameter(n) => Self::DuplicateParameter(n),
107        }
108    }
109}
110
111// ============================================================
112// serde-shaped intermediate structs
113// ============================================================
114
115/// Top-level shape of a `schema.toml` file (spec 0002 §3).
116#[derive(Debug, Clone, Default, Serialize, Deserialize)]
117#[serde(deny_unknown_fields)]
118pub struct SchemaFile {
119    /// Optional `[meta]` block.
120    #[serde(default, skip_serializing_if = "Option::is_none")]
121    pub meta: Option<MetaBlock>,
122    /// `[[label]]` array of tables.
123    #[serde(default, rename = "label", skip_serializing_if = "Vec::is_empty")]
124    pub labels: Vec<LabelEntry>,
125    /// `[[rel_type]]` array of tables.
126    #[serde(default, rename = "rel_type", skip_serializing_if = "Vec::is_empty")]
127    pub rel_types: Vec<RelTypeEntry>,
128    /// `[[parameter]]` array of tables.
129    #[serde(default, rename = "parameter", skip_serializing_if = "Vec::is_empty")]
130    pub parameters: Vec<ParameterEntry>,
131}
132
133/// `[meta]` block contents (spec 0002 §8).
134#[derive(Debug, Clone, Serialize, Deserialize)]
135#[serde(deny_unknown_fields)]
136pub struct MetaBlock {
137    /// Format version; must equal [`SCHEMA_FILE_VERSION`].
138    pub cyrs_schema_version: String,
139    /// Optional human-friendly name.
140    #[serde(default, skip_serializing_if = "Option::is_none")]
141    pub schema_name: Option<String>,
142    /// Optional description.
143    #[serde(default, skip_serializing_if = "Option::is_none")]
144    pub description: Option<String>,
145}
146
147/// `[[label]]` entry (spec 0002 §5).
148#[derive(Debug, Clone, Serialize, Deserialize)]
149#[serde(deny_unknown_fields)]
150pub struct LabelEntry {
151    /// Label name.
152    pub name: String,
153    /// Declared properties.
154    #[serde(default, skip_serializing_if = "Vec::is_empty")]
155    pub properties: Vec<PropertyEntry>,
156}
157
158/// `[[rel_type]]` entry (spec 0002 §6).
159#[derive(Debug, Clone, Serialize, Deserialize)]
160#[serde(deny_unknown_fields)]
161pub struct RelTypeEntry {
162    /// Rel type name.
163    pub name: String,
164    /// Allowed start-endpoint labels. Empty = polymorphic.
165    #[serde(default)]
166    pub start_labels: Vec<String>,
167    /// Allowed end-endpoint labels. Empty = polymorphic.
168    #[serde(default)]
169    pub end_labels: Vec<String>,
170    /// Properties declared on the relationship.
171    #[serde(default, skip_serializing_if = "Vec::is_empty")]
172    pub properties: Vec<PropertyEntry>,
173}
174
175/// `[[parameter]]` entry (spec 0002 §7).
176#[derive(Debug, Clone, Serialize, Deserialize)]
177#[serde(deny_unknown_fields)]
178pub struct ParameterEntry {
179    /// Parameter name, without the leading `$`.
180    pub name: String,
181    /// Declared type; must satisfy the §4 grammar.
182    #[serde(rename = "type")]
183    pub ty: String,
184    /// Optional scalar default (string / int / float / bool).
185    #[serde(default, skip_serializing_if = "Option::is_none")]
186    pub default: Option<toml::Value>,
187}
188
189/// Property entry used inside label and rel type declarations.
190#[derive(Debug, Clone, Serialize, Deserialize)]
191#[serde(deny_unknown_fields)]
192pub struct PropertyEntry {
193    /// Property name.
194    pub name: String,
195    /// Declared type; must satisfy the §4 grammar.
196    #[serde(rename = "type")]
197    pub ty: String,
198    /// `true` when the property is required on every instance.
199    #[serde(default, skip_serializing_if = "is_false")]
200    pub required: bool,
201}
202
/// Predicate for `skip_serializing_if`: `true` exactly when the flag
/// is unset.
#[inline]
#[allow(clippy::trivially_copy_pass_by_ref)] // serde requires `&T`.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
208
209// ============================================================
210// Conversion
211// ============================================================
212
213impl SchemaFile {
214    /// Convert into an [`InMemorySchema`], validating invariants along
215    /// the way.
216    pub fn into_schema(self) -> Result<InMemorySchema, SchemaLoadError> {
217        if let Some(meta) = &self.meta
218            && meta.cyrs_schema_version != SCHEMA_FILE_VERSION
219        {
220            return Err(SchemaLoadError::BadType(format!(
221                "unsupported cyrs_schema_version `{}`; this loader speaks `{SCHEMA_FILE_VERSION}`",
222                meta.cyrs_schema_version
223            )));
224        }
225
226        let mut builder = InMemorySchemaBuilder::default();
227
228        // Track declared label names for rel-type endpoint validation.
229        let mut declared: BTreeSet<SmolStr> = BTreeSet::new();
230        for lbl in &self.labels {
231            let key = SmolStr::new(&lbl.name);
232            if !declared.insert(key.clone()) {
233                return Err(SchemaLoadError::DuplicateLabel(key));
234            }
235        }
236
237        for lbl in self.labels {
238            let name = SmolStr::new(&lbl.name);
239            let mut props = Vec::with_capacity(lbl.properties.len());
240            for p in lbl.properties {
241                props.push(p.into_decl()?);
242            }
243            builder = builder.add_label(name, props);
244        }
245
246        let mut rel_names: BTreeSet<SmolStr> = BTreeSet::new();
247        for rel in &self.rel_types {
248            let key = SmolStr::new(&rel.name);
249            if !rel_names.insert(key.clone()) {
250                return Err(SchemaLoadError::DuplicateRelType(key));
251            }
252            for endpoint in rel.start_labels.iter().chain(rel.end_labels.iter()) {
253                let ep = SmolStr::new(endpoint);
254                if !declared.contains(&ep) {
255                    return Err(SchemaLoadError::UnknownLabelRef(ep));
256                }
257            }
258        }
259
260        for rel in self.rel_types {
261            let mut props = Vec::with_capacity(rel.properties.len());
262            for p in rel.properties {
263                props.push(p.into_decl()?);
264            }
265            builder = builder.add_rel_type(RelDecl {
266                name: SmolStr::new(&rel.name),
267                start_labels: rel.start_labels.into_iter().map(SmolStr::from).collect(),
268                end_labels: rel.end_labels.into_iter().map(SmolStr::from).collect(),
269                properties: props,
270            });
271        }
272
273        for p in self.parameters {
274            builder = builder.add_parameter(p.into_decl()?);
275        }
276
277        if let Some(meta) = self.meta {
278            builder = builder
279                .schema_name(meta.schema_name.map(SmolStr::from))
280                .description(meta.description);
281        }
282
283        builder.build().map_err(SchemaLoadError::from)
284    }
285
286    /// Build a serialisable view of a schema (the inverse of
287    /// [`SchemaFile::into_schema`]).
288    #[must_use]
289    pub fn from_schema(schema: &InMemorySchema) -> Self {
290        let meta = Some(MetaBlock {
291            cyrs_schema_version: SCHEMA_FILE_VERSION.to_owned(),
292            schema_name: schema.schema_name.as_ref().map(ToString::to_string),
293            description: schema.description.clone(),
294        });
295
296        let labels = schema
297            .labels
298            .iter()
299            .map(|(name, props)| LabelEntry {
300                name: name.to_string(),
301                properties: props.iter().map(PropertyEntry::from_decl).collect(),
302            })
303            .collect();
304
305        let rel_types = schema
306            .rel_types
307            .values()
308            .map(|r| RelTypeEntry {
309                name: r.name.to_string(),
310                start_labels: r.start_labels.iter().map(ToString::to_string).collect(),
311                end_labels: r.end_labels.iter().map(ToString::to_string).collect(),
312                properties: r.properties.iter().map(PropertyEntry::from_decl).collect(),
313            })
314            .collect();
315
316        let parameters = schema
317            .parameters
318            .values()
319            .map(ParameterEntry::from_decl)
320            .collect();
321
322        Self {
323            meta,
324            labels,
325            rel_types,
326            parameters,
327        }
328    }
329}
330
331impl PropertyEntry {
332    fn into_decl(self) -> Result<PropertyDecl, SchemaLoadError> {
333        Ok(PropertyDecl {
334            name: SmolStr::new(&self.name),
335            ty: parse_type(&self.ty)?,
336            required: self.required,
337        })
338    }
339
340    fn from_decl(d: &PropertyDecl) -> Self {
341        Self {
342            name: d.name.to_string(),
343            ty: render_type(&d.ty),
344            required: d.required,
345        }
346    }
347}
348
349impl ParameterEntry {
350    fn into_decl(self) -> Result<ParamDecl, SchemaLoadError> {
351        let default = match self.default {
352            None => None,
353            Some(v) => Some(render_default_literal(&v)?),
354        };
355        Ok(ParamDecl {
356            name: SmolStr::new(&self.name),
357            ty: parse_type(&self.ty)?,
358            default,
359        })
360    }
361
362    fn from_decl(d: &ParamDecl) -> Self {
363        Self {
364            name: d.name.to_string(),
365            ty: render_type(&d.ty),
366            default: d.default.as_ref().map(parse_default_literal),
367        }
368    }
369}
370
371// ============================================================
372// Type grammar (spec 0002 §4)
373// ============================================================
374
375fn parse_type(input: &str) -> Result<PropertyType, SchemaLoadError> {
376    let trimmed = input.trim();
377    // NULLABLE is a modifier; at v0 we parse it transparently — the
378    // PropertyType lattice in spec 0001 §8.2 has no NULLABLE variant,
379    // so we accept the modifier and surface the underlying type.
380    if let Some(rest) = trimmed.strip_prefix("NULLABLE ") {
381        return parse_type(rest);
382    }
383    if let Some(inner) = trimmed
384        .strip_prefix("LIST<")
385        .and_then(|s| s.strip_suffix('>'))
386    {
387        return Ok(PropertyType::List(Box::new(parse_type(inner)?)));
388    }
389    Ok(match trimmed {
390        "STRING" => PropertyType::String,
391        "INTEGER" => PropertyType::Int,
392        "FLOAT" => PropertyType::Float,
393        "BOOLEAN" => PropertyType::Bool,
394        "DATE" => PropertyType::Date,
395        "DATETIME" => PropertyType::Datetime,
396        "DURATION" => PropertyType::Opaque(SmolStr::new("DURATION")),
397        "POINT" => PropertyType::Opaque(SmolStr::new("POINT")),
398        "MAP" => PropertyType::Opaque(SmolStr::new("MAP")),
399        "NULL" => PropertyType::Opaque(SmolStr::new("NULL")),
400        other => return Err(SchemaLoadError::BadType(other.to_owned())),
401    })
402}
403
404fn render_type(ty: &PropertyType) -> String {
405    match ty {
406        PropertyType::String => "STRING".to_owned(),
407        PropertyType::Int => "INTEGER".to_owned(),
408        PropertyType::Float => "FLOAT".to_owned(),
409        PropertyType::Bool => "BOOLEAN".to_owned(),
410        PropertyType::Date => "DATE".to_owned(),
411        PropertyType::Datetime => "DATETIME".to_owned(),
412        PropertyType::List(inner) => format!("LIST<{}>", render_type(inner)),
413        // Enum / Any are not expressible in the v0 file format (spec
414        // 0002 §20). Render Enum with its type name and Any as MAP so
415        // round-trip still produces valid TOML. A later spec may add
416        // richer syntax.
417        PropertyType::Opaque(n) | PropertyType::Enum(n, _) => n.to_string(),
418        PropertyType::Any => "MAP".to_owned(),
419    }
420}
421
422fn parse_default_literal(s: &SmolStr) -> toml::Value {
423    if let Ok(b) = s.parse::<bool>() {
424        return toml::Value::Boolean(b);
425    }
426    if let Ok(i) = s.parse::<i64>() {
427        return toml::Value::Integer(i);
428    }
429    if let Ok(f) = s.parse::<f64>() {
430        return toml::Value::Float(f);
431    }
432    // Fall back to a string; strip surrounding quotes if present so
433    // the round-trip does not accumulate escaping.
434    let stripped = s
435        .strip_prefix('"')
436        .and_then(|x| x.strip_suffix('"'))
437        .unwrap_or(s.as_str());
438    toml::Value::String(stripped.to_owned())
439}
440
441fn render_default_literal(v: &toml::Value) -> Result<SmolStr, SchemaLoadError> {
442    Ok(match v {
443        toml::Value::String(s) => SmolStr::new(s),
444        toml::Value::Integer(i) => SmolStr::new(i.to_string()),
445        toml::Value::Float(f) => SmolStr::new(f.to_string()),
446        toml::Value::Boolean(b) => SmolStr::new(b.to_string()),
447        other => {
448            return Err(SchemaLoadError::BadType(format!(
449                "parameter default must be a scalar (string, integer, float, boolean); got {other}",
450            )));
451        }
452    })
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// Primitive keywords, `LIST<…>` nesting and the transparent
    /// `NULLABLE` modifier all parse to the expected variant.
    #[test]
    fn parse_type_primitives() {
        assert_eq!(parse_type("STRING").unwrap(), PropertyType::String);
        assert_eq!(parse_type("INTEGER").unwrap(), PropertyType::Int);
        assert_eq!(
            parse_type("LIST<STRING>").unwrap(),
            PropertyType::List(Box::new(PropertyType::String))
        );
        assert_eq!(parse_type("NULLABLE STRING").unwrap(), PropertyType::String);
        assert_eq!(
            parse_type("LIST<NULLABLE INTEGER>").unwrap(),
            PropertyType::List(Box::new(PropertyType::Int))
        );
    }

    /// Unknown type strings are rejected, including when nested.
    #[test]
    fn parse_type_rejects_garbage() {
        let err = parse_type("not a type").unwrap_err();
        assert!(matches!(err, SchemaLoadError::BadType(_)));

        let nested = parse_type("LIST<not a type>").unwrap_err();
        assert!(matches!(nested, SchemaLoadError::BadType(_)));
    }

    /// Each primitive keyword parses to the expected variant and
    /// renders back to the same keyword.
    #[test]
    fn render_type_round_trips_primitives() {
        for (text, expected) in [
            ("STRING", PropertyType::String),
            ("INTEGER", PropertyType::Int),
            ("FLOAT", PropertyType::Float),
            ("BOOLEAN", PropertyType::Bool),
            ("DATE", PropertyType::Date),
            ("DATETIME", PropertyType::Datetime),
        ] {
            let parsed = parse_type(text).unwrap();
            // Check the variant too, not just the text round-trip:
            // otherwise a swapped keyword mapping would go unnoticed.
            assert_eq!(parsed, expected);
            assert_eq!(render_type(&parsed), text);
        }
        let nested = parse_type("LIST<LIST<INTEGER>>").unwrap();
        assert_eq!(render_type(&nested), "LIST<LIST<INTEGER>>");
    }

    /// Keywords without a dedicated variant survive as opaque types
    /// named after the keyword, and render back unchanged.
    #[test]
    fn render_type_round_trips_opaque() {
        for text in ["DURATION", "POINT", "MAP", "NULL"] {
            let parsed = parse_type(text).unwrap();
            assert_eq!(parsed, PropertyType::Opaque(SmolStr::new(text)));
            assert_eq!(render_type(&parsed), text);
        }
    }
}