Skip to main content

schema_index_yaml/
lib.rs

//! Parse a `*.schema.yml` index definition into the core
//! [`IndexSchema`](schema_core::IndexSchema).
//!
//! A schema file describes one search document: its root table, its fields, and
//! how related tables fold in through joins and aggregates. Each field is
//! written **type-first** — `- <type>: <name>` (`keyword: email`,
//! `has_many: orders`, `count: orderCount`, `geo: location`) — and carries
//! only the siblings that type allows. Parsing is two stages:
//!
//! 1. [`SchemaYaml`] deserializes the file. Each field's type tag selects the
//!    body shape it parses into (see [`Field`]).
//!    [`ParseFrom`](schema_core::ParseFrom) also checks the declared `version`
//!    against [`SUPPORTED_VERSIONS`].
//! 2. `TryFrom<SchemaYaml>` converts it into the core model, validating
//!    identifiers and the arity rules YAML alone can't express: a join takes
//!    exactly the key its verb implies (`column` for `belongs_to`,
//!    `foreign_key` for `has_one`/`has_many`, `through` for `many_to_many`),
//!    `sum`/`min`/`max` aggregates need a `column` and a `value_type`, a
//!    `between` filter takes exactly two values, and a `geo` field needs either
//!    `lat`+`lon` or a single `column`.
21
22mod conversion;
23mod entities;
24mod parser;
25
26pub use entities::*;
27pub use parser::ParseError;
28
29use serde::Deserialize;
30
/// Schema-file `version` values this crate accepts.
///
/// The `version` declared in a `*.schema.yml` file is checked against this
/// list while the file is parsed.
pub const SUPPORTED_VERSIONS: &[u8] = &[1];
32
33/// The JSON Schema (authored as YAML) describing a `*.schema.yml` index file,
34/// embedded from this crate's `schemas/` directory for editor assist and
35/// programmatic access. Kept in lockstep with this parser by `schema`'s
36/// `schema_drift` test.
37pub const INDEX_SCHEMA: &str = include_str!("../schemas/index.schema.yml");
38
39#[derive(thiserror::Error, Debug)]
40pub enum ConversionError {
41    #[error("invalid table name: {0}")]
42    TableName(#[from] schema_core::TableNameError),
43    #[error("invalid column name: {0}")]
44    ColumnName(#[from] schema_core::ColumnNameError),
45    #[error("invalid database schema name: {0}")]
46    DatabaseSchema(#[from] schema_core::DatabaseSchemaError),
47    #[error("`{verb}` join is missing its key: it takes {expected}")]
48    MissingJoinKey {
49        verb: &'static str,
50        expected: &'static str,
51    },
52    #[error("`{verb}` join does not take `{sibling}`; it takes {expected}")]
53    UnexpectedJoinKey {
54        verb: &'static str,
55        sibling: &'static str,
56        expected: &'static str,
57    },
58    #[error("`{verb}` join does not take `{sibling}` (a to-one join picks a single row)")]
59    UnexpectedJoinSibling {
60        verb: &'static str,
61        sibling: &'static str,
62    },
63    #[error("aggregate must specify either `foreign_key` or `through`, not both or neither")]
64    InvalidAggregateKey,
65    #[error("aggregate op '{op}' requires a `column`")]
66    MissingAggregateColumn { op: &'static str },
67    #[error("filter op '{op}' requires a value")]
68    MissingFilterValue { op: &'static str },
69    #[error("filter op 'between' requires exactly 2 values, got {got}")]
70    InvalidBetweenArity { got: usize },
71    #[error("filter op '{op}' requires a sequence value")]
72    ExpectedListValue { op: &'static str },
73    #[error("aggregate op '{op}' requires a `value_type` (its result mirrors the column)")]
74    MissingAggregateType { op: &'static str },
75    #[error(
76        "aggregate op '{op}' `value_type` must be a scalar type — `geo_point` and `custom` \
77         are not valid aggregate result types"
78    )]
79    InvalidAggregateType { op: &'static str },
80    #[error(
81        "a `geo` field needs either both `lat` and `lon` (two columns) or a single `column` \
82         holding a combined value — not a mix"
83    )]
84    InvalidGeoSource,
85}
86
87#[derive(Debug, Clone, Deserialize)]
88#[serde(deny_unknown_fields)]
89pub struct SchemaYaml {
90    pub version: u8,
91    pub table: String,
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub schema: Option<String>,
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub primary_key: Option<String>,
96    #[serde(skip_serializing_if = "Option::is_none")]
97    pub doc_id: Option<String>,
98    #[serde(skip_serializing_if = "Option::is_none")]
99    pub soft_delete: Option<SoftDelete>,
100    /// Root filters: only matching root rows become documents.
101    #[serde(default, skip_serializing_if = "Option::is_none")]
102    pub filters: Option<Vec<Filter>>,
103    pub fields: Vec<Field>,
104}
105
106impl TryFrom<SchemaYaml> for schema_core::IndexSchema {
107    type Error = ConversionError;
108
109    fn try_from(yaml: SchemaYaml) -> Result<Self, Self::Error> {
110        use schema_core::common::{ColumnName, TableName};
111
112        let table = TableName::try_new(yaml.table)?;
113        let db_schema = match yaml.schema {
114            Some(s) => schema_core::DatabaseSchema::try_new(s)?,
115            None => schema_core::DatabaseSchema::default(),
116        };
117        let primary_key = yaml.primary_key.map(ColumnName::try_new).transpose()?;
118        let doc_id = yaml.doc_id.map(ColumnName::try_new).transpose()?;
119        let soft_delete = yaml
120            .soft_delete
121            .map(conversion::convert_soft_delete)
122            .transpose()?;
123        let filters = conversion::convert_filters_opt(yaml.filters)?;
124        let fields = yaml
125            .fields
126            .into_iter()
127            .map(conversion::convert_field)
128            .collect::<Result<_, _>>()?;
129
130        Ok(schema_core::IndexSchema {
131            version: yaml.version,
132            table,
133            db_schema,
134            primary_key,
135            doc_id,
136            soft_delete,
137            filters,
138            fields,
139        })
140    }
141}