Skip to main content

schema_index_yaml/
lib.rs

1#![doc = include_str!("../README.md")]
2
3mod conversion;
4mod entities;
5mod parser;
6
7pub use entities::*;
8pub use parser::ParseError;
9
10use serde::Deserialize;
11
12pub const SUPPORTED_VERSIONS: &[u8] = &[1];
13
/// The JSON Schema (authored as YAML) describing a `*.schema.yml` index file,
/// embedded at compile time from `index.schema.yml` (included below as
/// `../index.schema.yml`, relative to this source file) for editor assist and
/// programmatic access (both re-exported from `schema` and emitted by `flusso
/// schema index`). Kept in lockstep with this parser by `schema`'s
/// `schema_drift` test.
19pub const INDEX_SCHEMA: &str = include_str!("../index.schema.yml");
20
21#[derive(thiserror::Error, Debug)]
22pub enum ConversionError {
23    #[error("invalid table name: {0}")]
24    TableName(#[from] schema_core::TableNameError),
25    #[error("invalid column name: {0}")]
26    ColumnName(#[from] schema_core::ColumnNameError),
27    #[error("invalid database schema name: {0}")]
28    DatabaseSchema(#[from] schema_core::DatabaseSchemaError),
29    #[error("`{verb}` join is missing its key: it takes {expected}")]
30    MissingJoinKey {
31        verb: &'static str,
32        expected: &'static str,
33    },
34    #[error("`{verb}` join does not take `{sibling}`; it takes {expected}")]
35    UnexpectedJoinKey {
36        verb: &'static str,
37        sibling: &'static str,
38        expected: &'static str,
39    },
40    #[error("`{verb}` join does not take `{sibling}` (a to-one join picks a single row)")]
41    UnexpectedJoinSibling {
42        verb: &'static str,
43        sibling: &'static str,
44    },
45    #[error("aggregate must specify either `foreign_key` or `through`, not both or neither")]
46    InvalidAggregateKey,
47    #[error("aggregate op '{op}' requires a `column`")]
48    MissingAggregateColumn { op: &'static str },
49    #[error("filter op '{op}' requires a value")]
50    MissingFilterValue { op: &'static str },
51    #[error("filter op 'between' requires exactly 2 values, got {got}")]
52    InvalidBetweenArity { got: usize },
53    #[error("filter op '{op}' requires a sequence value")]
54    ExpectedListValue { op: &'static str },
55    #[error("aggregate op '{op}' requires a `value_type` (its result mirrors the column)")]
56    MissingAggregateType { op: &'static str },
57    #[error(
58        "aggregate op '{op}' `value_type` must be a scalar type — `geo_point` and `custom` \
59         are not valid aggregate result types"
60    )]
61    InvalidAggregateType { op: &'static str },
62    #[error(
63        "aggregate op 'ids' requires an `element_type` (`long` or `keyword`) — it states the \
64         element type of the collected primary keys"
65    )]
66    MissingElementType,
67    #[error(
68        "aggregate op 'ids' `element_type` must be a scalar type — `geo_point` and `custom` \
69         are not valid element types"
70    )]
71    InvalidElementType,
72    #[error(
73        "aggregate op 'ids' does not take `{sibling}` (it always collects the related table's primary key)"
74    )]
75    UnexpectedIdsSibling { sibling: &'static str },
76    #[error("aggregate does not take `{sibling}` (only `ids` does)")]
77    UnexpectedAggregateSibling { sibling: &'static str },
78    #[error(
79        "a `geo` field needs either both `lat` and `lon` (two columns) or a single `column` \
80         holding a combined value — not a mix"
81    )]
82    InvalidGeoSource,
83    #[error(
84        "a `map` field's `values` must be a leaf type — `text`/`keyword` or a number/date kind \
85         (`{got}` is not one); `boolean`, `binary`, `json`, `geo`, and `custom` are not valid \
86         map value types"
87    )]
88    InvalidMapValueType { got: &'static str },
89    #[error(
90        "`doc_id` is not supported yet — the document `_id` is always derived from `primary_key`. \
91         Remove `doc_id` from the schema."
92    )]
93    DocIdUnsupported,
94    #[error(
95        "a `default` must be a scalar value (string, number, bool, or date) — a `{got}` default \
96         is not supported"
97    )]
98    NonScalarDefault { got: &'static str },
99}
100
101#[derive(Debug, Clone, Deserialize)]
102#[serde(deny_unknown_fields)]
103pub struct SchemaYaml {
104    pub version: u8,
105    pub table: String,
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub schema: Option<String>,
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub primary_key: Option<String>,
110    #[serde(skip_serializing_if = "Option::is_none")]
111    pub doc_id: Option<String>,
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub soft_delete: Option<SoftDelete>,
114    /// Root filters: only matching root rows become documents.
115    #[serde(default, skip_serializing_if = "Option::is_none")]
116    pub filters: Option<Vec<Filter>>,
117    pub fields: Vec<Field>,
118}
119
120impl TryFrom<SchemaYaml> for schema_core::IndexSchema {
121    type Error = ConversionError;
122
123    fn try_from(yaml: SchemaYaml) -> Result<Self, Self::Error> {
124        use schema_core::common::{ColumnName, TableName};
125
126        let table = TableName::try_new(yaml.table)?;
127        let db_schema = match yaml.schema {
128            Some(s) => schema_core::DatabaseSchema::try_new(s)?,
129            None => schema_core::DatabaseSchema::default(),
130        };
131        let primary_key = yaml.primary_key.map(ColumnName::try_new).transpose()?;
132        // `doc_id` parses (so existing schemas still deserialize) but is rejected
133        // here: honoring a non-pk `_id` needs the value at delete time, which the
134        // pk-keyed tombstone path can't supply. Tracked as a follow-up feature.
135        if yaml.doc_id.is_some() {
136            return Err(ConversionError::DocIdUnsupported);
137        }
138        let doc_id = yaml.doc_id.map(ColumnName::try_new).transpose()?;
139        let soft_delete = yaml
140            .soft_delete
141            .map(conversion::convert_soft_delete)
142            .transpose()?;
143        let filters = conversion::convert_filters_opt(yaml.filters)?;
144        let fields = yaml
145            .fields
146            .into_iter()
147            .map(conversion::convert_field)
148            .collect::<Result<_, _>>()?;
149
150        Ok(schema_core::IndexSchema {
151            version: yaml.version,
152            table,
153            db_schema,
154            primary_key,
155            doc_id,
156            soft_delete,
157            filters,
158            fields,
159        })
160    }
161}