// mdql_core/schema.rs

//! Load and validate table-level `_mdql.md` files (type: schema).
2
3use std::path::Path;
4
5use indexmap::IndexMap;
6
7use crate::errors::MdqlError;
8use crate::parser::parse_file;
9
/// File name of the schema document that `load_schema` looks for inside a
/// table folder.
pub const MDQL_FILENAME: &str = "_mdql.md";

/// Type names accepted for frontmatter fields. The list is quoted verbatim in
/// the error raised when a schema declares an unknown field type.
pub const VALID_FIELD_TYPES: &[&str] = &[
    "string", "int", "float", "bool", "date", "datetime", "string[]", "dict",
];
13
/// Data type a frontmatter field can be declared with in a schema file.
///
/// Every variant maps to exactly one textual name; `from_str` and `as_str`
/// convert between the two representations.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FieldType {
    /// Written as `string`.
    String,
    /// Written as `int`.
    Int,
    /// Written as `float`.
    Float,
    /// Written as `bool`.
    Bool,
    /// Written as `date`.
    Date,
    /// Written as `datetime`.
    DateTime,
    /// Written as `string[]`.
    StringArray,
    /// Written as `dict`.
    Dict,
}

impl FieldType {
    /// Parses a schema type name into a [`FieldType`].
    ///
    /// Matching is exact and case-sensitive. Returns `None` for any name that
    /// is not one of the eight supported type names.
    // Kept as an inherent method returning `Option` because existing callers
    // rely on this exact signature rather than on `std::str::FromStr`.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<FieldType> {
        let parsed = match s {
            "string" => FieldType::String,
            "int" => FieldType::Int,
            "float" => FieldType::Float,
            "bool" => FieldType::Bool,
            "date" => FieldType::Date,
            "datetime" => FieldType::DateTime,
            "string[]" => FieldType::StringArray,
            "dict" => FieldType::Dict,
            _ => return None,
        };
        Some(parsed)
    }

    /// Returns the textual name of this type; the inverse of `from_str`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            FieldType::String => "string",
            FieldType::Int => "int",
            FieldType::Float => "float",
            FieldType::Bool => "bool",
            FieldType::Date => "date",
            FieldType::DateTime => "datetime",
            FieldType::StringArray => "string[]",
            FieldType::Dict => "dict",
        }
    }
}
54
55#[derive(Debug, Clone)]
56pub struct FieldDef {
57    pub field_type: FieldType,
58    pub required: bool,
59    pub enum_values: Option<Vec<String>>,
60}
61
/// Declaration of one section as read from the schema file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionDef {
    /// Value of the section's `type` key; `load_schema` uses `markdown` when it is omitted.
    pub content_type: String,
    /// Value of the section's `required` flag; `false` when the flag is omitted.
    pub required: bool,
}
67
/// Boolean switches read from the `rules` mapping of the schema file.
///
/// `load_schema` substitutes a default for every flag that is missing or is
/// not a boolean: `true` for the three `reject_*` flags and `false` for
/// `normalize_numbered_headings`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rules {
    /// Value of `rules.reject_unknown_frontmatter`.
    pub reject_unknown_frontmatter: bool,
    /// Value of `rules.reject_unknown_sections`.
    pub reject_unknown_sections: bool,
    /// Value of `rules.reject_duplicate_sections`.
    pub reject_duplicate_sections: bool,
    /// Value of `rules.normalize_numbered_headings`.
    pub normalize_numbered_headings: bool,
}
75
76#[derive(Debug, Clone)]
77pub struct Schema {
78    pub table: String,
79    pub primary_key: String,
80    pub frontmatter: IndexMap<String, FieldDef>,
81    pub h1_required: bool,
82    pub h1_must_equal_frontmatter: Option<String>,
83    pub sections: IndexMap<String, SectionDef>,
84    pub rules: Rules,
85}
86
87impl Schema {
88    /// All non-section keys that appear in rows.
89    pub fn metadata_keys(&self) -> std::collections::HashSet<String> {
90        let mut keys: std::collections::HashSet<String> = self
91            .frontmatter
92            .keys()
93            .cloned()
94            .collect();
95        keys.insert("path".to_string());
96        keys.insert("h1".to_string());
97        keys.insert("created".to_string());
98        keys.insert("modified".to_string());
99        keys
100    }
101}
102
103fn yaml_to_str(val: &serde_yaml::Value) -> Option<&str> {
104    val.as_str()
105}
106
107fn yaml_to_bool(val: &serde_yaml::Value) -> Option<bool> {
108    val.as_bool()
109}
110
111fn yaml_to_mapping(val: &serde_yaml::Value) -> Option<&serde_yaml::Mapping> {
112    val.as_mapping()
113}
114
115pub fn load_schema(folder: &Path) -> crate::errors::Result<Schema> {
116    let schema_path = folder.join(MDQL_FILENAME);
117    if !schema_path.exists() {
118        return Err(MdqlError::SchemaNotFound(format!(
119            "No {} in {}",
120            MDQL_FILENAME,
121            folder.display()
122        )));
123    }
124
125    let parsed = parse_file(&schema_path, Some(folder), false)?;
126
127    if !parsed.parse_errors.is_empty() {
128        return Err(MdqlError::SchemaInvalid(format!(
129            "Cannot parse {}: {}",
130            MDQL_FILENAME,
131            parsed.parse_errors.join("; ")
132        )));
133    }
134
135    let fm = &parsed.raw_frontmatter;
136    validate_meta_schema(fm, &schema_path)?;
137
138    let fm_map = fm.as_mapping().unwrap();
139
140    // Build field definitions
141    let mut frontmatter_defs: IndexMap<String, FieldDef> = IndexMap::new();
142    let fm_key = serde_yaml::Value::String("frontmatter".into());
143    if let Some(fm_fields) = fm_map.get(&fm_key) {
144        if let Some(fields_map) = yaml_to_mapping(fm_fields) {
145            for (name_val, spec_val) in fields_map {
146                let name = name_val.as_str().unwrap_or("").to_string();
147                let spec = spec_val.as_mapping().ok_or_else(|| {
148                    MdqlError::SchemaInvalid(format!(
149                        "{}: frontmatter.{} must be a mapping",
150                        MDQL_FILENAME, name
151                    ))
152                })?;
153
154                let ftype_str = spec
155                    .get(&serde_yaml::Value::String("type".into()))
156                    .and_then(yaml_to_str)
157                    .unwrap_or("string");
158
159                let field_type = FieldType::from_str(ftype_str).ok_or_else(|| {
160                    MdqlError::SchemaInvalid(format!(
161                        "{}: frontmatter.{} has invalid type '{}'. Valid types: {}",
162                        MDQL_FILENAME,
163                        name,
164                        ftype_str,
165                        VALID_FIELD_TYPES.join(", ")
166                    ))
167                })?;
168
169                let required = spec
170                    .get(&serde_yaml::Value::String("required".into()))
171                    .and_then(yaml_to_bool)
172                    .unwrap_or(false);
173
174                let enum_values = spec
175                    .get(&serde_yaml::Value::String("enum".into()))
176                    .and_then(|v| v.as_sequence())
177                    .map(|seq| {
178                        seq.iter()
179                            .map(|v| match v {
180                                serde_yaml::Value::String(s) => s.clone(),
181                                other => format!("{:?}", other),
182                            })
183                            .collect()
184                    });
185
186                frontmatter_defs.insert(name, FieldDef {
187                    field_type,
188                    required,
189                    enum_values,
190                });
191            }
192        }
193    }
194
195    // Build section definitions
196    let mut section_defs: IndexMap<String, SectionDef> = IndexMap::new();
197    let sections_key = serde_yaml::Value::String("sections".into());
198    if let Some(sections_val) = fm_map.get(&sections_key) {
199        if let Some(sections_map) = yaml_to_mapping(sections_val) {
200            for (name_val, spec_val) in sections_map {
201                let name = name_val.as_str().unwrap_or("").to_string();
202                let spec = spec_val.as_mapping().ok_or_else(|| {
203                    MdqlError::SchemaInvalid(format!(
204                        "{}: sections.{} must be a mapping",
205                        MDQL_FILENAME, name
206                    ))
207                })?;
208
209                let content_type = spec
210                    .get(&serde_yaml::Value::String("type".into()))
211                    .and_then(yaml_to_str)
212                    .unwrap_or("markdown")
213                    .to_string();
214
215                let required = spec
216                    .get(&serde_yaml::Value::String("required".into()))
217                    .and_then(yaml_to_bool)
218                    .unwrap_or(false);
219
220                section_defs.insert(name, SectionDef {
221                    content_type,
222                    required,
223                });
224            }
225        }
226    }
227
228    // H1 config
229    let h1_key = serde_yaml::Value::String("h1".into());
230    let h1_config = fm_map.get(&h1_key);
231    let h1_required = h1_config
232        .and_then(yaml_to_mapping)
233        .and_then(|m| m.get(&serde_yaml::Value::String("required".into())))
234        .and_then(yaml_to_bool)
235        .unwrap_or(true);
236    let h1_must_equal = h1_config
237        .and_then(yaml_to_mapping)
238        .and_then(|m| m.get(&serde_yaml::Value::String("must_equal_frontmatter".into())))
239        .and_then(yaml_to_str)
240        .map(|s| s.to_string());
241
242    // Rules
243    let rules_key = serde_yaml::Value::String("rules".into());
244    let rules_map = fm_map.get(&rules_key).and_then(yaml_to_mapping);
245
246    let get_rule_bool = |key: &str, default: bool| -> bool {
247        rules_map
248            .and_then(|m| m.get(&serde_yaml::Value::String(key.into())))
249            .and_then(yaml_to_bool)
250            .unwrap_or(default)
251    };
252
253    let rules = Rules {
254        reject_unknown_frontmatter: get_rule_bool("reject_unknown_frontmatter", true),
255        reject_unknown_sections: get_rule_bool("reject_unknown_sections", true),
256        reject_duplicate_sections: get_rule_bool("reject_duplicate_sections", true),
257        normalize_numbered_headings: get_rule_bool("normalize_numbered_headings", false),
258    };
259
260    // Table name
261    let table = fm_map
262        .get(&serde_yaml::Value::String("table".into()))
263        .and_then(yaml_to_str)
264        .unwrap_or("")
265        .to_string();
266
267    let primary_key = fm_map
268        .get(&serde_yaml::Value::String("primary_key".into()))
269        .and_then(yaml_to_str)
270        .unwrap_or("path")
271        .to_string();
272
273    Ok(Schema {
274        table,
275        primary_key,
276        frontmatter: frontmatter_defs,
277        h1_required,
278        h1_must_equal_frontmatter: h1_must_equal,
279        sections: section_defs,
280        rules,
281    })
282}
283
284fn validate_meta_schema(fm: &serde_yaml::Value, path: &Path) -> crate::errors::Result<()> {
285    let map = fm.as_mapping().ok_or_else(|| {
286        MdqlError::SchemaInvalid(format!("{}: frontmatter must be a mapping", path.display()))
287    })?;
288
289    // type: schema
290    let type_val = map.get(&serde_yaml::Value::String("type".into()));
291    if type_val.and_then(yaml_to_str) != Some("schema") {
292        return Err(MdqlError::SchemaInvalid(format!(
293            "{}: frontmatter must have 'type: schema'",
294            path.display()
295        )));
296    }
297
298    // table must be a string
299    let table_val = map.get(&serde_yaml::Value::String("table".into()));
300    if table_val.and_then(yaml_to_str).is_none() {
301        return Err(MdqlError::SchemaInvalid(format!(
302            "{}: frontmatter must have 'table' as a string",
303            path.display()
304        )));
305    }
306
307    // frontmatter must be a mapping if present
308    let fm_val = map.get(&serde_yaml::Value::String("frontmatter".into()));
309    if let Some(v) = fm_val {
310        if !v.is_mapping() && !v.is_null() {
311            return Err(MdqlError::SchemaInvalid(format!(
312                "{}: 'frontmatter' must be a mapping",
313                path.display()
314            )));
315        }
316    }
317
318    // sections must be a mapping if present
319    let sec_val = map.get(&serde_yaml::Value::String("sections".into()));
320    if let Some(v) = sec_val {
321        if !v.is_mapping() && !v.is_null() {
322            return Err(MdqlError::SchemaInvalid(format!(
323                "{}: 'sections' must be a mapping",
324                path.display()
325            )));
326        }
327    }
328
329    Ok(())
330}
331
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Writes `content` as the schema file inside `dir`.
    fn make_schema_file(dir: &Path, content: &str) {
        fs::write(dir.join(MDQL_FILENAME), content).unwrap();
    }

    #[test]
    fn test_load_basic_schema() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            "---\ntype: schema\ntable: test\nprimary_key: path\nfrontmatter:\n  title:\n    type: string\n    required: true\nh1:\n  required: false\nsections: {}\nrules:\n  reject_unknown_frontmatter: true\n  reject_unknown_sections: false\n  reject_duplicate_sections: true\n---\n",
        );
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(schema.table, "test");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.contains_key("title"));
        assert!(schema.frontmatter["title"].required);
        assert_eq!(schema.frontmatter["title"].field_type, FieldType::String);
        assert!(!schema.h1_required);
        assert!(!schema.rules.reject_unknown_sections);
    }

    #[test]
    fn test_missing_schema() {
        let dir = tempfile::tempdir().unwrap();
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaNotFound(_))));
    }

    #[test]
    fn test_wrong_type() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), "---\ntype: database\nname: test\n---\n");
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }

    #[test]
    fn test_enum_values() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            "---\ntype: schema\ntable: test\nfrontmatter:\n  status:\n    type: string\n    required: true\n    enum: [ACTIVE, ARCHIVED]\n---\n",
        );
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(
            schema.frontmatter["status"].enum_values,
            Some(vec!["ACTIVE".to_string(), "ARCHIVED".to_string()])
        );
    }

    /// A schema that sets only the mandatory keys gets every documented default.
    #[test]
    fn test_defaults_when_optional_keys_missing() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), "---\ntype: schema\ntable: notes\n---\n");
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(schema.table, "notes");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.is_empty());
        assert!(schema.sections.is_empty());
        assert!(schema.h1_required);
        assert_eq!(schema.h1_must_equal_frontmatter, None);
        assert!(schema.rules.reject_unknown_frontmatter);
        assert!(schema.rules.reject_unknown_sections);
        assert!(schema.rules.reject_duplicate_sections);
        assert!(!schema.rules.normalize_numbered_headings);
    }

    /// An unsupported field type is reported as an invalid schema.
    #[test]
    fn test_invalid_field_type() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            "---\ntype: schema\ntable: test\nfrontmatter:\n  id:\n    type: uuid\n---\n",
        );
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }
}
386}