Skip to main content

mdql_core/
schema.rs

1//! Load and validate table-level _mdql.md files (type: schema).
2
3use std::path::Path;
4
5use indexmap::IndexMap;
6
7use crate::errors::MdqlError;
8use crate::parser::parse_file;
9
/// File name of the table-level schema document that `load_schema` looks for
/// inside a table folder.
pub const MDQL_FILENAME: &str = "_mdql.md";

/// Type names accepted for a frontmatter field's `type` entry.
///
/// Listed verbatim in the error message produced when a schema declares an
/// unknown field type; each entry has a matching `FieldType` variant.
pub const VALID_FIELD_TYPES: &[&str] = &[
    "string",
    "int",
    "float",
    "bool",
    "date",
    "datetime",
    "string[]",
    "dict",
];
13
/// Data type a schema can declare for a frontmatter field.
///
/// Each variant corresponds to one of the type names accepted in the schema
/// file (`"string"`, `"int"`, `"float"`, `"bool"`, `"date"`, `"datetime"`,
/// `"string[]"`, `"dict"`).
#[derive(Debug, Clone, PartialEq)]
pub enum FieldType {
    String,
    Int,
    Float,
    Bool,
    Date,
    DateTime,
    StringArray,
    Dict,
}

impl FieldType {
    /// Parse a schema type name into a `FieldType`.
    ///
    /// Matching is exact and case-sensitive; any other input yields `None`.
    /// This is an inherent method returning `Option`, not an implementation
    /// of `std::str::FromStr`.
    pub fn from_str(s: &str) -> Option<FieldType> {
        let parsed = match s {
            "string" => Self::String,
            "int" => Self::Int,
            "float" => Self::Float,
            "bool" => Self::Bool,
            "date" => Self::Date,
            "datetime" => Self::DateTime,
            "string[]" => Self::StringArray,
            "dict" => Self::Dict,
            // Anything else is not a known type name.
            _ => return None,
        };
        Some(parsed)
    }

    /// Return the schema type name for this variant (the inverse of
    /// `from_str`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::String => "string",
            Self::Int => "int",
            Self::Float => "float",
            Self::Bool => "bool",
            Self::Date => "date",
            Self::DateTime => "datetime",
            Self::StringArray => "string[]",
            Self::Dict => "dict",
        }
    }
}
54
55#[derive(Debug, Clone)]
56pub struct FieldDef {
57    pub field_type: FieldType,
58    pub required: bool,
59    pub enum_values: Option<Vec<String>>,
60}
61
/// Definition of one section as declared under the schema's `sections`
/// mapping.
///
/// Derives `PartialEq`/`Eq` so definitions can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionDef {
    /// Value of the section's `type` entry (`load_schema` defaults it to
    /// `markdown`). Stored as written; it is not validated in this file.
    pub content_type: String,
    /// Value of the `required` entry (`load_schema` defaults it to `false`).
    pub required: bool,
}
67
/// Boolean switches read from the schema's `rules` mapping.
///
/// The defaults mentioned below are the ones `load_schema` applies when a key
/// is absent or not a boolean. Derives `PartialEq`/`Eq` so rule sets can be
/// compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rules {
    /// `rules.reject_unknown_frontmatter`; defaults to `true`.
    pub reject_unknown_frontmatter: bool,
    /// `rules.reject_unknown_sections`; defaults to `true`.
    pub reject_unknown_sections: bool,
    /// `rules.reject_duplicate_sections`; defaults to `true`.
    pub reject_duplicate_sections: bool,
    /// `rules.normalize_numbered_headings`; defaults to `false`.
    pub normalize_numbered_headings: bool,
}
75
76#[derive(Debug, Clone)]
77pub struct Schema {
78    pub table: String,
79    pub primary_key: String,
80    pub frontmatter: IndexMap<String, FieldDef>,
81    pub h1_required: bool,
82    pub h1_must_equal_frontmatter: Option<String>,
83    pub sections: IndexMap<String, SectionDef>,
84    pub rules: Rules,
85}
86
87impl Schema {
88    /// All non-section keys that appear in rows.
89    pub fn metadata_keys(&self) -> std::collections::HashSet<String> {
90        let mut keys: std::collections::HashSet<String> = self
91            .frontmatter
92            .keys()
93            .cloned()
94            .collect();
95        keys.insert("path".to_string());
96        keys.insert("h1".to_string());
97        keys.insert("created".to_string());
98        keys.insert("modified".to_string());
99        keys
100    }
101}
102
103fn yaml_to_str(val: &serde_yaml::Value) -> Option<&str> {
104    val.as_str()
105}
106
107fn yaml_to_bool(val: &serde_yaml::Value) -> Option<bool> {
108    val.as_bool()
109}
110
111fn yaml_to_mapping(val: &serde_yaml::Value) -> Option<&serde_yaml::Mapping> {
112    val.as_mapping()
113}
114
115pub fn load_schema(folder: &Path) -> crate::errors::Result<Schema> {
116    let schema_path = folder.join(MDQL_FILENAME);
117    if !schema_path.exists() {
118        return Err(MdqlError::SchemaNotFound(format!(
119            "No {} in {}",
120            MDQL_FILENAME,
121            folder.display()
122        )));
123    }
124
125    let parsed = parse_file(&schema_path, Some(folder), false)?;
126
127    if !parsed.parse_errors.is_empty() {
128        return Err(MdqlError::SchemaInvalid(format!(
129            "Cannot parse {}: {}",
130            MDQL_FILENAME,
131            parsed.parse_errors.join("; ")
132        )));
133    }
134
135    let fm = &parsed.raw_frontmatter;
136    validate_meta_schema(fm, &schema_path)?;
137
138    let fm_map = fm.as_mapping().ok_or_else(|| {
139        MdqlError::SchemaInvalid(format!(
140            "{}: frontmatter must be a YAML mapping",
141            MDQL_FILENAME
142        ))
143    })?;
144
145    // Build field definitions
146    let mut frontmatter_defs: IndexMap<String, FieldDef> = IndexMap::new();
147    let fm_key = serde_yaml::Value::String("frontmatter".into());
148    if let Some(fm_fields) = fm_map.get(&fm_key) {
149        if let Some(fields_map) = yaml_to_mapping(fm_fields) {
150            for (name_val, spec_val) in fields_map {
151                let name = name_val.as_str().unwrap_or("").to_string();
152                let spec = spec_val.as_mapping().ok_or_else(|| {
153                    MdqlError::SchemaInvalid(format!(
154                        "{}: frontmatter.{} must be a mapping",
155                        MDQL_FILENAME, name
156                    ))
157                })?;
158
159                let ftype_str = spec
160                    .get(&serde_yaml::Value::String("type".into()))
161                    .and_then(yaml_to_str)
162                    .unwrap_or("string");
163
164                let field_type = FieldType::from_str(ftype_str).ok_or_else(|| {
165                    MdqlError::SchemaInvalid(format!(
166                        "{}: frontmatter.{} has invalid type '{}'. Valid types: {}",
167                        MDQL_FILENAME,
168                        name,
169                        ftype_str,
170                        VALID_FIELD_TYPES.join(", ")
171                    ))
172                })?;
173
174                let required = spec
175                    .get(&serde_yaml::Value::String("required".into()))
176                    .and_then(yaml_to_bool)
177                    .unwrap_or(false);
178
179                let enum_values = spec
180                    .get(&serde_yaml::Value::String("enum".into()))
181                    .and_then(|v| v.as_sequence())
182                    .map(|seq| {
183                        seq.iter()
184                            .map(|v| match v {
185                                serde_yaml::Value::String(s) => s.clone(),
186                                other => format!("{:?}", other),
187                            })
188                            .collect()
189                    });
190
191                frontmatter_defs.insert(name, FieldDef {
192                    field_type,
193                    required,
194                    enum_values,
195                });
196            }
197        }
198    }
199
200    // Build section definitions
201    let mut section_defs: IndexMap<String, SectionDef> = IndexMap::new();
202    let sections_key = serde_yaml::Value::String("sections".into());
203    if let Some(sections_val) = fm_map.get(&sections_key) {
204        if let Some(sections_map) = yaml_to_mapping(sections_val) {
205            for (name_val, spec_val) in sections_map {
206                let name = name_val.as_str().unwrap_or("").to_string();
207                let spec = spec_val.as_mapping().ok_or_else(|| {
208                    MdqlError::SchemaInvalid(format!(
209                        "{}: sections.{} must be a mapping",
210                        MDQL_FILENAME, name
211                    ))
212                })?;
213
214                let content_type = spec
215                    .get(&serde_yaml::Value::String("type".into()))
216                    .and_then(yaml_to_str)
217                    .unwrap_or("markdown")
218                    .to_string();
219
220                let required = spec
221                    .get(&serde_yaml::Value::String("required".into()))
222                    .and_then(yaml_to_bool)
223                    .unwrap_or(false);
224
225                section_defs.insert(name, SectionDef {
226                    content_type,
227                    required,
228                });
229            }
230        }
231    }
232
233    // H1 config
234    let h1_key = serde_yaml::Value::String("h1".into());
235    let h1_config = fm_map.get(&h1_key);
236    let h1_required = h1_config
237        .and_then(yaml_to_mapping)
238        .and_then(|m| m.get(&serde_yaml::Value::String("required".into())))
239        .and_then(yaml_to_bool)
240        .unwrap_or(true);
241    let h1_must_equal = h1_config
242        .and_then(yaml_to_mapping)
243        .and_then(|m| m.get(&serde_yaml::Value::String("must_equal_frontmatter".into())))
244        .and_then(yaml_to_str)
245        .map(|s| s.to_string());
246
247    // Rules
248    let rules_key = serde_yaml::Value::String("rules".into());
249    let rules_map = fm_map.get(&rules_key).and_then(yaml_to_mapping);
250
251    let get_rule_bool = |key: &str, default: bool| -> bool {
252        rules_map
253            .and_then(|m| m.get(&serde_yaml::Value::String(key.into())))
254            .and_then(yaml_to_bool)
255            .unwrap_or(default)
256    };
257
258    let rules = Rules {
259        reject_unknown_frontmatter: get_rule_bool("reject_unknown_frontmatter", true),
260        reject_unknown_sections: get_rule_bool("reject_unknown_sections", true),
261        reject_duplicate_sections: get_rule_bool("reject_duplicate_sections", true),
262        normalize_numbered_headings: get_rule_bool("normalize_numbered_headings", false),
263    };
264
265    // Table name
266    let table = fm_map
267        .get(&serde_yaml::Value::String("table".into()))
268        .and_then(yaml_to_str)
269        .unwrap_or("")
270        .to_string();
271
272    let primary_key = fm_map
273        .get(&serde_yaml::Value::String("primary_key".into()))
274        .and_then(yaml_to_str)
275        .unwrap_or("path")
276        .to_string();
277
278    Ok(Schema {
279        table,
280        primary_key,
281        frontmatter: frontmatter_defs,
282        h1_required,
283        h1_must_equal_frontmatter: h1_must_equal,
284        sections: section_defs,
285        rules,
286    })
287}
288
289fn validate_meta_schema(fm: &serde_yaml::Value, path: &Path) -> crate::errors::Result<()> {
290    let map = fm.as_mapping().ok_or_else(|| {
291        MdqlError::SchemaInvalid(format!("{}: frontmatter must be a mapping", path.display()))
292    })?;
293
294    // type: schema
295    let type_val = map.get(&serde_yaml::Value::String("type".into()));
296    if type_val.and_then(yaml_to_str) != Some("schema") {
297        return Err(MdqlError::SchemaInvalid(format!(
298            "{}: frontmatter must have 'type: schema'",
299            path.display()
300        )));
301    }
302
303    // table must be a string
304    let table_val = map.get(&serde_yaml::Value::String("table".into()));
305    if table_val.and_then(yaml_to_str).is_none() {
306        return Err(MdqlError::SchemaInvalid(format!(
307            "{}: frontmatter must have 'table' as a string",
308            path.display()
309        )));
310    }
311
312    // frontmatter must be a mapping if present
313    let fm_val = map.get(&serde_yaml::Value::String("frontmatter".into()));
314    if let Some(v) = fm_val {
315        if !v.is_mapping() && !v.is_null() {
316            return Err(MdqlError::SchemaInvalid(format!(
317                "{}: 'frontmatter' must be a mapping",
318                path.display()
319            )));
320        }
321    }
322
323    // sections must be a mapping if present
324    let sec_val = map.get(&serde_yaml::Value::String("sections".into()));
325    if let Some(v) = sec_val {
326        if !v.is_mapping() && !v.is_null() {
327            return Err(MdqlError::SchemaInvalid(format!(
328                "{}: 'sections' must be a mapping",
329                path.display()
330            )));
331        }
332    }
333
334    Ok(())
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Write `content` as the schema file inside `dir`.
    fn make_schema_file(dir: &Path, content: &str) {
        let target = dir.join(MDQL_FILENAME);
        fs::write(target, content).unwrap();
    }

    /// Write `content` into a fresh temporary folder and load it as a schema.
    fn load_from(content: &str) -> crate::errors::Result<Schema> {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), content);
        load_schema(dir.path())
    }

    /// A complete schema loads and every explicit setting is reflected.
    #[test]
    fn test_load_basic_schema() {
        let content = concat!(
            "---\n",
            "type: schema\n",
            "table: test\n",
            "primary_key: path\n",
            "frontmatter:\n",
            "  title:\n",
            "    type: string\n",
            "    required: true\n",
            "h1:\n",
            "  required: false\n",
            "sections: {}\n",
            "rules:\n",
            "  reject_unknown_frontmatter: true\n",
            "  reject_unknown_sections: false\n",
            "  reject_duplicate_sections: true\n",
            "---\n",
        );
        let schema = load_from(content).unwrap();

        assert_eq!(schema.table, "test");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.contains_key("title"));

        let title = &schema.frontmatter["title"];
        assert!(title.required);
        assert_eq!(title.field_type, FieldType::String);

        assert!(!schema.h1_required);
        assert!(!schema.rules.reject_unknown_sections);
    }

    /// A folder without a schema file reports `SchemaNotFound`.
    #[test]
    fn test_missing_schema() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = load_schema(dir.path());
        assert!(matches!(outcome, Err(MdqlError::SchemaNotFound(_))));
    }

    /// A file whose `type` is not `schema` is rejected as invalid.
    #[test]
    fn test_wrong_type() {
        let outcome = load_from("---\ntype: database\nname: test\n---\n");
        assert!(matches!(outcome, Err(MdqlError::SchemaInvalid(_))));
    }

    /// String members of an `enum` list are kept in order.
    #[test]
    fn test_enum_values() {
        let content = concat!(
            "---\n",
            "type: schema\n",
            "table: test\n",
            "frontmatter:\n",
            "  status:\n",
            "    type: string\n",
            "    required: true\n",
            "    enum: [ACTIVE, ARCHIVED]\n",
            "---\n",
        );
        let schema = load_from(content).unwrap();

        let expected: Vec<String> = vec!["ACTIVE".to_string(), "ARCHIVED".to_string()];
        assert_eq!(schema.frontmatter["status"].enum_values, Some(expected));
    }
}
391}