Skip to main content

mdql_core/
schema.rs

1//! Load and validate table-level _mdql.md files (type: schema).
2
3use std::path::Path;
4
5use indexmap::IndexMap;
6
7use crate::errors::MdqlError;
8use crate::parser::parse_file;
9
/// File name of the table-level schema document looked up inside a table folder.
pub const MDQL_FILENAME: &str = "_mdql.md";

/// Type names accepted for a frontmatter field's `type` key, in the order
/// they are listed in "invalid type" error messages.
pub const VALID_FIELD_TYPES: &[&str] = &[
    "string",
    "int",
    "float",
    "bool",
    "date",
    "datetime",
    "string[]",
];
13
/// Value type a frontmatter field may declare in a schema file.
///
/// Each variant maps to exactly one schema type name; the mapping is given by
/// [`FieldType::from_str`] and [`FieldType::as_str`], which are inverses of
/// each other.
///
/// The enum carries no data, so it is `Copy` and can be used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FieldType {
    /// Schema type name `string`.
    String,
    /// Schema type name `int`.
    Int,
    /// Schema type name `float`.
    Float,
    /// Schema type name `bool`.
    Bool,
    /// Schema type name `date`.
    Date,
    /// Schema type name `datetime`.
    DateTime,
    /// Schema type name `string[]`.
    StringArray,
}

impl FieldType {
    /// Parses a schema type name into a [`FieldType`].
    ///
    /// Matching is exact and case-sensitive. Returns `None` for any name that
    /// is not one of `string`, `int`, `float`, `bool`, `date`, `datetime`,
    /// or `string[]`.
    pub fn from_str(s: &str) -> Option<FieldType> {
        let parsed = match s {
            "string" => Self::String,
            "int" => Self::Int,
            "float" => Self::Float,
            "bool" => Self::Bool,
            "date" => Self::Date,
            "datetime" => Self::DateTime,
            "string[]" => Self::StringArray,
            _ => return None,
        };
        Some(parsed)
    }

    /// Returns the schema type name for this variant.
    ///
    /// The returned name is accepted by [`FieldType::from_str`] and yields
    /// the same variant back.
    pub fn as_str(&self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force an update here.
        match self {
            Self::String => "string",
            Self::Int => "int",
            Self::Float => "float",
            Self::Bool => "bool",
            Self::Date => "date",
            Self::DateTime => "datetime",
            Self::StringArray => "string[]",
        }
    }
}
51
52#[derive(Debug, Clone)]
53pub struct FieldDef {
54    pub field_type: FieldType,
55    pub required: bool,
56    pub enum_values: Option<Vec<String>>,
57}
58
/// Definition of one document section as declared in the schema file.
#[derive(Clone, Debug)]
pub struct SectionDef {
    /// Declared content type of the section, kept as the raw string from the schema.
    pub content_type: String,
    /// Value of the section's `required` flag.
    pub required: bool,
}
64
/// Boolean validation switches read from the schema's `rules` mapping.
///
/// NOTE(review): the per-flag descriptions below are inferred from the flag
/// names; the code that enforces them is not in this file — confirm there.
#[derive(Clone, Debug)]
pub struct Rules {
    /// Presumably rejects frontmatter keys the schema does not declare.
    pub reject_unknown_frontmatter: bool,
    /// Presumably rejects sections the schema does not declare.
    pub reject_unknown_sections: bool,
    /// Presumably rejects a section heading that appears more than once.
    pub reject_duplicate_sections: bool,
    /// Presumably normalizes numbered headings before matching section names.
    pub normalize_numbered_headings: bool,
}
72
73#[derive(Debug, Clone)]
74pub struct Schema {
75    pub table: String,
76    pub primary_key: String,
77    pub frontmatter: IndexMap<String, FieldDef>,
78    pub h1_required: bool,
79    pub h1_must_equal_frontmatter: Option<String>,
80    pub sections: IndexMap<String, SectionDef>,
81    pub rules: Rules,
82}
83
84impl Schema {
85    /// All non-section keys that appear in rows.
86    pub fn metadata_keys(&self) -> std::collections::HashSet<String> {
87        let mut keys: std::collections::HashSet<String> = self
88            .frontmatter
89            .keys()
90            .cloned()
91            .collect();
92        keys.insert("path".to_string());
93        keys.insert("h1".to_string());
94        keys.insert("created".to_string());
95        keys.insert("modified".to_string());
96        keys
97    }
98}
99
100fn yaml_to_str(val: &serde_yaml::Value) -> Option<&str> {
101    val.as_str()
102}
103
104fn yaml_to_bool(val: &serde_yaml::Value) -> Option<bool> {
105    val.as_bool()
106}
107
108fn yaml_to_mapping(val: &serde_yaml::Value) -> Option<&serde_yaml::Mapping> {
109    val.as_mapping()
110}
111
112pub fn load_schema(folder: &Path) -> crate::errors::Result<Schema> {
113    let schema_path = folder.join(MDQL_FILENAME);
114    if !schema_path.exists() {
115        return Err(MdqlError::SchemaNotFound(format!(
116            "No {} in {}",
117            MDQL_FILENAME,
118            folder.display()
119        )));
120    }
121
122    let parsed = parse_file(&schema_path, Some(folder), false)?;
123
124    if !parsed.parse_errors.is_empty() {
125        return Err(MdqlError::SchemaInvalid(format!(
126            "Cannot parse {}: {}",
127            MDQL_FILENAME,
128            parsed.parse_errors.join("; ")
129        )));
130    }
131
132    let fm = &parsed.raw_frontmatter;
133    validate_meta_schema(fm, &schema_path)?;
134
135    let fm_map = fm.as_mapping().unwrap();
136
137    // Build field definitions
138    let mut frontmatter_defs: IndexMap<String, FieldDef> = IndexMap::new();
139    let fm_key = serde_yaml::Value::String("frontmatter".into());
140    if let Some(fm_fields) = fm_map.get(&fm_key) {
141        if let Some(fields_map) = yaml_to_mapping(fm_fields) {
142            for (name_val, spec_val) in fields_map {
143                let name = name_val.as_str().unwrap_or("").to_string();
144                let spec = spec_val.as_mapping().ok_or_else(|| {
145                    MdqlError::SchemaInvalid(format!(
146                        "{}: frontmatter.{} must be a mapping",
147                        MDQL_FILENAME, name
148                    ))
149                })?;
150
151                let ftype_str = spec
152                    .get(&serde_yaml::Value::String("type".into()))
153                    .and_then(yaml_to_str)
154                    .unwrap_or("string");
155
156                let field_type = FieldType::from_str(ftype_str).ok_or_else(|| {
157                    MdqlError::SchemaInvalid(format!(
158                        "{}: frontmatter.{} has invalid type '{}'. Valid types: {}",
159                        MDQL_FILENAME,
160                        name,
161                        ftype_str,
162                        VALID_FIELD_TYPES.join(", ")
163                    ))
164                })?;
165
166                let required = spec
167                    .get(&serde_yaml::Value::String("required".into()))
168                    .and_then(yaml_to_bool)
169                    .unwrap_or(false);
170
171                let enum_values = spec
172                    .get(&serde_yaml::Value::String("enum".into()))
173                    .and_then(|v| v.as_sequence())
174                    .map(|seq| {
175                        seq.iter()
176                            .map(|v| match v {
177                                serde_yaml::Value::String(s) => s.clone(),
178                                other => format!("{:?}", other),
179                            })
180                            .collect()
181                    });
182
183                frontmatter_defs.insert(name, FieldDef {
184                    field_type,
185                    required,
186                    enum_values,
187                });
188            }
189        }
190    }
191
192    // Build section definitions
193    let mut section_defs: IndexMap<String, SectionDef> = IndexMap::new();
194    let sections_key = serde_yaml::Value::String("sections".into());
195    if let Some(sections_val) = fm_map.get(&sections_key) {
196        if let Some(sections_map) = yaml_to_mapping(sections_val) {
197            for (name_val, spec_val) in sections_map {
198                let name = name_val.as_str().unwrap_or("").to_string();
199                let spec = spec_val.as_mapping().ok_or_else(|| {
200                    MdqlError::SchemaInvalid(format!(
201                        "{}: sections.{} must be a mapping",
202                        MDQL_FILENAME, name
203                    ))
204                })?;
205
206                let content_type = spec
207                    .get(&serde_yaml::Value::String("type".into()))
208                    .and_then(yaml_to_str)
209                    .unwrap_or("markdown")
210                    .to_string();
211
212                let required = spec
213                    .get(&serde_yaml::Value::String("required".into()))
214                    .and_then(yaml_to_bool)
215                    .unwrap_or(false);
216
217                section_defs.insert(name, SectionDef {
218                    content_type,
219                    required,
220                });
221            }
222        }
223    }
224
225    // H1 config
226    let h1_key = serde_yaml::Value::String("h1".into());
227    let h1_config = fm_map.get(&h1_key);
228    let h1_required = h1_config
229        .and_then(yaml_to_mapping)
230        .and_then(|m| m.get(&serde_yaml::Value::String("required".into())))
231        .and_then(yaml_to_bool)
232        .unwrap_or(true);
233    let h1_must_equal = h1_config
234        .and_then(yaml_to_mapping)
235        .and_then(|m| m.get(&serde_yaml::Value::String("must_equal_frontmatter".into())))
236        .and_then(yaml_to_str)
237        .map(|s| s.to_string());
238
239    // Rules
240    let rules_key = serde_yaml::Value::String("rules".into());
241    let rules_map = fm_map.get(&rules_key).and_then(yaml_to_mapping);
242
243    let get_rule_bool = |key: &str, default: bool| -> bool {
244        rules_map
245            .and_then(|m| m.get(&serde_yaml::Value::String(key.into())))
246            .and_then(yaml_to_bool)
247            .unwrap_or(default)
248    };
249
250    let rules = Rules {
251        reject_unknown_frontmatter: get_rule_bool("reject_unknown_frontmatter", true),
252        reject_unknown_sections: get_rule_bool("reject_unknown_sections", true),
253        reject_duplicate_sections: get_rule_bool("reject_duplicate_sections", true),
254        normalize_numbered_headings: get_rule_bool("normalize_numbered_headings", false),
255    };
256
257    // Table name
258    let table = fm_map
259        .get(&serde_yaml::Value::String("table".into()))
260        .and_then(yaml_to_str)
261        .unwrap_or("")
262        .to_string();
263
264    let primary_key = fm_map
265        .get(&serde_yaml::Value::String("primary_key".into()))
266        .and_then(yaml_to_str)
267        .unwrap_or("path")
268        .to_string();
269
270    Ok(Schema {
271        table,
272        primary_key,
273        frontmatter: frontmatter_defs,
274        h1_required,
275        h1_must_equal_frontmatter: h1_must_equal,
276        sections: section_defs,
277        rules,
278    })
279}
280
281fn validate_meta_schema(fm: &serde_yaml::Value, path: &Path) -> crate::errors::Result<()> {
282    let map = fm.as_mapping().ok_or_else(|| {
283        MdqlError::SchemaInvalid(format!("{}: frontmatter must be a mapping", path.display()))
284    })?;
285
286    // type: schema
287    let type_val = map.get(&serde_yaml::Value::String("type".into()));
288    if type_val.and_then(yaml_to_str) != Some("schema") {
289        return Err(MdqlError::SchemaInvalid(format!(
290            "{}: frontmatter must have 'type: schema'",
291            path.display()
292        )));
293    }
294
295    // table must be a string
296    let table_val = map.get(&serde_yaml::Value::String("table".into()));
297    if table_val.and_then(yaml_to_str).is_none() {
298        return Err(MdqlError::SchemaInvalid(format!(
299            "{}: frontmatter must have 'table' as a string",
300            path.display()
301        )));
302    }
303
304    // frontmatter must be a mapping if present
305    let fm_val = map.get(&serde_yaml::Value::String("frontmatter".into()));
306    if let Some(v) = fm_val {
307        if !v.is_mapping() && !v.is_null() {
308            return Err(MdqlError::SchemaInvalid(format!(
309                "{}: 'frontmatter' must be a mapping",
310                path.display()
311            )));
312        }
313    }
314
315    // sections must be a mapping if present
316    let sec_val = map.get(&serde_yaml::Value::String("sections".into()));
317    if let Some(v) = sec_val {
318        if !v.is_mapping() && !v.is_null() {
319            return Err(MdqlError::SchemaInvalid(format!(
320                "{}: 'sections' must be a mapping",
321                path.display()
322            )));
323        }
324    }
325
326    Ok(())
327}
328
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Writes `content` as the schema file of a fresh temporary folder and
    /// loads it. The folder is removed when this helper returns.
    fn load_from(content: &str) -> crate::errors::Result<Schema> {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join(MDQL_FILENAME), content).unwrap();
        load_schema(tmp.path())
    }

    #[test]
    fn test_load_basic_schema() {
        let schema = load_from(
            "---\ntype: schema\ntable: test\nprimary_key: path\nfrontmatter:\n  title:\n    type: string\n    required: true\nh1:\n  required: false\nsections: {}\nrules:\n  reject_unknown_frontmatter: true\n  reject_unknown_sections: false\n  reject_duplicate_sections: true\n---\n",
        )
        .unwrap();

        assert_eq!(schema.table, "test");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.contains_key("title"));

        let title = &schema.frontmatter["title"];
        assert!(title.required);
        assert_eq!(title.field_type, FieldType::String);

        assert!(!schema.h1_required);
        assert!(!schema.rules.reject_unknown_sections);
    }

    #[test]
    fn test_missing_schema() {
        // An empty folder has no schema file at all.
        let empty = tempfile::tempdir().unwrap();
        let outcome = load_schema(empty.path());
        assert!(matches!(outcome, Err(MdqlError::SchemaNotFound(_))));
    }

    #[test]
    fn test_wrong_type() {
        let outcome = load_from("---\ntype: database\nname: test\n---\n");
        assert!(matches!(outcome, Err(MdqlError::SchemaInvalid(_))));
    }

    #[test]
    fn test_enum_values() {
        let schema = load_from(
            "---\ntype: schema\ntable: test\nfrontmatter:\n  status:\n    type: string\n    required: true\n    enum: [ACTIVE, ARCHIVED]\n---\n",
        )
        .unwrap();

        let expected = Some(vec!["ACTIVE".to_string(), "ARCHIVED".to_string()]);
        assert_eq!(schema.frontmatter["status"].enum_values, expected);
    }
}
383}