// mdql_core/schema.rs

//! Load and validate table-level `_mdql.md` files (type: schema).

use std::path::Path;

use indexmap::IndexMap;

use crate::errors::MdqlError;
use crate::parser::parse_file;

/// File name of the per-table schema definition that `load_schema` looks for
/// inside a table folder.
pub const MDQL_FILENAME: &str = "_mdql.md";

/// Type names accepted for a frontmatter field's `type:` entry.
///
/// The list is joined with `", "` when reporting an invalid type, so its order
/// is the order shown to the user.
pub const VALID_FIELD_TYPES: &[&str] = &["string", "int", "float", "bool", "date", "string[]"];

/// Data type a schema can declare for a frontmatter field.
///
/// Each variant corresponds to one entry of `VALID_FIELD_TYPES`; use
/// [`FieldType::from_str`] and [`FieldType::as_str`] to convert to and from the
/// textual name used in schema files.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FieldType {
    /// `string`
    String,
    /// `int`
    Int,
    /// `float`
    Float,
    /// `bool`
    Bool,
    /// `date`
    Date,
    /// `string[]`
    StringArray,
}

impl FieldType {
    /// Parses a schema type name.
    ///
    /// The match is exact and case-sensitive; any name that is not one of the
    /// six known type names yields `None`.
    pub fn from_str(s: &str) -> Option<FieldType> {
        match s {
            "string" => Some(FieldType::String),
            "int" => Some(FieldType::Int),
            "float" => Some(FieldType::Float),
            "bool" => Some(FieldType::Bool),
            "date" => Some(FieldType::Date),
            "string[]" => Some(FieldType::StringArray),
            _ => None,
        }
    }

    /// Returns the schema type name for this variant.
    ///
    /// This is the inverse of [`FieldType::from_str`]: feeding the returned
    /// name back into `from_str` gives the same variant.
    pub fn as_str(&self) -> &'static str {
        // No catch-all arm on purpose: adding a variant must fail to compile
        // here until it is given a name.
        match self {
            FieldType::String => "string",
            FieldType::Int => "int",
            FieldType::Float => "float",
            FieldType::Bool => "bool",
            FieldType::Date => "date",
            FieldType::StringArray => "string[]",
        }
    }
}

49#[derive(Debug, Clone)]
50pub struct FieldDef {
51    pub field_type: FieldType,
52    pub required: bool,
53    pub enum_values: Option<Vec<String>>,
54}
55
/// Definition of one body section as declared under the schema's `sections`
/// key.
#[derive(Debug, Clone)]
pub struct SectionDef {
    /// Value of the section's `type` entry; `load_schema` uses `"markdown"`
    /// when it is absent. The value is stored as written and is not checked
    /// against a list of known content types in this file.
    pub content_type: String,
    /// Value of the section's `required` entry; `load_schema` uses `false`
    /// when it is absent.
    pub required: bool,
}

/// Boolean switches read from the schema's `rules` mapping.
///
/// This file only loads the switches; the defaults noted below are the ones
/// `load_schema` applies when a key is missing or not a boolean. What each
/// switch enforces is decided by the code that consumes the schema.
#[derive(Debug, Clone)]
pub struct Rules {
    /// `rules.reject_unknown_frontmatter`; defaults to `true`.
    pub reject_unknown_frontmatter: bool,
    /// `rules.reject_unknown_sections`; defaults to `true`.
    pub reject_unknown_sections: bool,
    /// `rules.reject_duplicate_sections`; defaults to `true`.
    pub reject_duplicate_sections: bool,
    /// `rules.normalize_numbered_headings`; defaults to `false`.
    pub normalize_numbered_headings: bool,
}

70#[derive(Debug, Clone)]
71pub struct Schema {
72    pub table: String,
73    pub primary_key: String,
74    pub frontmatter: IndexMap<String, FieldDef>,
75    pub h1_required: bool,
76    pub h1_must_equal_frontmatter: Option<String>,
77    pub sections: IndexMap<String, SectionDef>,
78    pub rules: Rules,
79}
80
81impl Schema {
82    /// All non-section keys that appear in rows.
83    pub fn metadata_keys(&self) -> std::collections::HashSet<String> {
84        let mut keys: std::collections::HashSet<String> = self
85            .frontmatter
86            .keys()
87            .cloned()
88            .collect();
89        keys.insert("path".to_string());
90        keys.insert("h1".to_string());
91        keys.insert("created".to_string());
92        keys.insert("modified".to_string());
93        keys
94    }
95}
96
97fn yaml_to_str(val: &serde_yaml::Value) -> Option<&str> {
98    val.as_str()
99}
100
101fn yaml_to_bool(val: &serde_yaml::Value) -> Option<bool> {
102    val.as_bool()
103}
104
105fn yaml_to_mapping(val: &serde_yaml::Value) -> Option<&serde_yaml::Mapping> {
106    val.as_mapping()
107}
108
109pub fn load_schema(folder: &Path) -> crate::errors::Result<Schema> {
110    let schema_path = folder.join(MDQL_FILENAME);
111    if !schema_path.exists() {
112        return Err(MdqlError::SchemaNotFound(format!(
113            "No {} in {}",
114            MDQL_FILENAME,
115            folder.display()
116        )));
117    }
118
119    let parsed = parse_file(&schema_path, Some(folder), false)?;
120
121    if !parsed.parse_errors.is_empty() {
122        return Err(MdqlError::SchemaInvalid(format!(
123            "Cannot parse {}: {}",
124            MDQL_FILENAME,
125            parsed.parse_errors.join("; ")
126        )));
127    }
128
129    let fm = &parsed.raw_frontmatter;
130    validate_meta_schema(fm, &schema_path)?;
131
132    let fm_map = fm.as_mapping().unwrap();
133
134    // Build field definitions
135    let mut frontmatter_defs: IndexMap<String, FieldDef> = IndexMap::new();
136    let fm_key = serde_yaml::Value::String("frontmatter".into());
137    if let Some(fm_fields) = fm_map.get(&fm_key) {
138        if let Some(fields_map) = yaml_to_mapping(fm_fields) {
139            for (name_val, spec_val) in fields_map {
140                let name = name_val.as_str().unwrap_or("").to_string();
141                let spec = spec_val.as_mapping().ok_or_else(|| {
142                    MdqlError::SchemaInvalid(format!(
143                        "{}: frontmatter.{} must be a mapping",
144                        MDQL_FILENAME, name
145                    ))
146                })?;
147
148                let ftype_str = spec
149                    .get(&serde_yaml::Value::String("type".into()))
150                    .and_then(yaml_to_str)
151                    .unwrap_or("string");
152
153                let field_type = FieldType::from_str(ftype_str).ok_or_else(|| {
154                    MdqlError::SchemaInvalid(format!(
155                        "{}: frontmatter.{} has invalid type '{}'. Valid types: {}",
156                        MDQL_FILENAME,
157                        name,
158                        ftype_str,
159                        VALID_FIELD_TYPES.join(", ")
160                    ))
161                })?;
162
163                let required = spec
164                    .get(&serde_yaml::Value::String("required".into()))
165                    .and_then(yaml_to_bool)
166                    .unwrap_or(false);
167
168                let enum_values = spec
169                    .get(&serde_yaml::Value::String("enum".into()))
170                    .and_then(|v| v.as_sequence())
171                    .map(|seq| {
172                        seq.iter()
173                            .map(|v| match v {
174                                serde_yaml::Value::String(s) => s.clone(),
175                                other => format!("{:?}", other),
176                            })
177                            .collect()
178                    });
179
180                frontmatter_defs.insert(name, FieldDef {
181                    field_type,
182                    required,
183                    enum_values,
184                });
185            }
186        }
187    }
188
189    // Build section definitions
190    let mut section_defs: IndexMap<String, SectionDef> = IndexMap::new();
191    let sections_key = serde_yaml::Value::String("sections".into());
192    if let Some(sections_val) = fm_map.get(&sections_key) {
193        if let Some(sections_map) = yaml_to_mapping(sections_val) {
194            for (name_val, spec_val) in sections_map {
195                let name = name_val.as_str().unwrap_or("").to_string();
196                let spec = spec_val.as_mapping().ok_or_else(|| {
197                    MdqlError::SchemaInvalid(format!(
198                        "{}: sections.{} must be a mapping",
199                        MDQL_FILENAME, name
200                    ))
201                })?;
202
203                let content_type = spec
204                    .get(&serde_yaml::Value::String("type".into()))
205                    .and_then(yaml_to_str)
206                    .unwrap_or("markdown")
207                    .to_string();
208
209                let required = spec
210                    .get(&serde_yaml::Value::String("required".into()))
211                    .and_then(yaml_to_bool)
212                    .unwrap_or(false);
213
214                section_defs.insert(name, SectionDef {
215                    content_type,
216                    required,
217                });
218            }
219        }
220    }
221
222    // H1 config
223    let h1_key = serde_yaml::Value::String("h1".into());
224    let h1_config = fm_map.get(&h1_key);
225    let h1_required = h1_config
226        .and_then(yaml_to_mapping)
227        .and_then(|m| m.get(&serde_yaml::Value::String("required".into())))
228        .and_then(yaml_to_bool)
229        .unwrap_or(true);
230    let h1_must_equal = h1_config
231        .and_then(yaml_to_mapping)
232        .and_then(|m| m.get(&serde_yaml::Value::String("must_equal_frontmatter".into())))
233        .and_then(yaml_to_str)
234        .map(|s| s.to_string());
235
236    // Rules
237    let rules_key = serde_yaml::Value::String("rules".into());
238    let rules_map = fm_map.get(&rules_key).and_then(yaml_to_mapping);
239
240    let get_rule_bool = |key: &str, default: bool| -> bool {
241        rules_map
242            .and_then(|m| m.get(&serde_yaml::Value::String(key.into())))
243            .and_then(yaml_to_bool)
244            .unwrap_or(default)
245    };
246
247    let rules = Rules {
248        reject_unknown_frontmatter: get_rule_bool("reject_unknown_frontmatter", true),
249        reject_unknown_sections: get_rule_bool("reject_unknown_sections", true),
250        reject_duplicate_sections: get_rule_bool("reject_duplicate_sections", true),
251        normalize_numbered_headings: get_rule_bool("normalize_numbered_headings", false),
252    };
253
254    // Table name
255    let table = fm_map
256        .get(&serde_yaml::Value::String("table".into()))
257        .and_then(yaml_to_str)
258        .unwrap_or("")
259        .to_string();
260
261    let primary_key = fm_map
262        .get(&serde_yaml::Value::String("primary_key".into()))
263        .and_then(yaml_to_str)
264        .unwrap_or("path")
265        .to_string();
266
267    Ok(Schema {
268        table,
269        primary_key,
270        frontmatter: frontmatter_defs,
271        h1_required,
272        h1_must_equal_frontmatter: h1_must_equal,
273        sections: section_defs,
274        rules,
275    })
276}
277
278fn validate_meta_schema(fm: &serde_yaml::Value, path: &Path) -> crate::errors::Result<()> {
279    let map = fm.as_mapping().ok_or_else(|| {
280        MdqlError::SchemaInvalid(format!("{}: frontmatter must be a mapping", path.display()))
281    })?;
282
283    // type: schema
284    let type_val = map.get(&serde_yaml::Value::String("type".into()));
285    if type_val.and_then(yaml_to_str) != Some("schema") {
286        return Err(MdqlError::SchemaInvalid(format!(
287            "{}: frontmatter must have 'type: schema'",
288            path.display()
289        )));
290    }
291
292    // table must be a string
293    let table_val = map.get(&serde_yaml::Value::String("table".into()));
294    if table_val.and_then(yaml_to_str).is_none() {
295        return Err(MdqlError::SchemaInvalid(format!(
296            "{}: frontmatter must have 'table' as a string",
297            path.display()
298        )));
299    }
300
301    // frontmatter must be a mapping if present
302    let fm_val = map.get(&serde_yaml::Value::String("frontmatter".into()));
303    if let Some(v) = fm_val {
304        if !v.is_mapping() && !v.is_null() {
305            return Err(MdqlError::SchemaInvalid(format!(
306                "{}: 'frontmatter' must be a mapping",
307                path.display()
308            )));
309        }
310    }
311
312    // sections must be a mapping if present
313    let sec_val = map.get(&serde_yaml::Value::String("sections".into()));
314    if let Some(v) = sec_val {
315        if !v.is_mapping() && !v.is_null() {
316            return Err(MdqlError::SchemaInvalid(format!(
317                "{}: 'sections' must be a mapping",
318                path.display()
319            )));
320        }
321    }
322
323    Ok(())
324}
325
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Writes `content` as the schema file (`_mdql.md`) inside `dir`.
    fn make_schema_file(dir: &Path, content: &str) {
        fs::write(dir.join(MDQL_FILENAME), content).unwrap();
    }

    #[test]
    fn test_load_basic_schema() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            "---\ntype: schema\ntable: test\nprimary_key: path\nfrontmatter:\n  title:\n    type: string\n    required: true\nh1:\n  required: false\nsections: {}\nrules:\n  reject_unknown_frontmatter: true\n  reject_unknown_sections: false\n  reject_duplicate_sections: true\n---\n",
        );
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(schema.table, "test");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.contains_key("title"));
        assert!(schema.frontmatter["title"].required);
        assert_eq!(schema.frontmatter["title"].field_type, FieldType::String);
        assert!(!schema.h1_required);
        assert!(!schema.rules.reject_unknown_sections);
    }

    #[test]
    fn test_missing_schema() {
        let dir = tempfile::tempdir().unwrap();
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaNotFound(_))));
    }

    #[test]
    fn test_wrong_type() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), "---\ntype: database\nname: test\n---\n");
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }

    #[test]
    fn test_enum_values() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            "---\ntype: schema\ntable: test\nfrontmatter:\n  status:\n    type: string\n    required: true\n    enum: [ACTIVE, ARCHIVED]\n---\n",
        );
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(
            schema.frontmatter["status"].enum_values,
            Some(vec!["ACTIVE".to_string(), "ARCHIVED".to_string()])
        );
    }

    /// A schema that only sets the mandatory keys gets every documented default.
    #[test]
    fn test_defaults_applied() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), "---\ntype: schema\ntable: test\n---\n");
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(schema.table, "test");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.is_empty());
        assert!(schema.sections.is_empty());
        assert!(schema.h1_required);
        assert_eq!(schema.h1_must_equal_frontmatter, None);
        assert!(schema.rules.reject_unknown_frontmatter);
        assert!(schema.rules.reject_unknown_sections);
        assert!(schema.rules.reject_duplicate_sections);
        assert!(!schema.rules.normalize_numbered_headings);
        assert!(schema.metadata_keys().contains("path"));
    }

    /// `table` is mandatory even when `type: schema` is present.
    #[test]
    fn test_missing_table() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), "---\ntype: schema\n---\n");
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }

    /// A field type outside `VALID_FIELD_TYPES` is rejected.
    #[test]
    fn test_invalid_field_type() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            "---\ntype: schema\ntable: test\nfrontmatter:\n  count:\n    type: number\n---\n",
        );
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }
}