Skip to main content

mdql_core/
schema.rs

1//! Load and validate table-level _mdql.md files (type: schema).
2
3use std::path::Path;
4
5use indexmap::IndexMap;
6
7use crate::errors::MdqlError;
8use crate::parser::parse_file;
9
/// File name of the per-table schema document looked up inside a table folder.
pub const MDQL_FILENAME: &str = "_mdql.md";

/// Every type name accepted for a frontmatter field's `type` key.
///
/// The entries mirror the strings understood by `FieldType::from_str`; the
/// list is used to build the "Valid types" hint in schema error messages.
pub const VALID_FIELD_TYPES: &[&str] = &[
    "string",
    "int",
    "float",
    "bool",
    "date",
    "datetime",
    "string[]",
    "dict",
];
13
/// Data type a frontmatter field can be declared with in a schema.
///
/// Each variant maps to exactly one schema type name; see
/// [`FieldType::from_str`] and [`FieldType::as_str`] for the mapping.
/// The enum carries no data, so it is cheap to copy and can be used as a
/// hash-map key or set member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FieldType {
    /// Schema type name `string`.
    String,
    /// Schema type name `int`.
    Int,
    /// Schema type name `float`.
    Float,
    /// Schema type name `bool`.
    Bool,
    /// Schema type name `date`.
    Date,
    /// Schema type name `datetime`.
    DateTime,
    /// Schema type name `string[]`.
    StringArray,
    /// Schema type name `dict`.
    Dict,
}

impl FieldType {
    /// Parses a schema type name into a [`FieldType`].
    ///
    /// Matching is exact and case-sensitive. Returns `None` for any name that
    /// is not one of the supported type names.
    // Kept as an inherent method returning `Option` (rather than `FromStr`)
    // because existing callers rely on this exact signature.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Option<FieldType> {
        match s {
            "string" => Some(FieldType::String),
            "int" => Some(FieldType::Int),
            "float" => Some(FieldType::Float),
            "bool" => Some(FieldType::Bool),
            "date" => Some(FieldType::Date),
            "datetime" => Some(FieldType::DateTime),
            "string[]" => Some(FieldType::StringArray),
            "dict" => Some(FieldType::Dict),
            _ => None,
        }
    }

    /// Returns the schema type name for this variant.
    ///
    /// This is the inverse of [`FieldType::from_str`]: feeding the returned
    /// string back into `from_str` yields the same variant.
    pub fn as_str(&self) -> &'static str {
        match self {
            FieldType::String => "string",
            FieldType::Int => "int",
            FieldType::Float => "float",
            FieldType::Bool => "bool",
            FieldType::Date => "date",
            FieldType::DateTime => "datetime",
            FieldType::StringArray => "string[]",
            FieldType::Dict => "dict",
        }
    }
}

/// Formats the type using its schema type name (same text as `as_str`).
impl std::fmt::Display for FieldType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
54
/// Definition of one frontmatter field as declared in a table schema.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Declared data type of the field (taken from the spec's `type` key).
    pub field_type: FieldType,
    /// Value of the spec's `required` key; the loader in this file uses
    /// `false` when the key is not a boolean or is missing.
    pub required: bool,
    /// Entries of the spec's `enum` list, or `None` when the spec has no
    /// `enum` sequence.
    pub enum_values: Option<Vec<String>>,
}
61
/// Definition of one body section as declared in a table schema.
#[derive(Debug, Clone)]
pub struct SectionDef {
    /// Value of the section spec's `type` key; the loader in this file falls
    /// back to `"markdown"` when no string value is given.
    pub content_type: String,
    /// Value of the section spec's `required` key; the loader in this file
    /// uses `false` when the key is not a boolean or is missing.
    pub required: bool,
}
67
/// Boolean switches read from the `rules` mapping of a table schema.
///
/// How each switch is enforced is not visible in this module; the per-field
/// descriptions are inferred from the flag names and should be confirmed
/// against the validation code. The stated defaults are the ones applied by
/// `load_schema` when a key is absent or not a boolean.
#[derive(Debug, Clone)]
pub struct Rules {
    /// Presumably rejects frontmatter keys the schema does not declare
    /// (loader default: `true`).
    pub reject_unknown_frontmatter: bool,
    /// Presumably rejects sections the schema does not declare
    /// (loader default: `true`).
    pub reject_unknown_sections: bool,
    /// Presumably rejects a section that appears more than once
    /// (loader default: `true`).
    pub reject_duplicate_sections: bool,
    /// Presumably normalizes numbered headings before matching them
    /// (loader default: `false`) — TODO confirm exact behavior.
    pub normalize_numbered_headings: bool,
}
75
/// In-memory form of a table's `_mdql.md` schema, as produced by `load_schema`.
#[derive(Debug, Clone)]
pub struct Schema {
    /// Table name taken from the schema's `table` key.
    pub table: String,
    /// Name of the primary-key column; `load_schema` uses `"path"` when the
    /// schema does not give a string `primary_key`.
    pub primary_key: String,
    /// Declared frontmatter fields, keyed by field name.
    pub frontmatter: IndexMap<String, FieldDef>,
    /// Value of `h1.required` in the schema; `load_schema` uses `true` when
    /// it is not specified.
    pub h1_required: bool,
    /// Declared body sections, keyed by section name.
    pub sections: IndexMap<String, SectionDef>,
    /// Validation switches from the schema's `rules` mapping.
    pub rules: Rules,
}
85
86impl Schema {
87    /// All non-section keys that appear in rows.
88    pub fn metadata_keys(&self) -> std::collections::HashSet<String> {
89        let mut keys: std::collections::HashSet<String> = self
90            .frontmatter
91            .keys()
92            .cloned()
93            .collect();
94        keys.insert("path".to_string());
95        keys.insert("h1".to_string());
96        keys.insert("created".to_string());
97        keys.insert("modified".to_string());
98        keys
99    }
100}
101
102fn yaml_to_str(val: &serde_yaml::Value) -> Option<&str> {
103    val.as_str()
104}
105
106fn yaml_to_bool(val: &serde_yaml::Value) -> Option<bool> {
107    val.as_bool()
108}
109
110fn yaml_to_mapping(val: &serde_yaml::Value) -> Option<&serde_yaml::Mapping> {
111    val.as_mapping()
112}
113
114pub fn load_schema(folder: &Path) -> crate::errors::Result<Schema> {
115    let schema_path = folder.join(MDQL_FILENAME);
116    if !schema_path.exists() {
117        return Err(MdqlError::SchemaNotFound(format!(
118            "No {} in {}",
119            MDQL_FILENAME,
120            folder.display()
121        )));
122    }
123
124    let parsed = parse_file(&schema_path, Some(folder), false)?;
125
126    if !parsed.parse_errors.is_empty() {
127        return Err(MdqlError::SchemaInvalid(format!(
128            "Cannot parse {}: {}",
129            MDQL_FILENAME,
130            parsed.parse_errors.join("; ")
131        )));
132    }
133
134    let fm = &parsed.raw_frontmatter;
135    validate_meta_schema(fm, &schema_path)?;
136
137    let fm_map = fm.as_mapping().ok_or_else(|| {
138        MdqlError::SchemaInvalid(format!(
139            "{}: frontmatter must be a YAML mapping",
140            MDQL_FILENAME
141        ))
142    })?;
143
144    // Build field definitions
145    let mut frontmatter_defs: IndexMap<String, FieldDef> = IndexMap::new();
146    let fm_key = serde_yaml::Value::String("frontmatter".into());
147    if let Some(fm_fields) = fm_map.get(&fm_key) {
148        if let Some(fields_map) = yaml_to_mapping(fm_fields) {
149            for (name_val, spec_val) in fields_map {
150                let name = name_val.as_str().unwrap_or("").to_string();
151                let spec = spec_val.as_mapping().ok_or_else(|| {
152                    MdqlError::SchemaInvalid(format!(
153                        "{}: frontmatter.{} must be a mapping",
154                        MDQL_FILENAME, name
155                    ))
156                })?;
157
158                let ftype_str = spec
159                    .get(&serde_yaml::Value::String("type".into()))
160                    .and_then(yaml_to_str)
161                    .unwrap_or("string");
162
163                let field_type = FieldType::from_str(ftype_str).ok_or_else(|| {
164                    MdqlError::SchemaInvalid(format!(
165                        "{}: frontmatter.{} has invalid type '{}'. Valid types: {}",
166                        MDQL_FILENAME,
167                        name,
168                        ftype_str,
169                        VALID_FIELD_TYPES.join(", ")
170                    ))
171                })?;
172
173                let required = spec
174                    .get(&serde_yaml::Value::String("required".into()))
175                    .and_then(yaml_to_bool)
176                    .unwrap_or(false);
177
178                let enum_values = spec
179                    .get(&serde_yaml::Value::String("enum".into()))
180                    .and_then(|v| v.as_sequence())
181                    .map(|seq| {
182                        seq.iter()
183                            .map(|v| match v {
184                                serde_yaml::Value::String(s) => s.clone(),
185                                other => format!("{:?}", other),
186                            })
187                            .collect()
188                    });
189
190                frontmatter_defs.insert(name, FieldDef {
191                    field_type,
192                    required,
193                    enum_values,
194                });
195            }
196        }
197    }
198
199    // Build section definitions
200    let mut section_defs: IndexMap<String, SectionDef> = IndexMap::new();
201    let sections_key = serde_yaml::Value::String("sections".into());
202    if let Some(sections_val) = fm_map.get(&sections_key) {
203        if let Some(sections_map) = yaml_to_mapping(sections_val) {
204            for (name_val, spec_val) in sections_map {
205                let name = name_val.as_str().unwrap_or("").to_string();
206                let spec = spec_val.as_mapping().ok_or_else(|| {
207                    MdqlError::SchemaInvalid(format!(
208                        "{}: sections.{} must be a mapping",
209                        MDQL_FILENAME, name
210                    ))
211                })?;
212
213                let content_type = spec
214                    .get(&serde_yaml::Value::String("type".into()))
215                    .and_then(yaml_to_str)
216                    .unwrap_or("markdown")
217                    .to_string();
218
219                let required = spec
220                    .get(&serde_yaml::Value::String("required".into()))
221                    .and_then(yaml_to_bool)
222                    .unwrap_or(false);
223
224                section_defs.insert(name, SectionDef {
225                    content_type,
226                    required,
227                });
228            }
229        }
230    }
231
232    // H1 config
233    let h1_key = serde_yaml::Value::String("h1".into());
234    let h1_config = fm_map.get(&h1_key);
235    let h1_required = h1_config
236        .and_then(yaml_to_mapping)
237        .and_then(|m| m.get(&serde_yaml::Value::String("required".into())))
238        .and_then(yaml_to_bool)
239        .unwrap_or(true);
240    // Rules
241    let rules_key = serde_yaml::Value::String("rules".into());
242    let rules_map = fm_map.get(&rules_key).and_then(yaml_to_mapping);
243
244    let get_rule_bool = |key: &str, default: bool| -> bool {
245        rules_map
246            .and_then(|m| m.get(&serde_yaml::Value::String(key.into())))
247            .and_then(yaml_to_bool)
248            .unwrap_or(default)
249    };
250
251    let rules = Rules {
252        reject_unknown_frontmatter: get_rule_bool("reject_unknown_frontmatter", true),
253        reject_unknown_sections: get_rule_bool("reject_unknown_sections", true),
254        reject_duplicate_sections: get_rule_bool("reject_duplicate_sections", true),
255        normalize_numbered_headings: get_rule_bool("normalize_numbered_headings", false),
256    };
257
258    // Table name
259    let table = fm_map
260        .get(&serde_yaml::Value::String("table".into()))
261        .and_then(yaml_to_str)
262        .unwrap_or("")
263        .to_string();
264
265    let primary_key = fm_map
266        .get(&serde_yaml::Value::String("primary_key".into()))
267        .and_then(yaml_to_str)
268        .unwrap_or("path")
269        .to_string();
270
271    Ok(Schema {
272        table,
273        primary_key,
274        frontmatter: frontmatter_defs,
275        h1_required,
276        sections: section_defs,
277        rules,
278    })
279}
280
281fn validate_meta_schema(fm: &serde_yaml::Value, path: &Path) -> crate::errors::Result<()> {
282    let map = fm.as_mapping().ok_or_else(|| {
283        MdqlError::SchemaInvalid(format!("{}: frontmatter must be a mapping", path.display()))
284    })?;
285
286    // type: schema
287    let type_val = map.get(&serde_yaml::Value::String("type".into()));
288    if type_val.and_then(yaml_to_str) != Some("schema") {
289        return Err(MdqlError::SchemaInvalid(format!(
290            "{}: frontmatter must have 'type: schema'",
291            path.display()
292        )));
293    }
294
295    // table must be a string
296    let table_val = map.get(&serde_yaml::Value::String("table".into()));
297    if table_val.and_then(yaml_to_str).is_none() {
298        return Err(MdqlError::SchemaInvalid(format!(
299            "{}: frontmatter must have 'table' as a string",
300            path.display()
301        )));
302    }
303
304    // frontmatter must be a mapping if present
305    let fm_val = map.get(&serde_yaml::Value::String("frontmatter".into()));
306    if let Some(v) = fm_val {
307        if !v.is_mapping() && !v.is_null() {
308            return Err(MdqlError::SchemaInvalid(format!(
309                "{}: 'frontmatter' must be a mapping",
310                path.display()
311            )));
312        }
313    }
314
315    // sections must be a mapping if present
316    let sec_val = map.get(&serde_yaml::Value::String("sections".into()));
317    if let Some(v) = sec_val {
318        if !v.is_mapping() && !v.is_null() {
319            return Err(MdqlError::SchemaInvalid(format!(
320                "{}: 'sections' must be a mapping",
321                path.display()
322            )));
323        }
324    }
325
326    Ok(())
327}
328
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Writes `content` as the schema file inside `dir`.
    fn make_schema_file(dir: &Path, content: &str) {
        let target = dir.join(MDQL_FILENAME);
        fs::write(target, content).unwrap();
    }

    /// A fully specified schema loads with every declared value reflected.
    #[test]
    fn test_load_basic_schema() {
        let tmp = tempfile::tempdir().unwrap();
        let content = "---\ntype: schema\ntable: test\nprimary_key: path\nfrontmatter:\n  title:\n    type: string\n    required: true\nh1:\n  required: false\nsections: {}\nrules:\n  reject_unknown_frontmatter: true\n  reject_unknown_sections: false\n  reject_duplicate_sections: true\n---\n";
        make_schema_file(tmp.path(), content);

        let loaded = load_schema(tmp.path()).unwrap();

        assert_eq!(loaded.table, "test");
        assert_eq!(loaded.primary_key, "path");

        assert!(loaded.frontmatter.contains_key("title"));
        let title = &loaded.frontmatter["title"];
        assert!(title.required);
        assert_eq!(title.field_type, FieldType::String);

        assert!(!loaded.h1_required);
        assert!(!loaded.rules.reject_unknown_sections);
    }

    /// A folder without a schema file reports `SchemaNotFound`.
    #[test]
    fn test_missing_schema() {
        let tmp = tempfile::tempdir().unwrap();
        let outcome = load_schema(tmp.path());
        assert!(matches!(outcome, Err(MdqlError::SchemaNotFound(_))));
    }

    /// A document whose `type` is not `schema` is rejected as invalid.
    #[test]
    fn test_wrong_type() {
        let tmp = tempfile::tempdir().unwrap();
        make_schema_file(tmp.path(), "---\ntype: database\nname: test\n---\n");
        let outcome = load_schema(tmp.path());
        assert!(matches!(outcome, Err(MdqlError::SchemaInvalid(_))));
    }

    /// The `enum` list of a field is captured in declaration order.
    #[test]
    fn test_enum_values() {
        let tmp = tempfile::tempdir().unwrap();
        let content = "---\ntype: schema\ntable: test\nfrontmatter:\n  status:\n    type: string\n    required: true\n    enum: [ACTIVE, ARCHIVED]\n---\n";
        make_schema_file(tmp.path(), content);

        let loaded = load_schema(tmp.path()).unwrap();
        let expected: Vec<String> = ["ACTIVE", "ARCHIVED"]
            .iter()
            .map(|value| value.to_string())
            .collect();
        assert_eq!(loaded.frontmatter["status"].enum_values, Some(expected));
    }
}
383}