1use std::path::Path;
4
5use indexmap::IndexMap;
6
7use crate::errors::MdqlError;
8use crate::parser::parse_file;
9
/// File name of the schema definition that `load_schema` looks for inside a table folder.
pub const MDQL_FILENAME: &str = "_mdql.md";

/// Type names accepted for a frontmatter field's `type` key.
///
/// The list is quoted verbatim in the error reported for an unknown type name.
pub const VALID_FIELD_TYPES: &[&str] = &[
    "string",
    "int",
    "float",
    "bool",
    "date",
    "datetime",
    "string[]",
    "dict",
];
13
/// The value types a frontmatter field can be declared with.
///
/// Every variant has exactly one textual name (see [`FieldType::as_str`]), and
/// [`FieldType::from_str`] is its inverse. `Eq` and `Hash` are derived so the
/// type can be used as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FieldType {
    String,
    Int,
    Float,
    Bool,
    Date,
    DateTime,
    StringArray,
    Dict,
}

impl FieldType {
    /// Parses a schema type name into a `FieldType`.
    ///
    /// Matching is exact and case-sensitive; any unrecognised name yields `None`.
    pub fn from_str(s: &str) -> Option<FieldType> {
        match s {
            "string" => Some(Self::String),
            "int" => Some(Self::Int),
            "float" => Some(Self::Float),
            "bool" => Some(Self::Bool),
            "date" => Some(Self::Date),
            "datetime" => Some(Self::DateTime),
            "string[]" => Some(Self::StringArray),
            "dict" => Some(Self::Dict),
            _ => None,
        }
    }

    /// Returns the schema type name for this variant.
    ///
    /// The returned name round-trips through [`FieldType::from_str`].
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::String => "string",
            Self::Int => "int",
            Self::Float => "float",
            Self::Bool => "bool",
            Self::Date => "date",
            Self::DateTime => "datetime",
            Self::StringArray => "string[]",
            Self::Dict => "dict",
        }
    }
}
54
55#[derive(Debug, Clone)]
56pub struct FieldDef {
57 pub field_type: FieldType,
58 pub required: bool,
59 pub enum_values: Option<Vec<String>>,
60}
61
/// Definition of a single section as declared in the schema file.
///
/// `PartialEq` and `Eq` are derived so definitions can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionDef {
    /// Value of the section's `type` key; `load_schema` uses `markdown` when it is absent.
    pub content_type: String,
    /// Value of the section's `required` key; `load_schema` uses `false` when it is absent.
    pub required: bool,
}
67
/// Boolean switches read from the `rules` mapping of the schema file.
///
/// Each field mirrors the `rules` key of the same name. `PartialEq` and `Eq`
/// are derived so rule sets can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rules {
    /// `load_schema` defaults this to `true` when the key is absent.
    pub reject_unknown_frontmatter: bool,
    /// `load_schema` defaults this to `true` when the key is absent.
    pub reject_unknown_sections: bool,
    /// `load_schema` defaults this to `true` when the key is absent.
    pub reject_duplicate_sections: bool,
    /// `load_schema` defaults this to `false` when the key is absent.
    pub normalize_numbered_headings: bool,
}
75
76#[derive(Debug, Clone)]
77pub struct Schema {
78 pub table: String,
79 pub primary_key: String,
80 pub frontmatter: IndexMap<String, FieldDef>,
81 pub h1_required: bool,
82 pub sections: IndexMap<String, SectionDef>,
83 pub rules: Rules,
84}
85
86impl Schema {
87 pub fn metadata_keys(&self) -> std::collections::HashSet<String> {
89 let mut keys: std::collections::HashSet<String> = self
90 .frontmatter
91 .keys()
92 .cloned()
93 .collect();
94 keys.insert("path".to_string());
95 keys.insert("h1".to_string());
96 keys.insert("created".to_string());
97 keys.insert("modified".to_string());
98 keys
99 }
100}
101
102fn yaml_to_str(val: &serde_yaml::Value) -> Option<&str> {
103 val.as_str()
104}
105
106fn yaml_to_bool(val: &serde_yaml::Value) -> Option<bool> {
107 val.as_bool()
108}
109
110fn yaml_to_mapping(val: &serde_yaml::Value) -> Option<&serde_yaml::Mapping> {
111 val.as_mapping()
112}
113
114pub fn load_schema(folder: &Path) -> crate::errors::Result<Schema> {
115 let schema_path = folder.join(MDQL_FILENAME);
116 if !schema_path.exists() {
117 return Err(MdqlError::SchemaNotFound(format!(
118 "No {} in {}",
119 MDQL_FILENAME,
120 folder.display()
121 )));
122 }
123
124 let parsed = parse_file(&schema_path, Some(folder), false)?;
125
126 if !parsed.parse_errors.is_empty() {
127 return Err(MdqlError::SchemaInvalid(format!(
128 "Cannot parse {}: {}",
129 MDQL_FILENAME,
130 parsed.parse_errors.join("; ")
131 )));
132 }
133
134 let fm = &parsed.raw_frontmatter;
135 validate_meta_schema(fm, &schema_path)?;
136
137 let fm_map = fm.as_mapping().ok_or_else(|| {
138 MdqlError::SchemaInvalid(format!(
139 "{}: frontmatter must be a YAML mapping",
140 MDQL_FILENAME
141 ))
142 })?;
143
144 let mut frontmatter_defs: IndexMap<String, FieldDef> = IndexMap::new();
146 let fm_key = serde_yaml::Value::String("frontmatter".into());
147 if let Some(fm_fields) = fm_map.get(&fm_key) {
148 if let Some(fields_map) = yaml_to_mapping(fm_fields) {
149 for (name_val, spec_val) in fields_map {
150 let name = name_val.as_str().unwrap_or("").to_string();
151 let spec = spec_val.as_mapping().ok_or_else(|| {
152 MdqlError::SchemaInvalid(format!(
153 "{}: frontmatter.{} must be a mapping",
154 MDQL_FILENAME, name
155 ))
156 })?;
157
158 let ftype_str = spec
159 .get(&serde_yaml::Value::String("type".into()))
160 .and_then(yaml_to_str)
161 .unwrap_or("string");
162
163 let field_type = FieldType::from_str(ftype_str).ok_or_else(|| {
164 MdqlError::SchemaInvalid(format!(
165 "{}: frontmatter.{} has invalid type '{}'. Valid types: {}",
166 MDQL_FILENAME,
167 name,
168 ftype_str,
169 VALID_FIELD_TYPES.join(", ")
170 ))
171 })?;
172
173 let required = spec
174 .get(&serde_yaml::Value::String("required".into()))
175 .and_then(yaml_to_bool)
176 .unwrap_or(false);
177
178 let enum_values = spec
179 .get(&serde_yaml::Value::String("enum".into()))
180 .and_then(|v| v.as_sequence())
181 .map(|seq| {
182 seq.iter()
183 .map(|v| match v {
184 serde_yaml::Value::String(s) => s.clone(),
185 other => format!("{:?}", other),
186 })
187 .collect()
188 });
189
190 frontmatter_defs.insert(name, FieldDef {
191 field_type,
192 required,
193 enum_values,
194 });
195 }
196 }
197 }
198
199 let mut section_defs: IndexMap<String, SectionDef> = IndexMap::new();
201 let sections_key = serde_yaml::Value::String("sections".into());
202 if let Some(sections_val) = fm_map.get(§ions_key) {
203 if let Some(sections_map) = yaml_to_mapping(sections_val) {
204 for (name_val, spec_val) in sections_map {
205 let name = name_val.as_str().unwrap_or("").to_string();
206 let spec = spec_val.as_mapping().ok_or_else(|| {
207 MdqlError::SchemaInvalid(format!(
208 "{}: sections.{} must be a mapping",
209 MDQL_FILENAME, name
210 ))
211 })?;
212
213 let content_type = spec
214 .get(&serde_yaml::Value::String("type".into()))
215 .and_then(yaml_to_str)
216 .unwrap_or("markdown")
217 .to_string();
218
219 let required = spec
220 .get(&serde_yaml::Value::String("required".into()))
221 .and_then(yaml_to_bool)
222 .unwrap_or(false);
223
224 section_defs.insert(name, SectionDef {
225 content_type,
226 required,
227 });
228 }
229 }
230 }
231
232 let h1_key = serde_yaml::Value::String("h1".into());
234 let h1_config = fm_map.get(&h1_key);
235 let h1_required = h1_config
236 .and_then(yaml_to_mapping)
237 .and_then(|m| m.get(&serde_yaml::Value::String("required".into())))
238 .and_then(yaml_to_bool)
239 .unwrap_or(true);
240 let rules_key = serde_yaml::Value::String("rules".into());
242 let rules_map = fm_map.get(&rules_key).and_then(yaml_to_mapping);
243
244 let get_rule_bool = |key: &str, default: bool| -> bool {
245 rules_map
246 .and_then(|m| m.get(&serde_yaml::Value::String(key.into())))
247 .and_then(yaml_to_bool)
248 .unwrap_or(default)
249 };
250
251 let rules = Rules {
252 reject_unknown_frontmatter: get_rule_bool("reject_unknown_frontmatter", true),
253 reject_unknown_sections: get_rule_bool("reject_unknown_sections", true),
254 reject_duplicate_sections: get_rule_bool("reject_duplicate_sections", true),
255 normalize_numbered_headings: get_rule_bool("normalize_numbered_headings", false),
256 };
257
258 let table = fm_map
260 .get(&serde_yaml::Value::String("table".into()))
261 .and_then(yaml_to_str)
262 .unwrap_or("")
263 .to_string();
264
265 let primary_key = fm_map
266 .get(&serde_yaml::Value::String("primary_key".into()))
267 .and_then(yaml_to_str)
268 .unwrap_or("path")
269 .to_string();
270
271 Ok(Schema {
272 table,
273 primary_key,
274 frontmatter: frontmatter_defs,
275 h1_required,
276 sections: section_defs,
277 rules,
278 })
279}
280
281fn validate_meta_schema(fm: &serde_yaml::Value, path: &Path) -> crate::errors::Result<()> {
282 let map = fm.as_mapping().ok_or_else(|| {
283 MdqlError::SchemaInvalid(format!("{}: frontmatter must be a mapping", path.display()))
284 })?;
285
286 let type_val = map.get(&serde_yaml::Value::String("type".into()));
288 if type_val.and_then(yaml_to_str) != Some("schema") {
289 return Err(MdqlError::SchemaInvalid(format!(
290 "{}: frontmatter must have 'type: schema'",
291 path.display()
292 )));
293 }
294
295 let table_val = map.get(&serde_yaml::Value::String("table".into()));
297 if table_val.and_then(yaml_to_str).is_none() {
298 return Err(MdqlError::SchemaInvalid(format!(
299 "{}: frontmatter must have 'table' as a string",
300 path.display()
301 )));
302 }
303
304 let fm_val = map.get(&serde_yaml::Value::String("frontmatter".into()));
306 if let Some(v) = fm_val {
307 if !v.is_mapping() && !v.is_null() {
308 return Err(MdqlError::SchemaInvalid(format!(
309 "{}: 'frontmatter' must be a mapping",
310 path.display()
311 )));
312 }
313 }
314
315 let sec_val = map.get(&serde_yaml::Value::String("sections".into()));
317 if let Some(v) = sec_val {
318 if !v.is_mapping() && !v.is_null() {
319 return Err(MdqlError::SchemaInvalid(format!(
320 "{}: 'sections' must be a mapping",
321 path.display()
322 )));
323 }
324 }
325
326 Ok(())
327}
328
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Writes `content` as the schema file inside `dir`.
    fn make_schema_file(dir: &Path, content: &str) {
        fs::write(dir.join(MDQL_FILENAME), content).unwrap();
    }

    #[test]
    fn test_load_basic_schema() {
        let dir = tempfile::tempdir().unwrap();
        // One YAML line per literal so the nesting, which YAML derives from
        // indentation, stays visible and intact.
        make_schema_file(
            dir.path(),
            concat!(
                "---\n",
                "type: schema\n",
                "table: test\n",
                "primary_key: path\n",
                "frontmatter:\n",
                "  title:\n",
                "    type: string\n",
                "    required: true\n",
                "h1:\n",
                "  required: false\n",
                "sections: {}\n",
                "rules:\n",
                "  reject_unknown_frontmatter: true\n",
                "  reject_unknown_sections: false\n",
                "  reject_duplicate_sections: true\n",
                "---\n",
            ),
        );
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(schema.table, "test");
        assert_eq!(schema.primary_key, "path");
        assert!(schema.frontmatter.contains_key("title"));
        assert!(schema.frontmatter["title"].required);
        assert_eq!(schema.frontmatter["title"].field_type, FieldType::String);
        assert!(!schema.h1_required);
        assert!(!schema.rules.reject_unknown_sections);
    }

    #[test]
    fn test_missing_schema() {
        let dir = tempfile::tempdir().unwrap();
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaNotFound(_))));
    }

    #[test]
    fn test_wrong_type() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(dir.path(), "---\ntype: database\nname: test\n---\n");
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }

    #[test]
    fn test_enum_values() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            concat!(
                "---\n",
                "type: schema\n",
                "table: test\n",
                "frontmatter:\n",
                "  status:\n",
                "    type: string\n",
                "    required: true\n",
                "    enum: [ACTIVE, ARCHIVED]\n",
                "---\n",
            ),
        );
        let schema = load_schema(dir.path()).unwrap();
        assert_eq!(
            schema.frontmatter["status"].enum_values,
            Some(vec!["ACTIVE".to_string(), "ARCHIVED".to_string()])
        );
    }

    #[test]
    fn test_invalid_field_type() {
        let dir = tempfile::tempdir().unwrap();
        make_schema_file(
            dir.path(),
            concat!(
                "---\n",
                "type: schema\n",
                "table: test\n",
                "frontmatter:\n",
                "  id:\n",
                "    type: uuid\n",
                "---\n",
            ),
        );
        let result = load_schema(dir.path());
        assert!(matches!(result, Err(MdqlError::SchemaInvalid(_))));
    }
}