Skip to main content

mdql_core/
validator.rs

1//! Validate parsed markdown files against a schema.
2
3use std::collections::{HashMap, HashSet};
4
5use crate::database::DatabaseConfig;
6use crate::errors::{ValidationError, ValidationErrorKind};
7use crate::model::{Row, Value};
8use crate::parser::ParsedFile;
9use crate::schema::Schema;
10use crate::stamp::TIMESTAMP_FIELDS;
11
12pub fn validate_file(parsed: &ParsedFile, schema: &Schema) -> Vec<ValidationError> {
13    let mut errors = Vec::new();
14    let fp = &parsed.path;
15
16    // Parse-level errors
17    for msg in &parsed.parse_errors {
18        errors.push(ValidationError {
19            file_path: fp.clone(),
20            error_type: ValidationErrorKind::ParseError,
21            field: None,
22            message: msg.clone(),
23            line_number: None,
24        });
25    }
26
27    if errors.iter().any(|e| e.error_type == ValidationErrorKind::ParseError) {
28        return errors;
29    }
30
31    let fm = &parsed.raw_frontmatter;
32    let fm_map = match fm.as_mapping() {
33        Some(m) => m,
34        None => return errors,
35    };
36
37    // --- Frontmatter field checks ---
38    for (name, field_def) in &schema.frontmatter {
39        let key = serde_yaml::Value::String(name.clone());
40        match fm_map.get(&key) {
41            None => {
42                if field_def.required {
43                    errors.push(ValidationError {
44                        file_path: fp.clone(),
45                        error_type: ValidationErrorKind::MissingField,
46                        field: Some(name.clone()),
47                        message: format!("Missing required frontmatter field '{}'", name),
48                        line_number: None,
49                    });
50                }
51            }
52            Some(value) => {
53                if let Some(type_err) = check_type(value, &field_def.field_type, name) {
54                    errors.push(ValidationError {
55                        file_path: fp.clone(),
56                        error_type: ValidationErrorKind::TypeMismatch,
57                        field: Some(name.clone()),
58                        message: type_err,
59                        line_number: None,
60                    });
61                }
62
63                if let Some(ref enum_vals) = field_def.enum_values {
64                    if !value.is_null() {
65                        let str_val = yaml_value_to_string(value);
66                        if !enum_vals.contains(&str_val) {
67                            errors.push(ValidationError {
68                                file_path: fp.clone(),
69                                error_type: ValidationErrorKind::EnumViolation,
70                                field: Some(name.clone()),
71                                message: format!(
72                                    "Field '{}' value '{}' not in allowed values: {:?}",
73                                    name, str_val, enum_vals
74                                ),
75                                line_number: None,
76                            });
77                        }
78                    }
79                }
80            }
81        }
82    }
83
84    // Validate timestamp fields as datetime (ISO 8601)
85    for ts_field in TIMESTAMP_FIELDS {
86        let key = serde_yaml::Value::String(ts_field.to_string());
87        if let Some(value) = fm_map.get(&key) {
88            if let Some(type_err) = check_type(
89                value,
90                &crate::schema::FieldType::DateTime,
91                ts_field,
92            ) {
93                errors.push(ValidationError {
94                    file_path: fp.clone(),
95                    error_type: ValidationErrorKind::TypeMismatch,
96                    field: Some(ts_field.to_string()),
97                    message: type_err,
98                    line_number: None,
99                });
100            }
101        }
102    }
103
104    // Unknown frontmatter
105    if schema.rules.reject_unknown_frontmatter {
106        for (key_val, _) in fm_map {
107            if let Some(key) = key_val.as_str() {
108                if !schema.frontmatter.contains_key(key)
109                    && !TIMESTAMP_FIELDS.contains(&key)
110                {
111                    errors.push(ValidationError {
112                        file_path: fp.clone(),
113                        error_type: ValidationErrorKind::UnknownField,
114                        field: Some(key.to_string()),
115                        message: format!(
116                            "Unknown frontmatter field '{}' (not in schema)",
117                            key
118                        ),
119                        line_number: None,
120                    });
121                }
122            }
123        }
124    }
125
126    // --- H1 checks ---
127    if schema.h1_required && parsed.h1.is_none() {
128        errors.push(ValidationError {
129            file_path: fp.clone(),
130            error_type: ValidationErrorKind::MissingH1,
131            field: None,
132            message: "Missing required H1 heading".to_string(),
133            line_number: None,
134        });
135    }
136
137    // --- Section checks ---
138    let section_names: Vec<&str> = parsed
139        .sections
140        .iter()
141        .map(|s| s.normalized_heading.as_str())
142        .collect();
143
144    // Count occurrences
145    let mut section_counter: HashMap<&str, usize> = HashMap::new();
146    for name in &section_names {
147        *section_counter.entry(name).or_insert(0) += 1;
148    }
149
150    // Duplicate sections
151    if schema.rules.reject_duplicate_sections {
152        for (name, count) in &section_counter {
153            if *count > 1 {
154                errors.push(ValidationError {
155                    file_path: fp.clone(),
156                    error_type: ValidationErrorKind::DuplicateSection,
157                    field: Some(name.to_string()),
158                    message: format!(
159                        "Duplicate section '{}' (appears {} times)",
160                        name, count
161                    ),
162                    line_number: None,
163                });
164            }
165        }
166    }
167
168    // Required sections
169    for (name, section_def) in &schema.sections {
170        if section_def.required && !section_names.contains(&name.as_str()) {
171            errors.push(ValidationError {
172                file_path: fp.clone(),
173                error_type: ValidationErrorKind::MissingSection,
174                field: Some(name.clone()),
175                message: format!("Missing required section '{}'", name),
176                line_number: None,
177            });
178        }
179    }
180
181    // Unknown sections
182    if schema.rules.reject_unknown_sections {
183        for section in &parsed.sections {
184            if !schema.sections.contains_key(&section.normalized_heading) {
185                errors.push(ValidationError {
186                    file_path: fp.clone(),
187                    error_type: ValidationErrorKind::UnknownSection,
188                    field: Some(section.normalized_heading.clone()),
189                    message: format!(
190                        "Unknown section '{}' (not in schema)",
191                        section.normalized_heading
192                    ),
193                    line_number: Some(section.line_number),
194                });
195            }
196        }
197    }
198
199    // Loose body is a hard error — all content must be under H2 sections
200    if parsed.has_loose_body {
201        errors.push(ValidationError {
202            file_path: fp.clone(),
203            error_type: ValidationErrorKind::LooseBody,
204            field: None,
205            message: "Body content not under an H2 section is not allowed; wrap in ## heading".to_string(),
206            line_number: None,
207        });
208    }
209
210    errors
211}
212
213fn check_type(
214    value: &serde_yaml::Value,
215    expected: &crate::schema::FieldType,
216    field_name: &str,
217) -> Option<String> {
218    use crate::schema::FieldType;
219
220    if value.is_null() {
221        return None;
222    }
223
224    match expected {
225        FieldType::String => {
226            if !value.is_string() {
227                return Some(format!(
228                    "Field '{}' expected string, got {}",
229                    field_name,
230                    yaml_type_name(value)
231                ));
232            }
233        }
234        FieldType::Int => {
235            if value.is_bool() {
236                return Some(format!(
237                    "Field '{}' expected int, got bool",
238                    field_name
239                ));
240            }
241            // serde_yaml may parse integers as i64 or u64
242            if !value.is_i64() && !value.is_u64() {
243                return Some(format!(
244                    "Field '{}' expected int, got {}",
245                    field_name,
246                    yaml_type_name(value)
247                ));
248            }
249        }
250        FieldType::Float => {
251            if value.is_bool() {
252                return Some(format!(
253                    "Field '{}' expected float, got bool",
254                    field_name
255                ));
256            }
257            if !value.is_f64() && !value.is_i64() && !value.is_u64() {
258                return Some(format!(
259                    "Field '{}' expected float, got {}",
260                    field_name,
261                    yaml_type_name(value)
262                ));
263            }
264        }
265        FieldType::Bool => {
266            if !value.is_bool() {
267                return Some(format!(
268                    "Field '{}' expected bool, got {}",
269                    field_name,
270                    yaml_type_name(value)
271                ));
272            }
273        }
274        FieldType::Date => {
275            if let Some(s) = value.as_str() {
276                if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_err() {
277                    return Some(format!(
278                        "Field '{}' expected date (YYYY-MM-DD), got string '{}'",
279                        field_name, s
280                    ));
281                }
282                return None;
283            }
284            if !value.is_string() {
285                return Some(format!(
286                    "Field '{}' expected date, got {}",
287                    field_name,
288                    yaml_type_name(value)
289                ));
290            }
291        }
292        FieldType::DateTime => {
293            if let Some(s) = value.as_str() {
294                let ok = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").is_ok()
295                    || chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f").is_ok();
296                if !ok {
297                    return Some(format!(
298                        "Field '{}' expected datetime (ISO 8601), got string '{}'",
299                        field_name, s
300                    ));
301                }
302                return None;
303            }
304            if !value.is_string() {
305                return Some(format!(
306                    "Field '{}' expected datetime, got {}",
307                    field_name,
308                    yaml_type_name(value)
309                ));
310            }
311        }
312        FieldType::StringArray => {
313            match value.as_sequence() {
314                None => {
315                    return Some(format!(
316                        "Field '{}' expected string[], got {}",
317                        field_name,
318                        yaml_type_name(value)
319                    ));
320                }
321                Some(seq) => {
322                    for (i, item) in seq.iter().enumerate() {
323                        if !item.is_string() {
324                            return Some(format!(
325                                "Field '{}[{}]' expected string, got {}",
326                                field_name,
327                                i,
328                                yaml_type_name(item)
329                            ));
330                        }
331                    }
332                }
333            }
334        }
335        FieldType::Dict => {
336            if !value.is_mapping() {
337                return Some(format!(
338                    "Field '{}' expected dict (mapping), got {}",
339                    field_name,
340                    yaml_type_name(value)
341                ));
342            }
343            // Dict values may be scalars, lists, or nested dicts
344        }
345    }
346
347    None
348}
349
350fn yaml_type_name(value: &serde_yaml::Value) -> &'static str {
351    match value {
352        serde_yaml::Value::Null => "null",
353        serde_yaml::Value::Bool(_) => "bool",
354        serde_yaml::Value::Number(_) => {
355            if value.is_f64() && !value.is_i64() && !value.is_u64() {
356                "float"
357            } else {
358                "int"
359            }
360        }
361        serde_yaml::Value::String(_) => "str",
362        serde_yaml::Value::Sequence(_) => "list",
363        serde_yaml::Value::Mapping(_) => "mapping",
364        _ => "unknown",
365    }
366}
367
368fn yaml_value_to_string(value: &serde_yaml::Value) -> String {
369    match value {
370        serde_yaml::Value::String(s) => s.clone(),
371        serde_yaml::Value::Number(n) => n.to_string(),
372        serde_yaml::Value::Bool(b) => b.to_string(),
373        serde_yaml::Value::Null => "null".to_string(),
374        _ => format!("{:?}", value),
375    }
376}
377
378/// Validate all foreign key constraints across a loaded database.
379pub(crate) fn validate_foreign_keys(
380    db_config: &DatabaseConfig,
381    tables: &HashMap<String, (Schema, Vec<Row>)>,
382) -> Vec<ValidationError> {
383    let mut errors = Vec::new();
384
385    for fk in &db_config.foreign_keys {
386        let to_table = match tables.get(&fk.to_table) {
387            Some(t) => t,
388            None => {
389                errors.push(ValidationError {
390                    file_path: format!("_mdql.md"),
391                    error_type: ValidationErrorKind::FkMissingTable,
392                    field: None,
393                    message: format!(
394                        "Foreign key references unknown table '{}'",
395                        fk.to_table
396                    ),
397                    line_number: None,
398                });
399                continue;
400            }
401        };
402
403        let from_table = match tables.get(&fk.from_table) {
404            Some(t) => t,
405            None => {
406                errors.push(ValidationError {
407                    file_path: format!("_mdql.md"),
408                    error_type: ValidationErrorKind::FkMissingTable,
409                    field: None,
410                    message: format!(
411                        "Foreign key references unknown table '{}'",
412                        fk.from_table
413                    ),
414                    line_number: None,
415                });
416                continue;
417            }
418        };
419
420        // Build set of valid target values
421        let valid_values: HashSet<String> = to_table
422            .1
423            .iter()
424            .filter_map(|row| {
425                row.get(&fk.to_column).and_then(|v| match v {
426                    Value::Null => None,
427                    _ => Some(v.to_display_string()),
428                })
429            })
430            .collect();
431
432        // Check each row in the referencing table
433        for row in &from_table.1 {
434            let value = match row.get(&fk.from_column) {
435                Some(Value::Null) | None => continue,
436                Some(v) => v,
437            };
438
439            let file_path = row
440                .get("path")
441                .map(|v| format!("{}/{}", fk.from_table, v.to_display_string()))
442                .unwrap_or_else(|| fk.from_table.clone());
443
444            let values_to_check: Vec<String> = match value {
445                Value::List(items) => items.iter().map(|s| s.clone()).collect(),
446                _ => vec![value.to_display_string()],
447            };
448
449            for value_str in &values_to_check {
450                if !valid_values.contains(value_str) {
451                    errors.push(ValidationError {
452                        file_path: file_path.clone(),
453                        error_type: ValidationErrorKind::FkViolation,
454                        field: Some(fk.from_column.clone()),
455                        message: format!(
456                            "{} = '{}' not found in {}.{}",
457                            fk.from_column, value_str, fk.to_table, fk.to_column
458                        ),
459                        line_number: None,
460                    });
461                }
462            }
463        }
464    }
465
466    errors
467}
468
#[cfg(test)]
mod tests {
    use super::*;
    use crate::database::{DatabaseConfig, ForeignKey};
    use crate::parser::parse_text;
    use crate::schema::*;
    use indexmap::IndexMap;

    // --- File validation fixtures ---

    /// Schema used by the file-level tests: required `title` (string) and `count`
    /// (int), optional `status` enum, one required `Summary` section.
    fn make_schema() -> Schema {
        let mut frontmatter = IndexMap::new();
        frontmatter.insert("title".to_string(), FieldDef {
            field_type: FieldType::String,
            required: true,
            enum_values: None,
        });
        frontmatter.insert("count".to_string(), FieldDef {
            field_type: FieldType::Int,
            required: true,
            enum_values: None,
        });
        frontmatter.insert("status".to_string(), FieldDef {
            field_type: FieldType::String,
            required: false,
            enum_values: Some(vec!["ACTIVE".into(), "ARCHIVED".into()]),
        });

        let mut sections = IndexMap::new();
        sections.insert("Summary".to_string(), SectionDef {
            content_type: "markdown".to_string(),
            required: true,
        });

        Schema {
            table: "test".to_string(),
            primary_key: "path".to_string(),
            frontmatter,
            h1_required: false,
            sections,
            rules: Rules {
                reject_unknown_frontmatter: true,
                reject_unknown_sections: false,
                reject_duplicate_sections: true,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Parse `text` as `test.md` and validate it against `make_schema()`.
    fn validate_text(text: &str) -> Vec<ValidationError> {
        let parsed = parse_text(text, "test.md", false);
        validate_file(&parsed, &make_schema())
    }

    #[test]
    fn test_valid_file() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nA summary.\n",
        );
        assert!(errors.is_empty(), "Expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_missing_required_field() {
        let errors = validate_text("---\ntitle: \"Hello\"\n---\n\n## Summary\n\nText.\n");
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::MissingField
            && e.field.as_deref() == Some("count")));
    }

    #[test]
    fn test_type_mismatch() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: \"not a number\"\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::TypeMismatch
            && e.field.as_deref() == Some("count")));
    }

    #[test]
    fn test_enum_violation() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\nstatus: INVALID\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::EnumViolation));
    }

    #[test]
    fn test_unknown_frontmatter() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\nextra: bad\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::UnknownField
            && e.field.as_deref() == Some("extra")));
    }

    #[test]
    fn test_missing_required_section() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Other\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::MissingSection));
    }

    #[test]
    fn test_duplicate_section() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nFirst.\n\n## Summary\n\nSecond.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::DuplicateSection));
    }

    // --- Foreign key validation fixtures ---

    /// Schema with no declared fields or sections and every rejection rule off.
    fn bare_schema(table: &str) -> Schema {
        Schema {
            table: table.to_string(),
            primary_key: "path".to_string(),
            frontmatter: IndexMap::new(),
            h1_required: false,
            sections: IndexMap::new(),
            rules: Rules {
                reject_unknown_frontmatter: false,
                reject_unknown_sections: false,
                reject_duplicate_sections: false,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Row of the `strategies` table.
    fn strategy_row(path: &str) -> Row {
        let mut row = Row::new();
        row.insert("path".into(), Value::String(path.into()));
        row
    }

    /// Row of the `backtests` table whose `strategy` column holds `strategy`.
    fn backtest_row(path: &str, strategy: Value) -> Row {
        let mut row = Row::new();
        row.insert("path".into(), Value::String(path.into()));
        row.insert("strategy".into(), strategy);
        row
    }

    /// Two strategies (`alpha.md`, `beta.md`) and two backtests, each referencing one.
    fn make_fk_tables() -> HashMap<String, (Schema, Vec<Row>)> {
        let strategies = vec![strategy_row("alpha.md"), strategy_row("beta.md")];
        let backtests = vec![
            backtest_row("bt-alpha.md", Value::String("alpha.md".into())),
            backtest_row("bt-beta.md", Value::String("beta.md".into())),
        ];

        let mut tables = HashMap::new();
        tables.insert("strategies".into(), (bare_schema("strategies"), strategies));
        tables.insert("backtests".into(), (bare_schema("backtests"), backtests));
        tables
    }

    /// Config with a single key `backtests.strategy -> <to_table>.path`.
    fn make_fk_config_to(to_table: &str) -> DatabaseConfig {
        DatabaseConfig {
            name: "test".into(),
            foreign_keys: vec![ForeignKey {
                from_table: "backtests".into(),
                from_column: "strategy".into(),
                to_table: to_table.into(),
                to_column: "path".into(),
            }],
            views: vec![],
            sync: None,
        }
    }

    /// Config with the key `backtests.strategy -> strategies.path`.
    fn make_fk_config() -> DatabaseConfig {
        make_fk_config_to("strategies")
    }

    #[test]
    fn test_fk_valid() {
        let tables = make_fk_tables();
        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert!(errors.is_empty(), "Expected no FK errors, got: {:?}", errors);
    }

    #[test]
    fn test_fk_violation() {
        let mut tables = make_fk_tables();
        // Add a backtest referencing a nonexistent strategy
        let broken = backtest_row("bt-broken.md", Value::String("nonexistent.md".into()));
        tables.get_mut("backtests").unwrap().1.push(broken);

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkViolation);
        assert!(errors[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_fk_null_not_violation() {
        let mut tables = make_fk_tables();
        // Add a backtest with null strategy — should not be a violation
        let nullref = backtest_row("bt-null.md", Value::Null);
        tables.get_mut("backtests").unwrap().1.push(nullref);

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_missing_table() {
        let tables = make_fk_tables();
        let config = make_fk_config_to("nonexistent_table");
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkMissingTable);
    }

    #[test]
    fn test_fk_string_array_valid() {
        let mut tables = make_fk_tables();
        let array_row = backtest_row(
            "bt-multi.md",
            Value::List(vec!["alpha.md".into(), "beta.md".into()]),
        );
        tables.get_mut("backtests").unwrap().1.push(array_row);

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_string_array_one_invalid() {
        let mut tables = make_fk_tables();
        let array_row = backtest_row(
            "bt-multi.md",
            Value::List(vec!["alpha.md".into(), "nonexistent.md".into()]),
        );
        tables.get_mut("backtests").unwrap().1.push(array_row);

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("nonexistent.md"));
    }
}