Skip to main content

mdql_core/
validator.rs

1//! Validate parsed markdown files against a schema.
2
3use std::collections::{HashMap, HashSet};
4
5use crate::database::DatabaseConfig;
6use crate::errors::{ValidationError, ValidationErrorKind};
7use crate::model::{Row, Value};
8use crate::parser::ParsedFile;
9use crate::schema::Schema;
10use crate::stamp::TIMESTAMP_FIELDS;
11
12pub fn validate_file(parsed: &ParsedFile, schema: &Schema) -> Vec<ValidationError> {
13    let mut errors = Vec::new();
14    let fp = &parsed.path;
15
16    // Parse-level errors
17    for msg in &parsed.parse_errors {
18        errors.push(ValidationError {
19            file_path: fp.clone(),
20            error_type: ValidationErrorKind::ParseError,
21            field: None,
22            message: msg.clone(),
23            line_number: None,
24        });
25    }
26
27    if errors.iter().any(|e| e.error_type == ValidationErrorKind::ParseError) {
28        return errors;
29    }
30
31    let fm = &parsed.raw_frontmatter;
32    let fm_map = match fm.as_mapping() {
33        Some(m) => m,
34        None => return errors,
35    };
36
37    // --- Frontmatter field checks ---
38    for (name, field_def) in &schema.frontmatter {
39        let key = serde_yaml::Value::String(name.clone());
40        match fm_map.get(&key) {
41            None => {
42                if field_def.required {
43                    errors.push(ValidationError {
44                        file_path: fp.clone(),
45                        error_type: ValidationErrorKind::MissingField,
46                        field: Some(name.clone()),
47                        message: format!("Missing required frontmatter field '{}'", name),
48                        line_number: None,
49                    });
50                }
51            }
52            Some(value) => {
53                if let Some(type_err) = check_type(value, &field_def.field_type, name) {
54                    errors.push(ValidationError {
55                        file_path: fp.clone(),
56                        error_type: ValidationErrorKind::TypeMismatch,
57                        field: Some(name.clone()),
58                        message: type_err,
59                        line_number: None,
60                    });
61                }
62
63                if let Some(ref enum_vals) = field_def.enum_values {
64                    if !value.is_null() {
65                        let str_val = yaml_value_to_string(value);
66                        if !enum_vals.contains(&str_val) {
67                            errors.push(ValidationError {
68                                file_path: fp.clone(),
69                                error_type: ValidationErrorKind::EnumViolation,
70                                field: Some(name.clone()),
71                                message: format!(
72                                    "Field '{}' value '{}' not in allowed values: {:?}",
73                                    name, str_val, enum_vals
74                                ),
75                                line_number: None,
76                            });
77                        }
78                    }
79                }
80            }
81        }
82    }
83
84    // Validate timestamp fields as datetime (ISO 8601)
85    for ts_field in TIMESTAMP_FIELDS {
86        let key = serde_yaml::Value::String(ts_field.to_string());
87        if let Some(value) = fm_map.get(&key) {
88            if let Some(type_err) = check_type(
89                value,
90                &crate::schema::FieldType::DateTime,
91                ts_field,
92            ) {
93                errors.push(ValidationError {
94                    file_path: fp.clone(),
95                    error_type: ValidationErrorKind::TypeMismatch,
96                    field: Some(ts_field.to_string()),
97                    message: type_err,
98                    line_number: None,
99                });
100            }
101        }
102    }
103
104    // Unknown frontmatter
105    if schema.rules.reject_unknown_frontmatter {
106        for (key_val, _) in fm_map {
107            if let Some(key) = key_val.as_str() {
108                if !schema.frontmatter.contains_key(key)
109                    && !TIMESTAMP_FIELDS.contains(&key)
110                {
111                    errors.push(ValidationError {
112                        file_path: fp.clone(),
113                        error_type: ValidationErrorKind::UnknownField,
114                        field: Some(key.to_string()),
115                        message: format!(
116                            "Unknown frontmatter field '{}' (not in schema)",
117                            key
118                        ),
119                        line_number: None,
120                    });
121                }
122            }
123        }
124    }
125
126    // --- H1 checks ---
127    if schema.h1_required && parsed.h1.is_none() {
128        errors.push(ValidationError {
129            file_path: fp.clone(),
130            error_type: ValidationErrorKind::MissingH1,
131            field: None,
132            message: "Missing required H1 heading".to_string(),
133            line_number: None,
134        });
135    }
136
137    if let Some(ref h1_field) = schema.h1_must_equal_frontmatter {
138        if let Some(ref h1) = parsed.h1 {
139            let key = serde_yaml::Value::String(h1_field.clone());
140            if let Some(expected_val) = fm_map.get(&key) {
141                let expected = yaml_value_to_string(expected_val);
142                if h1 != &expected {
143                    errors.push(ValidationError {
144                        file_path: fp.clone(),
145                        error_type: ValidationErrorKind::H1Mismatch,
146                        field: None,
147                        message: format!(
148                            "H1 '{}' does not match frontmatter '{}' (expected '{}')",
149                            h1, h1_field, expected
150                        ),
151                        line_number: parsed.h1_line_number,
152                    });
153                }
154            }
155        }
156    }
157
158    // --- Section checks ---
159    let section_names: Vec<&str> = parsed
160        .sections
161        .iter()
162        .map(|s| s.normalized_heading.as_str())
163        .collect();
164
165    // Count occurrences
166    let mut section_counter: HashMap<&str, usize> = HashMap::new();
167    for name in &section_names {
168        *section_counter.entry(name).or_insert(0) += 1;
169    }
170
171    // Duplicate sections
172    if schema.rules.reject_duplicate_sections {
173        for (name, count) in &section_counter {
174            if *count > 1 {
175                errors.push(ValidationError {
176                    file_path: fp.clone(),
177                    error_type: ValidationErrorKind::DuplicateSection,
178                    field: Some(name.to_string()),
179                    message: format!(
180                        "Duplicate section '{}' (appears {} times)",
181                        name, count
182                    ),
183                    line_number: None,
184                });
185            }
186        }
187    }
188
189    // Required sections
190    for (name, section_def) in &schema.sections {
191        if section_def.required && !section_names.contains(&name.as_str()) {
192            errors.push(ValidationError {
193                file_path: fp.clone(),
194                error_type: ValidationErrorKind::MissingSection,
195                field: Some(name.clone()),
196                message: format!("Missing required section '{}'", name),
197                line_number: None,
198            });
199        }
200    }
201
202    // Unknown sections
203    if schema.rules.reject_unknown_sections {
204        for section in &parsed.sections {
205            if !schema.sections.contains_key(&section.normalized_heading) {
206                errors.push(ValidationError {
207                    file_path: fp.clone(),
208                    error_type: ValidationErrorKind::UnknownSection,
209                    field: Some(section.normalized_heading.clone()),
210                    message: format!(
211                        "Unknown section '{}' (not in schema)",
212                        section.normalized_heading
213                    ),
214                    line_number: Some(section.line_number),
215                });
216            }
217        }
218    }
219
220    errors
221}
222
223fn check_type(
224    value: &serde_yaml::Value,
225    expected: &crate::schema::FieldType,
226    field_name: &str,
227) -> Option<String> {
228    use crate::schema::FieldType;
229
230    if value.is_null() {
231        return None;
232    }
233
234    match expected {
235        FieldType::String => {
236            if !value.is_string() {
237                return Some(format!(
238                    "Field '{}' expected string, got {}",
239                    field_name,
240                    yaml_type_name(value)
241                ));
242            }
243        }
244        FieldType::Int => {
245            if value.is_bool() {
246                return Some(format!(
247                    "Field '{}' expected int, got bool",
248                    field_name
249                ));
250            }
251            // serde_yaml may parse integers as i64 or u64
252            if !value.is_i64() && !value.is_u64() {
253                return Some(format!(
254                    "Field '{}' expected int, got {}",
255                    field_name,
256                    yaml_type_name(value)
257                ));
258            }
259        }
260        FieldType::Float => {
261            if value.is_bool() {
262                return Some(format!(
263                    "Field '{}' expected float, got bool",
264                    field_name
265                ));
266            }
267            if !value.is_f64() && !value.is_i64() && !value.is_u64() {
268                return Some(format!(
269                    "Field '{}' expected float, got {}",
270                    field_name,
271                    yaml_type_name(value)
272                ));
273            }
274        }
275        FieldType::Bool => {
276            if !value.is_bool() {
277                return Some(format!(
278                    "Field '{}' expected bool, got {}",
279                    field_name,
280                    yaml_type_name(value)
281                ));
282            }
283        }
284        FieldType::Date => {
285            if let Some(s) = value.as_str() {
286                if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_err() {
287                    return Some(format!(
288                        "Field '{}' expected date (YYYY-MM-DD), got string '{}'",
289                        field_name, s
290                    ));
291                }
292                return None;
293            }
294            if !value.is_string() {
295                return Some(format!(
296                    "Field '{}' expected date, got {}",
297                    field_name,
298                    yaml_type_name(value)
299                ));
300            }
301        }
302        FieldType::DateTime => {
303            if let Some(s) = value.as_str() {
304                let ok = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").is_ok()
305                    || chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f").is_ok();
306                if !ok {
307                    return Some(format!(
308                        "Field '{}' expected datetime (ISO 8601), got string '{}'",
309                        field_name, s
310                    ));
311                }
312                return None;
313            }
314            if !value.is_string() {
315                return Some(format!(
316                    "Field '{}' expected datetime, got {}",
317                    field_name,
318                    yaml_type_name(value)
319                ));
320            }
321        }
322        FieldType::StringArray => {
323            match value.as_sequence() {
324                None => {
325                    return Some(format!(
326                        "Field '{}' expected string[], got {}",
327                        field_name,
328                        yaml_type_name(value)
329                    ));
330                }
331                Some(seq) => {
332                    for (i, item) in seq.iter().enumerate() {
333                        if !item.is_string() {
334                            return Some(format!(
335                                "Field '{}[{}]' expected string, got {}",
336                                field_name,
337                                i,
338                                yaml_type_name(item)
339                            ));
340                        }
341                    }
342                }
343            }
344        }
345        FieldType::Dict => {
346            if !value.is_mapping() {
347                return Some(format!(
348                    "Field '{}' expected dict (mapping), got {}",
349                    field_name,
350                    yaml_type_name(value)
351                ));
352            }
353            // Dict values may be scalars, lists, or nested dicts
354        }
355    }
356
357    None
358}
359
360fn yaml_type_name(value: &serde_yaml::Value) -> &'static str {
361    match value {
362        serde_yaml::Value::Null => "null",
363        serde_yaml::Value::Bool(_) => "bool",
364        serde_yaml::Value::Number(_) => {
365            if value.is_f64() && !value.is_i64() && !value.is_u64() {
366                "float"
367            } else {
368                "int"
369            }
370        }
371        serde_yaml::Value::String(_) => "str",
372        serde_yaml::Value::Sequence(_) => "list",
373        serde_yaml::Value::Mapping(_) => "mapping",
374        _ => "unknown",
375    }
376}
377
378fn yaml_value_to_string(value: &serde_yaml::Value) -> String {
379    match value {
380        serde_yaml::Value::String(s) => s.clone(),
381        serde_yaml::Value::Number(n) => n.to_string(),
382        serde_yaml::Value::Bool(b) => b.to_string(),
383        serde_yaml::Value::Null => "null".to_string(),
384        _ => format!("{:?}", value),
385    }
386}
387
388/// Validate all foreign key constraints across a loaded database.
389pub(crate) fn validate_foreign_keys(
390    db_config: &DatabaseConfig,
391    tables: &HashMap<String, (Schema, Vec<Row>)>,
392) -> Vec<ValidationError> {
393    let mut errors = Vec::new();
394
395    for fk in &db_config.foreign_keys {
396        let to_table = match tables.get(&fk.to_table) {
397            Some(t) => t,
398            None => {
399                errors.push(ValidationError {
400                    file_path: format!("_mdql.md"),
401                    error_type: ValidationErrorKind::FkMissingTable,
402                    field: None,
403                    message: format!(
404                        "Foreign key references unknown table '{}'",
405                        fk.to_table
406                    ),
407                    line_number: None,
408                });
409                continue;
410            }
411        };
412
413        let from_table = match tables.get(&fk.from_table) {
414            Some(t) => t,
415            None => {
416                errors.push(ValidationError {
417                    file_path: format!("_mdql.md"),
418                    error_type: ValidationErrorKind::FkMissingTable,
419                    field: None,
420                    message: format!(
421                        "Foreign key references unknown table '{}'",
422                        fk.from_table
423                    ),
424                    line_number: None,
425                });
426                continue;
427            }
428        };
429
430        // Build set of valid target values
431        let valid_values: HashSet<String> = to_table
432            .1
433            .iter()
434            .filter_map(|row| {
435                row.get(&fk.to_column).and_then(|v| match v {
436                    Value::Null => None,
437                    _ => Some(v.to_display_string()),
438                })
439            })
440            .collect();
441
442        // Check each row in the referencing table
443        for row in &from_table.1 {
444            let value = match row.get(&fk.from_column) {
445                Some(Value::Null) | None => continue,
446                Some(v) => v,
447            };
448
449            let file_path = row
450                .get("path")
451                .map(|v| format!("{}/{}", fk.from_table, v.to_display_string()))
452                .unwrap_or_else(|| fk.from_table.clone());
453
454            let values_to_check: Vec<String> = match value {
455                Value::List(items) => items.iter().map(|s| s.clone()).collect(),
456                _ => vec![value.to_display_string()],
457            };
458
459            for value_str in &values_to_check {
460                if !valid_values.contains(value_str) {
461                    errors.push(ValidationError {
462                        file_path: file_path.clone(),
463                        error_type: ValidationErrorKind::FkViolation,
464                        field: Some(fk.from_column.clone()),
465                        message: format!(
466                            "{} = '{}' not found in {}.{}",
467                            fk.from_column, value_str, fk.to_table, fk.to_column
468                        ),
469                        line_number: None,
470                    });
471                }
472            }
473        }
474    }
475
476    errors
477}
478
#[cfg(test)]
mod tests {
    use super::*;
    use crate::database::{DatabaseConfig, ForeignKey};
    use crate::parser::parse_text;
    use crate::schema::*;
    use indexmap::IndexMap;

    // --- File validation tests ---

    /// Schema used by the file-validation tests: required `title` (string) and
    /// `count` (int), optional `status` enum, one required `Summary` section,
    /// unknown frontmatter and duplicate sections rejected.
    fn make_schema() -> Schema {
        let mut frontmatter = IndexMap::new();
        frontmatter.insert("title".to_string(), FieldDef {
            field_type: FieldType::String,
            required: true,
            enum_values: None,
        });
        frontmatter.insert("count".to_string(), FieldDef {
            field_type: FieldType::Int,
            required: true,
            enum_values: None,
        });
        frontmatter.insert("status".to_string(), FieldDef {
            field_type: FieldType::String,
            required: false,
            enum_values: Some(vec!["ACTIVE".into(), "ARCHIVED".into()]),
        });

        let mut sections = IndexMap::new();
        sections.insert("Summary".to_string(), SectionDef {
            content_type: "markdown".to_string(),
            required: true,
        });

        Schema {
            table: "test".to_string(),
            primary_key: "path".to_string(),
            frontmatter,
            h1_required: false,
            h1_must_equal_frontmatter: None,
            sections,
            rules: Rules {
                reject_unknown_frontmatter: true,
                reject_unknown_sections: false,
                reject_duplicate_sections: true,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Parse `text` as `test.md` and validate it against `make_schema()`.
    fn validate_text(text: &str) -> Vec<ValidationError> {
        let parsed = parse_text(text, "test.md", false);
        validate_file(&parsed, &make_schema())
    }

    #[test]
    fn test_valid_file() {
        let errors =
            validate_text("---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nA summary.\n");
        assert!(errors.is_empty(), "Expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_missing_required_field() {
        let errors = validate_text("---\ntitle: \"Hello\"\n---\n\n## Summary\n\nText.\n");
        assert!(errors.iter().any(|e| {
            e.error_type == ValidationErrorKind::MissingField
                && e.field.as_deref() == Some("count")
        }));
    }

    #[test]
    fn test_type_mismatch() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: \"not a number\"\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| {
            e.error_type == ValidationErrorKind::TypeMismatch
                && e.field.as_deref() == Some("count")
        }));
    }

    #[test]
    fn test_enum_violation() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\nstatus: INVALID\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors
            .iter()
            .any(|e| e.error_type == ValidationErrorKind::EnumViolation));
    }

    #[test]
    fn test_unknown_frontmatter() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\nextra: bad\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| {
            e.error_type == ValidationErrorKind::UnknownField
                && e.field.as_deref() == Some("extra")
        }));
    }

    #[test]
    fn test_missing_required_section() {
        let errors =
            validate_text("---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Other\n\nText.\n");
        assert!(errors
            .iter()
            .any(|e| e.error_type == ValidationErrorKind::MissingSection));
    }

    #[test]
    fn test_duplicate_section() {
        let errors = validate_text(
            "---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nFirst.\n\n## Summary\n\nSecond.\n",
        );
        assert!(errors
            .iter()
            .any(|e| e.error_type == ValidationErrorKind::DuplicateSection));
    }

    // --- Foreign key validation tests ---

    /// Schema with no declared fields or sections and every rule disabled;
    /// foreign key validation only looks at the rows, not the schema.
    fn permissive_schema(table: &str) -> Schema {
        Schema {
            table: table.to_string(),
            primary_key: "path".to_string(),
            frontmatter: IndexMap::new(),
            h1_required: false,
            h1_must_equal_frontmatter: None,
            sections: IndexMap::new(),
            rules: Rules {
                reject_unknown_frontmatter: false,
                reject_unknown_sections: false,
                reject_duplicate_sections: false,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Two strategies (`alpha.md`, `beta.md`) and two backtests, each
    /// referencing one of them through its `strategy` column.
    fn make_fk_tables() -> HashMap<String, (Schema, Vec<Row>)> {
        let mut s1 = Row::new();
        s1.insert("path".into(), Value::String("alpha.md".into()));
        let mut s2 = Row::new();
        s2.insert("path".into(), Value::String("beta.md".into()));

        let mut b1 = Row::new();
        b1.insert("path".into(), Value::String("bt-alpha.md".into()));
        b1.insert("strategy".into(), Value::String("alpha.md".into()));
        let mut b2 = Row::new();
        b2.insert("path".into(), Value::String("bt-beta.md".into()));
        b2.insert("strategy".into(), Value::String("beta.md".into()));

        let mut tables = HashMap::new();
        tables.insert(
            "strategies".into(),
            (permissive_schema("strategies"), vec![s1, s2]),
        );
        tables.insert(
            "backtests".into(),
            (permissive_schema("backtests"), vec![b1, b2]),
        );
        tables
    }

    /// Config with a single foreign key `<from_table>.strategy -> <to_table>.path`.
    fn fk_config(from_table: &str, to_table: &str) -> DatabaseConfig {
        DatabaseConfig {
            name: "test".into(),
            foreign_keys: vec![ForeignKey {
                from_table: from_table.into(),
                from_column: "strategy".into(),
                to_table: to_table.into(),
                to_column: "path".into(),
            }],
            views: vec![],
            sync: None,
        }
    }

    /// The standard key used by most tests: `backtests.strategy -> strategies.path`.
    fn make_fk_config() -> DatabaseConfig {
        fk_config("backtests", "strategies")
    }

    #[test]
    fn test_fk_valid() {
        let tables = make_fk_tables();
        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty(), "Expected no FK errors, got: {:?}", errors);
    }

    #[test]
    fn test_fk_violation() {
        let mut tables = make_fk_tables();
        // Add a backtest referencing a nonexistent strategy
        let mut broken = Row::new();
        broken.insert("path".into(), Value::String("bt-broken.md".into()));
        broken.insert("strategy".into(), Value::String("nonexistent.md".into()));
        tables.get_mut("backtests").unwrap().1.push(broken);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkViolation);
        assert!(errors[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_fk_null_not_violation() {
        let mut tables = make_fk_tables();
        // Add a backtest with null strategy — should not be a violation
        let mut nullref = Row::new();
        nullref.insert("path".into(), Value::String("bt-null.md".into()));
        nullref.insert("strategy".into(), Value::Null);
        tables.get_mut("backtests").unwrap().1.push(nullref);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_missing_table() {
        let tables = make_fk_tables();
        let config = fk_config("backtests", "nonexistent_table");
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkMissingTable);
    }

    #[test]
    fn test_fk_missing_from_table() {
        let tables = make_fk_tables();
        // The referenced table exists; only the referencing table is unknown.
        let config = fk_config("nonexistent_table", "strategies");
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkMissingTable);
        assert!(errors[0].message.contains("nonexistent_table"));
    }

    #[test]
    fn test_fk_string_array_valid() {
        let mut tables = make_fk_tables();
        let array_row = Row::from([
            ("path".into(), Value::String("bt-multi.md".into())),
            ("strategy".into(), Value::List(vec![
                "alpha.md".into(),
                "beta.md".into(),
            ])),
        ]);
        tables.get_mut("backtests").unwrap().1.push(array_row);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_string_array_one_invalid() {
        let mut tables = make_fk_tables();
        let array_row = Row::from([
            ("path".into(), Value::String("bt-multi.md".into())),
            ("strategy".into(), Value::List(vec![
                "alpha.md".into(),
                "nonexistent.md".into(),
            ])),
        ]);
        tables.get_mut("backtests").unwrap().1.push(array_row);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("nonexistent.md"));
    }
}