1use std::collections::{HashMap, HashSet};
4
5use crate::database::DatabaseConfig;
6use crate::errors::ValidationError;
7use crate::model::{Row, Value};
8use crate::parser::ParsedFile;
9use crate::schema::Schema;
10use crate::stamp::TIMESTAMP_FIELDS;
11
12pub fn validate_file(parsed: &ParsedFile, schema: &Schema) -> Vec<ValidationError> {
13 let mut errors = Vec::new();
14 let fp = &parsed.path;
15
16 for msg in &parsed.parse_errors {
18 errors.push(ValidationError {
19 file_path: fp.clone(),
20 error_type: "parse_error".to_string(),
21 field: None,
22 message: msg.clone(),
23 line_number: None,
24 });
25 }
26
27 if errors.iter().any(|e| e.error_type == "parse_error") {
28 return errors;
29 }
30
31 let fm = &parsed.raw_frontmatter;
32 let fm_map = match fm.as_mapping() {
33 Some(m) => m,
34 None => return errors,
35 };
36
37 for (name, field_def) in &schema.frontmatter {
39 let key = serde_yaml::Value::String(name.clone());
40 match fm_map.get(&key) {
41 None => {
42 if field_def.required {
43 errors.push(ValidationError {
44 file_path: fp.clone(),
45 error_type: "missing_field".to_string(),
46 field: Some(name.clone()),
47 message: format!("Missing required frontmatter field '{}'", name),
48 line_number: None,
49 });
50 }
51 }
52 Some(value) => {
53 if let Some(type_err) = check_type(value, &field_def.field_type, name) {
54 errors.push(ValidationError {
55 file_path: fp.clone(),
56 error_type: "type_mismatch".to_string(),
57 field: Some(name.clone()),
58 message: type_err,
59 line_number: None,
60 });
61 }
62
63 if let Some(ref enum_vals) = field_def.enum_values {
64 if !value.is_null() {
65 let str_val = yaml_value_to_string(value);
66 if !enum_vals.contains(&str_val) {
67 errors.push(ValidationError {
68 file_path: fp.clone(),
69 error_type: "enum_violation".to_string(),
70 field: Some(name.clone()),
71 message: format!(
72 "Field '{}' value '{}' not in allowed values: {:?}",
73 name, str_val, enum_vals
74 ),
75 line_number: None,
76 });
77 }
78 }
79 }
80 }
81 }
82 }
83
84 for ts_field in TIMESTAMP_FIELDS {
86 let key = serde_yaml::Value::String(ts_field.to_string());
87 if let Some(value) = fm_map.get(&key) {
88 if let Some(type_err) = check_type(
89 value,
90 &crate::schema::FieldType::DateTime,
91 ts_field,
92 ) {
93 errors.push(ValidationError {
94 file_path: fp.clone(),
95 error_type: "type_mismatch".to_string(),
96 field: Some(ts_field.to_string()),
97 message: type_err,
98 line_number: None,
99 });
100 }
101 }
102 }
103
104 if schema.rules.reject_unknown_frontmatter {
106 for (key_val, _) in fm_map {
107 if let Some(key) = key_val.as_str() {
108 if !schema.frontmatter.contains_key(key)
109 && !TIMESTAMP_FIELDS.contains(&key)
110 {
111 errors.push(ValidationError {
112 file_path: fp.clone(),
113 error_type: "unknown_field".to_string(),
114 field: Some(key.to_string()),
115 message: format!(
116 "Unknown frontmatter field '{}' (not in schema)",
117 key
118 ),
119 line_number: None,
120 });
121 }
122 }
123 }
124 }
125
126 if schema.h1_required && parsed.h1.is_none() {
128 errors.push(ValidationError {
129 file_path: fp.clone(),
130 error_type: "missing_h1".to_string(),
131 field: None,
132 message: "Missing required H1 heading".to_string(),
133 line_number: None,
134 });
135 }
136
137 if let Some(ref h1_field) = schema.h1_must_equal_frontmatter {
138 if let Some(ref h1) = parsed.h1 {
139 let key = serde_yaml::Value::String(h1_field.clone());
140 if let Some(expected_val) = fm_map.get(&key) {
141 let expected = yaml_value_to_string(expected_val);
142 if h1 != &expected {
143 errors.push(ValidationError {
144 file_path: fp.clone(),
145 error_type: "h1_mismatch".to_string(),
146 field: None,
147 message: format!(
148 "H1 '{}' does not match frontmatter '{}' (expected '{}')",
149 h1, h1_field, expected
150 ),
151 line_number: parsed.h1_line_number,
152 });
153 }
154 }
155 }
156 }
157
158 let section_names: Vec<&str> = parsed
160 .sections
161 .iter()
162 .map(|s| s.normalized_heading.as_str())
163 .collect();
164
165 let mut section_counter: HashMap<&str, usize> = HashMap::new();
167 for name in §ion_names {
168 *section_counter.entry(name).or_insert(0) += 1;
169 }
170
171 if schema.rules.reject_duplicate_sections {
173 for (name, count) in §ion_counter {
174 if *count > 1 {
175 errors.push(ValidationError {
176 file_path: fp.clone(),
177 error_type: "duplicate_section".to_string(),
178 field: Some(name.to_string()),
179 message: format!(
180 "Duplicate section '{}' (appears {} times)",
181 name, count
182 ),
183 line_number: None,
184 });
185 }
186 }
187 }
188
189 for (name, section_def) in &schema.sections {
191 if section_def.required && !section_names.contains(&name.as_str()) {
192 errors.push(ValidationError {
193 file_path: fp.clone(),
194 error_type: "missing_section".to_string(),
195 field: Some(name.clone()),
196 message: format!("Missing required section '{}'", name),
197 line_number: None,
198 });
199 }
200 }
201
202 if schema.rules.reject_unknown_sections {
204 for section in &parsed.sections {
205 if !schema.sections.contains_key(§ion.normalized_heading) {
206 errors.push(ValidationError {
207 file_path: fp.clone(),
208 error_type: "unknown_section".to_string(),
209 field: Some(section.normalized_heading.clone()),
210 message: format!(
211 "Unknown section '{}' (not in schema)",
212 section.normalized_heading
213 ),
214 line_number: Some(section.line_number),
215 });
216 }
217 }
218 }
219
220 errors
221}
222
223fn check_type(
224 value: &serde_yaml::Value,
225 expected: &crate::schema::FieldType,
226 field_name: &str,
227) -> Option<String> {
228 use crate::schema::FieldType;
229
230 if value.is_null() {
231 return None;
232 }
233
234 match expected {
235 FieldType::String => {
236 if !value.is_string() {
237 return Some(format!(
238 "Field '{}' expected string, got {}",
239 field_name,
240 yaml_type_name(value)
241 ));
242 }
243 }
244 FieldType::Int => {
245 if value.is_bool() {
246 return Some(format!(
247 "Field '{}' expected int, got bool",
248 field_name
249 ));
250 }
251 if !value.is_i64() && !value.is_u64() {
253 return Some(format!(
254 "Field '{}' expected int, got {}",
255 field_name,
256 yaml_type_name(value)
257 ));
258 }
259 }
260 FieldType::Float => {
261 if value.is_bool() {
262 return Some(format!(
263 "Field '{}' expected float, got bool",
264 field_name
265 ));
266 }
267 if !value.is_f64() && !value.is_i64() && !value.is_u64() {
268 return Some(format!(
269 "Field '{}' expected float, got {}",
270 field_name,
271 yaml_type_name(value)
272 ));
273 }
274 }
275 FieldType::Bool => {
276 if !value.is_bool() {
277 return Some(format!(
278 "Field '{}' expected bool, got {}",
279 field_name,
280 yaml_type_name(value)
281 ));
282 }
283 }
284 FieldType::Date => {
285 if let Some(s) = value.as_str() {
286 if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_err() {
287 return Some(format!(
288 "Field '{}' expected date (YYYY-MM-DD), got string '{}'",
289 field_name, s
290 ));
291 }
292 return None;
293 }
294 if !value.is_string() {
295 return Some(format!(
296 "Field '{}' expected date, got {}",
297 field_name,
298 yaml_type_name(value)
299 ));
300 }
301 }
302 FieldType::DateTime => {
303 if let Some(s) = value.as_str() {
304 let ok = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").is_ok()
305 || chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f").is_ok();
306 if !ok {
307 return Some(format!(
308 "Field '{}' expected datetime (ISO 8601), got string '{}'",
309 field_name, s
310 ));
311 }
312 return None;
313 }
314 if !value.is_string() {
315 return Some(format!(
316 "Field '{}' expected datetime, got {}",
317 field_name,
318 yaml_type_name(value)
319 ));
320 }
321 }
322 FieldType::StringArray => {
323 match value.as_sequence() {
324 None => {
325 return Some(format!(
326 "Field '{}' expected string[], got {}",
327 field_name,
328 yaml_type_name(value)
329 ));
330 }
331 Some(seq) => {
332 for (i, item) in seq.iter().enumerate() {
333 if !item.is_string() {
334 return Some(format!(
335 "Field '{}[{}]' expected string, got {}",
336 field_name,
337 i,
338 yaml_type_name(item)
339 ));
340 }
341 }
342 }
343 }
344 }
345 FieldType::Dict => {
346 if !value.is_mapping() {
347 return Some(format!(
348 "Field '{}' expected dict (mapping), got {}",
349 field_name,
350 yaml_type_name(value)
351 ));
352 }
353 if let Some(mapping) = value.as_mapping() {
354 for (k, v) in mapping {
355 if v.is_mapping() || v.is_sequence() {
356 return Some(format!(
357 "Field '{}' dict value for key '{}' must be a scalar, got {}",
358 field_name,
359 k.as_str().unwrap_or("?"),
360 yaml_type_name(v)
361 ));
362 }
363 }
364 }
365 }
366 }
367
368 None
369}
370
371fn yaml_type_name(value: &serde_yaml::Value) -> &'static str {
372 match value {
373 serde_yaml::Value::Null => "null",
374 serde_yaml::Value::Bool(_) => "bool",
375 serde_yaml::Value::Number(_) => {
376 if value.is_f64() && !value.is_i64() && !value.is_u64() {
377 "float"
378 } else {
379 "int"
380 }
381 }
382 serde_yaml::Value::String(_) => "str",
383 serde_yaml::Value::Sequence(_) => "list",
384 serde_yaml::Value::Mapping(_) => "mapping",
385 _ => "unknown",
386 }
387}
388
389fn yaml_value_to_string(value: &serde_yaml::Value) -> String {
390 match value {
391 serde_yaml::Value::String(s) => s.clone(),
392 serde_yaml::Value::Number(n) => n.to_string(),
393 serde_yaml::Value::Bool(b) => b.to_string(),
394 serde_yaml::Value::Null => "null".to_string(),
395 _ => format!("{:?}", value),
396 }
397}
398
399pub fn validate_foreign_keys(
401 db_config: &DatabaseConfig,
402 tables: &HashMap<String, (Schema, Vec<Row>)>,
403) -> Vec<ValidationError> {
404 let mut errors = Vec::new();
405
406 for fk in &db_config.foreign_keys {
407 let to_table = match tables.get(&fk.to_table) {
408 Some(t) => t,
409 None => {
410 errors.push(ValidationError {
411 file_path: format!("_mdql.md"),
412 error_type: "fk_missing_table".to_string(),
413 field: None,
414 message: format!(
415 "Foreign key references unknown table '{}'",
416 fk.to_table
417 ),
418 line_number: None,
419 });
420 continue;
421 }
422 };
423
424 let from_table = match tables.get(&fk.from_table) {
425 Some(t) => t,
426 None => {
427 errors.push(ValidationError {
428 file_path: format!("_mdql.md"),
429 error_type: "fk_missing_table".to_string(),
430 field: None,
431 message: format!(
432 "Foreign key references unknown table '{}'",
433 fk.from_table
434 ),
435 line_number: None,
436 });
437 continue;
438 }
439 };
440
441 let valid_values: HashSet<String> = to_table
443 .1
444 .iter()
445 .filter_map(|row| {
446 row.get(&fk.to_column).and_then(|v| match v {
447 Value::Null => None,
448 _ => Some(v.to_display_string()),
449 })
450 })
451 .collect();
452
453 for row in &from_table.1 {
455 let value = match row.get(&fk.from_column) {
456 Some(Value::Null) | None => continue,
457 Some(v) => v,
458 };
459
460 let file_path = row
461 .get("path")
462 .map(|v| format!("{}/{}", fk.from_table, v.to_display_string()))
463 .unwrap_or_else(|| fk.from_table.clone());
464
465 let values_to_check: Vec<String> = match value {
466 Value::List(items) => items.iter().map(|s| s.clone()).collect(),
467 _ => vec![value.to_display_string()],
468 };
469
470 for value_str in &values_to_check {
471 if !valid_values.contains(value_str) {
472 errors.push(ValidationError {
473 file_path: file_path.clone(),
474 error_type: "fk_violation".to_string(),
475 field: Some(fk.from_column.clone()),
476 message: format!(
477 "{} = '{}' not found in {}.{}",
478 fk.from_column, value_str, fk.to_table, fk.to_column
479 ),
480 line_number: None,
481 });
482 }
483 }
484 }
485 }
486
487 errors
488}
489
#[cfg(test)]
mod tests {
    use super::*;
    use crate::database::{DatabaseConfig, ForeignKey};
    use crate::parser::parse_text;
    use crate::schema::*;
    use indexmap::IndexMap;

    // ----- validate_file fixtures -------------------------------------------

    /// Frontmatter field definition with no enum restriction.
    fn plain_field(field_type: FieldType, required: bool) -> FieldDef {
        FieldDef {
            field_type,
            required,
            enum_values: None,
        }
    }

    /// Schema with required `title` (string) and `count` (int), an optional
    /// `status` enum, and one required `Summary` section. Unknown frontmatter
    /// and duplicate sections are rejected; unknown sections are allowed.
    fn make_schema() -> Schema {
        let mut frontmatter = IndexMap::new();
        frontmatter.insert("title".to_string(), plain_field(FieldType::String, true));
        frontmatter.insert("count".to_string(), plain_field(FieldType::Int, true));
        frontmatter.insert(
            "status".to_string(),
            FieldDef {
                enum_values: Some(vec!["ACTIVE".into(), "ARCHIVED".into()]),
                ..plain_field(FieldType::String, false)
            },
        );

        let mut sections = IndexMap::new();
        sections.insert(
            "Summary".to_string(),
            SectionDef {
                content_type: "markdown".to_string(),
                required: true,
            },
        );

        Schema {
            table: "test".to_string(),
            primary_key: "path".to_string(),
            frontmatter,
            h1_required: false,
            h1_must_equal_frontmatter: None,
            sections,
            rules: Rules {
                reject_unknown_frontmatter: true,
                reject_unknown_sections: false,
                reject_duplicate_sections: true,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Parses `text` as `test.md` and validates it against `make_schema()`.
    fn validate(text: &str) -> Vec<ValidationError> {
        let parsed = parse_text(text, "test.md", false);
        validate_file(&parsed, &make_schema())
    }

    /// True when at least one error has the given type.
    fn has_error(errors: &[ValidationError], kind: &str) -> bool {
        errors.iter().any(|e| e.error_type == kind)
    }

    /// True when at least one error has the given type and field.
    fn has_field_error(errors: &[ValidationError], kind: &str, field: &str) -> bool {
        errors
            .iter()
            .any(|e| e.error_type == kind && e.field.as_deref() == Some(field))
    }

    // ----- validate_file tests ----------------------------------------------

    #[test]
    fn test_valid_file() {
        let errors =
            validate("---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nA summary.\n");
        assert!(errors.is_empty(), "Expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_missing_required_field() {
        let errors = validate("---\ntitle: \"Hello\"\n---\n\n## Summary\n\nText.\n");
        assert!(has_field_error(&errors, "missing_field", "count"));
    }

    #[test]
    fn test_type_mismatch() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: \"not a number\"\n---\n\n## Summary\n\nText.\n",
        );
        assert!(has_field_error(&errors, "type_mismatch", "count"));
    }

    #[test]
    fn test_enum_violation() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: 5\nstatus: INVALID\n---\n\n## Summary\n\nText.\n",
        );
        assert!(has_error(&errors, "enum_violation"));
    }

    #[test]
    fn test_unknown_frontmatter() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: 5\nextra: bad\n---\n\n## Summary\n\nText.\n",
        );
        assert!(has_field_error(&errors, "unknown_field", "extra"));
    }

    #[test]
    fn test_missing_required_section() {
        let errors = validate("---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Other\n\nText.\n");
        assert!(has_error(&errors, "missing_section"));
    }

    #[test]
    fn test_duplicate_section() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nFirst.\n\n## Summary\n\nSecond.\n",
        );
        assert!(has_error(&errors, "duplicate_section"));
    }

    // ----- validate_foreign_keys fixtures -----------------------------------

    /// Schema with no fields, no sections and every rejection rule disabled.
    fn bare_schema(table: &str) -> Schema {
        Schema {
            table: table.to_string(),
            primary_key: "path".to_string(),
            frontmatter: IndexMap::new(),
            h1_required: false,
            h1_must_equal_frontmatter: None,
            sections: IndexMap::new(),
            rules: Rules {
                reject_unknown_frontmatter: false,
                reject_unknown_sections: false,
                reject_duplicate_sections: false,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Row holding only a `path` column.
    fn strategy_row(path: &str) -> Row {
        let mut row = Row::new();
        row.insert("path".into(), Value::String(path.into()));
        row
    }

    /// Row holding `path` plus a `strategy` reference.
    fn backtest_row(path: &str, strategy: Value) -> Row {
        let mut row = strategy_row(path);
        row.insert("strategy".into(), strategy);
        row
    }

    /// Two strategies (`alpha.md`, `beta.md`) and two backtests, each pointing
    /// at an existing strategy.
    fn make_fk_tables() -> HashMap<String, (Schema, Vec<Row>)> {
        let strategies = vec![strategy_row("alpha.md"), strategy_row("beta.md")];
        let backtests = vec![
            backtest_row("bt-alpha.md", Value::String("alpha.md".into())),
            backtest_row("bt-beta.md", Value::String("beta.md".into())),
        ];

        let mut tables = HashMap::new();
        tables.insert("strategies".into(), (bare_schema("strategies"), strategies));
        tables.insert("backtests".into(), (bare_schema("backtests"), backtests));
        tables
    }

    /// Config with a single key `backtests.strategy -> <to_table>.path`.
    fn fk_config_targeting(to_table: &str) -> DatabaseConfig {
        DatabaseConfig {
            name: "test".into(),
            foreign_keys: vec![ForeignKey {
                from_table: "backtests".into(),
                from_column: "strategy".into(),
                to_table: to_table.into(),
                to_column: "path".into(),
            }],
            views: vec![],
        }
    }

    /// Config with a single key `backtests.strategy -> strategies.path`.
    fn make_fk_config() -> DatabaseConfig {
        fk_config_targeting("strategies")
    }

    /// Standard tables plus one extra backtest row.
    fn tables_with_backtest(extra: Row) -> HashMap<String, (Schema, Vec<Row>)> {
        let mut tables = make_fk_tables();
        tables.get_mut("backtests").unwrap().1.push(extra);
        tables
    }

    // ----- validate_foreign_keys tests --------------------------------------

    #[test]
    fn test_fk_valid() {
        let tables = make_fk_tables();
        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty(), "Expected no FK errors, got: {:?}", errors);
    }

    #[test]
    fn test_fk_violation() {
        let tables = tables_with_backtest(backtest_row(
            "bt-broken.md",
            Value::String("nonexistent.md".into()),
        ));

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, "fk_violation");
        assert!(errors[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_fk_null_not_violation() {
        let tables = tables_with_backtest(backtest_row("bt-null.md", Value::Null));

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_missing_table() {
        let tables = make_fk_tables();
        let config = fk_config_targeting("nonexistent_table");

        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, "fk_missing_table");
    }

    #[test]
    fn test_fk_string_array_valid() {
        let tables = tables_with_backtest(backtest_row(
            "bt-multi.md",
            Value::List(vec!["alpha.md".into(), "beta.md".into()]),
        ));

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_string_array_one_invalid() {
        let tables = tables_with_backtest(backtest_row(
            "bt-multi.md",
            Value::List(vec!["alpha.md".into(), "nonexistent.md".into()]),
        ));

        let errors = validate_foreign_keys(&make_fk_config(), &tables);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("nonexistent.md"));
    }
}