1use std::collections::{HashMap, HashSet};
4
5use crate::database::DatabaseConfig;
6use crate::errors::{ValidationError, ValidationErrorKind};
7use crate::model::{Row, Value};
8use crate::parser::ParsedFile;
9use crate::schema::Schema;
10use crate::stamp::TIMESTAMP_FIELDS;
11
12pub fn validate_file(parsed: &ParsedFile, schema: &Schema) -> Vec<ValidationError> {
13 let mut errors = Vec::new();
14 let fp = &parsed.path;
15
16 for msg in &parsed.parse_errors {
18 errors.push(ValidationError {
19 file_path: fp.clone(),
20 error_type: ValidationErrorKind::ParseError,
21 field: None,
22 message: msg.clone(),
23 line_number: None,
24 });
25 }
26
27 if errors.iter().any(|e| e.error_type == ValidationErrorKind::ParseError) {
28 return errors;
29 }
30
31 let fm = &parsed.raw_frontmatter;
32 let fm_map = match fm.as_mapping() {
33 Some(m) => m,
34 None => return errors,
35 };
36
37 for (name, field_def) in &schema.frontmatter {
39 let key = serde_yaml::Value::String(name.clone());
40 match fm_map.get(&key) {
41 None => {
42 if field_def.required {
43 errors.push(ValidationError {
44 file_path: fp.clone(),
45 error_type: ValidationErrorKind::MissingField,
46 field: Some(name.clone()),
47 message: format!("Missing required frontmatter field '{}'", name),
48 line_number: None,
49 });
50 }
51 }
52 Some(value) => {
53 if let Some(type_err) = check_type(value, &field_def.field_type, name) {
54 errors.push(ValidationError {
55 file_path: fp.clone(),
56 error_type: ValidationErrorKind::TypeMismatch,
57 field: Some(name.clone()),
58 message: type_err,
59 line_number: None,
60 });
61 }
62
63 if let Some(ref enum_vals) = field_def.enum_values {
64 if !value.is_null() {
65 let str_val = yaml_value_to_string(value);
66 if !enum_vals.contains(&str_val) {
67 errors.push(ValidationError {
68 file_path: fp.clone(),
69 error_type: ValidationErrorKind::EnumViolation,
70 field: Some(name.clone()),
71 message: format!(
72 "Field '{}' value '{}' not in allowed values: {:?}",
73 name, str_val, enum_vals
74 ),
75 line_number: None,
76 });
77 }
78 }
79 }
80 }
81 }
82 }
83
84 for ts_field in TIMESTAMP_FIELDS {
86 let key = serde_yaml::Value::String(ts_field.to_string());
87 if let Some(value) = fm_map.get(&key) {
88 if let Some(type_err) = check_type(
89 value,
90 &crate::schema::FieldType::DateTime,
91 ts_field,
92 ) {
93 errors.push(ValidationError {
94 file_path: fp.clone(),
95 error_type: ValidationErrorKind::TypeMismatch,
96 field: Some(ts_field.to_string()),
97 message: type_err,
98 line_number: None,
99 });
100 }
101 }
102 }
103
104 if schema.rules.reject_unknown_frontmatter {
106 for (key_val, _) in fm_map {
107 if let Some(key) = key_val.as_str() {
108 if !schema.frontmatter.contains_key(key)
109 && !TIMESTAMP_FIELDS.contains(&key)
110 {
111 errors.push(ValidationError {
112 file_path: fp.clone(),
113 error_type: ValidationErrorKind::UnknownField,
114 field: Some(key.to_string()),
115 message: format!(
116 "Unknown frontmatter field '{}' (not in schema)",
117 key
118 ),
119 line_number: None,
120 });
121 }
122 }
123 }
124 }
125
126 if schema.h1_required && parsed.h1.is_none() {
128 errors.push(ValidationError {
129 file_path: fp.clone(),
130 error_type: ValidationErrorKind::MissingH1,
131 field: None,
132 message: "Missing required H1 heading".to_string(),
133 line_number: None,
134 });
135 }
136
137 let section_names: Vec<&str> = parsed
139 .sections
140 .iter()
141 .map(|s| s.normalized_heading.as_str())
142 .collect();
143
144 let mut section_counter: HashMap<&str, usize> = HashMap::new();
146 for name in §ion_names {
147 *section_counter.entry(name).or_insert(0) += 1;
148 }
149
150 if schema.rules.reject_duplicate_sections {
152 for (name, count) in §ion_counter {
153 if *count > 1 {
154 errors.push(ValidationError {
155 file_path: fp.clone(),
156 error_type: ValidationErrorKind::DuplicateSection,
157 field: Some(name.to_string()),
158 message: format!(
159 "Duplicate section '{}' (appears {} times)",
160 name, count
161 ),
162 line_number: None,
163 });
164 }
165 }
166 }
167
168 for (name, section_def) in &schema.sections {
170 if section_def.required && !section_names.contains(&name.as_str()) {
171 errors.push(ValidationError {
172 file_path: fp.clone(),
173 error_type: ValidationErrorKind::MissingSection,
174 field: Some(name.clone()),
175 message: format!("Missing required section '{}'", name),
176 line_number: None,
177 });
178 }
179 }
180
181 if schema.rules.reject_unknown_sections {
183 for section in &parsed.sections {
184 if !schema.sections.contains_key(§ion.normalized_heading) {
185 errors.push(ValidationError {
186 file_path: fp.clone(),
187 error_type: ValidationErrorKind::UnknownSection,
188 field: Some(section.normalized_heading.clone()),
189 message: format!(
190 "Unknown section '{}' (not in schema)",
191 section.normalized_heading
192 ),
193 line_number: Some(section.line_number),
194 });
195 }
196 }
197 }
198
199 if parsed.has_loose_body {
201 errors.push(ValidationError {
202 file_path: fp.clone(),
203 error_type: ValidationErrorKind::LooseBody,
204 field: None,
205 message: "Body content not under an H2 section is not allowed; wrap in ## heading".to_string(),
206 line_number: None,
207 });
208 }
209
210 errors
211}
212
213fn check_type(
214 value: &serde_yaml::Value,
215 expected: &crate::schema::FieldType,
216 field_name: &str,
217) -> Option<String> {
218 use crate::schema::FieldType;
219
220 if value.is_null() {
221 return None;
222 }
223
224 match expected {
225 FieldType::String => {
226 if !value.is_string() {
227 return Some(format!(
228 "Field '{}' expected string, got {}",
229 field_name,
230 yaml_type_name(value)
231 ));
232 }
233 }
234 FieldType::Int => {
235 if value.is_bool() {
236 return Some(format!(
237 "Field '{}' expected int, got bool",
238 field_name
239 ));
240 }
241 if !value.is_i64() && !value.is_u64() {
243 return Some(format!(
244 "Field '{}' expected int, got {}",
245 field_name,
246 yaml_type_name(value)
247 ));
248 }
249 }
250 FieldType::Float => {
251 if value.is_bool() {
252 return Some(format!(
253 "Field '{}' expected float, got bool",
254 field_name
255 ));
256 }
257 if !value.is_f64() && !value.is_i64() && !value.is_u64() {
258 return Some(format!(
259 "Field '{}' expected float, got {}",
260 field_name,
261 yaml_type_name(value)
262 ));
263 }
264 }
265 FieldType::Bool => {
266 if !value.is_bool() {
267 return Some(format!(
268 "Field '{}' expected bool, got {}",
269 field_name,
270 yaml_type_name(value)
271 ));
272 }
273 }
274 FieldType::Date => {
275 if let Some(s) = value.as_str() {
276 if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_err() {
277 return Some(format!(
278 "Field '{}' expected date (YYYY-MM-DD), got string '{}'",
279 field_name, s
280 ));
281 }
282 return None;
283 }
284 if !value.is_string() {
285 return Some(format!(
286 "Field '{}' expected date, got {}",
287 field_name,
288 yaml_type_name(value)
289 ));
290 }
291 }
292 FieldType::DateTime => {
293 if let Some(s) = value.as_str() {
294 let ok = chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").is_ok()
295 || chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f").is_ok();
296 if !ok {
297 return Some(format!(
298 "Field '{}' expected datetime (ISO 8601), got string '{}'",
299 field_name, s
300 ));
301 }
302 return None;
303 }
304 if !value.is_string() {
305 return Some(format!(
306 "Field '{}' expected datetime, got {}",
307 field_name,
308 yaml_type_name(value)
309 ));
310 }
311 }
312 FieldType::StringArray => {
313 match value.as_sequence() {
314 None => {
315 return Some(format!(
316 "Field '{}' expected string[], got {}",
317 field_name,
318 yaml_type_name(value)
319 ));
320 }
321 Some(seq) => {
322 for (i, item) in seq.iter().enumerate() {
323 if !item.is_string() {
324 return Some(format!(
325 "Field '{}[{}]' expected string, got {}",
326 field_name,
327 i,
328 yaml_type_name(item)
329 ));
330 }
331 }
332 }
333 }
334 }
335 FieldType::Dict => {
336 if !value.is_mapping() {
337 return Some(format!(
338 "Field '{}' expected dict (mapping), got {}",
339 field_name,
340 yaml_type_name(value)
341 ));
342 }
343 }
345 }
346
347 None
348}
349
350fn yaml_type_name(value: &serde_yaml::Value) -> &'static str {
351 match value {
352 serde_yaml::Value::Null => "null",
353 serde_yaml::Value::Bool(_) => "bool",
354 serde_yaml::Value::Number(_) => {
355 if value.is_f64() && !value.is_i64() && !value.is_u64() {
356 "float"
357 } else {
358 "int"
359 }
360 }
361 serde_yaml::Value::String(_) => "str",
362 serde_yaml::Value::Sequence(_) => "list",
363 serde_yaml::Value::Mapping(_) => "mapping",
364 _ => "unknown",
365 }
366}
367
368fn yaml_value_to_string(value: &serde_yaml::Value) -> String {
369 match value {
370 serde_yaml::Value::String(s) => s.clone(),
371 serde_yaml::Value::Number(n) => n.to_string(),
372 serde_yaml::Value::Bool(b) => b.to_string(),
373 serde_yaml::Value::Null => "null".to_string(),
374 _ => format!("{:?}", value),
375 }
376}
377
378pub(crate) fn validate_foreign_keys(
380 db_config: &DatabaseConfig,
381 tables: &HashMap<String, (Schema, Vec<Row>)>,
382) -> Vec<ValidationError> {
383 let mut errors = Vec::new();
384
385 for fk in &db_config.foreign_keys {
386 let to_table = match tables.get(&fk.to_table) {
387 Some(t) => t,
388 None => {
389 errors.push(ValidationError {
390 file_path: format!("_mdql.md"),
391 error_type: ValidationErrorKind::FkMissingTable,
392 field: None,
393 message: format!(
394 "Foreign key references unknown table '{}'",
395 fk.to_table
396 ),
397 line_number: None,
398 });
399 continue;
400 }
401 };
402
403 let from_table = match tables.get(&fk.from_table) {
404 Some(t) => t,
405 None => {
406 errors.push(ValidationError {
407 file_path: format!("_mdql.md"),
408 error_type: ValidationErrorKind::FkMissingTable,
409 field: None,
410 message: format!(
411 "Foreign key references unknown table '{}'",
412 fk.from_table
413 ),
414 line_number: None,
415 });
416 continue;
417 }
418 };
419
420 let valid_values: HashSet<String> = to_table
422 .1
423 .iter()
424 .filter_map(|row| {
425 row.get(&fk.to_column).and_then(|v| match v {
426 Value::Null => None,
427 _ => Some(v.to_display_string()),
428 })
429 })
430 .collect();
431
432 for row in &from_table.1 {
434 let value = match row.get(&fk.from_column) {
435 Some(Value::Null) | None => continue,
436 Some(v) => v,
437 };
438
439 let file_path = row
440 .get("path")
441 .map(|v| format!("{}/{}", fk.from_table, v.to_display_string()))
442 .unwrap_or_else(|| fk.from_table.clone());
443
444 let values_to_check: Vec<String> = match value {
445 Value::List(items) => items.iter().map(|s| s.clone()).collect(),
446 _ => vec![value.to_display_string()],
447 };
448
449 for value_str in &values_to_check {
450 if !valid_values.contains(value_str) {
451 errors.push(ValidationError {
452 file_path: file_path.clone(),
453 error_type: ValidationErrorKind::FkViolation,
454 field: Some(fk.from_column.clone()),
455 message: format!(
456 "{} = '{}' not found in {}.{}",
457 fk.from_column, value_str, fk.to_table, fk.to_column
458 ),
459 line_number: None,
460 });
461 }
462 }
463 }
464 }
465
466 errors
467}
468
#[cfg(test)]
mod tests {
    use super::*;
    use crate::database::{DatabaseConfig, ForeignKey};
    use crate::parser::parse_text;
    use crate::schema::*;
    use indexmap::IndexMap;

    /// Schema with two required fields, one optional enum field and one required
    /// section. Unknown frontmatter and duplicate sections are rejected.
    fn make_schema() -> Schema {
        let mut frontmatter = IndexMap::new();
        frontmatter.insert("title".to_string(), FieldDef {
            field_type: FieldType::String,
            required: true,
            enum_values: None,
        });
        frontmatter.insert("count".to_string(), FieldDef {
            field_type: FieldType::Int,
            required: true,
            enum_values: None,
        });
        frontmatter.insert("status".to_string(), FieldDef {
            field_type: FieldType::String,
            required: false,
            enum_values: Some(vec!["ACTIVE".into(), "ARCHIVED".into()]),
        });

        let mut sections = IndexMap::new();
        sections.insert("Summary".to_string(), SectionDef {
            content_type: "markdown".to_string(),
            required: true,
        });

        Schema {
            table: "test".to_string(),
            primary_key: "path".to_string(),
            frontmatter,
            h1_required: false,
            sections,
            rules: Rules {
                reject_unknown_frontmatter: true,
                reject_unknown_sections: false,
                reject_duplicate_sections: true,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Parses `text` as `test.md` and validates it against `make_schema()`.
    fn validate(text: &str) -> Vec<ValidationError> {
        let parsed = parse_text(text, "test.md", false);
        validate_file(&parsed, &make_schema())
    }

    #[test]
    fn test_valid_file() {
        let errors = validate("---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nA summary.\n");
        assert!(errors.is_empty(), "Expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_missing_required_field() {
        let errors = validate("---\ntitle: \"Hello\"\n---\n\n## Summary\n\nText.\n");
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::MissingField
            && e.field.as_deref() == Some("count")));
    }

    #[test]
    fn test_type_mismatch() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: \"not a number\"\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::TypeMismatch
            && e.field.as_deref() == Some("count")));
    }

    #[test]
    fn test_enum_violation() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: 5\nstatus: INVALID\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::EnumViolation));
    }

    #[test]
    fn test_unknown_frontmatter() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: 5\nextra: bad\n---\n\n## Summary\n\nText.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::UnknownField
            && e.field.as_deref() == Some("extra")));
    }

    #[test]
    fn test_missing_required_section() {
        let errors = validate("---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Other\n\nText.\n");
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::MissingSection));
    }

    #[test]
    fn test_duplicate_section() {
        let errors = validate(
            "---\ntitle: \"Hello\"\ncount: 5\n---\n\n## Summary\n\nFirst.\n\n## Summary\n\nSecond.\n",
        );
        assert!(errors.iter().any(|e| e.error_type == ValidationErrorKind::DuplicateSection));
    }

    /// Schema for `table` with no fields, no sections and every rule disabled.
    fn empty_schema(table: &str) -> Schema {
        Schema {
            table: table.to_string(),
            primary_key: "path".to_string(),
            frontmatter: IndexMap::new(),
            h1_required: false,
            sections: IndexMap::new(),
            rules: Rules {
                reject_unknown_frontmatter: false,
                reject_unknown_sections: false,
                reject_duplicate_sections: false,
                normalize_numbered_headings: false,
            },
        }
    }

    /// Row for the `strategies` table holding only a `path`.
    fn strategy_row(path: &str) -> Row {
        let mut row = Row::new();
        row.insert("path".into(), Value::String(path.into()));
        row
    }

    /// Row for the `backtests` table with a `path` and a `strategy` reference.
    fn backtest_row(path: &str, strategy: Value) -> Row {
        let mut row = Row::new();
        row.insert("path".into(), Value::String(path.into()));
        row.insert("strategy".into(), strategy);
        row
    }

    /// Two strategies and two backtests, each backtest referencing a real strategy.
    fn make_fk_tables() -> HashMap<String, (Schema, Vec<Row>)> {
        let strategies = vec![strategy_row("alpha.md"), strategy_row("beta.md")];
        let backtests = vec![
            backtest_row("bt-alpha.md", Value::String("alpha.md".into())),
            backtest_row("bt-beta.md", Value::String("beta.md".into())),
        ];

        let mut tables = HashMap::new();
        tables.insert("strategies".into(), (empty_schema("strategies"), strategies));
        tables.insert("backtests".into(), (empty_schema("backtests"), backtests));
        tables
    }

    /// Config with the single key `backtests.strategy -> strategies.path`.
    fn make_fk_config() -> DatabaseConfig {
        DatabaseConfig {
            name: "test".into(),
            foreign_keys: vec![ForeignKey {
                from_table: "backtests".into(),
                from_column: "strategy".into(),
                to_table: "strategies".into(),
                to_column: "path".into(),
            }],
            views: vec![],
            sync: None,
        }
    }

    #[test]
    fn test_fk_valid() {
        let tables = make_fk_tables();
        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty(), "Expected no FK errors, got: {:?}", errors);
    }

    #[test]
    fn test_fk_violation() {
        let mut tables = make_fk_tables();
        let broken = backtest_row("bt-broken.md", Value::String("nonexistent.md".into()));
        tables.get_mut("backtests").unwrap().1.push(broken);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkViolation);
        assert!(errors[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_fk_null_not_violation() {
        let mut tables = make_fk_tables();
        let nullref = backtest_row("bt-null.md", Value::Null);
        tables.get_mut("backtests").unwrap().1.push(nullref);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_missing_table() {
        let tables = make_fk_tables();
        let mut config = make_fk_config();
        config.foreign_keys[0].to_table = "nonexistent_table".into();

        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].error_type, ValidationErrorKind::FkMissingTable);
    }

    #[test]
    fn test_fk_string_array_valid() {
        let mut tables = make_fk_tables();
        let array_row = backtest_row(
            "bt-multi.md",
            Value::List(vec!["alpha.md".into(), "beta.md".into()]),
        );
        tables.get_mut("backtests").unwrap().1.push(array_row);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_fk_string_array_one_invalid() {
        let mut tables = make_fk_tables();
        let array_row = backtest_row(
            "bt-multi.md",
            Value::List(vec!["alpha.md".into(), "nonexistent.md".into()]),
        );
        tables.get_mut("backtests").unwrap().1.push(array_row);

        let config = make_fk_config();
        let errors = validate_foreign_keys(&config, &tables);
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("nonexistent.md"));
    }
}