1#![allow(clippy::module_name_repetitions)]
2
3use std::fmt;
4
5use serde::{Deserialize, Serialize};
6
/// Exact length required of the front matter `id` (a NULID) in case files.
const MAX_CASE_ID_LEN: usize = 26;

/// Maximum number of entries accepted in the front matter `sources` list.
const MAX_SOURCES: usize = 20;

/// Maximum length of the H1 title of a case file / H1 name of an entity file.
const MAX_TITLE_LEN: usize = 200;

/// Maximum length of the (trimmed) summary that follows the H1 title.
const MAX_SUMMARY_LEN: usize = 2000;

/// H2 section names listed in the "unknown section" error message.
///
/// Mirrors the kinds for which `SectionKind::is_case_section` returns `true`.
const KNOWN_CASE_SECTIONS: &[&str] = &[
    "Events",
    "Documents",
    "Assets",
    "Relationships",
    "Timeline",
    "Related Cases",
];
30
/// A case file that was parsed without errors (the `Ok` value of `parse`).
#[derive(Debug)]
pub struct ParsedCase {
    /// Front matter `id`, if one was given.
    pub id: Option<String>,
    /// Front matter `sources` entries.
    pub sources: Vec<SourceEntry>,
    /// Text of the single H1 heading.
    pub title: String,
    /// Trimmed text collected after the H1 title, before the first H2 section.
    pub summary: String,
    /// H2 sections of the body; `parse` removes the Related Cases section
    /// from this list and exposes it through `related_cases` instead.
    pub sections: Vec<Section>,
    /// Front matter `case_type`, as written in the file.
    pub case_type: Option<String>,
    /// Front matter `status`, as written in the file.
    pub status: Option<String>,
    /// Front matter `amounts`, passed through as-is by this module.
    pub amounts: Option<String>,
    /// Front matter `tags`.
    pub tags: Vec<String>,
    /// Entries parsed from the `## Related Cases` section.
    pub related_cases: Vec<RelatedCase>,
}
51
/// One entry of a `## Related Cases` section.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct RelatedCase {
    /// Text following the `- ` list marker (trimmed); never empty.
    pub case_path: String,
    /// Text of the entry's `description:` line (trimmed); never empty.
    pub description: String,
}
60
/// An H2 section of a case file together with its raw body text.
#[derive(Debug)]
pub struct Section {
    /// Which recognized section this is.
    pub kind: SectionKind,
    /// Lines between this heading and the next H2 (or end of file), joined with `\n`.
    pub body: String,
    /// Line number of the `##` heading within the input file.
    pub line: usize,
}
69
/// Kinds of H2 (`##`) section headings this parser recognizes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    People,
    Organizations,
    Events,
    Documents,
    Assets,
    Relationships,
    Timeline,
    RelatedCases,
}

impl SectionKind {
    /// Resolves heading text to a section kind.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. Returns `None` for any heading that is not recognized.
    fn from_heading(heading: &str) -> Option<Self> {
        const LABELS: [(&str, SectionKind); 8] = [
            ("People", SectionKind::People),
            ("Organizations", SectionKind::Organizations),
            ("Events", SectionKind::Events),
            ("Documents", SectionKind::Documents),
            ("Assets", SectionKind::Assets),
            ("Relationships", SectionKind::Relationships),
            ("Timeline", SectionKind::Timeline),
            ("Related Cases", SectionKind::RelatedCases),
        ];

        let wanted = heading.trim();
        LABELS
            .iter()
            .find(|(label, _)| wanted.eq_ignore_ascii_case(label))
            .map(|&(_, kind)| kind)
    }

    /// Whether this kind of section may appear in a case file.
    ///
    /// `People` and `Organizations` are the only kinds that may not.
    pub fn is_case_section(self) -> bool {
        match self {
            Self::People | Self::Organizations => false,
            Self::Events
            | Self::Documents
            | Self::Assets
            | Self::Relationships
            | Self::Timeline
            | Self::RelatedCases => true,
        }
    }
}
112
/// A single problem found while parsing, tied to a line of the input file.
#[derive(Debug)]
pub struct ParseError {
    /// Line number the problem is reported at.
    pub line: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Renders the error as `line <N>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, message } = self;
        write!(f, "line {line}: {message}")
    }
}
125
/// Maximum number of `tags` accepted in the front matter of a case file.
const MAX_CASE_TAGS: usize = 10;

/// Maximum number of `tags` accepted in the front matter of an entity file.
const MAX_ENTITY_TAGS: usize = 5;

/// Maximum length of a single tag (case and entity files alike).
const MAX_TAG_LEN: usize = 50;

/// Maximum number of entries accepted in a `## Related Cases` section.
const MAX_RELATED_CASES: usize = 10;

/// Maximum length of a related-case `description`.
const MAX_RELATED_DESCRIPTION_LEN: usize = 500;
140
141pub fn parse_related_cases(
146 body: &str,
147 section_start_line: usize,
148 errors: &mut Vec<ParseError>,
149) -> Vec<RelatedCase> {
150 let mut entries: Vec<(String, String, usize)> = Vec::new(); for (offset, line) in body.lines().enumerate() {
153 let file_line = section_start_line + offset + 1;
154
155 if let Some(rest) = line.strip_prefix("- ") {
156 let case_path = rest.trim().to_string();
157 entries.push((case_path, String::new(), file_line));
158 } else if let Some(rest) = line.strip_prefix(" description: ") {
159 if let Some(entry) = entries.last_mut() {
160 entry.1 = rest.trim().to_string();
161 } else {
162 errors.push(ParseError {
163 line: file_line,
164 message: "description without a preceding case path".into(),
165 });
166 }
167 } else if !line.trim().is_empty() {
168 errors.push(ParseError {
169 line: file_line,
170 message: format!("unexpected line in Related Cases: {line}"),
171 });
172 }
173 }
174
175 if entries.len() > MAX_RELATED_CASES {
176 errors.push(ParseError {
177 line: section_start_line,
178 message: format!(
179 "Related Cases exceeds {MAX_RELATED_CASES} entries (got {})",
180 entries.len()
181 ),
182 });
183 }
184
185 let mut result = Vec::new();
186 for (case_path, description, line) in entries {
187 if case_path.is_empty() {
188 errors.push(ParseError {
189 line,
190 message: "related case path must not be empty".into(),
191 });
192 continue;
193 }
194 if description.is_empty() {
195 errors.push(ParseError {
196 line,
197 message: format!("related case {case_path:?} missing description"),
198 });
199 continue;
200 }
201 if description.len() > MAX_RELATED_DESCRIPTION_LEN {
202 errors.push(ParseError {
203 line,
204 message: format!(
205 "related case description exceeds {MAX_RELATED_DESCRIPTION_LEN} chars (got {})",
206 description.len()
207 ),
208 });
209 continue;
210 }
211 result.push(RelatedCase {
212 case_path,
213 description,
214 });
215 }
216
217 result
218}
219
/// YAML front matter of a case file as deserialized by serde.
///
/// Every field is optional in the YAML (`#[serde(default)]`); the values are
/// checked afterwards by `validate_front_matter`.
#[derive(Deserialize)]
struct FrontMatter {
    /// Case identifier; when present it must have `MAX_CASE_ID_LEN` characters.
    #[serde(default)]
    id: Option<String>,
    /// Source references; each URL must start with `https://`.
    #[serde(default)]
    sources: Vec<SourceEntry>,
    /// Case type, validated against the known/custom values of `crate::domain`.
    #[serde(default)]
    case_type: Option<String>,
    /// Case status, validated against `crate::domain::CaseStatus::KNOWN`.
    #[serde(default)]
    status: Option<String>,
    /// Free-form amounts string; not validated in this module.
    #[serde(default)]
    amounts: Option<String>,
    /// Case tags, limited in number and length.
    #[serde(default)]
    tags: Vec<String>,
}
237
238#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
241#[serde(untagged)]
242pub enum SourceEntry {
243 Url(String),
245 Structured {
247 url: String,
248 #[serde(default)]
249 title: Option<String>,
250 #[serde(default)]
251 published_at: Option<String>,
252 #[serde(default)]
253 language: Option<String>,
254 },
255}
256
257impl SourceEntry {
258 pub fn url(&self) -> &str {
260 match self {
261 Self::Url(u) => u,
262 Self::Structured { url, .. } => url,
263 }
264 }
265}
266
/// YAML front matter of an entity file as deserialized by serde.
///
/// Both fields are optional in the YAML (`#[serde(default)]`).
#[derive(Deserialize)]
struct EntityFrontMatter {
    /// Entity identifier, if one was given.
    #[serde(default)]
    id: Option<String>,
    /// Entity tags, limited in number and length by `parse_entity_file`.
    #[serde(default)]
    tags: Vec<String>,
}
276
/// An entity file that was parsed without errors (the `Ok` value of `parse_entity_file`).
#[derive(Debug)]
pub struct ParsedEntityFile {
    /// Front matter `id`, if one was given.
    pub id: Option<String>,
    /// Text of the single H1 heading.
    pub name: String,
    /// Lines that follow the H1 heading, joined with `\n`.
    pub body: String,
    /// Line number of the H1 heading within the input file.
    pub title_line: usize,
    /// Front matter `tags`.
    pub tags: Vec<String>,
}
291
292pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
297 let mut errors = Vec::new();
298
299 let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);
301
302 let Some(front_matter) = front_matter else {
303 if errors.is_empty() {
304 errors.push(ParseError {
305 line: 1,
306 message: "missing YAML front matter (expected `---` delimiter)".into(),
307 });
308 }
309 return Err(errors);
310 };
311
312 validate_front_matter(&front_matter, &mut errors);
314
315 let (title, summary, mut sections) = extract_body(&body, body_start_line, &mut errors);
317
318 let mut related_cases = Vec::new();
320 for section in §ions {
321 if section.kind == SectionKind::RelatedCases {
322 let entries = parse_related_cases(§ion.body, section.line, &mut errors);
323 related_cases.extend(entries);
324 }
325 }
326 sections.retain(|s| s.kind != SectionKind::RelatedCases);
328
329 if !errors.is_empty() {
330 return Err(errors);
331 }
332
333 Ok(ParsedCase {
334 id: front_matter.id,
335 sources: front_matter.sources,
336 title,
337 summary,
338 sections,
339 case_type: front_matter.case_type,
340 status: front_matter.status,
341 amounts: front_matter.amounts,
342 tags: front_matter.tags,
343 related_cases,
344 })
345}
346
347pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
352 let mut errors = Vec::new();
353
354 let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);
355
356 let id = front_matter.as_ref().and_then(|fm| fm.id.clone());
357 let tags = front_matter.map_or_else(Vec::new, |fm| fm.tags);
358
359 if tags.len() > MAX_ENTITY_TAGS {
361 errors.push(ParseError {
362 line: 2,
363 message: format!(
364 "front matter `tags` exceeds {MAX_ENTITY_TAGS} entries (got {})",
365 tags.len()
366 ),
367 });
368 }
369 for (i, tag) in tags.iter().enumerate() {
370 if tag.len() > MAX_TAG_LEN {
371 errors.push(ParseError {
372 line: 2,
373 message: format!("front matter tag #{} exceeds {MAX_TAG_LEN} chars", i + 1),
374 });
375 }
376 if tag.is_empty() {
377 errors.push(ParseError {
378 line: 2,
379 message: format!("front matter tag #{} is empty", i + 1),
380 });
381 }
382 }
383
384 let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);
386
387 if !errors.is_empty() {
388 return Err(errors);
389 }
390
391 Ok(ParsedEntityFile {
392 id,
393 name,
394 body: field_body,
395 title_line,
396 tags,
397 })
398}
399
400fn extract_entity_front_matter(
403 input: &str,
404 errors: &mut Vec<ParseError>,
405) -> (Option<EntityFrontMatter>, usize, String) {
406 let lines: Vec<&str> = input.lines().collect();
407
408 let first_delim = lines.iter().position(|l| l.trim() == "---");
409 if first_delim != Some(0) {
410 return (None, 1, input.to_string());
412 }
413
414 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
415 let Some(close_offset) = close_delim else {
416 errors.push(ParseError {
417 line: 1,
418 message: "unclosed YAML front matter (missing closing `---`)".into(),
419 });
420 return (None, 1, String::new());
421 };
422
423 let close_line = close_offset + 1;
424 let yaml_str: String = lines[1..close_line].join("\n");
425 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
427
428 match serde_yaml::from_str::<EntityFrontMatter>(&yaml_str) {
429 Ok(fm) => (Some(fm), body_start_line, body),
430 Err(e) => {
431 errors.push(ParseError {
432 line: 2,
433 message: format!("invalid YAML front matter: {e}"),
434 });
435 (None, body_start_line, body)
436 }
437 }
438}
439
440fn extract_entity_body(
443 body: &str,
444 body_start_line: usize,
445 errors: &mut Vec<ParseError>,
446) -> (String, usize, String) {
447 let lines: Vec<&str> = body.lines().collect();
448 let mut name = String::new();
449 let mut title_found = false;
450 let mut title_line = body_start_line;
451 let mut field_lines: Vec<&str> = Vec::new();
452
453 for (i, line) in lines.iter().enumerate() {
454 let file_line = body_start_line + i;
455
456 if let Some(heading) = strip_heading(line, 1) {
457 if title_found {
458 errors.push(ParseError {
459 line: file_line,
460 message: "multiple H1 headings found (expected exactly one)".into(),
461 });
462 continue;
463 }
464 name = heading.to_string();
465 title_found = true;
466 title_line = file_line;
467 continue;
468 }
469
470 if strip_heading(line, 2).is_some() {
472 errors.push(ParseError {
473 line: file_line,
474 message: "H2 sections are not allowed in entity files".into(),
475 });
476 continue;
477 }
478
479 if title_found {
480 field_lines.push(line);
481 } else if !line.trim().is_empty() {
482 errors.push(ParseError {
483 line: file_line,
484 message: "expected H1 heading (# Name)".into(),
485 });
486 }
487 }
488
489 if !title_found {
490 errors.push(ParseError {
491 line: body_start_line,
492 message: "missing H1 heading".into(),
493 });
494 } else if name.len() > MAX_TITLE_LEN {
495 errors.push(ParseError {
496 line: title_line,
497 message: format!("H1 name exceeds {MAX_TITLE_LEN} chars (got {})", name.len()),
498 });
499 }
500
501 (name, title_line, field_lines.join("\n"))
502}
503
504fn extract_front_matter(
508 input: &str,
509 errors: &mut Vec<ParseError>,
510) -> (Option<FrontMatter>, usize, String) {
511 let lines: Vec<&str> = input.lines().collect();
512
513 let first_delim = lines.iter().position(|l| l.trim() == "---");
515 if first_delim != Some(0) {
516 errors.push(ParseError {
517 line: 1,
518 message: "missing YAML front matter (expected `---` on first line)".into(),
519 });
520 return (None, 1, input.to_string());
521 }
522
523 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
525 let Some(close_offset) = close_delim else {
526 errors.push(ParseError {
527 line: 1,
528 message: "unclosed YAML front matter (missing closing `---`)".into(),
529 });
530 return (None, 1, String::new());
531 };
532
533 let close_line = close_offset + 1; let yaml_str: String = lines[1..close_line].join("\n");
535 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
537
538 match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
539 Ok(fm) => (Some(fm), body_start_line, body),
540 Err(e) => {
541 errors.push(ParseError {
542 line: 2,
543 message: format!("invalid YAML front matter: {e}"),
544 });
545 (None, body_start_line, body)
546 }
547 }
548}
549
550fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
551 if let Some(id) = &fm.id {
553 if id.len() != MAX_CASE_ID_LEN {
554 errors.push(ParseError {
555 line: 2,
556 message: format!(
557 "front matter `id` must be a {MAX_CASE_ID_LEN}-char NULID, got {} chars",
558 id.len()
559 ),
560 });
561 }
562 }
563
564 if fm.sources.len() > MAX_SOURCES {
566 errors.push(ParseError {
567 line: 2,
568 message: format!(
569 "front matter `sources` exceeds {MAX_SOURCES} entries (got {})",
570 fm.sources.len()
571 ),
572 });
573 }
574
575 for (i, source) in fm.sources.iter().enumerate() {
577 if !source.url().starts_with("https://") {
578 errors.push(ParseError {
579 line: 2,
580 message: format!("source[{i}] must be HTTPS, got {:?}", source.url()),
581 });
582 }
583 }
584
585 if let Some(ct) = &fm.case_type {
587 use crate::domain::CaseType;
588 let normalized = ct.to_lowercase().replace(' ', "_");
589 if !CaseType::KNOWN.contains(&normalized.as_str())
590 && crate::domain::parse_custom(ct).is_none()
591 {
592 errors.push(ParseError {
593 line: 2,
594 message: format!(
595 "invalid case_type {:?} (known: {}; use \"custom:Value\" for custom)",
596 ct,
597 CaseType::KNOWN.join(", ")
598 ),
599 });
600 }
601 }
602
603 if let Some(st) = &fm.status {
605 use crate::domain::CaseStatus;
606 let normalized = st.to_lowercase().replace(' ', "_");
607 if !CaseStatus::KNOWN.contains(&normalized.as_str()) {
608 errors.push(ParseError {
609 line: 2,
610 message: format!(
611 "invalid status {:?} (known: {})",
612 st,
613 CaseStatus::KNOWN.join(", ")
614 ),
615 });
616 }
617 }
618
619 if fm.tags.len() > MAX_CASE_TAGS {
621 errors.push(ParseError {
622 line: 2,
623 message: format!(
624 "front matter `tags` exceeds {MAX_CASE_TAGS} entries (got {})",
625 fm.tags.len()
626 ),
627 });
628 }
629 for (i, tag) in fm.tags.iter().enumerate() {
630 if tag.len() > MAX_TAG_LEN {
631 errors.push(ParseError {
632 line: 2,
633 message: format!("tag[{i}] exceeds {MAX_TAG_LEN} chars (got {})", tag.len()),
634 });
635 }
636 if tag.is_empty() {
637 errors.push(ParseError {
638 line: 2,
639 message: format!("tag[{i}] must not be empty"),
640 });
641 }
642 }
643}
644
645#[allow(clippy::too_many_lines)]
647fn extract_body(
648 body: &str,
649 body_start_line: usize,
650 errors: &mut Vec<ParseError>,
651) -> (String, String, Vec<Section>) {
652 let lines: Vec<&str> = body.lines().collect();
653 let mut title = String::new();
654 let mut title_found = false;
655 let mut summary_lines: Vec<&str> = Vec::new();
656 let mut sections: Vec<Section> = Vec::new();
657
658 let mut current_section_kind: Option<SectionKind> = None;
660 let mut current_section_line: usize = 0;
661 let mut current_section_body: Vec<&str> = Vec::new();
662
663 let mut state = State::BeforeTitle;
665
666 for (i, line) in lines.iter().enumerate() {
667 let file_line = body_start_line + i; if let Some(heading) = strip_heading(line, 1) {
670 if title_found {
671 errors.push(ParseError {
672 line: file_line,
673 message: "multiple H1 headings found (expected exactly one)".into(),
674 });
675 continue;
676 }
677 title = heading.to_string();
678 title_found = true;
679 state = State::Summary;
680 continue;
681 }
682
683 if let Some(heading) = strip_heading(line, 2) {
684 if let Some(kind) = current_section_kind.take() {
686 sections.push(Section {
687 kind,
688 body: current_section_body.join("\n"),
689 line: current_section_line,
690 });
691 current_section_body.clear();
692 }
693
694 match SectionKind::from_heading(heading) {
695 Some(kind) if kind.is_case_section() => {
696 if sections.iter().any(|s| s.kind == kind) {
698 errors.push(ParseError {
699 line: file_line,
700 message: format!("duplicate section: ## {heading}"),
701 });
702 }
703 current_section_kind = Some(kind);
704 current_section_line = file_line;
705 state = State::InSection;
706 }
707 Some(_) => {
708 errors.push(ParseError {
710 line: file_line,
711 message: format!(
712 "## {heading} is not allowed in case files (use standalone entity files in people/ or organizations/ instead)"
713 ),
714 });
715 }
716 None => {
717 errors.push(ParseError {
718 line: file_line,
719 message: format!(
720 "unknown section: ## {heading} (expected one of: {})",
721 KNOWN_CASE_SECTIONS.join(", ")
722 ),
723 });
724 }
725 }
726 continue;
727 }
728
729 match state {
730 State::BeforeTitle => {
731 if !line.trim().is_empty() {
733 errors.push(ParseError {
734 line: file_line,
735 message: "expected H1 title (# Title)".into(),
736 });
737 }
738 }
739 State::Summary => {
740 summary_lines.push(line);
741 }
742 State::InSection => {
743 current_section_body.push(line);
744 }
745 }
746 }
747
748 if let Some(kind) = current_section_kind.take() {
750 sections.push(Section {
751 kind,
752 body: current_section_body.join("\n"),
753 line: current_section_line,
754 });
755 }
756
757 if !title_found {
759 errors.push(ParseError {
760 line: body_start_line,
761 message: "missing H1 title".into(),
762 });
763 } else if title.len() > MAX_TITLE_LEN {
764 errors.push(ParseError {
765 line: body_start_line,
766 message: format!(
767 "H1 title exceeds {MAX_TITLE_LEN} chars (got {})",
768 title.len()
769 ),
770 });
771 }
772
773 let summary = summary_lines.clone().join("\n").trim().to_string();
775
776 if summary.len() > MAX_SUMMARY_LEN {
777 errors.push(ParseError {
778 line: body_start_line,
779 message: format!(
780 "summary exceeds {MAX_SUMMARY_LEN} chars (got {})",
781 summary.len()
782 ),
783 });
784 }
785
786 (title, summary, sections)
787}
788
/// Position of `extract_body` within the markdown body; decides where a
/// non-heading line is routed.
#[derive(Clone, Copy)]
enum State {
    /// No H1 seen yet; non-blank lines here are reported as errors.
    BeforeTitle,
    /// After the H1; lines are collected into the summary.
    Summary,
    /// After an accepted H2; lines are collected into the current section body.
    InSection,
}
795
/// Returns the text of a markdown heading of exactly `level` `#` characters.
///
/// Leading whitespace before the `#` run is ignored. The result is:
/// * `Some("")` when the line is nothing but the `#` run,
/// * `Some(trimmed text)` when the run is followed by a space and the text
///   right after that space does not itself start with `#`,
/// * `None` otherwise (deeper heading, no space after the run, or not a heading).
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    let marker = "#".repeat(level);
    let rest = line.trim_start().strip_prefix(marker.as_str())?;

    if rest.is_empty() {
        return Some("");
    }

    match rest.strip_prefix(' ') {
        Some(text) if !text.starts_with('#') => Some(text.trim()),
        _ => None,
    }
}
818
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that at least one error message in `$errs` contains `$needle`.
    macro_rules! assert_mentions {
        ($errs:expr, $needle:expr) => {
            assert!($errs.iter().any(|e| e.message.contains($needle)))
        };
    }

    /// Unwraps a parse result, panicking with every collected error on failure.
    fn expect_ok<T, E: std::fmt::Display>(result: Result<T, Vec<E>>) -> T {
        match result {
            Ok(value) => value,
            Err(errs) => {
                let rendered: Vec<String> = errs.iter().map(ToString::to_string).collect();
                panic!("parse failed: {}", rendered.join("; "));
            }
        }
    }

    /// Builds a document from lines joined with `\n` (no trailing newline).
    fn doc(lines: &[&str]) -> String {
        lines.join("\n")
    }

    /// Builds a case file with empty sources and the H1 `# Title`, followed by `rest`.
    fn titled_case(rest: &[&str]) -> String {
        let mut lines = vec!["---", "sources: []", "---", "", "# Title", ""];
        lines.extend_from_slice(rest);
        lines.join("\n")
    }

    /// Smallest case file that exercises front matter, summary and two sections.
    fn minimal_case() -> String {
        doc(&[
            "---",
            "id: 01H9XT7H1J3929RK32FWSRKV88",
            "sources:",
            " - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: associate_of",
        ])
    }

    #[test]
    fn parse_minimal_case() {
        let case = expect_ok(parse(&minimal_case()));

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1J3929RK32FWSRKV88"));
        assert_eq!(case.sources.len(), 1);
        assert_eq!(case.sources[0].url(), "https://example.com/source");
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    #[test]
    fn parse_missing_front_matter() {
        let errs = parse("# Title\n\nSummary.\n").unwrap_err();
        assert_mentions!(errs, "front matter");
    }

    #[test]
    fn parse_unclosed_front_matter() {
        let errs = parse("---\nsources: []\n# Title\n").unwrap_err();
        assert_mentions!(errs, "unclosed");
    }

    #[test]
    fn parse_invalid_case_id_wrong_length() {
        let errs = parse("---\nid: short\nsources: []\n---\n\n# Title\n").unwrap_err();
        assert_mentions!(errs, "NULID");
    }

    #[test]
    fn parse_case_id_absent_is_ok() {
        let input = "---\nsources:\n - https://example.com\n---\n\n# Title\n\nSummary.\n";
        let case = parse(input).unwrap();
        assert!(case.id.is_none());
    }

    #[test]
    fn parse_non_https_source() {
        let errs = parse("---\nsources:\n - http://example.com\n---\n\n# Title\n").unwrap_err();
        assert_mentions!(errs, "HTTPS");
    }

    #[test]
    fn parse_too_many_sources() {
        let mut source_lines: Vec<String> = Vec::new();
        for i in 0..21 {
            source_lines.push(format!(" - https://example.com/{i}"));
        }
        let input = format!(
            "---\nsources:\n{}\n---\n\n# Title\n",
            source_lines.join("\n")
        );
        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "exceeds 20");
    }

    #[test]
    fn parse_unknown_section() {
        let input = titled_case(&["## Unknown Section", ""]);
        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "unknown section");
    }

    #[test]
    fn parse_duplicate_section() {
        let input = titled_case(&["## Events", "", "## Events", ""]);
        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "duplicate");
    }

    #[test]
    fn parse_multiple_h1() {
        let input = doc(&[
            "---",
            "sources: []",
            "---",
            "",
            "# First Title",
            "",
            "# Second Title",
            "",
        ]);
        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "multiple H1");
    }

    #[test]
    fn parse_all_sections() {
        let input = doc(&[
            "---",
            "id: 01H9XT7H1KRQ9SJ7SD9ETB5CVQ",
            "sources:",
            " - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]);

        let case = expect_ok(parse(&input));

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1KRQ9SJ7SD9ETB5CVQ"));
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    #[test]
    fn parse_empty_summary() {
        let input = titled_case(&["## Events", ""]);
        let case = expect_ok(parse(&input));
        assert_eq!(case.summary, "");
    }

    #[test]
    fn parse_multiline_summary() {
        let input = titled_case(&[
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]);
        let case = expect_ok(parse(&input));
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        // A deeper heading must not match a shallower level.
        assert_eq!(strip_heading("### Entity", 2), None);
        assert_eq!(strip_heading("## Section", 1), None);
        // Plain text is not a heading.
        assert_eq!(strip_heading("Normal text", 1), None);
    }

    #[test]
    fn section_body_content() {
        let input = titled_case(&[
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]);

        let case = expect_ok(parse(&input));

        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    #[test]
    fn parse_rejects_people_section_in_case_file() {
        let input = titled_case(&["## People", ""]);
        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "not allowed in case files");
    }

    #[test]
    fn parse_rejects_organizations_section_in_case_file() {
        let input = titled_case(&["## Organizations", ""]);
        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "not allowed in case files");
    }

    #[test]
    fn parse_entity_file_with_id() {
        let input = doc(&[
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]);

        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    #[test]
    fn parse_entity_file_without_id() {
        let input = doc(&[
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- org_type: sports_club",
            "",
        ]);

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = doc(&["# Bob Smith", "", "- nationality: Dutch", ""]);

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = doc(&[
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]);

        let errs = parse_entity_file(&input).unwrap_err();
        assert_mentions!(errs, "H2 sections");
    }

    #[test]
    fn parse_entity_file_missing_h1() {
        let input = doc(&["---", "---", "", "- nationality: Dutch", ""]);

        let errs = parse_entity_file(&input).unwrap_err();
        assert_mentions!(errs, "missing H1");
    }

    #[test]
    fn parse_related_cases_section() {
        let input = doc(&[
            "---",
            "tags: [bribery]",
            "sources:",
            " - https://example.com",
            "---",
            "",
            "# Test Case",
            "",
            "Summary text.",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/blbi-liquidity-aid-scandal",
            " description: Artalyta bribed Urip to influence the BLBI investigation",
            "- id/corruption/2008/another-case",
            " description: A second related case",
        ]);

        let case = expect_ok(parse(&input));

        assert_eq!(case.related_cases.len(), 2);

        let first = &case.related_cases[0];
        assert_eq!(
            first.case_path,
            "id/corruption/2002/blbi-liquidity-aid-scandal"
        );
        assert_eq!(
            first.description,
            "Artalyta bribed Urip to influence the BLBI investigation"
        );

        let second = &case.related_cases[1];
        assert_eq!(second.case_path, "id/corruption/2008/another-case");
        assert_eq!(second.description, "A second related case");

        // The section is consumed into `related_cases` and not kept as a raw section.
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::RelatedCases)
        );
    }

    #[test]
    fn parse_related_cases_empty_path() {
        let input = titled_case(&[
            "## Related Cases",
            "",
            "- ",
            " description: Some description",
        ]);

        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "case path must not be empty");
    }

    #[test]
    fn parse_related_cases_missing_description() {
        let input = titled_case(&["## Related Cases", "", "- id/corruption/2002/some-case"]);

        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "description");
    }

    #[test]
    fn parse_related_cases_description_too_long() {
        let long_desc = "x".repeat(501);
        let input = titled_case(&[
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
            &format!(" description: {long_desc}"),
        ]);

        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "exceeds 500");
    }

    #[test]
    fn parse_related_cases_too_many() {
        let mut tail: Vec<String> = vec!["## Related Cases".to_string(), String::new()];
        for i in 0..11 {
            tail.push(format!("- id/corruption/2002/case-{i}"));
            tail.push(format!(" description: Description {i}"));
        }
        let tail_refs: Vec<&str> = tail.iter().map(String::as_str).collect();
        let input = titled_case(&tail_refs);

        let errs = parse(&input).unwrap_err();
        assert_mentions!(errs, "exceeds 10");
    }
}