1#![allow(clippy::module_name_repetitions)]
2
3use std::fmt;
4
5use serde::{Deserialize, Serialize};
6
/// Exact length a front-matter `id` must have, compared against `str::len` (bytes).
const MAX_CASE_ID_LEN: usize = 26;

/// Largest number of front-matter `sources` entries accepted.
const MAX_SOURCES: usize = 20;

/// Largest accepted H1 title / entity name length, compared against `str::len` (bytes).
const MAX_TITLE_LEN: usize = 200;

/// Largest accepted summary length, compared against `str::len` (bytes).
const MAX_SUMMARY_LEN: usize = 2_000;

/// Largest accepted front-matter `tagline` length, compared against `str::len` (bytes).
const MAX_TAGLINE_LEN: usize = 280;
21
/// Section headings listed in the "unknown section" error hint.
///
/// Every heading accepted as a case section is listed, including `Involved`,
/// so the hint matches what the parser actually allows.
const KNOWN_CASE_SECTIONS: &[&str] = &[
    "Events",
    "Documents",
    "Assets",
    "Relationships",
    "Timeline",
    "Related Cases",
    "Involved",
];
33
34#[derive(Debug)]
36pub struct ParsedCase {
37 pub id: Option<String>,
39 pub sources: Vec<SourceEntry>,
40 pub title: String,
41 pub summary: String,
42 pub sections: Vec<Section>,
43 pub case_type: Option<String>,
45 pub status: Option<String>,
47 pub amounts: Option<String>,
49 pub tags: Vec<String>,
51 pub tagline: Option<String>,
53 pub related_cases: Vec<RelatedCase>,
55 pub involved: Vec<InvolvedEntry>,
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
61pub struct RelatedCase {
62 pub case_path: String,
64 pub description: String,
66 #[serde(skip_serializing_if = "Option::is_none")]
68 pub id: Option<String>,
69 #[serde(skip)]
71 pub line: usize,
72}
73
/// One entry of a `## Involved` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvolvedEntry {
    /// Text following the `- ` bullet, trimmed.
    pub entity_name: String,
    /// Value of the `id:` line directly after the bullet, if there is one.
    pub id: Option<String>,
    /// File line of the bullet.
    pub line: usize,
}
84
85#[derive(Debug)]
87pub struct Section {
88 pub kind: SectionKind,
89 pub body: String,
90 pub line: usize,
92}
93
/// Kinds of `##` headings the parser recognises.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    People,
    Organizations,
    Events,
    Documents,
    Assets,
    Relationships,
    Timeline,
    RelatedCases,
    Involved,
}

impl SectionKind {
    /// Heading text paired with the kind it selects.
    const HEADINGS: [(&'static str, Self); 9] = [
        ("People", Self::People),
        ("Organizations", Self::Organizations),
        ("Events", Self::Events),
        ("Documents", Self::Documents),
        ("Assets", Self::Assets),
        ("Relationships", Self::Relationships),
        ("Timeline", Self::Timeline),
        ("Related Cases", Self::RelatedCases),
        ("Involved", Self::Involved),
    ];

    /// Maps heading text to its kind.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. Returns `None` for text that matches no known heading.
    fn from_heading(heading: &str) -> Option<Self> {
        let wanted = heading.trim();
        Self::HEADINGS
            .iter()
            .find(|(label, _)| label.eq_ignore_ascii_case(wanted))
            .map(|&(_, kind)| kind)
    }

    /// Returns `true` for every kind except `People` and `Organizations`.
    pub fn is_case_section(self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::People | Self::Organizations => false,
            Self::Events
            | Self::Documents
            | Self::Assets
            | Self::Relationships
            | Self::Timeline
            | Self::RelatedCases
            | Self::Involved => true,
        }
    }
}
139
/// A problem found while parsing, tied to a line of the input file.
#[derive(Debug)]
pub struct ParseError {
    /// File line the problem is reported against.
    pub line: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Renders the error as `line <n>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, message } = self;
        write!(f, "line {line}: {message}")
    }
}

// Lets callers box the error or propagate it with `?` into `Box<dyn Error>`.
impl std::error::Error for ParseError {}
152
/// Largest number of `tags` accepted in a case file's front matter.
const MAX_CASE_TAGS: usize = 10;

/// Largest number of `tags` accepted in an entity file's front matter.
const MAX_ENTITY_TAGS: usize = 5;

/// Largest accepted length of a single tag, compared against `str::len` (bytes).
const MAX_TAG_LEN: usize = 50;

/// Largest number of entries accepted in a `## Related Cases` section.
const MAX_RELATED_CASES: usize = 10;

/// Largest accepted related-case description length, compared against `str::len` (bytes).
const MAX_RELATED_DESCRIPTION_LEN: usize = 500;
167
168pub fn parse_related_cases(
173 body: &str,
174 section_start_line: usize,
175 errors: &mut Vec<ParseError>,
176) -> Vec<RelatedCase> {
177 let mut entries: Vec<(String, String, Option<String>, usize)> = Vec::new(); for (offset, line) in body.lines().enumerate() {
180 let file_line = section_start_line + offset + 1;
181
182 if let Some(rest) = line.strip_prefix("- ") {
183 let case_path = rest.trim().to_string();
184 entries.push((case_path, String::new(), None, file_line));
185 } else if let Some(rest) = line.strip_prefix(" description: ") {
186 if let Some(entry) = entries.last_mut() {
187 entry.1 = rest.trim().to_string();
188 } else {
189 errors.push(ParseError {
190 line: file_line,
191 message: "description without a preceding case path".into(),
192 });
193 }
194 } else if let Some(rest) = line.strip_prefix(" id: ") {
195 if let Some(entry) = entries.last_mut() {
196 entry.2 = Some(rest.trim().to_string());
197 } else {
198 errors.push(ParseError {
199 line: file_line,
200 message: "id without a preceding case path".into(),
201 });
202 }
203 } else if !line.trim().is_empty() {
204 errors.push(ParseError {
205 line: file_line,
206 message: format!("unexpected line in Related Cases: {line}"),
207 });
208 }
209 }
210
211 if entries.len() > MAX_RELATED_CASES {
212 errors.push(ParseError {
213 line: section_start_line,
214 message: format!(
215 "Related Cases exceeds {MAX_RELATED_CASES} entries (got {})",
216 entries.len()
217 ),
218 });
219 }
220
221 let mut result = Vec::new();
222 for (case_path, description, id, line) in entries {
223 if case_path.is_empty() {
224 errors.push(ParseError {
225 line,
226 message: "related case path must not be empty".into(),
227 });
228 continue;
229 }
230 if description.is_empty() {
231 errors.push(ParseError {
232 line,
233 message: format!("related case {case_path:?} missing description"),
234 });
235 continue;
236 }
237 if description.len() > MAX_RELATED_DESCRIPTION_LEN {
238 errors.push(ParseError {
239 line,
240 message: format!(
241 "related case description exceeds {MAX_RELATED_DESCRIPTION_LEN} chars (got {})",
242 description.len()
243 ),
244 });
245 continue;
246 }
247 result.push(RelatedCase {
248 case_path,
249 description,
250 id,
251 line,
252 });
253 }
254
255 result
256}
257
/// Largest number of entries accepted in a `## Involved` section.
const MAX_INVOLVED: usize = 50;
260
261pub fn parse_involved(
269 body: &str,
270 section_start_line: usize,
271 errors: &mut Vec<ParseError>,
272) -> Vec<InvolvedEntry> {
273 let mut entries = Vec::new();
274 let lines: Vec<&str> = body.lines().collect();
275
276 let mut i = 0;
277 while i < lines.len() {
278 let file_line = section_start_line + 1 + i;
279 let trimmed = lines[i].trim();
280
281 if trimmed.is_empty() {
282 i += 1;
283 continue;
284 }
285
286 let Some(name) = trimmed.strip_prefix("- ") else {
287 errors.push(ParseError {
288 line: file_line,
289 message: format!("expected involved entry `- Entity Name`, got {trimmed:?}"),
290 });
291 i += 1;
292 continue;
293 };
294
295 let entity_name = name.trim().to_string();
296 if entity_name.is_empty() {
297 errors.push(ParseError {
298 line: file_line,
299 message: "involved entity name must not be empty".into(),
300 });
301 i += 1;
302 continue;
303 }
304
305 let mut id: Option<String> = None;
307 if i + 1 < lines.len() {
308 let next = lines[i + 1].trim();
309 if let Some(id_val) = next.strip_prefix("id: ") {
310 id = Some(id_val.trim().to_string());
311 i += 1;
312 }
313 }
314
315 entries.push(InvolvedEntry {
316 entity_name,
317 id,
318 line: file_line,
319 });
320
321 i += 1;
322 }
323
324 if entries.len() > MAX_INVOLVED {
325 errors.push(ParseError {
326 line: section_start_line,
327 message: format!(
328 "Involved exceeds {MAX_INVOLVED} entries (got {})",
329 entries.len()
330 ),
331 });
332 }
333
334 entries
335}
336
337#[derive(Deserialize)]
339struct FrontMatter {
340 #[serde(default)]
342 id: Option<String>,
343 #[serde(default)]
344 sources: Vec<SourceEntry>,
345 #[serde(default)]
346 case_type: Option<String>,
347 #[serde(default)]
348 status: Option<String>,
349 #[serde(default)]
350 amounts: Option<String>,
351 #[serde(default)]
352 tags: Vec<String>,
353 #[serde(default)]
354 tagline: Option<String>,
355}
356#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
358#[serde(untagged)]
359pub enum SourceEntry {
360 Url(String),
362 Structured {
364 url: String,
365 #[serde(default)]
366 title: Option<String>,
367 #[serde(default)]
368 published_at: Option<String>,
369 #[serde(default)]
370 language: Option<String>,
371 },
372}
373
374impl SourceEntry {
375 pub fn url(&self) -> &str {
377 match self {
378 Self::Url(u) => u,
379 Self::Structured { url, .. } => url,
380 }
381 }
382}
383
384#[derive(Deserialize)]
387struct EntityFrontMatter {
388 #[serde(default)]
389 id: Option<String>,
390 #[serde(default)]
391 tags: Vec<String>,
392}
393
/// Result of successfully parsing an entity file with [`parse_entity_file`].
#[derive(Debug)]
pub struct ParsedEntityFile {
    /// Front-matter `id`, if one was given.
    pub id: Option<String>,
    /// Text of the single H1 heading.
    pub name: String,
    /// Lines following the H1 heading, joined with `\n`.
    pub body: String,
    /// File line of the H1 heading.
    pub title_line: usize,
    /// Front-matter `tags`.
    pub tags: Vec<String>,
}
408
409pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
414 let mut errors = Vec::new();
415
416 let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);
418
419 let Some(front_matter) = front_matter else {
420 if errors.is_empty() {
421 errors.push(ParseError {
422 line: 1,
423 message: "missing YAML front matter (expected `---` delimiter)".into(),
424 });
425 }
426 return Err(errors);
427 };
428
429 validate_front_matter(&front_matter, &mut errors);
431
432 let (title, summary, mut sections) = extract_body(&body, body_start_line, &mut errors);
434
435 let mut related_cases = Vec::new();
437 for section in §ions {
438 if section.kind == SectionKind::RelatedCases {
439 let entries = parse_related_cases(§ion.body, section.line, &mut errors);
440 related_cases.extend(entries);
441 }
442 }
443 sections.retain(|s| s.kind != SectionKind::RelatedCases);
445
446 let mut involved = Vec::new();
448 for section in §ions {
449 if section.kind == SectionKind::Involved {
450 let entries = parse_involved(§ion.body, section.line, &mut errors);
451 involved.extend(entries);
452 }
453 }
454 sections.retain(|s| s.kind != SectionKind::Involved);
456
457 if !errors.is_empty() {
458 return Err(errors);
459 }
460
461 Ok(ParsedCase {
462 id: front_matter.id,
463 sources: front_matter.sources,
464 title,
465 summary,
466 sections,
467 case_type: front_matter.case_type,
468 status: front_matter.status,
469 amounts: front_matter.amounts,
470 tags: front_matter.tags,
471 tagline: front_matter.tagline,
472 related_cases,
473 involved,
474 })
475}
476
477pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
482 let mut errors = Vec::new();
483
484 let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);
485
486 let id = front_matter.as_ref().and_then(|fm| fm.id.clone());
487 let tags = front_matter.map_or_else(Vec::new, |fm| fm.tags);
488
489 if tags.len() > MAX_ENTITY_TAGS {
491 errors.push(ParseError {
492 line: 2,
493 message: format!(
494 "front matter `tags` exceeds {MAX_ENTITY_TAGS} entries (got {})",
495 tags.len()
496 ),
497 });
498 }
499 for (i, tag) in tags.iter().enumerate() {
500 if tag.len() > MAX_TAG_LEN {
501 errors.push(ParseError {
502 line: 2,
503 message: format!("front matter tag #{} exceeds {MAX_TAG_LEN} chars", i + 1),
504 });
505 }
506 if tag.is_empty() {
507 errors.push(ParseError {
508 line: 2,
509 message: format!("front matter tag #{} is empty", i + 1),
510 });
511 }
512 }
513
514 let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);
516
517 if !errors.is_empty() {
518 return Err(errors);
519 }
520
521 Ok(ParsedEntityFile {
522 id,
523 name,
524 body: field_body,
525 title_line,
526 tags,
527 })
528}
529
530fn extract_entity_front_matter(
533 input: &str,
534 errors: &mut Vec<ParseError>,
535) -> (Option<EntityFrontMatter>, usize, String) {
536 let lines: Vec<&str> = input.lines().collect();
537
538 let first_delim = lines.iter().position(|l| l.trim() == "---");
539 if first_delim != Some(0) {
540 return (None, 1, input.to_string());
542 }
543
544 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
545 let Some(close_offset) = close_delim else {
546 errors.push(ParseError {
547 line: 1,
548 message: "unclosed YAML front matter (missing closing `---`)".into(),
549 });
550 return (None, 1, String::new());
551 };
552
553 let close_line = close_offset + 1;
554 let yaml_str: String = lines[1..close_line].join("\n");
555 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
557
558 match serde_yaml::from_str::<EntityFrontMatter>(&yaml_str) {
559 Ok(fm) => (Some(fm), body_start_line, body),
560 Err(e) => {
561 errors.push(ParseError {
562 line: 2,
563 message: format!("invalid YAML front matter: {e}"),
564 });
565 (None, body_start_line, body)
566 }
567 }
568}
569
570fn extract_entity_body(
573 body: &str,
574 body_start_line: usize,
575 errors: &mut Vec<ParseError>,
576) -> (String, usize, String) {
577 let lines: Vec<&str> = body.lines().collect();
578 let mut name = String::new();
579 let mut title_found = false;
580 let mut title_line = body_start_line;
581 let mut field_lines: Vec<&str> = Vec::new();
582
583 for (i, line) in lines.iter().enumerate() {
584 let file_line = body_start_line + i;
585
586 if let Some(heading) = strip_heading(line, 1) {
587 if title_found {
588 errors.push(ParseError {
589 line: file_line,
590 message: "multiple H1 headings found (expected exactly one)".into(),
591 });
592 continue;
593 }
594 name = heading.to_string();
595 title_found = true;
596 title_line = file_line;
597 continue;
598 }
599
600 if strip_heading(line, 2).is_some() {
602 errors.push(ParseError {
603 line: file_line,
604 message: "H2 sections are not allowed in entity files".into(),
605 });
606 continue;
607 }
608
609 if title_found {
610 field_lines.push(line);
611 } else if !line.trim().is_empty() {
612 errors.push(ParseError {
613 line: file_line,
614 message: "expected H1 heading (# Name)".into(),
615 });
616 }
617 }
618
619 if !title_found {
620 errors.push(ParseError {
621 line: body_start_line,
622 message: "missing H1 heading".into(),
623 });
624 } else if name.len() > MAX_TITLE_LEN {
625 errors.push(ParseError {
626 line: title_line,
627 message: format!("H1 name exceeds {MAX_TITLE_LEN} chars (got {})", name.len()),
628 });
629 }
630
631 (name, title_line, field_lines.join("\n"))
632}
633
634fn extract_front_matter(
638 input: &str,
639 errors: &mut Vec<ParseError>,
640) -> (Option<FrontMatter>, usize, String) {
641 let lines: Vec<&str> = input.lines().collect();
642
643 let first_delim = lines.iter().position(|l| l.trim() == "---");
645 if first_delim != Some(0) {
646 errors.push(ParseError {
647 line: 1,
648 message: "missing YAML front matter (expected `---` on first line)".into(),
649 });
650 return (None, 1, input.to_string());
651 }
652
653 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
655 let Some(close_offset) = close_delim else {
656 errors.push(ParseError {
657 line: 1,
658 message: "unclosed YAML front matter (missing closing `---`)".into(),
659 });
660 return (None, 1, String::new());
661 };
662
663 let close_line = close_offset + 1; let yaml_str: String = lines[1..close_line].join("\n");
665 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
667
668 match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
669 Ok(fm) => (Some(fm), body_start_line, body),
670 Err(e) => {
671 errors.push(ParseError {
672 line: 2,
673 message: format!("invalid YAML front matter: {e}"),
674 });
675 (None, body_start_line, body)
676 }
677 }
678}
679
680fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
681 if let Some(id) = &fm.id
683 && id.len() != MAX_CASE_ID_LEN
684 {
685 errors.push(ParseError {
686 line: 2,
687 message: format!(
688 "front matter `id` must be a {MAX_CASE_ID_LEN}-char NULID, got {} chars",
689 id.len()
690 ),
691 });
692 }
693
694 if fm.sources.len() > MAX_SOURCES {
696 errors.push(ParseError {
697 line: 2,
698 message: format!(
699 "front matter `sources` exceeds {MAX_SOURCES} entries (got {})",
700 fm.sources.len()
701 ),
702 });
703 }
704
705 for (i, source) in fm.sources.iter().enumerate() {
707 if !source.url().starts_with("https://") {
708 errors.push(ParseError {
709 line: 2,
710 message: format!("source[{i}] must be HTTPS, got {:?}", source.url()),
711 });
712 }
713 }
714
715 if let Some(ct) = &fm.case_type {
717 use crate::domain::CaseType;
718 let normalized = ct.to_lowercase().replace(' ', "_");
719 if !CaseType::KNOWN.contains(&normalized.as_str())
720 && crate::domain::parse_custom(ct).is_none()
721 {
722 errors.push(ParseError {
723 line: 2,
724 message: format!(
725 "invalid case_type {:?} (known: {}; use \"custom:Value\" for custom)",
726 ct,
727 CaseType::KNOWN.join(", ")
728 ),
729 });
730 }
731 }
732
733 if let Some(st) = &fm.status {
735 use crate::domain::CaseStatus;
736 let normalized = st.to_lowercase().replace(' ', "_");
737 if !CaseStatus::KNOWN.contains(&normalized.as_str()) {
738 errors.push(ParseError {
739 line: 2,
740 message: format!(
741 "invalid status {:?} (known: {})",
742 st,
743 CaseStatus::KNOWN.join(", ")
744 ),
745 });
746 }
747 }
748
749 if fm.tags.len() > MAX_CASE_TAGS {
751 errors.push(ParseError {
752 line: 2,
753 message: format!(
754 "front matter `tags` exceeds {MAX_CASE_TAGS} entries (got {})",
755 fm.tags.len()
756 ),
757 });
758 }
759 for (i, tag) in fm.tags.iter().enumerate() {
760 if tag.len() > MAX_TAG_LEN {
761 errors.push(ParseError {
762 line: 2,
763 message: format!("tag[{i}] exceeds {MAX_TAG_LEN} chars (got {})", tag.len()),
764 });
765 }
766 if tag.is_empty() {
767 errors.push(ParseError {
768 line: 2,
769 message: format!("tag[{i}] must not be empty"),
770 });
771 }
772 }
773
774 if let Some(tl) = &fm.tagline {
776 if tl.len() > MAX_TAGLINE_LEN {
777 errors.push(ParseError {
778 line: 2,
779 message: format!(
780 "tagline exceeds {MAX_TAGLINE_LEN} chars (got {})",
781 tl.len()
782 ),
783 });
784 }
785 if tl.trim().is_empty() {
786 errors.push(ParseError {
787 line: 2,
788 message: "tagline must not be empty".to_string(),
789 });
790 }
791 }
792}
793
794#[allow(clippy::too_many_lines)]
796fn extract_body(
797 body: &str,
798 body_start_line: usize,
799 errors: &mut Vec<ParseError>,
800) -> (String, String, Vec<Section>) {
801 let lines: Vec<&str> = body.lines().collect();
802 let mut title = String::new();
803 let mut title_found = false;
804 let mut summary_lines: Vec<&str> = Vec::new();
805 let mut sections: Vec<Section> = Vec::new();
806
807 let mut current_section_kind: Option<SectionKind> = None;
809 let mut current_section_line: usize = 0;
810 let mut current_section_body: Vec<&str> = Vec::new();
811
812 let mut state = State::BeforeTitle;
814
815 for (i, line) in lines.iter().enumerate() {
816 let file_line = body_start_line + i; if let Some(heading) = strip_heading(line, 1) {
819 if title_found {
820 errors.push(ParseError {
821 line: file_line,
822 message: "multiple H1 headings found (expected exactly one)".into(),
823 });
824 continue;
825 }
826 title = heading.to_string();
827 title_found = true;
828 state = State::Summary;
829 continue;
830 }
831
832 if let Some(heading) = strip_heading(line, 2) {
833 if let Some(kind) = current_section_kind.take() {
835 sections.push(Section {
836 kind,
837 body: current_section_body.join("\n"),
838 line: current_section_line,
839 });
840 current_section_body.clear();
841 }
842
843 match SectionKind::from_heading(heading) {
844 Some(kind) if kind.is_case_section() => {
845 if sections.iter().any(|s| s.kind == kind) {
847 errors.push(ParseError {
848 line: file_line,
849 message: format!("duplicate section: ## {heading}"),
850 });
851 }
852 current_section_kind = Some(kind);
853 current_section_line = file_line;
854 state = State::InSection;
855 }
856 Some(_) => {
857 errors.push(ParseError {
859 line: file_line,
860 message: format!(
861 "## {heading} is not allowed in case files (use standalone entity files in people/ or organizations/ instead)"
862 ),
863 });
864 }
865 None => {
866 errors.push(ParseError {
867 line: file_line,
868 message: format!(
869 "unknown section: ## {heading} (expected one of: {})",
870 KNOWN_CASE_SECTIONS.join(", ")
871 ),
872 });
873 }
874 }
875 continue;
876 }
877
878 match state {
879 State::BeforeTitle => {
880 if !line.trim().is_empty() {
882 errors.push(ParseError {
883 line: file_line,
884 message: "expected H1 title (# Title)".into(),
885 });
886 }
887 }
888 State::Summary => {
889 summary_lines.push(line);
890 }
891 State::InSection => {
892 current_section_body.push(line);
893 }
894 }
895 }
896
897 if let Some(kind) = current_section_kind.take() {
899 sections.push(Section {
900 kind,
901 body: current_section_body.join("\n"),
902 line: current_section_line,
903 });
904 }
905
906 if !title_found {
908 errors.push(ParseError {
909 line: body_start_line,
910 message: "missing H1 title".into(),
911 });
912 } else if title.len() > MAX_TITLE_LEN {
913 errors.push(ParseError {
914 line: body_start_line,
915 message: format!(
916 "H1 title exceeds {MAX_TITLE_LEN} chars (got {})",
917 title.len()
918 ),
919 });
920 }
921
922 let summary = summary_lines.clone().join("\n").trim().to_string();
924
925 if summary.len() > MAX_SUMMARY_LEN {
926 errors.push(ParseError {
927 line: body_start_line,
928 message: format!(
929 "summary exceeds {MAX_SUMMARY_LEN} chars (got {})",
930 summary.len()
931 ),
932 });
933 }
934
935 (title, summary, sections)
936}
937
/// Position of `extract_body` within the case file body.
#[derive(Clone, Copy)]
enum State {
    /// No H1 heading seen yet; non-blank text here is an error.
    BeforeTitle,
    /// After the H1 heading; plain lines are collected as summary.
    Summary,
    /// Inside an accepted `##` section; plain lines are collected as its body.
    InSection,
}
944
/// Returns the text of `line` if it is a Markdown heading of exactly `level`.
///
/// Leading whitespace is ignored. A heading is `level` `#` characters
/// followed either by nothing (giving `Some("")`) or by a single space and
/// the text, which is returned trimmed. Returns `None` when:
///
/// * the number of leading `#` characters differs from `level`,
/// * the marker is not followed by a space (`#Title`, `#\tTitle`), or
/// * the text after the space itself starts with `#` (`# #tag`).
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    let trimmed = line.trim_start();

    // Count the marker in place rather than building a prefix string per call.
    let hashes = trimmed.bytes().take_while(|&b| b == b'#').count();
    if hashes != level {
        return None;
    }

    // `#` is a single byte, so `hashes` is a valid char boundary.
    let after = &trimmed[hashes..];
    if after.is_empty() {
        return Some("");
    }

    let text = after.strip_prefix(' ')?;
    if text.starts_with('#') {
        None
    } else {
        Some(text.trim())
    }
}
967
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as a case file, panicking with every collected error on failure.
    fn parse_ok(input: &str) -> ParsedCase {
        parse(input).unwrap_or_else(|errs| {
            let joined = errs
                .iter()
                .map(ToString::to_string)
                .collect::<Vec<_>>()
                .join("; ");
            panic!("parse failed: {joined}");
        })
    }

    /// Asserts that parsing `input` as a case file fails with a message containing `needle`.
    fn assert_case_error(input: &str, needle: &str) {
        let errs = parse(input).unwrap_err();
        assert!(
            errs.iter().any(|e| e.message.contains(needle)),
            "no error containing {needle:?}"
        );
    }

    /// Builds a case file with empty `sources` and the title `# Title`, followed by `rest`.
    fn titled_case(rest: &[&str]) -> String {
        let mut lines = vec!["---", "sources: []", "---", "", "# Title", ""];
        lines.extend_from_slice(rest);
        lines.join("\n")
    }

    fn minimal_case() -> String {
        [
            "---",
            "id: 01H9XT7H1J3929RK32FWSRKV88",
            "sources:",
            " - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: associate_of",
        ]
        .join("\n")
    }

    #[test]
    fn parse_minimal_case() {
        let case = parse_ok(&minimal_case());

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1J3929RK32FWSRKV88"));
        assert_eq!(case.sources.len(), 1);
        assert_eq!(case.sources[0].url(), "https://example.com/source");
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    #[test]
    fn parse_missing_front_matter() {
        assert_case_error("# Title\n\nSummary.\n", "front matter");
    }

    #[test]
    fn parse_unclosed_front_matter() {
        assert_case_error("---\nsources: []\n# Title\n", "unclosed");
    }

    #[test]
    fn parse_invalid_case_id_wrong_length() {
        assert_case_error("---\nid: short\nsources: []\n---\n\n# Title\n", "NULID");
    }

    #[test]
    fn parse_case_id_absent_is_ok() {
        let input = "---\nsources:\n - https://example.com\n---\n\n# Title\n\nSummary.\n";
        let case = parse(input).unwrap();
        assert!(case.id.is_none());
    }

    #[test]
    fn parse_non_https_source() {
        assert_case_error(
            "---\nsources:\n - http://example.com\n---\n\n# Title\n",
            "HTTPS",
        );
    }

    #[test]
    fn parse_too_many_sources() {
        let sources: Vec<String> = (0..21)
            .map(|i| format!(" - https://example.com/{i}"))
            .collect();
        let input = format!("---\nsources:\n{}\n---\n\n# Title\n", sources.join("\n"));
        assert_case_error(&input, "exceeds 20");
    }

    #[test]
    fn parse_unknown_section() {
        let input = titled_case(&["## Unknown Section", ""]);
        assert_case_error(&input, "unknown section");
    }

    #[test]
    fn parse_duplicate_section() {
        let input = titled_case(&["## Events", "", "## Events", ""]);
        assert_case_error(&input, "duplicate");
    }

    #[test]
    fn parse_multiple_h1() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# First Title",
            "",
            "# Second Title",
            "",
        ]
        .join("\n");
        assert_case_error(&input, "multiple H1");
    }

    #[test]
    fn parse_all_sections() {
        let input = [
            "---",
            "id: 01H9XT7H1KRQ9SJ7SD9ETB5CVQ",
            "sources:",
            " - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]
        .join("\n");

        let case = parse_ok(&input);

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1KRQ9SJ7SD9ETB5CVQ"));
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    #[test]
    fn parse_empty_summary() {
        let case = parse_ok(&titled_case(&["## Events", ""]));
        assert_eq!(case.summary, "");
    }

    #[test]
    fn parse_multiline_summary() {
        let input = titled_case(&[
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]);

        let case = parse_ok(&input);
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        assert_eq!(strip_heading("### Entity", 2), None);
        assert_eq!(strip_heading("## Section", 1), None);
        assert_eq!(strip_heading("Normal text", 1), None);
    }

    #[test]
    fn section_body_content() {
        let input = titled_case(&[
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]);

        let case = parse_ok(&input);

        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    #[test]
    fn parse_rejects_people_section_in_case_file() {
        let input = titled_case(&["## People", ""]);
        assert_case_error(&input, "not allowed in case files");
    }

    #[test]
    fn parse_rejects_organizations_section_in_case_file() {
        let input = titled_case(&["## Organizations", ""]);
        assert_case_error(&input, "not allowed in case files");
    }

    #[test]
    fn parse_involved_section() {
        let input = titled_case(&[
            "## Involved",
            "",
            "- Alice Example",
            " id: 01ABC",
            "- Bob Example",
        ]);

        let case = parse_ok(&input);

        assert_eq!(case.involved.len(), 2);
        assert_eq!(case.involved[0].entity_name, "Alice Example");
        assert_eq!(case.involved[0].id.as_deref(), Some("01ABC"));
        assert_eq!(case.involved[1].entity_name, "Bob Example");
        assert!(case.involved[1].id.is_none());
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::Involved)
        );
    }

    #[test]
    fn parse_entity_file_with_id() {
        let input = [
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    #[test]
    fn parse_entity_file_without_id() {
        let input = [
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- org_type: sports_club",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = ["# Bob Smith", "", "- nationality: Dutch", ""].join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = [
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]
        .join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("H2 sections")));
    }

    #[test]
    fn parse_entity_file_missing_h1() {
        let input = ["---", "---", "", "- nationality: Dutch", ""].join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(errs.iter().any(|e| e.message.contains("missing H1")));
    }

    #[test]
    fn parse_related_cases_section() {
        let input = [
            "---",
            "tags: [bribery]",
            "sources:",
            " - https://example.com",
            "---",
            "",
            "# Test Case",
            "",
            "Summary text.",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/blbi-liquidity-aid-scandal",
            " description: Artalyta bribed Urip to influence the BLBI investigation",
            "- id/corruption/2008/another-case",
            " description: A second related case",
        ]
        .join("\n");

        let case = parse_ok(&input);

        assert_eq!(case.related_cases.len(), 2);
        assert_eq!(
            case.related_cases[0].case_path,
            "id/corruption/2002/blbi-liquidity-aid-scandal"
        );
        assert_eq!(
            case.related_cases[0].description,
            "Artalyta bribed Urip to influence the BLBI investigation"
        );
        assert_eq!(
            case.related_cases[1].case_path,
            "id/corruption/2008/another-case"
        );
        assert_eq!(case.related_cases[1].description, "A second related case");
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::RelatedCases)
        );
    }

    #[test]
    fn parse_related_cases_empty_path() {
        let input = titled_case(&[
            "## Related Cases",
            "",
            "- ",
            " description: Some description",
        ]);
        assert_case_error(&input, "case path must not be empty");
    }

    #[test]
    fn parse_related_cases_missing_description() {
        let input = titled_case(&["## Related Cases", "", "- id/corruption/2002/some-case"]);
        assert_case_error(&input, "description");
    }

    #[test]
    fn parse_related_cases_description_too_long() {
        let description_line = format!(" description: {}", "x".repeat(501));
        let input = titled_case(&[
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
            &description_line,
        ]);
        assert_case_error(&input, "exceeds 500");
    }

    #[test]
    fn parse_related_cases_too_many() {
        let entries: Vec<String> = (0..11)
            .flat_map(|i| {
                [
                    format!("- id/corruption/2002/case-{i}"),
                    format!(" description: Description {i}"),
                ]
            })
            .collect();

        let mut rest = vec!["## Related Cases", ""];
        rest.extend(entries.iter().map(String::as_str));

        assert_case_error(&titled_case(&rest), "exceeds 10");
    }
}