1#![allow(clippy::module_name_repetitions)]
2
3use std::fmt;
4
5use serde::{Deserialize, Serialize};
6
/// Required length of the case `id` in front matter; `validate_front_matter`
/// rejects any other length.
const MAX_CASE_ID_LEN: usize = 26;

/// Maximum number of `sources` entries allowed in case front matter.
const MAX_SOURCES: usize = 20;

/// Maximum length of an H1 heading (case title or entity name).
const MAX_TITLE_LEN: usize = 200;

/// Maximum length of the case summary.
const MAX_SUMMARY_LEN: usize = 2000;
18
/// H2 headings accepted in case files, listed in the "unknown section" error
/// so authors can see the valid choices.
///
/// Kept in step with the kinds for which `SectionKind::is_case_section`
/// returns `true`; `Involved` is one of them and must be listed here too.
const KNOWN_CASE_SECTIONS: &[&str] = &[
    "Events",
    "Documents",
    "Assets",
    "Relationships",
    "Timeline",
    "Related Cases",
    "Involved",
];
30
/// A case file after parsing: front matter values plus the Markdown body
/// split into title, summary, and sections.
#[derive(Debug)]
pub struct ParsedCase {
    /// `id` from front matter, if present.
    pub id: Option<String>,
    /// `sources` entries from front matter.
    pub sources: Vec<SourceEntry>,
    /// Text of the H1 heading.
    pub title: String,
    /// Trimmed text collected between the H1 heading and the first H2 section.
    pub summary: String,
    /// H2 sections left after `parse` has removed `Related Cases` and
    /// `Involved`, which are exposed through their own fields instead.
    pub sections: Vec<Section>,
    /// `case_type` from front matter, if present.
    pub case_type: Option<String>,
    /// `status` from front matter, if present.
    pub status: Option<String>,
    /// `amounts` from front matter, if present (kept as the raw string).
    pub amounts: Option<String>,
    /// `tags` from front matter.
    pub tags: Vec<String>,
    /// Entries parsed from `## Related Cases` sections.
    pub related_cases: Vec<RelatedCase>,
    /// Entries parsed from `## Involved` sections.
    pub involved: Vec<InvolvedEntry>,
}
53
/// One entry of a `## Related Cases` section.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct RelatedCase {
    /// Trimmed text following `- ` on the entry line.
    pub case_path: String,
    /// Trimmed value of the entry's `description:` line.
    pub description: String,
    /// Trimmed value of the entry's `id:` line; left out of serialized output
    /// when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Line number of the entry's `- ` line; never serialized.
    #[serde(skip)]
    pub line: usize,
}
68
/// One entry of a `## Involved` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvolvedEntry {
    /// Trimmed text following `- ` on the entry line.
    pub entity_name: String,
    /// Value of an `id:` line directly below the entry, if there is one.
    pub id: Option<String>,
    /// Line number of the entry's `- ` line.
    pub line: usize,
}
79
/// An H2 section of a case file with its raw, unparsed body.
#[derive(Debug)]
pub struct Section {
    /// Which recognized heading introduced the section.
    pub kind: SectionKind,
    /// Lines below the heading up to the next H2 (or end of input), joined
    /// with `\n`.
    pub body: String,
    /// Line number of the H2 heading.
    pub line: usize,
}
88
/// The H2 headings the parser knows by name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    People,
    Organizations,
    Events,
    Documents,
    Assets,
    Relationships,
    Timeline,
    RelatedCases,
    Involved,
}

impl SectionKind {
    /// Resolves an H2 heading to its kind.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. Returns `None` for headings that are not recognized.
    fn from_heading(heading: &str) -> Option<Self> {
        const HEADINGS: [(&str, SectionKind); 9] = [
            ("People", SectionKind::People),
            ("Organizations", SectionKind::Organizations),
            ("Events", SectionKind::Events),
            ("Documents", SectionKind::Documents),
            ("Assets", SectionKind::Assets),
            ("Relationships", SectionKind::Relationships),
            ("Timeline", SectionKind::Timeline),
            ("Related Cases", SectionKind::RelatedCases),
            ("Involved", SectionKind::Involved),
        ];

        let wanted = heading.trim();
        HEADINGS
            .iter()
            .find(|(label, _)| wanted.eq_ignore_ascii_case(label))
            .map(|&(_, kind)| kind)
    }

    /// Reports whether this kind of section may appear in a case file.
    ///
    /// `People` and `Organizations` are the only kinds that may not.
    pub fn is_case_section(self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::People | Self::Organizations => false,
            Self::Events
            | Self::Documents
            | Self::Assets
            | Self::Relationships
            | Self::Timeline
            | Self::RelatedCases
            | Self::Involved => true,
        }
    }
}
134
/// A problem found while parsing, tagged with the line it is attributed to.
#[derive(Debug)]
pub struct ParseError {
    /// Line number the problem is reported against.
    pub line: usize,
    /// Description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Renders the error as `line <n>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, message } = self;
        write!(f, "line {line}: {message}")
    }
}
147
/// Maximum number of `tags` in case front matter.
const MAX_CASE_TAGS: usize = 10;

/// Maximum number of `tags` in entity front matter.
const MAX_ENTITY_TAGS: usize = 5;

/// Maximum length of a single tag (case or entity).
const MAX_TAG_LEN: usize = 50;

/// Maximum number of entries in a `## Related Cases` section.
const MAX_RELATED_CASES: usize = 10;

/// Maximum length of a related case's description.
const MAX_RELATED_DESCRIPTION_LEN: usize = 500;
162
163pub fn parse_related_cases(
168 body: &str,
169 section_start_line: usize,
170 errors: &mut Vec<ParseError>,
171) -> Vec<RelatedCase> {
172 let mut entries: Vec<(String, String, Option<String>, usize)> = Vec::new(); for (offset, line) in body.lines().enumerate() {
175 let file_line = section_start_line + offset + 1;
176
177 if let Some(rest) = line.strip_prefix("- ") {
178 let case_path = rest.trim().to_string();
179 entries.push((case_path, String::new(), None, file_line));
180 } else if let Some(rest) = line.strip_prefix(" description: ") {
181 if let Some(entry) = entries.last_mut() {
182 entry.1 = rest.trim().to_string();
183 } else {
184 errors.push(ParseError {
185 line: file_line,
186 message: "description without a preceding case path".into(),
187 });
188 }
189 } else if let Some(rest) = line.strip_prefix(" id: ") {
190 if let Some(entry) = entries.last_mut() {
191 entry.2 = Some(rest.trim().to_string());
192 } else {
193 errors.push(ParseError {
194 line: file_line,
195 message: "id without a preceding case path".into(),
196 });
197 }
198 } else if !line.trim().is_empty() {
199 errors.push(ParseError {
200 line: file_line,
201 message: format!("unexpected line in Related Cases: {line}"),
202 });
203 }
204 }
205
206 if entries.len() > MAX_RELATED_CASES {
207 errors.push(ParseError {
208 line: section_start_line,
209 message: format!(
210 "Related Cases exceeds {MAX_RELATED_CASES} entries (got {})",
211 entries.len()
212 ),
213 });
214 }
215
216 let mut result = Vec::new();
217 for (case_path, description, id, line) in entries {
218 if case_path.is_empty() {
219 errors.push(ParseError {
220 line,
221 message: "related case path must not be empty".into(),
222 });
223 continue;
224 }
225 if description.is_empty() {
226 errors.push(ParseError {
227 line,
228 message: format!("related case {case_path:?} missing description"),
229 });
230 continue;
231 }
232 if description.len() > MAX_RELATED_DESCRIPTION_LEN {
233 errors.push(ParseError {
234 line,
235 message: format!(
236 "related case description exceeds {MAX_RELATED_DESCRIPTION_LEN} chars (got {})",
237 description.len()
238 ),
239 });
240 continue;
241 }
242 result.push(RelatedCase {
243 case_path,
244 description,
245 id,
246 line,
247 });
248 }
249
250 result
251}
252
/// Maximum number of entries in a `## Involved` section.
const MAX_INVOLVED: usize = 50;
255
256pub fn parse_involved(
264 body: &str,
265 section_start_line: usize,
266 errors: &mut Vec<ParseError>,
267) -> Vec<InvolvedEntry> {
268 let mut entries = Vec::new();
269 let lines: Vec<&str> = body.lines().collect();
270
271 let mut i = 0;
272 while i < lines.len() {
273 let file_line = section_start_line + 1 + i;
274 let trimmed = lines[i].trim();
275
276 if trimmed.is_empty() {
277 i += 1;
278 continue;
279 }
280
281 let Some(name) = trimmed.strip_prefix("- ") else {
282 errors.push(ParseError {
283 line: file_line,
284 message: format!("expected involved entry `- Entity Name`, got {trimmed:?}"),
285 });
286 i += 1;
287 continue;
288 };
289
290 let entity_name = name.trim().to_string();
291 if entity_name.is_empty() {
292 errors.push(ParseError {
293 line: file_line,
294 message: "involved entity name must not be empty".into(),
295 });
296 i += 1;
297 continue;
298 }
299
300 let mut id: Option<String> = None;
302 if i + 1 < lines.len() {
303 let next = lines[i + 1].trim();
304 if let Some(id_val) = next.strip_prefix("id: ") {
305 id = Some(id_val.trim().to_string());
306 i += 1;
307 }
308 }
309
310 entries.push(InvolvedEntry {
311 entity_name,
312 id,
313 line: file_line,
314 });
315
316 i += 1;
317 }
318
319 if entries.len() > MAX_INVOLVED {
320 errors.push(ParseError {
321 line: section_start_line,
322 message: format!(
323 "Involved exceeds {MAX_INVOLVED} entries (got {})",
324 entries.len()
325 ),
326 });
327 }
328
329 entries
330}
331
/// YAML front matter of a case file. Every key is optional and falls back to
/// its `Default` when missing.
#[derive(Deserialize)]
struct FrontMatter {
    /// Case identifier; its length is checked by `validate_front_matter`.
    #[serde(default)]
    id: Option<String>,
    /// Source references; count and URL scheme are checked by
    /// `validate_front_matter`.
    #[serde(default)]
    sources: Vec<SourceEntry>,
    /// Case type; checked against the known or custom values by
    /// `validate_front_matter`.
    #[serde(default)]
    case_type: Option<String>,
    /// Case status; checked against the known values by
    /// `validate_front_matter`.
    #[serde(default)]
    status: Option<String>,
    /// Raw `amounts` string; not validated in this file.
    #[serde(default)]
    amounts: Option<String>,
    /// Tags; count and per-tag length are checked by `validate_front_matter`.
    #[serde(default)]
    tags: Vec<String>,
}
349
/// A source reference in front matter.
///
/// Deserialized `untagged`: a plain string becomes [`SourceEntry::Url`], a
/// mapping with a `url` key becomes [`SourceEntry::Structured`].
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum SourceEntry {
    /// A bare URL string.
    Url(String),
    /// A URL with optional metadata.
    Structured {
        /// The source URL (required).
        url: String,
        /// Optional title of the source.
        #[serde(default)]
        title: Option<String>,
        /// Optional publication date, kept as the raw string.
        #[serde(default)]
        published_at: Option<String>,
        /// Optional language of the source, kept as the raw string.
        #[serde(default)]
        language: Option<String>,
    },
}
368
369impl SourceEntry {
370 pub fn url(&self) -> &str {
372 match self {
373 Self::Url(u) => u,
374 Self::Structured { url, .. } => url,
375 }
376 }
377}
378
/// YAML front matter of an entity file. Both keys are optional.
#[derive(Deserialize)]
struct EntityFrontMatter {
    /// Entity identifier, if present.
    #[serde(default)]
    id: Option<String>,
    /// Tags; count and per-tag length are checked by `parse_entity_file`.
    #[serde(default)]
    tags: Vec<String>,
}
388
/// An entity file after parsing.
#[derive(Debug)]
pub struct ParsedEntityFile {
    /// `id` from front matter, if present.
    pub id: Option<String>,
    /// Text of the H1 heading.
    pub name: String,
    /// Lines following the H1 heading, joined with `\n`.
    pub body: String,
    /// Line number of the H1 heading.
    pub title_line: usize,
    /// `tags` from front matter.
    pub tags: Vec<String>,
}
403
404pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
409 let mut errors = Vec::new();
410
411 let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);
413
414 let Some(front_matter) = front_matter else {
415 if errors.is_empty() {
416 errors.push(ParseError {
417 line: 1,
418 message: "missing YAML front matter (expected `---` delimiter)".into(),
419 });
420 }
421 return Err(errors);
422 };
423
424 validate_front_matter(&front_matter, &mut errors);
426
427 let (title, summary, mut sections) = extract_body(&body, body_start_line, &mut errors);
429
430 let mut related_cases = Vec::new();
432 for section in §ions {
433 if section.kind == SectionKind::RelatedCases {
434 let entries = parse_related_cases(§ion.body, section.line, &mut errors);
435 related_cases.extend(entries);
436 }
437 }
438 sections.retain(|s| s.kind != SectionKind::RelatedCases);
440
441 let mut involved = Vec::new();
443 for section in §ions {
444 if section.kind == SectionKind::Involved {
445 let entries = parse_involved(§ion.body, section.line, &mut errors);
446 involved.extend(entries);
447 }
448 }
449 sections.retain(|s| s.kind != SectionKind::Involved);
451
452 if !errors.is_empty() {
453 return Err(errors);
454 }
455
456 Ok(ParsedCase {
457 id: front_matter.id,
458 sources: front_matter.sources,
459 title,
460 summary,
461 sections,
462 case_type: front_matter.case_type,
463 status: front_matter.status,
464 amounts: front_matter.amounts,
465 tags: front_matter.tags,
466 related_cases,
467 involved,
468 })
469}
470
471pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
476 let mut errors = Vec::new();
477
478 let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);
479
480 let id = front_matter.as_ref().and_then(|fm| fm.id.clone());
481 let tags = front_matter.map_or_else(Vec::new, |fm| fm.tags);
482
483 if tags.len() > MAX_ENTITY_TAGS {
485 errors.push(ParseError {
486 line: 2,
487 message: format!(
488 "front matter `tags` exceeds {MAX_ENTITY_TAGS} entries (got {})",
489 tags.len()
490 ),
491 });
492 }
493 for (i, tag) in tags.iter().enumerate() {
494 if tag.len() > MAX_TAG_LEN {
495 errors.push(ParseError {
496 line: 2,
497 message: format!("front matter tag #{} exceeds {MAX_TAG_LEN} chars", i + 1),
498 });
499 }
500 if tag.is_empty() {
501 errors.push(ParseError {
502 line: 2,
503 message: format!("front matter tag #{} is empty", i + 1),
504 });
505 }
506 }
507
508 let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);
510
511 if !errors.is_empty() {
512 return Err(errors);
513 }
514
515 Ok(ParsedEntityFile {
516 id,
517 name,
518 body: field_body,
519 title_line,
520 tags,
521 })
522}
523
524fn extract_entity_front_matter(
527 input: &str,
528 errors: &mut Vec<ParseError>,
529) -> (Option<EntityFrontMatter>, usize, String) {
530 let lines: Vec<&str> = input.lines().collect();
531
532 let first_delim = lines.iter().position(|l| l.trim() == "---");
533 if first_delim != Some(0) {
534 return (None, 1, input.to_string());
536 }
537
538 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
539 let Some(close_offset) = close_delim else {
540 errors.push(ParseError {
541 line: 1,
542 message: "unclosed YAML front matter (missing closing `---`)".into(),
543 });
544 return (None, 1, String::new());
545 };
546
547 let close_line = close_offset + 1;
548 let yaml_str: String = lines[1..close_line].join("\n");
549 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
551
552 match serde_yaml::from_str::<EntityFrontMatter>(&yaml_str) {
553 Ok(fm) => (Some(fm), body_start_line, body),
554 Err(e) => {
555 errors.push(ParseError {
556 line: 2,
557 message: format!("invalid YAML front matter: {e}"),
558 });
559 (None, body_start_line, body)
560 }
561 }
562}
563
564fn extract_entity_body(
567 body: &str,
568 body_start_line: usize,
569 errors: &mut Vec<ParseError>,
570) -> (String, usize, String) {
571 let lines: Vec<&str> = body.lines().collect();
572 let mut name = String::new();
573 let mut title_found = false;
574 let mut title_line = body_start_line;
575 let mut field_lines: Vec<&str> = Vec::new();
576
577 for (i, line) in lines.iter().enumerate() {
578 let file_line = body_start_line + i;
579
580 if let Some(heading) = strip_heading(line, 1) {
581 if title_found {
582 errors.push(ParseError {
583 line: file_line,
584 message: "multiple H1 headings found (expected exactly one)".into(),
585 });
586 continue;
587 }
588 name = heading.to_string();
589 title_found = true;
590 title_line = file_line;
591 continue;
592 }
593
594 if strip_heading(line, 2).is_some() {
596 errors.push(ParseError {
597 line: file_line,
598 message: "H2 sections are not allowed in entity files".into(),
599 });
600 continue;
601 }
602
603 if title_found {
604 field_lines.push(line);
605 } else if !line.trim().is_empty() {
606 errors.push(ParseError {
607 line: file_line,
608 message: "expected H1 heading (# Name)".into(),
609 });
610 }
611 }
612
613 if !title_found {
614 errors.push(ParseError {
615 line: body_start_line,
616 message: "missing H1 heading".into(),
617 });
618 } else if name.len() > MAX_TITLE_LEN {
619 errors.push(ParseError {
620 line: title_line,
621 message: format!("H1 name exceeds {MAX_TITLE_LEN} chars (got {})", name.len()),
622 });
623 }
624
625 (name, title_line, field_lines.join("\n"))
626}
627
628fn extract_front_matter(
632 input: &str,
633 errors: &mut Vec<ParseError>,
634) -> (Option<FrontMatter>, usize, String) {
635 let lines: Vec<&str> = input.lines().collect();
636
637 let first_delim = lines.iter().position(|l| l.trim() == "---");
639 if first_delim != Some(0) {
640 errors.push(ParseError {
641 line: 1,
642 message: "missing YAML front matter (expected `---` on first line)".into(),
643 });
644 return (None, 1, input.to_string());
645 }
646
647 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
649 let Some(close_offset) = close_delim else {
650 errors.push(ParseError {
651 line: 1,
652 message: "unclosed YAML front matter (missing closing `---`)".into(),
653 });
654 return (None, 1, String::new());
655 };
656
657 let close_line = close_offset + 1; let yaml_str: String = lines[1..close_line].join("\n");
659 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
661
662 match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
663 Ok(fm) => (Some(fm), body_start_line, body),
664 Err(e) => {
665 errors.push(ParseError {
666 line: 2,
667 message: format!("invalid YAML front matter: {e}"),
668 });
669 (None, body_start_line, body)
670 }
671 }
672}
673
674fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
675 if let Some(id) = &fm.id
677 && id.len() != MAX_CASE_ID_LEN {
678 errors.push(ParseError {
679 line: 2,
680 message: format!(
681 "front matter `id` must be a {MAX_CASE_ID_LEN}-char NULID, got {} chars",
682 id.len()
683 ),
684 });
685 }
686
687 if fm.sources.len() > MAX_SOURCES {
689 errors.push(ParseError {
690 line: 2,
691 message: format!(
692 "front matter `sources` exceeds {MAX_SOURCES} entries (got {})",
693 fm.sources.len()
694 ),
695 });
696 }
697
698 for (i, source) in fm.sources.iter().enumerate() {
700 if !source.url().starts_with("https://") {
701 errors.push(ParseError {
702 line: 2,
703 message: format!("source[{i}] must be HTTPS, got {:?}", source.url()),
704 });
705 }
706 }
707
708 if let Some(ct) = &fm.case_type {
710 use crate::domain::CaseType;
711 let normalized = ct.to_lowercase().replace(' ', "_");
712 if !CaseType::KNOWN.contains(&normalized.as_str())
713 && crate::domain::parse_custom(ct).is_none()
714 {
715 errors.push(ParseError {
716 line: 2,
717 message: format!(
718 "invalid case_type {:?} (known: {}; use \"custom:Value\" for custom)",
719 ct,
720 CaseType::KNOWN.join(", ")
721 ),
722 });
723 }
724 }
725
726 if let Some(st) = &fm.status {
728 use crate::domain::CaseStatus;
729 let normalized = st.to_lowercase().replace(' ', "_");
730 if !CaseStatus::KNOWN.contains(&normalized.as_str()) {
731 errors.push(ParseError {
732 line: 2,
733 message: format!(
734 "invalid status {:?} (known: {})",
735 st,
736 CaseStatus::KNOWN.join(", ")
737 ),
738 });
739 }
740 }
741
742 if fm.tags.len() > MAX_CASE_TAGS {
744 errors.push(ParseError {
745 line: 2,
746 message: format!(
747 "front matter `tags` exceeds {MAX_CASE_TAGS} entries (got {})",
748 fm.tags.len()
749 ),
750 });
751 }
752 for (i, tag) in fm.tags.iter().enumerate() {
753 if tag.len() > MAX_TAG_LEN {
754 errors.push(ParseError {
755 line: 2,
756 message: format!("tag[{i}] exceeds {MAX_TAG_LEN} chars (got {})", tag.len()),
757 });
758 }
759 if tag.is_empty() {
760 errors.push(ParseError {
761 line: 2,
762 message: format!("tag[{i}] must not be empty"),
763 });
764 }
765 }
766}
767
768#[allow(clippy::too_many_lines)]
770fn extract_body(
771 body: &str,
772 body_start_line: usize,
773 errors: &mut Vec<ParseError>,
774) -> (String, String, Vec<Section>) {
775 let lines: Vec<&str> = body.lines().collect();
776 let mut title = String::new();
777 let mut title_found = false;
778 let mut summary_lines: Vec<&str> = Vec::new();
779 let mut sections: Vec<Section> = Vec::new();
780
781 let mut current_section_kind: Option<SectionKind> = None;
783 let mut current_section_line: usize = 0;
784 let mut current_section_body: Vec<&str> = Vec::new();
785
786 let mut state = State::BeforeTitle;
788
789 for (i, line) in lines.iter().enumerate() {
790 let file_line = body_start_line + i; if let Some(heading) = strip_heading(line, 1) {
793 if title_found {
794 errors.push(ParseError {
795 line: file_line,
796 message: "multiple H1 headings found (expected exactly one)".into(),
797 });
798 continue;
799 }
800 title = heading.to_string();
801 title_found = true;
802 state = State::Summary;
803 continue;
804 }
805
806 if let Some(heading) = strip_heading(line, 2) {
807 if let Some(kind) = current_section_kind.take() {
809 sections.push(Section {
810 kind,
811 body: current_section_body.join("\n"),
812 line: current_section_line,
813 });
814 current_section_body.clear();
815 }
816
817 match SectionKind::from_heading(heading) {
818 Some(kind) if kind.is_case_section() => {
819 if sections.iter().any(|s| s.kind == kind) {
821 errors.push(ParseError {
822 line: file_line,
823 message: format!("duplicate section: ## {heading}"),
824 });
825 }
826 current_section_kind = Some(kind);
827 current_section_line = file_line;
828 state = State::InSection;
829 }
830 Some(_) => {
831 errors.push(ParseError {
833 line: file_line,
834 message: format!(
835 "## {heading} is not allowed in case files (use standalone entity files in people/ or organizations/ instead)"
836 ),
837 });
838 }
839 None => {
840 errors.push(ParseError {
841 line: file_line,
842 message: format!(
843 "unknown section: ## {heading} (expected one of: {})",
844 KNOWN_CASE_SECTIONS.join(", ")
845 ),
846 });
847 }
848 }
849 continue;
850 }
851
852 match state {
853 State::BeforeTitle => {
854 if !line.trim().is_empty() {
856 errors.push(ParseError {
857 line: file_line,
858 message: "expected H1 title (# Title)".into(),
859 });
860 }
861 }
862 State::Summary => {
863 summary_lines.push(line);
864 }
865 State::InSection => {
866 current_section_body.push(line);
867 }
868 }
869 }
870
871 if let Some(kind) = current_section_kind.take() {
873 sections.push(Section {
874 kind,
875 body: current_section_body.join("\n"),
876 line: current_section_line,
877 });
878 }
879
880 if !title_found {
882 errors.push(ParseError {
883 line: body_start_line,
884 message: "missing H1 title".into(),
885 });
886 } else if title.len() > MAX_TITLE_LEN {
887 errors.push(ParseError {
888 line: body_start_line,
889 message: format!(
890 "H1 title exceeds {MAX_TITLE_LEN} chars (got {})",
891 title.len()
892 ),
893 });
894 }
895
896 let summary = summary_lines.clone().join("\n").trim().to_string();
898
899 if summary.len() > MAX_SUMMARY_LEN {
900 errors.push(ParseError {
901 line: body_start_line,
902 message: format!(
903 "summary exceeds {MAX_SUMMARY_LEN} chars (got {})",
904 summary.len()
905 ),
906 });
907 }
908
909 (title, summary, sections)
910}
911
/// Where `extract_body` currently is in the case file body; decides where a
/// non-heading line is routed.
#[derive(Clone, Copy)]
enum State {
    /// No H1 seen yet; non-blank lines are reported as errors.
    BeforeTitle,
    /// After the H1; lines are collected into the summary.
    Summary,
    /// After an H2; lines are collected into the current section body.
    InSection,
}
918
/// Returns the text of an ATX heading of exactly `level` `#` characters.
///
/// Leading whitespace before the `#`s is ignored. The result is:
///
/// * `Some("")` when the line is nothing but the `#`s;
/// * `Some(text)`, trimmed, when the `#`s are followed by a space and the
///   character after that space is not another `#`;
/// * `None` otherwise, which covers deeper headings (`###` when `level` is 2),
///   a missing space (`#Title`), and non-heading lines.
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    // Peel off exactly `level` leading `#`s, bailing out if there are fewer.
    let mut rest = line.trim_start();
    for _ in 0..level {
        rest = rest.strip_prefix('#')?;
    }

    if rest.is_empty() {
        return Some("");
    }

    match rest.strip_prefix(' ') {
        Some(text) if !text.starts_with('#') => Some(text.trim()),
        // Either a further `#` (deeper heading), a `#` right after the
        // space, or no separating space at all.
        _ => None,
    }
}
941
/// Unit tests for the case and entity file parsers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as a case file, panicking with every reported error if
    /// parsing fails.
    fn parse_ok(input: &str) -> ParsedCase {
        parse(input).unwrap_or_else(|errs| {
            let report: Vec<String> = errs.iter().map(ToString::to_string).collect();
            panic!("parse failed: {}", report.join("; "));
        })
    }

    /// Builds a case file with empty `sources`, the H1 `# Title`, a blank
    /// line, and then `rest`, all joined with `\n`.
    fn titled_case(rest: &[&str]) -> String {
        let preamble: [&str; 6] = ["---", "sources: []", "---", "", "# Title", ""];
        preamble
            .iter()
            .chain(rest)
            .copied()
            .collect::<Vec<&str>>()
            .join("\n")
    }

    /// Reports whether any error message contains `needle`.
    fn has_error(errs: &[ParseError], needle: &str) -> bool {
        errs.iter().any(|e| e.message.contains(needle))
    }

    /// Smallest realistic case file: id, one source, summary, two sections.
    fn minimal_case() -> String {
        [
            "---",
            "id: 01H9XT7H1J3929RK32FWSRKV88",
            "sources:",
            " - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: associate_of",
        ]
        .join("\n")
    }

    #[test]
    fn parse_minimal_case() {
        let case = parse_ok(&minimal_case());

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1J3929RK32FWSRKV88"));
        assert_eq!(case.sources.len(), 1);
        assert_eq!(case.sources[0].url(), "https://example.com/source");
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    #[test]
    fn parse_missing_front_matter() {
        let input = "# Title\n\nSummary.\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "front matter"));
    }

    #[test]
    fn parse_unclosed_front_matter() {
        let input = "---\nsources: []\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "unclosed"));
    }

    #[test]
    fn parse_invalid_case_id_wrong_length() {
        let input = "---\nid: short\nsources: []\n---\n\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "NULID"));
    }

    #[test]
    fn parse_case_id_absent_is_ok() {
        let input = "---\nsources:\n - https://example.com\n---\n\n# Title\n\nSummary.\n";
        let case = parse(input).unwrap();
        assert!(case.id.is_none());
    }

    #[test]
    fn parse_non_https_source() {
        let input = "---\nsources:\n - http://example.com\n---\n\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "HTTPS"));
    }

    #[test]
    fn parse_too_many_sources() {
        let sources: Vec<String> = (0..21)
            .map(|i| format!(" - https://example.com/{i}"))
            .collect();
        let input = format!("---\nsources:\n{}\n---\n\n# Title\n", sources.join("\n"));
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "exceeds 20"));
    }

    #[test]
    fn parse_unknown_section() {
        let input = titled_case(&["## Unknown Section", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "unknown section"));
    }

    #[test]
    fn parse_duplicate_section() {
        let input = titled_case(&["## Events", "", "## Events", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "duplicate"));
    }

    #[test]
    fn parse_multiple_h1() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# First Title",
            "",
            "# Second Title",
            "",
        ]
        .join("\n");
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "multiple H1"));
    }

    #[test]
    fn parse_all_sections() {
        let input = [
            "---",
            "id: 01H9XT7H1KRQ9SJ7SD9ETB5CVQ",
            "sources:",
            " - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]
        .join("\n");

        let case = parse_ok(&input);

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1KRQ9SJ7SD9ETB5CVQ"));
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    #[test]
    fn parse_empty_summary() {
        let input = titled_case(&["## Events", ""]);
        let case = parse_ok(&input);
        assert_eq!(case.summary, "");
    }

    #[test]
    fn parse_multiline_summary() {
        let input = titled_case(&[
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]);

        let case = parse_ok(&input);
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        // A deeper heading must not match a shallower level.
        assert_eq!(strip_heading("### Entity", 2), None);
        assert_eq!(strip_heading("## Section", 1), None);
        // Plain text is not a heading.
        assert_eq!(strip_heading("Normal text", 1), None);
    }

    #[test]
    fn section_body_content() {
        let input = titled_case(&[
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]);

        let case = parse_ok(&input);

        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    #[test]
    fn parse_rejects_people_section_in_case_file() {
        let input = titled_case(&["## People", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "not allowed in case files"));
    }

    #[test]
    fn parse_rejects_organizations_section_in_case_file() {
        let input = titled_case(&["## Organizations", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "not allowed in case files"));
    }

    #[test]
    fn parse_entity_file_with_id() {
        let input = [
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    #[test]
    fn parse_entity_file_without_id() {
        let input = [
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- org_type: sports_club",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = ["# Bob Smith", "", "- nationality: Dutch", ""].join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = [
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]
        .join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(has_error(&errs, "H2 sections"));
    }

    #[test]
    fn parse_entity_file_missing_h1() {
        let input = ["---", "---", "", "- nationality: Dutch", ""].join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(has_error(&errs, "missing H1"));
    }

    #[test]
    fn parse_related_cases_section() {
        let input = [
            "---",
            "tags: [bribery]",
            "sources:",
            " - https://example.com",
            "---",
            "",
            "# Test Case",
            "",
            "Summary text.",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/blbi-liquidity-aid-scandal",
            " description: Artalyta bribed Urip to influence the BLBI investigation",
            "- id/corruption/2008/another-case",
            " description: A second related case",
        ]
        .join("\n");

        let case = parse_ok(&input);

        assert_eq!(case.related_cases.len(), 2);
        assert_eq!(
            case.related_cases[0].case_path,
            "id/corruption/2002/blbi-liquidity-aid-scandal"
        );
        assert_eq!(
            case.related_cases[0].description,
            "Artalyta bribed Urip to influence the BLBI investigation"
        );
        assert_eq!(
            case.related_cases[1].case_path,
            "id/corruption/2008/another-case"
        );
        assert_eq!(case.related_cases[1].description, "A second related case");
        // The section is consumed into `related_cases`, not left in `sections`.
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::RelatedCases)
        );
    }

    #[test]
    fn parse_related_cases_empty_path() {
        let input = titled_case(&[
            "## Related Cases",
            "",
            "- ",
            " description: Some description",
        ]);

        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "case path must not be empty"));
    }

    #[test]
    fn parse_related_cases_missing_description() {
        let input = titled_case(&["## Related Cases", "", "- id/corruption/2002/some-case"]);

        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "description"));
    }

    #[test]
    fn parse_related_cases_description_too_long() {
        let long_desc = "x".repeat(501);
        let description_line = format!(" description: {long_desc}");
        let input = titled_case(&[
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
            description_line.as_str(),
        ]);

        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "exceeds 500"));
    }

    #[test]
    fn parse_related_cases_too_many() {
        let entries: Vec<String> = (0..11)
            .flat_map(|i| {
                [
                    format!("- id/corruption/2002/case-{i}"),
                    format!(" description: Description {i}"),
                ]
            })
            .collect();
        let mut rest: Vec<&str> = vec!["## Related Cases", ""];
        rest.extend(entries.iter().map(String::as_str));
        let input = titled_case(&rest);

        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "exceeds 10"));
    }

    #[test]
    fn parse_involved_section() {
        let input = titled_case(&[
            "## Involved",
            "",
            "- Alice Example",
            " id: 01H9XT7H1J3929RK32FWSRKV88",
            "- Corp Inc",
        ]);

        let case = parse_ok(&input);

        assert_eq!(case.involved.len(), 2);
        assert_eq!(case.involved[0].entity_name, "Alice Example");
        assert_eq!(
            case.involved[0].id.as_deref(),
            Some("01H9XT7H1J3929RK32FWSRKV88")
        );
        assert_eq!(case.involved[1].entity_name, "Corp Inc");
        assert!(case.involved[1].id.is_none());
        // The section is consumed into `involved`, not left in `sections`.
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::Involved)
        );
    }
}