1#![allow(clippy::module_name_repetitions)]
2
3use std::fmt;
4
5use serde::{Deserialize, Serialize};
6
/// Required length of a front matter `id`, compared against `str::len`.
const MAX_CASE_ID_LEN: usize = 26;

/// Maximum number of entries accepted in the front matter `sources` list.
const MAX_SOURCES: usize = 20;

/// Upper bound on the length of an H1 heading (case title or entity name).
const MAX_TITLE_LEN: usize = 200;

/// Upper bound on the length of a case summary.
const MAX_SUMMARY_LEN: usize = 2000;
18
/// H2 headings accepted in case files, listed in the "unknown section" error hint.
///
/// Must stay in sync with the headings for which `SectionKind::is_case_section`
/// returns `true`; `Involved` is accepted by the parser, so it is listed here too.
const KNOWN_CASE_SECTIONS: &[&str] = &[
    "Events",
    "Documents",
    "Assets",
    "Relationships",
    "Timeline",
    "Related Cases",
    "Involved",
];
30
/// Result of successfully parsing a case file with [`parse`].
#[derive(Debug)]
pub struct ParsedCase {
    /// `id` from the front matter, if one was given.
    pub id: Option<String>,
    /// `sources` from the front matter.
    pub sources: Vec<SourceEntry>,
    /// Text of the H1 heading.
    pub title: String,
    /// Text collected between the H1 heading and the first H2 section, trimmed.
    pub summary: String,
    /// Sections left after `Related Cases` and `Involved` have been parsed out.
    pub sections: Vec<Section>,
    /// `case_type` from the front matter, as written.
    pub case_type: Option<String>,
    /// `status` from the front matter, as written.
    pub status: Option<String>,
    /// `amounts` from the front matter, as written (not validated here).
    pub amounts: Option<String>,
    /// `tags` from the front matter.
    pub tags: Vec<String>,
    /// Entries parsed from `## Related Cases` sections.
    pub related_cases: Vec<RelatedCase>,
    /// Entries parsed from `## Involved` sections.
    pub involved: Vec<InvolvedEntry>,
}
53
/// One entry of a `## Related Cases` section.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct RelatedCase {
    /// Text following the `- ` bullet, trimmed.
    pub case_path: String,
    /// Value of the entry's `description:` line, trimmed.
    pub description: String,
    /// Value of the entry's optional `id:` line; omitted from serialized output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Line number recorded for the entry's bullet; never serialized.
    #[serde(skip)]
    pub line: usize,
}
68
/// One entry of an `## Involved` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvolvedEntry {
    /// Text following the `- ` bullet, trimmed.
    pub entity_name: String,
    /// Value of the `id:` line directly after the bullet, if there is one.
    pub id: Option<String>,
    /// Line number recorded for the entry's bullet.
    pub line: usize,
}
79
/// An H2 section of a case file together with its raw, unparsed body.
#[derive(Debug)]
pub struct Section {
    /// Which known heading introduced the section.
    pub kind: SectionKind,
    /// Lines collected under the heading, joined with `\n`.
    pub body: String,
    /// Line number of the H2 heading.
    pub line: usize,
}
88
/// The H2 headings the parser knows how to name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    People,
    Organizations,
    Events,
    Documents,
    Assets,
    Relationships,
    Timeline,
    RelatedCases,
    Involved,
}

impl SectionKind {
    /// Maps heading text to its kind, ignoring surrounding whitespace and ASCII case.
    ///
    /// Returns `None` when the heading is not one of the known names.
    fn from_heading(heading: &str) -> Option<Self> {
        const HEADINGS: [(&str, SectionKind); 9] = [
            ("People", SectionKind::People),
            ("Organizations", SectionKind::Organizations),
            ("Events", SectionKind::Events),
            ("Documents", SectionKind::Documents),
            ("Assets", SectionKind::Assets),
            ("Relationships", SectionKind::Relationships),
            ("Timeline", SectionKind::Timeline),
            ("Related Cases", SectionKind::RelatedCases),
            ("Involved", SectionKind::Involved),
        ];

        let wanted = heading.trim();
        HEADINGS
            .iter()
            .find(|(label, _)| wanted.eq_ignore_ascii_case(label))
            .map(|&(_, kind)| kind)
    }

    /// Reports whether this kind of section may appear in a case file.
    ///
    /// Every kind except `People` and `Organizations` qualifies.
    pub fn is_case_section(self) -> bool {
        match self {
            Self::People | Self::Organizations => false,
            Self::Events
            | Self::Documents
            | Self::Assets
            | Self::Relationships
            | Self::Timeline
            | Self::RelatedCases
            | Self::Involved => true,
        }
    }
}
134
/// A single problem found while parsing, tied to a 1-based line of the input file.
#[derive(Debug)]
pub struct ParseError {
    /// Line the problem is reported against (the first line of the file is 1).
    pub line: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Renders the error as `line <n>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "line {}: {}", self.line, self.message)
    }
}

// Lets callers box the error or propagate it with `?` alongside other error types.
impl std::error::Error for ParseError {}
147
/// Maximum number of `tags` accepted in a case file's front matter.
const MAX_CASE_TAGS: usize = 10;

/// Maximum number of `tags` accepted in an entity file's front matter.
const MAX_ENTITY_TAGS: usize = 5;

/// Upper bound on the length of a single tag (case and entity files).
const MAX_TAG_LEN: usize = 50;

/// Maximum number of entries accepted in a `## Related Cases` section.
const MAX_RELATED_CASES: usize = 10;

/// Upper bound on the length of a related case description.
const MAX_RELATED_DESCRIPTION_LEN: usize = 500;
162
163pub fn parse_related_cases(
168 body: &str,
169 section_start_line: usize,
170 errors: &mut Vec<ParseError>,
171) -> Vec<RelatedCase> {
172 let mut entries: Vec<(String, String, Option<String>, usize)> = Vec::new(); for (offset, line) in body.lines().enumerate() {
175 let file_line = section_start_line + offset + 1;
176
177 if let Some(rest) = line.strip_prefix("- ") {
178 let case_path = rest.trim().to_string();
179 entries.push((case_path, String::new(), None, file_line));
180 } else if let Some(rest) = line.strip_prefix(" description: ") {
181 if let Some(entry) = entries.last_mut() {
182 entry.1 = rest.trim().to_string();
183 } else {
184 errors.push(ParseError {
185 line: file_line,
186 message: "description without a preceding case path".into(),
187 });
188 }
189 } else if let Some(rest) = line.strip_prefix(" id: ") {
190 if let Some(entry) = entries.last_mut() {
191 entry.2 = Some(rest.trim().to_string());
192 } else {
193 errors.push(ParseError {
194 line: file_line,
195 message: "id without a preceding case path".into(),
196 });
197 }
198 } else if !line.trim().is_empty() {
199 errors.push(ParseError {
200 line: file_line,
201 message: format!("unexpected line in Related Cases: {line}"),
202 });
203 }
204 }
205
206 if entries.len() > MAX_RELATED_CASES {
207 errors.push(ParseError {
208 line: section_start_line,
209 message: format!(
210 "Related Cases exceeds {MAX_RELATED_CASES} entries (got {})",
211 entries.len()
212 ),
213 });
214 }
215
216 let mut result = Vec::new();
217 for (case_path, description, id, line) in entries {
218 if case_path.is_empty() {
219 errors.push(ParseError {
220 line,
221 message: "related case path must not be empty".into(),
222 });
223 continue;
224 }
225 if description.is_empty() {
226 errors.push(ParseError {
227 line,
228 message: format!("related case {case_path:?} missing description"),
229 });
230 continue;
231 }
232 if description.len() > MAX_RELATED_DESCRIPTION_LEN {
233 errors.push(ParseError {
234 line,
235 message: format!(
236 "related case description exceeds {MAX_RELATED_DESCRIPTION_LEN} chars (got {})",
237 description.len()
238 ),
239 });
240 continue;
241 }
242 result.push(RelatedCase {
243 case_path,
244 description,
245 id,
246 line,
247 });
248 }
249
250 result
251}
252
253const MAX_INVOLVED: usize = 50;
255
256pub fn parse_involved(
264 body: &str,
265 section_start_line: usize,
266 errors: &mut Vec<ParseError>,
267) -> Vec<InvolvedEntry> {
268 let mut entries = Vec::new();
269 let lines: Vec<&str> = body.lines().collect();
270
271 let mut i = 0;
272 while i < lines.len() {
273 let file_line = section_start_line + 1 + i;
274 let trimmed = lines[i].trim();
275
276 if trimmed.is_empty() {
277 i += 1;
278 continue;
279 }
280
281 let Some(name) = trimmed.strip_prefix("- ") else {
282 errors.push(ParseError {
283 line: file_line,
284 message: format!("expected involved entry `- Entity Name`, got {trimmed:?}"),
285 });
286 i += 1;
287 continue;
288 };
289
290 let entity_name = name.trim().to_string();
291 if entity_name.is_empty() {
292 errors.push(ParseError {
293 line: file_line,
294 message: "involved entity name must not be empty".into(),
295 });
296 i += 1;
297 continue;
298 }
299
300 let mut id: Option<String> = None;
302 if i + 1 < lines.len() {
303 let next = lines[i + 1].trim();
304 if let Some(id_val) = next.strip_prefix("id: ") {
305 id = Some(id_val.trim().to_string());
306 i += 1;
307 }
308 }
309
310 entries.push(InvolvedEntry {
311 entity_name,
312 id,
313 line: file_line,
314 });
315
316 i += 1;
317 }
318
319 if entries.len() > MAX_INVOLVED {
320 errors.push(ParseError {
321 line: section_start_line,
322 message: format!(
323 "Involved exceeds {MAX_INVOLVED} entries (got {})",
324 entries.len()
325 ),
326 });
327 }
328
329 entries
330}
331
/// YAML front matter of a case file.
///
/// Every key is optional: missing keys deserialize to `None` or an empty list.
#[derive(Deserialize)]
struct FrontMatter {
    /// Case identifier; its length is checked in `validate_front_matter`.
    #[serde(default)]
    id: Option<String>,
    /// Source references for the case.
    #[serde(default)]
    sources: Vec<SourceEntry>,
    /// Case type, checked against the known values in `validate_front_matter`.
    #[serde(default)]
    case_type: Option<String>,
    /// Case status, checked against the known values in `validate_front_matter`.
    #[serde(default)]
    status: Option<String>,
    /// Free-form amounts string; passed through without validation.
    #[serde(default)]
    amounts: Option<String>,
    /// Tags attached to the case.
    #[serde(default)]
    tags: Vec<String>,
}
349
/// A source reference from the front matter.
///
/// Untagged: a plain string deserializes as [`SourceEntry::Url`], a mapping
/// with a `url` key as [`SourceEntry::Structured`].
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum SourceEntry {
    /// A bare URL string.
    Url(String),
    /// A URL with optional metadata.
    Structured {
        /// The source URL (required).
        url: String,
        /// Optional title of the source.
        #[serde(default)]
        title: Option<String>,
        /// Optional publication date, kept as the raw string.
        #[serde(default)]
        published_at: Option<String>,
        /// Optional language of the source, kept as the raw string.
        #[serde(default)]
        language: Option<String>,
    },
}
368
369impl SourceEntry {
370 pub fn url(&self) -> &str {
372 match self {
373 Self::Url(u) => u,
374 Self::Structured { url, .. } => url,
375 }
376 }
377}
378
/// YAML front matter of an entity file; both keys are optional.
#[derive(Deserialize)]
struct EntityFrontMatter {
    /// Entity identifier, if one was given.
    #[serde(default)]
    id: Option<String>,
    /// Tags attached to the entity.
    #[serde(default)]
    tags: Vec<String>,
}
388
/// Result of successfully parsing an entity file with [`parse_entity_file`].
#[derive(Debug)]
pub struct ParsedEntityFile {
    /// `id` from the front matter, if one was given.
    pub id: Option<String>,
    /// Text of the H1 heading.
    pub name: String,
    /// Lines following the H1 heading, joined with `\n`.
    pub body: String,
    /// Line number of the H1 heading.
    pub title_line: usize,
    /// `tags` from the front matter (empty when there is no front matter).
    pub tags: Vec<String>,
}
403
404pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
409 let mut errors = Vec::new();
410
411 let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);
413
414 let Some(front_matter) = front_matter else {
415 if errors.is_empty() {
416 errors.push(ParseError {
417 line: 1,
418 message: "missing YAML front matter (expected `---` delimiter)".into(),
419 });
420 }
421 return Err(errors);
422 };
423
424 validate_front_matter(&front_matter, &mut errors);
426
427 let (title, summary, mut sections) = extract_body(&body, body_start_line, &mut errors);
429
430 let mut related_cases = Vec::new();
432 for section in §ions {
433 if section.kind == SectionKind::RelatedCases {
434 let entries = parse_related_cases(§ion.body, section.line, &mut errors);
435 related_cases.extend(entries);
436 }
437 }
438 sections.retain(|s| s.kind != SectionKind::RelatedCases);
440
441 let mut involved = Vec::new();
443 for section in §ions {
444 if section.kind == SectionKind::Involved {
445 let entries = parse_involved(§ion.body, section.line, &mut errors);
446 involved.extend(entries);
447 }
448 }
449 sections.retain(|s| s.kind != SectionKind::Involved);
451
452 if !errors.is_empty() {
453 return Err(errors);
454 }
455
456 Ok(ParsedCase {
457 id: front_matter.id,
458 sources: front_matter.sources,
459 title,
460 summary,
461 sections,
462 case_type: front_matter.case_type,
463 status: front_matter.status,
464 amounts: front_matter.amounts,
465 tags: front_matter.tags,
466 related_cases,
467 involved,
468 })
469}
470
471pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
476 let mut errors = Vec::new();
477
478 let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);
479
480 let id = front_matter.as_ref().and_then(|fm| fm.id.clone());
481 let tags = front_matter.map_or_else(Vec::new, |fm| fm.tags);
482
483 if tags.len() > MAX_ENTITY_TAGS {
485 errors.push(ParseError {
486 line: 2,
487 message: format!(
488 "front matter `tags` exceeds {MAX_ENTITY_TAGS} entries (got {})",
489 tags.len()
490 ),
491 });
492 }
493 for (i, tag) in tags.iter().enumerate() {
494 if tag.len() > MAX_TAG_LEN {
495 errors.push(ParseError {
496 line: 2,
497 message: format!("front matter tag #{} exceeds {MAX_TAG_LEN} chars", i + 1),
498 });
499 }
500 if tag.is_empty() {
501 errors.push(ParseError {
502 line: 2,
503 message: format!("front matter tag #{} is empty", i + 1),
504 });
505 }
506 }
507
508 let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);
510
511 if !errors.is_empty() {
512 return Err(errors);
513 }
514
515 Ok(ParsedEntityFile {
516 id,
517 name,
518 body: field_body,
519 title_line,
520 tags,
521 })
522}
523
524fn extract_entity_front_matter(
527 input: &str,
528 errors: &mut Vec<ParseError>,
529) -> (Option<EntityFrontMatter>, usize, String) {
530 let lines: Vec<&str> = input.lines().collect();
531
532 let first_delim = lines.iter().position(|l| l.trim() == "---");
533 if first_delim != Some(0) {
534 return (None, 1, input.to_string());
536 }
537
538 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
539 let Some(close_offset) = close_delim else {
540 errors.push(ParseError {
541 line: 1,
542 message: "unclosed YAML front matter (missing closing `---`)".into(),
543 });
544 return (None, 1, String::new());
545 };
546
547 let close_line = close_offset + 1;
548 let yaml_str: String = lines[1..close_line].join("\n");
549 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
551
552 match serde_yaml::from_str::<EntityFrontMatter>(&yaml_str) {
553 Ok(fm) => (Some(fm), body_start_line, body),
554 Err(e) => {
555 errors.push(ParseError {
556 line: 2,
557 message: format!("invalid YAML front matter: {e}"),
558 });
559 (None, body_start_line, body)
560 }
561 }
562}
563
564fn extract_entity_body(
567 body: &str,
568 body_start_line: usize,
569 errors: &mut Vec<ParseError>,
570) -> (String, usize, String) {
571 let lines: Vec<&str> = body.lines().collect();
572 let mut name = String::new();
573 let mut title_found = false;
574 let mut title_line = body_start_line;
575 let mut field_lines: Vec<&str> = Vec::new();
576
577 for (i, line) in lines.iter().enumerate() {
578 let file_line = body_start_line + i;
579
580 if let Some(heading) = strip_heading(line, 1) {
581 if title_found {
582 errors.push(ParseError {
583 line: file_line,
584 message: "multiple H1 headings found (expected exactly one)".into(),
585 });
586 continue;
587 }
588 name = heading.to_string();
589 title_found = true;
590 title_line = file_line;
591 continue;
592 }
593
594 if strip_heading(line, 2).is_some() {
596 errors.push(ParseError {
597 line: file_line,
598 message: "H2 sections are not allowed in entity files".into(),
599 });
600 continue;
601 }
602
603 if title_found {
604 field_lines.push(line);
605 } else if !line.trim().is_empty() {
606 errors.push(ParseError {
607 line: file_line,
608 message: "expected H1 heading (# Name)".into(),
609 });
610 }
611 }
612
613 if !title_found {
614 errors.push(ParseError {
615 line: body_start_line,
616 message: "missing H1 heading".into(),
617 });
618 } else if name.len() > MAX_TITLE_LEN {
619 errors.push(ParseError {
620 line: title_line,
621 message: format!("H1 name exceeds {MAX_TITLE_LEN} chars (got {})", name.len()),
622 });
623 }
624
625 (name, title_line, field_lines.join("\n"))
626}
627
628fn extract_front_matter(
632 input: &str,
633 errors: &mut Vec<ParseError>,
634) -> (Option<FrontMatter>, usize, String) {
635 let lines: Vec<&str> = input.lines().collect();
636
637 let first_delim = lines.iter().position(|l| l.trim() == "---");
639 if first_delim != Some(0) {
640 errors.push(ParseError {
641 line: 1,
642 message: "missing YAML front matter (expected `---` on first line)".into(),
643 });
644 return (None, 1, input.to_string());
645 }
646
647 let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
649 let Some(close_offset) = close_delim else {
650 errors.push(ParseError {
651 line: 1,
652 message: "unclosed YAML front matter (missing closing `---`)".into(),
653 });
654 return (None, 1, String::new());
655 };
656
657 let close_line = close_offset + 1; let yaml_str: String = lines[1..close_line].join("\n");
659 let body_start_line = close_line + 2; let body = lines[close_line + 1..].join("\n");
661
662 match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
663 Ok(fm) => (Some(fm), body_start_line, body),
664 Err(e) => {
665 errors.push(ParseError {
666 line: 2,
667 message: format!("invalid YAML front matter: {e}"),
668 });
669 (None, body_start_line, body)
670 }
671 }
672}
673
674fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
675 if let Some(id) = &fm.id
677 && id.len() != MAX_CASE_ID_LEN
678 {
679 errors.push(ParseError {
680 line: 2,
681 message: format!(
682 "front matter `id` must be a {MAX_CASE_ID_LEN}-char NULID, got {} chars",
683 id.len()
684 ),
685 });
686 }
687
688 if fm.sources.len() > MAX_SOURCES {
690 errors.push(ParseError {
691 line: 2,
692 message: format!(
693 "front matter `sources` exceeds {MAX_SOURCES} entries (got {})",
694 fm.sources.len()
695 ),
696 });
697 }
698
699 for (i, source) in fm.sources.iter().enumerate() {
701 if !source.url().starts_with("https://") {
702 errors.push(ParseError {
703 line: 2,
704 message: format!("source[{i}] must be HTTPS, got {:?}", source.url()),
705 });
706 }
707 }
708
709 if let Some(ct) = &fm.case_type {
711 use crate::domain::CaseType;
712 let normalized = ct.to_lowercase().replace(' ', "_");
713 if !CaseType::KNOWN.contains(&normalized.as_str())
714 && crate::domain::parse_custom(ct).is_none()
715 {
716 errors.push(ParseError {
717 line: 2,
718 message: format!(
719 "invalid case_type {:?} (known: {}; use \"custom:Value\" for custom)",
720 ct,
721 CaseType::KNOWN.join(", ")
722 ),
723 });
724 }
725 }
726
727 if let Some(st) = &fm.status {
729 use crate::domain::CaseStatus;
730 let normalized = st.to_lowercase().replace(' ', "_");
731 if !CaseStatus::KNOWN.contains(&normalized.as_str()) {
732 errors.push(ParseError {
733 line: 2,
734 message: format!(
735 "invalid status {:?} (known: {})",
736 st,
737 CaseStatus::KNOWN.join(", ")
738 ),
739 });
740 }
741 }
742
743 if fm.tags.len() > MAX_CASE_TAGS {
745 errors.push(ParseError {
746 line: 2,
747 message: format!(
748 "front matter `tags` exceeds {MAX_CASE_TAGS} entries (got {})",
749 fm.tags.len()
750 ),
751 });
752 }
753 for (i, tag) in fm.tags.iter().enumerate() {
754 if tag.len() > MAX_TAG_LEN {
755 errors.push(ParseError {
756 line: 2,
757 message: format!("tag[{i}] exceeds {MAX_TAG_LEN} chars (got {})", tag.len()),
758 });
759 }
760 if tag.is_empty() {
761 errors.push(ParseError {
762 line: 2,
763 message: format!("tag[{i}] must not be empty"),
764 });
765 }
766 }
767}
768
769#[allow(clippy::too_many_lines)]
771fn extract_body(
772 body: &str,
773 body_start_line: usize,
774 errors: &mut Vec<ParseError>,
775) -> (String, String, Vec<Section>) {
776 let lines: Vec<&str> = body.lines().collect();
777 let mut title = String::new();
778 let mut title_found = false;
779 let mut summary_lines: Vec<&str> = Vec::new();
780 let mut sections: Vec<Section> = Vec::new();
781
782 let mut current_section_kind: Option<SectionKind> = None;
784 let mut current_section_line: usize = 0;
785 let mut current_section_body: Vec<&str> = Vec::new();
786
787 let mut state = State::BeforeTitle;
789
790 for (i, line) in lines.iter().enumerate() {
791 let file_line = body_start_line + i; if let Some(heading) = strip_heading(line, 1) {
794 if title_found {
795 errors.push(ParseError {
796 line: file_line,
797 message: "multiple H1 headings found (expected exactly one)".into(),
798 });
799 continue;
800 }
801 title = heading.to_string();
802 title_found = true;
803 state = State::Summary;
804 continue;
805 }
806
807 if let Some(heading) = strip_heading(line, 2) {
808 if let Some(kind) = current_section_kind.take() {
810 sections.push(Section {
811 kind,
812 body: current_section_body.join("\n"),
813 line: current_section_line,
814 });
815 current_section_body.clear();
816 }
817
818 match SectionKind::from_heading(heading) {
819 Some(kind) if kind.is_case_section() => {
820 if sections.iter().any(|s| s.kind == kind) {
822 errors.push(ParseError {
823 line: file_line,
824 message: format!("duplicate section: ## {heading}"),
825 });
826 }
827 current_section_kind = Some(kind);
828 current_section_line = file_line;
829 state = State::InSection;
830 }
831 Some(_) => {
832 errors.push(ParseError {
834 line: file_line,
835 message: format!(
836 "## {heading} is not allowed in case files (use standalone entity files in people/ or organizations/ instead)"
837 ),
838 });
839 }
840 None => {
841 errors.push(ParseError {
842 line: file_line,
843 message: format!(
844 "unknown section: ## {heading} (expected one of: {})",
845 KNOWN_CASE_SECTIONS.join(", ")
846 ),
847 });
848 }
849 }
850 continue;
851 }
852
853 match state {
854 State::BeforeTitle => {
855 if !line.trim().is_empty() {
857 errors.push(ParseError {
858 line: file_line,
859 message: "expected H1 title (# Title)".into(),
860 });
861 }
862 }
863 State::Summary => {
864 summary_lines.push(line);
865 }
866 State::InSection => {
867 current_section_body.push(line);
868 }
869 }
870 }
871
872 if let Some(kind) = current_section_kind.take() {
874 sections.push(Section {
875 kind,
876 body: current_section_body.join("\n"),
877 line: current_section_line,
878 });
879 }
880
881 if !title_found {
883 errors.push(ParseError {
884 line: body_start_line,
885 message: "missing H1 title".into(),
886 });
887 } else if title.len() > MAX_TITLE_LEN {
888 errors.push(ParseError {
889 line: body_start_line,
890 message: format!(
891 "H1 title exceeds {MAX_TITLE_LEN} chars (got {})",
892 title.len()
893 ),
894 });
895 }
896
897 let summary = summary_lines.clone().join("\n").trim().to_string();
899
900 if summary.len() > MAX_SUMMARY_LEN {
901 errors.push(ParseError {
902 line: body_start_line,
903 message: format!(
904 "summary exceeds {MAX_SUMMARY_LEN} chars (got {})",
905 summary.len()
906 ),
907 });
908 }
909
910 (title, summary, sections)
911}
912
913#[derive(Clone, Copy)]
914enum State {
915 BeforeTitle,
916 Summary,
917 InSection,
918}
919
/// Returns the text of a markdown heading of exactly `level` `#` characters.
///
/// Leading whitespace before the `#` run is ignored. The run must be followed
/// by a single space and text that does not itself start with `#`, or by
/// nothing at all (which yields `Some("")`). The returned text is trimmed.
/// Every other line, including headings of a different level, yields `None`.
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    let candidate = line.trim_start();
    let rest = candidate.trim_start_matches('#');

    // `#` is one byte, so the byte difference is the number of leading hashes.
    let hashes = candidate.len() - rest.len();
    if hashes != level {
        return None;
    }

    if rest.is_empty() {
        return Some("");
    }

    rest.strip_prefix(' ')
        .filter(|text| !text.starts_with('#'))
        .map(str::trim)
}
942
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a case file that starts with empty-sources front matter, a blank
    /// line, `# Title` and another blank line, followed by `rest`.
    fn titled_case(rest: &[&str]) -> String {
        let mut lines: Vec<&str> = vec!["---", "sources: []", "---", "", "# Title", ""];
        lines.extend_from_slice(rest);
        lines.join("\n")
    }

    /// Parses a case file that must be accepted, panicking with every reported error otherwise.
    fn accepted(input: &str) -> ParsedCase {
        parse(input).unwrap_or_else(|errs| {
            panic!(
                "parse failed: {}",
                errs.iter()
                    .map(ToString::to_string)
                    .collect::<Vec<_>>()
                    .join("; ")
            );
        })
    }

    /// Parses a case file that must be rejected and returns its errors.
    fn rejected(input: &str) -> Vec<ParseError> {
        parse(input).unwrap_err()
    }

    /// True when at least one error message contains `needle`.
    fn mentions(errs: &[ParseError], needle: &str) -> bool {
        errs.iter().any(|e| e.message.contains(needle))
    }

    fn minimal_case() -> String {
        [
            "---",
            "id: 01H9XT7H1J3929RK32FWSRKV88",
            "sources:",
            " - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: associate_of",
        ]
        .join("\n")
    }

    #[test]
    fn parse_minimal_case() {
        let case = accepted(&minimal_case());

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1J3929RK32FWSRKV88"));
        assert_eq!(case.sources.len(), 1);
        assert_eq!(case.sources[0].url(), "https://example.com/source");
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    #[test]
    fn parse_missing_front_matter() {
        let errs = rejected("# Title\n\nSummary.\n");
        assert!(mentions(&errs, "front matter"));
    }

    #[test]
    fn parse_unclosed_front_matter() {
        let errs = rejected("---\nsources: []\n# Title\n");
        assert!(mentions(&errs, "unclosed"));
    }

    #[test]
    fn parse_invalid_case_id_wrong_length() {
        let errs = rejected("---\nid: short\nsources: []\n---\n\n# Title\n");
        assert!(mentions(&errs, "NULID"));
    }

    #[test]
    fn parse_case_id_absent_is_ok() {
        let input = "---\nsources:\n - https://example.com\n---\n\n# Title\n\nSummary.\n";
        let case = parse(input).unwrap();
        assert!(case.id.is_none());
    }

    #[test]
    fn parse_non_https_source() {
        let errs = rejected("---\nsources:\n - http://example.com\n---\n\n# Title\n");
        assert!(mentions(&errs, "HTTPS"));
    }

    #[test]
    fn parse_too_many_sources() {
        let sources: Vec<String> = (0..21)
            .map(|i| format!(" - https://example.com/{i}"))
            .collect();
        let input = format!("---\nsources:\n{}\n---\n\n# Title\n", sources.join("\n"));
        let errs = rejected(&input);
        assert!(mentions(&errs, "exceeds 20"));
    }

    #[test]
    fn parse_unknown_section() {
        let errs = rejected(&titled_case(&["## Unknown Section", ""]));
        assert!(mentions(&errs, "unknown section"));
    }

    #[test]
    fn parse_duplicate_section() {
        let errs = rejected(&titled_case(&["## Events", "", "## Events", ""]));
        assert!(mentions(&errs, "duplicate"));
    }

    #[test]
    fn parse_multiple_h1() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# First Title",
            "",
            "# Second Title",
            "",
        ]
        .join("\n");
        let errs = rejected(&input);
        assert!(mentions(&errs, "multiple H1"));
    }

    #[test]
    fn parse_all_sections() {
        let input = [
            "---",
            "id: 01H9XT7H1KRQ9SJ7SD9ETB5CVQ",
            "sources:",
            " - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]
        .join("\n");

        let case = accepted(&input);

        assert_eq!(case.id.as_deref(), Some("01H9XT7H1KRQ9SJ7SD9ETB5CVQ"));
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    #[test]
    fn parse_empty_summary() {
        let case = accepted(&titled_case(&["## Events", ""]));
        assert_eq!(case.summary, "");
    }

    #[test]
    fn parse_multiline_summary() {
        let case = accepted(&titled_case(&[
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]));
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        assert_eq!(strip_heading("### Entity", 2), None);
        assert_eq!(strip_heading("## Section", 1), None);
        assert_eq!(strip_heading("Normal text", 1), None);
    }

    #[test]
    fn section_body_content() {
        let case = accepted(&titled_case(&[
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]));

        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    #[test]
    fn parse_rejects_people_section_in_case_file() {
        let errs = rejected(&titled_case(&["## People", ""]));
        assert!(mentions(&errs, "not allowed in case files"));
    }

    #[test]
    fn parse_rejects_organizations_section_in_case_file() {
        let errs = rejected(&titled_case(&["## Organizations", ""]));
        assert!(mentions(&errs, "not allowed in case files"));
    }

    #[test]
    fn parse_entity_file_with_id() {
        let input = [
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    #[test]
    fn parse_entity_file_without_id() {
        let input = [
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- org_type: sports_club",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = ["# Bob Smith", "", "- nationality: Dutch", ""].join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = [
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]
        .join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(mentions(&errs, "H2 sections"));
    }

    #[test]
    fn parse_entity_file_missing_h1() {
        let input = ["---", "---", "", "- nationality: Dutch", ""].join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(mentions(&errs, "missing H1"));
    }

    #[test]
    fn parse_related_cases_section() {
        let input = [
            "---",
            "tags: [bribery]",
            "sources:",
            " - https://example.com",
            "---",
            "",
            "# Test Case",
            "",
            "Summary text.",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/blbi-liquidity-aid-scandal",
            " description: Artalyta bribed Urip to influence the BLBI investigation",
            "- id/corruption/2008/another-case",
            " description: A second related case",
        ]
        .join("\n");

        let case = accepted(&input);

        assert_eq!(case.related_cases.len(), 2);
        assert_eq!(
            case.related_cases[0].case_path,
            "id/corruption/2002/blbi-liquidity-aid-scandal"
        );
        assert_eq!(
            case.related_cases[0].description,
            "Artalyta bribed Urip to influence the BLBI investigation"
        );
        assert_eq!(
            case.related_cases[1].case_path,
            "id/corruption/2008/another-case"
        );
        assert_eq!(case.related_cases[1].description, "A second related case");
        // The structured section must have been lifted out of the generic list.
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::RelatedCases)
        );
    }

    #[test]
    fn parse_related_cases_empty_path() {
        let errs = rejected(&titled_case(&[
            "## Related Cases",
            "",
            "- ",
            " description: Some description",
        ]));
        assert!(mentions(&errs, "case path must not be empty"));
    }

    #[test]
    fn parse_related_cases_missing_description() {
        let errs = rejected(&titled_case(&[
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
        ]));
        assert!(mentions(&errs, "description"));
    }

    #[test]
    fn parse_related_cases_description_too_long() {
        let long_desc = "x".repeat(501);
        let description_line = format!(" description: {long_desc}");
        let errs = rejected(&titled_case(&[
            "## Related Cases",
            "",
            "- id/corruption/2002/some-case",
            &description_line,
        ]));
        assert!(mentions(&errs, "exceeds 500"));
    }

    #[test]
    fn parse_related_cases_too_many() {
        let entries: Vec<String> = (0..11)
            .flat_map(|i| {
                [
                    format!("- id/corruption/2002/case-{i}"),
                    format!(" description: Description {i}"),
                ]
            })
            .collect();
        let mut rest = vec!["## Related Cases", ""];
        rest.extend(entries.iter().map(String::as_str));

        let errs = rejected(&titled_case(&rest));
        assert!(mentions(&errs, "exceeds 10"));
    }
}