Skip to main content

weave_content/
parser.rs

1#![allow(clippy::module_name_repetitions)]
2
3use std::fmt;
4
5use serde::Deserialize;
6
/// Maximum length of a case ID (kebab-case identifier).
/// Compared against `str::len()`, i.e. measured in bytes.
const MAX_CASE_ID_LEN: usize = 60;

/// Maximum number of sources in front matter.
const MAX_SOURCES: usize = 20;

/// Maximum length of the case title (H1). Also applied to the H1 name of
/// entity files. Compared against `str::len()`, i.e. measured in bytes.
const MAX_TITLE_LEN: usize = 200;

/// Maximum length of the case summary (after trimming).
/// Compared against `str::len()`, i.e. measured in bytes.
const MAX_SUMMARY_LEN: usize = 2000;

/// Known H2 section names for case files (case-insensitive match).
/// Actors and Institutions are no longer allowed in case files -- they
/// live in standalone entity files under `actors/` and `institutions/`.
/// This list is what the "unknown section" error message shows to the user.
const KNOWN_CASE_SECTIONS: &[&str] = &["Events", "Relationships", "Timeline"];
23
/// A parsed case file with front matter, title, summary, and raw sections.
#[derive(Debug)]
pub struct ParsedCase {
    /// Case identifier from the front matter `id` field.
    pub id: String,
    /// Source URLs from the front matter `sources` list.
    pub sources: Vec<String>,
    /// Text of the H1 heading.
    pub title: String,
    /// Free text collected after the H1 heading and before the first
    /// section, with surrounding whitespace trimmed.
    pub summary: String,
    /// H2 sections in the order they appear in the file.
    pub sections: Vec<Section>,
}
33
/// A raw H2 section with its heading text and body content.
#[derive(Debug)]
pub struct Section {
    /// Which known section this is, derived from the H2 heading text.
    pub kind: SectionKind,
    /// Lines following the heading, joined with `\n` and otherwise unparsed.
    pub body: String,
    /// Line number (1-indexed) where the H2 heading appears in the original file.
    pub line: usize,
}
42
/// The kind of an H2 section, derived from its heading text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    Actors,
    Institutions,
    Events,
    Relationships,
    Timeline,
}

impl SectionKind {
    /// Map heading text to a section kind.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. Returns `None` for headings that name no known section.
    fn from_heading(heading: &str) -> Option<Self> {
        const NAMES: [(&str, SectionKind); 5] = [
            ("Actors", SectionKind::Actors),
            ("Institutions", SectionKind::Institutions),
            ("Events", SectionKind::Events),
            ("Relationships", SectionKind::Relationships),
            ("Timeline", SectionKind::Timeline),
        ];

        let wanted = heading.trim();
        NAMES
            .iter()
            .find(|(name, _)| wanted.eq_ignore_ascii_case(name))
            .map(|&(_, kind)| kind)
    }

    /// Whether this section kind may appear in a case file.
    ///
    /// Actors and Institutions are recognised only so they can be rejected
    /// with a dedicated message; they are not valid case sections.
    pub fn is_case_section(self) -> bool {
        match self {
            Self::Events | Self::Relationships | Self::Timeline => true,
            Self::Actors | Self::Institutions => false,
        }
    }
}
71
/// A parser error with file location.
///
/// Implements [`std::error::Error`], so it can be boxed as `dyn Error` or
/// propagated through error-handling helpers that expect that trait.
#[derive(Debug)]
pub struct ParseError {
    /// Line number (1-indexed) in the original file that the error refers to.
    pub line: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Formats the error as `line <N>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "line {}: {}", self.line, self.message)
    }
}

impl std::error::Error for ParseError {}
84
/// YAML front matter schema for case files.
#[derive(Deserialize)]
struct FrontMatter {
    /// Case identifier. Has no serde default, so it must be present in the YAML.
    id: String,
    /// Source URLs; an absent `sources` key deserializes to an empty list.
    #[serde(default)]
    sources: Vec<String>,
}
92
/// YAML front matter schema for standalone entity files.
/// Only contains an optional `id` field (NULID, generated on first build).
#[derive(Deserialize)]
struct EntityFrontMatter {
    /// Stored identifier; `None` when the key is absent from the YAML.
    #[serde(default)]
    id: Option<String>,
}
100
/// A parsed standalone entity file (actor or institution).
#[derive(Debug)]
pub struct ParsedEntityFile {
    /// Stored NULID from front matter (None if not yet generated).
    pub id: Option<String>,
    /// Entity name from H1 heading.
    pub name: String,
    /// Raw bullet field lines (body after H1, no sections), joined with `\n`.
    pub body: String,
    /// Line number (1-indexed) of the H1 heading in the original file.
    pub title_line: usize,
}
113
114/// Parse a Markdown case file into a `ParsedCase`.
115///
116/// Extracts YAML front matter, H1 title, summary, and H2 sections.
117/// Returns errors for malformed structure or boundary violations.
118pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
119    let mut errors = Vec::new();
120
121    // Extract front matter
122    let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);
123
124    let Some(front_matter) = front_matter else {
125        if errors.is_empty() {
126            errors.push(ParseError {
127                line: 1,
128                message: "missing YAML front matter (expected `---` delimiter)".into(),
129            });
130        }
131        return Err(errors);
132    };
133
134    // Validate front matter fields
135    validate_front_matter(&front_matter, &mut errors);
136
137    // Extract title, summary, and sections from body
138    let (title, summary, sections) = extract_body(&body, body_start_line, &mut errors);
139
140    if !errors.is_empty() {
141        return Err(errors);
142    }
143
144    Ok(ParsedCase {
145        id: front_matter.id,
146        sources: front_matter.sources,
147        title,
148        summary,
149        sections,
150    })
151}
152
153/// Parse a standalone entity file (actor or institution).
154///
155/// Entity files have YAML front matter with optional `id:`, an H1 name,
156/// and bullet fields directly in the body. No H2 sections are allowed.
157pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
158    let mut errors = Vec::new();
159
160    let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);
161
162    let id = front_matter.and_then(|fm| fm.id);
163
164    // Extract H1 title and body content (no sections allowed)
165    let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);
166
167    if !errors.is_empty() {
168        return Err(errors);
169    }
170
171    Ok(ParsedEntityFile {
172        id,
173        name,
174        body: field_body,
175        title_line,
176    })
177}
178
179/// Extract YAML front matter for entity files.
180/// Front matter is optional for entity files -- if absent, returns None with no error.
181fn extract_entity_front_matter(
182    input: &str,
183    errors: &mut Vec<ParseError>,
184) -> (Option<EntityFrontMatter>, usize, String) {
185    let lines: Vec<&str> = input.lines().collect();
186
187    let first_delim = lines.iter().position(|l| l.trim() == "---");
188    if first_delim != Some(0) {
189        // No front matter -- entire file is body, starting at line 1
190        return (None, 1, input.to_string());
191    }
192
193    let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
194    let Some(close_offset) = close_delim else {
195        errors.push(ParseError {
196            line: 1,
197            message: "unclosed YAML front matter (missing closing `---`)".into(),
198        });
199        return (None, 1, String::new());
200    };
201
202    let close_line = close_offset + 1;
203    let yaml_str: String = lines[1..close_line].join("\n");
204    let body_start_line = close_line + 2; // 1-indexed line number after closing `---`
205    let body = lines[close_line + 1..].join("\n");
206
207    match serde_yaml::from_str::<EntityFrontMatter>(&yaml_str) {
208        Ok(fm) => (Some(fm), body_start_line, body),
209        Err(e) => {
210            errors.push(ParseError {
211                line: 2,
212                message: format!("invalid YAML front matter: {e}"),
213            });
214            (None, body_start_line, body)
215        }
216    }
217}
218
219/// Extract H1 name and field body from an entity file.
220/// Rejects any H2 sections.
221fn extract_entity_body(
222    body: &str,
223    body_start_line: usize,
224    errors: &mut Vec<ParseError>,
225) -> (String, usize, String) {
226    let lines: Vec<&str> = body.lines().collect();
227    let mut name = String::new();
228    let mut title_found = false;
229    let mut title_line = body_start_line;
230    let mut field_lines: Vec<&str> = Vec::new();
231
232    for (i, line) in lines.iter().enumerate() {
233        let file_line = body_start_line + i;
234
235        if let Some(heading) = strip_heading(line, 1) {
236            if title_found {
237                errors.push(ParseError {
238                    line: file_line,
239                    message: "multiple H1 headings found (expected exactly one)".into(),
240                });
241                continue;
242            }
243            name = heading.to_string();
244            title_found = true;
245            title_line = file_line;
246            continue;
247        }
248
249        // Reject H2 sections in entity files
250        if strip_heading(line, 2).is_some() {
251            errors.push(ParseError {
252                line: file_line,
253                message: "H2 sections are not allowed in entity files".into(),
254            });
255            continue;
256        }
257
258        if title_found {
259            field_lines.push(line);
260        } else if !line.trim().is_empty() {
261            errors.push(ParseError {
262                line: file_line,
263                message: "expected H1 heading (# Name)".into(),
264            });
265        }
266    }
267
268    if !title_found {
269        errors.push(ParseError {
270            line: body_start_line,
271            message: "missing H1 heading".into(),
272        });
273    } else if name.len() > MAX_TITLE_LEN {
274        errors.push(ParseError {
275            line: title_line,
276            message: format!("H1 name exceeds {MAX_TITLE_LEN} chars (got {})", name.len()),
277        });
278    }
279
280    (name, title_line, field_lines.join("\n"))
281}
282
283/// Extract YAML front matter delimited by `---` lines.
284/// Returns the parsed front matter, the line number where the body starts,
285/// and the body text.
286fn extract_front_matter(
287    input: &str,
288    errors: &mut Vec<ParseError>,
289) -> (Option<FrontMatter>, usize, String) {
290    let lines: Vec<&str> = input.lines().collect();
291
292    // First non-empty line must be `---`
293    let first_delim = lines.iter().position(|l| l.trim() == "---");
294    if first_delim != Some(0) {
295        errors.push(ParseError {
296            line: 1,
297            message: "missing YAML front matter (expected `---` on first line)".into(),
298        });
299        return (None, 1, input.to_string());
300    }
301
302    // Find closing `---`
303    let close_delim = lines[1..].iter().position(|l| l.trim() == "---");
304    let Some(close_offset) = close_delim else {
305        errors.push(ParseError {
306            line: 1,
307            message: "unclosed YAML front matter (missing closing `---`)".into(),
308        });
309        return (None, 1, String::new());
310    };
311
312    let close_line = close_offset + 1; // index in `lines`
313    let yaml_str: String = lines[1..close_line].join("\n");
314    let body_start_line = close_line + 2; // 1-indexed line number after closing `---`
315    let body = lines[close_line + 1..].join("\n");
316
317    match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
318        Ok(fm) => (Some(fm), body_start_line, body),
319        Err(e) => {
320            errors.push(ParseError {
321                line: 2,
322                message: format!("invalid YAML front matter: {e}"),
323            });
324            (None, body_start_line, body)
325        }
326    }
327}
328
329fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
330    // Validate case ID
331    if fm.id.is_empty() {
332        errors.push(ParseError {
333            line: 2,
334            message: "front matter `id` must not be empty".into(),
335        });
336    } else if fm.id.len() > MAX_CASE_ID_LEN {
337        errors.push(ParseError {
338            line: 2,
339            message: format!(
340                "front matter `id` exceeds {MAX_CASE_ID_LEN} chars (got {})",
341                fm.id.len()
342            ),
343        });
344    } else if !is_kebab_case(&fm.id) {
345        errors.push(ParseError {
346            line: 2,
347            message: format!(
348                "front matter `id` must be kebab-case [a-z0-9-], got {:?}",
349                fm.id
350            ),
351        });
352    }
353
354    // Validate sources count
355    if fm.sources.len() > MAX_SOURCES {
356        errors.push(ParseError {
357            line: 2,
358            message: format!(
359                "front matter `sources` exceeds {MAX_SOURCES} entries (got {})",
360                fm.sources.len()
361            ),
362        });
363    }
364
365    // Validate each source URL is HTTPS
366    for (i, url) in fm.sources.iter().enumerate() {
367        if !url.starts_with("https://") {
368            errors.push(ParseError {
369                line: 2,
370                message: format!("source[{i}] must be HTTPS, got {url:?}"),
371            });
372        }
373    }
374}
375
/// Check if a string is valid kebab-case: `[a-z0-9]+(-[a-z0-9]+)*`
///
/// Equivalently: splitting on `-` must yield only non-empty segments made of
/// ASCII lowercase letters and digits. This rules out the empty string,
/// leading or trailing dashes, and consecutive dashes.
fn is_kebab_case(s: &str) -> bool {
    s.split('-').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit())
    })
}
385
386/// Extract the H1 title, summary text, and H2 sections from the body.
387#[allow(clippy::too_many_lines)]
388fn extract_body(
389    body: &str,
390    body_start_line: usize,
391    errors: &mut Vec<ParseError>,
392) -> (String, String, Vec<Section>) {
393    let lines: Vec<&str> = body.lines().collect();
394    let mut title = String::new();
395    let mut title_found = false;
396    let mut summary_lines: Vec<&str> = Vec::new();
397    let mut sections: Vec<Section> = Vec::new();
398
399    // Track current H2 section being built
400    let mut current_section_kind: Option<SectionKind> = None;
401    let mut current_section_line: usize = 0;
402    let mut current_section_body: Vec<&str> = Vec::new();
403
404    // State: before H1, after H1 (summary), in sections
405    let mut state = State::BeforeTitle;
406
407    for (i, line) in lines.iter().enumerate() {
408        let file_line = body_start_line + i; // 1-indexed line in original file
409
410        if let Some(heading) = strip_heading(line, 1) {
411            if title_found {
412                errors.push(ParseError {
413                    line: file_line,
414                    message: "multiple H1 headings found (expected exactly one)".into(),
415                });
416                continue;
417            }
418            title = heading.to_string();
419            title_found = true;
420            state = State::Summary;
421            continue;
422        }
423
424        if let Some(heading) = strip_heading(line, 2) {
425            // Flush previous section
426            if let Some(kind) = current_section_kind.take() {
427                sections.push(Section {
428                    kind,
429                    body: current_section_body.join("\n"),
430                    line: current_section_line,
431                });
432                current_section_body.clear();
433            }
434
435            match SectionKind::from_heading(heading) {
436                Some(kind) if kind.is_case_section() => {
437                    // Check for duplicate sections
438                    if sections.iter().any(|s| s.kind == kind) {
439                        errors.push(ParseError {
440                            line: file_line,
441                            message: format!("duplicate section: ## {heading}"),
442                        });
443                    }
444                    current_section_kind = Some(kind);
445                    current_section_line = file_line;
446                    state = State::InSection;
447                }
448                Some(_) => {
449                    // Legacy section (Actors/Institutions) -- not allowed in case files
450                    errors.push(ParseError {
451                        line: file_line,
452                        message: format!(
453                            "## {heading} is not allowed in case files (use standalone entity files in actors/ or institutions/ instead)"
454                        ),
455                    });
456                }
457                None => {
458                    errors.push(ParseError {
459                        line: file_line,
460                        message: format!(
461                            "unknown section: ## {heading} (expected one of: {})",
462                            KNOWN_CASE_SECTIONS.join(", ")
463                        ),
464                    });
465                }
466            }
467            continue;
468        }
469
470        match state {
471            State::BeforeTitle => {
472                // Skip blank lines before title
473                if !line.trim().is_empty() {
474                    errors.push(ParseError {
475                        line: file_line,
476                        message: "expected H1 title (# Title)".into(),
477                    });
478                }
479            }
480            State::Summary => {
481                summary_lines.push(line);
482            }
483            State::InSection => {
484                current_section_body.push(line);
485            }
486        }
487    }
488
489    // Flush last section
490    if let Some(kind) = current_section_kind.take() {
491        sections.push(Section {
492            kind,
493            body: current_section_body.join("\n"),
494            line: current_section_line,
495        });
496    }
497
498    // Validate title
499    if !title_found {
500        errors.push(ParseError {
501            line: body_start_line,
502            message: "missing H1 title".into(),
503        });
504    } else if title.len() > MAX_TITLE_LEN {
505        errors.push(ParseError {
506            line: body_start_line,
507            message: format!(
508                "H1 title exceeds {MAX_TITLE_LEN} chars (got {})",
509                title.len()
510            ),
511        });
512    }
513
514    // Build summary (trim leading/trailing blank lines)
515    let summary = summary_lines.clone().join("\n").trim().to_string();
516
517    if summary.len() > MAX_SUMMARY_LEN {
518        errors.push(ParseError {
519            line: body_start_line,
520            message: format!(
521                "summary exceeds {MAX_SUMMARY_LEN} chars (got {})",
522                summary.len()
523            ),
524        });
525    }
526
527    (title, summary, sections)
528}
529
/// Position of the case-body scanner, deciding where non-heading lines go.
#[derive(Clone, Copy)]
enum State {
    /// No H1 seen yet; non-blank lines are reported as errors.
    BeforeTitle,
    /// After the H1; lines are collected into the summary.
    Summary,
    /// After an accepted H2; lines are collected into the current section body.
    InSection,
}
536
/// Strip an ATX heading prefix of the given level. Returns the heading text.
/// E.g., `strip_heading("## Foo", 2)` returns `Some("Foo")`.
///
/// A line matches when, after leading whitespace, it starts with exactly
/// `level` `#` characters followed by either end of line (empty heading,
/// returned as `Some("")`) or a space. The returned text is trimmed.
/// Deeper or shallower headings (`### Foo` for level 2) and lines without
/// the separating space (`#Foo`) return `None`.
///
/// Heading text may itself begin with `#` (e.g. `# #1 Suspect`); only the
/// run of `#` at the very start of the line determines the level.
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    let trimmed = line.trim_start();

    // Count the leading run of `#`; it must be exactly `level` long so that
    // `###` never matches a request for `##`.
    let hashes = trimmed.bytes().take_while(|&b| b == b'#').count();
    if hashes != level {
        return None;
    }

    // `#` is ASCII, so `hashes` is always a valid char boundary.
    let rest = &trimmed[hashes..];
    if rest.is_empty() {
        return Some("");
    }

    // The marker must be separated from the text by a space.
    rest.strip_prefix(' ').map(str::trim)
}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal valid front matter (plus the blank separator line) shared by
    /// tests that only exercise the Markdown body.
    const MINIMAL_FRONT_MATTER: [&str; 5] = ["---", "id: test", "sources: []", "---", ""];

    /// Build a case file from the shared minimal front matter followed by `body` lines.
    fn case_with_body(body: &[&str]) -> String {
        MINIMAL_FRONT_MATTER
            .iter()
            .chain(body)
            .copied()
            .collect::<Vec<_>>()
            .join("\n")
    }

    /// Parse `input` as a case file, panicking with every reported error on failure.
    fn parse_ok(input: &str) -> ParsedCase {
        parse(input).unwrap_or_else(|errs| {
            panic!(
                "parse failed: {}",
                errs.iter()
                    .map(ToString::to_string)
                    .collect::<Vec<_>>()
                    .join("; ")
            );
        })
    }

    /// Whether any error message contains `needle`.
    fn has_error(errs: &[ParseError], needle: &str) -> bool {
        errs.iter().any(|e| e.message.contains(needle))
    }

    fn minimal_case() -> String {
        [
            "---",
            "id: test-case",
            "sources:",
            "  - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: related_to",
        ]
        .join("\n")
    }

    #[test]
    fn parse_minimal_case() {
        let case = parse_ok(&minimal_case());

        assert_eq!(case.id, "test-case");
        assert_eq!(case.sources, vec!["https://example.com/source"]);
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    #[test]
    fn parse_missing_front_matter() {
        let input = "# Title\n\nSummary.\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "front matter"));
    }

    #[test]
    fn parse_unclosed_front_matter() {
        let input = "---\nid: test\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "unclosed"));
    }

    #[test]
    fn parse_invalid_case_id_uppercase() {
        let input = "---\nid: Test-Case\nsources: []\n---\n\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "kebab-case"));
    }

    #[test]
    fn parse_case_id_too_long() {
        let long_id = "a".repeat(61);
        let input = format!("---\nid: {long_id}\nsources: []\n---\n\n# Title\n");
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "exceeds 60"));
    }

    #[test]
    fn parse_non_https_source() {
        let input = "---\nid: test\nsources:\n  - http://example.com\n---\n\n# Title\n";
        let errs = parse(input).unwrap_err();
        assert!(has_error(&errs, "HTTPS"));
    }

    #[test]
    fn parse_too_many_sources() {
        let sources: Vec<String> = (0..21)
            .map(|i| format!("  - https://example.com/{i}"))
            .collect();
        let input = format!(
            "---\nid: test\nsources:\n{}\n---\n\n# Title\n",
            sources.join("\n")
        );
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "exceeds 20"));
    }

    #[test]
    fn parse_unknown_section() {
        let input = case_with_body(&["# Title", "", "## Unknown Section", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "unknown section"));
    }

    #[test]
    fn parse_duplicate_section() {
        let input = case_with_body(&["# Title", "", "## Events", "", "## Events", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "duplicate"));
    }

    #[test]
    fn parse_multiple_h1() {
        let input = case_with_body(&["# First Title", "", "# Second Title", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "multiple H1"));
    }

    #[test]
    fn parse_all_sections() {
        let input = [
            "---",
            "id: full-case",
            "sources:",
            "  - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]
        .join("\n");

        let case = parse_ok(&input);

        assert_eq!(case.id, "full-case");
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    #[test]
    fn parse_empty_summary() {
        let input = case_with_body(&["# Title", "", "## Events", ""]);

        let case = parse_ok(&input);
        assert_eq!(case.summary, "");
    }

    #[test]
    fn parse_multiline_summary() {
        let input = case_with_body(&[
            "# Title",
            "",
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]);

        let case = parse_ok(&input);
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        // H3 should not match H2
        assert_eq!(strip_heading("### Entity", 2), None);
        // H2 should not match H1
        assert_eq!(strip_heading("## Section", 1), None);
        // Not a heading
        assert_eq!(strip_heading("Normal text", 1), None);
    }

    #[test]
    fn kebab_case_validation() {
        assert!(is_kebab_case("valid-case-id"));
        assert!(is_kebab_case("a"));
        assert!(is_kebab_case("test-123"));
        assert!(!is_kebab_case(""));
        assert!(!is_kebab_case("-leading"));
        assert!(!is_kebab_case("trailing-"));
        assert!(!is_kebab_case("double--dash"));
        assert!(!is_kebab_case("Upper"));
        assert!(!is_kebab_case("has space"));
    }

    #[test]
    fn section_body_content() {
        let input = case_with_body(&[
            "# Title",
            "",
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]);

        let case = parse_ok(&input);

        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    #[test]
    fn parse_rejects_actors_section_in_case_file() {
        let input = case_with_body(&["# Title", "", "## Actors", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "not allowed in case files"));
    }

    #[test]
    fn parse_rejects_institutions_section_in_case_file() {
        let input = case_with_body(&["# Title", "", "## Institutions", ""]);
        let errs = parse(&input).unwrap_err();
        assert!(has_error(&errs, "not allowed in case files"));
    }

    #[test]
    fn parse_entity_file_with_id() {
        let input = [
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    #[test]
    fn parse_entity_file_without_id() {
        let input = [
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- institution_type: football_club",
            "",
        ]
        .join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = ["# Bob Smith", "", "- nationality: Dutch", ""].join("\n");

        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = [
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]
        .join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(has_error(&errs, "H2 sections"));
    }

    #[test]
    fn parse_entity_file_missing_h1() {
        let input = ["---", "---", "", "- nationality: Dutch", ""].join("\n");

        let errs = parse_entity_file(&input).unwrap_err();
        assert!(has_error(&errs, "missing H1"));
    }
}