#![allow(clippy::module_name_repetitions)]
use std::fmt;
use serde::{Deserialize, Serialize};
/// Exact length a case `id` in front matter must have (see `validate_front_matter`).
const MAX_CASE_ID_LEN: usize = 26;
/// Maximum number of `sources` entries allowed in case front matter.
const MAX_SOURCES: usize = 20;
/// Maximum length of an H1 title (case files) or H1 name (entity files).
const MAX_TITLE_LEN: usize = 200;
/// Maximum length of the case summary that follows the H1 title.
const MAX_SUMMARY_LEN: usize = 2000;
/// Section headings listed in the "unknown section" error message.
///
/// Kept in sync with the kinds for which `SectionKind::is_case_section` returns `true`,
/// so the hint shown to authors names every heading the parser accepts in a case file.
const KNOWN_CASE_SECTIONS: &[&str] = &[
    "Events",
    "Documents",
    "Assets",
    "Relationships",
    "Timeline",
    "Related Cases",
    "Involved",
];
/// Result of successfully parsing a case file with `parse`.
#[derive(Debug)]
pub struct ParsedCase {
/// Optional `id` from the front matter.
pub id: Option<String>,
/// `sources` entries from the front matter.
pub sources: Vec<SourceEntry>,
/// Text of the H1 heading.
pub title: String,
/// Trimmed text collected between the H1 heading and the first `##` heading.
pub summary: String,
/// Body sections; `parse` removes the `Related Cases` and `Involved` sections
/// after turning them into `related_cases` and `involved`.
pub sections: Vec<Section>,
/// Optional `case_type` from the front matter.
pub case_type: Option<String>,
/// Optional `status` from the front matter.
pub status: Option<String>,
/// Optional `amounts` from the front matter (stored as the raw string).
pub amounts: Option<String>,
/// `tags` from the front matter.
pub tags: Vec<String>,
/// Entries parsed from `## Related Cases` sections.
pub related_cases: Vec<RelatedCase>,
/// Entries parsed from `## Involved` sections.
pub involved: Vec<InvolvedEntry>,
}
/// One entry of a `## Related Cases` section, as produced by `parse_related_cases`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct RelatedCase {
/// Trimmed text of the entry's `- ` line.
pub case_path: String,
/// Trimmed text of the entry's `description:` line.
pub description: String,
/// Trimmed text of the entry's optional `id:` line; left out of serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// File line of the entry's `- ` line; never serialized.
#[serde(skip)]
pub line: usize,
}
/// One entry of a `## Involved` section, as produced by `parse_involved`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvolvedEntry {
/// Trimmed text following `- ` on the entry line.
pub entity_name: String,
/// Value of an `id: ` line immediately following the entry line, if present.
pub id: Option<String>,
/// File line of the entry's `- ` line.
pub line: usize,
}
/// A recognized `##` section of a case file together with its raw body text.
#[derive(Debug)]
pub struct Section {
/// Which known heading introduced this section.
pub kind: SectionKind,
/// Lines between this heading and the next H1/H2 heading, joined with `\n`.
pub body: String,
/// File line of the `##` heading.
pub line: usize,
}
/// Kinds of `##` section headings the parser recognizes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionKind {
    People,
    Organizations,
    Events,
    Documents,
    Assets,
    Relationships,
    Timeline,
    RelatedCases,
    Involved,
}

impl SectionKind {
    /// Maps heading text to its kind.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII case-insensitive.
    /// Returns `None` for headings that are not in the table.
    fn from_heading(heading: &str) -> Option<Self> {
        const HEADINGS: [(&str, SectionKind); 9] = [
            ("People", SectionKind::People),
            ("Organizations", SectionKind::Organizations),
            ("Events", SectionKind::Events),
            ("Documents", SectionKind::Documents),
            ("Assets", SectionKind::Assets),
            ("Relationships", SectionKind::Relationships),
            ("Timeline", SectionKind::Timeline),
            ("Related Cases", SectionKind::RelatedCases),
            ("Involved", SectionKind::Involved),
        ];

        let wanted = heading.trim();
        HEADINGS
            .iter()
            .find(|(label, _)| wanted.eq_ignore_ascii_case(label))
            .map(|&(_, kind)| kind)
    }

    /// Reports whether this kind may appear in a case file.
    ///
    /// Only `People` and `Organizations` are excluded. The match is exhaustive so that a
    /// newly added variant forces an explicit decision here.
    pub fn is_case_section(self) -> bool {
        match self {
            Self::People | Self::Organizations => false,
            Self::Events
            | Self::Documents
            | Self::Assets
            | Self::Relationships
            | Self::Timeline
            | Self::RelatedCases
            | Self::Involved => true,
        }
    }
}
/// A problem found while parsing, tagged with the file line it refers to.
///
/// Line numbers are 1-based: errors about the opening front-matter delimiter use line 1.
#[derive(Debug)]
pub struct ParseError {
    /// File line the error refers to.
    pub line: usize,
    /// Human-readable description of the problem.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Formats the error as `line <n>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "line {}: {}", self.line, self.message)
    }
}

// Lets callers box the error as `dyn Error` or propagate it with `?`.
impl std::error::Error for ParseError {}
/// Maximum number of `tags` in case front matter.
const MAX_CASE_TAGS: usize = 10;
/// Maximum number of `tags` in entity-file front matter.
const MAX_ENTITY_TAGS: usize = 5;
/// Maximum length of a single tag (case and entity files).
const MAX_TAG_LEN: usize = 50;
/// Maximum number of entries in a `## Related Cases` section.
const MAX_RELATED_CASES: usize = 10;
/// Maximum length of a related-case `description`.
const MAX_RELATED_DESCRIPTION_LEN: usize = 500;
/// Parses the body of a `## Related Cases` section.
///
/// An entry starts with a `- <case path>` line. Following ` description: <text>` and
/// ` id: <value>` lines attach to the most recent entry. Blank lines are ignored; any other
/// line is reported as an error.
///
/// `section_start_line` is the file line of the section heading; body line `n` (0-based) is
/// reported as `section_start_line + n + 1`.
///
/// Problems are appended to `errors`. Entries with an empty path, a missing description, or a
/// description longer than `MAX_RELATED_DESCRIPTION_LEN` characters are reported and left out
/// of the returned list.
pub fn parse_related_cases(
    body: &str,
    section_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> Vec<RelatedCase> {
    // Collect unvalidated entries first so that follow-up lines can amend the latest one.
    let mut drafts: Vec<RelatedCase> = Vec::new();
    for (offset, line) in body.lines().enumerate() {
        let file_line = section_start_line + offset + 1;
        if let Some(rest) = line.strip_prefix("- ") {
            drafts.push(RelatedCase {
                case_path: rest.trim().to_string(),
                description: String::new(),
                id: None,
                line: file_line,
            });
        } else if let Some(rest) = line.strip_prefix(" description: ") {
            match drafts.last_mut() {
                Some(entry) => entry.description = rest.trim().to_string(),
                None => errors.push(ParseError {
                    line: file_line,
                    message: "description without a preceding case path".into(),
                }),
            }
        } else if let Some(rest) = line.strip_prefix(" id: ") {
            match drafts.last_mut() {
                Some(entry) => entry.id = Some(rest.trim().to_string()),
                None => errors.push(ParseError {
                    line: file_line,
                    message: "id without a preceding case path".into(),
                }),
            }
        } else if !line.trim().is_empty() {
            errors.push(ParseError {
                line: file_line,
                message: format!("unexpected line in Related Cases: {line}"),
            });
        }
    }

    if drafts.len() > MAX_RELATED_CASES {
        errors.push(ParseError {
            line: section_start_line,
            message: format!(
                "Related Cases exceeds {MAX_RELATED_CASES} entries (got {})",
                drafts.len()
            ),
        });
    }

    drafts
        .into_iter()
        .filter(|entry| {
            if entry.case_path.is_empty() {
                errors.push(ParseError {
                    line: entry.line,
                    message: "related case path must not be empty".into(),
                });
                return false;
            }
            if entry.description.is_empty() {
                errors.push(ParseError {
                    line: entry.line,
                    message: format!("related case {:?} missing description", entry.case_path),
                });
                return false;
            }
            // The limit and the message are expressed in characters, so count characters
            // rather than UTF-8 bytes (`str::len`), which over-counts non-ASCII text.
            let description_chars = entry.description.chars().count();
            if description_chars > MAX_RELATED_DESCRIPTION_LEN {
                errors.push(ParseError {
                    line: entry.line,
                    message: format!(
                        "related case description exceeds {MAX_RELATED_DESCRIPTION_LEN} chars (got {description_chars})"
                    ),
                });
                return false;
            }
            true
        })
        .collect()
}
/// Maximum number of entries in a `## Involved` section.
const MAX_INVOLVED: usize = 50;
/// Parses the body of a `## Involved` section.
///
/// Each entry is a `- <Entity Name>` line, optionally followed directly by an `id: <value>`
/// line. Lines are trimmed before matching and blank lines are skipped. Any other non-blank
/// line is reported in `errors`.
///
/// `section_start_line` is the file line of the section heading; body line `n` (0-based) is
/// reported as `section_start_line + 1 + n`. Exceeding `MAX_INVOLVED` entries adds an error but
/// all parsed entries are still returned.
pub fn parse_involved(
    body: &str,
    section_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> Vec<InvolvedEntry> {
    let mut entries = Vec::new();
    let mut rows = body.lines().enumerate().peekable();

    while let Some((index, raw)) = rows.next() {
        let file_line = section_start_line + 1 + index;
        let row = raw.trim();
        if row.is_empty() {
            continue;
        }

        let Some(name) = row.strip_prefix("- ") else {
            errors.push(ParseError {
                line: file_line,
                message: format!("expected involved entry `- Entity Name`, got {row:?}"),
            });
            continue;
        };

        let entity_name = name.trim().to_string();
        if entity_name.is_empty() {
            errors.push(ParseError {
                line: file_line,
                message: "involved entity name must not be empty".into(),
            });
            continue;
        }

        // An `id: ` line belongs to this entry only when it is the very next line.
        let id = rows
            .peek()
            .and_then(|(_, next)| next.trim().strip_prefix("id: "))
            .map(|value| value.trim().to_string());
        if id.is_some() {
            // Consume the id line so it is not treated as a new entry.
            rows.next();
        }

        entries.push(InvolvedEntry {
            entity_name,
            id,
            line: file_line,
        });
    }

    if entries.len() > MAX_INVOLVED {
        errors.push(ParseError {
            line: section_start_line,
            message: format!(
                "Involved exceeds {MAX_INVOLVED} entries (got {})",
                entries.len()
            ),
        });
    }
    entries
}
/// YAML front matter of a case file. Every field is optional and falls back to its default.
#[derive(Deserialize)]
struct FrontMatter {
/// Case identifier; its length is checked by `validate_front_matter`.
#[serde(default)]
id: Option<String>,
/// Source references; each URL must start with `https://`.
#[serde(default)]
sources: Vec<SourceEntry>,
/// Case type; checked against `CaseType::KNOWN` or the custom form.
#[serde(default)]
case_type: Option<String>,
/// Case status; checked against `CaseStatus::KNOWN`.
#[serde(default)]
status: Option<String>,
/// Amounts, passed through unvalidated by this module.
#[serde(default)]
amounts: Option<String>,
/// Free-form tags; count and per-tag length are limited.
#[serde(default)]
tags: Vec<String>,
}
/// A `sources` entry in case front matter.
///
/// The enum is untagged, so an entry may be written either as a bare URL string or as a
/// mapping with a `url` key plus optional metadata.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum SourceEntry {
/// Bare URL string.
Url(String),
/// Mapping form with optional metadata.
Structured {
/// URL of the source.
url: String,
/// Optional title of the source.
#[serde(default)]
title: Option<String>,
/// Optional publication timestamp, kept as the raw string.
#[serde(default)]
published_at: Option<String>,
/// Optional language, kept as the raw string.
#[serde(default)]
language: Option<String>,
},
}
impl SourceEntry {
    /// Returns the URL of this source, whichever form the entry was written in.
    pub fn url(&self) -> &str {
        // Both variants carry a URL, so a single irrefutable or-pattern binds it.
        let (Self::Url(url) | Self::Structured { url, .. }) = self;
        url
    }
}
/// YAML front matter of an entity file. Both fields are optional.
#[derive(Deserialize)]
struct EntityFrontMatter {
/// Entity identifier, passed through unvalidated by this module.
#[serde(default)]
id: Option<String>,
/// Free-form tags; count and per-tag length are checked in `parse_entity_file`.
#[serde(default)]
tags: Vec<String>,
}
/// Result of successfully parsing an entity file with `parse_entity_file`.
#[derive(Debug)]
pub struct ParsedEntityFile {
/// Optional `id` from the front matter.
pub id: Option<String>,
/// Text of the H1 heading.
pub name: String,
/// Lines following the H1 heading, joined with `\n`.
pub body: String,
/// File line of the H1 heading.
pub title_line: usize,
/// `tags` from the front matter (empty when there is no front matter).
pub tags: Vec<String>,
}
/// Parses a complete case file: YAML front matter followed by a Markdown body.
///
/// The body must contain one H1 title, an optional summary, and `##` sections. The
/// `Related Cases` and `Involved` sections are parsed into structured entries and removed
/// from `sections`.
///
/// # Errors
///
/// Returns every problem collected during parsing. When the front matter is missing or
/// cannot be parsed, the body is not examined and only front-matter errors are returned.
pub fn parse(input: &str) -> Result<ParsedCase, Vec<ParseError>> {
    let mut errors = Vec::new();
    let (front_matter, body_start_line, body) = extract_front_matter(input, &mut errors);
    let Some(front_matter) = front_matter else {
        // Guarantee at least one error so callers never see an empty `Err`.
        if errors.is_empty() {
            errors.push(ParseError {
                line: 1,
                message: "missing YAML front matter (expected `---` delimiter)".into(),
            });
        }
        return Err(errors);
    };
    validate_front_matter(&front_matter, &mut errors);
    let (title, summary, mut sections) = extract_body(&body, body_start_line, &mut errors);

    // Related-case errors are reported before involved errors, regardless of section order.
    let mut related_cases = Vec::new();
    for section in sections
        .iter()
        .filter(|s| s.kind == SectionKind::RelatedCases)
    {
        related_cases.extend(parse_related_cases(
            &section.body,
            section.line,
            &mut errors,
        ));
    }
    let mut involved = Vec::new();
    for section in sections.iter().filter(|s| s.kind == SectionKind::Involved) {
        involved.extend(parse_involved(&section.body, section.line, &mut errors));
    }
    // These two sections are exposed as structured fields, not as raw sections.
    sections.retain(|s| {
        !matches!(
            s.kind,
            SectionKind::RelatedCases | SectionKind::Involved
        )
    });

    if !errors.is_empty() {
        return Err(errors);
    }
    Ok(ParsedCase {
        id: front_matter.id,
        sources: front_matter.sources,
        title,
        summary,
        sections,
        case_type: front_matter.case_type,
        status: front_matter.status,
        amounts: front_matter.amounts,
        tags: front_matter.tags,
        related_cases,
        involved,
    })
}
/// Parses a standalone entity file: optional YAML front matter, one H1 name, and field lines.
///
/// Front matter may be absent entirely; in that case `id` is `None` and `tags` is empty.
///
/// # Errors
///
/// Returns every problem collected: front-matter errors, tag-limit violations (reported at
/// line 2), and body errors from `extract_entity_body`.
pub fn parse_entity_file(input: &str) -> Result<ParsedEntityFile, Vec<ParseError>> {
    let mut errors = Vec::new();
    let (front_matter, body_start_line, body) = extract_entity_front_matter(input, &mut errors);
    // Move the fields out of the front matter instead of cloning them.
    let (id, tags) = front_matter.map_or_else(|| (None, Vec::new()), |fm| (fm.id, fm.tags));

    if tags.len() > MAX_ENTITY_TAGS {
        errors.push(ParseError {
            line: 2,
            message: format!(
                "front matter `tags` exceeds {MAX_ENTITY_TAGS} entries (got {})",
                tags.len()
            ),
        });
    }
    for (i, tag) in tags.iter().enumerate() {
        // The limit is expressed in characters; `str::len` would count UTF-8 bytes.
        if tag.chars().count() > MAX_TAG_LEN {
            errors.push(ParseError {
                line: 2,
                message: format!("front matter tag #{} exceeds {MAX_TAG_LEN} chars", i + 1),
            });
        }
        if tag.is_empty() {
            errors.push(ParseError {
                line: 2,
                message: format!("front matter tag #{} is empty", i + 1),
            });
        }
    }

    let (name, title_line, field_body) = extract_entity_body(&body, body_start_line, &mut errors);
    if !errors.is_empty() {
        return Err(errors);
    }
    Ok(ParsedEntityFile {
        id,
        name,
        body: field_body,
        title_line,
        tags,
    })
}
/// Splits an entity file into its optional YAML front matter and the remaining body.
///
/// Returns `(front matter, file line where the body starts, body text)`:
/// - no `---` on the first line: `(None, 1, input)` with no error, since front matter is
///   optional for entity files;
/// - no closing `---`: an error is recorded and `(None, 1, "")` is returned;
/// - YAML that fails to deserialize: an error is recorded at line 2 and the front matter is
///   `None`, but the body and its start line are still returned.
fn extract_entity_front_matter(
    input: &str,
    errors: &mut Vec<ParseError>,
) -> (Option<EntityFrontMatter>, usize, String) {
    let lines: Vec<&str> = input.lines().collect();

    // `after_open` is everything following the opening delimiter.
    let after_open = match lines.split_first() {
        Some((first, rest)) if first.trim() == "---" => rest,
        _ => return (None, 1, input.to_string()),
    };

    let Some(close_offset) = after_open.iter().position(|l| l.trim() == "---") else {
        errors.push(ParseError {
            line: 1,
            message: "unclosed YAML front matter (missing closing `---`)".into(),
        });
        return (None, 1, String::new());
    };

    let yaml_str = after_open[..close_offset].join("\n");
    let body = after_open[close_offset + 1..].join("\n");
    // The closing delimiter sits at 0-based index `close_offset + 1` of the file, i.e. file
    // line `close_offset + 2`; the body begins on the line after it.
    let body_start_line = close_offset + 3;

    let front_matter = serde_yaml::from_str::<EntityFrontMatter>(&yaml_str)
        .map_err(|e| {
            errors.push(ParseError {
                line: 2,
                message: format!("invalid YAML front matter: {e}"),
            });
        })
        .ok();
    (front_matter, body_start_line, body)
}
/// Extracts the H1 name and the field lines from an entity file body.
///
/// Returns `(name, file line of the H1, field lines joined with "\n")`. Problems are appended
/// to `errors`:
/// - a second H1 heading;
/// - any H2 heading (entity files have no sections);
/// - non-blank text before the H1;
/// - a missing H1, reported at `body_start_line`;
/// - a name longer than `MAX_TITLE_LEN` characters, reported at the H1 line.
///
/// `body_start_line` is the file line of the first body line.
fn extract_entity_body(
    body: &str,
    body_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> (String, usize, String) {
    let mut name = String::new();
    let mut title_found = false;
    let mut title_line = body_start_line;
    let mut field_lines: Vec<&str> = Vec::new();

    for (i, line) in body.lines().enumerate() {
        let file_line = body_start_line + i;

        if let Some(heading) = strip_heading(line, 1) {
            if title_found {
                errors.push(ParseError {
                    line: file_line,
                    message: "multiple H1 headings found (expected exactly one)".into(),
                });
            } else {
                name = heading.to_string();
                title_found = true;
                title_line = file_line;
            }
            continue;
        }

        if strip_heading(line, 2).is_some() {
            errors.push(ParseError {
                line: file_line,
                message: "H2 sections are not allowed in entity files".into(),
            });
            continue;
        }

        if title_found {
            field_lines.push(line);
        } else if !line.trim().is_empty() {
            errors.push(ParseError {
                line: file_line,
                message: "expected H1 heading (# Name)".into(),
            });
        }
    }

    if title_found {
        // The limit is expressed in characters; `str::len` would count UTF-8 bytes and
        // reject valid non-ASCII names early.
        let name_chars = name.chars().count();
        if name_chars > MAX_TITLE_LEN {
            errors.push(ParseError {
                line: title_line,
                message: format!("H1 name exceeds {MAX_TITLE_LEN} chars (got {name_chars})"),
            });
        }
    } else {
        errors.push(ParseError {
            line: body_start_line,
            message: "missing H1 heading".into(),
        });
    }

    (name, title_line, field_lines.join("\n"))
}
/// Splits a case file into its mandatory YAML front matter and the remaining body.
///
/// Returns `(front matter, file line where the body starts, body text)`:
/// - no `---` on the first line: an error is recorded and `(None, 1, input)` is returned;
/// - no closing `---`: an error is recorded and `(None, 1, "")` is returned;
/// - YAML that fails to deserialize: an error is recorded at line 2 and the front matter is
///   `None`, but the body and its start line are still returned.
fn extract_front_matter(
    input: &str,
    errors: &mut Vec<ParseError>,
) -> (Option<FrontMatter>, usize, String) {
    let lines: Vec<&str> = input.lines().collect();

    // `after_open` is everything following the opening delimiter.
    let after_open = match lines.split_first() {
        Some((first, rest)) if first.trim() == "---" => rest,
        _ => {
            errors.push(ParseError {
                line: 1,
                message: "missing YAML front matter (expected `---` on first line)".into(),
            });
            return (None, 1, input.to_string());
        }
    };

    let Some(close_offset) = after_open.iter().position(|l| l.trim() == "---") else {
        errors.push(ParseError {
            line: 1,
            message: "unclosed YAML front matter (missing closing `---`)".into(),
        });
        return (None, 1, String::new());
    };

    let yaml_str = after_open[..close_offset].join("\n");
    let body = after_open[close_offset + 1..].join("\n");
    // The closing delimiter sits at 0-based index `close_offset + 1` of the file, i.e. file
    // line `close_offset + 2`; the body begins on the line after it.
    let body_start_line = close_offset + 3;

    let front_matter = serde_yaml::from_str::<FrontMatter>(&yaml_str)
        .map_err(|e| {
            errors.push(ParseError {
                line: 2,
                message: format!("invalid YAML front matter: {e}"),
            });
        })
        .ok();
    (front_matter, body_start_line, body)
}
/// Checks case front matter against the module's limits and known values.
///
/// Every violation is appended to `errors` at line 2 (the first front-matter line), because
/// per-field positions are not tracked. Checks performed:
/// - `id`, when present, has exactly `MAX_CASE_ID_LEN` bytes;
/// - at most `MAX_SOURCES` sources, each with a URL starting with `https://`;
/// - `case_type`, when present, is a known type or a valid custom value;
/// - `status`, when present, is a known status;
/// - at most `MAX_CASE_TAGS` tags, each non-empty and at most `MAX_TAG_LEN` characters.
fn validate_front_matter(fm: &FrontMatter, errors: &mut Vec<ParseError>) {
    if let Some(id) = &fm.id
        && id.len() != MAX_CASE_ID_LEN
    {
        errors.push(ParseError {
            line: 2,
            message: format!(
                "front matter `id` must be a {MAX_CASE_ID_LEN}-char NULID, got {} chars",
                id.len()
            ),
        });
    }

    if fm.sources.len() > MAX_SOURCES {
        errors.push(ParseError {
            line: 2,
            message: format!(
                "front matter `sources` exceeds {MAX_SOURCES} entries (got {})",
                fm.sources.len()
            ),
        });
    }
    for (i, source) in fm.sources.iter().enumerate() {
        if !source.url().starts_with("https://") {
            errors.push(ParseError {
                line: 2,
                message: format!("source[{i}] must be HTTPS, got {:?}", source.url()),
            });
        }
    }

    if let Some(ct) = &fm.case_type {
        use crate::domain::CaseType;
        // Known values are compared in lower snake case, so "Money Laundering" style
        // spellings are accepted.
        let normalized = ct.to_lowercase().replace(' ', "_");
        if !CaseType::KNOWN.contains(&normalized.as_str())
            && crate::domain::parse_custom(ct).is_none()
        {
            errors.push(ParseError {
                line: 2,
                message: format!(
                    "invalid case_type {:?} (known: {}; use \"custom:Value\" for custom)",
                    ct,
                    CaseType::KNOWN.join(", ")
                ),
            });
        }
    }

    if let Some(st) = &fm.status {
        use crate::domain::CaseStatus;
        let normalized = st.to_lowercase().replace(' ', "_");
        if !CaseStatus::KNOWN.contains(&normalized.as_str()) {
            errors.push(ParseError {
                line: 2,
                message: format!(
                    "invalid status {:?} (known: {})",
                    st,
                    CaseStatus::KNOWN.join(", ")
                ),
            });
        }
    }

    if fm.tags.len() > MAX_CASE_TAGS {
        errors.push(ParseError {
            line: 2,
            message: format!(
                "front matter `tags` exceeds {MAX_CASE_TAGS} entries (got {})",
                fm.tags.len()
            ),
        });
    }
    for (i, tag) in fm.tags.iter().enumerate() {
        // The limit and the message are expressed in characters; `str::len` would count
        // UTF-8 bytes and over-count non-ASCII tags.
        let tag_chars = tag.chars().count();
        if tag_chars > MAX_TAG_LEN {
            errors.push(ParseError {
                line: 2,
                message: format!("tag[{i}] exceeds {MAX_TAG_LEN} chars (got {tag_chars})"),
            });
        }
        if tag.is_empty() {
            errors.push(ParseError {
                line: 2,
                message: format!("tag[{i}] must not be empty"),
            });
        }
    }
}
/// Splits a case file body into title, summary, and `##` sections.
///
/// Returns `(title, trimmed summary, sections)`. `body_start_line` is the file line of the
/// first body line. Problems are appended to `errors`:
/// - non-blank text before the H1 title;
/// - a second H1 heading;
/// - a `##` heading that is unknown, duplicated, or not allowed in case files;
/// - a missing title or an over-long summary, both reported at `body_start_line`;
/// - a title longer than `MAX_TITLE_LEN` characters, reported at the H1 line.
// One linear scan with a small state machine; splitting it would scatter the shared state.
#[allow(clippy::too_many_lines)]
fn extract_body(
    body: &str,
    body_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> (String, String, Vec<Section>) {
    let mut title = String::new();
    // File line of the H1 title; `None` until the title has been seen.
    let mut title_line: Option<usize> = None;
    let mut summary_lines: Vec<&str> = Vec::new();
    let mut sections: Vec<Section> = Vec::new();
    let mut current_section_kind: Option<SectionKind> = None;
    let mut current_section_line: usize = 0;
    let mut current_section_body: Vec<&str> = Vec::new();
    let mut state = State::BeforeTitle;

    for (i, line) in body.lines().enumerate() {
        let file_line = body_start_line + i;

        if let Some(heading) = strip_heading(line, 1) {
            if title_line.is_some() {
                errors.push(ParseError {
                    line: file_line,
                    message: "multiple H1 headings found (expected exactly one)".into(),
                });
                continue;
            }
            title = heading.to_string();
            title_line = Some(file_line);
            state = State::Summary;
            continue;
        }

        if let Some(heading) = strip_heading(line, 2) {
            // Any `##` heading closes the section that was open.
            if let Some(kind) = current_section_kind.take() {
                sections.push(Section {
                    kind,
                    body: current_section_body.join("\n"),
                    line: current_section_line,
                });
                current_section_body.clear();
            }
            match SectionKind::from_heading(heading) {
                Some(kind) if kind.is_case_section() => {
                    if sections.iter().any(|s| s.kind == kind) {
                        errors.push(ParseError {
                            line: file_line,
                            message: format!("duplicate section: ## {heading}"),
                        });
                    }
                    current_section_kind = Some(kind);
                    current_section_line = file_line;
                    state = State::InSection;
                }
                Some(_) => {
                    errors.push(ParseError {
                        line: file_line,
                        message: format!(
                            "## {heading} is not allowed in case files (use standalone entity files in people/ or organizations/ instead)"
                        ),
                    });
                }
                None => {
                    errors.push(ParseError {
                        line: file_line,
                        message: format!(
                            "unknown section: ## {heading} (expected one of: {})",
                            KNOWN_CASE_SECTIONS.join(", ")
                        ),
                    });
                }
            }
            continue;
        }

        match state {
            State::BeforeTitle => {
                if !line.trim().is_empty() {
                    errors.push(ParseError {
                        line: file_line,
                        message: "expected H1 title (# Title)".into(),
                    });
                }
            }
            State::Summary => summary_lines.push(line),
            State::InSection => {
                // After a rejected heading no section is open; skip its lines so they do
                // not leak into the body of the next valid section.
                if current_section_kind.is_some() {
                    current_section_body.push(line);
                }
            }
        }
    }

    if let Some(kind) = current_section_kind.take() {
        sections.push(Section {
            kind,
            body: current_section_body.join("\n"),
            line: current_section_line,
        });
    }

    // Limits and messages are expressed in characters, so count characters rather than
    // UTF-8 bytes (`str::len`).
    match title_line {
        None => errors.push(ParseError {
            line: body_start_line,
            message: "missing H1 title".into(),
        }),
        Some(line) => {
            let title_chars = title.chars().count();
            if title_chars > MAX_TITLE_LEN {
                errors.push(ParseError {
                    line,
                    message: format!("H1 title exceeds {MAX_TITLE_LEN} chars (got {title_chars})"),
                });
            }
        }
    }

    let summary = summary_lines.join("\n").trim().to_string();
    let summary_chars = summary.chars().count();
    if summary_chars > MAX_SUMMARY_LEN {
        errors.push(ParseError {
            line: body_start_line,
            message: format!("summary exceeds {MAX_SUMMARY_LEN} chars (got {summary_chars})"),
        });
    }

    (title, summary, sections)
}
/// Where `extract_body` currently is in the document while scanning lines.
#[derive(Clone, Copy)]
enum State {
/// No H1 title seen yet; non-blank text here is reported as an error.
BeforeTitle,
/// After the H1 title: plain lines are collected into the summary.
Summary,
/// After a recognized `##` heading: plain lines are collected into the section body.
InSection,
}
/// Returns the text of a Markdown ATX heading of exactly `level` `#` characters.
///
/// Leading whitespace before the `#` run is ignored. The result is:
/// - `Some("")` when the line consists of the `#` run only;
/// - `Some(trimmed text)` when the run is followed by a space and the text after that space
///   does not itself start with `#`;
/// - `None` otherwise (a different heading level, no space after the run, or not a heading).
fn strip_heading(line: &str, level: usize) -> Option<&str> {
    let candidate = line.trim_start();

    // The first `level` bytes must all be `#`; `get` also rejects lines that are too short.
    let marker = candidate.as_bytes().get(..level)?;
    if marker.iter().any(|&byte| byte != b'#') {
        return None;
    }

    // Safe to slice: every byte before `level` is the single-byte character `#`.
    let rest = &candidate[level..];
    match rest.strip_prefix(' ') {
        Some(text) if text.starts_with('#') => None,
        Some(text) => Some(text.trim()),
        None => rest.is_empty().then_some(""),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders all errors on one line for panic and assertion messages.
    fn describe(errs: &[ParseError]) -> String {
        errs.iter()
            .map(ToString::to_string)
            .collect::<Vec<_>>()
            .join("; ")
    }

    /// Parses a case file that is expected to be valid, panicking with every error otherwise.
    fn parse_ok(input: &str) -> ParsedCase {
        parse(input).unwrap_or_else(|errs| panic!("parse failed: {}", describe(&errs)))
    }

    /// Asserts that parsing `input` as a case file fails with a message containing `needle`.
    fn assert_parse_error(input: &str, needle: &str) {
        let errs = parse(input).unwrap_err();
        assert!(
            errs.iter().any(|e| e.message.contains(needle)),
            "expected an error containing {needle:?}, got: {}",
            describe(&errs)
        );
    }

    /// Asserts that parsing `input` as an entity file fails with a message containing `needle`.
    fn assert_entity_error(input: &str, needle: &str) {
        let errs = parse_entity_file(input).unwrap_err();
        assert!(
            errs.iter().any(|e| e.message.contains(needle)),
            "expected an error containing {needle:?}, got: {}",
            describe(&errs)
        );
    }

    /// Builds a case file with empty sources and the title `Title`, followed by `body` lines.
    fn titled_case(body: &[&str]) -> String {
        let mut lines = vec!["---", "sources: []", "---", "", "# Title", ""];
        lines.extend_from_slice(body);
        lines.join("\n")
    }

    fn minimal_case() -> String {
        [
            "---",
            "id: 01H9XT7H1J3929RK32FWSRKV88",
            "sources:",
            " - https://example.com/source",
            "---",
            "",
            "# Test Case Title",
            "",
            "This is the summary.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Something happened -> Something happened: associate_of",
        ]
        .join("\n")
    }

    #[test]
    fn parse_minimal_case() {
        let case = parse_ok(&minimal_case());
        assert_eq!(case.id.as_deref(), Some("01H9XT7H1J3929RK32FWSRKV88"));
        assert_eq!(case.sources.len(), 1);
        assert_eq!(case.sources[0].url(), "https://example.com/source");
        assert_eq!(case.title, "Test Case Title");
        assert_eq!(case.summary, "This is the summary.");
        assert_eq!(case.sections.len(), 2);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
    }

    #[test]
    fn parse_missing_front_matter() {
        assert_parse_error("# Title\n\nSummary.\n", "front matter");
    }

    #[test]
    fn parse_unclosed_front_matter() {
        assert_parse_error("---\nsources: []\n# Title\n", "unclosed");
    }

    #[test]
    fn parse_invalid_case_id_wrong_length() {
        assert_parse_error("---\nid: short\nsources: []\n---\n\n# Title\n", "NULID");
    }

    #[test]
    fn parse_case_id_absent_is_ok() {
        let input = "---\nsources:\n - https://example.com\n---\n\n# Title\n\nSummary.\n";
        let case = parse_ok(input);
        assert!(case.id.is_none());
    }

    #[test]
    fn parse_non_https_source() {
        assert_parse_error(
            "---\nsources:\n - http://example.com\n---\n\n# Title\n",
            "HTTPS",
        );
    }

    #[test]
    fn parse_too_many_sources() {
        let sources: Vec<String> = (0..21)
            .map(|i| format!(" - https://example.com/{i}"))
            .collect();
        let input = format!("---\nsources:\n{}\n---\n\n# Title\n", sources.join("\n"));
        assert_parse_error(&input, "exceeds 20");
    }

    #[test]
    fn parse_unknown_section() {
        assert_parse_error(&titled_case(&["## Unknown Section", ""]), "unknown section");
    }

    #[test]
    fn parse_duplicate_section() {
        assert_parse_error(
            &titled_case(&["## Events", "", "## Events", ""]),
            "duplicate",
        );
    }

    #[test]
    fn parse_multiple_h1() {
        let input = [
            "---",
            "sources: []",
            "---",
            "",
            "# First Title",
            "",
            "# Second Title",
            "",
        ]
        .join("\n");
        assert_parse_error(&input, "multiple H1");
    }

    #[test]
    fn parse_all_sections() {
        let input = [
            "---",
            "id: 01H9XT7H1KRQ9SJ7SD9ETB5CVQ",
            "sources:",
            " - https://example.com/a",
            "---",
            "",
            "# Full Case",
            "",
            "Summary text here.",
            "",
            "## Events",
            "",
            "### Something happened",
            "- occurred_at: 2025-01-01",
            "",
            "## Relationships",
            "",
            "- Alice -> Corp Inc: employed_by",
            "",
            "## Timeline",
            "",
            "Something happened",
        ]
        .join("\n");
        let case = parse_ok(&input);
        assert_eq!(case.id.as_deref(), Some("01H9XT7H1KRQ9SJ7SD9ETB5CVQ"));
        assert_eq!(case.title, "Full Case");
        assert_eq!(case.summary, "Summary text here.");
        assert_eq!(case.sections.len(), 3);
        assert_eq!(case.sections[0].kind, SectionKind::Events);
        assert_eq!(case.sections[1].kind, SectionKind::Relationships);
        assert_eq!(case.sections[2].kind, SectionKind::Timeline);
    }

    #[test]
    fn parse_empty_summary() {
        let case = parse_ok(&titled_case(&["## Events", ""]));
        assert_eq!(case.summary, "");
    }

    #[test]
    fn parse_multiline_summary() {
        let case = parse_ok(&titled_case(&[
            "First line of summary.",
            "Second line of summary.",
            "",
            "## Events",
            "",
        ]));
        assert_eq!(
            case.summary,
            "First line of summary.\nSecond line of summary."
        );
    }

    #[test]
    fn strip_heading_levels() {
        assert_eq!(strip_heading("# Title", 1), Some("Title"));
        assert_eq!(strip_heading("## Section", 2), Some("Section"));
        assert_eq!(strip_heading("### Entity", 3), Some("Entity"));
        assert_eq!(strip_heading("### Entity", 2), None);
        assert_eq!(strip_heading("## Section", 1), None);
        assert_eq!(strip_heading("Normal text", 1), None);
    }

    #[test]
    fn section_body_content() {
        let case = parse_ok(&titled_case(&[
            "## Events",
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: termination",
            "",
        ]));
        assert_eq!(case.sections.len(), 1);
        let body = &case.sections[0].body;
        assert!(body.contains("### Bonnick dismissal"));
        assert!(body.contains("- occurred_at: 2024-12-24"));
    }

    #[test]
    fn parse_rejects_people_section_in_case_file() {
        assert_parse_error(
            &titled_case(&["## People", ""]),
            "not allowed in case files",
        );
    }

    #[test]
    fn parse_rejects_organizations_section_in_case_file() {
        assert_parse_error(
            &titled_case(&["## Organizations", ""]),
            "not allowed in case files",
        );
    }

    #[test]
    fn parse_entity_file_with_id() {
        let input = [
            "---",
            "id: 01JXYZ123456789ABCDEFGHIJK",
            "---",
            "",
            "# Mark Bonnick",
            "",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: British",
            "",
        ]
        .join("\n");
        let result = parse_entity_file(&input).unwrap();
        assert_eq!(result.id.as_deref(), Some("01JXYZ123456789ABCDEFGHIJK"));
        assert_eq!(result.name, "Mark Bonnick");
        assert!(result.body.contains("- qualifier: Arsenal Kit Manager"));
        assert!(result.body.contains("- nationality: British"));
    }

    #[test]
    fn parse_entity_file_without_id() {
        let input = [
            "---",
            "---",
            "",
            "# Arsenal FC",
            "",
            "- qualifier: English Football Club",
            "- org_type: sports_club",
            "",
        ]
        .join("\n");
        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Arsenal FC");
    }

    #[test]
    fn parse_entity_file_no_front_matter() {
        let input = ["# Bob Smith", "", "- nationality: Dutch", ""].join("\n");
        let result = parse_entity_file(&input).unwrap();
        assert!(result.id.is_none());
        assert_eq!(result.name, "Bob Smith");
        assert!(result.body.contains("- nationality: Dutch"));
    }

    #[test]
    fn parse_entity_file_rejects_h2_sections() {
        let input = [
            "---",
            "---",
            "",
            "# Test Entity",
            "",
            "## Relationships",
            "",
        ]
        .join("\n");
        assert_entity_error(&input, "H2 sections");
    }

    #[test]
    fn parse_entity_file_missing_h1() {
        let input = ["---", "---", "", "- nationality: Dutch", ""].join("\n");
        assert_entity_error(&input, "missing H1");
    }

    #[test]
    fn parse_related_cases_section() {
        let input = [
            "---",
            "tags: [bribery]",
            "sources:",
            " - https://example.com",
            "---",
            "",
            "# Test Case",
            "",
            "Summary text.",
            "",
            "## Related Cases",
            "",
            "- id/corruption/2002/blbi-liquidity-aid-scandal",
            " description: Artalyta bribed Urip to influence the BLBI investigation",
            "- id/corruption/2008/another-case",
            " description: A second related case",
        ]
        .join("\n");
        let case = parse_ok(&input);
        assert_eq!(case.related_cases.len(), 2);
        assert_eq!(
            case.related_cases[0].case_path,
            "id/corruption/2002/blbi-liquidity-aid-scandal"
        );
        assert_eq!(
            case.related_cases[0].description,
            "Artalyta bribed Urip to influence the BLBI investigation"
        );
        assert_eq!(
            case.related_cases[1].case_path,
            "id/corruption/2008/another-case"
        );
        assert_eq!(case.related_cases[1].description, "A second related case");
        // The raw section is consumed once it has been turned into structured entries.
        assert!(
            !case
                .sections
                .iter()
                .any(|s| s.kind == SectionKind::RelatedCases)
        );
    }

    #[test]
    fn parse_related_cases_empty_path() {
        assert_parse_error(
            &titled_case(&[
                "## Related Cases",
                "",
                "- ",
                " description: Some description",
            ]),
            "case path must not be empty",
        );
    }

    #[test]
    fn parse_related_cases_missing_description() {
        assert_parse_error(
            &titled_case(&["## Related Cases", "", "- id/corruption/2002/some-case"]),
            "description",
        );
    }

    #[test]
    fn parse_related_cases_description_too_long() {
        let long_desc = "x".repeat(501);
        let description_line = format!(" description: {long_desc}");
        assert_parse_error(
            &titled_case(&[
                "## Related Cases",
                "",
                "- id/corruption/2002/some-case",
                &description_line,
            ]),
            "exceeds 500",
        );
    }

    #[test]
    fn parse_related_cases_too_many() {
        // One entry more than the limit of ten.
        let entries: Vec<String> = (0..11)
            .flat_map(|i| {
                [
                    format!("- id/corruption/2002/case-{i}"),
                    format!(" description: Description {i}"),
                ]
            })
            .collect();
        let mut body = vec!["## Related Cases", ""];
        body.extend(entries.iter().map(String::as_str));
        assert_parse_error(&titled_case(&body), "exceeds 10");
    }
}