use std::fmt;
use crate::parser::{ParseError, SectionKind};
/// Maximum number of entities one `parse_entities` call accepts before it reports an error.
const MAX_ENTITIES_PER_FILE: usize = 50;
/// Maximum entity name length checked by `build_entity`; compared against `str::len` (UTF-8 bytes).
const MAX_NAME_LEN: usize = 300;
/// Kind of entity produced by this parser.
///
/// Each variant corresponds to one `SectionKind` (see `Label::from_section`) and is
/// rendered as a lowercase word by its `Display` impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Label {
/// Displayed as `person`.
Person,
/// Displayed as `organization`.
Organization,
/// Displayed as `event`.
Event,
/// Displayed as `document`.
Document,
/// Displayed as `asset`.
Asset,
}
impl fmt::Display for Label {
    /// Writes the lowercase singular name of the label (for example `organization`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = match self {
            Self::Person => "person",
            Self::Organization => "organization",
            Self::Event => "event",
            Self::Document => "document",
            Self::Asset => "asset",
        };
        f.write_str(text)
    }
}
impl Label {
    /// Maps a section kind to the label of the entities that section contains.
    ///
    /// Returns `None` for every section kind other than the five entity sections.
    pub fn from_section(kind: SectionKind) -> Option<Self> {
        let label = match kind {
            SectionKind::People => Self::Person,
            SectionKind::Organizations => Self::Organization,
            SectionKind::Events => Self::Event,
            SectionKind::Documents => Self::Document,
            SectionKind::Assets => Self::Asset,
            _ => return None,
        };
        Some(label)
    }
}
/// A single entity parsed from a `### Name` heading and the bullet fields below it.
#[derive(Debug, Clone)]
pub struct Entity {
/// Heading text with surrounding whitespace removed.
pub name: String,
/// Entity kind, derived from the section being parsed.
pub label: Label,
/// Parsed `key: value` fields in source order; the first `id` field is removed and
/// surfaced through `id` instead.
pub fields: Vec<(String, FieldValue)>,
/// Value of the `id` field when it is a non-empty single value, or the id passed to
/// `parse_entity_file_body`.
pub id: Option<String>,
/// Line number of the entity heading.
pub line: usize,
/// Always empty when built by this module; presumably populated by a later stage.
pub tags: Vec<String>,
/// Always `None` when built by this module; presumably populated by a later stage.
pub slug: Option<String>,
}
/// Value of one entity field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldValue {
/// A scalar value; indented continuation lines are appended after a `\n`.
Single(String),
/// Items taken from a nested bullet list, or from a comma-separated value of a list field.
List(Vec<String>),
}
/// Parses the body of a single-entity file into an [`Entity`].
///
/// The body is prefixed with a synthetic `### {name}` heading and passed to
/// [`parse_entities`], so field parsing and validation are shared with section parsing.
/// The last entity parsed is returned with its `id` and `line` replaced by the `id` and
/// `title_line` arguments. If nothing was parsed, an entity with no fields is returned.
///
/// Any problems found are appended to `errors`.
pub fn parse_entity_file_body(
    name: &str,
    body: &str,
    label: Label,
    id: Option<String>,
    title_line: usize,
    errors: &mut Vec<ParseError>,
) -> Entity {
    let section_kind = match label {
        Label::Person => SectionKind::People,
        Label::Organization => SectionKind::Organizations,
        Label::Event => SectionKind::Events,
        Label::Document => SectionKind::Documents,
        Label::Asset => SectionKind::Assets,
    };

    // The synthetic heading occupies `title_line`, so the section starts one line earlier.
    let synthetic = format!("### {name}\n{body}");
    let section_start = title_line.saturating_sub(1);
    let parsed = parse_entities(&synthetic, section_kind, section_start, errors).pop();

    match parsed {
        Some(entity) => Entity {
            id,
            line: title_line,
            ..entity
        },
        None => Entity {
            name: name.to_string(),
            label,
            fields: Vec::new(),
            id,
            line: title_line,
            tags: Vec::new(),
            slug: None,
        },
    }
}
/// Parses every `### Name` entity found in a section body.
///
/// Recognised line shapes, in the order they are tested:
/// * `### Name` starts a new entity and finishes the previous one.
/// * `- key: value` adds a field. An empty value opens a nested list, and a
///   comma-separated value of a list field (see `is_list_field`) becomes a list.
/// * An indented `- item` is appended to the currently open nested list.
/// * Any other indented text continues the previous field. It is an error while a nested
///   list is open.
///
/// Line numbers in errors are computed as `section_start_line + 1 + index`. Returns an
/// empty vector when `section_kind` has no matching [`Label`]. Problems are appended to
/// `errors`; entities are still returned when errors were recorded.
#[allow(clippy::too_many_lines)]
pub fn parse_entities(
    body: &str,
    section_kind: SectionKind,
    section_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> Vec<Entity> {
    let label = match Label::from_section(section_kind) {
        Some(label) => label,
        None => return Vec::new(),
    };

    let mut entities: Vec<Entity> = Vec::new();
    // Heading text and line of the entity currently being collected, if any.
    let mut open_name: Option<String> = None;
    let mut open_line: usize = 0;
    let mut fields: Vec<(String, FieldValue)> = Vec::new();
    // Key and items of a nested list opened by a `- key:` bullet with an empty value.
    let mut list_key: Option<String> = None;
    let mut list_items: Vec<String> = Vec::new();

    for (offset, raw) in body.lines().enumerate() {
        let file_line = section_start_line + 1 + offset;

        if let Some(heading) = strip_h3(raw) {
            flush_pending_list(&mut list_key, &mut list_items, &mut fields);
            if let Some(finished) = open_name.take() {
                entities.push(build_entity(finished, label, open_line, &mut fields, errors));
            }
            open_name = Some(heading.to_string());
            open_line = file_line;
            fields.clear();
            continue;
        }

        let trimmed = raw.trim();

        if open_name.is_none() {
            if !trimmed.is_empty() {
                errors.push(ParseError {
                    line: file_line,
                    message: "content before first entity heading (### Name)".into(),
                });
            }
            continue;
        }

        if let Some(item) = trimmed.strip_prefix("- ") {
            // An indented bullet belongs to the open nested list, when there is one.
            let nested = raw.starts_with(" - ");
            if nested && list_key.is_some() {
                list_items.push(item.trim().to_string());
                continue;
            }

            flush_pending_list(&mut list_key, &mut list_items, &mut fields);
            match parse_bullet(item) {
                None => errors.push(ParseError {
                    line: file_line,
                    message: format!(
                        "invalid field syntax: expected `- key: value`, got {trimmed:?}"
                    ),
                }),
                Some((key, value)) if value.is_empty() => {
                    list_key = Some(key);
                    list_items.clear();
                }
                Some((key, value)) if is_list_field(&key) && value.contains(',') => {
                    let items: Vec<String> = value
                        .split(',')
                        .map(str::trim)
                        .filter(|part| !part.is_empty())
                        .map(str::to_string)
                        .collect();
                    fields.push((key, FieldValue::List(items)));
                }
                Some((key, value)) => fields.push((key, FieldValue::Single(value))),
            }
            continue;
        }

        let is_continuation =
            raw.starts_with(' ') && !trimmed.is_empty() && !trimmed.starts_with('-');
        if is_continuation {
            if list_key.is_some() {
                errors.push(ParseError {
                    line: file_line,
                    message: "unexpected indented text in list context".into(),
                });
            } else if let Some((_, last_value)) = fields.last_mut() {
                match last_value {
                    FieldValue::Single(text) => {
                        text.push('\n');
                        text.push_str(trimmed);
                    }
                    FieldValue::List(items) => {
                        // The continuation may finish the last item and add further ones.
                        let tail = items.pop().unwrap_or_default();
                        let joined = if tail.is_empty() {
                            trimmed.to_string()
                        } else {
                            format!("{tail} {trimmed}")
                        };
                        items.extend(
                            joined
                                .split(',')
                                .map(str::trim)
                                .filter(|part| !part.is_empty())
                                .map(str::to_string),
                        );
                    }
                }
            }
            continue;
        }

        // Any other non-blank line closes the open nested list.
        if !trimmed.is_empty() {
            flush_pending_list(&mut list_key, &mut list_items, &mut fields);
        }
    }

    flush_pending_list(&mut list_key, &mut list_items, &mut fields);
    if let Some(finished) = open_name.take() {
        entities.push(build_entity(finished, label, open_line, &mut fields, errors));
    }

    if entities.len() > MAX_ENTITIES_PER_FILE {
        errors.push(ParseError {
            line: section_start_line,
            message: format!(
                "too many entities in section (max {MAX_ENTITIES_PER_FILE}, got {})",
                entities.len()
            ),
        });
    }

    entities
}
/// Moves an open nested list into `fields` as a [`FieldValue::List`].
///
/// Does nothing when no list key is pending. Otherwise the key is taken, the collected
/// items are drained, and both are pushed as one field.
fn flush_pending_list(
    pending_key: &mut Option<String>,
    pending_items: &mut Vec<String>,
    fields: &mut Vec<(String, FieldValue)>,
) {
    let Some(key) = pending_key.take() else {
        return;
    };
    let items = std::mem::take(pending_items);
    fields.push((key, FieldValue::List(items)));
}
/// Builds an [`Entity`] from a heading and the fields collected beneath it.
///
/// Checks the name (non-blank, at most `MAX_NAME_LEN` bytes), extracts the `id` field,
/// expands the `type` shorthand, normalises enum values and validates the remaining
/// fields. Problems are appended to `errors`. `fields` is left empty on return.
fn build_entity(
    name: String,
    label: Label,
    line: usize,
    fields: &mut Vec<(String, FieldValue)>,
    errors: &mut Vec<ParseError>,
) -> Entity {
    let name_problem = if name.trim().is_empty() {
        Some(String::from("entity name must not be empty"))
    } else if name.len() > MAX_NAME_LEN {
        Some(format!(
            "entity name exceeds {MAX_NAME_LEN} chars (got {})",
            name.len()
        ))
    } else {
        None
    };
    if let Some(message) = name_problem {
        errors.push(ParseError { line, message });
    }

    // Order matters: `id` is removed before validation, and the shorthand is expanded
    // before enum normalisation so the renamed key is recognised.
    let id = extract_id_field(fields);
    apply_type_shorthand(fields, label);
    normalize_enum_fields(fields);
    validate_fields(fields, label, line, errors);

    let fields = std::mem::take(fields);
    Entity {
        name,
        label,
        fields,
        id,
        line,
        tags: Vec::new(),
        slug: None,
    }
}
/// Removes the first `id` field and returns its value.
///
/// The field is removed even when it yields `None`, which happens for an empty single
/// value or a list value.
fn extract_id_field(fields: &mut Vec<(String, FieldValue)>) -> Option<String> {
    let index = fields.iter().position(|(key, _)| key == "id")?;
    match fields.remove(index).1 {
        FieldValue::Single(id) => Some(id).filter(|id| !id.is_empty()),
        FieldValue::List(_) => None,
    }
}
/// Renames every `type` field to the label-specific key (`org_type`, `event_type`,
/// `doc_type`, `asset_type`).
///
/// For `Label::Person` the key stays `type`.
fn apply_type_shorthand(fields: &mut [(String, FieldValue)], label: Label) {
    let replacement = match label {
        Label::Organization => "org_type",
        Label::Event => "event_type",
        Label::Document => "doc_type",
        Label::Asset => "asset_type",
        Label::Person => "type",
    };
    for (key, _) in fields.iter_mut() {
        if key.as_str() == "type" {
            *key = replacement.to_string();
        }
    }
}
/// Splits the text after `- ` into a trimmed `(key, value)` pair at the first `:`.
///
/// Returns `None` when there is no colon or the key is blank. The value may be empty.
fn parse_bullet(item: &str) -> Option<(String, String)> {
    let (raw_key, raw_value) = item.split_once(':')?;
    let key = raw_key.trim();
    if key.is_empty() {
        return None;
    }
    Some((key.to_owned(), raw_value.trim().to_owned()))
}
/// Returns `true` for keys whose comma-separated value is split into a list.
fn is_list_field(key: &str) -> bool {
    const LIST_KEYS: [&str; 3] = ["aliases", "urls", "role"];
    LIST_KEYS.contains(&key)
}
/// Returns the trimmed heading text when `line` is a level-3 heading (`### Name`).
///
/// Leading whitespace before the hashes is ignored. Deeper headings (`#### ...`) and
/// headings whose text starts with `#` immediately after `### ` are rejected.
fn strip_h3(line: &str) -> Option<&str> {
    line.trim_start()
        .strip_prefix("### ")
        .filter(|rest| !rest.starts_with('#'))
        .map(str::trim)
}
/// Field keys accepted for every label (checked in `validate_fields`).
const COMMON_FIELDS: &[&str] = &[
"qualifier",
"aliases",
"thumbnail",
"thumbnail_source",
"urls",
"description",
];
/// Additional field keys accepted for `Label::Person`.
const PERSON_FIELDS: &[&str] = &[
"role",
"nationality",
"date_of_birth",
"place_of_birth",
"status",
];
/// Additional field keys accepted for `Label::Organization`.
const ORGANIZATION_FIELDS: &[&str] = &[
"org_type",
"jurisdiction",
"headquarters",
"founded_date",
"registration_number",
"status",
];
/// Additional field keys accepted for `Label::Event`.
const EVENT_FIELDS: &[&str] = &["event_type", "occurred_at", "jurisdiction", "severity"];
/// Additional field keys accepted for `Label::Document`.
const DOCUMENT_FIELDS: &[&str] = &["doc_type", "issued_at", "issuing_authority", "case_number"];
/// Additional field keys accepted for `Label::Asset`.
const ASSET_FIELDS: &[&str] = &["asset_type", "value", "status"];
use crate::domain;
// Known-value tables, aliased from `crate::domain` so the validators below can refer to
// them by short names.
/// Known values for the `role` field.
const ROLE_VALUES: &[&str] = domain::Role::KNOWN;
/// Known values for the `org_type` field.
const ORG_TYPE_VALUES: &[&str] = domain::OrgType::KNOWN;
/// Known values for the `event_type` field.
const EVENT_TYPE_VALUES: &[&str] = domain::EventType::KNOWN;
/// Known values for the `doc_type` field.
const DOC_TYPE_VALUES: &[&str] = domain::DocType::KNOWN;
/// Known values for the `asset_type` field.
const ASSET_TYPE_VALUES: &[&str] = domain::AssetType::KNOWN;
/// Known values for the `severity` field.
const SEVERITY_VALUES: &[&str] = domain::Severity::KNOWN;
/// Known `status` values for people (used by `validate_status`).
const PERSON_STATUS_VALUES: &[&str] = domain::PersonStatus::KNOWN;
/// Known `status` values for organizations (used by `validate_status`).
const ORG_STATUS_VALUES: &[&str] = domain::OrgStatus::KNOWN;
/// Known `status` values for assets (used by `validate_status`).
const ASSET_STATUS_VALUES: &[&str] = domain::AssetStatus::KNOWN;
/// Per-field limits returned by `field_constraint`.
struct FieldConstraint {
/// Maximum value length; callers compare it against `str::len` (UTF-8 bytes).
max_len: usize,
/// Known values for enum-like fields, or `None` when the field has no known-value table here.
enum_values: Option<&'static [&'static str]>,
}
/// Looks up the length limit and optional known-value table for a single-valued field.
///
/// Returns `None` for keys that have no constraint entry. `status` carries no table here
/// because its allowed values depend on the label and are checked by `validate_status`.
fn field_constraint(key: &str) -> Option<FieldConstraint> {
    let (max_len, enum_values): (usize, Option<&'static [&'static str]>) = match key {
        "description" => (2000, None),
        "thumbnail" | "thumbnail_source" => (2048, None),
        "occurred_at" | "date_of_birth" | "founded_date" | "issued_at" | "opened_at"
        | "closed_at" => (10, None),
        "place_of_birth" | "headquarters" | "issuing_authority" | "value" => (200, None),
        "jurisdiction" => (203, None),
        "role" => (100, Some(ROLE_VALUES)),
        "org_type" => (100, Some(ORG_TYPE_VALUES)),
        "event_type" => (100, Some(EVENT_TYPE_VALUES)),
        "doc_type" => (100, Some(DOC_TYPE_VALUES)),
        "asset_type" => (100, Some(ASSET_TYPE_VALUES)),
        "severity" => (20, Some(SEVERITY_VALUES)),
        "status" => (30, None),
        "qualifier" | "nationality" | "case_number" | "registration_number" => (100, None),
        _ => return None,
    };
    Some(FieldConstraint {
        max_len,
        enum_values,
    })
}
/// Maximum number of items in an `aliases` list.
const MAX_ALIASES: usize = 10;
/// Maximum length of one alias, compared against `str::len` (UTF-8 bytes).
const MAX_ALIAS_LEN: usize = 200;
/// Maximum number of items in a `urls` list.
const MAX_URLS: usize = 10;
/// Maximum length of one URL, compared against `str::len` (UTF-8 bytes).
const MAX_URL_LEN: usize = 2048;
fn normalize_enum_fields(fields: &mut [(String, FieldValue)]) {
for (key, value) in fields.iter_mut() {
let is_enum = field_constraint(key).and_then(|c| c.enum_values).is_some();
match value {
FieldValue::Single(val) if is_enum && !val.starts_with("custom:") => {
let normalized = val.to_lowercase().replace(' ', "_");
if normalized != *val {
*val = normalized;
}
}
FieldValue::List(items) if is_enum => {
for item in items.iter_mut() {
if !item.starts_with("custom:") {
let normalized = item.to_lowercase().replace(' ', "_");
if normalized != *item {
*item = normalized;
}
}
}
}
_ => {}
}
}
}
/// Validates every field of one entity and appends problems to `errors`.
///
/// A key that is neither common nor allowed for `label` is reported as unknown and not
/// validated further. Organizations must also carry an `org_type` field. All errors are
/// reported at `line`.
fn validate_fields(
    fields: &[(String, FieldValue)],
    label: Label,
    line: usize,
    errors: &mut Vec<ParseError>,
) {
    let label_fields: &[&str] = match label {
        Label::Person => PERSON_FIELDS,
        Label::Organization => ORGANIZATION_FIELDS,
        Label::Event => EVENT_FIELDS,
        Label::Document => DOCUMENT_FIELDS,
        Label::Asset => ASSET_FIELDS,
    };
    let is_known = |name: &str| COMMON_FIELDS.contains(&name) || label_fields.contains(&name);

    for (key, value) in fields {
        if !is_known(key.as_str()) {
            errors.push(ParseError {
                line,
                message: format!("unknown field {key:?} for {label}"),
            });
            continue;
        }
        match value {
            FieldValue::Single(val) => validate_single_field(key, val, label, line, errors),
            FieldValue::List(items) => validate_list_field(key, items, line, errors),
        }
    }

    if label == Label::Organization {
        let has_org_type = fields.iter().any(|(k, _)| k == "org_type");
        if !has_org_type {
            errors.push(ParseError {
                line,
                message: "organization entity missing required field \"org_type\"".into(),
            });
        }
    }
}
fn validate_single_field(
key: &str,
val: &str,
label: Label,
line: usize,
errors: &mut Vec<ParseError>,
) {
if let Some(constraint) = field_constraint(key) {
if val.len() > constraint.max_len {
errors.push(ParseError {
line,
message: format!(
"field {key:?} exceeds {} chars (got {})",
constraint.max_len,
val.len()
),
});
}
if let Some(allowed) = constraint.enum_values {
validate_enum_value(key, val, allowed, line, errors);
}
if matches!(
key,
"occurred_at"
| "date_of_birth"
| "founded_date"
| "issued_at"
| "opened_at"
| "closed_at"
) && !val.is_empty()
{
validate_date_format(key, val, line, errors);
}
if matches!(key, "thumbnail" | "thumbnail_source")
&& !val.is_empty()
&& !val.starts_with("https://")
{
errors.push(ParseError {
line,
message: format!("field {key:?} must be HTTPS URL"),
});
}
}
if key == "status" {
validate_status(val, label, line, errors);
}
if key == "jurisdiction" && !val.is_empty() {
validate_jurisdiction(val, line, errors);
}
if key == "value" && !val.is_empty() {
validate_money(val, line, errors);
}
}
/// Validates a list-valued field and appends problems to `errors`.
///
/// * `aliases`: item count and per-item length.
/// * `urls`: item count, per-item length, and the `https://` prefix.
/// * `role`: item count, and every item against the known role values.
///
/// Lists under any other key are not checked here.
fn validate_list_field(key: &str, items: &[String], line: usize, errors: &mut Vec<ParseError>) {
    let count = items.len();
    match key {
        "aliases" => {
            if count > MAX_ALIASES {
                errors.push(ParseError {
                    line,
                    message: format!("aliases exceeds {MAX_ALIASES} items (got {})", count),
                });
            }
            let oversized = items.iter().filter(|item| item.len() > MAX_ALIAS_LEN);
            errors.extend(oversized.map(|item| ParseError {
                line,
                message: format!("alias exceeds {MAX_ALIAS_LEN} chars: {item:?}"),
            }));
        }
        "urls" => {
            if count > MAX_URLS {
                errors.push(ParseError {
                    line,
                    message: format!("urls exceeds {MAX_URLS} items (got {})", count),
                });
            }
            for item in items {
                let too_long = item.len() > MAX_URL_LEN;
                let insecure = !item.starts_with("https://");
                if too_long {
                    errors.push(ParseError {
                        line,
                        message: format!("url exceeds {MAX_URL_LEN} chars: {item:?}"),
                    });
                }
                if insecure {
                    errors.push(ParseError {
                        line,
                        message: format!("url must be HTTPS: {item:?}"),
                    });
                }
            }
        }
        "role" => {
            if count > MAX_ROLES {
                errors.push(ParseError {
                    line,
                    message: format!("role exceeds {MAX_ROLES} items (got {})", count),
                });
            }
            items
                .iter()
                .for_each(|item| validate_enum_value("role", item, ROLE_VALUES, line, errors));
        }
        _ => {}
    }
}
/// Maximum number of items in a `role` list (checked in `validate_list_field`).
const MAX_ROLES: usize = 10;
/// Validates a `status` value against the known statuses for `label`.
///
/// Only people, organizations and assets have statuses; any other label is reported as
/// an error. The value is compared after lowercasing and replacing spaces with
/// underscores, but the original text is what appears in the error message.
fn validate_status(value: &str, label: Label, line: usize, errors: &mut Vec<ParseError>) {
    let known = match label {
        Label::Person => Some(PERSON_STATUS_VALUES),
        Label::Organization => Some(ORG_STATUS_VALUES),
        Label::Asset => Some(ASSET_STATUS_VALUES),
        _ => None,
    };
    let Some(allowed) = known else {
        errors.push(ParseError {
            line,
            message: format!("field \"status\" is not valid for {label}"),
        });
        return;
    };

    let normalized = value.to_lowercase().replace(' ', "_");
    if allowed.iter().any(|candidate| *candidate == normalized) {
        return;
    }
    errors.push(ParseError {
        line,
        message: format!(
            "invalid status {value:?} for {label} (known: {})",
            allowed.join(", ")
        ),
    });
}
/// Validates a jurisdiction of the form `CC` or `CC/Subdivision`.
///
/// `CC` must be exactly two ASCII uppercase letters. When a `/` is present, the text
/// after the first `/` must be non-empty and at most `domain::MAX_SUBDIVISION_LEN` bytes.
/// Problems are appended to `errors`.
fn validate_jurisdiction(value: &str, line: usize, errors: &mut Vec<ParseError>) {
    let is_country_code =
        |code: &str| code.len() == 2 && code.chars().all(|c| c.is_ascii_uppercase());

    match value.split_once('/') {
        Some((country, subdivision)) => {
            if !is_country_code(country) {
                errors.push(ParseError {
                    line,
                    message: format!(
                        "jurisdiction country must be 2-letter uppercase ISO code, got {country:?}"
                    ),
                });
            }
            let subdivision_ok =
                !subdivision.is_empty() && subdivision.len() <= domain::MAX_SUBDIVISION_LEN;
            if !subdivision_ok {
                errors.push(ParseError {
                    line,
                    message: format!(
                        "jurisdiction subdivision must be 1-{} chars",
                        domain::MAX_SUBDIVISION_LEN
                    ),
                });
            }
        }
        None => {
            if !is_country_code(value) {
                errors.push(ParseError {
                    line,
                    message: format!(
                        "jurisdiction must be 2-letter uppercase ISO code or CODE/Subdivision, got {value:?}"
                    ),
                });
            }
        }
    }
}
/// Validates a money value of the form `amount CURRENCY "display"`.
///
/// The value is split on the first two spaces, so the display text may itself contain
/// spaces. Checks performed:
/// * all three parts are present (otherwise one error is reported and the rest skipped),
/// * `amount` parses as an `i64`,
/// * `CURRENCY` is exactly three ASCII uppercase letters,
/// * `display` is wrapped in two double quotes and its inner text is at most
///   `domain::MAX_MONEY_DISPLAY_LEN` bytes.
///
/// Problems are appended to `errors`; the function never panics on malformed input.
fn validate_money(value: &str, line: usize, errors: &mut Vec<ParseError>) {
    let mut parts = value.splitn(3, ' ');
    let (Some(amount), Some(currency), Some(display)) =
        (parts.next(), parts.next(), parts.next())
    else {
        errors.push(ParseError {
            line,
            message: format!(
                "invalid money format: expected `amount currency \"display\"`, got {value:?}"
            ),
        });
        return;
    };

    if amount.parse::<i64>().is_err() {
        errors.push(ParseError {
            line,
            message: format!("money amount must be an integer, got {:?}", amount),
        });
    }

    if currency.len() != 3 || !currency.chars().all(|c| c.is_ascii_uppercase()) {
        errors.push(ParseError {
            line,
            message: format!(
                "money currency must be 3-letter uppercase ISO code, got {currency:?}"
            ),
        });
    }

    // Stripping the suffix from what remains after the prefix requires two distinct quote
    // characters. A lone `"` both starts and ends with a quote, and slicing
    // `[1..len - 1]` on it would panic.
    let inner = display
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    match inner {
        None => errors.push(ParseError {
            line,
            message: format!("money display must be quoted, got {display:?}"),
        }),
        Some(inner) if inner.len() > domain::MAX_MONEY_DISPLAY_LEN => errors.push(ParseError {
            line,
            message: format!(
                "money display exceeds {} chars (got {})",
                domain::MAX_MONEY_DISPLAY_LEN,
                inner.len()
            ),
        }),
        Some(_) => {}
    }
}
/// Validates one enum-like value against `allowed`.
///
/// A `custom:` prefix bypasses the known-value check; the text after the prefix must be
/// 1-100 bytes long. Any other value is compared after lowercasing and replacing spaces
/// with underscores. Problems are appended to `errors`.
fn validate_enum_value(
    key: &str,
    value: &str,
    allowed: &[&str],
    line: usize,
    errors: &mut Vec<ParseError>,
) {
    match value.strip_prefix("custom:") {
        Some(custom) => {
            if !(1..=100).contains(&custom.len()) {
                errors.push(ParseError {
                    line,
                    message: format!(
                        "field {key:?} custom value must be 1-100 chars, got {}",
                        custom.len()
                    ),
                });
            }
        }
        None => {
            let normalized = value.to_lowercase().replace(' ', "_");
            let is_known = allowed.iter().any(|candidate| *candidate == normalized);
            if !is_known {
                errors.push(ParseError {
                    line,
                    message: format!(
                        "invalid {key} value {value:?} (known: {}; use \"custom:Value\" for custom)",
                        allowed.join(", ")
                    ),
                });
            }
        }
    }
}
/// Validates a partial or full calendar date: `YYYY`, `YYYY-MM`, or `YYYY-MM-DD`.
///
/// Besides the digit/dash shape, the month must be in `01..=12` and the day must exist in
/// that month (leap years are taken into account). Shape-only checking would accept
/// values such as `2024-13` or `2023-02-29`. A problem is appended to `errors` when the
/// value is invalid.
fn validate_date_format(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
    /// Number of days in `month` of `year`; `0` for a month outside `1..=12`.
    fn days_in_month(year: u32, month: u32) -> u32 {
        match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 => 29,
            2 => 28,
            _ => 0,
        }
    }

    /// Checks the shape first, then the numeric ranges of the parts that are present.
    fn is_valid_date(value: &str) -> bool {
        let bytes = value.as_bytes();
        let shape_ok = matches!(bytes.len(), 4 | 7 | 10)
            && bytes.iter().enumerate().all(|(i, b)| match i {
                4 | 7 => *b == b'-',
                _ => b.is_ascii_digit(),
            });
        if !shape_ok {
            return false;
        }

        // The shape check guarantees ASCII, so these ranges fall on char boundaries.
        let number = |start: usize, end: usize| {
            value
                .get(start..end)
                .and_then(|digits| digits.parse::<u32>().ok())
        };

        let Some(year) = number(0, 4) else {
            return false;
        };
        if value.len() == 4 {
            return true;
        }
        let Some(month) = number(5, 7) else {
            return false;
        };
        if !(1..=12).contains(&month) {
            return false;
        }
        if value.len() == 7 {
            return true;
        }
        let Some(day) = number(8, 10) else {
            return false;
        };
        (1..=days_in_month(year, month)).contains(&day)
    }

    if !is_valid_date(value) {
        errors.push(ParseError {
            line,
            message: format!("field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"),
        });
    }
}
// Unit tests for entity parsing and field validation. Each test feeds a small section
// body to `parse_entities` and inspects the returned entities and/or collected errors.
#[cfg(test)]
mod tests {
use super::*;
// A person with several single-valued fields, including a description continued on an
// indented line (joined with a newline).
#[test]
fn parse_person_entity() {
let body = [
"",
"### Mark Bonnick",
"- qualifier: Arsenal Kit Manager",
"- nationality: GB",
"- role: custom:Kit Manager",
"- date_of_birth: 1962",
"- description: Academy kit manager at Arsenal FC for 22 years",
" (2001-2024). Age 62 at time of dismissal.",
"",
]
.join("\n");
let mut errors = Vec::new();
let entities = parse_entities(&body, SectionKind::People, 10, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 1);
let e = &entities[0];
assert_eq!(e.name, "Mark Bonnick");
assert_eq!(e.label, Label::Person);
assert_eq!(e.fields.len(), 5);
let desc = e
.fields
.iter()
.find(|(k, _)| k == "description")
.map(|(_, v)| v);
assert_eq!(
desc,
Some(&FieldValue::Single(
"Academy kit manager at Arsenal FC for 22 years\n(2001-2024). Age 62 at time of dismissal.".into()
))
);
}
// A comma-separated `role` value is split into a list.
#[test]
fn parse_person_with_role_list() {
let body = "### Test\n- role: politician, executive\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
let roles = entities[0].fields.iter().find(|(k, _)| k == "role");
assert_eq!(
roles.map(|(_, v)| v),
Some(&FieldValue::List(vec![
"politician".into(),
"executive".into(),
]))
);
}
// A known person status is accepted.
#[test]
fn parse_person_with_status() {
let body = "### Test\n- status: imprisoned\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
}
// An unknown person status is reported.
#[test]
fn reject_invalid_person_status() {
let body = "### Test\n- status: unknown_status\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("invalid status")));
}
// `type` is expanded to `org_type`; inline and nested lists are both parsed.
#[test]
fn parse_organization_with_type_shorthand() {
let body = [
"",
"### Arsenal FC",
"- type: sports_club",
"- jurisdiction: GB",
"- aliases: Arsenal, The Gunners, Arsenal Football Club",
"- urls:",
" - https://www.arsenal.com",
" - https://en.wikipedia.org/wiki/Arsenal_F.C.",
"",
]
.join("\n");
let mut errors = Vec::new();
let entities = parse_entities(&body, SectionKind::Organizations, 20, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 1);
let e = &entities[0];
assert_eq!(e.name, "Arsenal FC");
assert_eq!(e.label, Label::Organization);
let it = e.fields.iter().find(|(k, _)| k == "org_type");
assert_eq!(
it.map(|(_, v)| v),
Some(&FieldValue::Single("sports_club".into()))
);
let aliases = e.fields.iter().find(|(k, _)| k == "aliases");
assert_eq!(
aliases.map(|(_, v)| v),
Some(&FieldValue::List(vec![
"Arsenal".into(),
"The Gunners".into(),
"Arsenal Football Club".into(),
]))
);
let urls = e.fields.iter().find(|(k, _)| k == "urls");
assert_eq!(
urls.map(|(_, v)| v),
Some(&FieldValue::List(vec![
"https://www.arsenal.com".into(),
"https://en.wikipedia.org/wiki/Arsenal_F.C.".into(),
]))
);
}
// `CC/Subdivision` jurisdictions are accepted and stored verbatim.
#[test]
fn parse_organization_with_jurisdiction_subdivision() {
let body = "### Pemkab Bogor\n- org_type: local_government\n- jurisdiction: ID/West Java\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::Organizations, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
let j = entities[0].fields.iter().find(|(k, _)| k == "jurisdiction");
assert_eq!(
j.map(|(_, v)| v),
Some(&FieldValue::Single("ID/West Java".into()))
);
}
// A jurisdiction that is not a two-letter code is reported.
#[test]
fn reject_invalid_jurisdiction() {
let body = "### Test\n- org_type: corporation\n- jurisdiction: England\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::Organizations, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("jurisdiction")));
}
// `type` is expanded to `event_type` for events.
#[test]
fn parse_event_with_type_shorthand() {
let body = [
"",
"### Bonnick dismissal",
"- occurred_at: 2024-12-24",
"- type: dismissal",
"- description: Arsenal dismisses Bonnick.",
"",
]
.join("\n");
let mut errors = Vec::new();
let entities = parse_entities(&body, SectionKind::Events, 50, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
let e = &entities[0];
assert_eq!(e.label, Label::Event);
let dt = e.fields.iter().find(|(k, _)| k == "event_type");
assert_eq!(
dt.map(|(_, v)| v),
Some(&FieldValue::Single("dismissal".into()))
);
}
// An event with a severity and a full date parses without errors.
#[test]
fn parse_event_with_severity() {
let body =
"### Test event\n- event_type: bribery\n- severity: major\n- occurred_at: 2024-01-01\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::Events, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
}
// A document with all document-specific fields parses without errors.
#[test]
fn parse_document_entity() {
let body = [
"### Indictment No. 123",
"- doc_type: indictment",
"- issued_at: 2024-03-15",
"- issuing_authority: Jakarta District Court",
"- case_number: 123/Pid.B/2024/PN.Jkt.Pst",
]
.join("\n");
let mut errors = Vec::new();
let entities = parse_entities(&body, SectionKind::Documents, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 1);
assert_eq!(entities[0].label, Label::Document);
}
// An asset with a well-formed money value and a known status parses without errors.
#[test]
fn parse_asset_entity() {
let body = "### Bribe payment\n- asset_type: cash\n- value: 500000000000 IDR \"Rp 500 billion\"\n- status: seized\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::Assets, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 1);
assert_eq!(entities[0].label, Label::Asset);
}
// A free-text money value is reported.
#[test]
fn reject_invalid_money_format() {
let body = "### Test\n- asset_type: cash\n- value: lots of money\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::Assets, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("money")));
}
// A key that is not in any field table is reported as unknown.
#[test]
fn reject_unknown_field() {
let body = "### Test\n- foobar: value\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("unknown field")));
}
// A key that belongs to a different label is reported as unknown.
#[test]
fn reject_wrong_label_field() {
let body = "### Test\n- org_type: court\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("unknown field")));
}
// A role outside the known values is reported.
#[test]
fn reject_invalid_enum_value() {
let body = "### Test\n- role: wizard\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("invalid role")));
}
// A `custom:` role bypasses the known-value check.
#[test]
fn accept_custom_enum_value() {
let body = "### Test\n- role: custom:Kit Manager\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 1);
}
// Enum values are stored with spaces replaced by underscores.
#[test]
fn normalize_enum_value_spaces_to_underscores() {
let body = "### Test\n- role: civil servant\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 1);
let val = entities[0]
.fields
.iter()
.find(|(k, _)| k == "role")
.map(|(_, v)| match v {
FieldValue::Single(s) => s.as_str(),
_ => "",
});
assert_eq!(val, Some("civil_servant"));
}
// Normalisation also applies to every item of an enum list.
#[test]
fn normalize_enum_list_values() {
let body = "### Test\n- role: civil servant, law enforcement\n";
let mut errors = Vec::new();
let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
let roles = entities[0].fields.iter().find(|(k, _)| k == "role");
assert_eq!(
roles.map(|(_, v)| v),
Some(&FieldValue::List(vec![
"civil_servant".into(),
"law_enforcement".into(),
]))
);
}
// A date that is not in one of the supported shapes is reported.
#[test]
fn reject_invalid_date_format() {
let body = "### Test\n- date_of_birth: January 1990\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("YYYY")));
}
// Year, year-month and full dates are all accepted.
#[test]
fn accept_valid_date_formats() {
for date in &["2024", "2024-01", "2024-01-15"] {
let body = format!("### Test\n- date_of_birth: {date}\n");
let mut errors = Vec::new();
parse_entities(&body, SectionKind::People, 1, &mut errors);
assert!(
errors.is_empty(),
"date {date:?} should be valid: {errors:?}"
);
}
}
// A nested `urls` item that is not HTTPS is reported.
#[test]
fn reject_non_https_url() {
let body = "### Test\n- urls:\n - http://example.com\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
}
// A thumbnail that is not HTTPS is reported.
#[test]
fn reject_non_https_thumbnail() {
let body = "### Test\n- thumbnail: http://example.com/img.jpg\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
}
// Consecutive headings produce separate entities in source order.
#[test]
fn multiple_entities() {
let body = [
"",
"### Alice",
"- nationality: NL",
"",
"### Bob",
"- nationality: GB",
"",
]
.join("\n");
let mut errors = Vec::new();
let entities = parse_entities(&body, SectionKind::People, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
assert_eq!(entities.len(), 2);
assert_eq!(entities[0].name, "Alice");
assert_eq!(entities[1].name, "Bob");
}
// A value longer than the field's length limit is reported.
#[test]
fn field_max_length_violation() {
let long_val = "a".repeat(201);
let body = format!("### Test\n- nationality: {long_val}\n");
let mut errors = Vec::new();
parse_entities(&body, SectionKind::People, 1, &mut errors);
assert!(
errors
.iter()
.any(|e| e.message.contains("exceeds 100 chars"))
);
}
// More aliases than `MAX_ALIASES` is reported.
#[test]
fn too_many_aliases() {
let aliases: Vec<String> = (0..11).map(|i| format!("Alias{i}")).collect();
let body = format!("### Test\n- aliases: {}\n", aliases.join(", "));
let mut errors = Vec::new();
parse_entities(&body, SectionKind::People, 1, &mut errors);
assert!(errors.iter().any(|e| e.message.contains("exceeds 10")));
}
// An organization without `org_type` is reported.
#[test]
fn require_org_type_for_organizations() {
let body = "### Test Corp\n- qualifier: Test\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::Organizations, 1, &mut errors);
assert!(
errors
.iter()
.any(|e| { e.message.contains("missing required field \"org_type\"") })
);
}
// An organization with `org_type` parses without errors.
#[test]
fn accept_organization_with_type() {
let body = "### Test Corp\n- qualifier: Test\n- org_type: corporation\n";
let mut errors = Vec::new();
parse_entities(body, SectionKind::Organizations, 1, &mut errors);
assert!(errors.is_empty(), "errors: {errors:?}");
}
}