// weave_content/entity.rs

1use std::fmt;
2
3use crate::parser::{ParseError, SectionKind};
4
/// Maximum number of entities accepted from one `parse_entities` call; exceeding it
/// is reported as a parse error on the section's start line.
const MAX_ENTITIES_PER_FILE: usize = 50;

/// Maximum length of an entity name, enforced by `build_entity`.
const MAX_NAME_LEN: usize = 300;
10
/// Label derived from the section an entity appears in.
///
/// `Display` renders the variants as `actor`, `institution` and `public_record`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Label {
    /// Entity listed in an `Actors` section.
    Actor,
    /// Entity listed in an `Institutions` section.
    Institution,
    /// Entity listed in an `Events` section.
    PublicRecord,
}
18
19impl fmt::Display for Label {
20    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21        match self {
22            Self::Actor => write!(f, "actor"),
23            Self::Institution => write!(f, "institution"),
24            Self::PublicRecord => write!(f, "public_record"),
25        }
26    }
27}
28
29impl Label {
30    pub fn from_section(kind: SectionKind) -> Option<Self> {
31        match kind {
32            SectionKind::Actors => Some(Self::Actor),
33            SectionKind::Institutions => Some(Self::Institution),
34            SectionKind::Events => Some(Self::PublicRecord),
35            _ => None,
36        }
37    }
38}
39
/// A parsed entity with its name, label, and field map.
#[derive(Debug, Clone)]
pub struct Entity {
    /// Heading text of the entity (trimmed text after `### `).
    pub name: String,
    /// Label derived from the section (or directory) the entity belongs to.
    pub label: Label,
    /// `(key, value)` pairs in the order they were parsed. The `id` field is
    /// removed from this list and the `type` shorthand is already renamed.
    pub fields: Vec<(String, FieldValue)>,
    /// Stored NULID from `- id:` field (None if not yet generated).
    pub id: Option<String>,
    /// Line number (1-indexed) of the H3 heading.
    pub line: usize,
}
51
/// A field value: either a single string or a list of strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldValue {
    /// Scalar value; continuation lines are appended to it separated by `\n`.
    Single(String),
    /// Items of a nested list (`- key:` followed by `  - item` lines) or of a
    /// comma-separated list field.
    List(Vec<String>),
}
58
59/// Parse a single entity from a standalone entity file body.
60/// The body is the text after the H1 heading (bullet fields, no H3 headings).
61/// `label` is determined by the file's directory (actors/ or institutions/).
62/// `id` comes from the front matter (may be None).
63pub fn parse_entity_file_body(
64    name: &str,
65    body: &str,
66    label: Label,
67    id: Option<String>,
68    title_line: usize,
69    errors: &mut Vec<ParseError>,
70) -> Entity {
71    let section_kind = match label {
72        Label::Actor => SectionKind::Actors,
73        Label::Institution => SectionKind::Institutions,
74        Label::PublicRecord => SectionKind::Events,
75    };
76
77    // Wrap the body with a fake H3 heading so we can reuse parse_entities
78    let wrapped = format!("### {name}\n{body}");
79    let mut entities = parse_entities(&wrapped, section_kind, title_line.saturating_sub(1), errors);
80
81    if let Some(mut entity) = entities.pop() {
82        entity.id = id;
83        entity.line = title_line;
84        entity
85    } else {
86        Entity {
87            name: name.to_string(),
88            label,
89            fields: Vec::new(),
90            id,
91            line: title_line,
92        }
93    }
94}
95
96/// Parse entities from an entity section (Actors, Institutions, Events).
97/// The `body` is the text between the H2 heading and the next H2 heading.
98/// `section_start_line` is the line number of the H2 heading in the original file.
99#[allow(clippy::too_many_lines)]
100pub fn parse_entities(
101    body: &str,
102    section_kind: SectionKind,
103    section_start_line: usize,
104    errors: &mut Vec<ParseError>,
105) -> Vec<Entity> {
106    let Some(label) = Label::from_section(section_kind) else {
107        return Vec::new();
108    };
109
110    let lines: Vec<&str> = body.lines().collect();
111    let mut entities: Vec<Entity> = Vec::new();
112    let mut current_name: Option<String> = None;
113    let mut current_line: usize = 0;
114    let mut current_fields: Vec<(String, FieldValue)> = Vec::new();
115    // Track multi-line value continuation and nested list building
116    let mut pending_list_key: Option<String> = None;
117    let mut pending_list_items: Vec<String> = Vec::new();
118
119    for (i, line) in lines.iter().enumerate() {
120        let file_line = section_start_line + 1 + i; // +1 because body starts after the H2 heading line
121
122        // Check for H3 heading
123        if let Some(name) = strip_h3(line) {
124            // Flush pending list
125            flush_pending_list(
126                &mut pending_list_key,
127                &mut pending_list_items,
128                &mut current_fields,
129            );
130
131            // Flush previous entity
132            if let Some(entity_name) = current_name.take() {
133                let entity = build_entity(
134                    entity_name,
135                    label,
136                    current_line,
137                    &mut current_fields,
138                    errors,
139                );
140                entities.push(entity);
141            }
142
143            current_name = Some(name.to_string());
144            current_line = file_line;
145            current_fields.clear();
146            continue;
147        }
148
149        // Only parse bullet fields if we're inside an entity (after an H3)
150        if current_name.is_none() {
151            if !line.trim().is_empty() {
152                errors.push(ParseError {
153                    line: file_line,
154                    message: "content before first entity heading (### Name)".into(),
155                });
156            }
157            continue;
158        }
159
160        let trimmed = line.trim();
161
162        // Nested list item: `  - value` (2-space indent + dash)
163        if let Some(item) = trimmed.strip_prefix("- ") {
164            if line.starts_with("  - ") && pending_list_key.is_some() {
165                // Nested list item for pending list key
166                pending_list_items.push(item.trim().to_string());
167                continue;
168            }
169
170            // Flush pending list before processing new top-level bullet
171            flush_pending_list(
172                &mut pending_list_key,
173                &mut pending_list_items,
174                &mut current_fields,
175            );
176
177            // Top-level bullet: `- key: value` or `- key:`
178            if let Some((key, value)) = parse_bullet(item) {
179                if value.is_empty() {
180                    // Start a nested list: `- urls:`
181                    pending_list_key = Some(key);
182                    pending_list_items.clear();
183                } else if is_list_field(&key) && value.contains(',') {
184                    // Comma-separated list: `- aliases: A, B, C`
185                    let items: Vec<String> = value
186                        .split(',')
187                        .map(|s| s.trim().to_string())
188                        .filter(|s| !s.is_empty())
189                        .collect();
190                    current_fields.push((key, FieldValue::List(items)));
191                } else {
192                    current_fields.push((key, FieldValue::Single(value)));
193                }
194            } else {
195                errors.push(ParseError {
196                    line: file_line,
197                    message: format!(
198                        "invalid field syntax: expected `- key: value`, got {trimmed:?}"
199                    ),
200                });
201            }
202            continue;
203        }
204
205        // Multi-line value continuation (2-space indent, not a bullet)
206        if line.starts_with("  ") && !trimmed.is_empty() && !trimmed.starts_with('-') {
207            if pending_list_key.is_some() {
208                // Could be continuation inside a list context -- treat as error
209                errors.push(ParseError {
210                    line: file_line,
211                    message: "unexpected indented text in list context".into(),
212                });
213            } else if let Some(last) = current_fields.last_mut() {
214                // Append to last single-value field
215                if let FieldValue::Single(ref mut val) = last.1 {
216                    val.push('\n');
217                    val.push_str(trimmed);
218                }
219            }
220            continue;
221        }
222
223        // Blank line or other content -- ignore
224        if !trimmed.is_empty() {
225            // Flush pending list on non-indented non-bullet content
226            flush_pending_list(
227                &mut pending_list_key,
228                &mut pending_list_items,
229                &mut current_fields,
230            );
231        }
232    }
233
234    // Flush final pending list and entity
235    flush_pending_list(
236        &mut pending_list_key,
237        &mut pending_list_items,
238        &mut current_fields,
239    );
240
241    if let Some(entity_name) = current_name.take() {
242        let entity = build_entity(
243            entity_name,
244            label,
245            current_line,
246            &mut current_fields,
247            errors,
248        );
249        entities.push(entity);
250    }
251
252    // Boundary check
253    if entities.len() > MAX_ENTITIES_PER_FILE {
254        errors.push(ParseError {
255            line: section_start_line,
256            message: format!(
257                "too many entities in section (max {MAX_ENTITIES_PER_FILE}, got {})",
258                entities.len()
259            ),
260        });
261    }
262
263    entities
264}
265
266fn flush_pending_list(
267    pending_key: &mut Option<String>,
268    pending_items: &mut Vec<String>,
269    fields: &mut Vec<(String, FieldValue)>,
270) {
271    if let Some(key) = pending_key.take() {
272        fields.push((key, FieldValue::List(std::mem::take(pending_items))));
273    }
274}
275
276fn build_entity(
277    name: String,
278    label: Label,
279    line: usize,
280    fields: &mut Vec<(String, FieldValue)>,
281    errors: &mut Vec<ParseError>,
282) -> Entity {
283    // Validate name
284    if name.trim().is_empty() {
285        errors.push(ParseError {
286            line,
287            message: "entity name must not be empty".into(),
288        });
289    } else if name.len() > MAX_NAME_LEN {
290        errors.push(ParseError {
291            line,
292            message: format!(
293                "entity name exceeds {MAX_NAME_LEN} chars (got {})",
294                name.len()
295            ),
296        });
297    }
298
299    // Extract id field before validation (not a schema field)
300    let id = extract_id_field(fields);
301
302    // Apply type: shorthand
303    apply_type_shorthand(fields, label);
304
305    // Validate fields against schema
306    validate_fields(fields, label, line, errors);
307
308    Entity {
309        name,
310        label,
311        fields: std::mem::take(fields),
312        id,
313        line,
314    }
315}
316
317/// Extract and remove the `id` field from the field list.
318fn extract_id_field(fields: &mut Vec<(String, FieldValue)>) -> Option<String> {
319    let pos = fields.iter().position(|(k, _)| k == "id")?;
320    let (_, value) = fields.remove(pos);
321    match value {
322        FieldValue::Single(s) if !s.is_empty() => Some(s),
323        _ => None,
324    }
325}
326
327/// Replace `type:` shorthand with the label-specific field name.
328fn apply_type_shorthand(fields: &mut [(String, FieldValue)], label: Label) {
329    for field in fields.iter_mut() {
330        if field.0 == "type" {
331            field.0 = match label {
332                Label::Institution => "institution_type".to_string(),
333                Label::PublicRecord => "document_type".to_string(),
334                Label::Actor => "type".to_string(), // will be caught as unknown
335            };
336        }
337    }
338}
339
/// Splits a bullet item (the text after `- `) into a trimmed `(key, value)` pair.
///
/// The split happens at the first `:`; the value may be empty and may itself contain
/// colons. Returns `None` when there is no colon or the key is blank.
fn parse_bullet(item: &str) -> Option<(String, String)> {
    let (raw_key, raw_value) = item.split_once(':')?;
    let key = raw_key.trim();
    if key.is_empty() {
        return None;
    }
    Some((key.to_string(), raw_value.trim().to_string()))
}
350
/// Returns `true` for the field names whose values are lists (`aliases`, `urls`).
fn is_list_field(key: &str) -> bool {
    const LIST_FIELDS: [&str; 2] = ["aliases", "urls"];
    LIST_FIELDS.contains(&key)
}
355
/// Returns the trimmed heading text when `line` is an H3 heading (`### Text`).
///
/// Leading whitespace before the hashes is allowed. Lines whose text directly
/// starts with another `#` are rejected, as is anything without the `### ` prefix.
fn strip_h3(line: &str) -> Option<&str> {
    line.trim_start()
        .strip_prefix("### ")
        .filter(|rest| !rest.starts_with('#'))
        .map(str::trim)
}
367
368// --- Field validation ---
369
/// Field names accepted for every label; `validate_fields` combines them with the
/// label-specific list below.
const COMMON_FIELDS: &[&str] = &[
    "qualifier",
    "aliases",
    "thumbnail",
    "thumbnail_source",
    "occurred_at",
    "urls",
    "description",
];

/// Additional field names accepted for `Label::Actor`.
const ACTOR_FIELDS: &[&str] = &[
    "date_of_birth",
    "place_of_birth",
    "nationality",
    "occupation",
];

/// Additional field names accepted for `Label::Institution`.
const INSTITUTION_FIELDS: &[&str] = &[
    "institution_type",
    "jurisdiction",
    "headquarters",
    "founded_date",
    "registration_number",
];

/// Additional field names accepted for `Label::PublicRecord`.
const PUBLIC_RECORD_FIELDS: &[&str] = &[
    "document_type",
    "case_number",
    "filing_date",
    "issuing_authority",
];

/// Known values of the `occupation` field (other values need the `custom:` prefix).
const OCCUPATION_VALUES: &[&str] = &[
    "politician",
    "executive",
    "journalist",
    "lawyer",
    "footballer",
    "activist",
    "civil_servant",
    "military",
    "academic",
    "lobbyist",
];

/// Known values of the `institution_type` field (other values need the `custom:` prefix).
const INSTITUTION_TYPE_VALUES: &[&str] = &[
    "football_club",
    "political_party",
    "corporation",
    "government_agency",
    "court",
    "law_enforcement",
    "ngo",
    "media",
    "regulatory_body",
    "military",
    "university",
    "trade_union",
    "lobby_group",
    "sports_body",
];

/// Known values of the `document_type` field (other values need the `custom:` prefix).
const DOCUMENT_TYPE_VALUES: &[&str] = &[
    "court_ruling",
    "criminal_charge",
    "contract",
    "legislation",
    "filing",
    "investigation",
    "termination",
    "transfer",
    "election_result",
    "financial_disclosure",
    "sanctions",
    "permit",
    "audit_report",
];
449
/// Validation limits for a single-valued field, as returned by `field_constraint`.
struct FieldConstraint {
    /// Maximum accepted length of the value.
    max_len: usize,
    /// If Some, the field is an enum with these known values.
    enum_values: Option<&'static [&'static str]>,
}
456
457fn field_constraint(key: &str) -> Option<FieldConstraint> {
458    match key {
459        "description" => Some(FieldConstraint {
460            max_len: 2000,
461            enum_values: None,
462        }),
463        "thumbnail" | "thumbnail_source" => Some(FieldConstraint {
464            max_len: 2048,
465            enum_values: None,
466        }),
467        "occurred_at" | "date_of_birth" | "founded_date" | "filing_date" => Some(FieldConstraint {
468            max_len: 10,
469            enum_values: None,
470        }),
471        "place_of_birth" | "jurisdiction" | "headquarters" | "issuing_authority" => {
472            Some(FieldConstraint {
473                max_len: 200,
474                enum_values: None,
475            })
476        }
477        "occupation" => Some(FieldConstraint {
478            max_len: 100,
479            enum_values: Some(OCCUPATION_VALUES),
480        }),
481        "institution_type" => Some(FieldConstraint {
482            max_len: 100,
483            enum_values: Some(INSTITUTION_TYPE_VALUES),
484        }),
485        "document_type" => Some(FieldConstraint {
486            max_len: 100,
487            enum_values: Some(DOCUMENT_TYPE_VALUES),
488        }),
489        "qualifier" | "nationality" | "case_number" | "registration_number" => {
490            Some(FieldConstraint {
491                max_len: 100,
492                enum_values: None,
493            })
494        }
495        // List fields validated separately
496        _ => None,
497    }
498}
499
/// Maximum number of items in an `aliases` list.
const MAX_ALIASES: usize = 10;
/// Maximum length of a single alias.
const MAX_ALIAS_LEN: usize = 200;
/// Maximum number of items in a `urls` list.
const MAX_URLS: usize = 10;
/// Maximum length of a single URL.
const MAX_URL_LEN: usize = 2048;
505
506fn validate_fields(
507    fields: &[(String, FieldValue)],
508    label: Label,
509    line: usize,
510    errors: &mut Vec<ParseError>,
511) {
512    let label_fields: &[&str] = match label {
513        Label::Actor => ACTOR_FIELDS,
514        Label::Institution => INSTITUTION_FIELDS,
515        Label::PublicRecord => PUBLIC_RECORD_FIELDS,
516    };
517
518    for (key, value) in fields {
519        // Check if field is known
520        if !COMMON_FIELDS.contains(&key.as_str()) && !label_fields.contains(&key.as_str()) {
521            errors.push(ParseError {
522                line,
523                message: format!("unknown field {key:?} for {label}"),
524            });
525            continue;
526        }
527
528        match value {
529            FieldValue::Single(val) => {
530                if let Some(constraint) = field_constraint(key) {
531                    if val.len() > constraint.max_len {
532                        errors.push(ParseError {
533                            line,
534                            message: format!(
535                                "field {key:?} exceeds {} chars (got {})",
536                                constraint.max_len,
537                                val.len()
538                            ),
539                        });
540                    }
541
542                    // Validate enum values
543                    if let Some(allowed) = constraint.enum_values {
544                        validate_enum_value(key, val, allowed, line, errors);
545                    }
546
547                    // Validate date format
548                    if matches!(
549                        key.as_str(),
550                        "occurred_at" | "date_of_birth" | "founded_date" | "filing_date"
551                    ) && !val.is_empty()
552                    {
553                        validate_date_format(key, val, line, errors);
554                    }
555
556                    // Validate URL fields
557                    if matches!(key.as_str(), "thumbnail" | "thumbnail_source")
558                        && !val.is_empty()
559                        && !val.starts_with("https://")
560                    {
561                        errors.push(ParseError {
562                            line,
563                            message: format!("field {key:?} must be HTTPS URL"),
564                        });
565                    }
566                }
567            }
568            FieldValue::List(items) => match key.as_str() {
569                "aliases" => {
570                    if items.len() > MAX_ALIASES {
571                        errors.push(ParseError {
572                            line,
573                            message: format!(
574                                "aliases exceeds {MAX_ALIASES} items (got {})",
575                                items.len()
576                            ),
577                        });
578                    }
579                    for item in items {
580                        if item.len() > MAX_ALIAS_LEN {
581                            errors.push(ParseError {
582                                line,
583                                message: format!("alias exceeds {MAX_ALIAS_LEN} chars: {item:?}"),
584                            });
585                        }
586                    }
587                }
588                "urls" => {
589                    if items.len() > MAX_URLS {
590                        errors.push(ParseError {
591                            line,
592                            message: format!("urls exceeds {MAX_URLS} items (got {})", items.len()),
593                        });
594                    }
595                    for item in items {
596                        if item.len() > MAX_URL_LEN {
597                            errors.push(ParseError {
598                                line,
599                                message: format!("url exceeds {MAX_URL_LEN} chars: {item:?}"),
600                            });
601                        }
602                        if !item.starts_with("https://") {
603                            errors.push(ParseError {
604                                line,
605                                message: format!("url must be HTTPS: {item:?}"),
606                            });
607                        }
608                    }
609                }
610                _ => {}
611            },
612        }
613    }
614}
615
616fn validate_enum_value(
617    key: &str,
618    value: &str,
619    allowed: &[&str],
620    line: usize,
621    errors: &mut Vec<ParseError>,
622) {
623    // custom: prefix is always valid (if non-empty after prefix, max 100 chars)
624    if let Some(custom) = value.strip_prefix("custom:") {
625        if custom.is_empty() || custom.len() > 100 {
626            errors.push(ParseError {
627                line,
628                message: format!(
629                    "field {key:?} custom value must be 1-100 chars, got {}",
630                    custom.len()
631                ),
632            });
633        }
634        return;
635    }
636
637    let normalized = value.to_lowercase().replace(' ', "_");
638    if !allowed.contains(&normalized.as_str()) {
639        errors.push(ParseError {
640            line,
641            message: format!(
642                "invalid {key} value {value:?} (known: {}; use \"custom:Value\" for custom)",
643                allowed.join(", ")
644            ),
645        });
646    }
647}
648
649fn validate_date_format(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
650    // Valid formats: YYYY, YYYY-MM, YYYY-MM-DD
651    let valid = matches!(value.len(), 4 | 7 | 10)
652        && value.chars().enumerate().all(|(i, c)| match i {
653            4 | 7 => c == '-',
654            _ => c.is_ascii_digit(),
655        });
656
657    if !valid {
658        errors.push(ParseError {
659            line,
660            message: format!("field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"),
661        });
662    }
663}
664
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_entities` and returns the entities together with the collected errors.
    fn run(body: &str, kind: SectionKind, start_line: usize) -> (Vec<Entity>, Vec<ParseError>) {
        let mut errors = Vec::new();
        let entities = parse_entities(body, kind, start_line, &mut errors);
        (entities, errors)
    }

    /// Looks up the value of the first field named `key`.
    fn field<'a>(entity: &'a Entity, key: &str) -> Option<&'a FieldValue> {
        entity
            .fields
            .iter()
            .find(|(name, _)| name == key)
            .map(|(_, value)| value)
    }

    /// Returns true when any error message contains `needle`.
    fn mentions(errors: &[ParseError], needle: &str) -> bool {
        errors.iter().any(|e| e.message.contains(needle))
    }

    #[test]
    fn parse_actor_entity() {
        let body = concat!(
            "\n",
            "### Mark Bonnick\n",
            "- qualifier: Arsenal Kit Manager\n",
            "- nationality: British\n",
            "- occupation: custom:Kit Manager\n",
            "- date_of_birth: 1962\n",
            "- description: Academy kit manager at Arsenal FC for 22 years\n",
            "  (2001-2024). Age 62 at time of dismissal.\n",
        );

        let (entities, errors) = run(body, SectionKind::Actors, 10);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);

        let e = &entities[0];
        assert_eq!(e.name, "Mark Bonnick");
        assert_eq!(e.label, Label::Actor);
        assert_eq!(e.fields.len(), 5);

        // The continuation line is appended to the description after a newline.
        assert_eq!(
            field(e, "description"),
            Some(&FieldValue::Single(
                "Academy kit manager at Arsenal FC for 22 years\n(2001-2024). Age 62 at time of dismissal.".into()
            ))
        );
    }

    #[test]
    fn parse_institution_with_type_shorthand() {
        let body = concat!(
            "\n",
            "### Arsenal FC\n",
            "- type: football_club\n",
            "- jurisdiction: England\n",
            "- aliases: Arsenal, The Gunners, Arsenal Football Club\n",
            "- urls:\n",
            "  - https://www.arsenal.com\n",
            "  - https://en.wikipedia.org/wiki/Arsenal_F.C.\n",
        );

        let (entities, errors) = run(body, SectionKind::Institutions, 20);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);

        let e = &entities[0];
        assert_eq!(e.name, "Arsenal FC");
        assert_eq!(e.label, Label::Institution);

        // `type:` is expanded to `institution_type:`.
        assert_eq!(
            field(e, "institution_type"),
            Some(&FieldValue::Single("football_club".into()))
        );

        // Comma-separated aliases become a list.
        assert_eq!(
            field(e, "aliases"),
            Some(&FieldValue::List(vec![
                "Arsenal".into(),
                "The Gunners".into(),
                "Arsenal Football Club".into(),
            ]))
        );

        // Nested bullets become a list.
        assert_eq!(
            field(e, "urls"),
            Some(&FieldValue::List(vec![
                "https://www.arsenal.com".into(),
                "https://en.wikipedia.org/wiki/Arsenal_F.C.".into(),
            ]))
        );
    }

    #[test]
    fn parse_event_with_type_shorthand() {
        let body = concat!(
            "\n",
            "### Bonnick dismissal\n",
            "- occurred_at: 2024-12-24\n",
            "- type: termination\n",
            "- description: Arsenal dismisses Bonnick.\n",
        );

        let (entities, errors) = run(body, SectionKind::Events, 50);
        assert!(errors.is_empty(), "errors: {errors:?}");

        let e = &entities[0];
        assert_eq!(e.label, Label::PublicRecord);
        assert_eq!(
            field(e, "document_type"),
            Some(&FieldValue::Single("termination".into()))
        );
    }

    #[test]
    fn reject_unknown_field() {
        let (_, errors) = run("### Test\n- foobar: value\n", SectionKind::Actors, 1);
        assert!(mentions(&errors, "unknown field"));
    }

    #[test]
    fn reject_wrong_label_field() {
        // `institution_type` is not an actor field.
        let (_, errors) = run("### Test\n- institution_type: court\n", SectionKind::Actors, 1);
        assert!(mentions(&errors, "unknown field"));
    }

    #[test]
    fn reject_invalid_enum_value() {
        let (_, errors) = run("### Test\n- occupation: wizard\n", SectionKind::Actors, 1);
        assert!(mentions(&errors, "invalid occupation"));
    }

    #[test]
    fn accept_custom_enum_value() {
        let (entities, errors) = run(
            "### Test\n- occupation: custom:Kit Manager\n",
            SectionKind::Actors,
            1,
        );
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);
    }

    #[test]
    fn reject_invalid_date_format() {
        let (_, errors) = run(
            "### Test\n- date_of_birth: January 1990\n",
            SectionKind::Actors,
            1,
        );
        assert!(mentions(&errors, "YYYY"));
    }

    #[test]
    fn accept_valid_date_formats() {
        for date in &["2024", "2024-01", "2024-01-15"] {
            let body = format!("### Test\n- date_of_birth: {date}\n");
            let (_, errors) = run(&body, SectionKind::Actors, 1);
            assert!(
                errors.is_empty(),
                "date {date:?} should be valid: {errors:?}"
            );
        }
    }

    #[test]
    fn reject_non_https_url() {
        let (_, errors) = run(
            "### Test\n- urls:\n  - http://example.com\n",
            SectionKind::Actors,
            1,
        );
        assert!(mentions(&errors, "HTTPS"));
    }

    #[test]
    fn reject_non_https_thumbnail() {
        let (_, errors) = run(
            "### Test\n- thumbnail: http://example.com/img.jpg\n",
            SectionKind::Actors,
            1,
        );
        assert!(mentions(&errors, "HTTPS"));
    }

    #[test]
    fn multiple_entities() {
        let body = concat!(
            "\n",
            "### Alice\n",
            "- nationality: Dutch\n",
            "\n",
            "### Bob\n",
            "- nationality: British\n",
        );

        let (entities, errors) = run(body, SectionKind::Actors, 1);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 2);
        assert_eq!(entities[0].name, "Alice");
        assert_eq!(entities[1].name, "Bob");
    }

    #[test]
    fn field_max_length_violation() {
        let long_val = "a".repeat(201);
        let body = format!("### Test\n- nationality: {long_val}\n");
        let (_, errors) = run(&body, SectionKind::Actors, 1);
        assert!(mentions(&errors, "exceeds 100 chars"));
    }

    #[test]
    fn too_many_aliases() {
        let aliases: Vec<String> = (0..11).map(|i| format!("Alias{i}")).collect();
        let body = format!("### Test\n- aliases: {}\n", aliases.join(", "));
        let (_, errors) = run(&body, SectionKind::Actors, 1);
        assert!(mentions(&errors, "exceeds 10"));
    }
}