1use std::fmt;
2
3use crate::parser::{ParseError, SectionKind};
4
/// Number of entities above which `parse_entities` reports a
/// "too many entities" error for the body it was given.
const MAX_ENTITIES_PER_FILE: usize = 50;

/// Upper bound on an entity name's length. `build_entity` compares it against
/// `str::len`, i.e. the name's size in UTF-8 bytes.
const MAX_NAME_LEN: usize = 300;
10
/// Kind of entity produced by this parser.
///
/// Each variant corresponds to one `SectionKind` (see `Label::from_section`)
/// and selects which label-specific fields are accepted during validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Label {
    /// Entity parsed from a `SectionKind::People` section.
    Person,
    /// Entity parsed from a `SectionKind::Organizations` section.
    Organization,
    /// Entity parsed from a `SectionKind::Events` section.
    Event,
    /// Entity parsed from a `SectionKind::Documents` section.
    Document,
    /// Entity parsed from a `SectionKind::Assets` section.
    Asset,
}
20
21impl fmt::Display for Label {
22 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23 match self {
24 Self::Person => write!(f, "person"),
25 Self::Organization => write!(f, "organization"),
26 Self::Event => write!(f, "event"),
27 Self::Document => write!(f, "document"),
28 Self::Asset => write!(f, "asset"),
29 }
30 }
31}
32
33impl Label {
34 pub fn from_section(kind: SectionKind) -> Option<Self> {
35 match kind {
36 SectionKind::People => Some(Self::Person),
37 SectionKind::Organizations => Some(Self::Organization),
38 SectionKind::Events => Some(Self::Event),
39 SectionKind::Documents => Some(Self::Document),
40 SectionKind::Assets => Some(Self::Asset),
41 _ => None,
42 }
43 }
44}
45
/// One parsed entity: a `### Name` heading plus the bullet fields below it.
#[derive(Debug, Clone)]
pub struct Entity {
    /// Heading text after `### `, trimmed of surrounding whitespace.
    pub name: String,
    /// Entity kind, derived from the section the entity was parsed from.
    pub label: Label,
    /// Remaining `(key, value)` pairs in source order, after the `id` field
    /// has been removed, the `type` shorthand expanded and enum values
    /// normalized.
    pub fields: Vec<(String, FieldValue)>,
    /// Value of the entity's `id` field when present and non-empty;
    /// `parse_entity_file_body` overwrites it with the id it is given.
    pub id: Option<String>,
    /// Line number of the entity heading, used for error reporting.
    pub line: usize,
    /// Always empty as constructed in this file.
    /// NOTE(review): presumably filled in by a later stage — confirm.
    pub tags: Vec<String>,
    /// Always `None` as constructed in this file.
    /// NOTE(review): presumably filled in by a later stage — confirm.
    pub slug: Option<String>,
}
62
/// Value of a single entity field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldValue {
    /// Scalar value written on the bullet line (`- key: value`); continuation
    /// lines are appended to it separated by `\n`.
    Single(String),
    /// Multiple values, written either comma-separated on the bullet line
    /// (list fields only) or as nested `- item` bullets under an empty
    /// `- key:` line.
    List(Vec<String>),
}
69
70pub fn parse_entity_file_body(
75 name: &str,
76 body: &str,
77 label: Label,
78 id: Option<String>,
79 title_line: usize,
80 errors: &mut Vec<ParseError>,
81) -> Entity {
82 let section_kind = match label {
83 Label::Person => SectionKind::People,
84 Label::Organization => SectionKind::Organizations,
85 Label::Event => SectionKind::Events,
86 Label::Document => SectionKind::Documents,
87 Label::Asset => SectionKind::Assets,
88 };
89
90 let wrapped = format!("### {name}\n{body}");
92 let mut entities = parse_entities(&wrapped, section_kind, title_line.saturating_sub(1), errors);
93
94 if let Some(mut entity) = entities.pop() {
95 entity.id = id;
96 entity.line = title_line;
97 entity
98 } else {
99 Entity {
100 name: name.to_string(),
101 label,
102 fields: Vec::new(),
103 id,
104 line: title_line,
105 tags: Vec::new(),
106 slug: None,
107 }
108 }
109}
110
/// Parses every `### Name` entity block found in a section body.
///
/// Line handling, in order of precedence:
/// * `### Name` starts a new entity and finishes the previous one.
/// * Non-blank content before the first heading is reported as an error.
/// * `- key: value` adds a field. An empty value opens a nested list; for
///   list fields a value containing commas is split into a list.
/// * A bullet whose raw line starts with ` - ` while a nested list is open is
///   appended to that list.
/// * An indented non-bullet line continues the previous field (joined with
///   `\n` for scalars, merged into the last item and re-split on commas for
///   lists); inside an open nested list it is an error.
/// * Any other non-blank line closes an open nested list and is otherwise
///   ignored.
///
/// * `body` - section text, without the section heading itself.
/// * `section_kind` - kind of the section; sections without a matching
///   [`Label`] yield an empty result and no errors.
/// * `section_start_line` - line number of the line preceding `body`; the
///   first body line is reported as `section_start_line + 1`.
/// * `errors` - receives every parse/validation error encountered.
///
/// Returns the entities in source order. When more than
/// `MAX_ENTITIES_PER_FILE` are found an error is pushed, but all parsed
/// entities are still returned.
// The line-by-line state machine is kept in one function on purpose.
#[allow(clippy::too_many_lines)]
pub fn parse_entities(
    body: &str,
    section_kind: SectionKind,
    section_start_line: usize,
    errors: &mut Vec<ParseError>,
) -> Vec<Entity> {
    // Sections that do not map to a label hold no entities.
    let Some(label) = Label::from_section(section_kind) else {
        return Vec::new();
    };

    let lines: Vec<&str> = body.lines().collect();
    let mut entities: Vec<Entity> = Vec::new();
    // State of the entity currently being collected.
    let mut current_name: Option<String> = None;
    let mut current_line: usize = 0;
    let mut current_fields: Vec<(String, FieldValue)> = Vec::new();
    // Key of a `- key:` bullet with an empty value whose nested items are
    // still being collected, plus the items gathered so far.
    let mut pending_list_key: Option<String> = None;
    let mut pending_list_items: Vec<String> = Vec::new();

    for (i, line) in lines.iter().enumerate() {
        // Line number used in error reports for this body line.
        let file_line = section_start_line + 1 + i;

        // A heading closes the previous entity (if any) and opens a new one.
        if let Some(name) = strip_h3(line) {
            flush_pending_list(
                &mut pending_list_key,
                &mut pending_list_items,
                &mut current_fields,
            );

            if let Some(entity_name) = current_name.take() {
                let entity = build_entity(
                    entity_name,
                    label,
                    current_line,
                    &mut current_fields,
                    errors,
                );
                entities.push(entity);
            }

            current_name = Some(name.to_string());
            current_line = file_line;
            current_fields.clear();
            continue;
        }

        // Nothing but blank lines may precede the first heading.
        if current_name.is_none() {
            if !line.trim().is_empty() {
                errors.push(ParseError {
                    line: file_line,
                    message: "content before first entity heading (### Name)".into(),
                });
            }
            continue;
        }

        let trimmed = line.trim();

        if let Some(item) = trimmed.strip_prefix("- ") {
            // Indented bullet while a nested list is open: a list item.
            if line.starts_with(" - ") && pending_list_key.is_some() {
                pending_list_items.push(item.trim().to_string());
                continue;
            }

            // Any other bullet ends the nested list that was being collected.
            flush_pending_list(
                &mut pending_list_key,
                &mut pending_list_items,
                &mut current_fields,
            );

            if let Some((key, value)) = parse_bullet(item) {
                if value.is_empty() {
                    // `- key:` with nothing after the colon opens a nested list.
                    pending_list_key = Some(key);
                    pending_list_items.clear();
                } else if is_list_field(&key) && value.contains(',') {
                    // Inline comma-separated list; empty pieces are dropped.
                    let items: Vec<String> = value
                        .split(',')
                        .map(|s| s.trim().to_string())
                        .filter(|s| !s.is_empty())
                        .collect();
                    current_fields.push((key, FieldValue::List(items)));
                } else {
                    current_fields.push((key, FieldValue::Single(value)));
                }
            } else {
                errors.push(ParseError {
                    line: file_line,
                    message: format!(
                        "invalid field syntax: expected `- key: value`, got {trimmed:?}"
                    ),
                });
            }
            continue;
        }

        // Indented, non-bullet text: continuation of the previous field.
        if line.starts_with(" ") && !trimmed.is_empty() && !trimmed.starts_with('-') {
            if pending_list_key.is_some() {
                errors.push(ParseError {
                    line: file_line,
                    message: "unexpected indented text in list context".into(),
                });
            } else if let Some(last) = current_fields.last_mut() {
                match last.1 {
                    FieldValue::Single(ref mut val) => {
                        val.push('\n');
                        val.push_str(trimmed);
                    }
                    FieldValue::List(ref mut items) => {
                        // The continuation may finish the last item and/or
                        // add further comma-separated items, so merge it with
                        // the last item and split the result again.
                        let tail = items.pop().unwrap_or_default();
                        let joined = if tail.is_empty() {
                            trimmed.to_string()
                        } else {
                            format!("{tail} {trimmed}")
                        };
                        for part in joined.split(',') {
                            let part = part.trim().to_string();
                            if !part.is_empty() {
                                items.push(part);
                            }
                        }
                    }
                }
            }
            continue;
        }

        // Any other non-blank line closes an open nested list; the line
        // itself is not recorded.
        if !trimmed.is_empty() {
            flush_pending_list(
                &mut pending_list_key,
                &mut pending_list_items,
                &mut current_fields,
            );
        }
    }

    // End of input: close whatever is still open.
    flush_pending_list(
        &mut pending_list_key,
        &mut pending_list_items,
        &mut current_fields,
    );

    if let Some(entity_name) = current_name.take() {
        let entity = build_entity(
            entity_name,
            label,
            current_line,
            &mut current_fields,
            errors,
        );
        entities.push(entity);
    }

    // The limit is reported but not enforced: all entities are returned.
    if entities.len() > MAX_ENTITIES_PER_FILE {
        errors.push(ParseError {
            line: section_start_line,
            message: format!(
                "too many entities in section (max {MAX_ENTITIES_PER_FILE}, got {})",
                entities.len()
            ),
        });
    }

    entities
}
298
299fn flush_pending_list(
300 pending_key: &mut Option<String>,
301 pending_items: &mut Vec<String>,
302 fields: &mut Vec<(String, FieldValue)>,
303) {
304 if let Some(key) = pending_key.take() {
305 fields.push((key, FieldValue::List(std::mem::take(pending_items))));
306 }
307}
308
309fn build_entity(
310 name: String,
311 label: Label,
312 line: usize,
313 fields: &mut Vec<(String, FieldValue)>,
314 errors: &mut Vec<ParseError>,
315) -> Entity {
316 if name.trim().is_empty() {
318 errors.push(ParseError {
319 line,
320 message: "entity name must not be empty".into(),
321 });
322 } else if name.len() > MAX_NAME_LEN {
323 errors.push(ParseError {
324 line,
325 message: format!(
326 "entity name exceeds {MAX_NAME_LEN} chars (got {})",
327 name.len()
328 ),
329 });
330 }
331
332 let id = extract_id_field(fields);
334
335 apply_type_shorthand(fields, label);
337
338 normalize_enum_fields(fields);
340
341 validate_fields(fields, label, line, errors);
343
344 Entity {
345 name,
346 label,
347 fields: std::mem::take(fields),
348 id,
349 line,
350 tags: Vec::new(),
351 slug: None,
352 }
353}
354
355fn extract_id_field(fields: &mut Vec<(String, FieldValue)>) -> Option<String> {
357 let pos = fields.iter().position(|(k, _)| k == "id")?;
358 let (_, value) = fields.remove(pos);
359 match value {
360 FieldValue::Single(s) if !s.is_empty() => Some(s),
361 _ => None,
362 }
363}
364
365fn apply_type_shorthand(fields: &mut [(String, FieldValue)], label: Label) {
367 for field in fields.iter_mut() {
368 if field.0 == "type" {
369 field.0 = match label {
370 Label::Organization => "org_type".to_string(),
371 Label::Event => "event_type".to_string(),
372 Label::Document => "doc_type".to_string(),
373 Label::Asset => "asset_type".to_string(),
374 Label::Person => "type".to_string(), };
376 }
377 }
378}
379
/// Splits the text of a bullet (without the leading `- `) into key and value.
///
/// The split happens at the first `:`; both halves are trimmed. Returns
/// `None` when there is no colon or the key is empty. The value may be empty.
fn parse_bullet(item: &str) -> Option<(String, String)> {
    let (raw_key, raw_value) = item.split_once(':')?;
    let key = raw_key.trim();
    if key.is_empty() {
        return None;
    }
    Some((key.to_owned(), raw_value.trim().to_owned()))
}
390
/// Tells whether a comma-separated value for `key` is split into a list.
fn is_list_field(key: &str) -> bool {
    const LIST_KEYS: [&str; 3] = ["aliases", "urls", "role"];
    LIST_KEYS.contains(&key)
}
395
/// Extracts the heading text from a level-3 heading line.
///
/// Leading whitespace is ignored. The line must start with `### `, and the
/// text after that prefix must not itself start with `#`. Returns the
/// trimmed heading text, or `None` for any other line.
fn strip_h3(line: &str) -> Option<&str> {
    line.trim_start()
        .strip_prefix("### ")
        .filter(|rest| !rest.starts_with('#'))
        .map(str::trim)
}
407
/// Field keys accepted on every entity, whatever its label.
const COMMON_FIELDS: &[&str] = &[
    "qualifier",
    "aliases",
    "thumbnail",
    "thumbnail_source",
    "urls",
    "description",
];

/// Additional field keys accepted on `Label::Person` entities.
const PERSON_FIELDS: &[&str] = &[
    "role",
    "nationality",
    "date_of_birth",
    "place_of_birth",
    "status",
];

/// Additional field keys accepted on `Label::Organization` entities.
const ORGANIZATION_FIELDS: &[&str] = &[
    "org_type",
    "jurisdiction",
    "headquarters",
    "founded_date",
    "registration_number",
    "status",
];

/// Additional field keys accepted on `Label::Event` entities.
const EVENT_FIELDS: &[&str] = &["event_type", "occurred_at", "jurisdiction", "severity"];

/// Additional field keys accepted on `Label::Document` entities.
const DOCUMENT_FIELDS: &[&str] = &["doc_type", "issued_at", "issuing_authority", "case_number"];

/// Additional field keys accepted on `Label::Asset` entities.
const ASSET_FIELDS: &[&str] = &["asset_type", "value", "status"];
442
443use crate::domain;
445
// Accepted values for the enumerated fields, each taken from the `KNOWN`
// list of the matching `domain` type.
// NOTE(review): the lists are presumably in lowercase snake_case, since
// values are lowercased and space-to-underscore normalized before lookup —
// confirm against `domain`.

/// Known values for the `role` field.
const ROLE_VALUES: &[&str] = domain::Role::KNOWN;
/// Known values for the `org_type` field.
const ORG_TYPE_VALUES: &[&str] = domain::OrgType::KNOWN;
/// Known values for the `event_type` field.
const EVENT_TYPE_VALUES: &[&str] = domain::EventType::KNOWN;
/// Known values for the `doc_type` field.
const DOC_TYPE_VALUES: &[&str] = domain::DocType::KNOWN;
/// Known values for the `asset_type` field.
const ASSET_TYPE_VALUES: &[&str] = domain::AssetType::KNOWN;
/// Known values for the `severity` field.
const SEVERITY_VALUES: &[&str] = domain::Severity::KNOWN;
/// Known `status` values for people.
const PERSON_STATUS_VALUES: &[&str] = domain::PersonStatus::KNOWN;
/// Known `status` values for organizations.
const ORG_STATUS_VALUES: &[&str] = domain::OrgStatus::KNOWN;
/// Known `status` values for assets.
const ASSET_STATUS_VALUES: &[&str] = domain::AssetStatus::KNOWN;
455
/// Validation rules attached to a scalar field key.
struct FieldConstraint {
    /// Maximum accepted length of the value, compared against `str::len`
    /// (UTF-8 bytes).
    max_len: usize,
    /// Accepted values when the field is an enumeration; `None` for
    /// free-form fields.
    enum_values: Option<&'static [&'static str]>,
}
462
463fn field_constraint(key: &str) -> Option<FieldConstraint> {
464 match key {
465 "description" => Some(FieldConstraint {
466 max_len: 2000,
467 enum_values: None,
468 }),
469 "thumbnail" | "thumbnail_source" => Some(FieldConstraint {
470 max_len: 2048,
471 enum_values: None,
472 }),
473 "occurred_at" | "date_of_birth" | "founded_date" | "issued_at" | "opened_at"
474 | "closed_at" => Some(FieldConstraint {
475 max_len: 10,
476 enum_values: None,
477 }),
478 "place_of_birth" | "headquarters" | "issuing_authority" | "value" => {
479 Some(FieldConstraint {
480 max_len: 200,
481 enum_values: None,
482 })
483 }
484 "jurisdiction" => Some(FieldConstraint {
485 max_len: 203, enum_values: None,
488 }),
489 "role" => Some(FieldConstraint {
490 max_len: 100,
491 enum_values: Some(ROLE_VALUES),
492 }),
493 "org_type" => Some(FieldConstraint {
494 max_len: 100,
495 enum_values: Some(ORG_TYPE_VALUES),
496 }),
497 "event_type" => Some(FieldConstraint {
498 max_len: 100,
499 enum_values: Some(EVENT_TYPE_VALUES),
500 }),
501 "doc_type" => Some(FieldConstraint {
502 max_len: 100,
503 enum_values: Some(DOC_TYPE_VALUES),
504 }),
505 "asset_type" => Some(FieldConstraint {
506 max_len: 100,
507 enum_values: Some(ASSET_TYPE_VALUES),
508 }),
509 "severity" => Some(FieldConstraint {
510 max_len: 20,
511 enum_values: Some(SEVERITY_VALUES),
512 }),
513 "status" => Some(FieldConstraint {
514 max_len: 30,
517 enum_values: None,
518 }),
519 "qualifier" | "nationality" | "case_number" | "registration_number" => {
520 Some(FieldConstraint {
521 max_len: 100,
522 enum_values: None,
523 })
524 }
525 _ => None,
527 }
528}
529
/// Maximum number of entries accepted in an `aliases` list.
const MAX_ALIASES: usize = 10;
/// Maximum length of a single alias, compared against `str::len` (UTF-8 bytes).
const MAX_ALIAS_LEN: usize = 200;
/// Maximum number of entries accepted in a `urls` list.
const MAX_URLS: usize = 10;
/// Maximum length of a single URL, compared against `str::len` (UTF-8 bytes).
const MAX_URL_LEN: usize = 2048;
535
536fn normalize_enum_fields(fields: &mut [(String, FieldValue)]) {
540 for (key, value) in fields.iter_mut() {
541 let is_enum = field_constraint(key).and_then(|c| c.enum_values).is_some();
542
543 match value {
544 FieldValue::Single(val) if is_enum && !val.starts_with("custom:") => {
545 let normalized = val.to_lowercase().replace(' ', "_");
546 if normalized != *val {
547 *val = normalized;
548 }
549 }
550 FieldValue::List(items) if is_enum => {
551 for item in items.iter_mut() {
552 if !item.starts_with("custom:") {
553 let normalized = item.to_lowercase().replace(' ', "_");
554 if normalized != *item {
555 *item = normalized;
556 }
557 }
558 }
559 }
560 _ => {}
561 }
562 }
563}
564
565fn validate_fields(
566 fields: &[(String, FieldValue)],
567 label: Label,
568 line: usize,
569 errors: &mut Vec<ParseError>,
570) {
571 let label_fields: &[&str] = match label {
572 Label::Person => PERSON_FIELDS,
573 Label::Organization => ORGANIZATION_FIELDS,
574 Label::Event => EVENT_FIELDS,
575 Label::Document => DOCUMENT_FIELDS,
576 Label::Asset => ASSET_FIELDS,
577 };
578
579 for (key, value) in fields {
580 if !COMMON_FIELDS.contains(&key.as_str()) && !label_fields.contains(&key.as_str()) {
581 errors.push(ParseError {
582 line,
583 message: format!("unknown field {key:?} for {label}"),
584 });
585 continue;
586 }
587
588 match value {
589 FieldValue::Single(val) => validate_single_field(key, val, label, line, errors),
590 FieldValue::List(items) => validate_list_field(key, items, line, errors),
591 }
592 }
593
594 if label == Label::Organization && !fields.iter().any(|(k, _)| k == "org_type") {
596 errors.push(ParseError {
597 line,
598 message: "organization entity missing required field \"org_type\"".into(),
599 });
600 }
601}
602
603fn validate_single_field(
605 key: &str,
606 val: &str,
607 label: Label,
608 line: usize,
609 errors: &mut Vec<ParseError>,
610) {
611 if let Some(constraint) = field_constraint(key) {
612 if val.len() > constraint.max_len {
613 errors.push(ParseError {
614 line,
615 message: format!(
616 "field {key:?} exceeds {} chars (got {})",
617 constraint.max_len,
618 val.len()
619 ),
620 });
621 }
622
623 if let Some(allowed) = constraint.enum_values {
624 validate_enum_value(key, val, allowed, line, errors);
625 }
626
627 if matches!(
628 key,
629 "occurred_at"
630 | "date_of_birth"
631 | "founded_date"
632 | "issued_at"
633 | "opened_at"
634 | "closed_at"
635 ) && !val.is_empty()
636 {
637 validate_date_format(key, val, line, errors);
638 }
639
640 if matches!(key, "thumbnail" | "thumbnail_source")
641 && !val.is_empty()
642 && !val.starts_with("https://")
643 {
644 errors.push(ParseError {
645 line,
646 message: format!("field {key:?} must be HTTPS URL"),
647 });
648 }
649 }
650
651 if key == "status" {
652 validate_status(val, label, line, errors);
653 }
654
655 if key == "jurisdiction" && !val.is_empty() {
656 validate_jurisdiction(val, line, errors);
657 }
658
659 if key == "value" && !val.is_empty() {
660 validate_money(val, line, errors);
661 }
662}
663
664fn validate_list_field(key: &str, items: &[String], line: usize, errors: &mut Vec<ParseError>) {
666 match key {
667 "aliases" => {
668 if items.len() > MAX_ALIASES {
669 errors.push(ParseError {
670 line,
671 message: format!(
672 "aliases exceeds {MAX_ALIASES} items (got {})",
673 items.len()
674 ),
675 });
676 }
677 for item in items {
678 if item.len() > MAX_ALIAS_LEN {
679 errors.push(ParseError {
680 line,
681 message: format!("alias exceeds {MAX_ALIAS_LEN} chars: {item:?}"),
682 });
683 }
684 }
685 }
686 "urls" => {
687 if items.len() > MAX_URLS {
688 errors.push(ParseError {
689 line,
690 message: format!("urls exceeds {MAX_URLS} items (got {})", items.len()),
691 });
692 }
693 for item in items {
694 if item.len() > MAX_URL_LEN {
695 errors.push(ParseError {
696 line,
697 message: format!("url exceeds {MAX_URL_LEN} chars: {item:?}"),
698 });
699 }
700 if !item.starts_with("https://") {
701 errors.push(ParseError {
702 line,
703 message: format!("url must be HTTPS: {item:?}"),
704 });
705 }
706 }
707 }
708 "role" => {
709 if items.len() > MAX_ROLES {
710 errors.push(ParseError {
711 line,
712 message: format!("role exceeds {MAX_ROLES} items (got {})", items.len()),
713 });
714 }
715 for item in items {
716 validate_enum_value("role", item, ROLE_VALUES, line, errors);
717 }
718 }
719 _ => {}
720 }
721}
722
/// Maximum number of entries accepted in a `role` list.
const MAX_ROLES: usize = 10;
725
726fn validate_status(value: &str, label: Label, line: usize, errors: &mut Vec<ParseError>) {
728 let allowed: &[&str] = match label {
729 Label::Person => PERSON_STATUS_VALUES,
730 Label::Organization => ORG_STATUS_VALUES,
731 Label::Asset => ASSET_STATUS_VALUES,
732 _ => {
733 errors.push(ParseError {
734 line,
735 message: format!("field \"status\" is not valid for {label}"),
736 });
737 return;
738 }
739 };
740
741 let normalized = value.to_lowercase().replace(' ', "_");
742 if !allowed.contains(&normalized.as_str()) {
743 errors.push(ParseError {
744 line,
745 message: format!(
746 "invalid status {value:?} for {label} (known: {})",
747 allowed.join(", ")
748 ),
749 });
750 }
751}
752
753fn validate_jurisdiction(value: &str, line: usize, errors: &mut Vec<ParseError>) {
755 if let Some(slash_pos) = value.find('/') {
756 let country = &value[..slash_pos];
757 let subdivision = &value[slash_pos + 1..];
758 if country.len() != 2 || !country.chars().all(|c| c.is_ascii_uppercase()) {
759 errors.push(ParseError {
760 line,
761 message: format!(
762 "jurisdiction country must be 2-letter uppercase ISO code, got {country:?}"
763 ),
764 });
765 }
766 if subdivision.is_empty() || subdivision.len() > domain::MAX_SUBDIVISION_LEN {
767 errors.push(ParseError {
768 line,
769 message: format!(
770 "jurisdiction subdivision must be 1-{} chars",
771 domain::MAX_SUBDIVISION_LEN
772 ),
773 });
774 }
775 } else {
776 if value.len() != 2 || !value.chars().all(|c| c.is_ascii_uppercase()) {
778 errors.push(ParseError {
779 line,
780 message: format!(
781 "jurisdiction must be 2-letter uppercase ISO code or CODE/Subdivision, got {value:?}"
782 ),
783 });
784 }
785 }
786}
787
788fn validate_money(value: &str, line: usize, errors: &mut Vec<ParseError>) {
791 let parts: Vec<&str> = value.splitn(3, ' ').collect();
793 if parts.len() < 3 {
794 errors.push(ParseError {
795 line,
796 message: format!(
797 "invalid money format: expected `amount currency \"display\"`, got {value:?}"
798 ),
799 });
800 return;
801 }
802
803 if parts[0].parse::<i64>().is_err() {
805 errors.push(ParseError {
806 line,
807 message: format!("money amount must be an integer, got {:?}", parts[0]),
808 });
809 }
810
811 let currency = parts[1];
813 if currency.len() != 3 || !currency.chars().all(|c| c.is_ascii_uppercase()) {
814 errors.push(ParseError {
815 line,
816 message: format!(
817 "money currency must be 3-letter uppercase ISO code, got {currency:?}"
818 ),
819 });
820 }
821
822 let display = parts[2];
824 if !display.starts_with('"') || !display.ends_with('"') {
825 errors.push(ParseError {
826 line,
827 message: format!("money display must be quoted, got {display:?}"),
828 });
829 } else {
830 let inner = &display[1..display.len() - 1];
831 if inner.len() > domain::MAX_MONEY_DISPLAY_LEN {
832 errors.push(ParseError {
833 line,
834 message: format!(
835 "money display exceeds {} chars (got {})",
836 domain::MAX_MONEY_DISPLAY_LEN,
837 inner.len()
838 ),
839 });
840 }
841 }
842}
843
844fn validate_enum_value(
845 key: &str,
846 value: &str,
847 allowed: &[&str],
848 line: usize,
849 errors: &mut Vec<ParseError>,
850) {
851 if let Some(custom) = value.strip_prefix("custom:") {
853 if custom.is_empty() || custom.len() > 100 {
854 errors.push(ParseError {
855 line,
856 message: format!(
857 "field {key:?} custom value must be 1-100 chars, got {}",
858 custom.len()
859 ),
860 });
861 }
862 return;
863 }
864
865 let normalized = value.to_lowercase().replace(' ', "_");
866 if !allowed.contains(&normalized.as_str()) {
867 errors.push(ParseError {
868 line,
869 message: format!(
870 "invalid {key} value {value:?} (known: {}; use \"custom:Value\" for custom)",
871 allowed.join(", ")
872 ),
873 });
874 }
875}
876
877fn validate_date_format(key: &str, value: &str, line: usize, errors: &mut Vec<ParseError>) {
878 let valid = matches!(value.len(), 4 | 7 | 10)
880 && value.chars().enumerate().all(|(i, c)| match i {
881 4 | 7 => c == '-',
882 _ => c.is_ascii_digit(),
883 });
884
885 if !valid {
886 errors.push(ParseError {
887 line,
888 message: format!("field {key:?} must be YYYY, YYYY-MM, or YYYY-MM-DD, got {value:?}"),
889 });
890 }
891}
892
// Unit tests for entity parsing and field validation. Each test feeds a small
// markdown body to `parse_entities` and inspects the returned entities and/or
// the collected errors.
#[cfg(test)]
mod tests {
    use super::*;

    // A person with scalar fields, a custom role and a description that
    // continues on an indented line (joined with `\n`).
    #[test]
    fn parse_person_entity() {
        let body = [
            "",
            "### Mark Bonnick",
            "- qualifier: Arsenal Kit Manager",
            "- nationality: GB",
            "- role: custom:Kit Manager",
            "- date_of_birth: 1962",
            "- description: Academy kit manager at Arsenal FC for 22 years",
            " (2001-2024). Age 62 at time of dismissal.",
            "",
        ]
        .join("\n");

        let mut errors = Vec::new();
        let entities = parse_entities(&body, SectionKind::People, 10, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);

        let e = &entities[0];
        assert_eq!(e.name, "Mark Bonnick");
        assert_eq!(e.label, Label::Person);
        assert_eq!(e.fields.len(), 5);

        let desc = e
            .fields
            .iter()
            .find(|(k, _)| k == "description")
            .map(|(_, v)| v);
        assert_eq!(
            desc,
            Some(&FieldValue::Single(
                "Academy kit manager at Arsenal FC for 22 years\n(2001-2024). Age 62 at time of dismissal.".into()
            ))
        );
    }

    // A comma-separated `role` value is split into a list.
    #[test]
    fn parse_person_with_role_list() {
        let body = "### Test\n- role: politician, executive\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        let roles = entities[0].fields.iter().find(|(k, _)| k == "role");
        assert_eq!(
            roles.map(|(_, v)| v),
            Some(&FieldValue::List(vec![
                "politician".into(),
                "executive".into(),
            ]))
        );
    }

    // A known person status produces no errors.
    #[test]
    fn parse_person_with_status() {
        let body = "### Test\n- status: imprisoned\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    // An unknown person status is reported.
    #[test]
    fn reject_invalid_person_status() {
        let body = "### Test\n- status: unknown_status\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("invalid status")));
    }

    // `type` becomes `org_type`; inline aliases and nested URL bullets both
    // become lists.
    #[test]
    fn parse_organization_with_type_shorthand() {
        let body = [
            "",
            "### Arsenal FC",
            "- type: sports_club",
            "- jurisdiction: GB",
            "- aliases: Arsenal, The Gunners, Arsenal Football Club",
            "- urls:",
            " - https://www.arsenal.com",
            " - https://en.wikipedia.org/wiki/Arsenal_F.C.",
            "",
        ]
        .join("\n");

        let mut errors = Vec::new();
        let entities = parse_entities(&body, SectionKind::Organizations, 20, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);

        let e = &entities[0];
        assert_eq!(e.name, "Arsenal FC");
        assert_eq!(e.label, Label::Organization);

        let it = e.fields.iter().find(|(k, _)| k == "org_type");
        assert_eq!(
            it.map(|(_, v)| v),
            Some(&FieldValue::Single("sports_club".into()))
        );

        let aliases = e.fields.iter().find(|(k, _)| k == "aliases");
        assert_eq!(
            aliases.map(|(_, v)| v),
            Some(&FieldValue::List(vec![
                "Arsenal".into(),
                "The Gunners".into(),
                "Arsenal Football Club".into(),
            ]))
        );

        let urls = e.fields.iter().find(|(k, _)| k == "urls");
        assert_eq!(
            urls.map(|(_, v)| v),
            Some(&FieldValue::List(vec![
                "https://www.arsenal.com".into(),
                "https://en.wikipedia.org/wiki/Arsenal_F.C.".into(),
            ]))
        );
    }

    // `CC/Subdivision` jurisdictions are accepted and stored as written.
    #[test]
    fn parse_organization_with_jurisdiction_subdivision() {
        let body = "### Pemkab Bogor\n- org_type: local_government\n- jurisdiction: ID/West Java\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::Organizations, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        let j = entities[0].fields.iter().find(|(k, _)| k == "jurisdiction");
        assert_eq!(
            j.map(|(_, v)| v),
            Some(&FieldValue::Single("ID/West Java".into()))
        );
    }

    // A jurisdiction that is not a two-letter code is reported.
    #[test]
    fn reject_invalid_jurisdiction() {
        let body = "### Test\n- org_type: corporation\n- jurisdiction: England\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::Organizations, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("jurisdiction")));
    }

    // `type` becomes `event_type` for events.
    #[test]
    fn parse_event_with_type_shorthand() {
        let body = [
            "",
            "### Bonnick dismissal",
            "- occurred_at: 2024-12-24",
            "- type: dismissal",
            "- description: Arsenal dismisses Bonnick.",
            "",
        ]
        .join("\n");

        let mut errors = Vec::new();
        let entities = parse_entities(&body, SectionKind::Events, 50, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");

        let e = &entities[0];
        assert_eq!(e.label, Label::Event);
        let dt = e.fields.iter().find(|(k, _)| k == "event_type");
        assert_eq!(
            dt.map(|(_, v)| v),
            Some(&FieldValue::Single("dismissal".into()))
        );
    }

    // An event with type, severity and date validates cleanly.
    #[test]
    fn parse_event_with_severity() {
        let body =
            "### Test event\n- event_type: bribery\n- severity: major\n- occurred_at: 2024-01-01\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::Events, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }

    // A document with all its label-specific fields validates cleanly.
    #[test]
    fn parse_document_entity() {
        let body = [
            "### Indictment No. 123",
            "- doc_type: indictment",
            "- issued_at: 2024-03-15",
            "- issuing_authority: Jakarta District Court",
            "- case_number: 123/Pid.B/2024/PN.Jkt.Pst",
        ]
        .join("\n");
        let mut errors = Vec::new();
        let entities = parse_entities(&body, SectionKind::Documents, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].label, Label::Document);
    }

    // An asset with a well-formed money value and status validates cleanly.
    #[test]
    fn parse_asset_entity() {
        let body = "### Bribe payment\n- asset_type: cash\n- value: 500000000000 IDR \"Rp 500 billion\"\n- status: seized\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::Assets, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].label, Label::Asset);
    }

    // A free-text `value` is reported as invalid money.
    #[test]
    fn reject_invalid_money_format() {
        let body = "### Test\n- asset_type: cash\n- value: lots of money\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::Assets, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("money")));
    }

    // A key that no label knows is reported.
    #[test]
    fn reject_unknown_field() {
        let body = "### Test\n- foobar: value\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("unknown field")));
    }

    // An organization-only key on a person is reported as unknown.
    #[test]
    fn reject_wrong_label_field() {
        let body = "### Test\n- org_type: court\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("unknown field")));
    }

    // A role outside the known list (and without `custom:`) is reported.
    #[test]
    fn reject_invalid_enum_value() {
        let body = "### Test\n- role: wizard\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("invalid role")));
    }

    // `custom:` values bypass the known-value check.
    #[test]
    fn accept_custom_enum_value() {
        let body = "### Test\n- role: custom:Kit Manager\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);
    }

    // Scalar enum values are stored with spaces replaced by underscores.
    #[test]
    fn normalize_enum_value_spaces_to_underscores() {
        let body = "### Test\n- role: civil servant\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 1);
        let val = entities[0]
            .fields
            .iter()
            .find(|(k, _)| k == "role")
            .map(|(_, v)| match v {
                FieldValue::Single(s) => s.as_str(),
                _ => "",
            });
        assert_eq!(val, Some("civil_servant"));
    }

    // Every item of an enum list is normalized the same way.
    #[test]
    fn normalize_enum_list_values() {
        let body = "### Test\n- role: civil servant, law enforcement\n";
        let mut errors = Vec::new();
        let entities = parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        let roles = entities[0].fields.iter().find(|(k, _)| k == "role");
        assert_eq!(
            roles.map(|(_, v)| v),
            Some(&FieldValue::List(vec![
                "civil_servant".into(),
                "law_enforcement".into(),
            ]))
        );
    }

    // A date that is not in `YYYY[-MM[-DD]]` form is reported.
    #[test]
    fn reject_invalid_date_format() {
        let body = "### Test\n- date_of_birth: January 1990\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("YYYY")));
    }

    // All three date precisions are accepted.
    #[test]
    fn accept_valid_date_formats() {
        for date in &["2024", "2024-01", "2024-01-15"] {
            let body = format!("### Test\n- date_of_birth: {date}\n");
            let mut errors = Vec::new();
            parse_entities(&body, SectionKind::People, 1, &mut errors);
            assert!(
                errors.is_empty(),
                "date {date:?} should be valid: {errors:?}"
            );
        }
    }

    // A plain-HTTP entry in a nested `urls` list is reported.
    #[test]
    fn reject_non_https_url() {
        let body = "### Test\n- urls:\n - http://example.com\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
    }

    // A plain-HTTP thumbnail is reported.
    #[test]
    fn reject_non_https_thumbnail() {
        let body = "### Test\n- thumbnail: http://example.com/img.jpg\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("HTTPS")));
    }

    // Consecutive headings produce separate entities in source order.
    #[test]
    fn multiple_entities() {
        let body = [
            "",
            "### Alice",
            "- nationality: NL",
            "",
            "### Bob",
            "- nationality: GB",
            "",
        ]
        .join("\n");

        let mut errors = Vec::new();
        let entities = parse_entities(&body, SectionKind::People, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
        assert_eq!(entities.len(), 2);
        assert_eq!(entities[0].name, "Alice");
        assert_eq!(entities[1].name, "Bob");
    }

    // A value longer than the field's limit (100 for `nationality`) is reported.
    #[test]
    fn field_max_length_violation() {
        let long_val = "a".repeat(201);
        let body = format!("### Test\n- nationality: {long_val}\n");
        let mut errors = Vec::new();
        parse_entities(&body, SectionKind::People, 1, &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| e.message.contains("exceeds 100 chars"))
        );
    }

    // Eleven aliases exceed the limit of ten.
    #[test]
    fn too_many_aliases() {
        let aliases: Vec<String> = (0..11).map(|i| format!("Alias{i}")).collect();
        let body = format!("### Test\n- aliases: {}\n", aliases.join(", "));
        let mut errors = Vec::new();
        parse_entities(&body, SectionKind::People, 1, &mut errors);
        assert!(errors.iter().any(|e| e.message.contains("exceeds 10")));
    }

    // Organizations must carry an `org_type` field.
    #[test]
    fn require_org_type_for_organizations() {
        let body = "### Test Corp\n- qualifier: Test\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::Organizations, 1, &mut errors);
        assert!(
            errors
                .iter()
                .any(|e| { e.message.contains("missing required field \"org_type\"") })
        );
    }

    // An organization with `org_type` validates cleanly.
    #[test]
    fn accept_organization_with_type() {
        let body = "### Test Corp\n- qualifier: Test\n- org_type: corporation\n";
        let mut errors = Vec::new();
        parse_entities(body, SectionKind::Organizations, 1, &mut errors);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }
}