Skip to main content

weave_content/
domain.rs

//! Domain types for the OSINT case graph.
//!
//! These types represent the target data model per ADR-014. They are
//! pure value objects with no infrastructure dependencies.
5
6use std::fmt;
7
8use serde::Serialize;
9
// ---------------------------------------------------------------------------
// Entity labels
// ---------------------------------------------------------------------------
13
14/// Graph node label — determines which fields are valid on an entity.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
16#[serde(rename_all = "snake_case")]
17pub enum EntityLabel {
18    Person,
19    Organization,
20    Event,
21    Document,
22    Asset,
23    Case,
24}
25
26impl fmt::Display for EntityLabel {
27    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
28        match self {
29            Self::Person => write!(f, "person"),
30            Self::Organization => write!(f, "organization"),
31            Self::Event => write!(f, "event"),
32            Self::Document => write!(f, "document"),
33            Self::Asset => write!(f, "asset"),
34            Self::Case => write!(f, "case"),
35        }
36    }
37}
38
// ---------------------------------------------------------------------------
// Person enums
// ---------------------------------------------------------------------------
42
43/// Role a person holds (multiple allowed per person).
44#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
45#[serde(rename_all = "snake_case")]
46pub enum Role {
47    Politician,
48    Executive,
49    CivilServant,
50    Military,
51    Judiciary,
52    LawEnforcement,
53    Journalist,
54    Academic,
55    Activist,
56    Athlete,
57    Lawyer,
58    Lobbyist,
59    Banker,
60    Accountant,
61    Consultant,
62    /// Free-form value not in the predefined list.
63    Custom(String),
64}
65
/// Maximum length of a custom enum value.
///
/// `parse_custom` compares this against `str::len`, so the unit is UTF-8
/// bytes rather than characters.
const MAX_CUSTOM_LEN: usize = 100;
68
69impl Role {
70    /// All known non-custom values as `&str`.
71    pub const KNOWN: &[&str] = &[
72        "politician",
73        "executive",
74        "civil_servant",
75        "military",
76        "judiciary",
77        "law_enforcement",
78        "journalist",
79        "academic",
80        "activist",
81        "athlete",
82        "lawyer",
83        "lobbyist",
84        "banker",
85        "accountant",
86        "consultant",
87    ];
88}
89
90/// Status of a person.
91#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
92#[serde(rename_all = "snake_case")]
93pub enum PersonStatus {
94    Active,
95    Deceased,
96    Imprisoned,
97    Fugitive,
98    Acquitted,
99}
100
101impl PersonStatus {
102    pub const KNOWN: &[&str] = &["active", "deceased", "imprisoned", "fugitive", "acquitted"];
103}
104
// ---------------------------------------------------------------------------
// Organization enums
// ---------------------------------------------------------------------------
108
109/// Type of organization.
110#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
111#[serde(rename_all = "snake_case")]
112pub enum OrgType {
113    GovernmentMinistry,
114    GovernmentAgency,
115    LocalGovernment,
116    Legislature,
117    Court,
118    LawEnforcement,
119    Prosecutor,
120    Regulator,
121    PoliticalParty,
122    StateEnterprise,
123    Corporation,
124    Bank,
125    Ngo,
126    Media,
127    University,
128    SportsClub,
129    SportsBody,
130    TradeUnion,
131    LobbyGroup,
132    Military,
133    ReligiousBody,
134    Custom(String),
135}
136
137impl OrgType {
138    pub const KNOWN: &[&str] = &[
139        "government_ministry",
140        "government_agency",
141        "local_government",
142        "legislature",
143        "court",
144        "law_enforcement",
145        "prosecutor",
146        "regulator",
147        "political_party",
148        "state_enterprise",
149        "corporation",
150        "bank",
151        "ngo",
152        "media",
153        "university",
154        "sports_club",
155        "sports_body",
156        "trade_union",
157        "lobby_group",
158        "military",
159        "religious_body",
160    ];
161}
162
163/// Status of an organization.
164#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
165#[serde(rename_all = "snake_case")]
166pub enum OrgStatus {
167    Active,
168    Dissolved,
169    Suspended,
170    Merged,
171}
172
173impl OrgStatus {
174    pub const KNOWN: &[&str] = &["active", "dissolved", "suspended", "merged"];
175}
176
// ---------------------------------------------------------------------------
// Event enums
// ---------------------------------------------------------------------------
180
181/// Type of event.
182#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
183#[serde(rename_all = "snake_case")]
184pub enum EventType {
185    Arrest,
186    Indictment,
187    Trial,
188    Conviction,
189    Acquittal,
190    Sentencing,
191    Appeal,
192    Pardon,
193    Parole,
194    Bribery,
195    Embezzlement,
196    Fraud,
197    Extortion,
198    MoneyLaundering,
199    Murder,
200    Assault,
201    Dismissal,
202    Resignation,
203    Appointment,
204    Election,
205    InvestigationOpened,
206    InvestigationClosed,
207    Raid,
208    Seizure,
209    Warrant,
210    FugitiveFlight,
211    FugitiveCapture,
212    PolicyChange,
213    ContractAward,
214    FinancialDefault,
215    Bailout,
216    WhistleblowerReport,
217    Custom(String),
218}
219
220impl EventType {
221    pub const KNOWN: &[&str] = &[
222        "arrest",
223        "indictment",
224        "trial",
225        "conviction",
226        "acquittal",
227        "sentencing",
228        "appeal",
229        "pardon",
230        "parole",
231        "bribery",
232        "embezzlement",
233        "fraud",
234        "extortion",
235        "money_laundering",
236        "murder",
237        "assault",
238        "dismissal",
239        "resignation",
240        "appointment",
241        "election",
242        "investigation_opened",
243        "investigation_closed",
244        "raid",
245        "seizure",
246        "warrant",
247        "fugitive_flight",
248        "fugitive_capture",
249        "policy_change",
250        "contract_award",
251        "financial_default",
252        "bailout",
253        "whistleblower_report",
254    ];
255}
256
257/// Event severity.
258#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
259#[serde(rename_all = "snake_case")]
260pub enum Severity {
261    Minor,
262    Significant,
263    Major,
264    Critical,
265}
266
267impl Severity {
268    pub const KNOWN: &[&str] = &["minor", "significant", "major", "critical"];
269}
270
// ---------------------------------------------------------------------------
// Document enums
// ---------------------------------------------------------------------------
274
275/// Type of document.
276#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
277#[serde(rename_all = "snake_case")]
278pub enum DocType {
279    CourtRuling,
280    Indictment,
281    ChargeSheet,
282    Warrant,
283    Contract,
284    Permit,
285    AuditReport,
286    FinancialDisclosure,
287    Legislation,
288    Regulation,
289    PressRelease,
290    InvestigationReport,
291    SanctionsNotice,
292    Custom(String),
293}
294
295impl DocType {
296    pub const KNOWN: &[&str] = &[
297        "court_ruling",
298        "indictment",
299        "charge_sheet",
300        "warrant",
301        "contract",
302        "permit",
303        "audit_report",
304        "financial_disclosure",
305        "legislation",
306        "regulation",
307        "press_release",
308        "investigation_report",
309        "sanctions_notice",
310    ];
311}
312
// ---------------------------------------------------------------------------
// Asset enums
// ---------------------------------------------------------------------------
316
317/// Type of asset.
318#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
319#[serde(rename_all = "snake_case")]
320pub enum AssetType {
321    Cash,
322    BankAccount,
323    RealEstate,
324    Vehicle,
325    Equity,
326    ContractValue,
327    Grant,
328    BudgetAllocation,
329    SeizedAsset,
330    Custom(String),
331}
332
333impl AssetType {
334    pub const KNOWN: &[&str] = &[
335        "cash",
336        "bank_account",
337        "real_estate",
338        "vehicle",
339        "equity",
340        "contract_value",
341        "grant",
342        "budget_allocation",
343        "seized_asset",
344    ];
345}
346
347/// Status of an asset.
348#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
349#[serde(rename_all = "snake_case")]
350pub enum AssetStatus {
351    Active,
352    Frozen,
353    Seized,
354    Forfeited,
355    Returned,
356}
357
358impl AssetStatus {
359    pub const KNOWN: &[&str] = &["active", "frozen", "seized", "forfeited", "returned"];
360}
361
// ---------------------------------------------------------------------------
// Case enums
// ---------------------------------------------------------------------------
365
366/// Type of case.
367#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
368#[serde(rename_all = "snake_case")]
369pub enum CaseType {
370    Corruption,
371    Fraud,
372    Bribery,
373    Embezzlement,
374    Murder,
375    CivilRights,
376    Regulatory,
377    Political,
378    Custom(String),
379}
380
381impl CaseType {
382    pub const KNOWN: &[&str] = &[
383        "corruption",
384        "fraud",
385        "bribery",
386        "embezzlement",
387        "murder",
388        "civil_rights",
389        "regulatory",
390        "political",
391    ];
392}
393
394/// Status of a case.
395#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
396#[serde(rename_all = "snake_case")]
397pub enum CaseStatus {
398    Open,
399    UnderInvestigation,
400    Trial,
401    Convicted,
402    Acquitted,
403    Closed,
404    Appeal,
405}
406
407impl CaseStatus {
408    pub const KNOWN: &[&str] = &[
409        "open",
410        "under_investigation",
411        "trial",
412        "convicted",
413        "acquitted",
414        "closed",
415        "appeal",
416    ];
417}
418
// ---------------------------------------------------------------------------
// Structured value types
// ---------------------------------------------------------------------------
422
423/// Monetary amount with currency and human-readable display.
424///
425/// `amount` is in the smallest currency unit (e.g. cents for USD, sen for IDR).
426#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
427pub struct Money {
428    pub amount: i64,
429    pub currency: String,
430    pub display: String,
431}
432
/// Maximum length of the `currency` field of `Money` (ISO 4217 = 3 chars).
pub const MAX_CURRENCY_LEN: usize = 3;

/// Maximum length of the `display` field of `Money`.
pub const MAX_MONEY_DISPLAY_LEN: usize = 100;
438
439/// Geographic jurisdiction: ISO 3166-1 country code with optional subdivision.
440#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
441pub struct Jurisdiction {
442    /// ISO 3166-1 alpha-2 country code (e.g. `ID`, `GB`).
443    pub country: String,
444    /// Optional subdivision name (e.g. `South Sulawesi`).
445    #[serde(skip_serializing_if = "Option::is_none")]
446    pub subdivision: Option<String>,
447}
448
/// Maximum length of the `country` field of `Jurisdiction`
/// (ISO 3166-1 alpha-2 = 2 chars).
pub const MAX_COUNTRY_LEN: usize = 2;

/// Maximum length of the `subdivision` field of `Jurisdiction`.
pub const MAX_SUBDIVISION_LEN: usize = 200;
454
455/// A source of information (news article, official document, etc.).
456#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
457pub struct Source {
458    /// HTTPS URL of the source.
459    pub url: String,
460    /// Extracted domain (e.g. `kompas.com`).
461    pub domain: String,
462    /// Article or document title.
463    #[serde(skip_serializing_if = "Option::is_none")]
464    pub title: Option<String>,
465    /// Publication date (ISO 8601 date string).
466    #[serde(skip_serializing_if = "Option::is_none")]
467    pub published_at: Option<String>,
468    /// Wayback Machine or other archive URL.
469    #[serde(skip_serializing_if = "Option::is_none")]
470    pub archived_url: Option<String>,
471    /// ISO 639-1 language code (e.g. `id`, `en`).
472    #[serde(skip_serializing_if = "Option::is_none")]
473    pub language: Option<String>,
474}
475
/// Maximum length of a source URL (`Source::url`).
pub const MAX_SOURCE_URL_LEN: usize = 2048;

/// Maximum length of a source domain (`Source::domain`).
pub const MAX_SOURCE_DOMAIN_LEN: usize = 253;

/// Maximum length of a source title (`Source::title`).
pub const MAX_SOURCE_TITLE_LEN: usize = 300;

/// Maximum length of a source language code (ISO 639-1 = 2 chars).
pub const MAX_SOURCE_LANGUAGE_LEN: usize = 2;
487
// ---------------------------------------------------------------------------
// Parsing helpers
// ---------------------------------------------------------------------------
491
492/// Parse a `custom:Value` string. Returns `Some(value)` if the prefix is
493/// present and the value is within length limits, `None` otherwise.
494pub fn parse_custom(value: &str) -> Option<&str> {
495    let custom = value.strip_prefix("custom:")?;
496    if custom.is_empty() || custom.len() > MAX_CUSTOM_LEN {
497        return None;
498    }
499    Some(custom)
500}
501
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
505
/// Unit tests for display names, JSON serialization shape, `parse_custom`,
/// and the sizes of the `KNOWN` lists.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn entity_label_display() {
        assert_eq!(EntityLabel::Person.to_string(), "person");
        assert_eq!(EntityLabel::Organization.to_string(), "organization");
        assert_eq!(EntityLabel::Event.to_string(), "event");
        assert_eq!(EntityLabel::Document.to_string(), "document");
        assert_eq!(EntityLabel::Asset.to_string(), "asset");
        assert_eq!(EntityLabel::Case.to_string(), "case");
    }

    // In the serialization tests below, a failed `to_string` becomes an empty
    // string, which the first positive `contains` / `assert_eq!` then rejects.

    #[test]
    fn entity_label_serializes_snake_case() {
        let json = serde_json::to_string(&EntityLabel::Organization).unwrap_or_default();
        assert_eq!(json, "\"organization\"");
    }

    #[test]
    fn money_serialization() {
        let m = Money {
            amount: 500_000_000_000,
            currency: "IDR".into(),
            display: "Rp 500 billion".into(),
        };
        let json = serde_json::to_string(&m).unwrap_or_default();
        assert!(json.contains("\"amount\":500000000000"));
        assert!(json.contains("\"currency\":\"IDR\""));
        assert!(json.contains("\"display\":\"Rp 500 billion\""));
    }

    #[test]
    fn jurisdiction_without_subdivision() {
        let j = Jurisdiction {
            country: "ID".into(),
            subdivision: None,
        };
        let json = serde_json::to_string(&j).unwrap_or_default();
        assert!(json.contains("\"country\":\"ID\""));
        assert!(!json.contains("subdivision"));
    }

    #[test]
    fn jurisdiction_with_subdivision() {
        let j = Jurisdiction {
            country: "ID".into(),
            subdivision: Some("South Sulawesi".into()),
        };
        let json = serde_json::to_string(&j).unwrap_or_default();
        assert!(json.contains("\"subdivision\":\"South Sulawesi\""));
    }

    #[test]
    fn source_minimal() {
        let s = Source {
            url: "https://kompas.com/article".into(),
            domain: "kompas.com".into(),
            title: None,
            published_at: None,
            archived_url: None,
            language: None,
        };
        let json = serde_json::to_string(&s).unwrap_or_default();
        assert!(json.contains("\"domain\":\"kompas.com\""));
        assert!(!json.contains("title"));
        assert!(!json.contains("language"));
    }

    #[test]
    fn source_full() {
        let s = Source {
            url: "https://kompas.com/article".into(),
            domain: "kompas.com".into(),
            title: Some("Breaking news".into()),
            published_at: Some("2024-01-15".into()),
            archived_url: Some(
                "https://web.archive.org/web/2024/https://kompas.com/article".into(),
            ),
            language: Some("id".into()),
        };
        let json = serde_json::to_string(&s).unwrap_or_default();
        assert!(json.contains("\"title\":\"Breaking news\""));
        assert!(json.contains("\"language\":\"id\""));
    }

    #[test]
    fn parse_custom_valid() {
        assert_eq!(parse_custom("custom:Kit Manager"), Some("Kit Manager"));
    }

    #[test]
    fn parse_custom_empty() {
        assert_eq!(parse_custom("custom:"), None);
    }

    #[test]
    fn parse_custom_at_max_length() {
        // Exactly MAX_CUSTOM_LEN bytes is still accepted.
        let payload = "a".repeat(MAX_CUSTOM_LEN);
        let input = format!("custom:{payload}");
        assert_eq!(parse_custom(&input), Some(payload.as_str()));
    }

    #[test]
    fn parse_custom_too_long() {
        let long = format!("custom:{}", "a".repeat(101));
        assert_eq!(parse_custom(&long), None);
    }

    #[test]
    fn parse_custom_no_prefix() {
        assert_eq!(parse_custom("politician"), None);
    }

    #[test]
    fn role_known_values_count() {
        assert_eq!(Role::KNOWN.len(), 15);
    }

    #[test]
    fn person_status_known_values_count() {
        assert_eq!(PersonStatus::KNOWN.len(), 5);
    }

    #[test]
    fn event_type_known_values_count() {
        assert_eq!(EventType::KNOWN.len(), 32);
    }

    #[test]
    fn org_type_known_values_count() {
        assert_eq!(OrgType::KNOWN.len(), 21);
    }

    #[test]
    fn org_status_known_values_count() {
        assert_eq!(OrgStatus::KNOWN.len(), 4);
    }

    #[test]
    fn severity_known_values_count() {
        assert_eq!(Severity::KNOWN.len(), 4);
    }

    #[test]
    fn doc_type_known_values_count() {
        assert_eq!(DocType::KNOWN.len(), 13);
    }

    #[test]
    fn asset_type_known_values_count() {
        assert_eq!(AssetType::KNOWN.len(), 9);
    }

    #[test]
    fn asset_status_known_values_count() {
        assert_eq!(AssetStatus::KNOWN.len(), 5);
    }

    #[test]
    fn case_type_known_values_count() {
        assert_eq!(CaseType::KNOWN.len(), 8);
    }

    #[test]
    fn case_status_known_values_count() {
        assert_eq!(CaseStatus::KNOWN.len(), 7);
    }
}