use std::fmt;
use serde::Serialize;
/// Label naming one of the six entity kinds declared below.
///
/// The derived `Serialize` emits each variant under its snake_case name
/// (for example `"organization"`), as requested by `rename_all`.
/// The type is `Copy` and hashable, so it can be passed by value and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityLabel {
Person,
Organization,
Event,
Document,
Asset,
Case,
}
impl fmt::Display for EntityLabel {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Person => write!(f, "person"),
Self::Organization => write!(f, "organization"),
Self::Event => write!(f, "event"),
Self::Document => write!(f, "document"),
Self::Asset => write!(f, "asset"),
Self::Case => write!(f, "case"),
}
}
}
/// Role classification with fifteen named variants plus a free-form `Custom` escape hatch.
///
/// Named variants serialize under their snake_case names (`CivilServant` -> `"civil_servant"`).
/// The type is not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Role {
Politician,
Executive,
CivilServant,
Military,
Judiciary,
LawEnforcement,
Journalist,
Academic,
Activist,
Athlete,
Lawyer,
Lobbyist,
Banker,
Accountant,
Consultant,
/// Free-form role text not covered by the named variants.
///
/// NOTE(review): with the derived `Serialize` this newtype variant uses serde's
/// default externally tagged form (`{"custom": "..."}`), not a `custom:`-prefixed
/// string like the one `parse_custom` accepts — confirm that is the intended wire shape.
Custom(String),
}
/// Upper bound, in bytes, on the text following the `custom:` prefix that
/// `parse_custom` accepts (it compares against `str::len`).
const MAX_CUSTOM_LEN: usize = 100;
impl Role {
/// Snake_case names of the fifteen named `Role` variants, in declaration order.
///
/// `Custom` is intentionally absent. This list is maintained by hand and must be
/// kept in step with the enum; how callers use it is not visible in this file.
pub const KNOWN: &[&str] = &[
"politician",
"executive",
"civil_servant",
"military",
"judiciary",
"law_enforcement",
"journalist",
"academic",
"activist",
"athlete",
"lawyer",
"lobbyist",
"banker",
"accountant",
"consultant",
];
}
/// Closed set of five status values (presumably for a person entity — confirm against callers).
///
/// Serialized under the snake_case variant name, e.g. `"deceased"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum PersonStatus {
Active,
Deceased,
Imprisoned,
Fugitive,
Acquitted,
}
impl PersonStatus {
/// Snake_case names of every `PersonStatus` variant, in declaration order.
/// Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &["active", "deceased", "imprisoned", "fugitive", "acquitted"];
}
/// Organization classification with twenty-one named variants plus a free-form `Custom`.
///
/// Named variants serialize under their snake_case names (`Ngo` -> `"ngo"`,
/// `StateEnterprise` -> `"state_enterprise"`). Not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum OrgType {
GovernmentMinistry,
GovernmentAgency,
LocalGovernment,
Legislature,
Court,
LawEnforcement,
Prosecutor,
Regulator,
PoliticalParty,
StateEnterprise,
Corporation,
Bank,
Ngo,
Media,
University,
SportsClub,
SportsBody,
TradeUnion,
LobbyGroup,
Military,
ReligiousBody,
/// Free-form organization type not covered by the named variants.
/// NOTE(review): the derived `Serialize` emits this as `{"custom": "..."}`
/// (serde's externally tagged default) — confirm that is intended.
Custom(String),
}
impl OrgType {
/// Snake_case names of the twenty-one named `OrgType` variants, in declaration order.
///
/// `Custom` is intentionally absent. Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &[
"government_ministry",
"government_agency",
"local_government",
"legislature",
"court",
"law_enforcement",
"prosecutor",
"regulator",
"political_party",
"state_enterprise",
"corporation",
"bank",
"ngo",
"media",
"university",
"sports_club",
"sports_body",
"trade_union",
"lobby_group",
"military",
"religious_body",
];
}
/// Closed set of four status values (presumably for an organization entity — confirm against callers).
///
/// Serialized under the snake_case variant name, e.g. `"dissolved"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum OrgStatus {
Active,
Dissolved,
Suspended,
Merged,
}
impl OrgStatus {
/// Snake_case names of every `OrgStatus` variant, in declaration order.
/// Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &["active", "dissolved", "suspended", "merged"];
}
/// Event classification with thirty-two named variants plus a free-form `Custom`.
///
/// Named variants serialize under their snake_case names
/// (`MoneyLaundering` -> `"money_laundering"`). Not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum EventType {
Arrest,
Indictment,
Trial,
Conviction,
Acquittal,
Sentencing,
Appeal,
Pardon,
Parole,
Bribery,
Embezzlement,
Fraud,
Extortion,
MoneyLaundering,
Murder,
Assault,
Dismissal,
Resignation,
Appointment,
Election,
InvestigationOpened,
InvestigationClosed,
Raid,
Seizure,
Warrant,
FugitiveFlight,
FugitiveCapture,
PolicyChange,
ContractAward,
FinancialDefault,
Bailout,
WhistleblowerReport,
/// Free-form event type not covered by the named variants.
/// NOTE(review): the derived `Serialize` emits this as `{"custom": "..."}`
/// (serde's externally tagged default) — confirm that is intended.
Custom(String),
}
impl EventType {
/// Snake_case names of the thirty-two named `EventType` variants, in declaration order.
///
/// `Custom` is intentionally absent. Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &[
"arrest",
"indictment",
"trial",
"conviction",
"acquittal",
"sentencing",
"appeal",
"pardon",
"parole",
"bribery",
"embezzlement",
"fraud",
"extortion",
"money_laundering",
"murder",
"assault",
"dismissal",
"resignation",
"appointment",
"election",
"investigation_opened",
"investigation_closed",
"raid",
"seizure",
"warrant",
"fugitive_flight",
"fugitive_capture",
"policy_change",
"contract_award",
"financial_default",
"bailout",
"whistleblower_report",
];
}
/// Four-level severity scale, declared from `Minor` to `Critical`.
///
/// Serialized under the snake_case variant name. Note that no ordering trait
/// (`PartialOrd`/`Ord`) is derived, so values cannot be compared with `<`/`>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
Minor,
Significant,
Major,
Critical,
}
impl Severity {
/// Snake_case names of every `Severity` variant, in declaration order.
/// Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &["minor", "significant", "major", "critical"];
}
/// Document classification with thirteen named variants plus a free-form `Custom`.
///
/// Named variants serialize under their snake_case names
/// (`CourtRuling` -> `"court_ruling"`). Not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum DocType {
CourtRuling,
Indictment,
ChargeSheet,
Warrant,
Contract,
Permit,
AuditReport,
FinancialDisclosure,
Legislation,
Regulation,
PressRelease,
InvestigationReport,
SanctionsNotice,
/// Free-form document type not covered by the named variants.
/// NOTE(review): the derived `Serialize` emits this as `{"custom": "..."}`
/// (serde's externally tagged default) — confirm that is intended.
Custom(String),
}
impl DocType {
/// Snake_case names of the thirteen named `DocType` variants, in declaration order.
///
/// `Custom` is intentionally absent. Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &[
"court_ruling",
"indictment",
"charge_sheet",
"warrant",
"contract",
"permit",
"audit_report",
"financial_disclosure",
"legislation",
"regulation",
"press_release",
"investigation_report",
"sanctions_notice",
];
}
/// Asset classification with nine named variants plus a free-form `Custom`.
///
/// Named variants serialize under their snake_case names
/// (`BankAccount` -> `"bank_account"`). Not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AssetType {
Cash,
BankAccount,
RealEstate,
Vehicle,
Equity,
ContractValue,
Grant,
BudgetAllocation,
SeizedAsset,
/// Free-form asset type not covered by the named variants.
/// NOTE(review): the derived `Serialize` emits this as `{"custom": "..."}`
/// (serde's externally tagged default) — confirm that is intended.
Custom(String),
}
impl AssetType {
/// Snake_case names of the nine named `AssetType` variants, in declaration order.
///
/// `Custom` is intentionally absent. Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &[
"cash",
"bank_account",
"real_estate",
"vehicle",
"equity",
"contract_value",
"grant",
"budget_allocation",
"seized_asset",
];
}
/// Closed set of five status values (presumably for an asset entity — confirm against callers).
///
/// Serialized under the snake_case variant name, e.g. `"forfeited"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AssetStatus {
Active,
Frozen,
Seized,
Forfeited,
Returned,
}
impl AssetStatus {
/// Snake_case names of every `AssetStatus` variant, in declaration order.
/// Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &["active", "frozen", "seized", "forfeited", "returned"];
}
/// Case classification with eight named variants plus a free-form `Custom`.
///
/// Named variants serialize under their snake_case names
/// (`CivilRights` -> `"civil_rights"`). Not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CaseType {
Corruption,
Fraud,
Bribery,
Embezzlement,
Murder,
CivilRights,
Regulatory,
Political,
/// Free-form case type not covered by the named variants.
/// NOTE(review): the derived `Serialize` emits this as `{"custom": "..."}`
/// (serde's externally tagged default) — confirm that is intended.
Custom(String),
}
impl CaseType {
/// Snake_case names of the eight named `CaseType` variants, in declaration order.
///
/// `Custom` is intentionally absent. Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &[
"corruption",
"fraud",
"bribery",
"embezzlement",
"murder",
"civil_rights",
"regulatory",
"political",
];
}
/// Closed set of seven status values (presumably for a case entity — confirm against callers).
///
/// Serialized under the snake_case variant name
/// (`UnderInvestigation` -> `"under_investigation"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CaseStatus {
Open,
UnderInvestigation,
Trial,
Convicted,
Acquitted,
Closed,
Appeal,
}
impl CaseStatus {
/// Snake_case names of every `CaseStatus` variant, in declaration order.
/// Hand-maintained; keep in step with the enum.
pub const KNOWN: &[&str] = &[
"open",
"under_investigation",
"trial",
"convicted",
"acquitted",
"closed",
"appeal",
];
}
/// A monetary value: a signed integer amount, a currency code and a display string.
///
/// All three fields are always serialized. No validation happens in this type;
/// any length limits must be enforced by whoever constructs it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Money {
// NOTE(review): the unit (major vs. minor currency units) is not established here;
// the test fixture in this file pairs 500_000_000_000 with "Rp 500 billion".
pub amount: i64,
// Currency code, e.g. "IDR" in the tests; presumably ISO 4217 given the
// 3-byte MAX_CURRENCY_LEN — confirm.
pub currency: String,
// Human-readable rendering of the amount, e.g. "Rp 500 billion".
pub display: String,
}
// Length limits presumably applied to `Money::currency` and `Money::display`
// respectively; they are not enforced anywhere in this file, so the unit
// (bytes vs. characters) depends on the caller — verify there.
pub const MAX_CURRENCY_LEN: usize = 3;
pub const MAX_MONEY_DISPLAY_LEN: usize = 100;
/// A country plus an optional subdivision within it.
///
/// `subdivision` is left out of the serialized output entirely when it is `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Jurisdiction {
// Country code, e.g. "ID" in the tests; presumably ISO 3166-1 alpha-2 given the
// 2-byte MAX_COUNTRY_LEN — confirm.
pub country: String,
// Free-text subdivision name, e.g. "South Sulawesi"; omitted from output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
pub subdivision: Option<String>,
}
// Length limits presumably applied to `Jurisdiction::country` and
// `Jurisdiction::subdivision` respectively; they are not enforced anywhere in
// this file — verify the enforcing code and its unit (bytes vs. characters).
pub const MAX_COUNTRY_LEN: usize = 2;
pub const MAX_SUBDIVISION_LEN: usize = 200;
/// A cited source: a URL and its domain, plus optional descriptive metadata.
///
/// `url` and `domain` are always serialized; each optional field is left out of
/// the output when it is `None`. Nothing here checks that `domain` matches `url`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Source {
pub url: String,
pub domain: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
// Stored as a plain string (the tests use "2024-01-15"); the format is not validated here.
#[serde(skip_serializing_if = "Option::is_none")]
pub published_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub archived_url: Option<String>,
// Language code, e.g. "id" in the tests; presumably ISO 639-1 given the
// 2-byte MAX_SOURCE_LANGUAGE_LEN — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub language: Option<String>,
}
// Length limits presumably applied to the corresponding `Source` fields
// (`url`, `domain`, `title`, `language`); they are not enforced anywhere in
// this file — verify the enforcing code and its unit (bytes vs. characters).
pub const MAX_SOURCE_URL_LEN: usize = 2048;
pub const MAX_SOURCE_DOMAIN_LEN: usize = 253;
pub const MAX_SOURCE_TITLE_LEN: usize = 300;
pub const MAX_SOURCE_LANGUAGE_LEN: usize = 2;
/// Extracts the payload of a `custom:`-prefixed value.
///
/// Returns `Some(payload)`, borrowed from `value`, when `value` starts with the
/// exact, case-sensitive prefix `custom:` and the remainder is non-empty and at
/// most `MAX_CUSTOM_LEN` bytes long. Returns `None` in every other case.
///
/// The limit is measured with `str::len`, i.e. in UTF-8 bytes, not characters.
/// The payload is returned untrimmed.
pub fn parse_custom(value: &str) -> Option<&str> {
    value
        .strip_prefix("custom:")
        .filter(|payload| !payload.is_empty() && payload.len() <= MAX_CUSTOM_LEN)
}
#[cfg(test)]
mod tests {
    //! Unit tests for label formatting, JSON serialization shape, `parse_custom`
    //! and the hand-maintained `KNOWN` tables.
    //!
    //! Serialization results go through `unwrap_or_default()`: a serializer error
    //! becomes an empty string, which makes the positive `contains`/`assert_eq!`
    //! check in each test fail instead of panicking inside an `unwrap`.
    use super::*;

    // `Display` prints the lowercase name of every variant.
    #[test]
    fn entity_label_display() {
        assert_eq!(EntityLabel::Person.to_string(), "person");
        assert_eq!(EntityLabel::Organization.to_string(), "organization");
        assert_eq!(EntityLabel::Event.to_string(), "event");
        assert_eq!(EntityLabel::Document.to_string(), "document");
        assert_eq!(EntityLabel::Asset.to_string(), "asset");
        assert_eq!(EntityLabel::Case.to_string(), "case");
    }

    // Unit variants serialize as a bare snake_case JSON string.
    #[test]
    fn entity_label_serializes_snake_case() {
        let json = serde_json::to_string(&EntityLabel::Organization).unwrap_or_default();
        assert_eq!(json, "\"organization\"");
    }

    #[test]
    fn money_serialization() {
        let m = Money {
            amount: 500_000_000_000,
            currency: "IDR".into(),
            display: "Rp 500 billion".into(),
        };
        let json = serde_json::to_string(&m).unwrap_or_default();
        assert!(json.contains("\"amount\":500000000000"));
        assert!(json.contains("\"currency\":\"IDR\""));
        assert!(json.contains("\"display\":\"Rp 500 billion\""));
    }

    // A `None` subdivision must not appear in the output at all.
    #[test]
    fn jurisdiction_without_subdivision() {
        let j = Jurisdiction {
            country: "ID".into(),
            subdivision: None,
        };
        let json = serde_json::to_string(&j).unwrap_or_default();
        assert!(json.contains("\"country\":\"ID\""));
        assert!(!json.contains("subdivision"));
    }

    #[test]
    fn jurisdiction_with_subdivision() {
        let j = Jurisdiction {
            country: "ID".into(),
            subdivision: Some("South Sulawesi".into()),
        };
        let json = serde_json::to_string(&j).unwrap_or_default();
        assert!(json.contains("\"subdivision\":\"South Sulawesi\""));
    }

    // Optional source fields are skipped when absent.
    #[test]
    fn source_minimal() {
        let s = Source {
            url: "https://kompas.com/article".into(),
            domain: "kompas.com".into(),
            title: None,
            published_at: None,
            archived_url: None,
            language: None,
        };
        let json = serde_json::to_string(&s).unwrap_or_default();
        assert!(json.contains("\"domain\":\"kompas.com\""));
        assert!(!json.contains("title"));
        assert!(!json.contains("language"));
    }

    #[test]
    fn source_full() {
        let s = Source {
            url: "https://kompas.com/article".into(),
            domain: "kompas.com".into(),
            title: Some("Breaking news".into()),
            published_at: Some("2024-01-15".into()),
            archived_url: Some(
                "https://web.archive.org/web/2024/https://kompas.com/article".into(),
            ),
            language: Some("id".into()),
        };
        let json = serde_json::to_string(&s).unwrap_or_default();
        assert!(json.contains("\"title\":\"Breaking news\""));
        assert!(json.contains("\"language\":\"id\""));
    }

    #[test]
    fn parse_custom_valid() {
        assert_eq!(parse_custom("custom:Kit Manager"), Some("Kit Manager"));
    }

    #[test]
    fn parse_custom_empty() {
        assert_eq!(parse_custom("custom:"), None);
    }

    // Accept boundary: a payload of exactly `MAX_CUSTOM_LEN` bytes is still valid.
    #[test]
    fn parse_custom_at_max_len() {
        let payload = "a".repeat(MAX_CUSTOM_LEN);
        let input = format!("custom:{}", payload);
        assert_eq!(parse_custom(&input), Some(payload.as_str()));
    }

    // Reject boundary: derived from the constant so the test follows the limit.
    #[test]
    fn parse_custom_too_long() {
        let long = format!("custom:{}", "a".repeat(MAX_CUSTOM_LEN + 1));
        assert_eq!(parse_custom(&long), None);
    }

    #[test]
    fn parse_custom_no_prefix() {
        assert_eq!(parse_custom("politician"), None);
    }

    // The `KNOWN` tables are maintained by hand next to their enums; pinning the
    // lengths flags a variant added to one side but not the other.
    #[test]
    fn role_known_values_count() {
        assert_eq!(Role::KNOWN.len(), 15);
    }

    #[test]
    fn event_type_known_values_count() {
        assert_eq!(EventType::KNOWN.len(), 32);
    }

    #[test]
    fn org_type_known_values_count() {
        assert_eq!(OrgType::KNOWN.len(), 21);
    }

    #[test]
    fn severity_known_values_count() {
        assert_eq!(Severity::KNOWN.len(), 4);
    }

    #[test]
    fn person_status_known_values_count() {
        assert_eq!(PersonStatus::KNOWN.len(), 5);
    }

    #[test]
    fn org_status_known_values_count() {
        assert_eq!(OrgStatus::KNOWN.len(), 4);
    }

    #[test]
    fn doc_type_known_values_count() {
        assert_eq!(DocType::KNOWN.len(), 13);
    }

    #[test]
    fn asset_type_known_values_count() {
        assert_eq!(AssetType::KNOWN.len(), 9);
    }

    #[test]
    fn asset_status_known_values_count() {
        assert_eq!(AssetStatus::KNOWN.len(), 5);
    }

    #[test]
    fn case_type_known_values_count() {
        assert_eq!(CaseType::KNOWN.len(), 8);
    }

    #[test]
    fn case_status_known_values_count() {
        assert_eq!(CaseStatus::KNOWN.len(), 7);
    }
}