use std::fmt;
use serde::Serialize;
/// Label naming the kind of entity a value represents.
///
/// Serde serializes each variant as its snake_case name (for example
/// `EntityLabel::Organization` becomes `"organization"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum EntityLabel {
Person,
Organization,
Event,
Document,
Asset,
Case,
}
/// Renders the label as its lowercase name (`person`, `organization`, ...).
impl fmt::Display for EntityLabel {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the static name first, then emit it with a single write.
        let name = match self {
            Self::Person => "person",
            Self::Organization => "organization",
            Self::Event => "event",
            Self::Document => "document",
            Self::Asset => "asset",
            Self::Case => "case",
        };
        f.write_str(name)
    }
}
/// Role classification: one of the predefined variants, or a free-form
/// [`Role::Custom`] value.
///
/// Not `Copy`, because `Custom` owns a `String`. Serde renames variants to
/// snake_case (`CivilServant` -> `civil_servant`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Role {
Politician,
Executive,
CivilServant,
Military,
Judiciary,
LawEnforcement,
Journalist,
Academic,
Activist,
Athlete,
Lawyer,
Lobbyist,
Banker,
Accountant,
Consultant,
/// Free-form role not covered by the predefined variants.
Custom(String),
}
/// Upper bound, in bytes, on the text following the `custom:` prefix that
/// `parse_custom` accepts.
const MAX_CUSTOM_LEN: usize = 100;
impl Role {
/// snake_case names of every predefined (non-`Custom`) [`Role`] variant,
/// listed in declaration order.
pub const KNOWN: &[&str] = &[
"politician",
"executive",
"civil_servant",
"military",
"judiciary",
"law_enforcement",
"journalist",
"academic",
"activist",
"athlete",
"lawyer",
"lobbyist",
"banker",
"accountant",
"consultant",
];
}
/// Closed set of status values for a person (going by the type name).
///
/// `Copy` because no variant carries data. Serde renames variants to
/// snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum PersonStatus {
Active,
Deceased,
Imprisoned,
Fugitive,
Acquitted,
}
impl PersonStatus {
/// snake_case names of every [`PersonStatus`] variant, in declaration order.
pub const KNOWN: &[&str] = &["active", "deceased", "imprisoned", "fugitive", "acquitted"];
}
/// Organization classification: one of the predefined variants, or a
/// free-form [`OrgType::Custom`] value.
///
/// Not `Copy`, because `Custom` owns a `String`. Serde renames variants to
/// snake_case (`GovernmentMinistry` -> `government_ministry`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum OrgType {
GovernmentMinistry,
GovernmentAgency,
LocalGovernment,
Legislature,
Court,
LawEnforcement,
Prosecutor,
Regulator,
PoliticalParty,
StateEnterprise,
Corporation,
Bank,
Ngo,
Media,
University,
SportsClub,
SportsBody,
TradeUnion,
LobbyGroup,
Military,
ReligiousBody,
/// Free-form organization type not covered by the predefined variants.
Custom(String),
}
impl OrgType {
/// snake_case names of every predefined (non-`Custom`) [`OrgType`] variant,
/// listed in declaration order.
pub const KNOWN: &[&str] = &[
"government_ministry",
"government_agency",
"local_government",
"legislature",
"court",
"law_enforcement",
"prosecutor",
"regulator",
"political_party",
"state_enterprise",
"corporation",
"bank",
"ngo",
"media",
"university",
"sports_club",
"sports_body",
"trade_union",
"lobby_group",
"military",
"religious_body",
];
}
/// Closed set of status values for an organization (going by the type name).
///
/// `Copy` because no variant carries data. Serde renames variants to
/// snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum OrgStatus {
Active,
Dissolved,
Suspended,
Merged,
}
impl OrgStatus {
/// snake_case names of every [`OrgStatus`] variant, in declaration order.
pub const KNOWN: &[&str] = &["active", "dissolved", "suspended", "merged"];
}
/// Event classification: one of the predefined variants, or a free-form
/// [`EventType::Custom`] value.
///
/// Not `Copy`, because `Custom` owns a `String`. Serde renames variants to
/// snake_case (`MoneyLaundering` -> `money_laundering`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum EventType {
Arrest,
Indictment,
Trial,
Conviction,
Acquittal,
Sentencing,
Appeal,
Pardon,
Parole,
Bribery,
Embezzlement,
Fraud,
Extortion,
MoneyLaundering,
Murder,
Assault,
Dismissal,
Resignation,
Appointment,
Election,
InvestigationOpened,
InvestigationClosed,
Raid,
Seizure,
Warrant,
FugitiveFlight,
FugitiveCapture,
PolicyChange,
ContractAward,
FinancialDefault,
Bailout,
WhistleblowerReport,
/// Free-form event type not covered by the predefined variants.
Custom(String),
}
impl EventType {
/// snake_case names of every predefined (non-`Custom`) [`EventType`]
/// variant, listed in declaration order.
pub const KNOWN: &[&str] = &[
"arrest",
"indictment",
"trial",
"conviction",
"acquittal",
"sentencing",
"appeal",
"pardon",
"parole",
"bribery",
"embezzlement",
"fraud",
"extortion",
"money_laundering",
"murder",
"assault",
"dismissal",
"resignation",
"appointment",
"election",
"investigation_opened",
"investigation_closed",
"raid",
"seizure",
"warrant",
"fugitive_flight",
"fugitive_capture",
"policy_change",
"contract_award",
"financial_default",
"bailout",
"whistleblower_report",
];
}
/// Closed set of severity levels, declared from `Minor` to `Critical`.
///
/// No `Ord`/`PartialOrd` is derived, so the type itself defines no ordering
/// between levels. Serde renames variants to snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
Minor,
Significant,
Major,
Critical,
}
impl Severity {
/// snake_case names of every [`Severity`] variant, in declaration order.
pub const KNOWN: &[&str] = &["minor", "significant", "major", "critical"];
}
/// Document classification: one of the predefined variants, or a free-form
/// [`DocType::Custom`] value.
///
/// Not `Copy`, because `Custom` owns a `String`. Serde renames variants to
/// snake_case (`CourtRuling` -> `court_ruling`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum DocType {
CourtRuling,
Indictment,
ChargeSheet,
Warrant,
Contract,
Permit,
AuditReport,
FinancialDisclosure,
Legislation,
Regulation,
PressRelease,
InvestigationReport,
SanctionsNotice,
/// Free-form document type not covered by the predefined variants.
Custom(String),
}
impl DocType {
/// snake_case names of every predefined (non-`Custom`) [`DocType`] variant,
/// listed in declaration order.
pub const KNOWN: &[&str] = &[
"court_ruling",
"indictment",
"charge_sheet",
"warrant",
"contract",
"permit",
"audit_report",
"financial_disclosure",
"legislation",
"regulation",
"press_release",
"investigation_report",
"sanctions_notice",
];
}
/// Asset classification: one of the predefined variants, or a free-form
/// [`AssetType::Custom`] value.
///
/// Not `Copy`, because `Custom` owns a `String`. Serde renames variants to
/// snake_case (`BankAccount` -> `bank_account`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AssetType {
Cash,
BankAccount,
RealEstate,
Vehicle,
Equity,
ContractValue,
Grant,
BudgetAllocation,
SeizedAsset,
/// Free-form asset type not covered by the predefined variants.
Custom(String),
}
impl AssetType {
/// snake_case names of every predefined (non-`Custom`) [`AssetType`]
/// variant, listed in declaration order.
pub const KNOWN: &[&str] = &[
"cash",
"bank_account",
"real_estate",
"vehicle",
"equity",
"contract_value",
"grant",
"budget_allocation",
"seized_asset",
];
}
/// Closed set of status values for an asset (going by the type name).
///
/// `Copy` because no variant carries data. Serde renames variants to
/// snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum AssetStatus {
Active,
Frozen,
Seized,
Forfeited,
Returned,
}
impl AssetStatus {
/// snake_case names of every [`AssetStatus`] variant, in declaration order.
pub const KNOWN: &[&str] = &["active", "frozen", "seized", "forfeited", "returned"];
}
/// Case classification: one of the predefined variants, or a free-form
/// [`CaseType::Custom`] value.
///
/// Not `Copy`, because `Custom` owns a `String`. Serde renames variants to
/// snake_case (`CivilRights` -> `civil_rights`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CaseType {
Corruption,
Fraud,
Bribery,
Embezzlement,
Murder,
CivilRights,
Regulatory,
Political,
/// Free-form case type not covered by the predefined variants.
Custom(String),
}
impl CaseType {
/// snake_case names of every predefined (non-`Custom`) [`CaseType`] variant,
/// listed in declaration order.
pub const KNOWN: &[&str] = &[
"corruption",
"fraud",
"bribery",
"embezzlement",
"murder",
"civil_rights",
"regulatory",
"political",
];
}
/// Closed set of status values for a case (going by the type name).
///
/// `Copy` because no variant carries data. Serde renames variants to
/// snake_case (`UnderInvestigation` -> `under_investigation`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CaseStatus {
Open,
UnderInvestigation,
Trial,
Convicted,
Acquitted,
Closed,
Appeal,
}
impl CaseStatus {
/// snake_case names of every [`CaseStatus`] variant, in declaration order.
pub const KNOWN: &[&str] = &[
"open",
"under_investigation",
"trial",
"convicted",
"acquitted",
"closed",
"appeal",
];
}
/// A monetary amount together with its currency and a ready-made display
/// string. All three fields are always serialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Money {
/// Numeric amount as a signed 64-bit integer.
/// NOTE(review): whether this is in major or minor currency units is not
/// stated in this file; confirm with the producer of these values.
pub amount: i64,
/// Currency code (the unit tests use `"IDR"`).
pub currency: String,
/// Human-readable rendering (the unit tests use `"Rp 500 billion"`).
pub display: String,
}
/// Maximum length of a currency code; `AmountEntry::validate_currency`
/// rejects anything longer.
pub const MAX_CURRENCY_LEN: usize = 3;
/// NOTE(review): presumably the length limit for `Money::display`; it is not
/// referenced by the code visible in this file, so enforcement must happen
/// elsewhere. Confirm against callers.
pub const MAX_MONEY_DISPLAY_LEN: usize = 100;
/// A country plus an optional subdivision within it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Jurisdiction {
/// Country code (the unit tests use `"ID"`).
pub country: String,
/// Optional subdivision name (the unit tests use `"South Sulawesi"`).
/// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub subdivision: Option<String>,
}
/// NOTE(review): presumably the length limit for `Jurisdiction::country`; not
/// referenced by the code visible in this file. Confirm against callers.
pub const MAX_COUNTRY_LEN: usize = 2;
/// NOTE(review): presumably the length limit for `Jurisdiction::subdivision`;
/// not referenced by the code visible in this file. Confirm against callers.
pub const MAX_SUBDIVISION_LEN: usize = 200;
/// Reference to a source by URL, with optional descriptive metadata.
///
/// `url` and `domain` are always serialized; every `Option` field is omitted
/// from the serialized output when it is `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Source {
/// Full URL of the source.
pub url: String,
/// Domain of the source (the unit tests use `"kompas.com"`).
/// NOTE(review): nothing in this file checks that it matches `url`.
pub domain: String,
/// Optional title.
#[serde(skip_serializing_if = "Option::is_none")]
pub title: Option<String>,
/// Optional publication date kept as a plain string (the unit tests use
/// `"2024-01-15"`); no format is enforced in this file.
#[serde(skip_serializing_if = "Option::is_none")]
pub published_at: Option<String>,
/// Optional URL of an archived copy.
#[serde(skip_serializing_if = "Option::is_none")]
pub archived_url: Option<String>,
/// Optional language code (the unit tests use `"id"`).
#[serde(skip_serializing_if = "Option::is_none")]
pub language: Option<String>,
}
// NOTE(review): the four limits below presumably apply to the like-named
// `Source` fields; none of them is referenced by the code visible in this
// file, so enforcement must happen elsewhere. Confirm against callers.
/// Presumed length limit for `Source::url`.
pub const MAX_SOURCE_URL_LEN: usize = 2048;
/// Presumed length limit for `Source::domain`.
pub const MAX_SOURCE_DOMAIN_LEN: usize = 253;
/// Presumed length limit for `Source::title`.
pub const MAX_SOURCE_TITLE_LEN: usize = 300;
/// Presumed length limit for `Source::language`.
pub const MAX_SOURCE_LANGUAGE_LEN: usize = 2;
/// One parsed amount: a signed integer value in a given currency, with an
/// optional label and an "approximate" flag.
///
/// Values are produced by `AmountEntry::parse_dsl` or built directly.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct AmountEntry {
/// Amount as a signed 64-bit integer.
pub value: i64,
/// Currency code; the parser stores it upper-cased.
pub currency: String,
/// Optional label. The parser accepts either an entry of
/// `AMOUNT_LABEL_KNOWN` or a `custom:<text>` string, stored verbatim.
/// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub label: Option<String>,
/// `true` when the DSL entry was prefixed with `~`.
pub approximate: bool,
}
/// Maximum number of non-empty entries `AmountEntry::parse_dsl` accepts in
/// one input string.
pub const MAX_AMOUNT_ENTRIES: usize = 10;
/// Maximum byte length of a label accepted by `AmountEntry::validate_label`.
const MAX_AMOUNT_LABEL_LEN: usize = 50;
/// Labels accepted verbatim by `AmountEntry::validate_label`; anything else
/// must use the `custom:<text>` form.
pub const AMOUNT_LABEL_KNOWN: &[&str] = &[
"bribe",
"fine",
"restitution",
"state_loss",
"kickback",
"embezzlement",
"fraud",
"gratification",
"bailout",
"procurement",
"penalty",
"fee",
"donation",
"loan",
];
impl AmountEntry {
    /// Parses a `|`-separated list of amount entries.
    ///
    /// Each entry has the shape `[~]<value> <currency> [<label>]`, for example
    /// `660000 USD bribe | ~250000000 IDR fine`. Blank input yields an empty
    /// list, and empty segments between separators are skipped.
    ///
    /// # Errors
    ///
    /// Returns a message when more than `MAX_AMOUNT_ENTRIES` non-empty entries
    /// are present, or when any single entry is malformed (bad value,
    /// currency, or label).
    pub fn parse_dsl(input: &str) -> Result<Vec<Self>, String> {
        let input = input.trim();
        if input.is_empty() {
            return Ok(Vec::new());
        }
        let entries: Vec<&str> = input
            .split('|')
            .map(str::trim)
            .filter(|s| !s.is_empty())
            .collect();
        if entries.len() > MAX_AMOUNT_ENTRIES {
            return Err(format!(
                "too many amount entries ({}, max {MAX_AMOUNT_ENTRIES})",
                entries.len()
            ));
        }
        entries.into_iter().map(Self::parse_one).collect()
    }

    /// Parses one `[~]<value> <currency> [<label>]` entry.
    ///
    /// Tokens may be separated by any run of whitespace. Everything after the
    /// currency is the label, so a `custom:` label may itself contain spaces.
    fn parse_one(entry: &str) -> Result<Self, String> {
        let (approximate, body) = match entry.strip_prefix('~') {
            Some(after_tilde) => (true, after_tilde),
            None => (false, entry),
        };
        let body = body.trim();

        // Split off one token at a time and skip the whole whitespace run
        // after it, so doubled spaces or tabs never produce an empty token.
        let (value_text, after_value) = body
            .split_once(char::is_whitespace)
            .ok_or_else(|| format!("invalid amount format: {entry:?}"))?;
        let after_value = after_value.trim_start();
        let (currency_text, label_text) = match after_value.split_once(char::is_whitespace) {
            Some((currency, label)) => (currency, Some(label.trim_start())),
            None => (after_value, None),
        };

        let value = value_text
            .parse::<i64>()
            .map_err(|_| format!("invalid amount value: {value_text:?}"))?;
        let currency = Self::validate_currency(currency_text)?;
        let label = label_text.map(Self::validate_label).transpose()?;
        Ok(Self {
            value,
            currency,
            label,
            approximate,
        })
    }

    /// Validates a currency code and returns it upper-cased.
    ///
    /// Accepts 1 to `MAX_CURRENCY_LEN` ASCII letters in any case. The check
    /// runs on the raw input: Unicode upper-casing can expand non-ASCII
    /// characters into ASCII letters (`"ß"` becomes `"SS"`), which must not
    /// let such input pass as a currency code.
    fn validate_currency(s: &str) -> Result<String, String> {
        let well_formed = !s.is_empty()
            && s.len() <= MAX_CURRENCY_LEN
            && s.bytes().all(|b| b.is_ascii_alphabetic());
        if !well_formed {
            return Err(format!("invalid currency: {s:?}"));
        }
        Ok(s.to_ascii_uppercase())
    }

    /// Validates a label: at most `MAX_AMOUNT_LABEL_LEN` bytes, and either an
    /// entry of `AMOUNT_LABEL_KNOWN` or a valid `custom:<text>` string. The
    /// label is returned unchanged (the `custom:` prefix is kept).
    fn validate_label(s: &str) -> Result<String, String> {
        if s.len() > MAX_AMOUNT_LABEL_LEN {
            return Err(format!("amount label too long: {s:?}"));
        }
        if AMOUNT_LABEL_KNOWN.contains(&s) || parse_custom(s).is_some() {
            Ok(s.to_string())
        } else {
            Err(format!(
                "unknown amount label: {s:?} (use custom:Value for custom)"
            ))
        }
    }

    /// Renders the entry as `[~]<CURRENCY> <amount>[ (<label>)]`.
    ///
    /// The amount goes through `format_human_number`; underscores in the
    /// label are shown as spaces.
    pub fn format_display(&self) -> String {
        let prefix = if self.approximate { "~" } else { "" };
        let amount = format_human_number(self.value);
        let label_suffix = self
            .label
            .as_deref()
            .map(|label| format!(" ({})", label.replace('_', " ")))
            .unwrap_or_default();
        format!("{prefix}{} {amount}{label_suffix}", self.currency)
    }

    /// Renders every entry with [`Self::format_display`], joined by `"; "`.
    pub fn format_list(entries: &[Self]) -> String {
        entries
            .iter()
            .map(Self::format_display)
            .collect::<Vec<_>>()
            .join("; ")
    }
}
/// `Display` yields exactly the text produced by `AmountEntry::format_display`.
impl fmt::Display for AmountEntry {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.format_display();
        f.write_str(&rendered)
    }
}
/// Formats `n` compactly: values whose magnitude is at least one million are
/// shown in units of million / billion / trillion with up to two decimals
/// (truncated, trailing zeros dropped); smaller values are passed to
/// `format_integer`.
fn format_human_number(n: i64) -> String {
    // Largest scale first, so the first match is the one to use.
    const SCALES: [(u64, &str); 3] = [
        (1_000_000_000_000, "trillion"),
        (1_000_000_000, "billion"),
        (1_000_000, "million"),
    ];

    let magnitude = n.unsigned_abs();
    let Some(&(unit, name)) = SCALES.iter().find(|(unit, _)| magnitude >= *unit) else {
        return format_integer(n);
    };

    let sign = if n < 0 { "-" } else { "" };
    let whole = magnitude / unit;
    // Fractional part in hundredths of the unit, truncated rather than rounded.
    let hundredths = (magnitude % unit) * 100 / unit;
    match (hundredths / 10, hundredths % 10) {
        (0, 0) => format!("{sign}{whole} {name}"),
        (tenths, 0) => format!("{sign}{whole}.{tenths} {name}"),
        _ => format!("{sign}{whole}.{hundredths:02} {name}"),
    }
}
/// Formats `n` in decimal with `.` between groups of three digits
/// (`1000` -> `1.000`); a leading `-` is kept for negative values.
fn format_integer(n: i64) -> String {
    let magnitude = n.unsigned_abs().to_string();
    let mut grouped = String::with_capacity(magnitude.len() + magnitude.len() / 3 + 1);
    if n < 0 {
        grouped.push('-');
    }

    // The leading group holds 1 to 3 digits; every later group holds exactly 3.
    let head_len = (magnitude.len() - 1) % 3 + 1;
    let (head, tail) = magnitude.split_at(head_len);
    grouped.push_str(head);
    for start in (0..tail.len()).step_by(3) {
        grouped.push('.');
        grouped.push_str(&tail[start..start + 3]);
    }
    grouped
}
/// Extracts the payload of a `custom:<text>` string.
///
/// Returns the text after the `custom:` prefix, or `None` when the prefix is
/// missing, the payload is empty, or the payload is longer than
/// `MAX_CUSTOM_LEN` bytes.
pub fn parse_custom(value: &str) -> Option<&str> {
    value
        .strip_prefix("custom:")
        .filter(|payload| !payload.is_empty() && payload.len() <= MAX_CUSTOM_LEN)
}
/// Unit tests for serialization, the amount DSL parser, and number formatting.
///
/// Serialization tests call `unwrap_or_default()` on the JSON result, so a
/// failed serialization yields an empty string. Each such test therefore
/// carries at least one positive `contains` assertion; a test with only
/// negative assertions would pass vacuously on failure.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn entity_label_display() {
        assert_eq!(EntityLabel::Person.to_string(), "person");
        assert_eq!(EntityLabel::Organization.to_string(), "organization");
        assert_eq!(EntityLabel::Event.to_string(), "event");
        assert_eq!(EntityLabel::Document.to_string(), "document");
        assert_eq!(EntityLabel::Asset.to_string(), "asset");
        assert_eq!(EntityLabel::Case.to_string(), "case");
    }

    #[test]
    fn entity_label_serializes_snake_case() {
        let json = serde_json::to_string(&EntityLabel::Organization).unwrap_or_default();
        assert_eq!(json, "\"organization\"");
    }

    #[test]
    fn money_serialization() {
        let m = Money {
            amount: 500_000_000_000,
            currency: "IDR".into(),
            display: "Rp 500 billion".into(),
        };
        let json = serde_json::to_string(&m).unwrap_or_default();
        assert!(json.contains("\"amount\":500000000000"));
        assert!(json.contains("\"currency\":\"IDR\""));
        assert!(json.contains("\"display\":\"Rp 500 billion\""));
    }

    #[test]
    fn jurisdiction_without_subdivision() {
        let j = Jurisdiction {
            country: "ID".into(),
            subdivision: None,
        };
        let json = serde_json::to_string(&j).unwrap_or_default();
        assert!(json.contains("\"country\":\"ID\""));
        assert!(!json.contains("subdivision"));
    }

    #[test]
    fn jurisdiction_with_subdivision() {
        let j = Jurisdiction {
            country: "ID".into(),
            subdivision: Some("South Sulawesi".into()),
        };
        let json = serde_json::to_string(&j).unwrap_or_default();
        assert!(json.contains("\"subdivision\":\"South Sulawesi\""));
    }

    #[test]
    fn source_minimal() {
        let s = Source {
            url: "https://kompas.com/article".into(),
            domain: "kompas.com".into(),
            title: None,
            published_at: None,
            archived_url: None,
            language: None,
        };
        let json = serde_json::to_string(&s).unwrap_or_default();
        assert!(json.contains("\"domain\":\"kompas.com\""));
        assert!(!json.contains("title"));
        assert!(!json.contains("language"));
    }

    #[test]
    fn source_full() {
        let s = Source {
            url: "https://kompas.com/article".into(),
            domain: "kompas.com".into(),
            title: Some("Breaking news".into()),
            published_at: Some("2024-01-15".into()),
            archived_url: Some(
                "https://web.archive.org/web/2024/https://kompas.com/article".into(),
            ),
            language: Some("id".into()),
        };
        let json = serde_json::to_string(&s).unwrap_or_default();
        assert!(json.contains("\"title\":\"Breaking news\""));
        assert!(json.contains("\"language\":\"id\""));
    }

    #[test]
    fn parse_custom_valid() {
        assert_eq!(parse_custom("custom:Kit Manager"), Some("Kit Manager"));
    }

    #[test]
    fn parse_custom_empty() {
        assert_eq!(parse_custom("custom:"), None);
    }

    #[test]
    fn parse_custom_too_long() {
        let long = format!("custom:{}", "a".repeat(101));
        assert_eq!(parse_custom(&long), None);
    }

    #[test]
    fn parse_custom_no_prefix() {
        assert_eq!(parse_custom("politician"), None);
    }

    #[test]
    fn amount_entry_parse_simple() {
        let entries = AmountEntry::parse_dsl("660000 USD bribe").unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].value, 660_000);
        assert_eq!(entries[0].currency, "USD");
        assert_eq!(entries[0].label.as_deref(), Some("bribe"));
        assert!(!entries[0].approximate);
    }

    #[test]
    fn amount_entry_parse_approximate() {
        let entries = AmountEntry::parse_dsl("~16800000000000 IDR state_loss").unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries[0].approximate);
        assert_eq!(entries[0].value, 16_800_000_000_000);
        assert_eq!(entries[0].currency, "IDR");
        assert_eq!(entries[0].label.as_deref(), Some("state_loss"));
    }

    #[test]
    fn amount_entry_parse_multiple() {
        let entries = AmountEntry::parse_dsl("660000 USD bribe | 250000000 IDR fine").unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].currency, "USD");
        assert_eq!(entries[1].currency, "IDR");
    }

    #[test]
    fn amount_entry_parse_no_label() {
        let entries = AmountEntry::parse_dsl("1000 EUR").unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries[0].label.is_none());
    }

    #[test]
    fn amount_entry_parse_empty() {
        let entries = AmountEntry::parse_dsl("").unwrap();
        assert!(entries.is_empty());
    }

    #[test]
    fn amount_entry_parse_invalid_value() {
        assert!(AmountEntry::parse_dsl("abc USD").is_err());
    }

    #[test]
    fn amount_entry_parse_unknown_label() {
        assert!(AmountEntry::parse_dsl("1000 USD unknown_label").is_err());
    }

    #[test]
    fn amount_entry_parse_custom_label() {
        let entries = AmountEntry::parse_dsl("1000 USD custom:MyLabel").unwrap();
        assert_eq!(entries[0].label.as_deref(), Some("custom:MyLabel"));
    }

    #[test]
    fn amount_entry_format_display() {
        let entry = AmountEntry {
            value: 660_000,
            currency: "USD".into(),
            label: Some("bribe".into()),
            approximate: false,
        };
        assert_eq!(entry.format_display(), "USD 660.000 (bribe)");
    }

    #[test]
    fn amount_entry_format_approximate() {
        let entry = AmountEntry {
            value: 16_800_000_000_000,
            currency: "IDR".into(),
            label: Some("state_loss".into()),
            approximate: true,
        };
        assert_eq!(
            entry.format_display(),
            "~IDR 16.8 trillion (state loss)"
        );
    }

    #[test]
    fn amount_entry_format_no_label() {
        let entry = AmountEntry {
            value: 1000,
            currency: "EUR".into(),
            label: None,
            approximate: false,
        };
        assert_eq!(entry.format_display(), "EUR 1.000");
    }

    #[test]
    fn amount_entry_serialization() {
        let entry = AmountEntry {
            value: 660_000,
            currency: "USD".into(),
            label: Some("bribe".into()),
            approximate: false,
        };
        let json = serde_json::to_string(&entry).unwrap_or_default();
        assert!(json.contains("\"value\":660000"));
        assert!(json.contains("\"currency\":\"USD\""));
        assert!(json.contains("\"label\":\"bribe\""));
        assert!(json.contains("\"approximate\":false"));
    }

    #[test]
    fn amount_entry_serialization_no_label() {
        let entry = AmountEntry {
            value: 1000,
            currency: "EUR".into(),
            label: None,
            approximate: false,
        };
        let json = serde_json::to_string(&entry).unwrap_or_default();
        // Positive check first: a failed serialization yields "", which would
        // satisfy the negative assertion below without testing anything.
        assert!(json.contains("\"value\":1000"));
        assert!(!json.contains("label"));
    }

    // Despite the name, the thousands separator produced is '.', not ','.
    #[test]
    fn format_integer_commas() {
        assert_eq!(format_integer(0), "0");
        assert_eq!(format_integer(999), "999");
        assert_eq!(format_integer(1000), "1.000");
        assert_eq!(format_integer(1_000_000), "1.000.000");
        assert_eq!(format_integer(16_800_000_000_000), "16.800.000.000.000");
    }

    #[test]
    fn format_human_number_below_million() {
        assert_eq!(format_human_number(0), "0");
        assert_eq!(format_human_number(999), "999");
        assert_eq!(format_human_number(1_000), "1.000");
        assert_eq!(format_human_number(660_000), "660.000");
        assert_eq!(format_human_number(999_999), "999.999");
    }

    #[test]
    fn format_human_number_millions() {
        assert_eq!(format_human_number(1_000_000), "1 million");
        assert_eq!(format_human_number(1_500_000), "1.5 million");
        assert_eq!(format_human_number(250_000_000), "250 million");
        assert_eq!(format_human_number(1_230_000), "1.23 million");
    }

    #[test]
    fn format_human_number_billions() {
        assert_eq!(format_human_number(1_000_000_000), "1 billion");
        assert_eq!(format_human_number(4_580_000_000), "4.58 billion");
        assert_eq!(format_human_number(100_000_000_000), "100 billion");
    }

    #[test]
    fn format_human_number_trillions() {
        assert_eq!(format_human_number(1_000_000_000_000), "1 trillion");
        assert_eq!(format_human_number(4_580_000_000_000), "4.58 trillion");
        assert_eq!(format_human_number(144_000_000_000_000), "144 trillion");
        assert_eq!(format_human_number(16_800_000_000_000), "16.8 trillion");
    }

    #[test]
    fn amount_entry_too_many() {
        // 11 entries: one more than MAX_AMOUNT_ENTRIES.
        let dsl = (0..11)
            .map(|i| format!("{i} USD"))
            .collect::<Vec<_>>()
            .join(" | ");
        assert!(AmountEntry::parse_dsl(&dsl).is_err());
    }

    // The count tests below guard against a KNOWN list drifting from its enum.
    #[test]
    fn role_known_values_count() {
        assert_eq!(Role::KNOWN.len(), 15);
    }

    #[test]
    fn event_type_known_values_count() {
        assert_eq!(EventType::KNOWN.len(), 32);
    }

    #[test]
    fn org_type_known_values_count() {
        assert_eq!(OrgType::KNOWN.len(), 21);
    }

    #[test]
    fn severity_known_values_count() {
        assert_eq!(Severity::KNOWN.len(), 4);
    }
}