use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
/// Identifies the sanctions list an entity belongs to.
///
/// The human-readable label of each variant is produced by
/// `SanctionsList::name`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum SanctionsList {
    /// Labelled "OFAC SDN".
    OFAC,
    /// Labelled "EU Consolidated".
    EU,
    /// Labelled "UN Security Council".
    UN,
    /// Labelled "UK OFSI".
    UKOFSI,
    /// A caller-defined list; the wrapped string is used as its label.
    Custom(String),
}
impl SanctionsList {
pub fn name(&self) -> &str {
match self {
SanctionsList::OFAC => "OFAC SDN",
SanctionsList::EU => "EU Consolidated",
SanctionsList::UN => "UN Security Council",
SanctionsList::UKOFSI => "UK OFSI",
SanctionsList::Custom(name) => name,
}
}
}
/// How a screened name was matched to a listed entity by
/// `SanctionsScreener::screen`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MatchType {
    /// The upper-cased input equals the entity's primary name.
    Exact,
    /// One of the two names is a substring of the other.
    Partial,
    /// The similarity score reached the screener's fuzzy threshold.
    Fuzzy,
    /// The upper-cased input equals one of the entity's aliases.
    Alias,
}
/// Outcome of screening a single name, as produced by
/// `SanctionsScreener::screen`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SanctionsResult {
    /// The name exactly as it was passed to the screener.
    pub screened_value: String,
    /// `true` when `matches` is non-empty.
    pub is_match: bool,
    /// Every match found, ordered from highest to lowest confidence.
    pub matches: Vec<SanctionsMatch>,
    /// UTC timestamp taken when the result was assembled.
    pub screening_time: DateTime<Utc>,
    /// The lists that were enabled on the screener when it ran.
    pub lists_checked: Vec<SanctionsList>,
}
impl SanctionsResult {
    /// Reports whether at least one match has a confidence of 0.9 or more.
    pub fn has_high_confidence_match(&self) -> bool {
        for candidate in &self.matches {
            if candidate.confidence >= 0.9 {
                return true;
            }
        }
        false
    }

    /// Returns the match with the greatest confidence, or `None` when there
    /// are no matches.
    ///
    /// When several matches compare as equal (or are incomparable), the one
    /// appearing last in `matches` wins.
    pub fn highest_confidence(&self) -> Option<&SanctionsMatch> {
        let mut best: Option<&SanctionsMatch> = None;
        for candidate in &self.matches {
            let keep_current = match best {
                // Only a strictly greater incumbent survives; ties and
                // incomparable values hand over to the later element.
                Some(current) => {
                    current.confidence.partial_cmp(&candidate.confidence)
                        == Some(std::cmp::Ordering::Greater)
                }
                None => false,
            };
            if !keep_current {
                best = Some(candidate);
            }
        }
        best
    }

    /// Collects references to every match whose confidence is at least
    /// `threshold`, preserving the order of `matches`.
    pub fn matches_above_threshold(&self, threshold: f32) -> Vec<&SanctionsMatch> {
        let mut selected = Vec::new();
        for candidate in &self.matches {
            if candidate.confidence >= threshold {
                selected.push(candidate);
            }
        }
        selected
    }
}
/// One hit of a screened name against a listed entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SanctionsMatch {
    /// Primary name of the matched entity (also used for alias hits).
    pub matched_name: String,
    /// The list the matched entity belongs to.
    pub list: SanctionsList,
    /// How the match was established.
    pub match_type: MatchType,
    /// Score assigned by the screener: 1.0 for exact hits, 0.95 for alias
    /// hits, the similarity score for fuzzy hits, and 0.7 plus up to 0.2 for
    /// partial hits.
    pub confidence: f32,
    /// Identifier of the matched list entry.
    pub entry_id: String,
    /// Sanctions program recorded on the entry, if any.
    pub program: Option<String>,
    /// Country recorded on the entry, if any.
    pub country: Option<String>,
}
/// Internal record for one listed entity held by the screener.
#[derive(Debug, Clone)]
struct SanctionedEntity {
    /// Identifier reported as `entry_id` on matches.
    id: String,
    /// Primary name; compared directly against the upper-cased input, so it
    /// is expected to be stored in upper case.
    name: String,
    /// Alternative names checked for alias matches.
    aliases: Vec<String>,
    /// The list this entity belongs to; screening skips disabled lists.
    list: SanctionsList,
    /// Sanctions program, when known.
    program: Option<String>,
    /// Country, when known.
    country: Option<String>,
}
/// Screens names against an in-memory set of sanctioned entities.
///
/// Derives `Debug` and `Clone` like the other types in this file, so a
/// screener can be logged and duplicated by callers.
#[derive(Debug, Clone)]
pub struct SanctionsScreener {
    /// All known entities, including those on currently disabled lists.
    entities: Vec<SanctionedEntity>,
    /// Lists whose entities take part in screening.
    enabled_lists: HashSet<SanctionsList>,
    /// Minimum similarity score for a fuzzy match to be reported.
    fuzzy_threshold: f32,
}
impl SanctionsScreener {
    /// Fuzzy threshold a freshly constructed screener starts with.
    const DEFAULT_FUZZY_THRESHOLD: f32 = 0.85;
    /// Confidence reported when the input equals an entity's primary name.
    const EXACT_CONFIDENCE: f32 = 1.0;
    /// Confidence reported when the input equals one of an entity's aliases.
    const ALIAS_CONFIDENCE: f32 = 0.95;
    /// Lowest confidence reported for a substring (partial) match.
    const PARTIAL_BASE_CONFIDENCE: f32 = 0.7;
    /// Extra partial-match confidence, scaled by the shorter/longer length ratio.
    const PARTIAL_LENGTH_BONUS: f32 = 0.2;
    /// Weight of the character-overlap part of the similarity score.
    const CHAR_WEIGHT: f32 = 0.4;
    /// Weight of the word-overlap part of the similarity score.
    const WORD_WEIGHT: f32 = 0.6;

    /// Creates a screener with the OFAC, EU and UN lists enabled, the
    /// built-in sample entries loaded and a fuzzy threshold of 0.85.
    pub fn new() -> Self {
        let mut screener = Self {
            entities: Vec::new(),
            enabled_lists: vec![SanctionsList::OFAC, SanctionsList::EU, SanctionsList::UN]
                .into_iter()
                .collect(),
            fuzzy_threshold: Self::DEFAULT_FUZZY_THRESHOLD,
        };
        screener.load_default_entries();
        screener
    }

    /// Appends the built-in sample entries (one each for OFAC, EU and UN).
    fn load_default_entries(&mut self) {
        self.entities.extend(vec![
            SanctionedEntity {
                id: "OFAC-001".to_string(),
                name: "SANCTIONED ENTITY ONE".to_string(),
                aliases: vec!["ENTITY ONE".to_string(), "E1 LTD".to_string()],
                list: SanctionsList::OFAC,
                program: Some("SDGT".to_string()),
                country: Some("XX".to_string()),
            },
            SanctionedEntity {
                id: "EU-001".to_string(),
                name: "RESTRICTED COMPANY EU".to_string(),
                aliases: vec!["RC EU".to_string()],
                list: SanctionsList::EU,
                program: Some("COUNCIL REGULATION".to_string()),
                country: Some("YY".to_string()),
            },
            SanctionedEntity {
                id: "UN-001".to_string(),
                name: "UN LISTED ORGANIZATION".to_string(),
                aliases: vec!["ULO".to_string()],
                list: SanctionsList::UN,
                program: Some("1267".to_string()),
                country: None,
            },
        ]);
    }

    /// Includes `list` in subsequent screenings.
    pub fn enable_list(&mut self, list: SanctionsList) {
        self.enabled_lists.insert(list);
    }

    /// Excludes `list` from subsequent screenings; its entities are kept and
    /// become active again once the list is re-enabled.
    pub fn disable_list(&mut self, list: &SanctionsList) {
        self.enabled_lists.remove(list);
    }

    /// Sets the minimum similarity score for fuzzy matches, clamped to
    /// `0.0..=1.0`.
    ///
    /// A NaN value is ignored and the previous threshold kept: every
    /// comparison against NaN is false, so storing it would silently switch
    /// fuzzy matching off.
    pub fn set_fuzzy_threshold(&mut self, threshold: f32) {
        if threshold.is_nan() {
            return;
        }
        self.fuzzy_threshold = threshold.clamp(0.0, 1.0);
    }

    /// Screens `name` against every entity on an enabled list.
    ///
    /// The input is normalised first (surrounding whitespace removed, inner
    /// whitespace runs collapsed to one space, upper-cased). An input that is
    /// empty after normalisation never matches. Each entity contributes at
    /// most one match, chosen in this order of precedence:
    ///
    /// 1. `Exact`: equals the primary name (confidence 1.0)
    /// 2. `Alias`: equals one of the aliases (confidence 0.95)
    /// 3. `Fuzzy`: similarity to the primary name reaches the threshold
    /// 4. `Partial`: one name contains the other (confidence 0.7 to 0.9)
    ///
    /// Matches are returned in descending confidence order; `lists_checked`
    /// is sorted by list name so results are reproducible.
    pub fn screen(&self, name: &str) -> SanctionsResult {
        let query = Self::normalize(name);

        let mut lists_checked: Vec<SanctionsList> = self.enabled_lists.iter().cloned().collect();
        // HashSet iteration order is arbitrary; sort for stable output.
        lists_checked.sort_by(|a, b| a.name().cmp(b.name()));

        let mut matches = Vec::new();
        // A blank query would otherwise be a substring of every entity name.
        if !query.is_empty() {
            for entity in &self.entities {
                if !self.enabled_lists.contains(&entity.list) {
                    continue;
                }

                // Stored names and aliases are upper case (see `add_entity`
                // and `load_default_entries`), so plain equality suffices.
                let verdict = if entity.name == query {
                    Some((MatchType::Exact, Self::EXACT_CONFIDENCE))
                } else if entity.aliases.iter().any(|alias| *alias == query) {
                    Some((MatchType::Alias, Self::ALIAS_CONFIDENCE))
                } else {
                    let similarity = self.calculate_similarity(&query, &entity.name);
                    if similarity >= self.fuzzy_threshold {
                        Some((MatchType::Fuzzy, similarity))
                    } else {
                        Self::partial_confidence(&query, &entity.name)
                            .map(|confidence| (MatchType::Partial, confidence))
                    }
                };

                if let Some((match_type, confidence)) = verdict {
                    matches.push(SanctionsMatch {
                        matched_name: entity.name.clone(),
                        list: entity.list.clone(),
                        match_type,
                        confidence,
                        entry_id: entity.id.clone(),
                        program: entity.program.clone(),
                        country: entity.country.clone(),
                    });
                }
            }
        }

        // Stable sort: entities with equal confidence keep insertion order.
        matches.sort_by(|a, b| {
            b.confidence
                .partial_cmp(&a.confidence)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        SanctionsResult {
            screened_value: name.to_string(),
            is_match: !matches.is_empty(),
            matches,
            screening_time: Utc::now(),
            lists_checked,
        }
    }

    /// Screens every name in `names`, returning one result per input in the
    /// same order.
    pub fn screen_batch(&self, names: &[&str]) -> Vec<SanctionsResult> {
        names.iter().map(|name| self.screen(name)).collect()
    }

    /// Scores how similar `s1` is to `s2`, in the range `0.0..=1.0`.
    ///
    /// The score is 0.4 times the share of `s1`'s characters that occur
    /// anywhere in `s2` (relative to the longer string, counted in characters
    /// rather than bytes) plus 0.6 times the Jaccard overlap of the two
    /// whitespace-separated word sets. Either string being empty yields 0.0.
    /// The measure is not symmetric in its arguments.
    fn calculate_similarity(&self, s1: &str, s2: &str) -> f32 {
        if s1.is_empty() || s2.is_empty() {
            return 0.0;
        }

        // Build the lookup set once instead of scanning `s2` per character.
        let s2_chars: HashSet<char> = s2.chars().collect();
        let mut len1 = 0usize;
        let mut common_chars = 0usize;
        for c in s1.chars() {
            len1 += 1;
            if s2_chars.contains(&c) {
                common_chars += 1;
            }
        }
        let max_len = len1.max(s2.chars().count());
        let char_similarity = common_chars as f32 / max_len as f32;

        let words1: HashSet<&str> = s1.split_whitespace().collect();
        let words2: HashSet<&str> = s2.split_whitespace().collect();
        let common_words = words1.intersection(&words2).count();
        let total_words = words1.union(&words2).count();
        let word_similarity = if total_words > 0 {
            common_words as f32 / total_words as f32
        } else {
            0.0
        };

        (char_similarity * Self::CHAR_WEIGHT) + (word_similarity * Self::WORD_WEIGHT)
    }

    /// Registers a new entity on `list` with no program or country.
    ///
    /// The name and aliases are normalised the same way as screened input.
    /// The entry id is `"<list name>-<number of entities stored so far>"`.
    /// The entity only takes part in screening while `list` is enabled; this
    /// method does not enable it.
    pub fn add_entity(&mut self, name: &str, aliases: Vec<String>, list: SanctionsList) {
        let id = format!("{}-{}", list.name(), self.entities.len());
        self.entities.push(SanctionedEntity {
            id,
            name: Self::normalize(name),
            aliases: aliases.iter().map(|alias| Self::normalize(alias)).collect(),
            list,
            program: None,
            country: None,
        });
    }

    /// Canonical form used for comparisons: trimmed, inner whitespace runs
    /// collapsed to a single space, upper-cased.
    fn normalize(raw: &str) -> String {
        raw.split_whitespace()
            .collect::<Vec<&str>>()
            .join(" ")
            .to_uppercase()
    }

    /// Returns the partial-match confidence when one name contains the other,
    /// or `None` when neither does (or either is empty).
    fn partial_confidence(query: &str, entity_name: &str) -> Option<f32> {
        if query.is_empty() || entity_name.is_empty() {
            return None;
        }
        if !(entity_name.contains(query) || query.contains(entity_name)) {
            return None;
        }
        let query_len = query.chars().count();
        let entity_len = entity_name.chars().count();
        let ratio = query_len.min(entity_len) as f32 / query_len.max(entity_len) as f32;
        Some(Self::PARTIAL_BASE_CONFIDENCE + Self::PARTIAL_LENGTH_BONUS * ratio)
    }
}
impl Default for SanctionsScreener {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Primary name of the built-in OFAC sample entry.
    const OFAC_ENTITY: &str = "SANCTIONED ENTITY ONE";

    /// An exact primary-name hit is reported first with full confidence.
    #[test]
    fn test_exact_match() {
        let screener = SanctionsScreener::new();
        let result = screener.screen(OFAC_ENTITY);
        assert!(result.is_match);
        assert!(!result.matches.is_empty());
        assert_eq!(result.matches[0].match_type, MatchType::Exact);
        assert_eq!(result.matches[0].confidence, 1.0);
        assert_eq!(result.matches[0].entry_id, "OFAC-001");
    }

    /// An alias hit reports the entity's primary name at confidence 0.95.
    #[test]
    fn test_alias_match() {
        let screener = SanctionsScreener::new();
        let result = screener.screen("ENTITY ONE");
        assert!(result.is_match);
        let alias_hit = result
            .matches
            .iter()
            .find(|m| m.match_type == MatchType::Alias)
            .expect("an alias match should be reported");
        assert_eq!(alias_hit.matched_name, OFAC_ENTITY);
        assert_eq!(alias_hit.confidence, 0.95);
        assert_eq!(alias_hit.entry_id, "OFAC-001");
    }

    /// An unrelated name produces no match at all, not merely no
    /// high-confidence one.
    #[test]
    fn test_no_match() {
        let screener = SanctionsScreener::new();
        let result = screener.screen("LEGITIMATE COMPANY XYZ");
        assert!(!result.is_match);
        assert!(result.matches.is_empty());
        assert!(!result.has_high_confidence_match());
        assert!(result.highest_confidence().is_none());
    }

    /// Batch screening returns one result per input, in input order.
    #[test]
    fn test_batch_screening() {
        let screener = SanctionsScreener::new();
        let names = vec![OFAC_ENTITY, "NORMAL COMPANY", "ENTITY ONE"];
        let results = screener.screen_batch(&names);
        assert_eq!(results.len(), 3);
        for (result, name) in results.iter().zip(&names) {
            assert_eq!(result.screened_value, *name);
        }
        assert!(results[0].is_match);
        assert!(!results[1].is_match);
        assert!(results[2].is_match);
    }

    /// A custom entity is only screened once its list has been enabled.
    #[test]
    fn test_custom_entity() {
        let mut screener = SanctionsScreener::new();
        screener.add_entity(
            "CUSTOM BAD ACTOR",
            vec!["CBA".to_string(), "BAD ACTOR CO".to_string()],
            SanctionsList::Custom("INTERNAL".to_string()),
        );
        assert!(!screener.screen("CUSTOM BAD ACTOR").is_match);

        screener.enable_list(SanctionsList::Custom("INTERNAL".to_string()));
        let result = screener.screen("CUSTOM BAD ACTOR");
        assert!(result.is_match);
        assert_eq!(result.matches[0].match_type, MatchType::Exact);
        assert_eq!(
            result.matches[0].list,
            SanctionsList::Custom("INTERNAL".to_string())
        );
    }

    /// Disabling a list removes it from `lists_checked` and suppresses hits
    /// on its entities.
    #[test]
    fn test_list_filtering() {
        let mut screener = SanctionsScreener::new();
        screener.disable_list(&SanctionsList::OFAC);
        let result = screener.screen(OFAC_ENTITY);
        assert!(!result.lists_checked.contains(&SanctionsList::OFAC));
        assert!(result.lists_checked.contains(&SanctionsList::EU));
        assert!(result.lists_checked.contains(&SanctionsList::UN));
        assert!(!result.is_match);
    }

    /// The fuzzy threshold gates near-miss names: a one-letter typo scores
    /// roughly 0.68, so it is rejected at 0.95 and accepted at 0.6.
    #[test]
    fn test_fuzzy_threshold() {
        let typo = "SANCTIONED ENTTY ONE";
        let mut screener = SanctionsScreener::new();

        screener.set_fuzzy_threshold(0.95);
        assert!(!screener.screen(typo).is_match);

        screener.set_fuzzy_threshold(0.6);
        let result = screener.screen(typo);
        assert!(result.is_match);
        assert_eq!(result.matches[0].match_type, MatchType::Fuzzy);
        assert_eq!(result.matches[0].entry_id, "OFAC-001");
        assert!(result.matches[0].confidence >= 0.6);
        assert!(result.matches[0].confidence < 1.0);
    }

    /// `highest_confidence` returns the top-scoring match.
    #[test]
    fn test_highest_confidence() {
        let screener = SanctionsScreener::new();
        let result = screener.screen(OFAC_ENTITY);
        let highest = result.highest_confidence();
        assert!(highest.is_some());
        assert_eq!(highest.unwrap().confidence, 1.0);
    }
}