rustkernel_compliance/
kyc.rs

1//! Know Your Customer (KYC) kernels.
2//!
3//! This module provides KYC analysis:
4//! - Risk scoring
5//! - Entity resolution/matching
6
7use crate::messages::{
8    EntityResolutionInput, EntityResolutionOutput, KYCScoringInput, KYCScoringOutput,
9};
10use crate::types::{Entity, EntityMatch, EntityResolutionResult, KYCFactors, KYCResult, RiskTier};
11use async_trait::async_trait;
12use rustkernel_core::error::Result;
13use rustkernel_core::traits::BatchKernel;
14use rustkernel_core::{domain::Domain, kernel::KernelMetadata, traits::GpuKernel};
15use std::time::Instant;
16
17// ============================================================================
18// KYC Scoring Kernel
19// ============================================================================
20
21/// KYC risk scoring kernel.
22///
23/// Aggregates multiple risk factors into an overall KYC risk score.
24#[derive(Debug, Clone)]
25pub struct KYCScoring {
26    metadata: KernelMetadata,
27}
28
29impl Default for KYCScoring {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35impl KYCScoring {
36    /// Create a new KYC scoring kernel.
37    #[must_use]
38    pub fn new() -> Self {
39        Self {
40            metadata: KernelMetadata::batch("compliance/kyc-scoring", Domain::Compliance)
41                .with_description("KYC risk factor aggregation")
42                .with_throughput(50_000)
43                .with_latency_us(20.0),
44        }
45    }
46
47    /// Compute KYC risk score for a customer.
48    ///
49    /// # Arguments
50    /// * `factors` - KYC risk factors
51    /// * `weights` - Optional custom weights for each factor
52    pub fn compute(factors: &KYCFactors, weights: Option<&KYCWeights>) -> KYCResult {
53        let default_weights = KYCWeights::default();
54        let w = weights.unwrap_or(&default_weights);
55
56        // Calculate weighted score
57        let mut weighted_sum = 0.0;
58        let mut total_weight = 0.0;
59        let mut contributions = Vec::new();
60
61        // Country risk
62        let country_contribution = factors.country_risk * w.country;
63        weighted_sum += country_contribution;
64        total_weight += w.country;
65        contributions.push(("Country Risk".to_string(), country_contribution / w.country));
66
67        // Industry risk
68        let industry_contribution = factors.industry_risk * w.industry;
69        weighted_sum += industry_contribution;
70        total_weight += w.industry;
71        contributions.push((
72            "Industry Risk".to_string(),
73            industry_contribution / w.industry,
74        ));
75
76        // Product risk
77        let product_contribution = factors.product_risk * w.product;
78        weighted_sum += product_contribution;
79        total_weight += w.product;
80        contributions.push(("Product Risk".to_string(), product_contribution / w.product));
81
82        // Transaction risk
83        let tx_contribution = factors.transaction_risk * w.transaction;
84        weighted_sum += tx_contribution;
85        total_weight += w.transaction;
86        contributions.push((
87            "Transaction Risk".to_string(),
88            tx_contribution / w.transaction,
89        ));
90
91        // Documentation (inverse - higher is better)
92        let doc_risk = 100.0 - factors.documentation_score;
93        let doc_contribution = doc_risk * w.documentation;
94        weighted_sum += doc_contribution;
95        total_weight += w.documentation;
96        contributions.push((
97            "Documentation Gap".to_string(),
98            doc_contribution / w.documentation,
99        ));
100
101        // Tenure (inverse - longer is better)
102        let tenure_risk = (10.0 - factors.tenure_years.min(10.0)) * 10.0;
103        let tenure_contribution = tenure_risk * w.tenure;
104        weighted_sum += tenure_contribution;
105        total_weight += w.tenure;
106        contributions.push(("Tenure Risk".to_string(), tenure_contribution / w.tenure));
107
108        let risk_score = if total_weight > 0.0 {
109            weighted_sum / total_weight
110        } else {
111            0.0
112        };
113
114        let risk_tier = RiskTier::from(risk_score);
115
116        KYCResult {
117            customer_id: factors.customer_id,
118            risk_score,
119            risk_tier,
120            factor_contributions: contributions,
121        }
122    }
123
124    /// Batch score multiple customers.
125    pub fn compute_batch(
126        factors_list: &[KYCFactors],
127        weights: Option<&KYCWeights>,
128    ) -> Vec<KYCResult> {
129        factors_list
130            .iter()
131            .map(|f| Self::compute(f, weights))
132            .collect()
133    }
134}
135
136impl GpuKernel for KYCScoring {
137    fn metadata(&self) -> &KernelMetadata {
138        &self.metadata
139    }
140}
141
142#[async_trait]
143impl BatchKernel<KYCScoringInput, KYCScoringOutput> for KYCScoring {
144    async fn execute(&self, input: KYCScoringInput) -> Result<KYCScoringOutput> {
145        let start = Instant::now();
146        let result = Self::compute(&input.factors, None);
147        Ok(KYCScoringOutput {
148            result,
149            compute_time_us: start.elapsed().as_micros() as u64,
150        })
151    }
152}
153
/// Relative weights applied to each KYC risk factor when scoring.
///
/// The score is a weighted average, so only the ratios between the weights
/// matter; they are not required to sum to one.
#[derive(Clone, Debug)]
pub struct KYCWeights {
    /// Weight of the country risk factor.
    pub country: f64,
    /// Weight of the industry risk factor.
    pub industry: f64,
    /// Weight of the product risk factor.
    pub product: f64,
    /// Weight of the transaction pattern risk factor.
    pub transaction: f64,
    /// Weight of the documentation gap.
    pub documentation: f64,
    /// Weight of the tenure risk.
    pub tenure: f64,
}
170
171impl Default for KYCWeights {
172    fn default() -> Self {
173        Self {
174            country: 0.25,
175            industry: 0.20,
176            product: 0.15,
177            transaction: 0.20,
178            documentation: 0.10,
179            tenure: 0.10,
180        }
181    }
182}
183
184// ============================================================================
185// Entity Resolution Kernel
186// ============================================================================
187
188/// Entity resolution (fuzzy matching) kernel.
189///
190/// Matches entities using fuzzy string matching and other attributes.
191#[derive(Debug, Clone)]
192pub struct EntityResolution {
193    metadata: KernelMetadata,
194}
195
196impl Default for EntityResolution {
197    fn default() -> Self {
198        Self::new()
199    }
200}
201
202impl EntityResolution {
203    /// Create a new entity resolution kernel.
204    #[must_use]
205    pub fn new() -> Self {
206        Self {
207            metadata: KernelMetadata::batch("compliance/entity-resolution", Domain::Compliance)
208                .with_description("Fuzzy entity matching")
209                .with_throughput(10_000)
210                .with_latency_us(100.0),
211        }
212    }
213
214    /// Match a query entity against a list of candidates.
215    ///
216    /// # Arguments
217    /// * `query` - Entity to match
218    /// * `candidates` - List of candidate entities
219    /// * `min_score` - Minimum match score threshold
220    /// * `max_matches` - Maximum number of matches to return
221    pub fn compute(
222        query: &Entity,
223        candidates: &[Entity],
224        min_score: f64,
225        max_matches: usize,
226    ) -> EntityResolutionResult {
227        let mut matches: Vec<EntityMatch> = candidates
228            .iter()
229            .filter_map(|candidate| {
230                let (name_score, date_score, country_match) =
231                    Self::compute_scores(query, candidate);
232
233                // Weighted overall score
234                let mut score = name_score * 0.6;
235                if date_score > 0.0 {
236                    score += date_score * 0.25;
237                } else {
238                    score += 0.125; // Neutral if no date to compare
239                }
240                if country_match {
241                    score += 0.15;
242                }
243
244                if score >= min_score {
245                    Some(EntityMatch {
246                        entity_id: candidate.id,
247                        score,
248                        name_score,
249                        date_score,
250                        country_match,
251                    })
252                } else {
253                    None
254                }
255            })
256            .collect();
257
258        // Sort by score descending
259        matches.sort_by(|a, b| {
260            b.score
261                .partial_cmp(&a.score)
262                .unwrap_or(std::cmp::Ordering::Equal)
263        });
264
265        // Take top matches
266        matches.truncate(max_matches);
267
268        EntityResolutionResult {
269            query_id: query.id,
270            matches,
271        }
272    }
273
274    /// Compute individual scores between two entities.
275    fn compute_scores(query: &Entity, candidate: &Entity) -> (f64, f64, bool) {
276        // Name similarity (best match across all names/aliases)
277        let mut best_name_score = Self::name_similarity(&query.name, &candidate.name);
278
279        for alias in &candidate.aliases {
280            let alias_score = Self::name_similarity(&query.name, alias);
281            best_name_score = best_name_score.max(alias_score);
282        }
283
284        for query_alias in &query.aliases {
285            let alias_score = Self::name_similarity(query_alias, &candidate.name);
286            best_name_score = best_name_score.max(alias_score);
287
288            for candidate_alias in &candidate.aliases {
289                let aa_score = Self::name_similarity(query_alias, candidate_alias);
290                best_name_score = best_name_score.max(aa_score);
291            }
292        }
293
294        // Date similarity
295        let date_score = match (query.date, candidate.date) {
296            (Some(qd), Some(cd)) => Self::date_similarity(qd, cd),
297            _ => 0.0,
298        };
299
300        // Country match
301        let country_match = match (&query.country, &candidate.country) {
302            (Some(qc), Some(cc)) => qc.eq_ignore_ascii_case(cc),
303            _ => false,
304        };
305
306        (best_name_score, date_score, country_match)
307    }
308
309    /// Calculate name similarity using Jaro-Winkler distance.
310    fn name_similarity(s1: &str, s2: &str) -> f64 {
311        let s1 = s1.to_lowercase();
312        let s2 = s2.to_lowercase();
313
314        if s1 == s2 {
315            return 1.0;
316        }
317
318        if s1.is_empty() || s2.is_empty() {
319            return 0.0;
320        }
321
322        Self::jaro_winkler(&s1, &s2)
323    }
324
325    /// Jaro-Winkler similarity.
326    fn jaro_winkler(s1: &str, s2: &str) -> f64 {
327        let jaro = Self::jaro(s1, s2);
328
329        // Calculate common prefix length (up to 4 chars)
330        let prefix_len = s1
331            .chars()
332            .zip(s2.chars())
333            .take(4)
334            .take_while(|(a, b)| a == b)
335            .count();
336
337        // Winkler modification
338        jaro + (prefix_len as f64 * 0.1 * (1.0 - jaro))
339    }
340
341    /// Jaro similarity.
342    fn jaro(s1: &str, s2: &str) -> f64 {
343        let s1_chars: Vec<char> = s1.chars().collect();
344        let s2_chars: Vec<char> = s2.chars().collect();
345
346        let len1 = s1_chars.len();
347        let len2 = s2_chars.len();
348
349        if len1 == 0 || len2 == 0 {
350            return 0.0;
351        }
352
353        let match_distance = (len1.max(len2) / 2).saturating_sub(1);
354
355        let mut s1_matches = vec![false; len1];
356        let mut s2_matches = vec![false; len2];
357
358        let mut matches = 0usize;
359        let mut transpositions = 0usize;
360
361        // Find matches
362        for i in 0..len1 {
363            let start = i.saturating_sub(match_distance);
364            let end = (i + match_distance + 1).min(len2);
365
366            for j in start..end {
367                if s2_matches[j] || s1_chars[i] != s2_chars[j] {
368                    continue;
369                }
370                s1_matches[i] = true;
371                s2_matches[j] = true;
372                matches += 1;
373                break;
374            }
375        }
376
377        if matches == 0 {
378            return 0.0;
379        }
380
381        // Count transpositions
382        let mut k = 0usize;
383        for i in 0..len1 {
384            if !s1_matches[i] {
385                continue;
386            }
387            while !s2_matches[k] {
388                k += 1;
389            }
390            if s1_chars[i] != s2_chars[k] {
391                transpositions += 1;
392            }
393            k += 1;
394        }
395
396        let m = matches as f64;
397        let t = transpositions as f64 / 2.0;
398
399        (m / len1 as f64 + m / len2 as f64 + (m - t) / m) / 3.0
400    }
401
402    /// Date similarity (YYYYMMDD format).
403    fn date_similarity(d1: u32, d2: u32) -> f64 {
404        if d1 == d2 {
405            return 1.0;
406        }
407
408        // Extract year, month, day
409        let y1 = d1 / 10000;
410        let m1 = (d1 % 10000) / 100;
411        let _day1 = d1 % 100;
412
413        let y2 = d2 / 10000;
414        let m2 = (d2 % 10000) / 100;
415        let _day2 = d2 % 100;
416
417        // Same year and month is close
418        if y1 == y2 && m1 == m2 {
419            return 0.9;
420        }
421
422        // Same year
423        if y1 == y2 {
424            return 0.7;
425        }
426
427        // Within a few years
428        let year_diff = (y1 as i32 - y2 as i32).unsigned_abs();
429        if year_diff <= 2 {
430            return 0.5;
431        }
432        if year_diff <= 5 {
433            return 0.3;
434        }
435
436        0.0
437    }
438}
439
440impl GpuKernel for EntityResolution {
441    fn metadata(&self) -> &KernelMetadata {
442        &self.metadata
443    }
444}
445
446#[async_trait]
447impl BatchKernel<EntityResolutionInput, EntityResolutionOutput> for EntityResolution {
448    async fn execute(&self, input: EntityResolutionInput) -> Result<EntityResolutionOutput> {
449        let start = Instant::now();
450        let result = Self::compute(
451            &input.query,
452            &input.candidates,
453            input.min_score,
454            input.max_matches,
455        );
456        Ok(EntityResolutionOutput {
457            result,
458            compute_time_us: start.elapsed().as_micros() as u64,
459        })
460    }
461}
462
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::EntityType;

    /// The scoring kernel registers under the expected id and domain.
    #[test]
    fn test_kyc_scoring_metadata() {
        let scoring = KYCScoring::new();
        let meta = scoring.metadata();
        assert_eq!(meta.id, "compliance/kyc-scoring");
        assert_eq!(meta.domain, Domain::Compliance);
    }

    /// Low factor risks, good documentation and long tenure land in the low tier.
    #[test]
    fn test_kyc_scoring_low_risk() {
        let low_risk_customer = KYCFactors {
            customer_id: 1,
            country_risk: 10.0,
            industry_risk: 15.0,
            product_risk: 10.0,
            transaction_risk: 5.0,
            documentation_score: 95.0,
            tenure_years: 8.0,
        };

        let scored = KYCScoring::compute(&low_risk_customer, None);

        assert_eq!(scored.customer_id, 1);
        assert!(scored.risk_score < 25.0);
        assert_eq!(scored.risk_tier, RiskTier::Low);
    }

    /// High factor risks, poor documentation and short tenure land in a high tier.
    #[test]
    fn test_kyc_scoring_high_risk() {
        let high_risk_customer = KYCFactors {
            customer_id: 2,
            country_risk: 80.0,
            industry_risk: 70.0,
            product_risk: 60.0,
            transaction_risk: 75.0,
            documentation_score: 40.0,
            tenure_years: 0.5,
        };

        let scored = KYCScoring::compute(&high_risk_customer, None);

        assert!(scored.risk_score > 50.0);
        assert!(matches!(
            scored.risk_tier,
            RiskTier::High | RiskTier::VeryHigh
        ));
    }

    /// The resolution kernel registers under the expected id.
    #[test]
    fn test_entity_resolution_metadata() {
        let resolver = EntityResolution::new();
        assert_eq!(resolver.metadata().id, "compliance/entity-resolution");
    }

    /// An identical candidate is ranked first with a near-perfect score.
    #[test]
    fn test_entity_resolution_exact_match() {
        let wanted = Entity {
            id: 1,
            name: "John Smith".to_string(),
            aliases: vec![],
            date: Some(19800115),
            country: Some("US".to_string()),
            entity_type: EntityType::Individual,
        };

        let identical = Entity {
            id: 100,
            name: "John Smith".to_string(),
            aliases: vec![],
            date: Some(19800115),
            country: Some("US".to_string()),
            entity_type: EntityType::Individual,
        };
        let unrelated = Entity {
            id: 101,
            name: "Jane Doe".to_string(),
            aliases: vec![],
            date: Some(19850620),
            country: Some("UK".to_string()),
            entity_type: EntityType::Individual,
        };
        let pool = vec![identical, unrelated];

        let outcome = EntityResolution::compute(&wanted, &pool, 0.5, 10);

        let best = outcome
            .matches
            .first()
            .expect("the identical candidate should be returned");
        assert_eq!(best.entity_id, 100);
        assert!(best.score > 0.9);
    }

    /// A misspelled name still matches with a decent score.
    #[test]
    fn test_entity_resolution_fuzzy_match() {
        let wanted = Entity {
            id: 1,
            name: "Jon Smyth".to_string(), // Misspelled
            aliases: vec![],
            date: None,
            country: Some("US".to_string()),
            entity_type: EntityType::Individual,
        };

        let pool = vec![Entity {
            id: 100,
            name: "John Smith".to_string(),
            aliases: vec!["Johnny Smith".to_string()],
            date: None,
            country: Some("US".to_string()),
            entity_type: EntityType::Individual,
        }];

        let outcome = EntityResolution::compute(&wanted, &pool, 0.5, 10);

        // Fuzzy matching should keep the candidate above the threshold.
        let best = outcome
            .matches
            .first()
            .expect("the misspelled name should still match");
        assert!(best.score > 0.6);
    }

    /// A query name equal to one of the candidate's aliases scores as a name match.
    #[test]
    fn test_entity_resolution_alias_match() {
        let wanted = Entity {
            id: 1,
            name: "Johnny Smith".to_string(),
            aliases: vec![],
            date: None,
            country: None,
            entity_type: EntityType::Individual,
        };

        let pool = vec![Entity {
            id: 100,
            name: "John Smith".to_string(),
            aliases: vec!["Johnny Smith".to_string(), "J. Smith".to_string()],
            date: None,
            country: None,
            entity_type: EntityType::Individual,
        }];

        let outcome = EntityResolution::compute(&wanted, &pool, 0.5, 10);

        // The alias is an exact match for the query name.
        let best = outcome
            .matches
            .first()
            .expect("the alias should produce a match");
        assert!(best.name_score > 0.95);
    }
}
611}