Skip to main content

cortexai_data/
cpf.rs

//! CPF (Cadastro de Pessoa Física) matching and validation
//!
//! Brazilian individual taxpayer registry number validation and normalization.
4
5use tracing::debug;
6
/// CPF matcher with normalization and validation.
///
/// `Default` is implemented by hand rather than derived: a derived `Default`
/// would initialize `validate_digits` to `false`, so `CpfMatcher::default()`
/// would silently skip check-digit validation while `CpfMatcher::new()`
/// enforces it.
#[derive(Debug, Clone)]
pub struct CpfMatcher {
    /// Whether to validate CPF check digits
    validate_digits: bool,
}

impl Default for CpfMatcher {
    /// Returns a matcher with check-digit validation enabled, the same
    /// configuration `CpfMatcher::new()` produces.
    fn default() -> Self {
        Self {
            validate_digits: true,
        }
    }
}
13
14impl CpfMatcher {
15    /// Create a new CPF matcher
16    pub fn new() -> Self {
17        Self {
18            validate_digits: true,
19        }
20    }
21
22    /// Create a matcher that skips digit validation (for testing)
23    pub fn without_validation() -> Self {
24        Self {
25            validate_digits: false,
26        }
27    }
28
29    /// Normalize a CPF string to 11 digits
30    ///
31    /// Handles various input formats:
32    /// - "123.456.789-00"
33    /// - "12345678900"
34    /// - " 123 456 789 00 "
35    /// - "123-456-789-00"
36    pub fn normalize(&self, cpf: &str) -> Option<String> {
37        // Extract only digits
38        let digits: String = cpf.chars().filter(|c| c.is_ascii_digit()).collect();
39
40        // Must have exactly 11 digits
41        if digits.len() != 11 {
42            debug!(
43                input = cpf,
44                digits_found = digits.len(),
45                "Invalid CPF length"
46            );
47            return None;
48        }
49
50        Some(digits)
51    }
52
53    /// Format a normalized CPF with standard punctuation
54    pub fn format(&self, cpf: &str) -> Option<String> {
55        let normalized = self.normalize(cpf)?;
56        Some(format!(
57            "{}.{}.{}-{}",
58            &normalized[0..3],
59            &normalized[3..6],
60            &normalized[6..9],
61            &normalized[9..11]
62        ))
63    }
64
65    /// Check if a CPF is valid (proper check digits)
66    pub fn is_valid(&self, cpf: &str) -> bool {
67        let Some(digits) = self.normalize(cpf) else {
68            return false;
69        };
70
71        if !self.validate_digits {
72            return true;
73        }
74
75        // Check for all same digits (invalid)
76        let chars: Vec<char> = digits.chars().collect();
77        if chars.iter().all(|&c| c == chars[0]) {
78            debug!(cpf = cpf, "CPF has all same digits");
79            return false;
80        }
81
82        // Validate first check digit
83        let digits_vec: Vec<u32> = chars.iter().filter_map(|c| c.to_digit(10)).collect();
84
85        let sum1: u32 = digits_vec[0..9]
86            .iter()
87            .enumerate()
88            .map(|(i, &d)| d * (10 - i as u32))
89            .sum();
90
91        let remainder1 = (sum1 * 10) % 11;
92        let check1 = if remainder1 == 10 { 0 } else { remainder1 };
93
94        if check1 != digits_vec[9] {
95            debug!(
96                cpf = cpf,
97                expected = check1,
98                got = digits_vec[9],
99                "First check digit invalid"
100            );
101            return false;
102        }
103
104        // Validate second check digit
105        let sum2: u32 = digits_vec[0..10]
106            .iter()
107            .enumerate()
108            .map(|(i, &d)| d * (11 - i as u32))
109            .sum();
110
111        let remainder2 = (sum2 * 10) % 11;
112        let check2 = if remainder2 == 10 { 0 } else { remainder2 };
113
114        if check2 != digits_vec[10] {
115            debug!(
116                cpf = cpf,
117                expected = check2,
118                got = digits_vec[10],
119                "Second check digit invalid"
120            );
121            return false;
122        }
123
124        true
125    }
126
127    /// Compare two CPFs for equality (normalizing both)
128    pub fn matches(&self, cpf1: &str, cpf2: &str) -> bool {
129        match (self.normalize(cpf1), self.normalize(cpf2)) {
130            (Some(n1), Some(n2)) => n1 == n2,
131            _ => false,
132        }
133    }
134
135    /// Calculate match score between two CPFs
136    ///
137    /// Returns 1.0 for exact match, 0.0 for no match
138    pub fn score(&self, cpf1: &str, cpf2: &str) -> f64 {
139        if self.matches(cpf1, cpf2) {
140            1.0
141        } else {
142            0.0
143        }
144    }
145
146    /// Generate a valid CPF for testing (with proper check digits)
147    #[cfg(test)]
148    pub fn generate_valid() -> String {
149        // Base digits
150        let base = [1, 2, 3, 4, 5, 6, 7, 8, 9];
151
152        // Calculate first check digit
153        let sum1: u32 = base
154            .iter()
155            .enumerate()
156            .map(|(i, &d)| d * (10 - i as u32))
157            .sum();
158        let check1 = {
159            let r = (sum1 * 10) % 11;
160            if r == 10 {
161                0
162            } else {
163                r
164            }
165        };
166
167        // Calculate second check digit
168        let mut extended = base.to_vec();
169        extended.push(check1);
170        let sum2: u32 = extended
171            .iter()
172            .enumerate()
173            .map(|(i, &d)| d * (11 - i as u32))
174            .sum();
175        let check2 = {
176            let r = (sum2 * 10) % 11;
177            if r == 10 {
178                0
179            } else {
180                r
181            }
182        };
183
184        format!(
185            "{}{}{}.{}{}{}.{}{}{}-{}{}",
186            base[0],
187            base[1],
188            base[2],
189            base[3],
190            base[4],
191            base[5],
192            base[6],
193            base[7],
194            base[8],
195            check1,
196            check2
197        )
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// Every accepted input shape collapses to the same 11-digit string.
    #[test]
    fn test_normalize_various_formats() {
        let matcher = CpfMatcher::new();
        let accepted = [
            "123.456.789-00",   // standard punctuation
            "12345678900",      // digits only
            " 123 456 789 00 ", // space separated, padded
            "123-456-789-00",   // dashes as the only separator
        ];

        for input in accepted {
            assert_eq!(
                matcher.normalize(input),
                Some("12345678900".to_string())
            );
        }
    }

    /// Inputs without exactly 11 digits are rejected.
    #[test]
    fn test_normalize_invalid() {
        let matcher = CpfMatcher::new();
        let rejected = [
            "123456789",     // too short
            "1234567890012", // too long
            "abc",           // letters only
        ];

        for input in rejected {
            assert_eq!(matcher.normalize(input), None);
        }
    }

    /// Formatting yields the punctuated form whether or not the input was punctuated.
    #[test]
    fn test_format() {
        let matcher = CpfMatcher::new();

        for input in ["12345678900", "123.456.789-00"] {
            assert_eq!(
                matcher.format(input),
                Some("123.456.789-00".to_string())
            );
        }
    }

    /// Numbers made of a single repeated digit are never valid.
    #[test]
    fn test_is_valid_rejects_same_digits() {
        let matcher = CpfMatcher::new();

        for repeated in ["111.111.111-11", "000.000.000-00", "999.999.999-99"] {
            assert!(!matcher.is_valid(repeated));
        }
    }

    /// The generator's output passes full validation.
    #[test]
    fn test_is_valid_with_generated() {
        let valid_cpf = CpfMatcher::generate_valid();
        let matcher = CpfMatcher::new();

        assert!(
            matcher.is_valid(&valid_cpf),
            "Generated CPF should be valid: {}",
            valid_cpf
        );
    }

    /// Matching ignores punctuation but not differing digits.
    #[test]
    fn test_matches() {
        let matcher = CpfMatcher::new();
        let cases = [
            ("123.456.789-00", "12345678900", true),
            ("12345678900", " 123 456 789 00 ", true),
            ("123.456.789-00", "123.456.789-01", false),
        ];

        for (left, right, expected) in cases {
            assert_eq!(matcher.matches(left, right), expected);
        }
    }

    /// The score is 1.0 for a match and 0.0 otherwise.
    #[test]
    fn test_score() {
        let matcher = CpfMatcher::new();
        let cases = [
            ("123.456.789-00", "12345678900", 1.0),
            ("123.456.789-00", "987.654.321-00", 0.0),
        ];

        for (left, right, expected) in cases {
            assert_eq!(matcher.score(left, right), expected);
        }
    }
}