Skip to main content

cortexai_data/
cnpj.rs

//! CNPJ (Cadastro Nacional da Pessoa Jurídica) matcher
//!
//! Validation and normalization for Brazilian company tax IDs.
4
5use tracing::debug;
6
/// CNPJ matcher with validation and normalization.
///
/// A matcher is a small configuration value: its only state is whether
/// `is_valid` should verify the two trailing check digits.
#[derive(Debug, Clone)]
pub struct CnpjMatcher {
    /// Whether `is_valid` verifies the two check digits.
    validate_digits: bool,
}

impl Default for CnpjMatcher {
    /// Returns a matcher with check-digit validation enabled.
    ///
    /// A derived `Default` would set `validate_digits` to `false`, silently
    /// producing a matcher that disagrees with `CnpjMatcher::new()`; the
    /// manual impl keeps the two constructors equivalent.
    fn default() -> Self {
        Self {
            validate_digits: true,
        }
    }
}
13
14impl CnpjMatcher {
15    /// Create a new CNPJ matcher with validation enabled
16    pub fn new() -> Self {
17        Self {
18            validate_digits: true,
19        }
20    }
21
22    /// Create a matcher without check digit validation (faster)
23    pub fn without_validation() -> Self {
24        Self {
25            validate_digits: false,
26        }
27    }
28
29    /// Normalize a CNPJ to 14 digits only
30    pub fn normalize(&self, cnpj: &str) -> Option<String> {
31        let digits: String = cnpj.chars().filter(|c| c.is_ascii_digit()).collect();
32
33        if digits.len() != 14 {
34            debug!(
35                input = cnpj,
36                digits_len = digits.len(),
37                "Invalid CNPJ length"
38            );
39            return None;
40        }
41
42        Some(digits)
43    }
44
45    /// Validate a CNPJ including check digits
46    pub fn is_valid(&self, cnpj: &str) -> bool {
47        let Some(normalized) = self.normalize(cnpj) else {
48            return false;
49        };
50
51        // Check for all same digits (invalid)
52        if normalized
53            .chars()
54            .all(|c| c == normalized.chars().next().unwrap())
55        {
56            debug!(cnpj = %normalized, "CNPJ with all same digits");
57            return false;
58        }
59
60        if self.validate_digits {
61            self.validate_check_digits(&normalized)
62        } else {
63            true
64        }
65    }
66
67    /// Validate check digits using the official algorithm
68    fn validate_check_digits(&self, cnpj: &str) -> bool {
69        let digits: Vec<u32> = cnpj.chars().filter_map(|c| c.to_digit(10)).collect();
70
71        if digits.len() != 14 {
72            return false;
73        }
74
75        // First check digit
76        let weights1 = [5, 4, 3, 2, 9, 8, 7, 6, 5, 4, 3, 2];
77        let sum1: u32 = digits[..12]
78            .iter()
79            .zip(weights1.iter())
80            .map(|(d, w)| d * w)
81            .sum();
82        let remainder1 = sum1 % 11;
83        let check1 = if remainder1 < 2 { 0 } else { 11 - remainder1 };
84
85        if digits[12] != check1 {
86            return false;
87        }
88
89        // Second check digit
90        let weights2 = [6, 5, 4, 3, 2, 9, 8, 7, 6, 5, 4, 3, 2];
91        let sum2: u32 = digits[..13]
92            .iter()
93            .zip(weights2.iter())
94            .map(|(d, w)| d * w)
95            .sum();
96        let remainder2 = sum2 % 11;
97        let check2 = if remainder2 < 2 { 0 } else { 11 - remainder2 };
98
99        digits[13] == check2
100    }
101
102    /// Format a CNPJ with standard punctuation: XX.XXX.XXX/XXXX-XX
103    pub fn format(&self, cnpj: &str) -> Option<String> {
104        let normalized = self.normalize(cnpj)?;
105        Some(format!(
106            "{}.{}.{}/{}-{}",
107            &normalized[0..2],
108            &normalized[2..5],
109            &normalized[5..8],
110            &normalized[8..12],
111            &normalized[12..14]
112        ))
113    }
114
115    /// Check if two CNPJs match (normalized comparison)
116    pub fn matches(&self, cnpj1: &str, cnpj2: &str) -> bool {
117        match (self.normalize(cnpj1), self.normalize(cnpj2)) {
118            (Some(n1), Some(n2)) => n1 == n2,
119            _ => false,
120        }
121    }
122
123    /// Calculate similarity score between two CNPJs
124    pub fn score(&self, cnpj1: &str, cnpj2: &str) -> f64 {
125        if self.matches(cnpj1, cnpj2) {
126            1.0
127        } else {
128            0.0
129        }
130    }
131
132    /// Extract the root CNPJ (first 8 digits - company identifier)
133    pub fn root(&self, cnpj: &str) -> Option<String> {
134        self.normalize(cnpj).map(|n| n[0..8].to_string())
135    }
136
137    /// Extract the branch number (4 digits after root)
138    pub fn branch(&self, cnpj: &str) -> Option<String> {
139        self.normalize(cnpj).map(|n| n[8..12].to_string())
140    }
141
142    /// Check if two CNPJs belong to the same company (same root)
143    pub fn same_company(&self, cnpj1: &str, cnpj2: &str) -> bool {
144        match (self.root(cnpj1), self.root(cnpj2)) {
145            (Some(r1), Some(r2)) => r1 == r2,
146            _ => false,
147        }
148    }
149
150    /// Check if this is a headquarters CNPJ (branch = 0001)
151    pub fn is_headquarters(&self, cnpj: &str) -> bool {
152        self.branch(cnpj).map(|b| b == "0001").unwrap_or(false)
153    }
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Headquarters CNPJ with correct check digits, in punctuated form.
    const HEADQUARTERS: &str = "11.222.333/0001-81";
    /// Same CNPJ as `HEADQUARTERS`, digits only.
    const HEADQUARTERS_DIGITS: &str = "11222333000181";
    /// Second branch of the same company, with correct check digits.
    const BRANCH: &str = "11.222.333/0002-62";

    #[test]
    fn test_normalize_various_formats() {
        let matcher = CnpjMatcher::new();
        let expected = Some(HEADQUARTERS_DIGITS.to_string());

        assert_eq!(matcher.normalize(HEADQUARTERS), expected);
        assert_eq!(matcher.normalize(HEADQUARTERS_DIGITS), expected);
        assert_eq!(matcher.normalize(" 11 222 333 0001 81 "), expected);
    }

    #[test]
    fn test_normalize_invalid() {
        let matcher = CnpjMatcher::new();

        assert_eq!(matcher.normalize(""), None);
        assert_eq!(matcher.normalize("1234567890"), None);
        assert_eq!(matcher.normalize("123456789012345"), None);
    }

    #[test]
    fn test_is_valid_accepts_correct_check_digits() {
        let matcher = CnpjMatcher::new();

        assert!(matcher.is_valid(HEADQUARTERS));
        assert!(matcher.is_valid(HEADQUARTERS_DIGITS));
        assert!(matcher.is_valid(BRANCH));
        assert!(matcher.is_valid("00.000.000/0001-91"));
    }

    #[test]
    fn test_is_valid_check_digit_zero_when_remainder_below_two() {
        let matcher = CnpjMatcher::new();

        // First weighted sum is 12 (remainder 1), so the first check digit is 0.
        assert!(matcher.is_valid("00.000.000/0006-04"));
        // Second weighted sum is 34 (remainder 1), so the second check digit is 0.
        assert!(matcher.is_valid("00.000.000/0050-70"));
    }

    #[test]
    fn test_is_valid_rejects_wrong_check_digits() {
        let matcher = CnpjMatcher::new();

        // Wrong second check digit.
        assert!(!matcher.is_valid("11.222.333/0001-82"));
        // Wrong first check digit.
        assert!(!matcher.is_valid("11.222.333/0001-91"));
    }

    #[test]
    fn test_is_valid_rejects_wrong_length() {
        let matcher = CnpjMatcher::new();

        assert!(!matcher.is_valid(""));
        assert!(!matcher.is_valid("1234567890"));
        assert!(!matcher.is_valid("123456789012345"));
    }

    #[test]
    fn test_is_valid_rejects_same_digits() {
        let matcher = CnpjMatcher::new();

        assert!(!matcher.is_valid("00.000.000/0000-00"));
        assert!(!matcher.is_valid("11.111.111/1111-11"));
        assert!(!matcher.is_valid("99999999999999"));
    }

    #[test]
    fn test_without_validation_skips_check_digits_only() {
        let matcher = CnpjMatcher::without_validation();

        // Check digits are not verified...
        assert!(matcher.is_valid("11.222.333/0001-82"));
        assert!(matcher.is_valid(HEADQUARTERS));
        // ...but the structural checks still apply.
        assert!(!matcher.is_valid("11.111.111/1111-11"));
        assert!(!matcher.is_valid("123"));
    }

    #[test]
    fn test_format() {
        let matcher = CnpjMatcher::new();
        let expected = Some(HEADQUARTERS.to_string());

        assert_eq!(matcher.format(HEADQUARTERS_DIGITS), expected);
        assert_eq!(matcher.format(HEADQUARTERS), expected);
        assert_eq!(matcher.format("123"), None);
    }

    #[test]
    fn test_matches() {
        let matcher = CnpjMatcher::new();

        assert!(matcher.matches(HEADQUARTERS, HEADQUARTERS_DIGITS));
        assert!(matcher.matches("11 222 333 0001 81", HEADQUARTERS));
        assert!(!matcher.matches(HEADQUARTERS, BRANCH));
        // Two inputs that both fail to normalize never match.
        assert!(!matcher.matches("", ""));
        assert!(!matcher.matches(HEADQUARTERS, "123"));
    }

    #[test]
    fn test_score() {
        let matcher = CnpjMatcher::new();

        assert_eq!(matcher.score(HEADQUARTERS, HEADQUARTERS_DIGITS), 1.0);
        assert_eq!(matcher.score(HEADQUARTERS, BRANCH), 0.0);
        assert_eq!(matcher.score("", ""), 0.0);
    }

    #[test]
    fn test_root_and_branch() {
        let matcher = CnpjMatcher::new();

        assert_eq!(matcher.root(HEADQUARTERS), Some("11222333".to_string()));
        assert_eq!(matcher.branch(HEADQUARTERS), Some("0001".to_string()));
        assert_eq!(matcher.branch(BRANCH), Some("0002".to_string()));
        assert_eq!(matcher.root(""), None);
        assert_eq!(matcher.branch("123"), None);
    }

    #[test]
    fn test_same_company() {
        let matcher = CnpjMatcher::new();

        assert!(matcher.same_company(HEADQUARTERS, BRANCH));
        assert!(!matcher.same_company(HEADQUARTERS, "99.888.777/0001-00"));
        assert!(!matcher.same_company("123", HEADQUARTERS));
        assert!(!matcher.same_company("", ""));
    }

    #[test]
    fn test_is_headquarters() {
        let matcher = CnpjMatcher::new();

        assert!(matcher.is_headquarters(HEADQUARTERS));
        assert!(!matcher.is_headquarters(BRANCH));
        assert!(!matcher.is_headquarters("not a cnpj"));
    }
}