Skip to main content

cloakrs_patterns/
iban.rs

1use crate::common::{alphanumeric_upper, compile_regex, confidence, context_boost};
2use cloakrs_core::{Confidence, EntityType, Locale, PiiEntity, Recognizer, Span};
3use once_cell::sync::Lazy;
4use regex::Regex;
5
6static IBAN_REGEX: Lazy<Regex> =
7    Lazy::new(|| compile_regex(r"\b[A-Z]{2}\d{2}(?: ?[A-Z0-9]){11,30}\b"));
8
9const CONTEXT_WORDS: &[&str] = &["iban", "account", "bank", "transfer", "wire", "swift"];
10
/// Stateless recognizer for International Bank Account Numbers (IBANs).
///
/// A candidate is only reported when its length matches the length registered
/// for its country code and it passes the MOD-97 checksum.
#[derive(Clone, Copy, Debug, Default)]
pub struct IbanRecognizer;
14
15impl Recognizer for IbanRecognizer {
16    fn id(&self) -> &str {
17        "iban_mod97_v1"
18    }
19
20    fn entity_type(&self) -> EntityType {
21        EntityType::Iban
22    }
23
24    fn supported_locales(&self) -> &[Locale] {
25        &[]
26    }
27
28    fn scan(&self, text: &str) -> Vec<PiiEntity> {
29        IBAN_REGEX
30            .find_iter(text)
31            .filter(|matched| self.validate(matched.as_str()))
32            .map(|matched| {
33                let normalized = alphanumeric_upper(matched.as_str());
34                PiiEntity {
35                    entity_type: self.entity_type(),
36                    span: Span::new(matched.start(), matched.end()),
37                    text: matched.as_str().to_string(),
38                    confidence: compute_confidence(text, matched.start(), &normalized),
39                    recognizer_id: self.id().to_string(),
40                }
41            })
42            .collect()
43    }
44
45    fn validate(&self, candidate: &str) -> bool {
46        let normalized = alphanumeric_upper(candidate);
47        has_country_length(&normalized) && iban_mod97_valid(&normalized)
48    }
49}
50
51fn compute_confidence(text: &str, start: usize, normalized: &str) -> Confidence {
52    let base = if has_country_length(normalized) && iban_mod97_valid(normalized) {
53        0.99
54    } else {
55        0.50
56    };
57    confidence(base + context_boost(text, start, CONTEXT_WORDS))
58}
59
60fn has_country_length(normalized: &str) -> bool {
61    normalized
62        .get(..2)
63        .and_then(iban_country_length)
64        .is_some_and(|length| normalized.len() == length)
65}
66
/// Returns the expected IBAN length for supported countries.
///
/// `country` must be the two-letter uppercase country code exactly as it
/// appears at the start of an IBAN; the comparison is case-sensitive. The
/// returned length counts every character of the IBAN (country code, check
/// digits and account part) without separators. Codes that are not in the
/// table yield `None`.
#[must_use]
pub fn iban_country_length(country: &str) -> Option<usize> {
    match country {
        "AD" => Some(24),
        "AE" => Some(23),
        "AL" => Some(28),
        "AT" => Some(20),
        "AZ" => Some(28),
        "BA" => Some(20),
        "BE" => Some(16),
        "BG" => Some(22),
        "BH" => Some(22),
        "BR" => Some(29),
        "BY" => Some(28),
        "CH" => Some(21),
        "CR" => Some(22),
        "CY" => Some(28),
        "CZ" => Some(24),
        "DE" => Some(22),
        "DK" => Some(18),
        "DO" => Some(28),
        "EE" => Some(20),
        "EG" => Some(29),
        "ES" => Some(24),
        "FI" => Some(18),
        "FO" => Some(18),
        "FR" => Some(27),
        "GB" => Some(22),
        "GE" => Some(22),
        "GI" => Some(23),
        "GL" => Some(18),
        "GR" => Some(27),
        "GT" => Some(28),
        "HR" => Some(21),
        "HU" => Some(28),
        "IE" => Some(22),
        "IL" => Some(23),
        "IQ" => Some(23),
        "IS" => Some(26),
        "IT" => Some(27),
        "JO" => Some(30),
        "KW" => Some(30),
        "KZ" => Some(20),
        "LB" => Some(28),
        "LC" => Some(32),
        "LI" => Some(21),
        "LT" => Some(20),
        "LU" => Some(20),
        "LV" => Some(21),
        "LY" => Some(25),
        "MC" => Some(27),
        "MD" => Some(24),
        "ME" => Some(22),
        "MK" => Some(19),
        "MR" => Some(27),
        "MT" => Some(31),
        "MU" => Some(30),
        "NL" => Some(18),
        "NO" => Some(15),
        "PK" => Some(24),
        "PL" => Some(28),
        "PS" => Some(29),
        "PT" => Some(25),
        "QA" => Some(29),
        "RO" => Some(24),
        "RS" => Some(22),
        "SA" => Some(24),
        "SC" => Some(31),
        "SE" => Some(24),
        "SI" => Some(19),
        "SK" => Some(24),
        "SM" => Some(27),
        "ST" => Some(25),
        "SV" => Some(28),
        "TL" => Some(23),
        "TN" => Some(24),
        "TR" => Some(26),
        "UA" => Some(29),
        "VA" => Some(22),
        "VG" => Some(24),
        "XK" => Some(20),
        _ => None,
    }
}
140
/// Returns true when an IBAN passes ISO 13616 MOD-97 validation.
///
/// `normalized` is expected to be the IBAN without separators, made up of
/// ASCII digits and uppercase ASCII letters only. The first four characters
/// are conceptually moved to the end, each letter is replaced by its
/// two-digit value (`A` = 10 ... `Z` = 35), and the resulting number must
/// leave a remainder of 1 when divided by 97.
///
/// Returns `false` for inputs shorter than four bytes and for inputs
/// containing any other byte (lowercase letters, spaces, non-ASCII text).
/// The function works on bytes, so it never panics on non-ASCII input.
#[must_use]
pub fn iban_mod97_valid(normalized: &str) -> bool {
    let bytes = normalized.as_bytes();
    if bytes.len() < 4 {
        return false;
    }
    let (head, tail) = bytes.split_at(4);

    // Walk the rearranged sequence (tail first, then head) without allocating,
    // reducing modulo 97 at every step so the accumulator stays far below
    // `u32::MAX`.
    let mut remainder = 0u32;
    for &byte in tail.iter().chain(head) {
        remainder = match byte {
            b'0'..=b'9' => (remainder * 10 + u32::from(byte - b'0')) % 97,
            // A letter contributes two decimal digits, hence the factor 100.
            b'A'..=b'Z' => (remainder * 100 + u32::from(byte - b'A') + 10) % 97,
            _ => return false,
        };
    }

    remainder == 1
}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// IBAN used by the context tests; valid and written without separators.
    const NL_IBAN: &str = "NL91ABNA0417164300";

    /// Scans `input` and returns the matched text of every finding, in order.
    fn texts(input: &str) -> Vec<String> {
        let mut found = Vec::new();
        for finding in IbanRecognizer.scan(input) {
            found.push(finding.text);
        }
        found
    }

    /// Asserts that scanning `input` yields exactly one finding equal to `input`.
    fn assert_detected_verbatim(input: &str) {
        assert_eq!(texts(input), [input]);
    }

    /// Asserts that scanning `input` yields no findings at all.
    fn assert_rejected(input: &str) {
        assert!(texts(input).is_empty());
    }

    /// Confidence of the first finding in `input`; panics when nothing is found.
    fn first_confidence(input: &str) -> Confidence {
        IbanRecognizer.scan(input).remove(0).confidence
    }

    /// Asserts that prefixing the IBAN with `word` scores at least as high as
    /// prefixing it with the neutral word `value`.
    fn assert_context_not_weaker(word: &str) {
        let with_context = first_confidence(&format!("{word} {NL_IBAN}"));
        let without_context = first_confidence(&format!("value {NL_IBAN}"));
        assert!(with_context >= without_context);
    }

    #[test]
    fn test_iban_de_with_spaces_detected() {
        let found = texts("IBAN DE89 3704 0044 0532 0130 00");
        assert_eq!(found, ["DE89 3704 0044 0532 0130 00"]);
    }

    #[test]
    fn test_iban_nl_without_spaces_detected() {
        assert_detected_verbatim("NL91ABNA0417164300");
    }

    #[test]
    fn test_iban_gb_with_spaces_detected() {
        assert_detected_verbatim("GB29 NWBK 6016 1331 9268 19");
    }

    #[test]
    fn test_iban_invalid_checksum_rejected() {
        assert_rejected("DE88 3704 0044 0532 0130 00");
    }

    #[test]
    fn test_iban_invalid_country_length_rejected() {
        assert_rejected("DE89 3704 0044");
    }

    #[test]
    fn test_iban_mod97_valid_accepts_known_example() {
        let accepted = iban_mod97_valid("GB29NWBK60161331926819");
        assert!(accepted);
    }

    #[test]
    fn test_iban_country_length_returns_expected_length() {
        let length = iban_country_length("NL");
        assert_eq!(length, Some(18));
    }

    #[test]
    fn test_iban_context_boosts_confidence() {
        assert_context_not_weaker("iban");
    }

    #[test]
    fn test_iban_fr_with_spaces_detected() {
        assert_detected_verbatim("FR14 2004 1010 0505 0001 3M02 606");
    }

    #[test]
    fn test_iban_be_detected() {
        assert_detected_verbatim("BE68 5390 0754 7034");
    }

    #[test]
    fn test_iban_es_detected() {
        assert_detected_verbatim("ES91 2100 0418 4502 0005 1332");
    }

    #[test]
    fn test_iban_it_detected() {
        assert_detected_verbatim("IT60 X054 2811 1010 0000 0123 456");
    }

    #[test]
    fn test_iban_ch_detected() {
        assert_detected_verbatim("CH93 0076 2011 6238 5295 7");
    }

    #[test]
    fn test_iban_lowercase_not_detected() {
        assert_rejected("nl91abna0417164300");
    }

    #[test]
    fn test_iban_unknown_country_rejected() {
        assert_rejected("ZZ91ABNA0417164300");
    }

    #[test]
    fn test_iban_too_short_rejected() {
        assert_rejected("NL91 ABNA 0417");
    }

    #[test]
    fn test_iban_mod97_rejects_known_bad_example() {
        let accepted = iban_mod97_valid("GB28NWBK60161331926819");
        assert!(!accepted);
    }

    #[test]
    fn test_iban_country_length_unknown_returns_none() {
        let length = iban_country_length("ZZ");
        assert_eq!(length, None);
    }

    #[test]
    fn test_iban_bank_context_boosts_confidence() {
        assert_context_not_weaker("bank");
    }

    #[test]
    fn test_iban_transfer_context_boosts_confidence() {
        assert_context_not_weaker("transfer");
    }
}