1use crate::common::{alphanumeric_upper, compile_regex, confidence, context_boost};
2use cloakrs_core::{Confidence, EntityType, Locale, PiiEntity, Recognizer, Span};
3use once_cell::sync::Lazy;
4use regex::Regex;
5
/// Candidate pattern for IBAN-like tokens, compiled lazily on first use.
///
/// Matches two uppercase letters, two digits, then 11 to 30 further uppercase
/// letters or digits, each optionally preceded by a single space, delimited by
/// word boundaries. The quantifier is greedy, so a match may extend into short
/// uppercase/numeric tokens that follow the number. The pattern only selects
/// candidates; `IbanRecognizer::scan` filters them through
/// `IbanRecognizer::validate`.
static IBAN_REGEX: Lazy<Regex> =
    Lazy::new(|| compile_regex(r"\b[A-Z]{2}\d{2}(?: ?[A-Z0-9]){11,30}\b"));
8
/// Keywords handed to `context_boost` when scoring a match in `compute_confidence`.
// NOTE(review): presumably a keyword near the match raises its confidence —
// confirm against `context_boost` in `crate::common`.
const CONTEXT_WORDS: &[&str] = &["iban", "account", "bank", "transfer", "wire", "swift"];
10
/// Recognizer for International Bank Account Numbers (IBANs).
///
/// Stateless unit struct: candidates are located with `IBAN_REGEX` and kept
/// only when their normalized form has the length registered for its country
/// code and passes the mod-97 checksum.
#[derive(Debug, Clone, Copy, Default)]
pub struct IbanRecognizer;
14
15impl Recognizer for IbanRecognizer {
16 fn id(&self) -> &str {
17 "iban_mod97_v1"
18 }
19
20 fn entity_type(&self) -> EntityType {
21 EntityType::Iban
22 }
23
24 fn supported_locales(&self) -> &[Locale] {
25 &[]
26 }
27
28 fn scan(&self, text: &str) -> Vec<PiiEntity> {
29 IBAN_REGEX
30 .find_iter(text)
31 .filter(|matched| self.validate(matched.as_str()))
32 .map(|matched| {
33 let normalized = alphanumeric_upper(matched.as_str());
34 PiiEntity {
35 entity_type: self.entity_type(),
36 span: Span::new(matched.start(), matched.end()),
37 text: matched.as_str().to_string(),
38 confidence: compute_confidence(text, matched.start(), &normalized),
39 recognizer_id: self.id().to_string(),
40 }
41 })
42 .collect()
43 }
44
45 fn validate(&self, candidate: &str) -> bool {
46 let normalized = alphanumeric_upper(candidate);
47 has_country_length(&normalized) && iban_mod97_valid(&normalized)
48 }
49}
50
51fn compute_confidence(text: &str, start: usize, normalized: &str) -> Confidence {
52 let base = if has_country_length(normalized) && iban_mod97_valid(normalized) {
53 0.99
54 } else {
55 0.50
56 };
57 confidence(base + context_boost(text, start, CONTEXT_WORDS))
58}
59
60fn has_country_length(normalized: &str) -> bool {
61 normalized
62 .get(..2)
63 .and_then(iban_country_length)
64 .is_some_and(|length| normalized.len() == length)
65}
66
/// Returns the total IBAN length (country code and check digits included) for
/// an uppercase ISO 3166-1 alpha-2 `country` code, or `None` when the code has
/// no entry in the table.
///
/// Lengths follow the SWIFT IBAN registry (ISO 13616). The lookup is
/// case-sensitive: lowercase codes yield `None`.
#[must_use]
pub fn iban_country_length(country: &str) -> Option<usize> {
    let length = match country {
        "AD" => 24,
        "AE" => 23,
        "AL" => 28,
        "AT" => 20,
        "AZ" => 28,
        "BA" => 20,
        "BE" => 16,
        "BG" => 22,
        "BH" => 22,
        "BI" => 27,
        "BR" => 29,
        "BY" => 28,
        "CH" => 21,
        "CR" => 22,
        "CY" => 28,
        "CZ" => 24,
        "DE" => 22,
        "DJ" => 27,
        "DK" => 18,
        "DO" => 28,
        "EE" => 20,
        "EG" => 29,
        "ES" => 24,
        "FI" => 18,
        "FK" => 18,
        "FO" => 18,
        "FR" => 27,
        "GB" => 22,
        "GE" => 22,
        "GI" => 23,
        "GL" => 18,
        "GR" => 27,
        "GT" => 28,
        "HN" => 28,
        "HR" => 21,
        "HU" => 28,
        "IE" => 22,
        "IL" => 23,
        "IQ" => 23,
        "IS" => 26,
        "IT" => 27,
        "JO" => 30,
        "KW" => 30,
        "KZ" => 20,
        "LB" => 28,
        "LC" => 32,
        "LI" => 21,
        "LT" => 20,
        "LU" => 20,
        "LV" => 21,
        "LY" => 25,
        "MC" => 27,
        "MD" => 24,
        "ME" => 22,
        "MK" => 19,
        "MN" => 20,
        "MR" => 27,
        "MT" => 31,
        "MU" => 30,
        "NI" => 28,
        "NL" => 18,
        "NO" => 15,
        "OM" => 23,
        "PK" => 24,
        "PL" => 28,
        "PS" => 29,
        "PT" => 25,
        "QA" => 29,
        "RO" => 24,
        "RS" => 22,
        "RU" => 33,
        "SA" => 24,
        "SC" => 31,
        "SD" => 18,
        "SE" => 24,
        "SI" => 19,
        "SK" => 24,
        "SM" => 27,
        "SO" => 23,
        "ST" => 25,
        "SV" => 28,
        "TL" => 23,
        "TN" => 24,
        "TR" => 26,
        "UA" => 29,
        "VA" => 22,
        "VG" => 24,
        "XK" => 20,
        "YE" => 30,
        _ => return None,
    };
    Some(length)
}
140
/// Checks the mod-97 checksum of an already normalized IBAN.
///
/// `normalized` must consist solely of ASCII digits and uppercase ASCII
/// letters, with no separators. The first four characters (country code and
/// check digits) are moved to the end, letters are expanded to the two-digit
/// values 10..=35, and the resulting decimal number must leave remainder 1
/// when divided by 97.
///
/// Returns `false` for inputs shorter than four bytes and for inputs that
/// contain any other character, including lowercase letters, whitespace and
/// non-ASCII text. Never panics.
#[must_use]
pub fn iban_mod97_valid(normalized: &str) -> bool {
    // Work on bytes: every valid character is ASCII, and byte-wise splitting
    // cannot hit a non-character-boundary the way `&str` slicing can.
    let bytes = normalized.as_bytes();
    if bytes.len() < 4 {
        return false;
    }
    let (prefix, account) = bytes.split_at(4);

    // Fold the rearranged sequence digit by digit, keeping only the running
    // remainder so the value never outgrows `u32` (at most 96 * 100 + 35).
    let remainder = account
        .iter()
        .chain(prefix)
        .try_fold(0u32, |acc, &byte| match byte {
            b'0'..=b'9' => Some((acc * 10 + u32::from(byte - b'0')) % 97),
            b'A'..=b'Z' => Some((acc * 100 + u32::from(byte - b'A') + 10) % 97),
            _ => None,
        });

    remainder == Some(1)
}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// IBAN used by the context tests.
    const NL_IBAN: &str = "NL91ABNA0417164300";

    /// Scans `input` and collects the matched text of every finding.
    fn matched_texts(input: &str) -> Vec<String> {
        let mut collected = Vec::new();
        for finding in IbanRecognizer.scan(input) {
            collected.push(finding.text);
        }
        collected
    }

    /// Asserts that scanning `input` yields exactly one finding with text `expected`.
    fn assert_only_match(input: &str, expected: &str) {
        assert_eq!(matched_texts(input), [expected]);
    }

    /// Asserts that scanning `input` yields no findings.
    fn assert_no_match(input: &str) {
        assert!(matched_texts(input).is_empty());
    }

    /// Asserts that prefixing the IBAN with `keyword` gives a confidence at
    /// least as high as prefixing it with the neutral word "value".
    fn assert_context_not_lower(keyword: &str) {
        let with_context = IbanRecognizer.scan(&format!("{keyword} {NL_IBAN}"));
        let without_context = IbanRecognizer.scan(&format!("value {NL_IBAN}"));
        assert!(with_context[0].confidence >= without_context[0].confidence);
    }

    #[test]
    fn test_iban_de_with_spaces_detected() {
        assert_only_match(
            "IBAN DE89 3704 0044 0532 0130 00",
            "DE89 3704 0044 0532 0130 00",
        );
    }

    #[test]
    fn test_iban_nl_without_spaces_detected() {
        assert_only_match("NL91ABNA0417164300", "NL91ABNA0417164300");
    }

    #[test]
    fn test_iban_gb_with_spaces_detected() {
        assert_only_match("GB29 NWBK 6016 1331 9268 19", "GB29 NWBK 6016 1331 9268 19");
    }

    #[test]
    fn test_iban_invalid_checksum_rejected() {
        assert_no_match("DE88 3704 0044 0532 0130 00");
    }

    #[test]
    fn test_iban_invalid_country_length_rejected() {
        assert_no_match("DE89 3704 0044");
    }

    #[test]
    fn test_iban_mod97_valid_accepts_known_example() {
        assert!(iban_mod97_valid("GB29NWBK60161331926819"));
    }

    #[test]
    fn test_iban_country_length_returns_expected_length() {
        assert_eq!(iban_country_length("NL"), Some(18));
    }

    #[test]
    fn test_iban_context_boosts_confidence() {
        assert_context_not_lower("iban");
    }

    #[test]
    fn test_iban_fr_with_spaces_detected() {
        assert_only_match(
            "FR14 2004 1010 0505 0001 3M02 606",
            "FR14 2004 1010 0505 0001 3M02 606",
        );
    }

    #[test]
    fn test_iban_be_detected() {
        assert_only_match("BE68 5390 0754 7034", "BE68 5390 0754 7034");
    }

    #[test]
    fn test_iban_es_detected() {
        assert_only_match(
            "ES91 2100 0418 4502 0005 1332",
            "ES91 2100 0418 4502 0005 1332",
        );
    }

    #[test]
    fn test_iban_it_detected() {
        assert_only_match(
            "IT60 X054 2811 1010 0000 0123 456",
            "IT60 X054 2811 1010 0000 0123 456",
        );
    }

    #[test]
    fn test_iban_ch_detected() {
        assert_only_match("CH93 0076 2011 6238 5295 7", "CH93 0076 2011 6238 5295 7");
    }

    #[test]
    fn test_iban_lowercase_not_detected() {
        assert_no_match("nl91abna0417164300");
    }

    #[test]
    fn test_iban_unknown_country_rejected() {
        assert_no_match("ZZ91ABNA0417164300");
    }

    #[test]
    fn test_iban_too_short_rejected() {
        assert_no_match("NL91 ABNA 0417");
    }

    #[test]
    fn test_iban_mod97_rejects_known_bad_example() {
        assert!(!iban_mod97_valid("GB28NWBK60161331926819"));
    }

    #[test]
    fn test_iban_country_length_unknown_returns_none() {
        assert_eq!(iban_country_length("ZZ"), None);
    }

    #[test]
    fn test_iban_bank_context_boosts_confidence() {
        assert_context_not_lower("bank");
    }

    #[test]
    fn test_iban_transfer_context_boosts_confidence() {
        assert_context_not_lower("transfer");
    }
}