postcode_extractor/
lib.rs1use std::collections::BTreeMap;
2
3use include_dir::{Dir, include_dir};
4use once_cell::sync::Lazy;
5use regex::{Match, Regex};
6
7mod country;
8mod json_models;
9
10use crate::json_models::{PositionLogic, RegexJson};
11
12pub use country::{
13 Country, FIVE_DIGIT_ADDITIONAL_NATIONS, FIVE_DIGIT_NATIONS, FIVE_DIGIT_WITH_SPACE_NATIONS,
14 FOUR_DIGIT_NATIONS, SIX_DIGIT_NATIONS, UNIQUE_COUNTRIES,
15};
16
17static TEMPLATES_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/postalcode-extractor/regex");
18
19struct RegexWrapper {
20 pub regex: Regex,
21 pub position_logic: PositionLogic,
22}
23
24static ALL: Lazy<BTreeMap<Country, RegexWrapper>> = Lazy::new(|| {
25 let mut m = BTreeMap::new();
26 for file in TEMPLATES_DIR.files() {
27 let text = file.contents_utf8().expect("Must be UTF8");
28 let regex_json: RegexJson = serde_json::from_str(text).unwrap();
29
30 assert!(regex_json.regex.position_logic.position >= 0.0);
31 assert!(regex_json.regex.position_logic.position <= 1.0);
32
33 let regex = Regex::new(®ex_json.regex.engines.rust).unwrap();
34
35 let regex_wrapper = RegexWrapper {
36 regex: regex,
37 position_logic: regex_json.regex.position_logic.clone(),
38 };
39
40 m.insert(regex_json.country.clone(), regex_wrapper);
41 }
42 m
43});
44
/// A postcode extracted from a piece of text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PostcodeHolder {
    /// Text captured by the regex's `postcode` group.
    pub base: String,
    /// Text captured by the `postcode_additional` group. Only populated for countries listed in
    /// `FIVE_DIGIT_ADDITIONAL_NATIONS`, and only when that group took part in the match.
    pub additional: Option<String>,
}
50
51#[derive(Debug, Clone)]
52pub struct PostcodeWrapper {
53 pub country: Country,
54 pub postcode: PostcodeHolder,
55}
56
/// Errors reported by the postcode evaluation functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PostcodeError {
    /// No regex definition is loaded for the requested country.
    UnsupportedCountry,
}

impl std::fmt::Display for PostcodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UnsupportedCountry => {
                f.write_str("no postcode regex is available for this country")
            }
        }
    }
}

// Lets callers use `?` into `Box<dyn Error>` and similar error containers.
impl std::error::Error for PostcodeError {}
61
62pub fn evaluate_single_country(
72 haystack: &str,
73 country: Country,
74 check_position: bool,
75) -> Result<Option<PostcodeHolder>, PostcodeError> {
76 let regex = ALL.get(&country).ok_or(PostcodeError::UnsupportedCountry)?;
77
78 let postalcode_captures = regex.regex.captures_iter(haystack);
79
80 let captures = match postalcode_captures.last() {
83 Some(x) => x,
84 None => return Ok(None),
85 };
86
87 let best_match = match captures.name("postcode") {
88 Some(x) => x,
89 None => return Ok(None),
90 };
91
92 if check_position {
93 if !check_positions(haystack, &best_match, ®ex.position_logic) {
94 return Ok(None);
95 }
96 }
97
98 let additional = if FIVE_DIGIT_ADDITIONAL_NATIONS.contains(&country) {
99 if let Some(add_match) = captures.name("postcode_additional") {
100 Some(add_match.as_str().to_string())
101 } else {
102 None
103 }
104 } else {
105 None
106 };
107
108 let best_match = best_match.as_str().to_string();
109 Ok(Some(PostcodeHolder {
110 base: best_match,
111 additional: additional,
112 }))
113}
114
115fn check_positions(haystack: &str, mat: &Match, position_logic: &PositionLogic) -> bool {
116 let hay_len = haystack.chars().count() as f64;
117 let match_char_idex = haystack[..mat.start()].chars().count() as f64;
118
119 let func = position_logic.operation.as_function();
120
121 return func(match_char_idex / hay_len, position_logic.position);
122}
123
124pub fn evaluate_all_countries(
136 haystack: &str,
137 check_position: bool,
138) -> Result<Option<PostcodeWrapper>, PostcodeError> {
139 for country in UNIQUE_COUNTRIES.iter().cloned() {
140 if let Ok(Some(pc)) = evaluate_single_country(haystack, country.clone(), check_position) {
141 return Ok(Some(PostcodeWrapper {
142 country: country,
143 postcode: pc,
144 }));
145 }
146 }
147
148 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::CZ, check_position) {
151 if pc.base.contains("CZ-") {
152 return Ok(Some(PostcodeWrapper {
153 country: Country::CZ,
154 postcode: pc,
155 }));
156 }
157 }
158
159 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::GR, check_position) {
161 if pc.base.contains("GR-") {
162 return Ok(Some(PostcodeWrapper {
163 country: Country::GR,
164 postcode: pc,
165 }));
166 }
167 }
168
169 if let Ok(Some(pc)) =
171 evaluate_single_country(haystack, Country::Unknown5DigitSpace, check_position)
172 {
173 return Ok(Some(PostcodeWrapper {
174 country: Country::Unknown5DigitSpace,
175 postcode: pc,
176 }));
177 }
178
179
180 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::US, check_position) {
182 if pc.additional.is_some() {
183 return Ok(Some(PostcodeWrapper {
184 country: Country::Unknown5DigitAdditional,
185 postcode: pc,
186 }));
187 }
188 }
189
190 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::Unknown6Digit, check_position)
192 {
193 return Ok(Some(PostcodeWrapper {
194 country: Country::Unknown6Digit,
195 postcode: pc,
196 }));
197 }
198
199 if check_position {
202 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::TW, check_position) {
203 return Ok(Some(PostcodeWrapper {
204 country: Country::TW,
205 postcode: pc,
206 }));
207 }
208 }
209
210 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::Unknown5Digit, check_position)
213 {
214 return Ok(Some(PostcodeWrapper {
215 country: Country::Unknown5Digit,
216 postcode: pc,
217 }));
218 }
219
220 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::CY, check_position) {
222 if pc.base.contains("CY-") {
223 return Ok(Some(PostcodeWrapper {
224 country: Country::CY,
225 postcode: pc,
226 }));
227 }
228 }
229
230 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::LU, check_position) {
232 if pc.base.starts_with("L-") {
233 return Ok(Some(PostcodeWrapper { country: Country::LU, postcode: pc }))
234 }
235 }
236
237 if let Ok(Some(pc)) = evaluate_single_country(haystack, Country::Unknown4Digit, check_position)
240 {
241 return Ok(Some(PostcodeWrapper {
242 country: Country::Unknown4Digit,
243 postcode: pc,
244 }));
245 }
246 return Ok(None);
247}