1use crate::{Component, Place};
2use anyhow::{anyhow, Context, Error};
3use itertools::Itertools;
4use regex::{Regex, RegexBuilder};
5use std::collections::HashMap;
6use std::str::FromStr;
7use strum::IntoEnumIterator;
8
/// Name under which a template's full, multi-line form is registered in the template engine.
const MULTILINE_TEMPLATE_NAME: &str = "multi_line";
/// Name under which the one-line form (the template line mentioning `house_number`) is registered.
const SHORT_ADDR_TEMPLATE_NAME: &str = "short_addr";
11
12#[derive(Debug, Clone)]
14pub(crate) struct Replacement {
15 pub regex: regex::Regex,
16 pub replacement_value: String,
17}
18
19#[derive(Debug, Clone)]
22pub(crate) enum ReplaceRule {
23 All(Replacement),
24 Component((Component, Replacement)),
25}
26
27#[derive(Debug, Hash, Eq, PartialEq, Clone)]
28pub struct CountryCode(String); impl FromStr for CountryCode {
31 type Err = Error;
32
33 fn from_str(s: &str) -> Result<Self, Self::Err> {
34 if s.len() == 2 {
35 if s == "UK" {
36 Ok(CountryCode("GB".to_owned()))
37 } else {
38 Ok(CountryCode(s.to_uppercase()))
39 }
40 } else {
41 Err(anyhow!(
42 "{} is not a valid ISO3166-1:alpha2 country code",
43 s,
44 ))
45 }
46 }
47}
48
49impl CountryCode {
50 pub fn as_str(&self) -> &str {
51 self.0.as_str()
52 }
53}
54
55impl std::fmt::Display for CountryCode {
56 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
57 write!(f, "{}", self.0)
58 }
59}
60
61#[derive(Debug, Clone)]
63pub(crate) struct NewComponent {
64 pub component: Component,
65 pub new_value: String,
66}
67
68#[derive(Debug, Default)]
70pub(crate) struct Template {
71 pub handlebar_handler: handlebars::Handlebars<'static>,
73 place_template: String, }
75
/// Derives the one-line "short address" template from a full template.
///
/// Returns the first line of `place_template` that mentions `house_number`,
/// with surrounding whitespace removed, or `None` when no line does.
fn compute_short_addr_template(place_template: &str) -> Option<String> {
    for line in place_template.split('\n') {
        if line.contains("house_number") {
            return Some(line.trim().to_owned());
        }
    }
    None
}
85
86impl Template {
87 pub fn new(place_template: &str) -> Self {
88 let mut template_engine = crate::handlebar_helper::new_template_engine();
89 template_engine
90 .register_template_string(MULTILINE_TEMPLATE_NAME, place_template)
91 .expect("impossible to build multi line template");
92
93 if let Some(short_addr_template) = compute_short_addr_template(place_template) {
94 template_engine
95 .register_template_string(SHORT_ADDR_TEMPLATE_NAME, &short_addr_template)
96 .expect("impossible to build short addr template");
97 }
98
99 Template {
100 place_template: place_template.to_owned(),
101 handlebar_handler: template_engine,
102 }
103 }
104}
105
106impl Clone for Template {
107 fn clone(&self) -> Self {
108 Self::new(self.place_template.as_str())
109 }
110}
111
112#[derive(Debug, Default, Clone)]
117pub(crate) struct Rules {
118 pub replace: Vec<ReplaceRule>,
119 pub postformat_replace: Vec<Replacement>,
120 pub change_country: Option<String>,
121 pub change_country_code: Option<String>,
122 pub add_component: Option<NewComponent>,
124}
125
126#[derive(Debug)]
127pub(crate) struct Templates {
128 pub default_template: Template,
129 pub fallback_template: Template,
130 pub templates_by_country: HashMap<CountryCode, Template>,
131 pub rules_by_country: HashMap<CountryCode, Rules>,
132 pub fallback_templates_by_country: HashMap<CountryCode, Template>,
133 pub fallback_rules: Rules,
134}
135
136pub struct Formatter {
173 pub(crate) templates: Templates,
174 pub(crate) county_codes: HashMap<(CountryCode, String), String>,
175 pub(crate) state_codes: HashMap<(CountryCode, String), String>,
176 }
180
/// Per-call formatting options.
#[derive(Debug, Default)]
pub struct Configuration {
    /// When set, used as the country code instead of the place's own `country_code` component.
    pub country_code: Option<String>,
    /// Abbreviation switch.
    // NOTE(review): nothing in this part of the file reads this field; confirm where it is honoured.
    pub abbreviate: Option<bool>,
}
189
190impl Default for Formatter {
191 fn default() -> Self {
193 crate::read_configuration::read_configuration()
194 }
195}
196
197impl Formatter {
198 pub fn format(&self, into_addr: impl Into<Place>) -> Result<String, Error> {
229 self.format_with_config(into_addr.into(), Configuration::default())
230 }
231
232 pub fn format_with_config(
236 &self,
237 into_addr: impl Into<Place>,
238 conf: Configuration,
239 ) -> Result<String, Error> {
240 let mut addr = into_addr.into();
241 let country_code = self.find_country_code(&mut addr, conf);
242
243 sanity_clean_place(&mut addr);
244
245 let template = self.find_template(&addr, &country_code);
246 let rules = country_code
247 .as_ref()
248 .and_then(|c| self.templates.rules_by_country.get(c))
249 .unwrap_or(&self.templates.fallback_rules);
250
251 self.preformat(rules, &mut addr);
252
253 let text = template
254 .handlebar_handler
255 .render(MULTILINE_TEMPLATE_NAME, &addr)
256 .context("impossible to render template")?;
257
258 let text = cleanup_rendered(&text, rules);
259
260 Ok(text)
261 }
262
263 pub fn short_addr_format(&self, into_addr: impl Into<Place>) -> Result<String, Error> {
292 self.short_addr_format_with_config(into_addr.into(), Configuration::default())
293 }
294
295 pub fn short_addr_format_with_config(
299 &self,
300 into_addr: impl Into<Place>,
301 conf: Configuration,
302 ) -> Result<String, Error> {
303 let mut addr = into_addr.into();
304 let country_code = self.find_country_code(&mut addr, conf);
305
306 let template = self.find_template(&addr, &country_code);
307
308 let text = template
309 .handlebar_handler
310 .render(SHORT_ADDR_TEMPLATE_NAME, &addr)
311 .context("impossible to render short address template")?;
312
313 let text = text.trim().to_owned();
314 Ok(text)
315 }
316
317 fn find_country_code(&self, addr: &mut Place, conf: Configuration) -> Option<CountryCode> {
318 let mut country_code = conf
319 .country_code
320 .or_else(|| addr[Component::CountryCode].clone())
321 .and_then(|s| {
322 CountryCode::from_str(&s)
323 .map_err(|e| log::info!("impossible to find a country: {}", e))
324 .ok()
325 });
326
327 if country_code == CountryCode::from_str("NL").ok() {
329 if let Some(state) = addr[Component::State].clone() {
330 if state.as_str() == "Curaçao" {
331 country_code = CountryCode::from_str("CW").ok();
332 addr[Component::Country] = Some("Curaçao".to_owned());
333 }
334 let state = state.to_lowercase();
335
336 if state.as_str() == "sint maarten" {
337 country_code = CountryCode::from_str("SX").ok();
338 addr[Component::Country] = Some("Sint Maarten".to_owned());
339 } else if state.as_str() == "aruba" {
340 country_code = CountryCode::from_str("AW").ok();
341 addr[Component::Country] = Some("Aruba".to_owned());
342 }
343 }
344 }
345
346 country_code
347 }
348
349 fn find_template(&self, addr: &Place, country_code: &Option<CountryCode>) -> &Template {
350 country_code
351 .as_ref()
352 .and_then(|c| {
353 if !has_minimum_place_components(addr) {
354 self.templates
357 .fallback_templates_by_country
358 .get(c)
359 .or(Some(&self.templates.fallback_template))
360 } else {
361 self.templates.templates_by_country.get(c)
362 }
363 })
364 .unwrap_or(&self.templates.default_template)
365 }
366
367 fn preformat(&self, rules: &Rules, addr: &mut Place) {
368 for r in &rules.replace {
369 r.replace_fields(addr);
370 }
371
372 if let Some(add_component) = &rules.add_component {
374 addr[add_component.component] = Some(add_component.new_value.clone());
375 }
376 if let Some(change_country) = &rules.change_country {
377 addr[Component::Country] = Some(change_country.clone());
378 }
379 if let Some(change_country_code) = &rules.change_country_code {
380 addr[Component::CountryCode] = Some(change_country_code.clone());
381 }
382
383 if let Some(country) = addr[Component::CountryCode]
385 .as_ref()
386 .and_then(|c| CountryCode::from_str(c).ok())
387 {
388 if addr[Component::StateCode].is_none() {
389 if let Some(state) = &addr[Component::State] {
391 if let Some(new_state) = self
392 .state_codes
393 .get(&(country.clone(), state.to_string()))
394 .cloned()
395 {
396 addr[Component::StateCode] = Some(new_state);
397 }
398 }
399 }
400
401 if addr[Component::CountyCode].is_none() {
402 if let Some(county) = &addr[Component::County] {
404 if let Some(new_county) = self
405 .county_codes
406 .get(&(country, county.to_string()))
407 .cloned()
408 {
409 addr[Component::County] = Some(new_county);
410 }
411 }
412 }
413 }
414 }
415}
416
417pub struct PlaceBuilder {
421 pub(crate) component_aliases: HashMap<Component, Vec<String>>,
422}
423
424impl Default for PlaceBuilder {
425 fn default() -> Self {
426 crate::read_configuration::read_place_builder_configuration()
427 }
428}
429
430impl PlaceBuilder {
431 pub fn build_place<'a>(&self, values: impl IntoIterator<Item = (&'a str, String)>) -> Place {
433 let mut place = Place::default();
434 let mut unknown = HashMap::<String, String>::new();
435 for (k, v) in values.into_iter() {
436 let component = Component::from_str(k).ok();
437 if let Some(component) = component {
438 place[component] = Some(v);
439 } else {
440 unknown.insert(k.to_string(), v);
441 }
442 }
443
444 if !unknown.is_empty() {
446 for (c, aliases) in &self.component_aliases {
447 for alias in aliases {
449 if let Some(a) = unknown.remove(alias) {
450 if place[*c].is_none() {
451 place[*c] = Some(a);
452 }
453 }
454 }
455 }
456 place[Component::Attention] = Some(unknown.values().join(", "));
457 }
458
459 if let (Some(state), Some(country)) = (&place[Component::State], &place[Component::Country])
461 {
462 if country.parse::<usize>().is_ok() {
463 place[Component::Country] = Some(state.clone());
464 place[Component::State] = None;
465 }
466 }
467 place
468 }
469}
470
471fn sanity_clean_place(addr: &mut Place) {
472 lazy_static::lazy_static! {
473 static ref POST_CODE_RANGE: Regex = Regex::new(r#"\d+;\d+"#).unwrap();
474 static ref MATCHABLE_POST_CODE_RANGE: Regex = Regex::new(r#"^(\d{5}),\d{5}"#).unwrap();
475 static ref IS_URL: Regex= Regex::new(r#"https?://"#).unwrap();
476
477 }
478 if let Some(post_code) = &addr[Component::Postcode] {
480 if post_code.len() > 20 || POST_CODE_RANGE.is_match(post_code) {
481 addr[Component::Postcode] = None;
482 } else if let Some(r) = MATCHABLE_POST_CODE_RANGE
483 .captures(post_code)
484 .and_then(|r| r.get(1))
485 .map(|c| c.as_str())
486 {
487 addr[Component::Postcode] = Some(r.to_owned());
488 }
489 }
490
491 for c in Component::iter() {
493 if let Some(v) = &addr[c] {
494 if IS_URL.is_match(v) {
495 addr[c] = None;
496 }
497 }
498 }
499}
500
501fn cleanup_rendered(text: &str, rules: &Rules) -> String {
502 lazy_static::lazy_static! {
503 static ref REPLACEMENTS: [(Regex, &'static str); 12]= [
504 (RegexBuilder::new(r"[},\s]+$").multi_line(true).build().unwrap(), ""),
505 (RegexBuilder::new(r"^ - ").multi_line(true).build().unwrap(), ""), (RegexBuilder::new(r"^[,\s]+").multi_line(true).build().unwrap(), ""),
507 (RegexBuilder::new(r",\s*,").multi_line(true).build().unwrap(), ", "), (RegexBuilder::new(r"[\t\p{Zs}]+,[\t\p{Zs}]+").multi_line(true).build().unwrap(), ", "), (RegexBuilder::new(r"[\t ][\t ]+").multi_line(true).build().unwrap(), " "), (RegexBuilder::new(r"[\t\p{Zs}]\n").multi_line(true).build().unwrap(), "\n"), (RegexBuilder::new(r"\n,").multi_line(true).build().unwrap(), "\n"), (RegexBuilder::new(r",,+").multi_line(true).build().unwrap(), ","), (RegexBuilder::new(r",\n").multi_line(true).build().unwrap(), "\n"), (RegexBuilder::new(r"\n[\t\p{Zs}]+").multi_line(true).build().unwrap(), "\n"), (RegexBuilder::new(r"\n\n+").multi_line(true).build().unwrap(), "\n"), ];
517
518 static ref FINAL_CLEANUP: [(Regex, &'static str); 2]= [
519 (Regex::new(r"^\s+").unwrap(), ""), (Regex::new(r"\s+$").unwrap(), ""), ];
522 }
523
524 let mut res = text.to_owned();
525
526 for (rgx, new_val) in REPLACEMENTS.iter() {
527 let rep = rgx.replace_all(&res, *new_val);
528 match rep {
530 std::borrow::Cow::Borrowed(_) => {}
531 std::borrow::Cow::Owned(v) => {
532 res = v;
533 }
534 }
535 }
536
537 for r in &rules.postformat_replace {
538 let rep = r.regex.replace_all(&res, r.replacement_value.as_str());
539 match rep {
540 std::borrow::Cow::Borrowed(_) => {}
541 std::borrow::Cow::Owned(v) => {
542 res = v;
543 }
544 }
545 }
546
547 let mut res = res
551 .split('\n')
552 .map(|s| s.split(", ").map(|e| e.trim()).dedup().join(", "))
553 .dedup()
554 .join("\n");
555
556 for (rgx, new_val) in FINAL_CLEANUP.iter() {
557 let rep = rgx.replace(&res, *new_val);
558 match rep {
559 std::borrow::Cow::Borrowed(_) => {}
560 std::borrow::Cow::Owned(v) => {
561 res = v;
562 }
563 }
564 }
565
566 let res = res.trim();
567 format!("{}\n", res) }
569
570fn has_minimum_place_components(addr: &Place) -> bool {
571 addr[Component::Road].is_some() || addr[Component::Postcode].is_some()
574}
575
576impl ReplaceRule {
577 fn replace_fields(&self, addr: &mut Place) {
578 match self {
579 ReplaceRule::All(replace_rule) => {
580 for c in Component::iter() {
581 if let Some(v) = &addr[c] {
582 addr[c] = Some(
583 replace_rule
584 .regex
585 .replace(v, replace_rule.replacement_value.as_str())
586 .to_string(),
587 );
588 }
589 }
590 }
591 ReplaceRule::Component((c, replace_rule)) => {
592 if let Some(v) = &addr[*c] {
593 addr[*c] = Some(
594 replace_rule
595 .regex
596 .replace(v, replace_rule.replacement_value.as_str())
597 .to_string(),
598 );
599 }
600 }
601 }
602 }
603}