use crate::{Component, Place};
use failure::Fail;
use failure::{format_err, Error};
use itertools::Itertools;
use regex::{Regex, RegexBuilder};
use std::collections::HashMap;
use std::str::FromStr;
use strum::IntoEnumIterator;
/// Name under which the full, multi-line template is registered in the
/// template engine (see `Template::new`) and later rendered.
const MULTILINE_TEMPLATE_NAME: &'static str = "multi_line";
/// Name under which the single-line "short address" template is registered
/// in the template engine, when one could be derived from the full template.
const SHORT_ADDR_TEMPLATE_NAME: &'static str = "short_addr";
/// A regex-based substitution: a compiled pattern and the text used to
/// replace what it matches.
#[derive(Debug, Clone)]
pub(crate) struct Replacement {
/// Pattern searched for in the text.
pub regex: regex::Regex,
/// Replacement text handed to the regex engine for each substitution.
pub replacement_value: String,
}
/// A pre-format substitution rule applied to the components of a place
/// (see `ReplaceRule::replace_fields`).
#[derive(Debug, Clone)]
pub(crate) enum ReplaceRule {
/// Apply the replacement to every component that has a value.
All(Replacement),
/// Apply the replacement to the given component only.
Component((Component, Replacement)),
}
/// A two-letter country code, stored upper-cased.
///
/// The inner field is private; values are built through the `FromStr`
/// implementation, which validates the length and normalises the case.
#[derive(Debug, Hash, Eq, PartialEq, Clone)]
pub struct CountryCode(String);
impl FromStr for CountryCode {
    type Err = Error;

    /// Parses a two-letter country code.
    ///
    /// The code is upper-cased, and the commonly used but non-standard `UK`
    /// (in any letter case) is mapped to `GB`.
    ///
    /// # Errors
    ///
    /// Returns an error when `s` is not exactly two bytes long.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s.len() != 2 {
            return Err(format_err!(
                "{} is not a valid ISO3166-1:alpha2 country code",
                s,
            ));
        }
        // Normalise the case *before* testing for the alias, otherwise "uk"
        // or "Uk" would be stored as "UK" instead of being mapped to "GB".
        let code = s.to_uppercase();
        if code == "UK" {
            Ok(CountryCode("GB".to_owned()))
        } else {
            Ok(CountryCode(code))
        }
    }
}
impl CountryCode {
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl std::fmt::Display for CountryCode {
    /// Writes the stored country code verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
/// A component value that a country rule forces onto a place before
/// rendering (see `Rules::add_component`).
#[derive(Debug, Clone)]
pub(crate) struct NewComponent {
/// Component to set.
pub component: Component,
/// Value assigned to that component.
pub new_value: String,
}
/// A compiled address template.
#[derive(Debug, Default)]
pub(crate) struct Template {
/// Template engine holding the registered template(s).
pub handlebar_handler: handlebars::Handlebars,
/// Raw template source, kept so that `Clone` can rebuild the engine.
place_template: String,
}
/// Derives the single-line "short address" template from a full template.
///
/// Returns the first line of `place_template` that mentions `house_number`,
/// with surrounding whitespace removed, or `None` when no line does.
fn compute_short_addr_template(place_template: &str) -> Option<String> {
    for line in place_template.split('\n') {
        if line.contains("house_number") {
            return Some(line.trim().to_owned());
        }
    }
    None
}
impl Template {
    /// Compiles `place_template` into a ready-to-render [`Template`].
    ///
    /// The full template is registered under the multi-line name. When a
    /// short-address line can be derived from it, that line is registered
    /// under the short-address name as well.
    ///
    /// # Panics
    ///
    /// Panics if either template string is rejected by the template engine.
    pub fn new(place_template: &str) -> Self {
        let mut handlebar_handler = crate::handlebar_helper::new_template_engine();

        handlebar_handler
            .register_template_string(MULTILINE_TEMPLATE_NAME, place_template)
            .expect("impossible to build multi line template");

        let short_form = compute_short_addr_template(place_template);
        if let Some(single_line) = short_form {
            handlebar_handler
                .register_template_string(SHORT_ADDR_TEMPLATE_NAME, &single_line)
                .expect("impossible to build short addr template");
        }

        Template {
            handlebar_handler,
            place_template: String::from(place_template),
        }
    }
}
impl Clone for Template {
fn clone(&self) -> Self {
Self::new(self.place_template.as_str())
}
}
/// Formatting rules attached to a country (or used as the fallback).
#[derive(Debug, Default, Clone)]
pub(crate) struct Rules {
/// Substitutions applied to the place components before rendering.
pub replace: Vec<ReplaceRule>,
/// Substitutions applied to the rendered text.
pub postformat_replace: Vec<Replacement>,
/// When set, overrides the `Country` component before rendering.
pub change_country: Option<String>,
/// When set, overrides the `CountryCode` component before rendering.
pub change_country_code: Option<String>,
/// When set, a component value forced onto the place before rendering.
pub add_component: Option<NewComponent>,
}
/// All templates and rules known to a `Formatter`.
#[derive(Debug)]
pub(crate) struct Templates {
/// Used when no country code is known, or when the country has no
/// dedicated template.
pub default_template: Template,
/// Used for places lacking the minimum components when the country has no
/// dedicated fallback template.
pub fallback_template: Template,
/// Country-specific templates.
pub templates_by_country: HashMap<CountryCode, Template>,
/// Country-specific rules.
pub rules_by_country: HashMap<CountryCode, Rules>,
/// Country-specific templates for places lacking the minimum components.
pub fallback_templates_by_country: HashMap<CountryCode, Template>,
/// Rules used when the country is unknown or has no dedicated rules.
pub fallback_rules: Rules,
}
/// Formats places into postal-address text using per-country templates.
pub struct Formatter {
/// Templates and rules used for rendering.
pub(crate) templates: Templates,
/// Lookup table keyed by (country code, county name); the value is
/// presumably the county code — verify against the configuration loader.
pub(crate) county_codes: HashMap<(CountryCode, String), String>,
/// Lookup table from (country code, state name) to the state code.
pub(crate) state_codes: HashMap<(CountryCode, String), String>,
}
/// Per-call formatting options.
#[derive(Default, Debug)]
pub struct Configuration {
/// Country code to use; when set it takes precedence over the place's own
/// `CountryCode` component.
pub country_code: Option<String>,
/// NOTE(review): not read anywhere in the visible code — confirm whether
/// abbreviation is implemented elsewhere.
pub abbreviate: Option<bool>,
}
impl Default for Formatter {
/// Builds a formatter by calling
/// `crate::read_configuration::read_configuration`.
fn default() -> Self {
crate::read_configuration::read_configuration()
}
}
impl Formatter {
    /// Formats a place as a multi-line address using the default
    /// [`Configuration`].
    ///
    /// # Errors
    ///
    /// Returns an error when the template cannot be rendered.
    pub fn format(&self, into_addr: impl Into<Place>) -> Result<String, Error> {
        self.format_with_config(into_addr.into(), Configuration::default())
    }

    /// Formats a place as a multi-line address.
    ///
    /// The country is resolved from `conf` or from the place itself, the
    /// place is sanitised, the country rules are applied, the matching
    /// template is rendered and the result is cleaned up. The returned text
    /// always ends with a single newline.
    ///
    /// # Errors
    ///
    /// Returns an error when the template cannot be rendered.
    pub fn format_with_config(
        &self,
        into_addr: impl Into<Place>,
        conf: Configuration,
    ) -> Result<String, Error> {
        let mut addr = into_addr.into();
        let country_code = self.find_country_code(&mut addr, conf);

        sanity_clean_place(&mut addr);

        let template = self.find_template(&addr, &country_code);
        let rules = country_code
            .as_ref()
            .and_then(|c| self.templates.rules_by_country.get(c))
            .unwrap_or(&self.templates.fallback_rules);

        self.preformat(rules, &mut addr);

        let text = template
            .handlebar_handler
            .render(MULTILINE_TEMPLATE_NAME, &addr)
            .map_err(|e| e.context("impossible to render template"))?;

        Ok(cleanup_rendered(&text, rules))
    }

    /// Formats a place as a single-line short address using the default
    /// [`Configuration`].
    ///
    /// # Errors
    ///
    /// Returns an error when the short-address template cannot be rendered.
    pub fn short_addr_format(&self, into_addr: impl Into<Place>) -> Result<String, Error> {
        self.short_addr_format_with_config(into_addr.into(), Configuration::default())
    }

    /// Formats a place as a single-line short address.
    ///
    /// Unlike [`Formatter::format_with_config`], neither sanitisation nor the
    /// country rules are applied; the rendered text is only trimmed.
    ///
    /// # Errors
    ///
    /// Returns an error when the short-address template cannot be rendered.
    pub fn short_addr_format_with_config(
        &self,
        into_addr: impl Into<Place>,
        conf: Configuration,
    ) -> Result<String, Error> {
        let mut addr = into_addr.into();
        let country_code = self.find_country_code(&mut addr, conf);
        let template = self.find_template(&addr, &country_code);

        let text = template
            .handlebar_handler
            .render(SHORT_ADDR_TEMPLATE_NAME, &addr)
            .map_err(|e| e.context("impossible to render short address template"))?;

        Ok(text.trim().to_owned())
    }

    /// Resolves the country code to use for `addr`.
    ///
    /// The code from `conf` takes precedence over the place's `CountryCode`
    /// component. An invalid code is logged and treated as absent.
    ///
    /// For `NL`, the states Curaçao, Sint Maarten and Aruba are treated as
    /// countries of their own: the returned code is switched and the
    /// `Country` component of `addr` is overwritten accordingly.
    fn find_country_code(&self, addr: &mut Place, conf: Configuration) -> Option<CountryCode> {
        let mut country_code = conf
            .country_code
            .or_else(|| addr[Component::CountryCode].clone())
            .and_then(|s| {
                CountryCode::from_str(&s)
                    .map_err(|e| log::info!("impossible to find a country: {}", e))
                    .ok()
            });

        if country_code == CountryCode::from_str("NL").ok() {
            if let Some(state) = addr[Component::State].clone() {
                // "Curaçao" is matched exactly, the other two
                // case-insensitively.
                let overseas = if state.as_str() == "Curaçao" {
                    Some(("CW", "Curaçao"))
                } else {
                    match state.to_lowercase().as_str() {
                        "sint maarten" => Some(("SX", "Sint Maarten")),
                        "aruba" => Some(("AW", "Aruba")),
                        _ => None,
                    }
                };
                if let Some((code, country)) = overseas {
                    country_code = CountryCode::from_str(code).ok();
                    addr[Component::Country] = Some(country.to_owned());
                }
            }
        }
        country_code
    }

    /// Picks the template used to render `addr`.
    ///
    /// * no country code: the default template;
    /// * place with the minimum components: the country template, or the
    ///   default template when the country has none;
    /// * place without them: the country fallback template, or the global
    ///   fallback template when the country has none.
    fn find_template<'a>(
        &'a self,
        addr: &Place,
        country_code: &Option<CountryCode>,
    ) -> &'a Template {
        let templates = &self.templates;
        country_code
            .as_ref()
            .and_then(|c| {
                if has_minimum_place_components(addr) {
                    templates.templates_by_country.get(c)
                } else {
                    templates
                        .fallback_templates_by_country
                        .get(c)
                        .or(Some(&templates.fallback_template))
                }
            })
            .unwrap_or(&templates.default_template)
    }

    /// Applies the country `rules` to `addr` before rendering.
    ///
    /// In order: the replace rules, the forced component, the country and
    /// country-code overrides, then the `StateCode` / `CountyCode` components
    /// are filled from the lookup tables when they are missing and the
    /// corresponding name is known.
    fn preformat(&self, rules: &Rules, addr: &mut Place) {
        for r in &rules.replace {
            r.replace_fields(addr);
        }
        if let Some(add_component) = &rules.add_component {
            addr[add_component.component] = Some(add_component.new_value.clone());
        }
        if let Some(change_country) = &rules.change_country {
            addr[Component::Country] = Some(change_country.clone());
        }
        if let Some(change_country_code) = &rules.change_country_code {
            addr[Component::CountryCode] = Some(change_country_code.clone());
        }

        let country = addr[Component::CountryCode]
            .as_ref()
            .and_then(|c| CountryCode::from_str(c).ok());
        if let Some(country) = country {
            if addr[Component::StateCode].is_none() {
                let state_code = addr[Component::State]
                    .as_ref()
                    .and_then(|state| self.state_codes.get(&(country.clone(), state.clone())))
                    .cloned();
                if let Some(code) = state_code {
                    addr[Component::StateCode] = Some(code);
                }
            }
            if addr[Component::CountyCode].is_none() {
                let county_code = addr[Component::County]
                    .as_ref()
                    .and_then(|county| self.county_codes.get(&(country.clone(), county.clone())))
                    .cloned();
                if let Some(code) = county_code {
                    // Store the code in `CountyCode`, mirroring the state
                    // handling above; writing it to `County` would replace
                    // the county name and leave the code unset.
                    addr[Component::CountyCode] = Some(code);
                }
            }
        }
    }
}
/// Builds `Place` values from loosely keyed `(name, value)` pairs.
pub struct PlaceBuilder {
/// For each component, the alternative key names accepted for it.
pub(crate) component_aliases: HashMap<Component, Vec<String>>,
}
impl Default for PlaceBuilder {
/// Builds a place builder by calling
/// `crate::read_configuration::read_place_builder_configuration`.
fn default() -> Self {
crate::read_configuration::read_place_builder_configuration()
}
}
impl PlaceBuilder {
pub fn build_place<'a>(&self, values: impl IntoIterator<Item = (&'a str, String)>) -> Place {
let mut place = Place::default();
let mut unknown = HashMap::<String, String>::new();
for (k, v) in values.into_iter() {
let component = Component::from_str(k).ok();;
if let Some(component) = component {
place[component] = Some(v);
} else {
unknown.insert(k.to_string(), v);
}
}
if !unknown.is_empty() {
for (c, aliases) in &self.component_aliases {
for alias in aliases {
if let Some(a) = unknown.remove(alias) {
if place[*c].is_none() {
place[*c] = Some(a);
}
}
}
}
place[Component::Attention] = Some(unknown.values().join(", "));
}
if let (Some(state), Some(country)) = (&place[Component::State], &place[Component::Country])
{
if country.parse::<usize>().is_ok() {
place[Component::Country] = Some(state.clone());
place[Component::State] = None;
}
}
place
}
}
/// Removes obviously bogus values from a place before formatting.
///
/// * A postcode longer than 20 bytes, or containing a `digits;digits`
///   range, is dropped.
/// * A postcode starting with two comma-separated five-digit codes is
///   reduced to the first one.
/// * Any component whose value contains an `http://` or `https://` URL is
///   dropped.
fn sanity_clean_place(addr: &mut Place) {
    lazy_static::lazy_static! {
        static ref POST_CODE_RANGE: Regex = Regex::new(r#"\d+;\d+"#).unwrap();
        static ref MATCHABLE_POST_CODE_RANGE: Regex = Regex::new(r#"^(\d{5}),\d{5}"#).unwrap();
        static ref IS_URL: Regex= Regex::new(r#"https?://"#).unwrap();
    }

    // Outer `None`: leave the postcode untouched.
    // `Some(None)`: drop it. `Some(Some(code))`: replace it with `code`.
    let postcode_fix = addr[Component::Postcode].as_ref().and_then(|code| {
        if code.len() > 20 || POST_CODE_RANGE.is_match(code) {
            Some(None)
        } else {
            MATCHABLE_POST_CODE_RANGE
                .captures(code)
                .and_then(|caps| caps.get(1))
                .map(|first| Some(first.as_str().to_owned()))
        }
    });
    if let Some(new_postcode) = postcode_fix {
        addr[Component::Postcode] = new_postcode;
    }

    for component in Component::iter() {
        let holds_url = match &addr[component] {
            Some(value) => IS_URL.is_match(value),
            None => false,
        };
        if holds_url {
            addr[component] = None;
        }
    }
}
/// Tidies up the raw output of a rendered template.
///
/// Runs, in order: the built-in punctuation/whitespace clean-ups, the
/// country's post-format replacements, removal of consecutive duplicates
/// (both between lines and between `", "`-separated items of a line), and a
/// final trim. The returned text always ends with exactly one newline.
fn cleanup_rendered(text: &str, rules: &Rules) -> String {
    use itertools::Itertools;
    use std::borrow::Cow;
    lazy_static::lazy_static! {
        static ref REPLACEMENTS: [(Regex, &'static str); 12]= [
            (RegexBuilder::new(r"[},\s]+$").multi_line(true).build().unwrap(), ""),
            (RegexBuilder::new(r"^ - ").multi_line(true).build().unwrap(), ""),
            (RegexBuilder::new(r"^[,\s]+").multi_line(true).build().unwrap(), ""),
            (RegexBuilder::new(r",\s*,").multi_line(true).build().unwrap(), ", "),
            (RegexBuilder::new(r"[\t\p{Zs}]+,[\t\p{Zs}]+").multi_line(true).build().unwrap(), ", "),
            (RegexBuilder::new(r"[\t ][\t ]+").multi_line(true).build().unwrap(), " "),
            (RegexBuilder::new(r"[\t\p{Zs}]\n").multi_line(true).build().unwrap(), "\n"),
            (RegexBuilder::new(r"\n,").multi_line(true).build().unwrap(), "\n"),
            (RegexBuilder::new(r",,+").multi_line(true).build().unwrap(), ","),
            (RegexBuilder::new(r",\n").multi_line(true).build().unwrap(), "\n"),
            (RegexBuilder::new(r"\n[\t\p{Zs}]+").multi_line(true).build().unwrap(), "\n"),
            (RegexBuilder::new(r"\n\n+").multi_line(true).build().unwrap(), "\n"),
        ];
        static ref FINAL_CLEANUP: [(Regex, &'static str); 2]= [
            (Regex::new(r"^\s+").unwrap(), ""),
            (Regex::new(r"\s+$").unwrap(), ""),
        ];
    }

    let mut cleaned = String::from(text);

    // Built-in clean-ups; only reallocate when a pattern actually matched.
    for (pattern, substitute) in REPLACEMENTS.iter() {
        let replaced = pattern.replace_all(&cleaned, *substitute);
        if let Cow::Owned(updated) = replaced {
            cleaned = updated;
        }
    }

    // Country-specific replacements on the rendered text.
    for rule in rules.postformat_replace.iter() {
        let replaced = rule
            .regex
            .replace_all(&cleaned, rule.replacement_value.as_str());
        if let Cow::Owned(updated) = replaced {
            cleaned = updated;
        }
    }

    // Drop consecutive duplicates, first inside each line, then across lines.
    let mut cleaned = cleaned
        .split('\n')
        .map(|line| line.split(", ").map(str::trim).dedup().join(", "))
        .dedup()
        .join("\n");

    for (pattern, substitute) in FINAL_CLEANUP.iter() {
        let replaced = pattern.replace(&cleaned, *substitute);
        if let Cow::Owned(updated) = replaced {
            cleaned = updated;
        }
    }

    let mut output = String::from(cleaned.trim());
    output.push('\n');
    output
}
/// Tells whether a place has enough detail for a regular (non-fallback)
/// template: a road or a postcode.
fn has_minimum_place_components(addr: &Place) -> bool {
    [Component::Road, Component::Postcode]
        .iter()
        .any(|component| addr[*component].is_some())
}
impl ReplaceRule {
fn replace_fields(&self, addr: &mut Place) {
match self {
ReplaceRule::All(replace_rule) => {
for c in Component::iter() {
if let Some(v) = &addr[c] {
addr[c] = Some(
replace_rule
.regex
.replace(&v, replace_rule.replacement_value.as_str())
.to_string(),
);
}
}
}
ReplaceRule::Component((c, replace_rule)) => {
if let Some(v) = &addr[*c] {
addr[*c] = Some(
replace_rule
.regex
.replace(&v, replace_rule.replacement_value.as_str())
.to_string(),
);
}
}
}
}
}