use bitflags::bitflags;
use std::ffi::{CStr, CString, NulError};
use std::iter::Iterator;
use libc::{c_char, size_t};
use crate::ffi;
bitflags! {
/// Bit set selecting which string normalizations to enable for an expansion.
///
/// Within this file the numeric bit values are never handed to libpostal
/// directly: `LibpostalNormalizeOptions::update_string_options` translates
/// each flag into the boolean field of the same (lower-case) name on
/// `ffi::libpostal_normalize_options`.
#[derive(Default)]
pub struct StringOptions: u32 {
// One bit per option, assigned sequentially from bit 0 through bit 17.
const TRANSLITERATE = 1 << 0;
const STRIP_ACCENTS = 1 << 1;
const DECOMPOSE = 1 << 2;
const LOWERCASE = 1 << 3;
const TRIM_STRING = 1 << 4;
const DROP_PARENTHETICALS = 1 << 5;
const REPLACE_NUMERIC_HYPHENS = 1 << 6;
const DELETE_NUMERIC_HYPHENS = 1 << 7;
const SPLIT_ALPHA_FROM_NUMERIC = 1 << 8;
const REPLACE_WORD_HYPHENS = 1 << 9;
const DELETE_WORD_HYPHENS = 1 << 10;
const DELETE_FINAL_PERIODS = 1 << 11;
const DELETE_ACRONYM_PERIODS = 1 << 12;
const DROP_ENGLISH_POSSESSIVES = 1 << 13;
const DELETE_APOSTROPHES = 1 << 14;
const EXPAND_NUMEX = 1 << 15;
const ROMAN_NUMERALS = 1 << 16;
const LATIN_ASCII = 1 << 17;
}
}
bitflags! {
/// Bit set naming the address components an expansion should consider.
///
/// Unlike `StringOptions`, the raw bits of this set are copied unchanged into
/// the `address_components` field of `ffi::libpostal_normalize_options` (see
/// `LibpostalNormalizeOptions::update_address_components`), so the literal
/// values below must not be renumbered.
// NOTE(review): presumably these mirror libpostal's LIBPOSTAL_ADDRESS_*
// constants — confirm against libpostal.h before changing any value.
#[derive(Default)]
pub struct AddressComponents: u16 {
// Zero-valued flag: the empty set.
const NONE = 0;
const ANY = 1 << 0;
const NAME = 1 << 1;
const HOUSE_NUMBER = 1 << 2;
const STREET = 1 << 3;
const UNIT = 1 << 4;
const LEVEL = 1 << 5;
const STAIRCASE = 1 << 6;
const ENTRANCE = 1 << 7;
const CATEGORY = 1 << 8;
const NEAR = 1 << 9;
// Bits 10 through 12 are intentionally left unassigned here.
const TOPONYM = 1 << 13;
const POSTAL_CODE = 1 << 14;
const PO_BOX = 1 << 15;
}
}
/// Internal wrapper pairing a libpostal options struct with the pointer array
/// that its `languages` field refers to.
struct LibpostalNormalizeOptions {
/// The libpostal options struct. `expand` moves it out with `take()`, and
/// `inner_mut` refills the slot with libpostal's defaults when it is `None`.
ffi: Option<ffi::libpostal_normalize_options>,
/// Array of C-string pointers that `ffi.languages` points into (filled by
/// `update_languages`); it is stored here so the array stays allocated.
lang_buffer: Option<Vec<*const c_char>>,
}
/// User-facing settings for address expansion.
///
/// Every field is optional: `libpostal_options` only overrides libpostal's
/// defaults for the fields that are `Some`.
#[derive(Clone, Default, Hash, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NormalizeOptions {
/// Language codes, stored as C strings so their pointers can be handed to libpostal.
languages: Option<Vec<CString>>,
/// Address components to consider; accumulated by `add_address_component`.
address_components: Option<AddressComponents>,
/// String normalizations to enable; accumulated by `add_string_option`.
string_options: Option<StringOptions>,
}
/// Result of an address expansion: the variations produced by libpostal.
#[derive(Clone, Hash, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NormalizedAddress {
/// Expansions copied out of libpostal's result array as owned Rust strings.
variations: Vec<String>,
/// Number of expansions, as written by `libpostal_expand_address` through
/// its out-parameter.
n: size_t,
}
impl LibpostalNormalizeOptions {
    /// Returns a mutable reference to the wrapped libpostal options struct.
    ///
    /// When the slot is empty (for example after `expand` moved the struct out),
    /// libpostal's defaults are fetched and stored first.
    fn inner_mut(&mut self) -> &mut ffi::libpostal_normalize_options {
        // `get_or_insert_with` defers the FFI call until the slot is really empty;
        // the eager `get_or_insert` form would call into libpostal on every access.
        self.ffi
            .get_or_insert_with(|| unsafe { ffi::libpostal_get_default_options() })
    }

    /// Copies every flag of `string_options` into the matching boolean field of
    /// the libpostal options struct (set flags become `true`, all others `false`).
    fn update_string_options(&mut self, string_options: &StringOptions) {
        let target = self.inner_mut();
        target.latin_ascii = string_options.contains(StringOptions::LATIN_ASCII);
        target.transliterate = string_options.contains(StringOptions::TRANSLITERATE);
        target.strip_accents = string_options.contains(StringOptions::STRIP_ACCENTS);
        target.decompose = string_options.contains(StringOptions::DECOMPOSE);
        target.lowercase = string_options.contains(StringOptions::LOWERCASE);
        target.trim_string = string_options.contains(StringOptions::TRIM_STRING);
        target.drop_parentheticals = string_options.contains(StringOptions::DROP_PARENTHETICALS);
        target.replace_numeric_hyphens =
            string_options.contains(StringOptions::REPLACE_NUMERIC_HYPHENS);
        target.delete_numeric_hyphens =
            string_options.contains(StringOptions::DELETE_NUMERIC_HYPHENS);
        target.split_alpha_from_numeric =
            string_options.contains(StringOptions::SPLIT_ALPHA_FROM_NUMERIC);
        target.replace_word_hyphens = string_options.contains(StringOptions::REPLACE_WORD_HYPHENS);
        target.delete_word_hyphens = string_options.contains(StringOptions::DELETE_WORD_HYPHENS);
        target.delete_final_periods = string_options.contains(StringOptions::DELETE_FINAL_PERIODS);
        target.delete_acronym_periods =
            string_options.contains(StringOptions::DELETE_ACRONYM_PERIODS);
        target.drop_english_possessives =
            string_options.contains(StringOptions::DROP_ENGLISH_POSSESSIVES);
        target.delete_apostrophes = string_options.contains(StringOptions::DELETE_APOSTROPHES);
        target.expand_numex = string_options.contains(StringOptions::EXPAND_NUMEX);
        target.roman_numerals = string_options.contains(StringOptions::ROMAN_NUMERALS);
    }

    /// Stores the raw bits of `address_components` in the libpostal options struct.
    fn update_address_components(&mut self, address_components: &AddressComponents) {
        // Use the public accessor rather than reaching into the generated field.
        self.inner_mut().address_components = address_components.bits();
    }

    /// Points the libpostal options at the given language codes.
    ///
    /// Only raw pointers are recorded, so the `CString`s yielded by `languages`
    /// must stay alive (and unmoved) for as long as the options are used.
    fn update_languages<'a, T: Iterator<Item = &'a CString>>(&mut self, languages: T) {
        let mut lang_buffer: Vec<*const c_char> = languages.map(|s| s.as_ptr()).collect();
        let ffi = self.inner_mut();
        ffi.languages = lang_buffer.as_mut_ptr();
        ffi.num_languages = lang_buffer.len();
        // Moving the `Vec` into `self` does not move its heap allocation, so the
        // pointer stored in `ffi.languages` above remains valid.
        self.lang_buffer = Some(lang_buffer);
    }

    /// Expands `address` with the configured options and copies the resulting
    /// variations into owned Rust strings.
    ///
    /// The options struct is moved out of `self` for the call. If it has already
    /// been consumed by an earlier call, libpostal's defaults are used instead
    /// (the same fallback `inner_mut` applies), rather than panicking.
    ///
    /// Variations that are not valid UTF-8 are converted lossily, and NULL
    /// entries in the returned array are skipped.
    fn expand(&mut self, address: &CStr) -> NormalizedAddress {
        let mut result = NormalizedAddress::default();
        let options = self
            .ffi
            .take()
            .unwrap_or_else(|| unsafe { ffi::libpostal_get_default_options() });
        // SAFETY: `address` is a `CStr`, so `as_ptr()` is NUL-terminated and outlives the
        // call; `result.n` is a live out-parameter. Validity of the pointers stored in
        // `options.languages` relies on `lang_buffer` and the caller's `CString`s.
        let raw =
            unsafe { ffi::libpostal_expand_address(address.as_ptr(), options, &mut result.n) };
        if raw.is_null() {
            // Nothing was allocated: there is nothing to read and nothing to free.
            return NormalizedAddress::default();
        }
        result.variations = Vec::with_capacity(result.n);
        // SAFETY: assumes libpostal returned an array of `result.n` string pointers
        // (TODO confirm against libpostal.h). Each entry is NULL-checked before it is
        // read as a C string, and the array is released exactly once, after every
        // string has been copied.
        unsafe {
            for i in 0..result.n {
                let phrase = *raw.add(i);
                if phrase.is_null() {
                    continue;
                }
                let variation = CStr::from_ptr(phrase);
                result
                    .variations
                    .push(variation.to_string_lossy().into_owned());
            }
            ffi::libpostal_expansion_array_destroy(raw, result.n);
        }
        result
    }
}
/// Starts from libpostal's own default options with no language buffer attached.
impl Default for LibpostalNormalizeOptions {
    fn default() -> Self {
        let defaults = unsafe { ffi::libpostal_get_default_options() };
        Self {
            ffi: Some(defaults),
            lang_buffer: None,
        }
    }
}
impl NormalizeOptions {
pub fn new<'a, 'b, T>(languages: Option<T>) -> Result<NormalizeOptions, NulError>
where
'a: 'b,
T: Iterator<Item = &'b &'a str>,
{
let mut options = NormalizeOptions::default();
if let Some(languages) = languages {
let mut new_langs = Vec::new();
for lang in languages {
let c_lang = CString::new(*lang)?;
new_langs.push(c_lang);
}
options.languages = Some(new_langs);
}
Ok(options)
}
pub fn add_string_option(&mut self, option: StringOptions) {
if let Some(options) = &mut self.string_options {
options.insert(option);
} else {
self.string_options = Some(option);
}
}
pub fn add_address_component(&mut self, component: AddressComponents) {
if let Some(components) = &mut self.address_components {
components.insert(component);
} else {
self.address_components = Some(component);
}
}
fn libpostal_options(&self) -> LibpostalNormalizeOptions {
let mut options: LibpostalNormalizeOptions = Default::default();
if let Some(string_options) = &self.string_options {
options.update_string_options(string_options);
}
if let Some(address_components) = &self.address_components {
options.update_address_components(address_components);
}
if let Some(languages) = &self.languages {
options.update_languages(languages.as_slice().iter());
}
options
}
pub fn languages(&self) -> Option<impl Iterator<Item = &str>> {
if let Some(languages) = &self.languages {
return Some(languages.as_slice().iter().map(|c| c.to_str().unwrap()));
}
None
}
pub fn address_components(&self) -> Option<&AddressComponents> {
self.address_components.as_ref()
}
pub fn string_options(&self) -> Option<&StringOptions> {
self.string_options.as_ref()
}
pub fn expand(&mut self, address: &str) -> Result<NormalizedAddress, NulError> {
let mut options = self.libpostal_options();
let c_address = CString::new(address)?;
Ok(options.expand(&c_address))
}
}
impl Default for NormalizedAddress {
fn default() -> Self {
NormalizedAddress {
variations: Vec::new(),
n: 0,
}
}
}
impl NormalizedAddress {
    /// Iterates over the variations by shared reference, in stored order.
    pub fn iter(&self) -> std::slice::Iter<String> {
        self.variations.iter()
    }

    /// Iterates over the variations by mutable reference, in stored order.
    pub fn iter_mut(&mut self) -> std::slice::IterMut<String> {
        self.variations.iter_mut()
    }
}
/// Lets `for variation in &normalized` walk the variations by shared reference.
impl<'a> IntoIterator for &'a NormalizedAddress {
    type Item = &'a String;
    type IntoIter = std::slice::Iter<'a, String>;

    fn into_iter(self) -> Self::IntoIter {
        self.variations.iter()
    }
}
/// Lets `for variation in &mut normalized` edit the variations in place.
impl<'a> IntoIterator for &'a mut NormalizedAddress {
    type Item = &'a mut String;
    type IntoIter = std::slice::IterMut<'a, String>;

    fn into_iter(self) -> Self::IntoIter {
        self.variations.iter_mut()
    }
}
/// Expands `address` using default normalization options.
///
/// # Errors
///
/// Returns a [`NulError`] if `address` contains a NUL byte.
pub fn expand_address<'a>(address: &'a str) -> Result<NormalizedAddress, NulError> {
    NormalizeOptions::default().expand(address)
}
/// Expands `address`, optionally restricting the expansion to the given
/// language codes.
///
/// # Errors
///
/// Returns a [`NulError`] if `address` or any language code contains a NUL byte.
pub fn expand_address_with_options<'a, 'b, T>(
    address: &'a str,
    languages: Option<T>,
) -> Result<NormalizedAddress, NulError>
where
    'a: 'b,
    T: Iterator<Item = &'b &'a str>,
{
    NormalizeOptions::new(languages).and_then(|mut options| options.expand(address))
}
// Unit tests for the option wrappers and the `NormalizedAddress` iterators.
// Only `libpostal_normalize_options_expand` calls `LibModules::Expand.setup()`
// and runs a real expansion; the remaining tests inspect option state and
// iterator behaviour.
#[cfg(test)]
mod test {
use super::*;
use crate::error::RuntimeError;
use crate::LibModules;
// A default wrapper holds an options struct with no languages and no pointer buffer.
#[test]
fn default_libpostal_normalize_options() {
let options: LibpostalNormalizeOptions = Default::default();
assert!(options.ffi.is_some());
assert_eq!(*(&options.ffi.as_ref().unwrap().num_languages), 0);
assert!(options.lang_buffer.is_none());
}
// `update_languages` must expose the same strings, in order, through the raw
// `languages` pointer array of the FFI struct.
#[test]
fn libpostal_normalize_options_update_languages() {
let languages = ["en", "gr"];
let c_languages: Vec<CString> = languages
.iter()
.map(|s| CString::new(*s).unwrap())
.collect();
let mut options: LibpostalNormalizeOptions = Default::default();
options.update_languages(c_languages.as_slice().iter());
let ffi = &options.ffi.as_ref().unwrap();
for i in 0..ffi.num_languages {
unsafe {
// Read the i-th pointer back and compare it with the source string.
let ptr = ffi.languages.add(i);
let cstr = CStr::from_ptr(*ptr);
assert_eq!(cstr.to_str(), Ok(languages[i]));
}
}
}
// Flags that are set become `true` on the FFI struct; an unset flag becomes `false`.
#[test]
fn libpostal_normalize_options_update_string_options() {
let mut options: LibpostalNormalizeOptions = Default::default();
let s_options = StringOptions::TRANSLITERATE | StringOptions::LOWERCASE;
options.update_string_options(&s_options);
let ffi = &options.ffi.as_ref().unwrap();
assert!(ffi.transliterate);
assert!(ffi.lowercase);
assert!(!ffi.latin_ascii);
}
// The address-component bits are copied into the FFI struct unchanged.
#[test]
fn libpostal_normalize_options_update_address_components() {
let mut options: LibpostalNormalizeOptions = Default::default();
let components = AddressComponents::NAME | AddressComponents::LEVEL;
options.update_address_components(&components);
let ffi = &options.ffi.as_ref().unwrap();
assert_eq!(ffi.address_components, components.bits);
}
// End-to-end expansion with default options; also exercises both the field
// access and the `IntoIterator for &NormalizedAddress` path.
#[test]
fn libpostal_normalize_options_expand() -> Result<(), RuntimeError> {
let postal_module = LibModules::Expand;
postal_module.setup()?;
let address = "St Johns Centre, Rope Walk, Bedford, Bedfordshire, MK42 0XE, United Kingdom";
let c_address = CString::new(address)?;
let mut libpostal_options: LibpostalNormalizeOptions = Default::default();
let expanded = libpostal_options.expand(&c_address);
assert!(expanded.n > 0);
for variation in &expanded.variations {
assert!(variation.ends_with("kingdom"));
}
for variation in &expanded {
assert!(variation.ends_with("kingdom"));
}
Ok(())
}
// `iter` yields the variations in insertion order and then ends.
#[test]
fn normalized_address_iter() {
let mut normalized = NormalizedAddress::default();
normalized.variations.push(String::from("wat"));
normalized.variations.push(String::from("what"));
let mut iterator = normalized.iter();
assert_eq!(iterator.next(), Some(&String::from("wat")));
assert_eq!(iterator.next(), Some(&String::from("what")));
assert_eq!(iterator.next(), None);
}
// `iter_mut` allows editing each variation in place.
#[test]
fn normalized_address_iter_mut() {
let mut normalized = NormalizedAddress::default();
normalized.variations.push(String::from("wat"));
normalized.variations.push(String::from("what"));
for variation in normalized.iter_mut() {
variation.push_str(" else");
}
let mut iterator = normalized.iter_mut();
assert_eq!(iterator.next(), Some(&mut String::from("wat else")));
assert_eq!(iterator.next(), Some(&mut String::from("what else")));
assert_eq!(iterator.next(), None);
}
// `for … in &mut normalized` goes through `IntoIterator for &mut NormalizedAddress`.
#[test]
fn normalized_address_mut_into_iter() {
let mut normalized = NormalizedAddress::default();
let variations = ["wat", "what"];
for variation in &variations {
normalized.variations.push(String::from(*variation))
}
for variation in &mut normalized {
variation.push_str(" else");
}
let expected = ["wat else", "what else"];
for (i, variation) in normalized.iter().enumerate() {
assert_eq!(variation.as_str(), expected[i]);
}
}
}