use std::{
collections::HashMap,
ffi::{CStr, CString},
ops::DerefMut,
};
use init::{
initialize_libpostal, initialize_libpostal_language_classifier,
initialize_libpostal_parser,
};
use libpostal_sys::{
libpostal_address_parser_response_destroy, libpostal_expand_address,
libpostal_expansion_array_destroy, libpostal_get_address_parser_default_options,
libpostal_get_default_options, libpostal_parse_address, size_t, GLOBAL_LOCK,
};
mod errors;
mod init;
mod probe;
pub use self::errors::Error;
pub type Result<T, E = Error> = std::result::Result<T, E>;
#[derive(Debug, Default)]
pub struct ParseAddressOptions {}
pub fn parse_address(
addr: &str,
_opt: &ParseAddressOptions,
) -> Result<HashMap<String, String>> {
let mut initialization_state = GLOBAL_LOCK.lock().expect("mutex poisoned");
unsafe { initialize_libpostal(initialization_state.deref_mut()) }?;
unsafe { initialize_libpostal_parser(initialization_state.deref_mut()) }?;
let addr = CString::new(addr).map_err(|_| Error::NullByteInString {
string: addr.to_owned(),
})?;
let parse_options = unsafe { libpostal_get_address_parser_default_options() };
let parsed =
unsafe { libpostal_parse_address(addr.as_ptr() as *mut _, parse_options) };
let num_components = unsafe { (*parsed).num_components } as usize;
let mut result = HashMap::with_capacity(num_components);
for i in 0..num_components {
let (label, component) = unsafe {
(
CStr::from_ptr(*(*parsed).labels.add(i))
.to_str()
.expect("label contained invalid UTF-8"),
CStr::from_ptr(*(*parsed).components.add(i))
.to_str()
.expect("component contained invalid UTF-8"),
)
};
result.insert(label.to_owned(), component.to_owned());
}
unsafe { libpostal_address_parser_response_destroy(parsed) };
Ok(result)
}
#[derive(Debug, Default)]
pub struct ExpandAddressOptions {}
pub fn expand_address(addr: &str, _opt: &ExpandAddressOptions) -> Result<Vec<String>> {
let mut initialization_state = GLOBAL_LOCK.lock().expect("mutex poisoned");
unsafe { initialize_libpostal(initialization_state.deref_mut()) }?;
unsafe {
initialize_libpostal_language_classifier(initialization_state.deref_mut())
}?;
let addr = CString::new(addr).map_err(|_| Error::NullByteInString {
string: addr.to_owned(),
})?;
let expand_options = unsafe { libpostal_get_default_options() };
let mut num_expansions: size_t = 0;
let expansions = unsafe {
libpostal_expand_address(
addr.as_ptr() as *mut _,
expand_options,
&mut num_expansions,
)
};
let mut result = Vec::with_capacity(num_expansions as usize);
for i in 0..num_expansions {
let expansion = unsafe {
CStr::from_ptr(*expansions.offset(i as isize))
.to_str()
.expect("expansion contained invalid UTF-8")
};
result.push(expansion.to_owned());
}
unsafe { libpostal_expansion_array_destroy(expansions, num_expansions) };
Ok(result)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
#[ignore]
fn parse_address_returns_components() {
let addr = "781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA";
let opt = ParseAddressOptions::default();
let parsed = parse_address(addr, &opt).unwrap();
assert_eq!(parsed.get("state"), Some(&"ny".to_owned()));
}
#[test]
#[ignore]
fn expand_address_returns_candidates() {
let addr = "Quatre-vingt-douze Ave des Champs-Élysées";
let opt = ExpandAddressOptions::default();
let expanded = expand_address(addr, &opt).unwrap();
assert!(expanded[0].contains("92"));
}
}