#![allow(missing_docs)]
use crate::error::{Error, Result};
use std::ffi::{CStr, CString};
use std::sync::Once;
// The generated bindings mirror C naming and expose many items this crate never
// touches, so naming, dead-code and missing-docs lints are silenced for this
// module only.
#[allow(
    non_upper_case_globals,
    non_camel_case_types,
    non_snake_case,
    dead_code,
    missing_docs
)]
mod bindings {
    // Pulls in `bindings.rs` from the build's `OUT_DIR` (presumably written by the
    // build script via bindgen — confirm against build.rs).
    include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
}

// Re-export every generated item so the rest of this file can use the
// `libpostal_*` names unqualified.
pub use bindings::*;
static INIT: Once = Once::new();
static mut INITIALIZED: bool = false;
static mut INIT_ERROR_MSG: [u8; 256] = [0; 256];
static mut INIT_ERROR_LEN: usize = 0;
pub(crate) fn initialize() -> Result<()> {
INIT.call_once(|| {
let data_manager = crate::data::DataManager::new();
let success = if data_manager.is_data_available() {
let data_dir = data_manager.data_dir();
match std::ffi::CString::new(data_dir.to_string_lossy().as_ref()) {
Ok(c_data_dir) => unsafe {
let setup_result = libpostal_setup_datadir(c_data_dir.as_ptr() as *mut _);
let parser_result =
libpostal_setup_parser_datadir(c_data_dir.as_ptr() as *mut _);
let classifier_result =
libpostal_setup_language_classifier_datadir(c_data_dir.as_ptr() as *mut _);
setup_result && parser_result && classifier_result
},
Err(_) => {
unsafe {
let msg = b"Invalid data directory path";
INIT_ERROR_LEN = msg.len().min(255);
INIT_ERROR_MSG[..INIT_ERROR_LEN].copy_from_slice(&msg[..INIT_ERROR_LEN]);
}
false
}
}
} else {
unsafe {
let setup_result = libpostal_setup();
let parser_result = libpostal_setup_parser();
let classifier_result = libpostal_setup_language_classifier();
setup_result && parser_result && classifier_result
}
};
if success {
unsafe {
INITIALIZED = true;
}
} else {
let error_msg = if !data_manager.is_data_available() {
"libpostal initialization failed - data files not found. Run data download first."
} else {
"libpostal initialization failed"
};
unsafe {
INITIALIZED = false;
if INIT_ERROR_LEN == 0 {
let msg_bytes = error_msg.as_bytes();
INIT_ERROR_LEN = msg_bytes.len().min(255);
INIT_ERROR_MSG[..INIT_ERROR_LEN].copy_from_slice(&msg_bytes[..INIT_ERROR_LEN]);
}
}
}
});
unsafe {
if INITIALIZED {
Ok(())
} else {
let error_msg = if INIT_ERROR_LEN > 0 {
std::str::from_utf8(&INIT_ERROR_MSG[..INIT_ERROR_LEN])
.unwrap_or("libpostal initialization failed")
} else {
"libpostal initialization failed"
};
Err(Error::initialization_failed(error_msg))
}
}
}
#[allow(dead_code)]
pub(crate) fn teardown() -> Result<()> {
unsafe {
if INITIALIZED {
libpostal_teardown();
libpostal_teardown_parser();
libpostal_teardown_language_classifier();
}
}
Ok(())
}
/// Parses `address` with libpostal and returns its labelled components.
///
/// `options` optionally supplies a language and/or country; with `None` the
/// libpostal default parser options are used. Components whose value or label
/// pointer is null are skipped.
///
/// # Errors
///
/// * Propagates the error from `initialize()` when libpostal is not available.
/// * `Error::ffi_error` when `address` (or a string in `options`) contains an
///   interior NUL byte.
/// * `Error::parse_error` when libpostal returns a null response.
pub(crate) fn parse_address(
    address: &str,
    options: Option<&ParseOptions>,
) -> Result<Vec<AddressComponent>> {
    initialize()?;
    let c_address =
        CString::new(address).map_err(|_| Error::ffi_error("Invalid address string"))?;

    // These own the C strings that `opts.language` / `opts.country` point to.
    // They are declared at function scope so they are still alive when
    // `libpostal_parse_address` reads the pointers; declaring them inside the
    // option-conversion branch would free the strings before the call.
    let mut language_holder: Option<CString> = None;
    let mut country_holder: Option<CString> = None;
    let opts = match options {
        Some(requested) => {
            convert_parse_options(requested, &mut language_holder, &mut country_holder)?
        }
        // SAFETY: FFI call without arguments that returns the options by value.
        None => unsafe { libpostal_get_address_parser_default_options() },
    };

    // SAFETY: `c_address` and the option holders outlive the parse call. The
    // response pointer is null-checked before it is dereferenced, indices stay
    // below `num_components`, every string is copied into an owned `String`
    // before the response is destroyed, and the response is destroyed once.
    unsafe {
        let response_ptr = libpostal_parse_address(c_address.as_ptr() as *mut _, opts);
        if response_ptr.is_null() {
            return Err(Error::parse_error("libpostal_parse_address returned null"));
        }
        let response = &*response_ptr;
        let mut results = Vec::with_capacity(response.num_components);
        for i in 0..response.num_components {
            let component_ptr = *response.components.add(i);
            let label_ptr = *response.labels.add(i);
            if component_ptr.is_null() || label_ptr.is_null() {
                continue;
            }
            let value = CStr::from_ptr(component_ptr).to_string_lossy().into_owned();
            let label = CStr::from_ptr(label_ptr).to_string_lossy().into_owned();
            results.push(AddressComponent { label, value });
        }
        libpostal_address_parser_response_destroy(response_ptr);
        Ok(results)
    }
}
pub(crate) fn normalize_string(
input: &str,
options: Option<&NormalizeOptions>,
) -> Result<Vec<String>> {
initialize()?;
let c_input = CString::new(input).map_err(|_| Error::ffi_error("Invalid input string"))?;
unsafe {
let opts = if let Some(opts) = options {
convert_normalize_options(opts)?
} else {
libpostal_get_default_options()
};
let mut num_expansions = 0;
let expansions_ptr = libpostal_expand_address(
c_input.as_ptr() as *mut _, opts,
&mut num_expansions,
);
if expansions_ptr.is_null() {
return Ok(Vec::new()); }
let mut results = Vec::new();
for i in 0..num_expansions {
let expansion_ptr = *expansions_ptr.add(i);
if !expansion_ptr.is_null() {
let expansion = CStr::from_ptr(expansion_ptr).to_string_lossy().into_owned();
results.push(expansion);
}
}
libpostal_expansion_array_destroy(expansions_ptr, num_expansions);
Ok(results)
}
}
/// Builds libpostal parser options from `options`.
///
/// Starts from the libpostal default parser options and overrides `language`
/// and `country` when they are set. The C strings backing those pointers are
/// stored in `language_cstr` / `country_cstr`: the returned struct only holds
/// raw pointers into them, so the caller must keep both holders alive (and
/// unmodified) for as long as the returned options are in use.
///
/// # Errors
///
/// Returns `Error::ffi_error` when the language or country contains an interior
/// NUL byte.
fn convert_parse_options(
    options: &ParseOptions,
    language_cstr: &mut Option<CString>,
    country_cstr: &mut Option<CString>,
) -> Result<libpostal_address_parser_options_t> {
    // SAFETY: FFI call without arguments that returns the options by value.
    let mut opts = unsafe { libpostal_get_address_parser_default_options() };

    if let Some(ref language) = options.language {
        let c_language = CString::new(language.as_str())
            .map_err(|_| Error::ffi_error("Invalid language string"))?;
        // The cast target is inferred from the field so this compiles whether the
        // platform's `c_char` is `i8` or `u8`. Moving the `CString` into the
        // holder afterwards does not move its heap buffer, so the pointer stays
        // valid.
        opts.language = c_language.as_ptr() as *mut _;
        *language_cstr = Some(c_language);
    }

    if let Some(ref country) = options.country {
        let c_country = CString::new(country.as_str())
            .map_err(|_| Error::ffi_error("Invalid country string"))?;
        opts.country = c_country.as_ptr() as *mut _;
        *country_cstr = Some(c_country);
    }

    Ok(opts)
}
unsafe fn convert_normalize_options(
options: &NormalizeOptions,
) -> Result<libpostal_normalize_options_t> {
let mut opts = unsafe { libpostal_get_default_options() };
opts.address_components = options.address_components;
opts.latin_ascii = options.latin_ascii;
opts.transliterate = options.transliterate;
opts.strip_accents = options.strip_accents;
opts.decompose = options.decompose;
opts.lowercase = options.lowercase;
opts.trim_string = options.trim_string;
opts.replace_word_hyphens = options.replace_word_hyphens;
opts.delete_word_hyphens = options.delete_word_hyphens;
opts.replace_numeric_hyphens = options.replace_numeric_hyphens;
opts.delete_numeric_hyphens = options.delete_numeric_hyphens;
opts.split_alpha_from_numeric = options.split_alpha_from_numeric;
opts.delete_final_periods = options.delete_final_periods;
opts.delete_acronym_periods = options.delete_acronym_periods;
opts.drop_english_possessives = options.drop_english_possessives;
opts.delete_apostrophes = options.delete_apostrophes;
opts.expand_numex = options.expand_numex;
opts.roman_numerals = options.roman_numerals;
Ok(opts)
}
/// One labelled piece of a parsed address, as produced by `parse_address`.
///
/// Both strings are owned copies of the C strings in the libpostal response
/// (converted lossily to UTF-8).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AddressComponent {
    /// Label libpostal attached to this component.
    pub label: String,
    /// Text of the component.
    pub value: String,
}
/// Optional hints passed to `parse_address`.
///
/// `ParseOptions::default()` sets neither hint, in which case the libpostal
/// default parser options are left unchanged by `convert_parse_options`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ParseOptions {
    /// Language hint forwarded to libpostal as a C string; must not contain NUL bytes.
    pub language: Option<String>,
    /// Country hint forwarded to libpostal as a C string; must not contain NUL bytes.
    pub country: Option<String>,
}
/// Options for `normalize_string`.
///
/// Apart from `languages`, every field is copied by `convert_normalize_options`
/// into the libpostal normalize option of the same name. `Default` is
/// intentionally not derived: all-`false` flags would not correspond to the
/// defaults that libpostal itself provides.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NormalizeOptions {
    /// Language codes for the request (the tests in this file use `"en"`).
    pub languages: Vec<String>,
    /// Bit mask copied to libpostal's `address_components` option.
    pub address_components: u16,
    // The remaining flags map one-to-one onto libpostal options of the same name.
    pub latin_ascii: bool,
    pub transliterate: bool,
    pub strip_accents: bool,
    pub decompose: bool,
    pub lowercase: bool,
    pub trim_string: bool,
    pub replace_word_hyphens: bool,
    pub delete_word_hyphens: bool,
    pub replace_numeric_hyphens: bool,
    pub delete_numeric_hyphens: bool,
    pub split_alpha_from_numeric: bool,
    pub delete_final_periods: bool,
    pub delete_acronym_periods: bool,
    pub drop_english_possessives: bool,
    pub delete_apostrophes: bool,
    pub expand_numex: bool,
    pub roman_numerals: bool,
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::{CStr, CString};

    /// `initialize` succeeds and can be called repeatedly.
    #[test]
    fn test_initialization() {
        let result = initialize();
        assert!(
            result.is_ok(),
            "Failed to initialize libpostal: {:?}",
            result
        );
        let result2 = initialize();
        assert!(
            result2.is_ok(),
            "Second initialization failed: {:?}",
            result2
        );
    }

    /// Smoke test: parsing with default options must not crash. The outcome is
    /// only printed, so a parse error does not fail the test.
    #[test]
    fn test_basic_address_parsing() {
        initialize().expect("Failed to initialize libpostal");
        let result = parse_address("123 Main St", None);
        match result {
            Ok(components) => {
                println!("Parsed components: {:?}", components);
            }
            Err(e) => {
                println!("Parsing failed (expected without data files): {:?}", e);
            }
        }
    }

    /// Smoke test: expansion with default options must not crash. The outcome is
    /// only printed, so an error does not fail the test.
    #[test]
    fn test_basic_normalization() {
        initialize().expect("Failed to initialize libpostal");
        let result = normalize_string("St", None);
        match result {
            Ok(expansions) => {
                println!("Normalizations: {:?}", expansions);
            }
            Err(e) => {
                println!("Normalization failed (might be expected): {:?}", e);
            }
        }
    }

    /// Inputs with interior NUL bytes are rejected by both entry points.
    #[test]
    fn test_error_handling() {
        let result = parse_address("test\0invalid", None);
        assert!(result.is_err(), "Should fail with null bytes in string");
        let result = normalize_string("test\0invalid", None);
        assert!(result.is_err(), "Should fail with null bytes in string");
    }

    /// Option conversion copies the requested values into the libpostal structs.
    #[test]
    fn test_option_conversion() {
        let parse_opts = ParseOptions {
            language: Some("en".to_string()),
            country: Some("US".to_string()),
        };
        let mut language_holder = None;
        let mut country_holder = None;
        let converted =
            convert_parse_options(&parse_opts, &mut language_holder, &mut country_holder)
                .expect("Parse options conversion failed");

        // The holders must own the strings the returned pointers refer to.
        assert!(language_holder.is_some(), "language holder was not filled");
        assert!(country_holder.is_some(), "country holder was not filled");
        assert!(!converted.language.is_null());
        assert!(!converted.country.is_null());
        // SAFETY: both pointers refer to the buffers owned by the holders above,
        // which are still alive here.
        let (language, country) = unsafe {
            (
                CStr::from_ptr(converted.language),
                CStr::from_ptr(converted.country),
            )
        };
        assert_eq!(language.to_bytes(), b"en");
        assert_eq!(country.to_bytes(), b"US");

        let normalize_opts = NormalizeOptions {
            languages: vec!["en".to_string()],
            address_components: 0xFFFF,
            latin_ascii: false,
            transliterate: true,
            strip_accents: false,
            decompose: true,
            lowercase: true,
            trim_string: true,
            replace_word_hyphens: false,
            delete_word_hyphens: false,
            replace_numeric_hyphens: false,
            delete_numeric_hyphens: false,
            split_alpha_from_numeric: false,
            delete_final_periods: true,
            delete_acronym_periods: true,
            drop_english_possessives: true,
            delete_apostrophes: true,
            expand_numex: true,
            roman_numerals: true,
        };
        // SAFETY: the function only calls `libpostal_get_default_options` and
        // copies plain values.
        let converted = unsafe { convert_normalize_options(&normalize_opts) }
            .expect("Normalize options conversion failed");
        assert_eq!(converted.address_components, 0xFFFF);
        assert!(!converted.latin_ascii);
        assert!(converted.transliterate);
        assert!(!converted.strip_accents);
        assert!(converted.lowercase);
        assert!(!converted.replace_word_hyphens);
        assert!(converted.delete_final_periods);
        assert!(converted.roman_numerals);
    }

    /// Documents the `CString` behaviour the FFI layer relies on.
    #[test]
    fn test_cstring_safety() {
        let valid = CString::new("Hello World");
        assert!(valid.is_ok());
        let invalid = CString::new("Hello\0World");
        assert!(invalid.is_err());
        let empty = CString::new("");
        assert!(empty.is_ok());
        let utf8 = CString::new("Héllo Wörld 🌍");
        assert!(utf8.is_ok());
    }

    /// Ten threads calling `initialize` concurrently must all observe success.
    #[test]
    fn test_thread_safety() {
        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};
        use std::thread;

        let success_count = Arc::new(AtomicUsize::new(0));
        let handles: Vec<_> = (0..10)
            .map(|_| {
                let counter = Arc::clone(&success_count);
                thread::spawn(move || {
                    if initialize().is_ok() {
                        counter.fetch_add(1, Ordering::SeqCst);
                    }
                })
            })
            .collect();
        for handle in handles {
            handle.join().unwrap();
        }
        assert_eq!(success_count.load(Ordering::SeqCst), 10);
    }
}