1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/// Raw bytes of the `usaddr.crfsuite` file, embedded into the compiled
/// artifact at build time via `include_bytes!`.
///
/// The path is resolved relative to this source file. These bytes are the
/// input handed to `Model::new` when `MODEL` is initialized.
pub const ADDRESS_PARSER_DATA: &[u8] = include_bytes!("../usaddr.crfsuite");
mod error;
mod features;
mod tokenize;

use features::tokens_to_features;
pub use tokenize::tokenize;

use crfs::Model;

lazy_static::lazy_static! {
    /// Process-wide model instance, constructed lazily on first access from
    /// the embedded `ADDRESS_PARSER_DATA` bytes.
    ///
    /// # Panics
    ///
    /// The first access panics with "Model failed to load" if `Model::new`
    /// returns an error for the embedded bytes.
    pub static ref MODEL: Model<'static> =
        Model::new(ADDRESS_PARSER_DATA).expect("Model failed to load");
}

/// Splits `address` into tokens and labels each one using the shared `MODEL`.
///
/// Returns `(token, label)` pairs in token order. The pairs are produced by
/// zipping tokens with labels, so the result is as long as the shorter of the
/// two sequences.
///
/// # Errors
///
/// Propagates (via `?`) any error from creating the tagger or from tagging
/// the feature sequence, converted into `crate::error::Error`.
///
/// # Panics
///
/// Dereferencing `MODEL` for the first time panics if the embedded model
/// fails to load.
pub fn parse(address: &str) -> Result<Vec<(String, String)>, crate::error::Error> {
    let tokens = tokenize(address);

    let mut tagger = MODEL.tagger()?;

    // `tokens_to_features` takes its argument by value, so give it a copy and
    // keep the original tokens for pairing with the predicted labels below.
    let features = tokens_to_features(tokens.clone());
    let labels = tagger.tag(&features)?;

    let tagged = tokens
        .into_iter()
        .zip(labels.iter().map(ToString::to_string))
        .collect();

    Ok(tagged)
}