modeling 0.6.2

Modeling is a tool for analyzing different languages with Ctags
Documentation
pub mod camelcase_tok;
pub mod stop_words;

use crate::segment::camelcase_tok::CamelCaseTok;
use tokenizers::{OffsetReferential, OffsetType, PreTokenizedString, PreTokenizer};

/// Splits `str` into a list of owned segments.
///
/// This is a direct pass-through to [`segment_camelcase`]; its result is
/// returned unchanged. For example, the module's unit test expects
/// `"HierarchyId"` to yield `["Hierarchy", "Id"]`.
pub fn segment(str: &str) -> Vec<String> {
    segment_camelcase(str)
}

/// Splits `str` into pieces using the [`CamelCaseTok`] pre-tokenizer.
///
/// The input is wrapped in a [`PreTokenizedString`], run through a
/// default-constructed `CamelCaseTok`, and the text of every resulting split is
/// copied into an owned `String`. The offsets and token data reported alongside
/// each split are discarded.
///
/// # Panics
///
/// Panics if `CamelCaseTok::pre_tokenize` returns an error.
pub fn segment_camelcase(str: &str) -> Vec<String> {
    let pretok = CamelCaseTok::default();

    let mut pretokenized = PreTokenizedString::from(str);
    pretok
        .pre_tokenize(&mut pretokenized)
        .expect("CamelCaseTok failed to pre-tokenize the input");

    // Only the split text is kept; the return type drives `collect`, so no
    // intermediate binding or turbofish is needed.
    pretokenized
        .get_splits(OffsetReferential::Original, OffsetType::Byte)
        .into_iter()
        .map(|(piece, _offsets, _tokens)| piece.to_string())
        .collect()
}

#[cfg(test)]
mod tests {
    use crate::segment::segment;

    /// A camel-case identifier is split at the lower-to-upper boundary.
    #[test]
    fn should_segmentation() {
        let expected = vec![String::from("Hierarchy"), String::from("Id")];
        let actual = segment("HierarchyId");

        assert_eq!(expected, actual);
    }
}