1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
//! `kytea-tokenizer` is a wrapper around KyTea, a Japanese morphological analyzer.

pub(crate) mod kytea;
pub use kytea::run_cmd;

mod pos;
pub use pos::PoS;

mod tokenizer;
pub use tokenizer::Surface;
pub use tokenizer::WordIterator;

use tokenizer::{Word, Words};

/// Rebuilds `out` line by line from the words parsed out of each line.
///
/// Every line of `out` is turned into a `Words` sequence; each word is written
/// into the result via `pushed_to` and followed by a single space. Once a
/// line's words are exhausted a `'\n'` is appended. As a consequence every
/// word (including the last one on a line) carries a trailing space, and every
/// line (including the last one) ends with a newline.
///
/// NOTE(review): presumably `out` is raw KyTea output and `pushed_to` writes
/// only the surface form of the word — confirm against the `tokenizer` module.
pub fn strip(out: impl AsRef<str>) -> String {
    let text: &str = out.as_ref();
    let mut buf = String::new();

    text.lines().for_each(|row| {
        for token in Words::from(row) {
            token.pushed_to(&mut buf);
            buf.push(' ');
        }
        // Terminate the rebuilt line, even when it produced no words.
        buf.push('\n');
    });

    buf
}

/// Parses `s` as a single `Word` and returns its surface together with its
/// part of speech.
///
/// The second element is `None` when `Word::surface_and_pos` reports no
/// [`PoS`] for the input.
///
/// NOTE(review): presumably `s` is one token in KyTea's output format —
/// confirm against the `tokenizer` module.
pub fn get_surface_and_pos(s: &str) -> (Surface<'_>, Option<PoS>) {
    let word = Word::from(s);
    word.surface_and_pos()
}