1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#[macro_use] extern crate log;
extern crate byteorder;
extern crate encoding;
extern crate glob;
extern crate bit_set;

// Crate-private submodules; their definitions live in separate source files.
mod util;
mod trie;
mod tagger;
// Re-export `Tagger` so callers can name it directly from the crate root.
pub use tagger::Tagger;
mod morpheme;
// Re-export `Morpheme` from the crate root as well.
pub use morpheme::Morpheme;
// The only submodule exposed publicly as a module path.
pub mod dictionary;

/// Alias for `u16`; going by the name, presumably one UTF-16 code unit
/// (NOTE(review): confirm against the modules that use it).
type Utf16Char = u16;
/// Growable sequence of [`Utf16Char`] values.
type Utf16String = Vec<Utf16Char>;


#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use tagger::Tagger;
    use morpheme::MorphemeBuf;

    /// Sentence analysed by every test in this module.
    const TEXT: &'static str = "すもももももももものうち";

    /// Builds a `Tagger` from the dictionary directory `data/ipadic`.
    ///
    /// Panics (via `unwrap`) when `Tagger::new` returns an error.
    fn setup_tagger() -> Tagger {
        Tagger::new(&PathBuf::from("data/ipadic")).unwrap()
    }

    /// Checks surface, feature and start of each morpheme produced by
    /// `parse`, then the same fields on an owned `MorphemeBuf` copy.
    #[test]
    fn test_tagger() {
        let tagger = setup_tagger();
        assert_eq!(9, tagger.unknown().space_id);

        // One row per expected morpheme: (surface, feature, start).
        let expected = [
            ("すもも", "名詞,一般,*,*,*,*,すもも,スモモ,スモモ", 0),
            ("も", "助詞,係助詞,*,*,*,*,も,モ,モ", 3),
            ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ", 4),
            ("も", "助詞,係助詞,*,*,*,*,も,モ,モ", 6),
            ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ", 7),
            ("の", "助詞,連体化,*,*,*,*,の,ノ,ノ", 9),
            ("うち", "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ", 10),
        ];

        let morphemes = tagger.parse(TEXT);
        assert_eq!(7, morphemes.len());

        // Three passes keep the check order: all surfaces, then all
        // features, then all start offsets.
        for (idx, row) in expected.iter().enumerate() {
            assert_eq!(row.0, morphemes[idx].surface);
        }
        for (idx, row) in expected.iter().enumerate() {
            assert_eq!(row.1, morphemes[idx].feature);
        }
        for (idx, row) in expected.iter().enumerate() {
            assert_eq!(row.2, morphemes[idx].start);
        }

        // MorphemeBuf test: the owned copy must carry the same field values.
        let owned: MorphemeBuf = morphemes[4].to_owned();
        assert_eq!("もも", owned.surface);
        assert_eq!("名詞,一般,*,*,*,*,もも,モモ,モモ", owned.feature);
        assert_eq!(7, owned.start);
    }

    /// Checks that `wakati` returns exactly the expected surface strings.
    #[test]
    fn test_wakati() {
        let tagger = setup_tagger();

        let tokens = tagger.wakati(TEXT);
        assert_eq!(7, tokens.len());

        let expected: Vec<String> = ["すもも", "も", "もも", "も", "もも", "の", "うち"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert_eq!(expected, tokens);
    }
}