// igo/lib.rs

1mod util;
2mod trie;
3mod tagger;
4pub use tagger::Tagger;
5mod morpheme;
6pub use morpheme::{Morpheme, MorphemeBuf};
7pub mod dictionary;
8pub use util::DirLike;
9#[cfg(feature = "zip")]
10pub use util::ZipDir;
11
12type Utf16Char = u16;
13type Utf16String = Vec<Utf16Char>;
14type Utf16Str = [Utf16Char];
15
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};
    use crate::tagger::Tagger;
    use crate::morpheme::MorphemeBuf;
    use crate::dictionary::build;

    /// Directory holding the dictionary sources used by the tests.
    const TEST_DIC_SRC_PATH: &str = "tiny_test_dic/src";
    /// Directory the dictionary is built into and that the tagger is loaded from.
    const TEST_DIC_DST_PATH: &str = "tiny_test_dic/out";
    /// Sentence fed to both the `parse` and the `wakati` checks.
    const TEST_TEXT: &str = "すもももももももものうち";

    /// Creates a `Tagger` from `TEST_DIC_DST_PATH`.
    ///
    /// Panics if the tagger cannot be constructed.
    fn setup_tagger() -> Tagger {
        let dic_dir = PathBuf::from(TEST_DIC_DST_PATH);
        Tagger::new(&dic_dir).unwrap()
    }

    /// Single entry point for the whole suite.
    ///
    /// The tagger checks load from the directory that `build_dic` writes to,
    /// so the steps run in a fixed order inside one `#[test]` instead of as
    /// independent tests.
    #[test]
    fn build_dic_then_test_tagger() {
        build_dic();

        test_tagger();
        test_wakati();
    }

    /// Generates `matrix.def`, then builds the dictionary from
    /// `TEST_DIC_SRC_PATH` into `TEST_DIC_DST_PATH`.
    ///
    /// `","` and `"UTF-8"` are presumably the field delimiter and the source
    /// encoding -- confirm against `build::build_dic`.
    fn build_dic() {
        build_matrix_def_file();

        let src_dir = Path::new(TEST_DIC_SRC_PATH);
        let dst_dir = Path::new(TEST_DIC_DST_PATH);
        build::build_dic(&src_dir, &dst_dir, ",".to_string(), "UTF-8").unwrap();
    }

    /// Converts `matrix.def.sparse` into `matrix.def` inside the source
    /// directory, passing `i16::MAX` as the default cost.
    ///
    /// Panics if the conversion reports an error or if the output file does
    /// not exist afterwards.
    fn build_matrix_def_file() {
        let src_dir = Path::new(TEST_DIC_SRC_PATH);
        let sparse_matrix = src_dir.join("matrix.def.sparse");
        let dense_matrix = src_dir.join("matrix.def");
        let default_cost = ::std::i16::MAX;
        build::matrix::convert_sparse2dense(&sparse_matrix, &dense_matrix, default_cost).unwrap();

        assert!(dense_matrix.exists(), "failed to convert: {}", dense_matrix.display());
    }

    /// Checks `Tagger::parse` against the expected surface, feature and start
    /// of every morpheme in `TEST_TEXT`, and the conversion to `MorphemeBuf`.
    fn test_tagger() {
        let tagger = setup_tagger();
        assert_eq!(9, tagger.unknown().space_id);

        let results = tagger.parse(TEST_TEXT);

        // (surface, feature, start) for each morpheme, in output order.
        let expected = [
            ("すもも", "名詞,一般,*,*,*,*,すもも,スモモ,スモモ", 0),
            ("も", "助詞,係助詞,*,*,*,*,も,モ,モ", 3),
            ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ", 4),
            ("も", "助詞,係助詞,*,*,*,*,も,モ,モ", 6),
            ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ", 7),
            ("の", "助詞,連体化,*,*,*,*,の,ノ,ノ", 9),
            ("うち", "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ", 10),
        ];
        assert_eq!(expected.len(), results.len());

        for (i, &(surface, feature, start)) in expected.iter().enumerate() {
            assert_eq!(surface, results[i].surface, "surface of morpheme #{}", i);
            assert_eq!(feature, results[i].feature, "feature of morpheme #{}", i);
            assert_eq!(start, results[i].start, "start of morpheme #{}", i);
        }

        // MorphemeBuf test: the owned copy must carry the same field values.
        let buf: MorphemeBuf = results[4].to_owned();
        assert_eq!("もも", buf.surface);
        assert_eq!("名詞,一般,*,*,*,*,もも,モモ,モモ", buf.feature);
        assert_eq!(7, buf.start);
    }

    /// Checks that `Tagger::wakati` splits `TEST_TEXT` into the expected
    /// seven surface strings.
    fn test_wakati() {
        let tagger = setup_tagger();

        let results = tagger.wakati(TEST_TEXT);
        assert_eq!(7, results.len());

        // Iterate an array directly; no temporary `Vec` is needed here.
        let v = ["すもも", "も", "もも", "も", "もも", "の", "うち"]
            .iter()
            .map(|word| word.to_string())
            .collect::<Vec<_>>();
        assert_eq!(v, results);
    }
}