1mod util;
2mod trie;
3mod tagger;
4pub use tagger::Tagger;
5mod morpheme;
6pub use morpheme::{Morpheme, MorphemeBuf};
7pub mod dictionary;
8pub use util::DirLike;
9#[cfg(feature = "zip")]
10pub use util::ZipDir;
11
/// Crate-private alias for `u16`.
/// NOTE(review): the name suggests a single UTF-16 code unit; usage is outside this view — confirm.
type Utf16Char = u16;
/// Owned, growable sequence of [`Utf16Char`] values (a `Vec`).
type Utf16String = Vec<Utf16Char>;
/// Unsized slice of [`Utf16Char`] values; normally used behind a reference.
type Utf16Str = [Utf16Char];
15
#[cfg(test)]
mod tests {
    use crate::dictionary::build;
    use crate::morpheme::MorphemeBuf;
    use crate::tagger::Tagger;
    use std::path::{Path, PathBuf};

    /// Directory passed to the dictionary builder as its input.
    const TEST_DIC_SRC_PATH: &str = "tiny_test_dic/src";
    /// Directory passed to the dictionary builder as its output, and later
    /// handed to `Tagger::new`.
    const TEST_DIC_DST_PATH: &str = "tiny_test_dic/out";

    /// Sentence analysed by both the `parse` and the `wakati` checks.
    const SAMPLE_TEXT: &str = "すもももももももものうち";

    /// Creates a `Tagger` from `TEST_DIC_DST_PATH`, panicking if that fails.
    fn setup_tagger() -> Tagger {
        Tagger::new(&PathBuf::from(TEST_DIC_DST_PATH)).unwrap()
    }

    /// Single entry point: builds the dictionary, then runs the checks.
    ///
    /// Everything lives in one `#[test]` so the steps run in this order;
    /// the checks create their tagger from the directory the build step
    /// was given as its output.
    #[test]
    fn build_dic_then_test_tagger() {
        build_dic();

        test_tagger();
        test_wakati();
    }

    /// Generates the dense matrix file and then runs the dictionary builder
    /// from `TEST_DIC_SRC_PATH` into `TEST_DIC_DST_PATH`.
    fn build_dic() {
        build_matrix_def_file();

        let input_dir = Path::new(TEST_DIC_SRC_PATH);
        let output_dir = Path::new(TEST_DIC_DST_PATH);
        build::build_dic(&input_dir, &output_dir, String::from(","), "UTF-8").unwrap();
    }

    /// Converts `matrix.def.sparse` into `matrix.def` inside the source
    /// directory, passing `i16::MAX` as the default cost, and panics if the
    /// dense file does not exist afterwards.
    fn build_matrix_def_file() {
        let base = Path::new(TEST_DIC_SRC_PATH);
        let sparse = base.join("matrix.def.sparse");
        let dense = base.join("matrix.def");

        build::matrix::convert_sparse2dense(&sparse, &dense, ::std::i16::MAX).unwrap();

        assert!(dense.exists(), "failed to convert: {}", dense.display());
    }

    /// Checks `Tagger::parse` on the sample sentence: morpheme count,
    /// surfaces, features and start values, plus the owned copy of one
    /// morpheme.
    fn test_tagger() {
        let tagger = setup_tagger();
        assert_eq!(9, tagger.unknown().space_id);

        let results = tagger.parse(SAMPLE_TEXT);
        assert_eq!(7, results.len());

        let surfaces = ["すもも", "も", "もも", "も", "もも", "の", "うち"];
        for (idx, surface) in surfaces.iter().enumerate() {
            assert_eq!(*surface, results[idx].surface);
        }

        let features = [
            "名詞,一般,*,*,*,*,すもも,スモモ,スモモ",
            "助詞,係助詞,*,*,*,*,も,モ,モ",
            "名詞,一般,*,*,*,*,もも,モモ,モモ",
            "助詞,係助詞,*,*,*,*,も,モ,モ",
            "名詞,一般,*,*,*,*,もも,モモ,モモ",
            "助詞,連体化,*,*,*,*,の,ノ,ノ",
            "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ",
        ];
        for (idx, feature) in features.iter().enumerate() {
            assert_eq!(*feature, results[idx].feature);
        }

        // The expected starts line up with the character counts of the
        // preceding surfaces (e.g. 3 after "すもも"), not UTF-8 byte lengths.
        let starts = [0, 3, 4, 6, 7, 9, 10];
        for (idx, start) in starts.iter().enumerate() {
            assert_eq!(*start, results[idx].start);
        }

        // The owned copy must carry the same data as the morpheme it came from.
        let owned: MorphemeBuf = results[4].to_owned();
        assert_eq!("もも", owned.surface);
        assert_eq!("名詞,一般,*,*,*,*,もも,モモ,モモ", owned.feature);
        assert_eq!(7, owned.start);
    }

    /// Checks `Tagger::wakati` on the sample sentence against the expected
    /// list of surface strings.
    fn test_wakati() {
        let tagger = setup_tagger();

        let results = tagger.wakati(SAMPLE_TEXT);
        assert_eq!(7, results.len());

        let expected: Vec<String> = ["すもも", "も", "もも", "も", "もも", "の", "うち"]
            .iter()
            .map(|piece| piece.to_string())
            .collect();
        assert_eq!(expected, results);
    }
}