1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// External crates this library links against. `#[macro_use]` on `log` makes
// that crate's exported macros available throughout this crate.
#[macro_use] extern crate log;
extern crate byteorder;
extern crate encoding;
extern crate glob;
extern crate bit_set;
// Module layout: `util`, `trie`, `tagger` and `morpheme` are private modules.
// Only `Tagger` and `Morpheme` are re-exported at the crate root, while
// `dictionary` is exposed as a public module in its entirety.
mod util;
mod trie;
mod tagger;
pub use tagger::Tagger;
mod morpheme;
pub use morpheme::Morpheme;
pub mod dictionary;
/// Crate-private alias for `u16`.
/// NOTE(review): presumably holds one UTF-16 code unit — confirm against usage.
type Utf16Char = u16;
/// Crate-private alias for a growable vector of `Utf16Char` values.
/// NOTE(review): presumably a UTF-16 encoded string — confirm against usage.
type Utf16String = Vec<Utf16Char>;
#[cfg(test)]
mod tests {
    use morpheme::MorphemeBuf;
    use std::path::PathBuf;
    use tagger::Tagger;

    /// Builds the `Tagger` shared by the tests from the `data/ipadic`
    /// dictionary directory (a path relative to the working directory).
    ///
    /// Panics via `unwrap` if `Tagger::new` does not succeed.
    fn setup_tagger() -> Tagger {
        Tagger::new(&PathBuf::from("data/ipadic")).unwrap()
    }

    /// Parses a fixed sentence and checks, for each of the seven resulting
    /// morphemes, its `surface`, `feature` and `start`, then checks that the
    /// owned copy produced by `to_owned` carries the same three values.
    #[test]
    fn test_tagger() {
        let tagger = setup_tagger();
        assert_eq!(9, tagger.unknown().space_id);

        let results = tagger.parse("すもももももももものうち");
        assert_eq!(7, results.len());

        // Expected surface form of every morpheme, in output order.
        let surfaces = ["すもも", "も", "もも", "も", "もも", "の", "うち"];
        for (idx, &surface) in surfaces.iter().enumerate() {
            assert_eq!(surface, results[idx].surface);
        }

        // Expected feature string of every morpheme, in output order.
        let features = [
            "名詞,一般,*,*,*,*,すもも,スモモ,スモモ",
            "助詞,係助詞,*,*,*,*,も,モ,モ",
            "名詞,一般,*,*,*,*,もも,モモ,モモ",
            "助詞,係助詞,*,*,*,*,も,モ,モ",
            "名詞,一般,*,*,*,*,もも,モモ,モモ",
            "助詞,連体化,*,*,*,*,の,ノ,ノ",
            "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ",
        ];
        for (idx, &feature) in features.iter().enumerate() {
            assert_eq!(feature, results[idx].feature);
        }

        // Expected `start` of every morpheme. Each value equals the number of
        // characters (not bytes) covered by the surfaces that precede it.
        let starts = [0, 3, 4, 6, 7, 9, 10];
        for (idx, &start) in starts.iter().enumerate() {
            assert_eq!(start, results[idx].start);
        }

        // Converting a morpheme into its owned form must keep every field.
        let owned: MorphemeBuf = results[4].to_owned();
        assert_eq!("もも", owned.surface);
        assert_eq!("名詞,一般,*,*,*,*,もも,モモ,モモ", owned.feature);
        assert_eq!(7, owned.start);
    }

    /// Checks that `wakati` splits the same sentence into the seven expected
    /// surface strings.
    #[test]
    fn test_wakati() {
        let tagger = setup_tagger();
        let results = tagger.wakati("すもももももももものうち");
        assert_eq!(7, results.len());

        let expected: Vec<String> = ["すもも", "も", "もも", "も", "もも", "の", "うち"]
            .iter()
            .map(|word| word.to_string())
            .collect();
        assert_eq!(expected, results);
    }
}