1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
// Crate root.
//
// Declares the crate's internal modules, re-exports `Tagger` and `Morpheme`
// as the public API, and defines two private UTF-16 type aliases.

// `#[macro_use]` pulls the `log` crate's macros into scope for the whole crate.
#[macro_use]
extern crate log;
extern crate byteorder;

mod util;
mod trie;
mod tagger;
mod morpheme;
mod dictionary;

pub use tagger::Tagger;
pub use morpheme::Morpheme;

/// Alias for `u16`; by its name, one UTF-16 code unit.
type Utf16Char = u16;
/// Growable sequence of [`Utf16Char`] values.
type Utf16String = Vec<Utf16Char>;

#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use tagger::Tagger;

    // Builds a `Tagger` from the dictionary directory `data/ipadic` (a
    // relative path, so it depends on the working directory the tests run
    // in). Panics via `unwrap` if `Tagger::new` does not succeed.
    fn setup_tagger() -> Tagger {
        Tagger::new(&PathBuf::from("data/ipadic")).unwrap()
    }

    // Parses a fixed sentence and checks the surface form, feature string
    // and start offset of each of the seven expected results.
    #[test]
    fn test_tagger() {
        let tagger = setup_tagger();
        assert_eq!(9, tagger.unknown().space_id);

        let sentence = "すもももももももものうち";
        let results = tagger.parse(sentence);

        // One row per expected result: (surface, feature, start).
        // The start offsets advance by one per kana, not by UTF-8 byte length.
        let expected = [
            ("すもも", "名詞,一般,*,*,*,*,すもも,スモモ,スモモ", 0),
            ("も", "助詞,係助詞,*,*,*,*,も,モ,モ", 3),
            ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ", 4),
            ("も", "助詞,係助詞,*,*,*,*,も,モ,モ", 6),
            ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ", 7),
            ("の", "助詞,連体化,*,*,*,*,の,ノ,ノ", 9),
            ("うち", "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ", 10),
        ];

        assert_eq!(7, results.len());

        // Three separate passes so that every surface is checked before any
        // feature, and every feature before any offset.
        for (i, &(surface, _, _)) in expected.iter().enumerate() {
            assert_eq!(surface, results[i].surface);
        }
        for (i, &(_, feature, _)) in expected.iter().enumerate() {
            assert_eq!(feature, results[i].feature);
        }
        for (i, &(_, _, start)) in expected.iter().enumerate() {
            assert_eq!(start, results[i].start);
        }
    }

    // Checks that `wakati` splits the same sentence into the seven expected
    // surface strings, in order.
    #[test]
    fn test_wakati() {
        let tagger = setup_tagger();

        let sentence = "すもももももももものうち";
        let results = tagger.wakati(sentence);
        assert_eq!(7, results.len());

        let expected: Vec<String> = ["すもも", "も", "もも", "も", "もも", "の", "うち"]
            .iter()
            .map(|word| word.to_string())
            .collect();
        assert_eq!(expected, results);
    }
}