1use std::borrow::Borrow;
2use std::iter::Iterator;
3
4mod matrix;
5pub use self::matrix::Matrix;
6
7mod morph;
8pub use self::morph::Morph;
9
10pub mod fst;
11use self::fst::Fst;
12
13pub mod unknown;
14
15pub trait Dic<'a> {
16 type Iterator: Iterator<Item = Morph<&'a str>>;
17 fn lookup_iter(&'a self, input: &'a [u8]) -> Self::Iterator;
18 fn lookup(&'a self, input: &'a [u8]) -> Vec<Morph<&'a str>> {
19 self.lookup_iter(input).collect()
20 }
21 fn lookup_str_iter(&'a self, input: &'a str) -> Self::Iterator {
22 self.lookup_iter(input.as_bytes())
23 }
24 fn lookup_str(&'a self, input: &'a str) -> Vec<Morph<&'a str>> {
25 self.lookup_str_iter(input).collect()
26 }
27}
28
29#[derive(Debug, Clone)]
30pub struct FstDic<T: Borrow<[u8]>> {
31 morph_bytes: T,
32 fst: Fst<T>,
33}
34
35impl<'a> FstDic<&'a [u8]> {
36 pub unsafe fn from_bytes(bytecodes: &'a [u8], morph_bytes: &'a [u8]) -> Self {
37 FstDic {
38 morph_bytes: morph_bytes,
39 fst: Fst::from_bytes(bytecodes),
40 }
41 }
42}
43
44impl FstDic<Vec<u8>> {
45 pub fn build<S: Borrow<str>>(morphs: &[Morph<S>]) -> Self {
46 let mut morph_bytes = Vec::new();
47 let mut fst_inputs = Vec::new();
48 for morph in morphs {
49 let offset = morph_bytes.len();
50 let surface = morph.surface.borrow().as_bytes();
51 fst_inputs.push((surface, offset as u32));
52 morph.encode_native(&mut morph_bytes).unwrap();
53 }
54 fst_inputs.sort();
55 let fst = Fst::build(fst_inputs);
56 FstDic {
57 morph_bytes: morph_bytes,
58 fst: fst,
59 }
60 }
61}
62
63impl<'a, T: Borrow<[u8]>> Dic<'a> for FstDic<T> {
64 type Iterator = Iter<'a>;
65
66 fn lookup_iter(&'a self, input: &'a [u8]) -> Iter<'a> {
67 Iter {
68 morph_bytes: self.morph_bytes.borrow(),
69 iter: self.fst.run_iter(input),
70 }
71 }
72}
73
74pub struct Iter<'a> {
75 morph_bytes: &'a [u8],
76 iter: fst::Iter<'a>,
77}
78
79impl<'a> Iter<'a> {
80 fn fetch_entry(&self, offset: usize) -> Morph<&'a str> {
81 let entry_bytes = &self.morph_bytes[offset..];
82 unsafe { Morph::decode(entry_bytes) }
83 }
84}
85
86impl<'a> Iterator for Iter<'a> {
87 type Item = Morph<&'a str>;
88
89 fn next(&mut self) -> Option<Self::Item> {
90 self.iter.next().map(|acc| self.fetch_entry(acc.0 as usize))
91 }
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a dictionary whose surfaces are all prefixes of "すもも"
    /// (including one duplicated surface) and checks that looking up
    /// "すもも" returns exactly as many entries as were inserted, each equal
    /// to one of the inserted morphs.
    #[test]
    fn test_build_lookup() {
        let morphs = vec![
            Morph { surface: "す", left_id: 1, right_id: 1, weight: 1, contents: "contents 1" },
            Morph { surface: "す", left_id: 2, right_id: 2, weight: 2, contents: "contents 2" },
            Morph { surface: "すも", left_id: 3, right_id: 3, weight: 3, contents: "contents 3" },
            Morph { surface: "すもも", left_id: 4, right_id: 4, weight: 4, contents: "contents 4" },
        ];

        let dict = FstDic::build(&morphs);
        let results = dict.lookup_str("すもも");

        assert_eq!(results.len(), morphs.len());
        for result in results {
            let is_known = morphs.iter().any(|expected| *expected == result);
            assert!(is_known, "invalid result: {:?}", result);
        }
    }
}