1use std::{
16 collections::HashMap,
17 collections::hash_map::Entry,
18 io::{Read, BufRead, BufReader},
19 mem,
20};
21
22#[cfg(feature = "builtin_dicts")]
23pub mod dicts;
24
25#[cfg(test)]
26mod tests;
27
28#[derive(Debug, Clone)]
50pub struct Dict {
51 roots: Vec<DictNode>,
52}
53
54#[derive(Debug, Clone)]
55struct Leaf {
56 key: Box<str>,
57 value: Box<str>,
58}
59
60#[derive(Debug, Clone, Default)]
61struct Node {
62 value: Option<Box<str>>,
63 tails: HashMap<char, DictNode>,
64}
65
66#[derive(Debug, Clone)]
67enum DictNode {
68 Leaf (Leaf),
69 Node (Node),
70}
71
72impl DictNode {
73 fn node() -> Self {
74 DictNode::Node (
75 Node::default()
76 )
77 }
78
79 fn unwrap_node_mut(&mut self) -> &mut Node {
80 match self {
81 DictNode::Node (node) => node,
82 DictNode::Leaf (_) => panic!("expect Node, found Leaf"),
83 }
84
85 }
86
87 fn into_leaf(self) -> Leaf {
88 match self {
89 DictNode::Leaf (leaf) => leaf,
90 DictNode::Node (_) => panic!("expect Leaf, found Node"),
91 }
92 }
93
94 fn leaf(key: &str, value: Box<str>) -> Self {
95 DictNode::Leaf (
96 Leaf {
97 key: key.into(),
98 value,
99 }
100 )
101 }
102
103 fn add(&mut self, key: &str, value: Box<str>) {
104 let self_node = match self {
105 DictNode::Node (node) => node,
106 DictNode::Leaf (_) => {
107 let node = Node::default();
108 let leaf = mem::replace(self, DictNode::Node(node));
109 let Leaf { key, value } = leaf.into_leaf();
110 let mut node = self.unwrap_node_mut();
111 let mut key_chars = key.chars();
112 node.value = if let Some(hash_key) = key_chars.next() {
113 let suffix = key_chars.as_str().into();
114 node.tails.insert(hash_key, DictNode::leaf(suffix, value));
115 None
116 } else {
117 Some(value)
118 };
119 node
120 }
121 };
122
123 let mut key_chars = key.chars();
124 if let Some(hash_key) = key_chars.next() {
125 let suffix = key_chars.as_str().into();
126 match self_node.tails.entry(hash_key) {
127 Entry::Occupied(mut entry) => {
128 entry.get_mut().add(suffix, value);
129 }
130 Entry::Vacant(entry) => {
131 entry.insert(DictNode::leaf(suffix, value));
132 }
133 };
134 } else {
135 self_node.value = Some(value);
136 }
137 }
138
139 fn prefix_match<'a, 'b>(&'a self, query: &'b str)
140 -> Option<(&'b str, &'a str)> {
141 match self {
142 &DictNode::Leaf ( Leaf { ref key, ref value } ) => {
143 if query.starts_with(&**key) {
144 Some((&query[..key.len()], &value))
145 } else {
146 None
147 }
148 },
149 &DictNode::Node ( Node { ref value, ref tails } ) => {
150 let mut query_chars = query.chars();
151 let hash_key = query_chars.next();
152 let suffix = query_chars.as_str();
153
154 hash_key.and_then(|hash_key| {
155 tails.get(&hash_key)
156 .and_then(|node| node.prefix_match(suffix))
157 .map(|(prefix, value)| {
158 let n = query.len() - suffix.len() + prefix.len();
159 (&query[..n], value)
160 })
161 }).or_else(||
162 value.as_ref().map(|v| ("", &**v))
163 )
164 }
165 }
166 }
167}
168
169impl Dict {
170 pub fn load_str<T>(raw: T) -> Self
172 where T: AsRef<str> {
173 Dict::load_lines(raw.as_ref().lines())
174 }
175
176 pub fn load_lines<T, S>(lines: T) -> Self
178 where T: Iterator<Item=S>,
179 S: AsRef<str> {
180 let mut root = DictNode::node();
181 lines.filter_map(|line| {
182 let mut cols = line.as_ref().splitn(2, '\t');
183 let key = cols.next()?;
184 let value = cols.next()?.splitn(2, ' ').next()?;
185 Some((key.into(), value.into()))
186 }).for_each(|(key, value): (String, Box<str>)| {
187 root.add(&key, value)
188 });
189 Dict { roots: vec![root] }
190 }
191
192 pub fn load<T>(reader: T) -> Self
195 where T: Read {
196 let lines = BufReader::new(reader).lines().filter_map(|l| l.ok());
197 Dict::load_lines(lines)
198 }
199
200 pub fn chain(self, other: Dict) -> Self {
202 let Dict { mut roots } = self;
203 roots.extend(other.roots);
204 Dict { roots }
205 }
206
207 fn replace(dict: &DictNode, mut text: &str) -> String {
208 let mut output = String::with_capacity(text.len());
209 while !text.is_empty() {
210 match dict.prefix_match(text) {
211 Some((prefix, value)) => {
212 output.push_str(value);
213 text = &text[prefix.len()..];
214 },
215 None => {
216 let mut chars = text.chars();
217 output.push(chars.next().unwrap());
218 text = chars.as_str();
219 }
220 }
221 }
222 output
223 }
224
225 pub fn replace_all(&self, text: &str) -> String {
228 let mut buffer = Dict::replace(&self.roots[0], text);
229 for dict in &self.roots[1..] {
230 buffer = Dict::replace(dict, &buffer);
231 }
232 buffer
233 }
234}