fn find_longest_prefix(nodes: &[Node], value: &str) -> (usize, usize) {
let mut index = nodes.len();
let mut longest = 0;
let value_first_char = value.chars().next();
for (i, node) in nodes.iter().enumerate() {
let mut n = 0;
for (nv, cv) in node.value.chars().zip(value.chars()) {
if nv != cv {
break;
}
n += nv.len_utf8();
}
if n > longest {
index = i;
longest = n;
} else if node.value.chars().next() > value_first_char && longest == 0 {
index = i;
break
}
};
(index, longest)
}
#[allow(unused)]
fn try_merge(node: &mut Node) {
if node.terminal {
return
}
if let Some(ref mut childs) = node.childs {
if childs.len() != 1 {
return
}
} else {
return
};
let mut childs = node.childs.take().unwrap();
try_merge(&mut childs[0]); node.value.push_str(&childs[0].value);
node.terminal = childs[0].terminal; node.childs = childs[0].childs.take();
}
fn add_node(nodes: &mut Vec<Node>, value: String) {
let (i, len) = find_longest_prefix(&*nodes, &value);
if len == 0 {
nodes.insert(i, Node {childs: Some(vec![]), terminal: true, value: value});
} else {
let node_len = nodes[i].value.len();
let value_len = value.len();
if len == node_len {
if len == value_len {
nodes[i].terminal = true;
} else {
add_node(nodes[i].childs.as_mut().unwrap(), value[len..].to_owned());
}
} else {
if len >= value_len {
let remain = nodes[i].value[len..].to_owned(); nodes[i].value = nodes[i].value[..len].to_owned();
let child_of_childs = nodes[i].childs.take(); let child = Node { childs: child_of_childs, terminal: nodes[i].terminal, value: remain
};
nodes[i].childs = Some(vec![child]); nodes[i].terminal = true; } else {
let node_remain = nodes[i].value[len..].to_owned(); nodes[i].value = nodes[i].value[..len].to_owned(); let value_remain = value[len..].to_owned(); let child_of_childs = nodes[i].childs.take(); let child = Node { childs: child_of_childs, terminal: nodes[i].terminal, value: node_remain
};
let mut childs = vec![child]; add_node(&mut childs, value_remain); nodes[i].childs = Some(childs); nodes[i].terminal = false }
}
}
}
#[derive(Debug, PartialEq)]
pub(crate) struct Dict {
root: Vec<Node>
}
impl Dict {
pub fn new() -> Dict {
Dict {
root: Vec::new()
}
}
pub fn load_txt<P: AsRef<std::path::Path>>(txt_file: P) -> std::io::Result<Dict> {
use std::io::{BufRead, BufReader};
let reader = BufReader::new(std::fs::File::open(txt_file)?);
let mut dict = Dict::new();
reader.lines().for_each(|line| {
dict.add(line.as_ref().unwrap());
});
Ok(dict)
}
pub fn add(&mut self, value: &str) {
add_node(&mut self.root, value.to_owned());
}
}
#[derive(Debug, PartialEq)]
pub(crate) struct SizedDict {
pub(crate) root: Box<[SizedNode]>
}
impl core::convert::From<Dict> for SizedDict {
fn from(dict: Dict) -> SizedDict {
SizedDict {
root: dict.root.into_iter().map(|n| n.into()).collect::<Vec<SizedNode>>().into_boxed_slice()
}
}
}
#[derive(Debug, PartialEq)]
struct Node {
childs: Option<Vec<Node>>,
terminal: bool,
value: String,
}
impl core::convert::From<Node> for SizedNode {
fn from(node: Node) -> SizedNode {
SizedNode {
childs: node.childs.unwrap_or(vec![])
.into_iter().map(|c| c.into())
.collect::<Vec<SizedNode>>()
.into_boxed_slice(),
terminal: node.terminal,
value: node.value
}
}
}
#[derive(Debug, PartialEq)]
pub(crate) struct SizedNode {
childs: Box<[SizedNode]>,
terminal: bool,
value: String,
}
#[inline(always)]
fn childs_matched<'a, 'b>(nodes: &'a [SizedNode], value: &'b str) -> Vec<(&'a SizedNode, &'b str)> {
nodes.iter().filter_map(|n|
if value.starts_with(&*n.value) {
Some((n, &value[n.value.len()..]))
} else {
None
}).collect()
}
pub(crate) fn terminals_prefix(nodes: &[SizedNode], value: &str, offset: usize, results: &mut Vec<usize>) {
let mut eval_queue = std::collections::VecDeque::new();
eval_queue.push_back((nodes, &value[offset..], offset));
while let Some((nodes, value, offset)) = eval_queue.pop_front() {
for (child, remain) in childs_matched(nodes, value) {
let new_offset = offset + child.value.len();
if child.terminal {
results.push(new_offset);
}
if child.childs.len() > 0 && remain.len() > 0 {
eval_queue.push_back((&*child.childs, remain, new_offset));
}
}
}
}
#[cfg(test)]
mod tests;