use std::cell::RefCell;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::fs::File;
use std::io::Read;
use std::rc::{Rc, Weak};
type Value = u8;
type NodePtr = Rc<RefCell<Node>>;
type WeakPtr = Weak<RefCell<Node>>;
#[derive(Debug)]
pub struct Node {
pub fail: WeakPtr,
pub children: HashMap<Value, NodePtr>,
pub val: Option<Value>,
pub key_word_len: Vec<usize>,
}
pub struct Trie {
pub root: NodePtr,
}
impl Default for Node {
fn default() -> Self {
Node {
fail: Weak::new(),
children: HashMap::new(),
val: None,
key_word_len: Vec::new(),
}
}
}
impl Node {
pub fn new(val: Value) -> NodePtr {
Rc::new(RefCell::new(Node {
fail: Weak::new(),
children: HashMap::new(),
val: Some(val),
key_word_len: Vec::new(),
}))
}
}
impl Default for Trie {
fn default() -> Self {
Trie {
root: Rc::new(RefCell::new(Node::default())),
}
}
}
impl Trie {
pub fn add_key_word_from_file(&mut self, file: &str) -> std::io::Result<()> {
let mut file = File::open(file)?;
let mut contents = String::new();
file.read_to_string(&mut contents)?;
contents.split('\n').for_each(|x| self.add_key_word(x.as_bytes().to_vec()));
Ok(())
}
pub fn add_key_word(&mut self, key_word: Vec<Value>) {
let mut cur = self.root.clone();
for (_, val) in key_word.iter().enumerate() {
let temp = cur
.borrow_mut()
.children
.entry(*val)
.or_insert_with(|| Node::new(*val))
.clone();
cur = temp;
}
let c = cur.borrow().key_word_len.clone();
let temp: Vec<&usize> = c.iter().filter(|&x| *x == key_word.len()).collect();
if temp.is_empty() {
cur.borrow_mut().key_word_len.push(key_word.len());
}
}
pub fn build(&mut self) {
let mut queue = VecDeque::new();
let first_level_fail = self.root.clone();
for (_i, child) in self.root.borrow().children.iter() {
child.borrow_mut().fail = Rc::downgrade(&first_level_fail.clone());
queue.push_back(child.clone());
}
while let Some(node) = queue.pop_front() {
let cur = node.borrow();
for (i, child) in cur.children.iter() {
let mut fafail = cur.fail.clone();
while fafail.upgrade().is_some()
&& fafail
.upgrade()
.unwrap()
.borrow()
.children
.get(&i)
.is_none()
{
fafail = fafail.upgrade().unwrap().borrow().fail.clone();
}
let temp = match fafail.upgrade() {
None => Rc::downgrade(&self.root.clone()),
Some(v) => {
let children_i = v.borrow().children.get(&i).unwrap().clone();
children_i.borrow().key_word_len.iter().for_each(|&x| {
child.borrow_mut().key_word_len.push(x);
});
Rc::downgrade(&v.borrow().children.get(&i).unwrap().clone())
}
};
child.borrow_mut().fail = temp;
queue.push_back(child.clone())
}
}
}
pub fn query<'a>(&self, text: &'a [Value]) -> Vec<&'a [Value]> {
let mut result: Vec<&[Value]> = Vec::new();
let mut cur = Rc::downgrade(&self.root);
for (i, e) in text.iter().enumerate() {
if let Some(v) = cur.upgrade() {
let mut child = Rc::downgrade(&v.clone());
while child.upgrade().unwrap().borrow().children.get(e).is_none() {
if child.upgrade().unwrap().borrow().fail.upgrade().is_none() {
child = Rc::downgrade(&self.root.clone());
break;
}
child = child.upgrade().unwrap().borrow().fail.clone();
}
cur = match child.upgrade().unwrap().borrow().children.get(e) {
None => Rc::downgrade(&self.root),
Some(child) => {
result.append(
&mut child
.borrow()
.key_word_len
.iter()
.map(|x| &text[i + 1 - x..i + 1])
.collect(),
);
Rc::downgrade(&child)
}
}
} else {
cur = Rc::downgrade(&self.root);
}
}
result
}
}