use core::ptr::NonNull;
use std::collections::HashMap;
use std::collections::VecDeque;
use std::fs::File;
use std::io::Read;
use std::marker::PhantomData;
type Value = u8;
type NodePtr<T> = NonNull<Node<T>>;
pub trait NodeExt {
fn get_len(&self) -> usize;
fn eq(&self, other: &Self) -> bool;
fn get_weight(&self) -> usize;
fn get_cate(&self) -> usize;
}
impl NodeExt for usize {
fn get_len(&self) -> usize {
*self
}
fn eq(&self, other: &usize) -> bool {
*self == *other
}
fn get_weight(&self) -> usize {
1
}
fn get_cate(&self) -> usize {
1
}
}
#[derive(Debug)]
pub struct Node<T> {
pub fail: Option<NodePtr<T>>,
pub children: HashMap<Value, NodePtr<T>>,
pub val: Option<Value>,
pub ext: Vec<T>,
}
pub struct Trie<T> {
pub root: Option<NodePtr<T>>,
marker: PhantomData<Box<Node<T>>>,
}
impl<T> Default for Node<T> {
fn default() -> Self {
Node {
fail: None,
children: HashMap::new(),
val: None,
ext: Vec::new(),
}
}
}
impl<T: NodeExt> Node<T> {
pub fn new(val: Value) -> NodePtr<T> {
Box::leak(Box::new(Node {
fail: None,
children: HashMap::new(),
val: Some(val),
ext: Vec::<T>::new(),
}))
.into()
}
pub fn push(&mut self, element: T) {
for e in self.ext.iter_mut() {
if e.eq(&element) {
*e = element;
return;
}
}
self.ext.push(element);
}
}
impl<T> Default for Trie<T> {
fn default() -> Self {
Trie {
root: Some(Box::leak(Box::new(Node::default())).into()),
marker: PhantomData,
}
}
}
impl Trie<usize> {
pub fn add_key_word_from_file(&mut self, file: &str) -> std::io::Result<()> {
let mut file = File::open(file)?;
let mut contents = String::new();
file.read_to_string(&mut contents)?;
contents
.split('\n')
.for_each(|x| self.add_key_word(x.as_bytes().to_vec()));
Ok(())
}
pub fn add_key_word(&mut self, key_word: Vec<Value>) {
let len = key_word.len();
self.add_key_word_ext(key_word, len)
}
}
impl<T: NodeExt + Clone> Trie<T> {
pub fn add_key_word_ext(&mut self, key_word: Vec<Value>, ext: T) {
if key_word.is_empty() {
return;
}
let mut cur = self.root.unwrap();
for (_, val) in key_word.iter().enumerate() {
unsafe {
let temp = (*cur.as_ptr())
.children
.entry(*val)
.or_insert_with(|| Node::<T>::new(*val));
cur = *temp;
}
}
unsafe {
(*cur.as_ptr()).push(ext);
}
}
pub fn build(&mut self) {
let mut queue = VecDeque::new();
unsafe {
let first_level_fail = self.root.unwrap();
for (_i, child) in (*self.root.unwrap().as_ptr()).children.iter() {
(*child.as_ptr()).fail = Some(first_level_fail);
queue.push_back(child);
}
while let Some(node) = queue.pop_front() {
let cur = node;
for (i, child) in (*cur.as_ptr()).children.iter() {
let mut fafail = (*cur.as_ptr()).fail;
while fafail.is_some() && (*fafail.unwrap().as_ptr()).children.get(&i).is_none()
{
fafail = (*fafail.unwrap().as_ptr()).fail;
}
let temp = match fafail {
None => self.root,
Some(v) => {
let children_i = (*v.as_ptr()).children.get(&i).unwrap();
(*children_i.as_ptr()).ext.iter().for_each(|x| {
(*child.as_ptr()).push(x.clone());
});
Some(*(*v.as_ptr()).children.get(&i).unwrap())
}
};
(*child.as_ptr()).fail = temp;
queue.push_back(child)
}
}
}
}
pub fn query<'a>(&self, text: &'a [Value]) -> Vec<&'a [Value]> {
self.query_ext(text)
.iter()
.map(|(i, x)| &text[*i - x.get_len()..*i])
.collect()
}
pub fn query_total_weight(&self, text: &[Value]) -> usize {
self.query_ext(text)
.iter()
.fold(0usize, |acc, (_, x)| acc + x.get_weight())
}
pub fn query_cate_weight(&self, text: &[Value]) -> HashMap<usize, usize> {
let mut result = HashMap::new();
self.query_ext(text).iter().for_each(|(_i, x)| {
if let Some(e) = result.get_mut(&x.get_cate()) {
*e += x.get_weight();
} else {
result.insert(x.get_cate(), x.get_weight());
}
});
result
}
pub fn query_ext<'a>(&self, text: &'a [Value]) -> Vec<(usize, &T)> {
let mut result = Vec::new();
let mut cur = self.root;
for (i, e) in text.iter().enumerate() {
unsafe {
if let Some(v) = cur {
let mut child = v;
while (*child.as_ptr()).children.get(e).is_none() {
if (*child.as_ptr()).fail.is_none() {
child = self.root.unwrap();
break;
}
child = (*child.as_ptr()).fail.unwrap();
}
cur = match (*child.as_ptr()).children.get(e) {
None => self.root,
Some(child) => {
result.append(
&mut (*child.as_ptr()).ext.iter().map(|x| (i + 1, x)).collect(),
);
Some(*child)
}
}
} else {
cur = self.root;
}
}
}
result
}
}
impl<T> Drop for Trie<T> {
fn drop(&mut self) {
let mut queue = VecDeque::new();
let mut queue_drop = VecDeque::new();
unsafe {
let node = self.root.unwrap();
queue.push_back(&node);
queue_drop.push_back(&node);
while let Some(node) = queue.pop_front() {
for (_i, child) in (*node.as_ptr()).children.iter() {
queue_drop.push_back(child);
queue.push_back(child)
}
}
while let Some(node) = queue_drop.pop_back() {
let mm = Box::from_raw(node.as_ptr());
drop(mm);
}
}
self.root = None;
}
}
unsafe impl<T> Send for Trie<T> {}
unsafe impl<T> Sync for Trie<T> {}