#![no_std]
extern crate alloc;
pub mod censor;
pub mod codegen;
pub mod pda;
mod constants;
use alloc::{string::String, vec, vec::Vec};
use constants::{EXCEPTION_INDEX, SEPARATOR_INDEX, WORD_INDEX};
use core::{cmp, iter::FromIterator};
use nested_containment_list::NestedContainmentList;
use pda::{InstantaneousDescription, State};
use unicode_segmentation::UnicodeSegmentation;
/// A word filter driven by a fixed-size table of `N` [`State`]s.
///
/// The methods on this type walk `states` over an input string to locate, check for, or
/// censor matches.
#[derive(Debug)]
pub struct WordFilter<'a, const N: usize> {
/// The state table.
///
/// Methods in this file index it with `WORD_INDEX`, `EXCEPTION_INDEX` and
/// `SEPARATOR_INDEX`, so the array must be long enough to hold those entries.
// NOTE(review): public but hidden from docs — presumably filled in by generated code
// (see the `codegen` module) rather than written by hand; confirm.
#[doc(hidden)]
pub states: [State<'a>; N],
}
impl<'a, const N: usize> WordFilter<'a, N> {
/// Creates the instantaneous descriptions that begin a match at byte offset `start`.
///
/// Two descriptions are always seeded: one at the `WORD_INDEX` state and one at the
/// `EXCEPTION_INDEX` state. Every description produced by taking a `None` transition from
/// either seed (with the `SEPARATOR_INDEX` state supplied) is appended after them.
///
/// # Panics
///
/// Panics if `states` is too short to contain any of the three indexed states.
fn spawn_entry_ids(
    &'a self,
    start: usize,
) -> impl Iterator<Item = InstantaneousDescription<'_>> {
    let mut entries = vec![
        InstantaneousDescription::new(&self.states[WORD_INDEX], start),
        InstantaneousDescription::new(&self.states[EXCEPTION_INDEX], start),
    ];
    // Collected into a temporary first: `entries` cannot be extended while it is still
    // borrowed by the iterator reading the seeds.
    let reachable: Vec<_> = entries
        .iter()
        .flat_map(|seed| seed.transition(None, &self.states[SEPARATOR_INDEX]))
        .collect();
    entries.extend(reachable);
    entries.into_iter()
}
/// Runs the filter over `input` and yields the top-level accepted word matches.
///
/// The input is consumed one grapheme cluster at a time. At each grapheme boundary fresh
/// entry descriptions are spawned at the current byte offset; every live description is then
/// stepped over each `char` of the grapheme. After the whole grapheme has been consumed, the
/// descriptions that report `is_accepting()` are recorded.
///
/// The recorded descriptions are placed in a `NestedContainmentList`; only its top-level
/// elements are iterated, and of those only the ones for which `is_word()` holds are yielded.
fn compute(&'a self, input: &str) -> impl Iterator<Item = InstantaneousDescription<'_>> {
    let mut active = Vec::new();
    let mut accepted = Vec::new();
    // Byte offset into `input` of the next `char` to be consumed.
    let mut offset = 0;
    for grapheme in input.graphemes(true) {
        // Any grapheme boundary may be the start of a match.
        active.extend(self.spawn_entry_ids(offset));
        for (position, c) in grapheme.chars().enumerate() {
            let mut stepped = Vec::new();
            for id in active {
                // The final flag tells `step` whether `c` opens the grapheme.
                stepped.extend(id.step(c, &self.states[SEPARATOR_INDEX], position == 0));
            }
            active = stepped;
            offset += c.len_utf8();
        }
        // Acceptance is sampled only between graphemes, never in the middle of one.
        accepted.extend(active.iter().filter(|id| id.is_accepting()).cloned());
    }
    NestedContainmentList::from_iter(accepted)
        .into_iter()
        .map(|element| element.value)
        .filter(|id| id.is_word())
}
/// Finds the matches in `input` and yields the `&str` carried by each one.
///
/// The yielded strings borrow from the filter (`'a`), not from `input`; use `find_raw` to
/// get the matching slices of `input` itself.
#[inline]
pub fn find(&'a self, input: &str) -> impl Iterator<Item = &str> {
    let matches = self.compute(input);
    // SAFETY: `compute` only yields descriptions for which `is_word()` returned `true`.
    // NOTE(review): that is presumably the exact precondition of `unwrap_word_unchecked`;
    // confirm against its `# Safety` section in `pda`.
    matches.map(|word_id| unsafe { word_id.unwrap_word_unchecked() })
}
/// Finds the matches in `input` and yields, for each one, the slice of `input` it spans.
///
/// Each yielded item is `input[id.start()..id.end()]` for a match `id`, so the result
/// borrows from `input` as well as from the filter.
#[inline]
pub fn find_raw<'b, 'c>(&'a self, input: &'b str) -> impl Iterator<Item = &'c str>
where
    'a: 'c,
    'b: 'c,
{
    let matches = self.compute(input);
    matches.map(move |id| {
        let span = id.start()..id.end();
        // SAFETY: `compute` starts every match at a byte offset accumulated from
        // `char::len_utf8` over `input`, so `start` is an in-bounds char boundary.
        // NOTE(review): `end` is maintained inside `pda`; it is assumed to advance the same
        // way and stay on a char boundary within `input` — confirm.
        unsafe { input.get_unchecked(span) }
    })
}
/// Returns `true` if `input` contains at least one match.
#[inline]
#[must_use]
pub fn check(&'a self, input: &str) -> bool {
    let mut matches = self.compute(input);
    matches.next().is_some()
}
/// Returns a copy of `input` in which every match has been passed through `censor`.
///
/// Text between matches is copied verbatim. For each match, `censor` receives the slice of
/// `input` the match spans and its return value is appended in place of that slice. If a
/// match starts before the end of the previous one, only the part not already emitted is
/// handed to `censor`.
#[must_use]
pub fn censor_with(&'a self, input: &str, censor: fn(&str) -> String) -> String {
    let mut censored = String::with_capacity(input.len());
    // Byte offset up to which `input` has already been written to `censored`.
    let mut cursor = 0;
    for id in self.compute(input) {
        let (start, end) = (id.start(), id.end());
        if cursor < start {
            // SAFETY: `cursor` is either 0 or a previous match's `end`, and `start` is a
            // byte offset accumulated from `char::len_utf8` in `compute`.
            // NOTE(review): relies on `end` always being an in-bounds char boundary of
            // `input`, which is maintained inside `pda` — confirm.
            let untouched = unsafe { input.get_unchecked(cursor..start) };
            censored.push_str(untouched);
        }
        // Clamp to `cursor` so text emitted for an overlapping earlier match is not repeated.
        // SAFETY: same boundary assumptions as above.
        let covered = unsafe { input.get_unchecked(cmp::max(start, cursor)..end) };
        censored.push_str(&censor(covered));
        cursor = end;
    }
    // SAFETY: `cursor` is 0 or the last match's `end`; same boundary assumption as above.
    let tail = unsafe { input.get_unchecked(cursor..) };
    censored.push_str(tail);
    censored
}
/// Returns a copy of `input` with every match censored.
///
/// Equivalent to calling `censor_with` with the function produced by
/// `censor::replace_graphemes_with!("*")`.
#[inline]
#[must_use]
pub fn censor(&'a self, input: &str) -> String {
    let mask: fn(&str) -> String = censor::replace_graphemes_with!("*");
    self.censor_with(input, mask)
}
}