#![doc(hidden)]
use alloc::{vec, vec::Vec};
use bitflags::bitflags;
use by_address::ByAddress;
use const_fn_assert::{cfn_assert, cfn_assert_eq};
use core::{
ops::{Bound, RangeBounds},
ptr,
};
use debug_unreachable::debug_unreachable;
use hashbrown::HashSet;
bitflags! {
pub struct Flags: u8 {
const WORD = 0b0000_0001;
const EXCEPTION = 0b0000_0010;
const SEPARATOR = 0b0000_0100;
const RETURN = 0b0000_1000;
const INTO_REPETITION = 0b0001_0000;
const TAKE_REPETITION = 0b0010_0000;
const INTO_SEPARATOR = 0b0100_0000;
const ACCEPTING = Flags::WORD.bits() | Flags::EXCEPTION.bits();
}
}
impl Default for Flags {
fn default() -> Self {
Self::empty()
}
}
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct Attributes<'a> {
flags: Flags,
word: Option<&'a str>,
}
impl<'a> Attributes<'a> {
pub const fn new(flags: Flags, word: Option<&'a str>) -> Self {
cfn_assert_eq!(flags.contains(Flags::WORD), word.is_some());
cfn_assert!(!flags.contains(Flags::ACCEPTING));
Self { flags, word }
}
#[inline]
pub(crate) fn merge(&mut self, other: Attributes<'a>) {
if self.flags.contains(Flags::WORD) {
assert!(!other.flags.contains(Flags::EXCEPTION));
assert!(!other.flags.contains(Flags::WORD) || other.word == self.word);
}
if self.flags.contains(Flags::EXCEPTION) {
assert!(!other.flags.contains(Flags::WORD));
}
self.flags.insert(other.flags);
self.word = other.word;
}
#[inline]
pub(crate) fn flags(&self) -> Flags {
self.flags
}
#[inline]
pub(crate) fn word(&self) -> Option<&str> {
self.word
}
#[inline]
fn is_word(&self) -> bool {
self.flags.contains(Flags::WORD)
}
#[inline]
fn is_exception(&self) -> bool {
self.flags.contains(Flags::EXCEPTION)
}
#[inline]
fn is_separator(&self) -> bool {
self.flags.contains(Flags::SEPARATOR)
}
#[inline]
fn is_return(&self) -> bool {
self.flags.contains(Flags::RETURN)
}
#[inline]
fn is_into_repetition(&self) -> bool {
self.flags.contains(Flags::INTO_REPETITION)
}
#[inline]
pub(crate) fn remove_into_repetition(&mut self) {
self.flags.remove(Flags::INTO_REPETITION)
}
#[inline]
fn is_take_repetition(&self) -> bool {
self.flags.contains(Flags::TAKE_REPETITION)
}
#[inline]
pub(crate) fn remove_take_repetition(&mut self) {
self.flags.remove(Flags::TAKE_REPETITION)
}
#[inline]
fn is_into_separator(&self) -> bool {
self.flags.contains(Flags::INTO_SEPARATOR)
}
#[inline]
pub(crate) fn remove_into_separator(&mut self) {
self.flags.remove(Flags::INTO_SEPARATOR)
}
#[inline]
fn accepting(&self) -> bool {
self.flags.intersects(Flags::ACCEPTING)
}
}
mod stack {
use super::State;
#[derive(Clone, Debug)]
pub(super) enum Value<'a> {
None,
Return(&'a State<'a>),
Target(&'a State<'a>),
Repetition(&'a State<'a>),
}
#[derive(Debug)]
pub(super) enum Manipulation<'a> {
Push(Value<'a>),
Pop,
}
}
#[derive(Debug)]
struct Transition<'a> {
state: &'a State<'a>,
stack_manipulations: Vec<stack::Manipulation<'a>>,
took_repetition: bool,
}
#[derive(Debug)]
pub struct State<'a> {
pub attributes: Attributes<'a>,
pub c_transitions: fn(char) -> Option<&'a State<'a>>,
pub aliases: &'a [(&'a State<'a>, &'a State<'a>)],
pub graphemes: &'a [&'a State<'a>],
}
impl<'a> State<'a> {
#[inline]
fn is_into_repetition(&self) -> bool {
self.attributes.is_into_repetition()
}
#[inline]
fn is_take_repetition(&self) -> bool {
self.attributes.is_take_repetition()
}
#[inline]
fn is_into_separator(&self) -> bool {
self.attributes.is_into_separator()
}
#[inline]
fn transitions(
&'a self,
c: Option<char>,
s: stack::Value<'a>,
separator: &'a State<'a>,
) -> Vec<Transition<'a>> {
let mut result = Vec::new();
match s {
stack::Value::Repetition(repetition_state) => {
match c {
Some(c) => {
if !self.is_take_repetition() {
if let Some(state) = (self.c_transitions)(c) {
result.push(Transition {
state,
stack_manipulations: vec![],
took_repetition: false,
});
if self.is_into_repetition() {
if let Some(future_same_c_state) = (state.c_transitions)(c) {
if !state.is_into_repetition()
|| !future_same_c_state.is_take_repetition()
{
result.push(Transition {
state,
stack_manipulations: vec![
stack::Manipulation::Push(
stack::Value::Repetition(self),
),
],
took_repetition: false,
});
}
} else {
result.push(Transition {
state,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Repetition(self),
)],
took_repetition: false,
});
}
}
}
}
}
None => {
if self.is_into_separator() {
result.push(Transition {
state: separator,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Return(self),
)],
took_repetition: false,
});
}
if self.is_take_repetition() {
result.push(Transition {
state: repetition_state,
stack_manipulations: vec![
stack::Manipulation::Pop,
stack::Manipulation::Push(stack::Value::Target(self)),
],
took_repetition: true,
})
} else {
for alias in self.aliases {
result.push(Transition {
state: alias.0,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Return(alias.1),
)],
took_repetition: false,
});
if self.is_into_repetition() {
result.push(Transition {
state: alias.0,
stack_manipulations: vec![
stack::Manipulation::Push(stack::Value::Repetition(
self,
)),
stack::Manipulation::Push(stack::Value::Return(
alias.1,
)),
],
took_repetition: false,
})
}
}
for grapheme in self.graphemes {
result.push(Transition {
state: grapheme,
stack_manipulations: vec![],
took_repetition: false,
});
}
}
}
}
}
stack::Value::Target(target_state) => match c {
Some(c) => {
if let Some(state) = (self.c_transitions)(c) {
if state.is_take_repetition() {
if ptr::eq(state, target_state) {
result.push(Transition {
state,
stack_manipulations: vec![stack::Manipulation::Pop],
took_repetition: false,
});
if self.is_into_repetition() {
if let Some(future_same_c_state) = (state.c_transitions)(c) {
if !state.is_into_repetition()
|| !future_same_c_state.is_take_repetition()
{
result.push(Transition {
state,
stack_manipulations: vec![
stack::Manipulation::Pop,
stack::Manipulation::Push(
stack::Value::Repetition(self),
),
],
took_repetition: false,
});
}
} else {
result.push(Transition {
state,
stack_manipulations: vec![
stack::Manipulation::Pop,
stack::Manipulation::Push(
stack::Value::Repetition(self),
),
],
took_repetition: false,
});
}
}
}
} else {
result.push(Transition {
state,
stack_manipulations: vec![],
took_repetition: false,
});
if self.is_into_repetition() {
result.push(Transition {
state,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Repetition(self),
)],
took_repetition: false,
});
}
}
}
}
None => {
for alias in self.aliases {
if ptr::eq(alias.1, target_state) {
result.push(Transition {
state: alias.0,
stack_manipulations: vec![
stack::Manipulation::Pop,
stack::Manipulation::Push(stack::Value::Return(alias.1)),
],
took_repetition: false,
});
if self.is_into_repetition() {
result.push(Transition {
state: alias.0,
stack_manipulations: vec![
stack::Manipulation::Pop,
stack::Manipulation::Push(stack::Value::Repetition(self)),
stack::Manipulation::Push(stack::Value::Return(alias.1)),
],
took_repetition: false,
});
}
}
}
}
},
_ => match c {
Some(c) => {
if let Some(state) = (self.c_transitions)(c) {
result.push(Transition {
state,
stack_manipulations: vec![],
took_repetition: false,
});
if self.is_into_repetition() {
if let Some(future_same_c_state) = (state.c_transitions)(c) {
if !state.is_into_repetition()
|| !future_same_c_state.is_take_repetition()
{
result.push(Transition {
state,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Repetition(self),
)],
took_repetition: false,
});
}
} else {
result.push(Transition {
state,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Repetition(self),
)],
took_repetition: false,
});
}
}
}
}
None => {
if self.is_into_separator() {
result.push(Transition {
state: separator,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Return(self),
)],
took_repetition: false,
});
}
for alias in self.aliases {
result.push(Transition {
state: alias.0,
stack_manipulations: vec![stack::Manipulation::Push(
stack::Value::Return(alias.1),
)],
took_repetition: false,
});
if self.is_into_repetition() {
result.push(Transition {
state: alias.0,
stack_manipulations: vec![
stack::Manipulation::Push(stack::Value::Repetition(self)),
stack::Manipulation::Push(stack::Value::Return(alias.1)),
],
took_repetition: false,
});
}
}
for grapheme in self.graphemes {
result.push(Transition {
state: grapheme,
stack_manipulations: vec![],
took_repetition: false,
});
}
if self.attributes.is_return() {
if let stack::Value::Return(state) = s {
result.push(Transition {
state,
stack_manipulations: vec![stack::Manipulation::Pop],
took_repetition: false,
});
}
}
}
},
}
result
}
}
#[derive(Clone, Debug)]
pub(crate) struct InstantaneousDescription<'a> {
pub state: &'a State<'a>,
stack: Vec<stack::Value<'a>>,
start: usize,
end: usize,
separator_grapheme: bool,
took_repetition: bool,
}
impl<'a> InstantaneousDescription<'a> {
pub(crate) fn new(state: &'a State<'a>, start: usize) -> Self {
Self {
state,
stack: Vec::new(),
start,
end: start,
separator_grapheme: false,
took_repetition: false,
}
}
#[inline]
pub(crate) fn is_accepting(&self) -> bool {
self.state.attributes.accepting() && self.stack.is_empty() && !self.separator_grapheme
}
#[inline]
pub(crate) fn is_word(&self) -> bool {
self.state.attributes.is_word()
}
#[inline]
pub(crate) unsafe fn unwrap_word_unchecked(self) -> &'a str {
match self.state.attributes.word {
Some(word) => word,
None => debug_unreachable!(),
}
}
#[inline]
pub(crate) fn start(&self) -> usize {
self.start
}
#[inline]
pub(crate) fn end(&self) -> usize {
self.end
}
fn transition_with_visited(
&self,
c: Option<char>,
separator: &'a State<'a>,
visited: &mut HashSet<ByAddress<&State<'a>>>,
) -> impl Iterator<Item = InstantaneousDescription<'a>> {
let mut new_ids = Vec::new();
for transition in self
.state
.transitions(
c,
self.stack.last().unwrap_or(&stack::Value::None).clone(),
separator,
)
.iter()
{
if !visited.contains(&ByAddress(transition.state))
|| transition.state.attributes.is_return()
{
let mut new_id = self.clone();
new_id.state = transition.state;
for manipulation in &transition.stack_manipulations {
match manipulation {
stack::Manipulation::Push(value) => new_id.stack.push(value.clone()),
stack::Manipulation::Pop => {
new_id.stack.pop();
}
}
}
if transition.took_repetition {
new_id.took_repetition = true;
}
visited.insert(ByAddress(transition.state));
new_ids.extend(new_id.transition_with_visited(None, separator, visited));
visited.remove(&ByAddress(transition.state));
new_ids.push(new_id);
}
}
new_ids.into_iter()
}
#[inline]
pub(crate) fn transition(
&self,
c: Option<char>,
separator: &'a State<'a>,
) -> impl Iterator<Item = InstantaneousDescription<'a>> {
self.transition_with_visited(c, separator, &mut HashSet::new())
}
pub(crate) fn step(
mut self,
c: char,
separator: &'a State<'a>,
new_grapheme: bool,
) -> impl Iterator<Item = InstantaneousDescription<'a>> {
self.end += c.len_utf8();
if new_grapheme {
self.separator_grapheme = if self.separator_grapheme && self.took_repetition {
true
} else {
self.state.attributes.is_separator()
};
}
self.took_repetition = false;
self.transition(Some(c), separator)
}
}
impl RangeBounds<usize> for InstantaneousDescription<'_> {
#[inline]
fn start_bound(&self) -> Bound<&usize> {
Bound::Included(&self.start)
}
#[inline]
fn end_bound(&self) -> Bound<&usize> {
if self.state.attributes.is_exception() {
Bound::Included(&self.end)
} else {
Bound::Excluded(&self.end)
}
}
}