use std::cell::Cell;
use std::num::NonZeroU32;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::compiler::Atom;
// Submodules of this regular-expression module, all private to the crate.
// NOTE(review): their contents are not visible from this file; consult each
// module's own documentation for what it provides.
pub(crate) mod bitmapset;
// NOTE(review): presumably the "FastVM" named in the message of
// `Error::FastIncompatible` — confirm.
pub(crate) mod fast;
pub(crate) mod hir;
pub(crate) mod parser;
pub(crate) mod thompson;
/// Default scan limit.
///
/// NOTE(review): the unit of this limit and the code that consumes it are not
/// visible in this part of the file — confirm what is being bounded before
/// changing the value.
pub(crate) const DEFAULT_SCAN_LIMIT: u16 = 4096;
/// Maximum number of alternatives in an alternation.
///
/// The message of [`Error::TooManyAlternatives`] hard-codes this same value
/// (`255`); update both together.
pub(crate) const MAX_ALTERNATIVES: usize = 255;
/// Errors related to regular expressions.
///
/// The `Display` text of each variant is the string given in its
/// `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub(crate) enum Error {
/// The regular expression is too large.
// NOTE(review): the size check that raises this is not in this part of the file.
#[error("regexp is too large")]
TooLarge,
/// An alternation has too many alternatives. The message hard-codes the
/// limit as 255, the same value as `MAX_ALTERNATIVES`.
#[error("too many alternatives in alternation (max: 255)")]
TooManyAlternatives,
/// The regular expression is incompatible with FastVM.
// NOTE(review): presumably FastVM lives in the `fast` submodule — confirm.
#[error("regexp is incompatible with FastVM")]
FastIncompatible,
}
/// An [`Atom`] paired with optional forward and backward code locations.
///
/// NOTE(review): presumably the locations point at the code that continues
/// matching forwards/backwards from the atom — confirm against the users of
/// this type.
#[derive(Clone, Eq, PartialEq, Debug)]
pub(crate) struct RegexpAtom {
/// The atom itself.
pub atom: Atom,
/// Location of the forward code, if any.
pub fwd_code: Option<FwdCodeLoc>,
/// Location of the backward code, if any.
pub bck_code: Option<BckCodeLoc>,
}
impl RegexpAtom {
    /// Consumes this value and returns it with its atom replaced by the
    /// result of calling `make_wide` on it. The code locations are left
    /// untouched.
    #[inline]
    pub fn make_wide(mut self) -> Self {
        let widened = self.atom.make_wide();
        self.atom = widened;
        self
    }

    /// Forwards `yes` to `set_exact` on the inner atom and hands back `self`
    /// so that calls can be chained.
    #[inline]
    pub fn set_exact(&mut self, yes: bool) -> &mut Self {
        let atom = &mut self.atom;
        atom.set_exact(yes);
        self
    }

    /// Returns the length reported by the inner atom.
    #[inline]
    pub fn len(&self) -> usize {
        let atom = &self.atom;
        atom.len()
    }
}
/// Common interface of the code-location types in this file
/// ([`FwdCodeLoc`] and [`BckCodeLoc`]).
///
/// Implementors must be constructible from a `usize` location.
pub(crate) trait CodeLoc: From<usize> {
/// Returns the location as a `usize`.
fn location(&self) -> usize;
/// Returns `true` for a backward code location and `false` for a forward
/// one.
fn backwards(&self) -> bool;
}
/// Location of forward code, stored as a [`NonZeroU32`].
///
/// The wrapped integer is the location plus one: the `From<usize>`
/// implementation adds the offset and [`CodeLoc::location`] removes it, so
/// location `0` is representable even though the stored value is never zero.
// NOTE(review): presumably `NonZeroU32` was chosen so that
// `Option<FwdCodeLoc>` needs no extra space — confirm.
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub(crate) struct FwdCodeLoc(NonZeroU32);
impl From<usize> for FwdCodeLoc {
    /// Builds a forward code location from a zero-based `usize` location.
    ///
    /// The location is stored as `value + 1` so that it can live in a
    /// [`NonZeroU32`]; [`CodeLoc::location`] undoes the offset.
    ///
    /// # Panics
    ///
    /// Panics if `value + 1` cannot be represented as a `u32`, i.e. if
    /// `value >= u32::MAX as usize`.
    fn from(value: usize) -> Self {
        let value = u32::try_from(value)
            .expect("code location does not fit in a u32");
        // `checked_add` makes the `u32::MAX` case an explicit failure instead
        // of relying on an arithmetic-overflow panic (debug builds) or on a
        // silent wrap to zero that `NonZeroU32::new` then rejects (release
        // builds).
        let stored = value
            .checked_add(1)
            .and_then(NonZeroU32::new)
            .expect("code location is too large to be stored as `location + 1`");
        Self(stored)
    }
}
impl CodeLoc for FwdCodeLoc {
    /// Returns the zero-based location, removing the `+ 1` offset under
    /// which the value is stored.
    #[inline]
    fn location(&self) -> usize {
        let stored: u32 = self.0.get();
        // `stored` is never zero, so the subtraction cannot underflow.
        stored as usize - 1
    }

    /// Always `false`: this type denotes a forward code location.
    #[inline]
    fn backwards(&self) -> bool {
        false
    }
}
/// Location of backward code, stored as a [`NonZeroU32`].
///
/// The wrapped integer is the location plus one: the `From<usize>`
/// implementation adds the offset and [`CodeLoc::location`] removes it, so
/// location `0` is representable even though the stored value is never zero.
// NOTE(review): presumably `NonZeroU32` was chosen so that
// `Option<BckCodeLoc>` needs no extra space — confirm.
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub(crate) struct BckCodeLoc(NonZeroU32);
impl From<usize> for BckCodeLoc {
    /// Builds a backward code location from a zero-based `usize` location.
    ///
    /// The location is stored as `value + 1` so that it can live in a
    /// [`NonZeroU32`]; [`CodeLoc::location`] undoes the offset.
    ///
    /// # Panics
    ///
    /// Panics if `value + 1` cannot be represented as a `u32`, i.e. if
    /// `value >= u32::MAX as usize`.
    fn from(value: usize) -> Self {
        let value = u32::try_from(value)
            .expect("code location does not fit in a u32");
        // `checked_add` makes the `u32::MAX` case an explicit failure instead
        // of relying on an arithmetic-overflow panic (debug builds) or on a
        // silent wrap to zero that `NonZeroU32::new` then rejects (release
        // builds).
        let stored = value
            .checked_add(1)
            .and_then(NonZeroU32::new)
            .expect("code location is too large to be stored as `location + 1`");
        Self(stored)
    }
}
impl CodeLoc for BckCodeLoc {
    /// Returns the zero-based location, removing the `+ 1` offset under
    /// which the value is stored.
    #[inline]
    fn location(&self) -> usize {
        let stored: u32 = self.0.get();
        // `stored` is never zero, so the subtraction cannot underflow.
        stored as usize - 1
    }

    /// Always `true`: this type denotes a backward code location.
    #[inline]
    fn backwards(&self) -> bool {
        true
    }
}
/// Two-state signal: keep going or stop.
///
/// NOTE(review): the code that produces and consumes this value is not
/// visible in this part of the file — presumably it is returned by callbacks
/// to control some iteration; confirm against the callers.
///
/// The common traits are derived so that values can be copied, compared and
/// printed in assertions and logs.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum Action {
    /// Keep going.
    Continue,
    /// Stop.
    Stop,
}
/// Iterator adaptor that consumes a byte iterator two bytes at a time and
/// yields one byte out of every pair.
///
/// Depending on the constructor used, either the first or the second byte of
/// each pair is expected to be zero and the other byte is the one yielded.
/// The index of the first pair whose expected-zero byte is not zero is
/// recorded in `error_pos`; iteration continues regardless. A trailing
/// unpaired byte is consumed but never yielded.
struct WideIter<'a, I>
where
    I: Iterator<Item = &'a u8>,
{
    /// Underlying byte iterator.
    iter: I,
    /// Receives the index of the first offending pair. It is only written
    /// while it holds `None`, so an already-recorded position is preserved.
    error_pos: &'a Cell<Option<usize>>,
    /// Number of pairs consumed so far (offending pairs included).
    valid_pairs: usize,
    /// `true` if the first byte of each pair is the one expected to be zero,
    /// `false` if it is the second.
    zero_first: bool,
}

impl<'a, I> WideIter<'a, I>
where
    I: Iterator<Item = &'a u8>,
{
    /// Creates an iterator that yields the first byte of each pair and
    /// expects the second one to be zero.
    pub fn non_zero_first(iter: I, error_pos: &'a Cell<Option<usize>>) -> Self {
        Self::with_layout(iter, error_pos, false)
    }

    /// Creates an iterator that yields the second byte of each pair and
    /// expects the first one to be zero.
    pub fn zero_first(iter: I, error_pos: &'a Cell<Option<usize>>) -> Self {
        Self::with_layout(iter, error_pos, true)
    }

    /// Shared constructor: starts with no pairs consumed.
    fn with_layout(
        iter: I,
        error_pos: &'a Cell<Option<usize>>,
        zero_first: bool,
    ) -> Self {
        Self { iter, error_pos, valid_pairs: 0, zero_first }
    }
}

impl<'a, I> Iterator for WideIter<'a, I>
where
    I: Iterator<Item = &'a u8>,
{
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        // Both bytes of the pair are pulled up front; if either one is
        // missing the iteration ends, dropping an unpaired trailing byte.
        let first = self.iter.next()?;
        let second = self.iter.next()?;

        // Decide which byte of the pair is padding and which is payload.
        let (expected_zero, payload) = if self.zero_first {
            (first, second)
        } else {
            (second, first)
        };

        // Only the first offending pair is recorded.
        if self.error_pos.get().is_none() && *expected_zero != 0 {
            self.error_pos.set(Some(self.valid_pairs));
        }

        self.valid_pairs += 1;
        Some(payload)
    }
}