use crate::nfa::StateId;
use std::collections::{BinaryHeap, HashMap};
use std::sync::Arc;
#[derive(Debug)]
pub struct PendingThread {
pub pos: usize,
pub thread: Thread,
}
impl PartialEq for PendingThread {
fn eq(&self, other: &Self) -> bool {
self.pos == other.pos
}
}
impl Eq for PendingThread {}
impl PartialOrd for PendingThread {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for PendingThread {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
other.pos.cmp(&self.pos)
}
}
#[derive(Debug)]
pub struct PikeVmContext {
pub current_threads: Vec<Thread>,
pub next_threads: Vec<Thread>,
pub future_threads: BinaryHeap<PendingThread>,
pub visited: Vec<usize>,
pub generation: usize,
#[allow(dead_code)]
pub capture_slots: Vec<Option<(usize, usize)>>,
pub epsilon_stack: Vec<Thread>,
pub lookaround_cache: HashMap<(StateId, usize), bool>,
}
impl PikeVmContext {
pub fn new(capture_count: usize, state_count: usize) -> Self {
Self {
current_threads: Vec::with_capacity(32),
next_threads: Vec::with_capacity(32),
future_threads: BinaryHeap::new(),
visited: vec![0; state_count],
generation: 0,
capture_slots: vec![None; capture_count + 1],
epsilon_stack: Vec::with_capacity(32),
lookaround_cache: HashMap::new(),
}
}
#[inline]
pub fn reset(&mut self) {
self.current_threads.clear();
self.next_threads.clear();
self.future_threads.clear();
self.epsilon_stack.clear();
self.generation = self.generation.wrapping_add(1);
for slot in &mut self.capture_slots {
*slot = None;
}
self.lookaround_cache.clear();
}
#[inline]
pub fn ensure_state_capacity(&mut self, state_count: usize) {
if self.visited.len() < state_count {
self.visited.resize(state_count, 0);
}
}
}
#[derive(Debug, Clone)]
pub enum CaptureAction {
Start(u32, usize),
End(u32, usize),
}
#[derive(Debug, Clone)]
pub struct CaptureNode {
pub action: CaptureAction,
pub parent: Option<Arc<CaptureNode>>,
}
impl CaptureNode {
#[inline]
pub fn new(action: CaptureAction, parent: Option<Arc<CaptureNode>>) -> Arc<Self> {
Arc::new(Self { action, parent })
}
}
#[derive(Debug, Clone)]
pub struct Thread {
pub state: StateId,
pub capture_head: Option<Arc<CaptureNode>>,
pub capture_count: usize,
pub non_greedy_exit: bool,
}
impl Thread {
#[inline]
pub fn new(state: StateId, capture_count: usize) -> Self {
Self {
state,
capture_head: None,
capture_count,
non_greedy_exit: false,
}
}
#[inline]
pub fn clone_with_state(&self, state: StateId) -> Self {
Self {
state,
capture_head: self.capture_head.clone(), capture_count: self.capture_count,
non_greedy_exit: self.non_greedy_exit,
}
}
#[inline]
pub fn record_capture_start(&mut self, group_idx: u32, pos: usize) {
let node = CaptureNode::new(
CaptureAction::Start(group_idx, pos),
self.capture_head.take(),
);
self.capture_head = Some(node);
}
#[inline]
pub fn record_capture_end(&mut self, group_idx: u32, pos: usize) {
let node = CaptureNode::new(CaptureAction::End(group_idx, pos), self.capture_head.take());
self.capture_head = Some(node);
}
pub fn reconstruct_captures(&self) -> Vec<Option<(usize, usize)>> {
let mut captures = vec![None; self.capture_count + 1];
let mut actions = Vec::new();
let mut current = self.capture_head.as_ref();
while let Some(node) = current {
actions.push(&node.action);
current = node.parent.as_ref();
}
for action in actions.into_iter().rev() {
match action {
CaptureAction::Start(idx, pos) => {
let idx = *idx as usize;
if idx < captures.len() {
captures[idx] = Some((*pos, *pos));
}
}
CaptureAction::End(idx, pos) => {
let idx = *idx as usize;
if idx < captures.len() {
if let Some((start, _)) = captures[idx] {
captures[idx] = Some((start, *pos));
}
}
}
}
}
captures
}
pub fn get_capture(&self, group_idx: u32) -> Option<(usize, usize)> {
let mut start: Option<usize> = None;
let mut end: Option<usize> = None;
let mut current = self.capture_head.as_ref();
while let Some(node) = current {
match &node.action {
CaptureAction::Start(idx, pos) if *idx == group_idx => {
if start.is_none() {
start = Some(*pos);
}
}
CaptureAction::End(idx, pos) if *idx == group_idx => {
if end.is_none() {
end = Some(*pos);
}
}
_ => {}
}
if start.is_some() && end.is_some() {
break;
}
current = node.parent.as_ref();
}
match (start, end) {
(Some(s), Some(e)) => Some((s, e)),
_ => None,
}
}
}
pub enum InstructionResult {
Continue,
Kill,
Jump(usize),
NonGreedyExit,
CodepointTransition {
bytes_consumed: usize,
target: StateId,
},
}
#[inline]
pub fn decode_utf8_codepoint(bytes: &[u8]) -> Option<(u32, usize)> {
if bytes.is_empty() {
return None;
}
let first = bytes[0];
if first < 0x80 {
return Some((first as u32, 1));
}
if first < 0xC0 {
return None;
}
if first < 0xE0 {
if bytes.len() < 2 {
return None;
}
let b1 = bytes[1];
if (b1 & 0xC0) != 0x80 {
return None;
}
let cp = ((first as u32 & 0x1F) << 6) | (b1 as u32 & 0x3F);
return Some((cp, 2));
}
if first < 0xF0 {
if bytes.len() < 3 {
return None;
}
let b1 = bytes[1];
let b2 = bytes[2];
if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
return None;
}
let cp = ((first as u32 & 0x0F) << 12) | ((b1 as u32 & 0x3F) << 6) | (b2 as u32 & 0x3F);
return Some((cp, 3));
}
if first < 0xF8 {
if bytes.len() < 4 {
return None;
}
let b1 = bytes[1];
let b2 = bytes[2];
let b3 = bytes[3];
if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
return None;
}
let cp = ((first as u32 & 0x07) << 18)
| ((b1 as u32 & 0x3F) << 12)
| ((b2 as u32 & 0x3F) << 6)
| (b3 as u32 & 0x3F);
return Some((cp, 4));
}
None
}