vyre_std/pattern/types.rs
1//! Shared types for the vyre-std DFA assembly pipeline.
2//!
3//! The pipeline stages pass these types between each other:
4//!
5//! ```text
6//! regex_to_nfa → Nfa → nfa_to_dfa → Dfa → dfa_minimize → Dfa → dfa_pack → PackedDfa
7//! ```
8//!
9//! Each stage is a pure function; the final `PackedDfa` is a POD byte
10//! layout suitable for direct upload to a GPU buffer.
11
12/// Sentinel state id used to mark "no transition on this byte."
13pub const INVALID_STATE: u32 = u32::MAX;
14
15/// Construction error shared across the pipeline.
16#[derive(Debug, Clone, PartialEq, Eq)]
17#[non_exhaustive]
18pub enum PatternError {
19 /// Regex source contained a character outside the supported subset or a
20 /// malformed sequence.
21 ///
22 /// Carries the byte offset and a human-readable description. Every
23 /// message starts with `Fix:` so consumers can surface the remediation
24 /// directly to end users.
25 ParseError {
26 /// Byte offset at which the parser gave up.
27 offset: usize,
28 /// Actionable `Fix:` prose describing what went wrong.
29 message: String,
30 },
31 /// Regex source used an escape sequence reserved for regex shorthand or
32 /// anchor semantics that this byte-oriented frontend does not implement.
33 UnsupportedEscape {
34 /// Byte offset of the leading backslash.
35 offset: usize,
36 /// Escaped ASCII byte after the backslash.
37 escape: char,
38 },
39 /// Regex source exceeded the parser's bounded recursive group nesting.
40 RecursionLimit {
41 /// Maximum group nesting depth permitted.
42 limit: usize,
43 },
44 /// The NFA or DFA exceeded a configured state budget.
45 StateOverflow {
46 /// Maximum state count permitted.
47 limit: u32,
48 },
49 /// The pattern set was empty or otherwise structurally invalid for
50 /// composite assembly (e.g., mixing empty regex with literal patterns).
51 EmptyPatternSet,
52}
53
54impl core::fmt::Display for PatternError {
55 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
56 match self {
57 Self::ParseError { offset, message } => {
58 write!(f, "pattern parse error at byte {offset}: {message}")
59 }
60 Self::UnsupportedEscape { offset, escape } => {
61 write!(
62 f,
63 "pattern parse error at byte {offset}: Fix: unsupported escape `\\{escape}`; spell the byte set explicitly with a character class."
64 )
65 }
66 Self::RecursionLimit { limit } => {
67 write!(
68 f,
69 "Fix: regex group nesting exceeded the recursion limit ({limit}); flatten or split the expression."
70 )
71 }
72 Self::StateOverflow { limit } => {
73 write!(
74 f,
75 "Fix: state count exceeded the pipeline budget ({limit}); split the pattern set into smaller batches."
76 )
77 }
78 Self::EmptyPatternSet => write!(
79 f,
80 "Fix: pattern set must contain at least one non-empty pattern."
81 ),
82 }
83 }
84}
85
86impl std::error::Error for PatternError {}
87
88/// Thompson NFA state identifier.
89pub type NfaStateId = u32;
90
91/// NFA transition: `(from, label, to)` where `None` label is epsilon.
92#[derive(Debug, Clone, PartialEq, Eq)]
93pub struct NfaEdge {
94 /// Source state id.
95 pub from: NfaStateId,
96 /// Byte label, or `None` for an epsilon transition.
97 pub byte: Option<u8>,
98 /// Destination state id.
99 pub to: NfaStateId,
100}
101
102/// Non-deterministic finite automaton in Thompson form.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub struct Nfa {
105 /// Total state count; states are `0..state_count`.
106 pub state_count: u32,
107 /// Edges including both byte-labelled and epsilon transitions.
108 pub edges: Vec<NfaEdge>,
109 /// Index of the start state. Always present for a well-formed NFA.
110 pub start: NfaStateId,
111 /// Accept-state bitmap.
112 pub accept: Vec<bool>,
113}
114
115/// Deterministic finite automaton with a dense transition table.
116#[derive(Debug, Clone, PartialEq, Eq)]
117pub struct Dfa {
118 /// Total state count.
119 pub state_count: u32,
120 /// Transitions laid out row-major: `transitions[state * 256 + byte]`.
121 /// Values equal to [`INVALID_STATE`] mean "no transition."
122 pub transitions: Vec<u32>,
123 /// Start state id.
124 pub start: u32,
125 /// Accept-state bitmap.
126 pub accept: Vec<bool>,
127}
128
129impl Dfa {
130 /// Read the transition for `(state, byte)`.
131 ///
132 /// Returns [`INVALID_STATE`] when no transition is defined.
133 #[must_use]
134 #[inline]
135 pub fn go(&self, state: u32, byte: u8) -> u32 {
136 self.transitions[(state as usize) * 256 + byte as usize]
137 }
138
139 /// Write a transition for `(state, byte)`.
140 #[inline]
141 pub fn set(&mut self, state: u32, byte: u8, next: u32) {
142 self.transitions[(state as usize) * 256 + byte as usize] = next;
143 }
144}
145
146/// Packing format selector for [`super::dfa_pack::dfa_pack`].
147#[derive(Debug, Clone, Copy, PartialEq, Eq)]
148#[non_exhaustive]
149pub enum DfaPackFormat {
150 /// Dense row-major: fastest scan, largest memory. 256 × 4 bytes per state.
151 Dense,
152 /// Byte equivalence-class compression: smallest when effective alphabet
153 /// is narrow. Emits a 256-entry class table + `state × num_classes`.
154 EquivClass,
155}
156
157/// Packed DFA ready for upload to a GPU buffer.
158#[derive(Debug, Clone, PartialEq, Eq)]
159pub struct PackedDfa {
160 /// Which format the bytes encode.
161 pub format: DfaPackFormat,
162 /// Number of DFA states before packing.
163 pub state_count: u32,
164 /// Start state id.
165 pub start: u32,
166 /// Raw serialized bytes (GPU-uploadable).
167 pub bytes: Vec<u8>,
168}