Skip to main content

ternary_regex/
lib.rs

1//! # ternary-regex
2//!
3//! Pattern matching on ternary sequences (`-1`, `0`, `+1`). Provides NFA and DFA construction
4//! from ternary patterns, NFA→DFA conversion with minimization, wildcard support, and
5//! stream matching against ternary input.
6
7#![forbid(unsafe_code)]
8
9/// A ternary value: Negative (-1), Zero (0), or Positive (+1).
10#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
11pub enum Ternary {
12    Neg,
13    Zero,
14    Pos,
15}
16
17impl Ternary {
18    pub fn to_i8(self) -> i8 {
19        match self {
20            Ternary::Neg => -1,
21            Ternary::Zero => 0,
22            Ternary::Pos => 1,
23        }
24    }
25}
26
27/// A single element in a ternary pattern.
28#[derive(Clone, Copy, Debug, PartialEq, Eq)]
29pub enum PatternElem {
30    /// Match exactly this ternary value.
31    Exact(Ternary),
32    /// Match any ternary value (wildcard).
33    Any,
34    /// Match either of two values.
35    Alt(Ternary, Ternary),
36    /// Match anything except this value.
37    Not(Ternary),
38}
39
40/// A compiled pattern: a sequence of PatternElems.
41#[derive(Clone, Debug)]
42pub struct TernaryPattern {
43    pub elements: Vec<PatternElem>,
44}
45
46impl TernaryPattern {
47    /// Create a pattern from a slice of PatternElems.
48    pub fn new(elements: Vec<PatternElem>) -> Self {
49        TernaryPattern { elements }
50    }
51
52    /// Create an exact-match pattern from a ternary sequence.
53    pub fn exact(seq: &[Ternary]) -> Self {
54        TernaryPattern {
55            elements: seq.iter().map(|&t| PatternElem::Exact(t)).collect(),
56        }
57    }
58
59    /// Check if an element matches a ternary value.
60    pub fn elem_matches(elem: PatternElem, val: Ternary) -> bool {
61        match elem {
62            PatternElem::Exact(t) => t == val,
63            PatternElem::Any => true,
64            PatternElem::Alt(a, b) => val == a || val == b,
65            PatternElem::Not(t) => val != t,
66        }
67    }
68}
69
70// ---- NFA ----
71
72/// An NFA state ID.
73type NfaState = usize;
74
75/// A transition in the NFA: from state, on matching element, to state.
76#[derive(Clone, Debug)]
77struct NfaTransition {
78    elem: PatternElem,
79    target: NfaState,
80}
81
82/// A ternary NFA for pattern matching.
83#[derive(Clone, Debug)]
84pub struct TernaryNFA {
85    /// Number of states.
86    pub state_count: usize,
87    /// Transitions: transitions[state] = list of (element, target).
88    transitions: Vec<Vec<NfaTransition>>,
89    /// Epsilon transitions.
90    epsilon: Vec<Vec<NfaState>>,
91    /// Start state.
92    pub start: NfaState,
93    /// Accept states.
94    pub accept: Vec<bool>,
95}
96
97impl TernaryNFA {
98    /// Create a new NFA with `n` states.
99    pub fn new(n: usize) -> Self {
100        TernaryNFA {
101            state_count: n,
102            transitions: vec![Vec::new(); n],
103            epsilon: vec![Vec::new(); n],
104            start: 0,
105            accept: vec![false; n],
106        }
107    }
108
109    /// Add a transition from `from` to `to` on `elem`.
110    pub fn add_transition(&mut self, from: NfaState, elem: PatternElem, to: NfaState) {
111        self.transitions[from].push(NfaTransition { elem, target: to });
112    }
113
114    /// Add an epsilon transition from `from` to `to`.
115    pub fn add_epsilon(&mut self, from: NfaState, to: NfaState) {
116        self.epsilon[from].push(to);
117    }
118
119    /// Set state `s` as accepting.
120    pub fn set_accept(&mut self, s: NfaState) {
121        self.accept[s] = true;
122    }
123
124    /// Compute epsilon closure of a set of states.
125    pub fn epsilon_closure(&self, states: &[NfaState]) -> Vec<NfaState> {
126        let mut closure = states.to_vec();
127        let mut idx = 0;
128        while idx < closure.len() {
129            let s = closure[idx];
130            for &t in &self.epsilon[s] {
131                if !closure.contains(&t) {
132                    closure.push(t);
133                }
134            }
135            idx += 1;
136        }
137        closure.sort();
138        closure.dedup();
139        closure
140    }
141
142    /// Compute the set of states reachable from `states` on input `val`.
143    pub fn move_on(&self, states: &[NfaState], val: Ternary) -> Vec<NfaState> {
144        let mut result = Vec::new();
145        for &s in states {
146            for tr in &self.transitions[s] {
147                if TernaryPattern::elem_matches(tr.elem, val) {
148                    if !result.contains(&tr.target) {
149                        result.push(tr.target);
150                    }
151                }
152            }
153        }
154        result.sort();
155        result.dedup();
156        result
157    }
158
159    /// Test if the NFA accepts the given input sequence.
160    pub fn accepts(&self, input: &[Ternary]) -> bool {
161        let mut current = self.epsilon_closure(&[self.start]);
162        for &val in input {
163            let moved = self.move_on(&current, val);
164            current = self.epsilon_closure(&moved);
165            if current.is_empty() {
166                return false;
167            }
168        }
169        current.iter().any(|&s| self.accept[s])
170    }
171
172    /// Compile a TernaryPattern into an NFA using Thompson's construction.
173    pub fn from_pattern(pattern: &TernaryPattern) -> Self {
174        let n = pattern.elements.len();
175        let mut nfa = TernaryNFA::new(n + 1);
176        for (i, elem) in pattern.elements.iter().enumerate() {
177            nfa.add_transition(i, *elem, i + 1);
178        }
179        nfa.set_accept(n);
180        nfa
181    }
182}
183
184// ---- DFA ----
185
186/// A DFA state, represented as a sorted set of NFA states.
187type DfaState = Vec<NfaState>;
188
189/// A ternary DFA for pattern matching.
190#[derive(Clone, Debug)]
191pub struct TernaryDFA {
192    /// DFA transitions: transitions[(state)][value] = target state index.
193    /// State is represented as a sorted Vec<NfaState>.
194    pub transitions: Vec<[usize; 3]>, // indexed by: 0=Neg, 1=Zero, 2=Pos
195    /// Accept states.
196    pub accept: Vec<bool>,
197    /// Start state index.
198    pub start: usize,
199    /// State labels (sets of NFA states for each DFA state).
200    pub state_labels: Vec<DfaState>,
201}
202
203/// Convert a ternary value to an index (0=Neg, 1=Zero, 2=Pos).
204fn ternary_index(t: Ternary) -> usize {
205    match t {
206        Ternary::Neg => 0,
207        Ternary::Zero => 1,
208        Ternary::Pos => 2,
209    }
210}
211
212impl TernaryDFA {
213    /// Convert an NFA to a DFA using subset construction.
214    pub fn from_nfa(nfa: &TernaryNFA) -> Self {
215        let mut dfa = TernaryDFA {
216            transitions: Vec::new(),
217            accept: Vec::new(),
218            start: 0,
219            state_labels: Vec::new(),
220        };
221
222        let start_state = nfa.epsilon_closure(&[nfa.start]);
223        dfa.state_labels.push(start_state.clone());
224        dfa.start = 0;
225        dfa.accept.push(start_state.iter().any(|&s| nfa.accept[s]));
226
227        let all_ternary = [Ternary::Neg, Ternary::Zero, Ternary::Pos];
228        let mut worklist = vec![0];
229
230        while let Some(current_idx) = worklist.pop() {
231            // Ensure transitions vec is large enough
232            while dfa.transitions.len() <= current_idx {
233                dfa.transitions.push([0; 3]);
234            }
235
236            let current_state = dfa.state_labels[current_idx].clone();
237
238            for &val in &all_ternary {
239                let moved = nfa.move_on(&current_state, val);
240                let closure = nfa.epsilon_closure(&moved);
241
242                if closure.is_empty() {
243                    // Dead state
244                    dfa.transitions[current_idx][ternary_index(val)] = usize::MAX;
245                    continue;
246                }
247
248                if let Some(existing) = dfa.state_labels.iter().position(|s| *s == closure) {
249                    dfa.transitions[current_idx][ternary_index(val)] = existing;
250                } else {
251                    let new_idx = dfa.state_labels.len();
252                    dfa.state_labels.push(closure.clone());
253                    dfa.accept.push(closure.iter().any(|&s| nfa.accept[s]));
254                    dfa.transitions.push([0; 3]);
255                    dfa.transitions[current_idx][ternary_index(val)] = new_idx;
256                    worklist.push(new_idx);
257                }
258            }
259        }
260
261        dfa
262    }
263
264    /// Test if the DFA accepts the given input sequence.
265    pub fn accepts(&self, input: &[Ternary]) -> bool {
266        let mut current = self.start;
267        for &val in input {
268            let idx = ternary_index(val);
269            if current >= self.transitions.len() || self.transitions[current][idx] == usize::MAX {
270                return false;
271            }
272            current = self.transitions[current][idx];
273        }
274        current < self.accept.len() && self.accept[current]
275    }
276
277    /// Minimize the DFA using Hopcroft's algorithm (partition refinement).
278    pub fn minimize(&self) -> TernaryDFA {
279        let n = self.state_labels.len();
280        if n <= 1 {
281            return self.clone();
282        }
283
284        // Partition into accepting and non-accepting
285        let mut partition: Vec<Vec<usize>> = Vec::new();
286        let mut accept_set = Vec::new();
287        let mut reject_set = Vec::new();
288        for i in 0..n {
289            if self.accept[i] {
290                accept_set.push(i);
291            } else {
292                reject_set.push(i);
293            }
294        }
295        if !accept_set.is_empty() {
296            partition.push(accept_set);
297        }
298        if !reject_set.is_empty() {
299            partition.push(reject_set);
300        }
301
302        // Find which partition a state belongs to
303        let mut state_partition = vec![0usize; n];
304        for (p_idx, part) in partition.iter().enumerate() {
305            for &s in part {
306                state_partition[s] = p_idx;
307            }
308        }
309
310        // Refine partitions
311        let mut changed = true;
312        while changed {
313            changed = false;
314            let mut new_partition = Vec::new();
315            for part in &partition {
316                if part.len() <= 1 {
317                    new_partition.push(part.clone());
318                    continue;
319                }
320
321                // Split by transition signatures
322                let mut groups: std::collections::HashMap<Vec<usize>, Vec<usize>> = std::collections::HashMap::new();
323                for &s in part {
324                    let sig: Vec<usize> = [Ternary::Neg, Ternary::Zero, Ternary::Pos].iter().map(|&val| {
325                        let idx = ternary_index(val);
326                        if s < self.transitions.len() && self.transitions[s][idx] != usize::MAX {
327                            state_partition[self.transitions[s][idx]]
328                        } else {
329                            usize::MAX
330                        }
331                    }).collect();
332                    groups.entry(sig).or_default().push(s);
333                }
334
335                if groups.len() > 1 {
336                    changed = true;
337                }
338                for (_, group) in groups {
339                    new_partition.push(group);
340                }
341            }
342
343            partition = new_partition;
344            for (p_idx, part) in partition.iter().enumerate() {
345                for &s in part {
346                    state_partition[s] = p_idx;
347                }
348            }
349        }
350
351        // Build minimized DFA
352        let minimized_n = partition.len();
353        let mut min_dfa = TernaryDFA {
354            transitions: vec![[usize::MAX; 3]; minimized_n],
355            accept: vec![false; minimized_n],
356            start: state_partition[self.start],
357            state_labels: Vec::new(),
358        };
359
360        // Pick representative from each partition
361        for (p_idx, part) in partition.iter().enumerate() {
362            let rep = part[0];
363            min_dfa.accept[p_idx] = self.accept[rep];
364            min_dfa.state_labels.push(part.clone());
365
366            for &val in &[Ternary::Neg, Ternary::Zero, Ternary::Pos] {
367                let idx = ternary_index(val);
368                if rep < self.transitions.len() && self.transitions[rep][idx] != usize::MAX {
369                    min_dfa.transitions[p_idx][idx] = state_partition[self.transitions[rep][idx]];
370                }
371            }
372        }
373
374        min_dfa
375    }
376
377    /// Find all matches of the pattern in the given ternary stream.
378    /// Returns starting positions of all matches (prefix matching).
379    pub fn find_all(&self, input: &[Ternary]) -> Vec<usize> {
380        let mut matches = Vec::new();
381        for start in 0..input.len() {
382            // Try to match the pattern as a prefix of input[start..]
383            let mut current = self.start;
384            for (offset, &val) in input[start..].iter().enumerate() {
385                let idx = ternary_index(val);
386                if current >= self.transitions.len() || self.transitions[current][idx] == usize::MAX {
387                    break;
388                }
389                current = self.transitions[current][idx];
390                if current < self.accept.len() && self.accept[current] {
391                    matches.push(start);
392                    break;
393                }
394            }
395        }
396        matches
397    }
398}
399
400/// Convenience: compile a pattern to a minimized DFA.
401pub fn compile(pattern: &TernaryPattern) -> TernaryDFA {
402    let nfa = TernaryNFA::from_pattern(pattern);
403    let dfa = TernaryDFA::from_nfa(&nfa);
404    dfa.minimize()
405}
406
407/// Convenience: match a pattern against a ternary sequence.
408pub fn matches(pattern: &TernaryPattern, input: &[Ternary]) -> bool {
409    let dfa = compile(pattern);
410    dfa.accepts(input)
411}
412
413/// Convenience: find all occurrences of a pattern in a ternary stream.
414pub fn find_matches(pattern: &TernaryPattern, input: &[Ternary]) -> Vec<usize> {
415    let dfa = compile(pattern);
416    dfa.find_all(input)
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422
423    fn t(v: i8) -> Ternary {
424        match v {
425            -1 => Ternary::Neg,
426            0 => Ternary::Zero,
427            _ => Ternary::Pos,
428        }
429    }
430
431    fn ts(vals: &[i8]) -> Vec<Ternary> {
432        vals.iter().map(|&v| t(v)).collect()
433    }
434
435    #[test]
436    fn test_pattern_exact_match() {
437        let pattern = TernaryPattern::exact(&ts(&[1, 0, -1]));
438        assert!(matches(&pattern, &ts(&[1, 0, -1])));
439        assert!(!matches(&pattern, &ts(&[1, 0, 0])));
440    }
441
442    #[test]
443    fn test_pattern_wildcard() {
444        let pattern = TernaryPattern::new(vec![
445            PatternElem::Exact(Ternary::Pos),
446            PatternElem::Any,
447            PatternElem::Exact(Ternary::Neg),
448        ]);
449        assert!(matches(&pattern, &ts(&[1, 0, -1])));
450        assert!(matches(&pattern, &ts(&[1, 1, -1])));
451        assert!(matches(&pattern, &ts(&[1, -1, -1])));
452        assert!(!matches(&pattern, &ts(&[0, 0, -1])));
453    }
454
455    #[test]
456    fn test_pattern_alt() {
457        let pattern = TernaryPattern::new(vec![
458            PatternElem::Alt(Ternary::Pos, Ternary::Zero),
459            PatternElem::Exact(Ternary::Neg),
460        ]);
461        assert!(matches(&pattern, &ts(&[1, -1])));
462        assert!(matches(&pattern, &ts(&[0, -1])));
463        assert!(!matches(&pattern, &ts(&[-1, -1])));
464    }
465
466    #[test]
467    fn test_pattern_not() {
468        let pattern = TernaryPattern::new(vec![
469            PatternElem::Not(Ternary::Neg),
470            PatternElem::Exact(Ternary::Pos),
471        ]);
472        assert!(matches(&pattern, &ts(&[1, 1])));
473        assert!(matches(&pattern, &ts(&[0, 1])));
474        assert!(!matches(&pattern, &ts(&[-1, 1])));
475    }
476
477    #[test]
478    fn test_nfa_basic() {
479        let mut nfa = TernaryNFA::new(3);
480        nfa.add_transition(0, PatternElem::Exact(Ternary::Pos), 1);
481        nfa.add_transition(1, PatternElem::Exact(Ternary::Neg), 2);
482        nfa.set_accept(2);
483        assert!(nfa.accepts(&ts(&[1, -1])));
484        assert!(!nfa.accepts(&ts(&[1, 0])));
485        assert!(!nfa.accepts(&ts(&[1, -1, 0])));
486    }
487
488    #[test]
489    fn test_nfa_epsilon() {
490        let mut nfa = TernaryNFA::new(3);
491        nfa.add_transition(0, PatternElem::Exact(Ternary::Pos), 1);
492        nfa.add_epsilon(1, 2);
493        nfa.set_accept(2);
494        // "Pos" should reach state 2 via epsilon
495        assert!(nfa.accepts(&ts(&[1])));
496    }
497
498    #[test]
499    fn test_nfa_from_pattern() {
500        let pattern = TernaryPattern::new(vec![
501            PatternElem::Exact(Ternary::Pos),
502            PatternElem::Any,
503            PatternElem::Exact(Ternary::Neg),
504        ]);
505        let nfa = TernaryNFA::from_pattern(&pattern);
506        assert!(nfa.accepts(&ts(&[1, 0, -1])));
507        assert!(nfa.accepts(&ts(&[1, 1, -1])));
508        assert!(!nfa.accepts(&ts(&[0, 0, -1])));
509    }
510
511    #[test]
512    fn test_dfa_from_nfa() {
513        let pattern = TernaryPattern::new(vec![
514            PatternElem::Exact(Ternary::Pos),
515            PatternElem::Exact(Ternary::Zero),
516        ]);
517        let nfa = TernaryNFA::from_pattern(&pattern);
518        let dfa = TernaryDFA::from_nfa(&nfa);
519        assert!(dfa.accepts(&ts(&[1, 0])));
520        assert!(!dfa.accepts(&ts(&[1, 1])));
521        assert!(!dfa.accepts(&ts(&[0, 0])));
522    }
523
524    #[test]
525    fn test_dfa_minimize() {
526        // Create an NFA with potential for redundant states
527        let pattern = TernaryPattern::new(vec![
528            PatternElem::Any,
529            PatternElem::Any,
530        ]);
531        let nfa = TernaryNFA::from_pattern(&pattern);
532        let dfa = TernaryDFA::from_nfa(&nfa);
533        let min = dfa.minimize();
534        // Minimized DFA should accept the same inputs
535        assert!(min.accepts(&ts(&[1, 0])));
536        assert!(min.accepts(&ts(&[-1, -1])));
537        assert!(!min.accepts(&ts(&[1])));
538        assert!(!min.accepts(&ts(&[1, 0, -1])));
539    }
540
541    #[test]
542    fn test_dfa_minimization_reduces_states() {
543        let pattern = TernaryPattern::new(vec![
544            PatternElem::Exact(Ternary::Pos),
545            PatternElem::Exact(Ternary::Neg),
546        ]);
547        let nfa = TernaryNFA::from_pattern(&pattern);
548        let dfa = TernaryDFA::from_nfa(&nfa);
549        let min = dfa.minimize();
550        // The minimal DFA should have fewer or equal states
551        assert!(min.transitions.len() <= dfa.transitions.len());
552    }
553
554    #[test]
555    fn test_find_all_matches() {
556        let pattern = TernaryPattern::exact(&ts(&[1, -1]));
557        let dfa = compile(&pattern);
558        let input = ts(&[1, -1, 0, 1, -1, 1, 0]);
559        let found = dfa.find_all(&input);
560        assert_eq!(found, vec![0, 3]);
561    }
562
563    #[test]
564    fn test_find_all_no_matches() {
565        let pattern = TernaryPattern::exact(&ts(&[1, 1]));
566        let found = find_matches(&pattern, &ts(&[-1, 0, -1, 0]));
567        assert!(found.is_empty());
568    }
569
570    #[test]
571    fn test_find_all_overlapping() {
572        let pattern = TernaryPattern::exact(&ts(&[1]));
573        let found = find_matches(&pattern, &ts(&[1, 1, 1]));
574        assert_eq!(found, vec![0, 1, 2]);
575    }
576
577    #[test]
578    fn test_empty_pattern() {
579        let pattern = TernaryPattern::new(vec![]);
580        let dfa = compile(&pattern);
581        assert!(dfa.accepts(&ts(&[])));
582        assert!(!dfa.accepts(&ts(&[1])));
583    }
584
585    #[test]
586    fn test_convenience_matches() {
587        let pattern = TernaryPattern::exact(&ts(&[0, 1, -1]));
588        assert!(matches(&pattern, &ts(&[0, 1, -1])));
589        assert!(!matches(&pattern, &ts(&[0, 1, 0])));
590    }
591
592    #[test]
593    fn test_nfa_epsilon_closure() {
594        let mut nfa = TernaryNFA::new(4);
595        nfa.add_epsilon(0, 1);
596        nfa.add_epsilon(1, 2);
597        nfa.add_epsilon(2, 3);
598        let closure = nfa.epsilon_closure(&[0]);
599        assert_eq!(closure, vec![0, 1, 2, 3]);
600    }
601
602    #[test]
603    fn test_dfa_single_element_pattern() {
604        let pattern = TernaryPattern::exact(&ts(&[1]));
605        let found = find_matches(&pattern, &ts(&[0, 1, 0, 1, 0]));
606        assert_eq!(found, vec![1, 3]);
607    }
608
609    #[test]
610    fn test_wildcard_stream_match() {
611        let pattern = TernaryPattern::new(vec![
612            PatternElem::Exact(Ternary::Pos),
613            PatternElem::Any,
614            PatternElem::Any,
615            PatternElem::Exact(Ternary::Neg),
616        ]);
617        assert!(matches(&pattern, &ts(&[1, 0, 0, -1])));
618        assert!(matches(&pattern, &ts(&[1, 1, -1, -1])));
619        assert!(!matches(&pattern, &ts(&[1, 0, 0, 0])));
620    }
621}