Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_lookaround(self) -> bool {
134        self.has(MetaFlags::CONTAINS_LOOKAROUND)
135    }
136    pub(crate) fn contains_inter(self) -> bool {
137        self.has(MetaFlags::CONTAINS_INTER)
138    }
139
140    pub(crate) fn all_contains_flags(self) -> MetaFlags {
141        self.and(
142            MetaFlags::CONTAINS_LOOKAROUND
143                .or(MetaFlags::CONTAINS_ANCHORS)
144                .or(MetaFlags::CONTAINS_INTER),
145        )
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152    #[default]
153    Pred,
154    Star,
155    Begin,
156    End,
157    Concat,
158    Union,
159    Compl,
160    Lookbehind,
161    Lookahead,
162    Inter,
163    Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168    flags: MetaFlags,
169    nulls: NullsId,
170}
171
172struct MetadataBuilder {
173    num_created: u32,
174    solver: Solver,
175    nb: NullsBuilder,
176    index: FxHashMap<Metadata, MetadataId>,
177    pub array: Vec<Metadata>,
178}
179
180mod helpers {
181    pub(crate) fn incr_rel(n1: u32) -> u32 {
182        match n1.overflowing_add(1) {
183            (_, true) => u32::MAX,
184            (res, false) => res,
185        }
186    }
187}
188
189impl MetadataBuilder {
190    fn new() -> MetadataBuilder {
191        Self {
192            index: FxHashMap::default(),
193            array: vec![Metadata {
194                flags: MetaFlags::ZERO,
195                nulls: NullsId::EMPTY,
196            }],
197            solver: Solver::new(),
198            num_created: 0,
199            nb: NullsBuilder::new(),
200        }
201    }
202
203    fn init(&mut self, inst: Metadata) -> MetadataId {
204        self.num_created += 1;
205        let new_id = MetadataId(self.num_created);
206        self.index.insert(inst.clone(), new_id);
207        self.array.push(inst);
208        new_id
209    }
210
211    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212        match self.index.get(&inst) {
213            Some(&id) => id,
214            None => self.init(inst),
215        }
216    }
217
218    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219        &self.array[inst.0 as usize]
220    }
221
222    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223        setflags.all_contains_flags()
224    }
225
226    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227        let left = &self.array[body.0 as usize].flags;
228        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229        // BOT* = EPS (empty string only), not infinite
230        let inf = if body_id == NodeId::BOT {
231            MetaFlags::ZERO
232        } else {
233            MetaFlags::INFINITE_LENGTH
234        };
235        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236    }
237
238    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239        let left = &self.array[left_id.0 as usize].flags;
240        let null = left.nullability().not().and(Nullability::ALWAYS);
241        let contains = self.get_contains(*left);
242        MetaFlags::with_nullability(
243            null,
244            contains
245                .or(MetaFlags::INFINITE_LENGTH)
246                .or(MetaFlags::CONTAINS_COMPL),
247        )
248    }
249
250    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251        let left = &self.array[left_id.0 as usize].flags;
252        let right = &self.array[right_id.0 as usize].flags;
253        let null = left.nullability().and(right.nullability());
254        let contains = self.get_contains(left.or(*right));
255        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256        MetaFlags::with_nullability(null, contains.or(len))
257    }
258
259    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260        let left = &self.array[left_id.0 as usize].flags;
261        let right = &self.array[right_id.0 as usize].flags;
262        let null = left.nullability().and(right.nullability());
263        let contains = self
264            .get_contains(left.or(*right))
265            .or(MetaFlags::CONTAINS_INTER);
266        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267        MetaFlags::with_nullability(null, contains.or(len))
268    }
269
270    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271        let left = &self.array[left_id.0 as usize].flags;
272        let right = &self.array[right_id.0 as usize].flags;
273        let null = left.nullability().or(right.nullability());
274        let contains = self.get_contains(left.or(*right));
275        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276        MetaFlags::with_nullability(null, contains.or(len))
277    }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282    pub(crate) kind: Kind,
283    pub(crate) left: NodeId,
284    pub(crate) right: NodeId,
285    pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290    Leaf(NodeId),
291    ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301    fn is_checked(self) -> bool {
302        self.0 >= NodeFlags::IS_CHECKED.0
303    }
304    fn is_empty(self) -> bool {
305        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306    }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317    mb: MetadataBuilder,
318    temp_vec: Vec<NodeId>,
319    num_created: u32,
320    index: FxHashMap<NodeKey, NodeId>,
321    array: Vec<NodeKey>,
322    metadata: Vec<MetadataId>,
323    reversed: Vec<NodeId>,
324    cache_empty: FxHashMap<NodeId, NodeFlags>,
325    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326    tr_array: Vec<TRegex<TSetId>>,
327    tr_der_center: Vec<TRegexId>,
328    tr_der_begin: Vec<TRegexId>,
329    flags: BuilderFlags,
330    /// maximum lookahead context distance before returning `AnchorLimit`.
331    pub lookahead_context_max: u32,
332    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336macro_rules! iter_inter {
337    ($compiler:ident, $start:ident, $expression:expr) => {
338        let mut curr = $start;
339        while $compiler.get_kind(curr) == Kind::Inter {
340            let left = $compiler.get_left(curr);
341            $expression(left);
342            curr = $compiler.get_right(curr);
343        }
344        $expression(curr);
345    };
346}
347
348impl NodeId {
349    fn is_missing(&self) -> bool {
350        *self == NodeId::MISSING
351    }
352    #[inline]
353    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
354        b.get_flags_contains(self)
355    }
356    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
357        if self == tail {
358            true
359        } else if self.is_kind(b, Kind::Concat) {
360            self.right(b).has_concat_tail(b, tail)
361        } else {
362            false
363        }
364    }
365    fn missing_to_eps(&self) -> NodeId {
366        if *self == NodeId::MISSING {
367            NodeId::EPS
368        } else {
369            *self
370        }
371    }
372
373    #[inline]
374    fn kind(self, b: &RegexBuilder) -> Kind {
375        b.get_kind(self)
376    }
377    #[inline]
378    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
379        b.get_kind(self) == k
380    }
381    #[inline]
382    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
383        b.nullability(self) == Nullability::NEVER
384    }
385    #[inline]
386    fn nullability(self, b: &RegexBuilder) -> Nullability {
387        b.nullability(self)
388    }
389
390    #[inline]
391    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
392        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
393    }
394    #[inline]
395    pub fn left(self, b: &RegexBuilder) -> NodeId {
396        b.get_left(self)
397    }
398
399    #[inline]
400    pub fn right(self, b: &RegexBuilder) -> NodeId {
401        b.get_right(self)
402    }
403
404    #[inline]
405    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
406        b.der(self, mask)
407    }
408
409    #[inline]
410    fn extra(self, b: &RegexBuilder) -> u32 {
411        b.get_extra(self)
412    }
413
414    #[inline]
415    fn is_pred(self, b: &RegexBuilder) -> bool {
416        b.get_kind(self) == Kind::Pred
417    }
418    #[inline]
419    fn is_lookahead(self, b: &RegexBuilder) -> bool {
420        b.get_kind(self) == Kind::Lookahead
421    }
422    #[inline]
423    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
424        debug_assert!(self.is_pred(b));
425        TSetId(b.get_extra(self))
426    }
427    #[inline]
428    fn is_star(self, b: &RegexBuilder) -> bool {
429        if NodeId::EPS == self {
430            return false;
431        }
432        b.get_kind(self) == Kind::Star
433    }
434
435    #[inline]
436    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
437        if self.is_concat(b) {
438            let r = self.right(b);
439            return r.is_star(b) && r.left(b) == self.left(b);
440        }
441        false
442    }
443
444    #[inline]
445    fn is_concat(self, b: &RegexBuilder) -> bool {
446        b.get_kind(self) == Kind::Concat
447    }
448
449    #[inline]
450    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
451        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
452            Some(b.get_right(self))
453        } else {
454            None
455        }
456    }
457
458    #[inline]
459    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
460        if b.get_kind(self) == Kind::Concat {
461            let left = self.left(b);
462            let right = self.right(b);
463            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
464                return right == NodeId::END;
465            }
466        }
467        false
468    }
469
470    #[inline]
471    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
472        if NodeId::EPS == self {
473            return None;
474        }
475        if self.is_star(b) && self.left(b).is_pred(b) {
476            Some(self.left(b))
477        } else {
478            None
479        }
480    }
481
482    #[inline]
483    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
484        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
485            let middle = self.right(b);
486            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
487                return Some(middle.left(b));
488            }
489        }
490        None
491    }
492
493    #[inline]
494    pub(crate) fn iter_union(
495        self,
496        b: &mut RegexBuilder,
497        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
498    ) {
499        let mut curr: NodeId = self;
500        while curr.kind(b) == Kind::Union {
501            f(b, curr.left(b));
502            curr = curr.right(b);
503        }
504        f(b, curr);
505    }
506
507    #[inline]
508    pub(crate) fn iter_union_while(
509        self,
510        b: &mut RegexBuilder,
511        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
512    ) {
513        let mut curr: NodeId = self;
514        let mut continue_loop = true;
515        while continue_loop && curr.kind(b) == Kind::Union {
516            continue_loop = f(b, curr.left(b));
517            curr = curr.right(b);
518        }
519        if continue_loop {
520            f(b, curr);
521        }
522    }
523}
524
525impl Default for RegexBuilder {
526    fn default() -> Self {
527        Self::new()
528    }
529}
530
531impl RegexBuilder {
532    pub fn new() -> RegexBuilder {
533        let mut inst = Self {
534            mb: MetadataBuilder::new(),
535            array: Vec::new(),
536            index: FxHashMap::default(),
537            cache_empty: FxHashMap::default(),
538            tr_array: Vec::new(),
539            tr_cache: FxHashMap::default(),
540            flags: BuilderFlags::ZERO,
541            lookahead_context_max: 800,
542            num_created: 0,
543            metadata: Vec::new(),
544            reversed: Vec::new(),
545            tr_der_center: Vec::new(),
546            tr_der_begin: Vec::new(),
547            temp_vec: Vec::new(),
548            mk_binary_memo: FxHashMap::default(),
549            clean_cache: FxHashMap::default(),
550        };
551        inst.array.push(NodeKey::default());
552        inst.mk_pred(TSetId::EMPTY);
553        inst.mk_star(NodeId::BOT);
554        inst.mk_pred(TSetId::FULL);
555        inst.mk_star(NodeId::TOP);
556        let top_plus_id = inst.mk_plus(NodeId::TOP);
557        inst.mk_unset(Kind::Begin);
558        inst.mk_unset(Kind::End);
559        debug_assert!(top_plus_id == NodeId::TOPPLUS);
560
561        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
562        inst.mk_leaf(NodeId::BOT);
563        inst.mk_leaf(NodeId::EPS);
564        inst.mk_leaf(NodeId::TOP);
565        inst.mk_leaf(NodeId::TS);
566
567        inst.flags = BuilderFlags::SUBSUME;
568        inst
569    }
570
571    #[inline]
572    pub fn solver_ref(&self) -> &Solver {
573        &self.mb.solver
574    }
575
576    #[inline]
577    pub fn solver(&mut self) -> &mut Solver {
578        &mut self.mb.solver
579    }
580
581    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
582        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
583        self.tr_cache.insert(inst.clone(), new_id);
584        self.tr_array.push(inst);
585        new_id
586    }
587
588    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
589        match self.tr_cache.get(&inst) {
590            Some(&id) => id,
591            None => self.tr_init(inst),
592        }
593    }
594
595    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
596        &self.tr_array[inst.0 as usize]
597    }
598
599    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
600        let solv = self.solver();
601        !solv.is_sat_id(cond1, cond2)
602    }
603
604    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
605        let term = TRegex::<TSetId>::Leaf(node_id);
606        self.get_tregex_id(term)
607    }
608
609    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
610        let tmp_inst = TRegex::ITE(cond, _then, _else);
611        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
612            return *cached;
613        }
614        if _then == _else {
615            return _then;
616        }
617        if self.solver().is_full_id(cond) {
618            return _then;
619        }
620        if self.solver().is_empty_id(cond) {
621            return _else;
622        }
623        let _clean_then = match self.tr_array[_then.0 as usize] {
624            TRegex::Leaf(_) => _then,
625            _ => self.clean(cond, _then),
626        };
627        let notcond = self.solver().not_id(cond);
628        let _clean_else = match self.tr_array[_else.0 as usize] {
629            TRegex::Leaf(_) => _else,
630            _ => self.clean(notcond, _else),
631        };
632
633        if _clean_then == _clean_else {
634            return _clean_then;
635        }
636        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
637        match self.get_tregex(_clean_then) {
638            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
639                let _cond_then = *leftcond;
640                let new_then = *_inner_then;
641                let sand = self.solver().and_id(cond, _cond_then);
642                return self.mk_ite(sand, new_then, _clean_else);
643            }
644            _ => {}
645        }
646        // attempt right side flattening:
647        match self.get_tregex(_clean_else) {
648            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
649            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
650                let e2clone = *_e2;
651                let new_cond = self.mb.solver.or_id(cond, *_c2);
652                return self.mk_ite(new_cond, _clean_then, e2clone);
653            }
654            _ => {}
655        }
656
657        if _clean_then == TRegexId::BOT {
658            let flip_cond = self.solver().not_id(cond);
659            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
660            return cleaned_id;
661        }
662
663        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
664    }
665
666    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
667        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
668            return cached;
669        }
670        let result = match *self.get_tregex(tterm) {
671            TRegex::Leaf(_) => tterm,
672            TRegex::ITE(alpha, _then_id, _else_id) => {
673                let notalpha = self.mb.solver.not_id(alpha);
674                if self.mb.solver.unsat_id(beta, alpha) {
675                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
676                    self.clean(beta, _else_id)
677                } else if self.unsat(beta, notalpha) {
678                    // beta ⊆ alpha, so beta ∧ alpha = beta
679                    self.clean(beta, _then_id)
680                } else {
681                    let tc = self.mb.solver.and_id(beta, alpha);
682                    let ec = self.mb.solver.and_id(beta, notalpha);
683                    let new_then = self.clean(tc, _then_id);
684                    let new_else = self.clean(ec, _else_id);
685                    self.mk_ite(alpha, new_then, new_else)
686                }
687            }
688        };
689        self.clean_cache.insert((beta, tterm), result);
690        result
691    }
692
693    fn mk_unary(
694        &mut self,
695        term: TRegexId,
696        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
697    ) -> TRegexId {
698        match self.tr_array[term.0 as usize] {
699            TRegex::Leaf(node_id) => {
700                let applied = apply(self, node_id);
701                self.mk_leaf(applied)
702            }
703            TRegex::ITE(c1, _then, _else) => {
704                let _then1 = self.mk_unary(_then, apply);
705                let _else1 = self.mk_unary(_else, apply);
706                self.mk_ite(c1, _then1, _else1)
707            }
708        }
709    }
710
711    fn mk_binary_result(
712        &mut self,
713        left: TRegexId,
714        right: TRegexId,
715        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
716    ) -> Result<TRegexId, AlgebraError> {
717        match self.tr_array[left.0 as usize] {
718            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
719                TRegex::Leaf(right_node_id) => {
720                    let applied = apply(self, left_node_id, right_node_id)?;
721                    Ok(self.mk_leaf(applied))
722                }
723                TRegex::ITE(c2, _then, _else) => {
724                    let then2 = self.mk_binary_result(left, _then, apply)?;
725                    let else2 = self.mk_binary_result(left, _else, apply)?;
726                    Ok(self.mk_ite(c2, then2, else2))
727                }
728            },
729            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
730                TRegex::Leaf(_) => {
731                    let then2 = self.mk_binary_result(_then1, right, apply)?;
732                    let else2 = self.mk_binary_result(_else1, right, apply)?;
733                    Ok(self.mk_ite(c1, then2, else2))
734                }
735                TRegex::ITE(c2, _then2, _else2) => {
736                    if c1 == c2 {
737                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
738                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
739                        return Ok(self.mk_ite(c1, _then, _else));
740                    }
741                    if c1.0 > c2.0 {
742                        let _then = self.mk_binary_result(_then1, right, apply)?;
743                        let _else = self.mk_binary_result(_else1, right, apply)?;
744                        Ok(self.mk_ite(c1, _then, _else))
745                    } else {
746                        let _then = self.mk_binary_result(left, _then2, apply)?;
747                        let _else = self.mk_binary_result(left, _else2, apply)?;
748                        Ok(self.mk_ite(c2, _then, _else))
749                    }
750                }
751            },
752        }
753    }
754
755    fn mk_binary(
756        &mut self,
757        left: TRegexId,
758        right: TRegexId,
759        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
760    ) -> TRegexId {
761        self.mk_binary_memo.clear();
762        self.mk_binary_inner(left, right, apply)
763    }
764
765    fn mk_binary_inner(
766        &mut self,
767        left: TRegexId,
768        right: TRegexId,
769        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
770    ) -> TRegexId {
771        if left == right {
772            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
773        }
774        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
775            return cached;
776        }
777        let result = match self.tr_array[left.0 as usize] {
778            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
779                TRegex::Leaf(right_node_id) => {
780                    let applied = apply(self, left_node_id, right_node_id);
781                    self.mk_leaf(applied)
782                }
783                TRegex::ITE(c2, _then, _else) => {
784                    let then2 = self.mk_binary_inner(left, _then, apply);
785                    let else2 = self.mk_binary_inner(left, _else, apply);
786                    self.mk_ite(c2, then2, else2)
787                }
788            },
789            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
790                TRegex::Leaf(_) => {
791                    let then2 = self.mk_binary_inner(_then1, right, apply);
792                    let else2 = self.mk_binary_inner(_else1, right, apply);
793                    self.mk_ite(c1, then2, else2)
794                }
795                TRegex::ITE(c2, _then2, _else2) => {
796                    if c1 == c2 {
797                        let _then = self.mk_binary_inner(_then1, _then2, apply);
798                        let _else = self.mk_binary_inner(_else1, _else2, apply);
799                        self.mk_ite(c1, _then, _else)
800                    } else if c1.0 > c2.0 {
801                        let _then = self.mk_binary_inner(_then1, right, apply);
802                        let _else = self.mk_binary_inner(_else1, right, apply);
803                        self.mk_ite(c1, _then, _else)
804                    } else {
805                        let _then = self.mk_binary_inner(left, _then2, apply);
806                        let _else = self.mk_binary_inner(left, _else2, apply);
807                        self.mk_ite(c2, _then, _else)
808                    }
809                }
810            },
811        };
812        self.mk_binary_memo.insert((left, right), result);
813        result
814    }
815
816    pub fn get_nulls(
817        &mut self,
818        pending_rel: u32,
819        mask: Nullability,
820        acc: &mut BTreeSet<NullState>,
821        node_id: NodeId,
822    ) {
823        debug_assert!(node_id != NodeId::MISSING);
824        if !self.is_nullable(node_id, mask) {
825            return;
826        }
827        match self.get_kind(node_id) {
828            Kind::Pred => {}
829            Kind::End => {
830                if mask.has(Nullability::END) {
831                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
832                }
833            }
834            Kind::Begin => {
835                if mask.has(Nullability::BEGIN) {
836                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
837                }
838            }
839            Kind::Concat => {
840                let new_mask = self.nullability(node_id).and(mask);
841                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
842                if self.is_nullable(node_id.left(self), mask) {
843                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
844                }
845            }
846            Kind::Union => {
847                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
848                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
849            }
850            Kind::Inter => {
851                let new_mask = self.nullability(node_id).and(mask);
852                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
853                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
854            }
855            Kind::Star => {
856                acc.insert(NullState::new(mask, pending_rel));
857                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
858            }
859            Kind::Compl => {
860                if !self.is_nullable(node_id.left(self), mask) {
861                    acc.insert(NullState::new(mask, 0));
862                }
863            }
864            Kind::Lookbehind => {
865                let new_mask = self.nullability(node_id).and(mask);
866                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
867                if node_id.right(self) != NodeId::MISSING {
868                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
869                }
870            }
871            Kind::Lookahead => {
872                let la_inner = self.get_lookahead_inner(node_id);
873                if self.is_nullable(la_inner, mask) {
874                    let rel = self.get_lookahead_rel(node_id);
875                    if rel != u32::MAX {
876                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
877                    }
878                    // tail only contributes when body is satisfied
879                    let la_tail = self.get_lookahead_tail(node_id);
880                    if la_tail != NodeId::MISSING {
881                        self.get_nulls(pending_rel, mask, acc, la_tail);
882                    }
883                }
884            }
885            Kind::Counted => {
886                let packed = self.get_extra(node_id);
887                let best = packed >> 16;
888                if best > 0 {
889                    acc.insert(NullState::new(mask, pending_rel + best));
890                }
891            }
892        }
893    }
894
895    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
896        self.get_meta_flags(node_id)
897            .has(MetaFlags::CONTAINS_LOOKAROUND)
898    }
899
900    /// whether node contains `^`, `$`, `\A`, `\z` anchors.
901    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
902        self.get_meta_flags(node_id)
903            .has(MetaFlags::CONTAINS_ANCHORS)
904    }
905
906    pub fn is_infinite(&self, node_id: NodeId) -> bool {
907        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
908    }
909
910    /// returns (min_length, max_length). max = u32::MAX means unbounded.
911    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
912        if self.is_infinite(node_id) {
913            if self.get_kind(node_id) == Kind::Inter {
914                self.get_bounded_length(node_id)
915            } else {
916                (self.get_min_length_only(node_id), u32::MAX)
917            }
918        } else {
919            self.get_bounded_length(node_id)
920        }
921    }
922
923    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
924        if node_id == NodeId::EPS {
925            return (0, 0);
926        }
927        match self.get_kind(node_id) {
928            Kind::End | Kind::Begin => (0, 0),
929            Kind::Pred => (1, 1),
930            Kind::Concat => {
931                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
932                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
933                (lmin + rmin, lmax.saturating_add(rmax))
934            }
935            Kind::Union => {
936                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
937                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
938                (lmin.min(rmin), lmax.max(rmax))
939            }
940            Kind::Inter => {
941                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
942                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
943                (lmin.max(rmin), lmax.min(rmax))
944            }
945            Kind::Lookahead => {
946                let body = node_id.left(self);
947                if self.is_infinite(body) {
948                    return (0, u32::MAX);
949                }
950                let right = node_id.right(self);
951                if right.is_missing() {
952                    (0, 0)
953                } else {
954                    self.get_min_max_length(right)
955                }
956            }
957            Kind::Counted => (0, 0),
958            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
959        }
960    }
961
962    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
963        match self.get_kind(node_id) {
964            Kind::End | Kind::Begin => Some(0),
965            Kind::Pred => Some(1),
966            Kind::Concat => {
967                let l = self.get_fixed_length(node_id.left(self))?;
968                let r = self.get_fixed_length(node_id.right(self))?;
969                Some(l + r)
970            }
971            Kind::Union => {
972                let l = self.get_fixed_length(node_id.left(self))?;
973                let r = self.get_fixed_length(node_id.right(self))?;
974                if l == r {
975                    Some(l)
976                } else {
977                    None
978                }
979            }
980            Kind::Inter => {
981                let l = self.get_fixed_length(node_id.left(self))?;
982                let r = self.get_fixed_length(node_id.right(self))?;
983                if l == r {
984                    Some(l)
985                } else {
986                    None
987                }
988            }
989            Kind::Lookahead => {
990                let right = node_id.right(self);
991                if right.is_missing() {
992                    Some(0)
993                } else {
994                    self.get_fixed_length(right)
995                }
996            }
997            Kind::Counted => Some(0),
998            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
999        }
1000    }
1001
1002    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1003        match self.get_kind(node_id) {
1004            Kind::End | Kind::Begin => 0,
1005            Kind::Pred => 1,
1006            Kind::Concat => {
1007                self.get_min_length_only(node_id.left(self))
1008                    + self.get_min_length_only(node_id.right(self))
1009            }
1010            Kind::Union => self
1011                .get_min_length_only(node_id.left(self))
1012                .min(self.get_min_length_only(node_id.right(self))),
1013            Kind::Inter => self
1014                .get_min_length_only(node_id.left(self))
1015                .max(self.get_min_length_only(node_id.right(self))),
1016            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1017            Kind::Compl => {
1018                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1019                    0
1020                } else {
1021                    1
1022                }
1023            }
1024        }
1025    }
1026
1027    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1028        if node_id == NodeId::TS {
1029            return true;
1030        }
1031        match self.get_kind(node_id) {
1032            Kind::Inter => {
1033                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1034            }
1035            Kind::Union => {
1036                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1037            }
1038            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1039            _ => false,
1040        }
1041    }
1042
1043    #[inline]
1044    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1045        if self.get_kind(node_id) == Kind::Concat {
1046            self.ends_with_ts(node_id.right(self))
1047        } else {
1048            node_id == NodeId::TS
1049        }
1050    }
1051
1052    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1053        debug_assert!(node_id != NodeId::MISSING);
1054        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1055    }
1056
1057    pub(crate) fn cache_der(
1058        &mut self,
1059        node_id: NodeId,
1060        result: TRegexId,
1061        mask: Nullability,
1062    ) -> TRegexId {
1063        if mask == Nullability::CENTER {
1064            self.tr_der_center[node_id.0 as usize] = result
1065        } else {
1066            self.tr_der_begin[node_id.0 as usize] = result
1067        };
1068        result
1069    }
1070
1071    pub(crate) fn try_cached_der(
1072        &mut self,
1073        node_id: NodeId,
1074        mask: Nullability,
1075    ) -> Option<TRegexId> {
1076        let cache = if mask == Nullability::CENTER {
1077            &mut self.tr_der_center
1078        } else {
1079            &mut self.tr_der_begin
1080        };
1081        match cache.get(node_id.0 as usize) {
1082            Some(&TRegexId::MISSING) => {}
1083            Some(&result) => {
1084                return Some(result);
1085            }
1086            None => {
1087                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1088            }
1089        }
1090        None
1091    }
1092
1093    pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1094        let mut term = self.get_tregex(der);
1095        loop {
1096            match *term {
1097                TRegex::Leaf(node_id) => return node_id,
1098                TRegex::ITE(cond, _then, _else) => {
1099                    if self.solver().is_sat_id(set, cond) {
1100                        term = self.get_tregex(_then);
1101                    } else {
1102                        term = self.get_tregex(_else);
1103                    }
1104                }
1105            }
1106        }
1107    }
1108
1109    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1110        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1111        debug_assert!(
1112            node_id != NodeId::MISSING,
1113            "attempting to derive missing node"
1114        );
1115        if let Some(result) = self.try_cached_der(node_id, mask) {
1116            return Ok(result);
1117        }
1118
1119        let result = match node_id.kind(self) {
1120            Kind::Compl => {
1121                let leftd = node_id.left(self).der(self, mask)?;
1122                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1123            }
1124            Kind::Inter => {
1125                let leftd = node_id.left(self).der(self, mask)?;
1126                let rightd = node_id.right(self).der(self, mask)?;
1127                {
1128                    let this = &mut *self;
1129                    this.mk_binary(
1130                        leftd,
1131                        rightd,
1132                        &mut (|b, left, right| b.mk_inter(left, right)),
1133                    )
1134                }
1135            }
1136            Kind::Union => {
1137                let leftd = self.der(node_id.left(self), mask)?;
1138                let rightd = self.der(node_id.right(self), mask)?;
1139                {
1140                    let this = &mut *self;
1141                    this.mk_binary(
1142                        leftd,
1143                        rightd,
1144                        &mut (|b, left, right| b.mk_union(left, right)),
1145                    )
1146                }
1147            }
1148            Kind::Concat => {
1149                let head = node_id.left(self);
1150                let tail = node_id.right(self);
1151                let tail_leaf = self.mk_leaf(tail);
1152                let head_der = self.der(head, mask)?;
1153
1154                if self.is_nullable(head, mask) {
1155                    let rightd = self.der(tail, mask)?;
1156                    let ldr = {
1157                        let this = &mut *self;
1158                        this.mk_binary(
1159                            head_der,
1160                            tail_leaf,
1161                            &mut (|b, left, right| b.mk_concat(left, right)),
1162                        )
1163                    };
1164                    {
1165                        let this = &mut *self;
1166                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1167                    }
1168                } else {
1169                    let this = &mut *self;
1170                    this.mk_binary(
1171                        head_der,
1172                        tail_leaf,
1173                        &mut (|b, left, right| b.mk_concat(left, right)),
1174                    )
1175                }
1176            }
1177            Kind::Star => {
1178                if node_id == NodeId::EPS {
1179                    TRegexId::BOT
1180                } else {
1181                    let left = node_id.left(self);
1182                    let r_decr_leaf = self.mk_leaf(node_id);
1183                    let r_der = self.der(left, mask)?;
1184                    let this = &mut *self;
1185                    this.mk_binary(
1186                        r_der,
1187                        r_decr_leaf,
1188                        &mut (|b, left, right| b.mk_concat(left, right)),
1189                    )
1190                }
1191            }
1192            Kind::Lookbehind => {
1193                let lb_prev_der = {
1194                    let lb_prev = self.get_lookbehind_prev(node_id);
1195                    if lb_prev == NodeId::MISSING {
1196                        TRegexId::MISSING
1197                    } else {
1198                        self.der(lb_prev, mask)?
1199                    }
1200                };
1201                let lb_inner = self.get_lookbehind_inner(node_id);
1202                let lb_inner_der = self.der(lb_inner, mask)?;
1203                {
1204                    let this = &mut *self;
1205                    this.mk_binary_result(
1206                        lb_inner_der,
1207                        lb_prev_der,
1208                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1209                    )?
1210                }
1211            }
1212            Kind::Lookahead => {
1213                let la_tail = self.get_lookahead_tail(node_id);
1214                let la_body = node_id.left(self);
1215                let rel = self.get_lookahead_rel(node_id);
1216
1217                if self.is_nullable(la_body, mask) {
1218                    // nullabilty is taken once, just keep the body
1219                    let right = node_id.right(self).missing_to_eps();
1220                    let rder = self.der(right, mask).clone();
1221                    return rder;
1222                }
1223
1224                if rel == u32::MAX {
1225                    let la_body_der = self.der(la_body, mask)?;
1226                    if la_tail.is_kind(self, Kind::Pred) {
1227                        let transitioned =
1228                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1229                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1230                        let concated = self.mk_concat(la_tail, new_la);
1231                        return self.der(concated, mask);
1232                    }
1233                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1234                        let left = la_tail.left(self);
1235                        let tset = left.pred_tset(self);
1236                        let transitioned = self.transition_term(la_body_der, tset);
1237                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1238                        let tail_right = la_tail.right(self);
1239                        let concated = self.mk_concat(new_la, tail_right);
1240                        let concated = self.mk_concat(left, concated);
1241                        return self.der(concated, mask);
1242                    }
1243                }
1244
1245                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1246                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1247                    let concated = self.mk_concat(la_body, nulls_mask);
1248                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1249                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1250                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1251                    let new_union = self.mk_union(concated_look, new_look);
1252                    return self.der(new_union, mask);
1253                }
1254
1255                let la_tail_der = if la_tail == NodeId::MISSING {
1256                    TRegexId::MISSING
1257                } else {
1258                    if self.is_nullable(la_tail, mask) {
1259                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1260                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1261                        let la_union = self.mk_union(la_tail, nulls_la);
1262                        self.der(la_union, mask)?
1263                    } else {
1264                        self.der(la_tail, mask)?
1265                    }
1266                };
1267
1268                let la_body_der = self.der(la_body, mask)?;
1269
1270                if rel != u32::MAX && rel > self.lookahead_context_max {
1271                    return Err(AlgebraError::AnchorLimit);
1272                }
1273
1274                let la = {
1275                    let this = &mut *self;
1276                    let rel = helpers::incr_rel(rel);
1277                    this.mk_binary(
1278                        la_body_der,
1279                        la_tail_der,
1280                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1281                    )
1282                };
1283
1284                if rel != u32::MAX
1285                    && la_tail_der != TRegexId::MISSING
1286                    && self.is_nullable(la_tail, mask)
1287                {
1288                    let look_only = {
1289                        let this = &mut *self;
1290                        let right = TRegexId::MISSING;
1291                        let rel = helpers::incr_rel(rel);
1292                        this.mk_binary(
1293                            la_body_der,
1294                            right,
1295                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1296                        )
1297                    };
1298                    {
1299                        let this = &mut *self;
1300                        this.mk_binary(
1301                            look_only,
1302                            la,
1303                            &mut (|b, left, right| b.mk_union(left, right)),
1304                        )
1305                    }
1306                } else {
1307                    la
1308                }
1309            }
1310            Kind::Counted => {
1311                let body = node_id.left(self);
1312                let chain = node_id.right(self);
1313                let packed = self.get_extra(node_id);
1314                let step = (packed & 0xFFFF) as u16;
1315                let best = (packed >> 16) as u16;
1316
1317                let mid_best = if self.is_nullable(body, mask) && step >= best {
1318                    step
1319                } else {
1320                    best
1321                };
1322
1323                let body_der = self.der(body, mask)?;
1324                let new_step = step.saturating_add(1);
1325                self.mk_unary(body_der, &mut |b, new_body| {
1326                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1327                        new_step
1328                    } else {
1329                        mid_best
1330                    };
1331                    let packed = (final_best as u32) << 16 | new_step as u32;
1332                    b.mk_counted(new_body, chain, packed)
1333                })
1334            }
1335            Kind::Begin | Kind::End => TRegexId::BOT,
1336            Kind::Pred => {
1337                let psi = node_id.pred_tset(self);
1338                if psi == TSetId::EMPTY {
1339                    TRegexId::BOT
1340                } else {
1341                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1342                }
1343            }
1344        };
1345
1346        // println!("{} {}", node_id.0, self.pp(node_id));
1347        // println!("node: {} (total: {})", node_id.0, self.num_created);
1348
1349        self.cache_der(node_id, result, mask);
1350        Ok(result)
1351    }
1352
1353    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1354        debug_assert!(meta_id != MetadataId::MISSING);
1355        match self.metadata.get_mut(node_id.0 as usize) {
1356            Some(v) => *v = meta_id,
1357            None => {
1358                self.metadata
1359                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1360                self.metadata[node_id.0 as usize] = meta_id;
1361            }
1362        }
1363    }
1364
1365    fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1366        debug_assert!(reversed_id != NodeId::MISSING);
1367        match self.reversed.get_mut(node_id.0 as usize) {
1368            Some(v) => *v = reversed_id,
1369            None => {
1370                self.reversed
1371                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1372                self.reversed[node_id.0 as usize] = reversed_id;
1373            }
1374        }
1375    }
1376
1377    fn init(&mut self, inst: NodeKey) -> NodeId {
1378        self.num_created += 1;
1379        let node_id = NodeId(self.num_created);
1380        self.index.insert(inst.clone(), node_id);
1381        match inst.kind {
1382            Kind::Pred => {
1383                let meta_id = self.mb.get_meta_id(Metadata {
1384                    flags: MetaFlags::ZERO,
1385                    nulls: NullsId::EMPTY,
1386                });
1387                self.init_metadata(node_id, meta_id);
1388            }
1389            Kind::Begin => {
1390                let meta_id = self.mb.get_meta_id(Metadata {
1391                    flags: MetaFlags::with_nullability(
1392                        Nullability::BEGIN,
1393                        MetaFlags::CONTAINS_ANCHORS,
1394                    ),
1395                    nulls: NullsId::BEGIN0,
1396                });
1397                self.init_metadata(node_id, meta_id);
1398            }
1399            Kind::End => {
1400                let meta_id = self.mb.get_meta_id(Metadata {
1401                    flags: MetaFlags::with_nullability(
1402                        Nullability::END,
1403                        MetaFlags::CONTAINS_ANCHORS,
1404                    ),
1405                    nulls: NullsId::END0,
1406                });
1407                self.init_metadata(node_id, meta_id);
1408            }
1409            Kind::Inter => {
1410                let m1 = self.get_node_meta_id(inst.left);
1411                let m2 = self.get_node_meta_id(inst.right);
1412                let meta_id = {
1413                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1414                    let mask_l = inst.left.nullability(self);
1415                    let mask_r = inst.right.nullability(self);
1416                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1417                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1418                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1419                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1420                    let new_meta = Metadata {
1421                        flags: self.mb.flags_inter(m1, m2),
1422                        nulls,
1423                    };
1424                    self.mb.get_meta_id(new_meta)
1425                };
1426                self.init_metadata(node_id, meta_id);
1427            }
1428            Kind::Union => {
1429                let m1 = self.get_node_meta_id(inst.left);
1430                let m2 = self.get_node_meta_id(inst.right);
1431                let meta_id = {
1432                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1433                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1434                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1435                    let new_meta = Metadata {
1436                        flags: self.mb.flags_union(m1, m2),
1437                        nulls,
1438                    };
1439                    self.mb.get_meta_id(new_meta)
1440                };
1441                self.init_metadata(node_id, meta_id);
1442            }
1443            Kind::Concat => {
1444                let flags = self.mb.flags_concat(
1445                    self.get_node_meta_id(inst.left),
1446                    self.get_node_meta_id(inst.right),
1447                );
1448
1449                let right_nullability = inst.right.nullability(self);
1450                let left_nullability = inst.left.nullability(self);
1451                let nulls_left = self.get_nulls_id(inst.left);
1452                let nulls_right = self.get_nulls_id(inst.right);
1453                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1454                let mask = right_nullability.and(left_nullability);
1455                nulls = self.mb.nb.and_mask(nulls, mask);
1456
1457                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1458                self.init_metadata(node_id, new_id);
1459            }
1460            Kind::Star => {
1461                let left_nulls = self.get_nulls_id(inst.left);
1462                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1463                let meta_id = self.mb.get_meta_id(Metadata {
1464                    flags: self
1465                        .mb
1466                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1467                    nulls,
1468                });
1469                self.init_metadata(node_id, meta_id);
1470            }
1471            Kind::Compl => {
1472                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1473                let meta_id = self.mb.get_meta_id(Metadata {
1474                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1475                    nulls,
1476                });
1477                self.init_metadata(node_id, meta_id);
1478            }
1479            Kind::Lookbehind => {
1480                let mut null = self.get_meta_flags(inst.left).nullability();
1481                let mut contains_flags = self.get_flags_contains(inst.left);
1482                if !inst.right.is_missing() {
1483                    null = null.and(self.get_meta_flags(inst.right).nullability());
1484                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1485                }
1486
1487                let left_nulls = self.get_nulls_id(inst.left);
1488                let right_nulls = self.get_nulls_id(inst.right);
1489                let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1490                let meta_id = self.mb.get_meta_id(Metadata {
1491                    flags: MetaFlags::with_nullability(
1492                        null,
1493                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1494                    ),
1495                    nulls,
1496                });
1497                self.init_metadata(node_id, meta_id);
1498            }
1499            Kind::Lookahead => {
1500                let mut nulls = self.get_nulls_id(inst.left);
1501                let left_nullability = inst.left.nullability(self);
1502                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1503                nulls = self.mb.nb.or_id(nulls, nulls_right);
1504                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1505
1506                let la_inner = inst.left;
1507                let la_tail = inst.right;
1508                let null = self
1509                    .get_meta_flags(la_inner)
1510                    .nullability()
1511                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1512                let contains_flags = self
1513                    .get_flags_contains(la_inner)
1514                    .or(self.get_flags_contains(la_tail));
1515
1516                let meta_id = self.mb.get_meta_id(Metadata {
1517                    flags: MetaFlags::with_nullability(
1518                        null,
1519                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1520                    ),
1521                    nulls,
1522                });
1523                self.init_metadata(node_id, meta_id);
1524            }
1525            Kind::Counted => {
1526                let best = inst.extra >> 16;
1527                let (null, nulls) = if best > 0 {
1528                    let mut ns = BTreeSet::new();
1529                    ns.insert(NullState::new(Nullability::CENTER, best));
1530                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1531                } else {
1532                    (Nullability::NEVER, NullsId::EMPTY)
1533                };
1534                let meta_id = self.mb.get_meta_id(Metadata {
1535                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1536                    nulls,
1537                });
1538                self.init_metadata(node_id, meta_id);
1539            }
1540        }
1541
1542        self.array.push(inst);
1543
1544        if let Some(rw) = self.post_init_simplify(node_id) {
1545            self.override_as(node_id, rw)
1546        } else {
1547            node_id
1548        }
1549    }
1550
1551    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1552        match self.get_kind(node_id) {
1553            Kind::Concat => {
1554                let lhs = node_id.left(self);
1555                let rhs = node_id.right(self);
1556                if lhs.is_pred_star(self).is_some() {
1557                    if let Some(opttail) = rhs.is_opt_v(self) {
1558                        if let Some(true) = self.subsumes(lhs, opttail) {
1559                            // return self.override_as(node_id, lhs);
1560                            return Some(lhs);
1561                        }
1562                    }
1563                }
1564            }
1565            Kind::Union => {
1566                let lhs = node_id.left(self);
1567                let rhs = node_id.right(self);
1568                let mut subsumed = false;
1569                rhs.iter_union_while(self, &mut |b, branch| {
1570                    if b.nullable_subsumes(branch, lhs) {
1571                        subsumed = true;
1572                    }
1573                    !subsumed
1574                });
1575                if subsumed {
1576                    return Some(rhs);
1577                }
1578                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1579                    return Some(rhs);
1580                }
1581            }
1582            _ => {}
1583        }
1584
1585        None
1586    }
1587
1588    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1589    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1590        if self.get_kind(lhs) != Kind::Union {
1591            return false; // single branch already checked by nullable_subsumes
1592        }
1593        let mut rhs_branches = Vec::new();
1594        let mut curr = rhs;
1595        while self.get_kind(curr) == Kind::Union {
1596            rhs_branches.push(self.get_left(curr));
1597            curr = self.get_right(curr);
1598        }
1599        rhs_branches.push(curr);
1600
1601        curr = lhs;
1602        while self.get_kind(curr) == Kind::Union {
1603            if !rhs_branches.contains(&self.get_left(curr)) {
1604                return false;
1605            }
1606            curr = self.get_right(curr);
1607        }
1608        rhs_branches.contains(&curr)
1609    }
1610
1611    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1612    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1613        if node == target {
1614            return true;
1615        }
1616        match self.get_kind(node) {
1617            Kind::Union => {
1618                self.nullable_subsumes(self.get_left(node), target)
1619                    || self.nullable_subsumes(self.get_right(node), target)
1620            }
1621            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1622                self.nullable_subsumes(self.get_right(node), target)
1623            }
1624            _ => false,
1625        }
1626    }
1627
1628    pub fn num_nodes(&self) -> u32 {
1629        self.num_created
1630    }
1631    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1632        match self.index.get(&inst) {
1633            Some(&id) => id,
1634            None => self.init(inst),
1635        }
1636    }
1637    #[inline]
1638    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1639        self.index.get(inst)
1640    }
1641
1642    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1643        self.index.insert(key, subsumed);
1644        subsumed
1645    }
1646
1647    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1648        let key = &self.array[key.0 as usize];
1649        let entry = self.index.get_mut(key).unwrap();
1650        *entry = subsumed;
1651        subsumed
1652    }
1653
1654    #[inline]
1655    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1656        self.array[node_id.0 as usize].left
1657    }
1658
1659    #[inline]
1660    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1661        self.array[node_id.0 as usize].right
1662    }
1663
1664    #[inline]
1665    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1666        self.array[node_id.0 as usize].extra
1667    }
1668
1669    #[inline]
1670    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1671        debug_assert!(self.get_kind(node_id) == Kind::Concat);
1672        let mut curr = node_id;
1673        while self.get_kind(curr) == Kind::Concat {
1674            curr = curr.right(self);
1675        }
1676        curr
1677    }
1678
1679    /// true if node ends with a trailing Lookahead(_, MISSING)
1680    fn has_trailing_la(&self, node: NodeId) -> bool {
1681        let end = match self.get_kind(node) {
1682            Kind::Concat => self.get_concat_end(node),
1683            Kind::Lookahead => node,
1684            _ => return false,
1685        };
1686        self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1687    }
1688
1689    /// strip the trailing Lookahead from a node, returning (body, la).
1690    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1691        if self.get_kind(node) == Kind::Lookahead {
1692            return (NodeId::EPS, node);
1693        }
1694        debug_assert!(self.get_kind(node) == Kind::Concat);
1695        let right = node.right(self);
1696        if self.get_kind(right) != Kind::Concat {
1697            // Concat(body, LA) → (body, LA)
1698            return (node.left(self), right);
1699        }
1700        let (stripped, la) = self.strip_trailing_la(right);
1701        (self.mk_concat(node.left(self), stripped), la)
1702    }
1703    #[inline]
1704    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1705        debug_assert!(matches!(
1706            self.get_kind(lookahead_node_id),
1707            Kind::Lookahead | Kind::Counted
1708        ));
1709        lookahead_node_id.left(self)
1710    }
1711    #[inline]
1712    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1713        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1714        lookahead_node_id.right(self)
1715    }
1716    #[inline]
1717    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1718        debug_assert!(
1719            matches!(
1720                self.get_kind(lookahead_node_id),
1721                Kind::Lookahead | Kind::Counted
1722            ),
1723            "not lookahead/counted: {:?}",
1724            self.pp(lookahead_node_id)
1725        );
1726        self.get_extra(lookahead_node_id)
1727    }
1728    #[inline]
1729    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1730        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1731        lookbehind_node_id.left(self)
1732    }
1733    #[inline]
1734    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1735        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1736        lookbehind_node_id.right(self)
1737    }
1738
1739    #[inline]
1740    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1741        debug_assert!(
1742            self.array.len() > node_id.0 as usize,
1743            "array len: {:?}",
1744            node_id
1745        );
1746        self.array[node_id.0 as usize].kind
1747    }
1748
1749    #[inline]
1750    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1751        &self.array[node_id.0 as usize]
1752    }
1753
1754    #[inline]
1755    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1756        self.metadata[node_id.0 as usize]
1757    }
1758
1759    #[inline]
1760    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1761        debug_assert!(node_id.0 != u32::MAX);
1762        let meta_id = self.get_node_meta_id(node_id);
1763        debug_assert!(meta_id != MetadataId::MISSING);
1764        &self.mb.array[meta_id.0 as usize]
1765    }
1766
1767    #[inline]
1768    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1769        if node_id == NodeId::MISSING {
1770            NullsId::EMPTY
1771        } else {
1772            self.get_meta(node_id).nulls
1773        }
1774    }
1775
1776    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1777        self.mb
1778            .nb
1779            .array
1780            .iter()
1781            .map(|set| set.iter().cloned().collect())
1782            .collect()
1783    }
1784
1785    pub fn nulls_count(&self) -> usize {
1786        self.mb.nb.array.len()
1787    }
1788
1789    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1790        self.mb.nb.array[id as usize].iter().cloned().collect()
1791    }
1792
1793    #[inline]
1794    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1795        if node_id == NodeId::MISSING {
1796            NullsId::EMPTY
1797        } else {
1798            let nulls = self.get_meta(node_id).nulls;
1799            self.mb.nb.and_mask(nulls, mask)
1800        }
1801    }
1802
1803    #[inline]
1804    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1805        let meta_id = self.get_node_meta_id(node_id);
1806        let meta = &self.mb.array[meta_id.0 as usize];
1807        meta.flags
1808    }
1809
1810    #[inline]
1811    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1812        self.get_meta(node_id).flags.nullability()
1813    }
1814
1815    #[inline]
1816    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1817        let meta_id = self.get_node_meta_id(node_id);
1818        let meta = &self.mb.array[meta_id.0 as usize];
1819        meta.flags.all_contains_flags()
1820    }
1821
1822    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1823        if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1824            return self.strip_lb(node_id.right(self));
1825        }
1826        self.strip_lb_inner(node_id)
1827    }
1828
1829    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1830        if !self.contains_look(node_id) {
1831            return Ok(node_id);
1832        }
1833        if self.get_kind(node_id) == Kind::Concat
1834            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1835        {
1836            return self.strip_lb_inner(node_id.right(self));
1837        }
1838        if self.get_kind(node_id) == Kind::Inter {
1839            let left = self.strip_lb_inner(node_id.left(self))?;
1840            let right = self.strip_lb_inner(node_id.right(self))?;
1841            return Ok(self.mk_inter(left, right));
1842        }
1843        if self.get_kind(node_id) == Kind::Union {
1844            let left = self.strip_lb_inner(node_id.left(self))?;
1845            let right = self.strip_lb_inner(node_id.right(self))?;
1846            return Ok(self.mk_union(left, right));
1847        }
1848        match self.get_kind(node_id) {
1849            Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1850            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1851                Err(AlgebraError::UnsupportedPattern)
1852            }
1853            _ => Ok(node_id),
1854        }
1855    }
1856
1857    // for prefix purposes we prune any \A leading paths
1858    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1859        if !self.contains_anchors(node_id) {
1860            return node_id;
1861        }
1862        match self.get_kind(node_id) {
1863            Kind::Begin => NodeId::BOT,
1864            Kind::Concat => {
1865                let left = self.nonbegins(node_id.left(self));
1866                if left == NodeId::BOT {
1867                    return NodeId::BOT;
1868                }
1869                self.mk_concat(left, node_id.right(self))
1870            }
1871            Kind::Union => {
1872                let left = self.nonbegins(node_id.left(self));
1873                let right = self.nonbegins(node_id.right(self));
1874                self.mk_union(left, right)
1875            }
1876            _ => node_id,
1877        }
1878    }
1879
1880    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1881        match self.get_kind(node_id) {
1882            Kind::Concat => {
1883                let head = node_id.left(self);
1884                match self.get_kind(head) {
1885                    _ if self.any_nonbegin_nullable(head) => {
1886                        self.strip_prefix_safe(node_id.right(self))
1887                    }
1888                    _ => node_id,
1889                }
1890            }
1891            _ => node_id,
1892        }
1893    }
1894    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1895        match self.get_kind(node_id) {
1896            Kind::Begin => NodeId::BOT,
1897            Kind::Concat => {
1898                let head = self.prune_begin(node_id.left(self));
1899                let tail = self.prune_begin(node_id.right(self));
1900                self.mk_concat(head, tail)
1901            }
1902            Kind::Lookbehind => {
1903                if !node_id.right(self).is_missing() {
1904                    return node_id;
1905                }
1906                let head = self.prune_begin(node_id.left(self));
1907                head
1908            }
1909            Kind::Union => {
1910                let left = self.prune_begin(node_id.left(self));
1911                let right = self.prune_begin(node_id.right(self));
1912                self.mk_union(left, right)
1913            }
1914            _ => node_id,
1915        }
1916    }
1917
1918    pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1919        if !self.contains_look(node_id) {
1920            return Ok(node_id);
1921        }
1922        if self.get_kind(node_id) == Kind::Concat
1923            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1924        {
1925            let left = node_id.left(self);
1926            let ll = left.left(self).missing_to_eps();
1927            let lr = left.right(self).missing_to_eps();
1928            let new_l = self.mk_concat(ll, lr);
1929            let new = self.mk_concat(new_l, node_id.right(self));
1930            return Ok(new);
1931        }
1932        if self.get_kind(node_id) == Kind::Inter {
1933            let left = self.normalize_rev(node_id.left(self))?;
1934            let right = self.normalize_rev(node_id.right(self))?;
1935            return Ok(self.mk_inter(left, right));
1936        }
1937        if self.get_kind(node_id) == Kind::Union {
1938            let left = self.normalize_rev(node_id.left(self))?;
1939            let right = self.normalize_rev(node_id.right(self))?;
1940            return Ok(self.mk_union(left, right));
1941        }
1942        match self.get_kind(node_id) {
1943            Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1944            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1945                Err(AlgebraError::UnsupportedPattern)
1946            }
1947            _ => Ok(node_id),
1948        }
1949    }
1950
1951    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1952        debug_assert!(node_id != NodeId::MISSING);
1953        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1954            if *rev != NodeId::MISSING {
1955                return Ok(*rev);
1956            }
1957        }
1958        let rw = match self.get_kind(node_id) {
1959            Kind::End => NodeId::BEGIN,
1960            Kind::Begin => NodeId::END,
1961            Kind::Pred => node_id,
1962            Kind::Concat => {
1963                let left = self.reverse(node_id.left(self))?;
1964                let right = self.reverse(node_id.right(self))?;
1965                self.mk_concat(right, left)
1966            }
1967            Kind::Union => {
1968                let left = self.reverse(node_id.left(self))?;
1969                let right = self.reverse(node_id.right(self))?;
1970                self.mk_union(left, right)
1971            }
1972            Kind::Inter => {
1973                let left = self.reverse(node_id.left(self))?;
1974                let right = self.reverse(node_id.right(self))?;
1975                self.mk_inter(left, right)
1976            }
1977            Kind::Star => {
1978                let body = self.reverse(node_id.left(self))?;
1979                self.mk_star(body)
1980            }
1981            Kind::Compl => {
1982                if self.contains_look(node_id.left(self)) {
1983                    return Err(AlgebraError::UnsupportedPattern);
1984                }
1985                let body = self.reverse(node_id.left(self))?;
1986                self.mk_compl(body)
1987            }
1988            Kind::Lookbehind => {
1989                let prev = self.get_lookbehind_prev(node_id);
1990                let inner_id = self.get_lookbehind_inner(node_id);
1991                let rev_inner = self.reverse(inner_id)?;
1992                let rev_prev = if prev != NodeId::MISSING {
1993                    self.reverse(prev)?
1994                } else {
1995                    NodeId::MISSING
1996                };
1997                self.mk_lookahead(rev_inner, rev_prev, 0)
1998            }
1999            Kind::Lookahead => {
2000                let rel = self.get_lookahead_rel(node_id);
2001                if rel == u32::MAX {
2002                    // rel MAX holds no nullability - rewrite to intersection
2003                    let lbody = self.get_lookahead_inner(node_id);
2004                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2005                    let lbody_ts = self.mk_concat(lbody, NodeId::TS);
2006                    let ltail_ts = self.mk_concat(ltail, NodeId::TS);
2007                    let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2008                    let rev = self.reverse(as_inter)?;
2009                    self.init_reversed(node_id, rev);
2010                    return Ok(rev);
2011                }
2012                if rel != 0 {
2013                    return Err(AlgebraError::UnsupportedPattern);
2014                }
2015                let tail_node = self.get_lookahead_tail(node_id);
2016                let rev_tail = if tail_node != NodeId::MISSING {
2017                    self.reverse(tail_node)?
2018                } else {
2019                    NodeId::MISSING
2020                };
2021                let inner_id = self.get_lookahead_inner(node_id);
2022                let rev_inner = self.reverse(inner_id)?;
2023                self.mk_lookbehind(rev_inner, rev_tail)
2024            }
2025            Kind::Counted => {
2026                return Err(AlgebraError::UnsupportedPattern);
2027            }
2028        };
2029        self.init_reversed(node_id, rw);
2030        Ok(rw)
2031    }
2032
2033    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2034        let node = NodeKey {
2035            kind: Kind::Pred,
2036            left: NodeId::MISSING,
2037            right: NodeId::MISSING,
2038            extra: pred.0,
2039        };
2040        self.get_node_id(node)
2041    }
2042
2043    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2044        let key = NodeKey {
2045            kind: Kind::Compl,
2046            left: body,
2047            right: NodeId::MISSING,
2048            extra: u32::MAX,
2049        };
2050        if let Some(id) = self.key_is_created(&key) {
2051            return *id;
2052        }
2053        if body == NodeId::BOT {
2054            return NodeId::TS;
2055        }
2056        if body == NodeId::TS {
2057            return NodeId::BOT;
2058        }
2059
2060        if let Some(contains_body) = body.is_contains(self) {
2061            if contains_body.is_pred(self) {
2062                let pred = contains_body.pred_tset(self);
2063                let notpred = self.mk_pred_not(pred);
2064                let node = self.mk_star(notpred);
2065                return self.init_as(key, node);
2066            } else if contains_body == NodeId::END {
2067                return self.init_as(key, NodeId::BOT);
2068            }
2069        };
2070
2071        if self.get_kind(body) == Kind::Compl {
2072            return self.get_node(body).left;
2073        }
2074
2075        self.get_node_id(key)
2076    }
2077
2078    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2079        let nid = self.get_nulls_id(body);
2080        let nref = self.mb.nb.get_set_ref(nid).clone();
2081        let mut futures = NodeId::BOT;
2082        for n in nref.iter() {
2083            if !n.is_mask_nullable(mask) {
2084                continue;
2085            }
2086
2087            let eff = if n.rel == 0 {
2088                NodeId::EPS
2089            } else {
2090                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2091            };
2092            futures = self.mk_union(futures, eff)
2093        }
2094        futures
2095    }
2096
2097    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2098        if cfg!(feature = "norewrite") {
2099            return None;
2100        }
2101
2102        if self.get_kind(tail) == Kind::Lookbehind {
2103            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2104            return self
2105                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2106                .ok();
2107        }
2108        if self.get_kind(head) == Kind::Lookahead {
2109            let la_tail = self.get_lookahead_tail(head);
2110            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2111            if new_la_tail.is_center_nullable(self) {
2112                let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2113                if non_null_tail == NodeId::BOT {
2114                    return None;
2115                }
2116                let la_body = self.get_lookahead_inner(head);
2117                return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2118            }
2119            let la_body = self.get_lookahead_inner(head);
2120            let la_rel = self.get_lookahead_rel(head);
2121            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2122                let tail_rel = self.get_lookahead_rel(new_la_tail);
2123                tail_rel + la_rel
2124            } else {
2125                u32::MAX
2126            };
2127
2128            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2129        }
2130
2131        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2132            return Some(head);
2133        }
2134        if head == NodeId::TS && tail == NodeId::END {
2135            return Some(head);
2136        }
2137
2138        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2139            return Some(NodeId::TS);
2140        }
2141
2142        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2143            return Some(NodeId::TS);
2144        }
2145
2146        if self.get_kind(tail) == Kind::Union && head == NodeId::TS {
2147            let mut should_distribute_top = false;
2148            self.iter_unions(tail, |v| {
2149                if v == NodeId::BEGIN || self.starts_with_ts(v) {
2150                    should_distribute_top = true;
2151                }
2152            });
2153            if should_distribute_top {
2154                let mut new_union = NodeId::BOT;
2155                let mut curr = tail;
2156                while self.get_kind(curr) == Kind::Union {
2157                    let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2158                    new_union = self.mk_union(new_node, new_union);
2159                    curr = curr.right(self);
2160                }
2161                let new_node = self.mk_concat(NodeId::TS, curr);
2162                new_union = self.mk_union(new_union, new_node);
2163                return Some(new_union);
2164            }
2165        }
2166
2167        if self.get_kind(head) == Kind::Union && head == NodeId::TS {
2168            let mut should_distribute_top = false;
2169            self.iter_unions(head, |v| {
2170                if v == NodeId::END || self.starts_with_ts(v) {
2171                    should_distribute_top = true;
2172                }
2173            });
2174            if should_distribute_top {
2175                let mut new_union = NodeId::BOT;
2176                let mut curr = head;
2177                while self.get_kind(curr) == Kind::Union {
2178                    let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2179                    new_union = self.mk_union(new_node, new_union);
2180                    curr = curr.right(self);
2181                }
2182                let new_node = self.mk_concat(curr, NodeId::TS);
2183                new_union = self.mk_union(new_union, new_node);
2184                return Some(new_union);
2185            }
2186        }
2187
2188        if self.get_kind(head) == Kind::Inter && tail == NodeId::TS {
2189            let mut alltopstar = true;
2190            iter_inter!(self, head, |v| {
2191                alltopstar = self.ends_with_ts(v);
2192            });
2193            if alltopstar {
2194                return Some(head);
2195            }
2196        }
2197
2198        if head.is_star(self) && head == tail {
2199            return Some(head);
2200        }
2201
2202        None
2203    }
2204
2205    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2206        use Kind::*;
2207
2208        if cfg!(feature = "norewrite") {
2209            return None;
2210        }
2211        if left == right {
2212            return Some(left);
2213        }
2214
2215        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2216            return Some(right);
2217        }
2218
2219        if self.get_kind(left) == Kind::Pred && self.get_kind(right) == Kind::Pred {
2220            let l = left.pred_tset(self);
2221            let r = right.pred_tset(self);
2222            let solver = self.solver();
2223            let psi = solver.or_id(l, r);
2224            let rewrite = self.mk_pred(psi);
2225            return Some(rewrite);
2226        }
2227
2228        if left == NodeId::EPS
2229            && self.get_extra(left) == 0
2230            && self.nullability(right) == Nullability::ALWAYS
2231        {
2232            return Some(right);
2233        }
2234
2235        if self.get_kind(left) == Kind::Lookahead && self.get_kind(right) == Kind::Lookahead {
2236            let lb = left.left(self);
2237            let lt = left.right(self);
2238            let lrel = left.extra(self);
2239
2240            let rb = right.left(self);
2241            let rt = right.right(self);
2242            let rrel = right.extra(self);
2243
2244            if lrel == rrel && lt.is_missing() && rt.is_missing() {
2245                let unioned = self.mk_union(lb, rb);
2246                let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2247                return Some(node);
2248            }
2249        }
2250
2251        if right.is_kind(self, Concat) {
2252            if left == NodeId::END
2253                && right.left(self) == NodeId::END
2254                && self.nullability(right).has(Nullability::END)
2255            {
2256                return Some(right);
2257            }
2258            // .*|.*a.* => .*(a.*|)
2259            if left == right.left(self) {
2260                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2261                let rw = self.mk_concat(left, rhs);
2262                return Some(rw);
2263            }
2264            if left.is_kind(self, Concat) {
2265                let head1 = left.left(self);
2266                let head2 = right.left(self);
2267
2268                if head1 == head2 {
2269                    let t1 = left.right(self);
2270                    let t2 = right.right(self);
2271                    // opportunistic rewrites
2272                    if head1 == NodeId::TS {
2273                        if t2.has_concat_tail(self, t1) {
2274                            return Some(left);
2275                        }
2276                        if t1.has_concat_tail(self, t2) {
2277                            return Some(right);
2278                        }
2279                    }
2280                    let un = self.mk_union(t1, t2);
2281                    return Some(self.mk_concat(left.left(self), un));
2282                }
2283
2284                // xa|ya => (x|y)a - suffix factoring via reverse
2285                // disabled: introduces reversed intermediate patterns that bloat derivative DFA
2286                if false {
2287                    let end1 = self.get_concat_end(left);
2288                    let end2 = self.get_concat_end(right);
2289                    if end1 == end2 {
2290                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2291                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2292                            let rev1 = self.reverse(left).unwrap();
2293                            let rev2 = self.reverse(right).unwrap();
2294
2295                            let union_rev = self.mk_union(rev1, rev2);
2296                            return Some(self.reverse(union_rev).unwrap());
2297                        }
2298                    }
2299                }
2300            }
2301            if left.is_pred(self) && left == right.left(self) {
2302                let un = self.mk_opt(right.right(self));
2303                let conc = self.mk_concat(left, un);
2304                return Some(conc);
2305            }
2306        }
2307
2308        if left == NodeId::EPS && right.is_plus(self) {
2309            let result = self.mk_star(right.left(self));
2310            return Some(result);
2311        }
2312
2313        // e.g. (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2314        if self.flags == BuilderFlags::SUBSUME {
2315            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2316                return Some(rw);
2317            }
2318        }
2319
2320        None
2321    }
2322
2323    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2324        let mut curr = node;
2325        while self.get_kind(curr) == Kind::Inter {
2326            out.push(self.get_left(curr));
2327            curr = self.get_right(curr);
2328        }
2329        out.push(curr);
2330    }
2331
2332    /// check if node is [_*] Pred(X)^count . Star(Y)
2333    /// returns (has_prefix_star, pred_tset, star_node, count)
2334    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2335        let mut curr = node;
2336        let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2337        if has_prefix {
2338            curr = self.get_right(curr);
2339        }
2340        let mut count = 0u32;
2341        let mut pred_set = None;
2342        while self.get_kind(curr) == Kind::Concat {
2343            let head = self.get_left(curr);
2344            if self.get_kind(head) != Kind::Pred {
2345                return None;
2346            }
2347            let ps = head.pred_tset(self);
2348            match pred_set {
2349                None => pred_set = Some(ps),
2350                Some(existing) => {
2351                    if existing != ps {
2352                        return None;
2353                    }
2354                }
2355            }
2356            curr = self.get_right(curr);
2357            count += 1;
2358        }
2359        if count == 0 || self.get_kind(curr) != Kind::Star {
2360            return None;
2361        }
2362        Some((has_prefix, pred_set.unwrap(), curr, count))
2363    }
2364
2365    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2366        let mut j = 0;
2367        for &s in sub {
2368            while j < sup.len() && sup[j] < s {
2369                j += 1;
2370            }
2371            if j >= sup.len() || sup[j] != s {
2372                return false;
2373            }
2374            j += 1;
2375        }
2376        true
2377    }
2378
2379    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2380        if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2381            return None;
2382        }
2383
2384        let mut lc: Vec<NodeId> = Vec::new();
2385        let mut rc: Vec<NodeId> = Vec::new();
2386        self.collect_inter_components(left, &mut lc);
2387        self.collect_inter_components(right, &mut rc);
2388
2389        // component subset check: fewer constraints = larger language
2390        // if lc ⊆ rc then L(left) ⊇ L(right), keep left
2391        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2392            return Some(left);
2393        }
2394        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2395        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2396            return Some(right);
2397        }
2398
2399        if lc.len() == rc.len() {
2400            let mut diff_idx = None;
2401            for i in 0..lc.len() {
2402                if lc[i] != rc[i] {
2403                    if diff_idx.is_some() {
2404                        return None;
2405                    }
2406                    diff_idx = Some(i);
2407                }
2408            }
2409            if let Some(idx) = diff_idx {
2410                let a = lc[idx];
2411                let b = rc[idx];
2412                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2413                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2414                {
2415                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2416                        return if ca < cb { Some(left) } else { Some(right) };
2417                    }
2418                }
2419            }
2420        }
2421
2422        None
2423    }
2424
2425    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2426        if cfg!(feature = "norewrite") {
2427            return None;
2428        }
2429        if left == right {
2430            return Some(left);
2431        }
2432
2433        if self.get_kind(right) == Kind::Union {
2434            let mut result = NodeId::BOT;
2435            self.iter_unions_b(
2436                right,
2437                &mut (|b, v| {
2438                    let new_inter = b.mk_inter(v, left);
2439                    result = b.mk_union(result, new_inter);
2440                }),
2441            );
2442            return Some(result);
2443        }
2444        if self.get_kind(left) == Kind::Union {
2445            let mut result = NodeId::BOT;
2446            self.iter_unions_b(
2447                left,
2448                &mut (|b, v| {
2449                    let new_inter = b.mk_inter(v, right);
2450                    result = b.mk_union(result, new_inter);
2451                }),
2452            );
2453            return Some(result);
2454        }
2455
2456        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2457            return Some(NodeId::BOT);
2458        }
2459
2460        if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2461            let bodies = self.mk_union(left.left(self), right.left(self));
2462            return Some(self.mk_compl(bodies));
2463        }
2464
2465        if left == NodeId::TOPPLUS {
2466            if right.is_pred_star(self).is_some() {
2467                let newloop = self.mk_plus(right.left(self));
2468                return Some(newloop);
2469            }
2470            if right.is_never_nullable(self) {
2471                return Some(right);
2472            }
2473            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2474                return Some(NodeId::BOT);
2475            }
2476            if right.is_kind(self, Kind::Concat) {}
2477        }
2478
2479        // hoist lookahead out of intersection:
2480        //   L & (?=X)T  → (?=X)(L & T)                  [bare Lookahead]
2481        //   L & ((?=X)·body) → (?=X)(L & body)           [Concat with leading Lookahead]
2482        {
2483            let l_is_la = left.is_lookahead(self);
2484            let r_is_la = right.is_lookahead(self);
2485            let l_is_cla = !l_is_la
2486                && self.get_kind(left) == Kind::Concat
2487                && self.get_kind(left.left(self)) == Kind::Lookahead;
2488            let r_is_cla = !r_is_la
2489                && self.get_kind(right) == Kind::Concat
2490                && self.get_kind(right.left(self)) == Kind::Lookahead;
2491            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2492                let (la_node, other, concat_body) = if r_is_la {
2493                    (right, left, NodeId::MISSING)
2494                } else if l_is_la {
2495                    (left, right, NodeId::MISSING)
2496                } else if r_is_cla {
2497                    (right.left(self), left, right.right(self))
2498                } else {
2499                    (left.left(self), right, left.right(self))
2500                };
2501                let la_body = la_node.left(self);
2502                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2503                let inter_right = if concat_body.is_missing() {
2504                    la_tail
2505                } else {
2506                    self.mk_concat(la_tail, concat_body)
2507                };
2508                let new_body = self.mk_inter(other, inter_right);
2509                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2510                return Some(self.mk_concat(la, new_body));
2511            }
2512        }
2513
2514        if self.get_kind(right) == Kind::Compl {
2515            let compl_body = right.left(self);
2516            if left == compl_body {
2517                return Some(NodeId::BOT);
2518            }
2519            if self.get_kind(compl_body) == Kind::Concat {
2520                let compl_head = compl_body.left(self);
2521                // L & ~(R·_*) = BOT when L ⊂ R·_*
2522                if compl_body.right(self) == NodeId::TS && compl_head == left {
2523                    return Some(NodeId::BOT);
2524                }
2525            }
2526        }
2527
2528        if let Some(pleft) = left.is_pred_star(self) {
2529            if let Some(pright) = right.is_pred_star(self) {
2530                let merged = self.mk_inter(pleft, pright);
2531                return Some(self.mk_star(merged));
2532            }
2533        }
2534
2535        // factor lookbehinds out of intersections
2536        // Concat(LB1, body1) & Concat(LB2, body2) → Concat(LB(inner1&inner2), body1&body2)
2537        // Concat(LB, body1) & body2 → Concat(LB, body1&body2)
2538        {
2539            let l_is_clb = self.get_kind(left) == Kind::Concat
2540                && self.get_kind(left.left(self)) == Kind::Lookbehind;
2541            let r_is_clb = self.get_kind(right) == Kind::Concat
2542                && self.get_kind(right.left(self)) == Kind::Lookbehind;
2543            if l_is_clb || r_is_clb {
2544                let (lb, body) = if l_is_clb && r_is_clb {
2545                    let lb1 = left.left(self);
2546                    let lb2 = right.left(self);
2547                    let inner = self.mk_inter(
2548                        self.get_lookbehind_inner(lb1),
2549                        self.get_lookbehind_inner(lb2),
2550                    );
2551                    // lb_prev is MISSING - cannot trigger UnsupportedPattern
2552                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2553                    let body = self.mk_inter(left.right(self), right.right(self));
2554                    (lb, body)
2555                } else if l_is_clb {
2556                    let lb = left.left(self);
2557                    let body = self.mk_inter(left.right(self), right);
2558                    (lb, body)
2559                } else {
2560                    let lb = right.left(self);
2561                    let body = self.mk_inter(left, right.right(self));
2562                    (lb, body)
2563                };
2564                return Some(self.mk_concat(lb, body));
2565            }
2566        }
2567
2568        // factor trailing lookaheads out of intersections
2569        // body1·LA & body2 → (body1&body2)·LA
2570        // body1·LA1 & body2·LA2 → (body1&body2)·LA(inner1&inner2)
2571        {
2572            let l_has_la = self.has_trailing_la(left);
2573            let r_has_la = self.has_trailing_la(right);
2574            if l_has_la || r_has_la {
2575                let (body, la) = if l_has_la && r_has_la {
2576                    let (lbody, l_la) = self.strip_trailing_la(left);
2577                    let (rbody, r_la) = self.strip_trailing_la(right);
2578                    let inner = self.mk_inter(
2579                        self.get_lookahead_inner(l_la),
2580                        self.get_lookahead_inner(r_la),
2581                    );
2582                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2583                    let body = self.mk_inter(lbody, rbody);
2584                    (body, la)
2585                } else if l_has_la {
2586                    let (lbody, la) = self.strip_trailing_la(left);
2587                    let body = self.mk_inter(lbody, right);
2588                    (body, la)
2589                } else {
2590                    let (rbody, la) = self.strip_trailing_la(right);
2591                    let body = self.mk_inter(left, rbody);
2592                    (body, la)
2593                };
2594                return Some(self.mk_concat(body, la));
2595            }
2596        }
2597
2598        None
2599    }
2600
2601    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2602        if cfg!(feature = "norewrite") {
2603            return None;
2604        }
2605        debug_assert!(self.get_kind(right_union) == Kind::Union);
2606
2607        let mut rewritten = None;
2608        right_union.iter_union_while(
2609            self,
2610            &mut (|b, v| {
2611                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2612                    rewritten = Some((v, rw));
2613                    false
2614                } else {
2615                    true
2616                }
2617            }),
2618        );
2619
2620        if let Some(rw) = rewritten {
2621            let mut new_union = NodeId::BOT;
2622            right_union.iter_union(
2623                self,
2624                &mut (|b, v| {
2625                    if v == rw.0 {
2626                        new_union = b.mk_union(rw.1, new_union)
2627                    } else {
2628                        new_union = b.mk_union(v, new_union)
2629                    }
2630                }),
2631            );
2632            return Some(new_union);
2633        };
2634
2635        None
2636    }
2637
2638    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2639        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2640        debug_assert!(tail != NodeId::MISSING);
2641        let key = NodeKey {
2642            kind: Kind::Concat,
2643            left: head,
2644            right: tail,
2645            extra: u32::MAX,
2646        };
2647        if let Some(id) = self.key_is_created(&key) {
2648            return *id;
2649        }
2650
2651        if head == NodeId::BOT || tail == NodeId::BOT {
2652            return NodeId::BOT;
2653        }
2654        if head == NodeId::EPS {
2655            return tail;
2656        }
2657        if tail == NodeId::EPS {
2658            return head;
2659        }
2660
2661        match tail {
2662            // this is only valid if direction known;
2663            NodeId::BEGIN => {
2664                if !self.is_nullable(head, Nullability::BEGIN) {
2665                    return NodeId::BOT;
2666                } else {
2667                    return NodeId::BEGIN;
2668                }
2669            }
2670            _ => {}
2671        }
2672
2673        // normalize concats to right
2674        if head.is_kind(self, Kind::Concat) {
2675            let left = head.left(self);
2676            let newright = self.mk_concat(head.right(self), tail);
2677            let updated = self.mk_concat(left, newright);
2678            return self.init_as(key, updated);
2679        }
2680
2681        if head == NodeId::TS && tail == NodeId::END {
2682            return NodeId::TS;
2683        }
2684
2685        if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2686            return NodeId::BOT;
2687        }
2688
2689        if self.get_kind(tail) == Kind::Concat {
2690            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2691                let upd = self.mk_concat(rw, tail.right(self));
2692                return self.init_as(key, upd);
2693            }
2694        }
2695
2696        if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2697            return self.init_as(key, new);
2698        }
2699
2700        match (self.get_kind(head), self.get_kind(tail)) {
2701            // merge stars
2702            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2703                let rl = tail.left(self);
2704                if head == rl {
2705                    return self.init_as(key, tail);
2706                }
2707            }
2708            // attempt longer concat rw
2709            (_, Kind::Concat) => {
2710                let curr = head;
2711                let h2 = self.mk_concat(curr, tail.left(self));
2712                let tr = tail.right(self);
2713                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2714                    return self.init_as(key, new);
2715                }
2716            }
2717            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2718                return self.init_as(key, tail);
2719            }
2720            _ => {}
2721        }
2722
2723        self.init(key)
2724    }
2725
2726    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2727        // LNF: lookbehind must start with ts
2728        let lb_body = {
2729            match self.starts_with_ts(lb_body) {
2730                true => lb_body,
2731                false => self.mk_concat(NodeId::TS, lb_body),
2732            }
2733        };
2734        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2735        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2736    }
2737
2738    fn mk_lookbehind_internal(
2739        &mut self,
2740        lb_body: NodeId,
2741        lb_prev: NodeId,
2742    ) -> Result<NodeId, AlgebraError> {
2743        debug_assert!(lb_body != NodeId::MISSING);
2744        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2745        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2746            return Ok(NodeId::BOT);
2747        }
2748        if lb_body == NodeId::TS {
2749            return Ok(lb_prev);
2750        }
2751        if lb_body == NodeId::EPS {
2752            match lb_prev {
2753                NodeId::MISSING => return Ok(NodeId::EPS),
2754                NodeId::EPS => return Ok(NodeId::EPS),
2755                _ => return Ok(lb_prev),
2756            }
2757        }
2758
2759        let key = NodeKey {
2760            kind: Kind::Lookbehind,
2761            left: lb_body,
2762            right: lb_prev,
2763            extra: u32::MAX,
2764        };
2765        match self.key_is_created(&key) {
2766            Some(id) => Ok(*id),
2767            None => {
2768                if lb_prev == NodeId::TS {
2769                    return Ok(self.mk_concat(lb_prev, lb_body));
2770                }
2771
2772                Ok(self.init(key))
2773            }
2774        }
2775    }
2776
2777    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2778        // LNF: lookahead must end with ts
2779        let la_body = {
2780            match self.ends_with_ts(la_body) {
2781                true => la_body,
2782                false => self.mk_concat(la_body, NodeId::TS),
2783            }
2784        };
2785        let rel = if NodeId::MISSING == la_tail {
2786            rel
2787        } else {
2788            match la_tail.is_center_nullable(self) {
2789                false => u32::MAX,
2790                true => rel,
2791            }
2792        };
2793
2794        self.mk_lookahead_internal(la_body, la_tail, rel)
2795    }
2796
2797    // rel max = carries no nullability, can potentially rw to intersection
2798    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2799        let key = NodeKey {
2800            kind: Kind::Lookahead,
2801            left: la_body,
2802            right: la_tail,
2803            extra: rel,
2804        };
2805        if let Some(id) = self.key_is_created(&key) {
2806            return *id;
2807        }
2808        if la_body == NodeId::TS {
2809            if rel == 0 {
2810                return la_tail.missing_to_eps();
2811            } else {
2812                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2813            }
2814        }
2815        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2816            return NodeId::BOT;
2817        }
2818        if la_tail.is_missing() && rel == u32::MAX {
2819            return NodeId::BOT;
2820        }
2821
2822        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2823            return la_body;
2824        }
2825
2826        if la_tail == NodeId::TS {
2827            if rel == 0 || rel == u32::MAX {
2828                return self.mk_concat(la_body, NodeId::TS);
2829            } else if rel == u32::MAX {
2830                return self.mk_begins_with(la_body);
2831            }
2832        }
2833
2834        if rel == u32::MAX {
2835            if la_tail.is_missing() {
2836                return NodeId::BOT;
2837            }
2838
2839            if self.is_always_nullable(la_body) {
2840                return la_tail.missing_to_eps();
2841            }
2842
2843            if la_tail != NodeId::MISSING {
2844                match self.get_kind(la_tail) {
2845                    _ => {
2846                        if la_body.is_compl_plus_end(self) {
2847                            let minlen = self.get_min_length_only(la_tail);
2848                            if minlen >= 1 {
2849                                return NodeId::BOT;
2850                            }
2851                        }
2852                    }
2853                }
2854            }
2855        }
2856
2857        // flatten consecutive lookaheads: (?=A, tail=(?=B, tail=C)) → (?=A._* & B._*, tail=C)
2858        if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2859            let la_body2 = self.get_lookahead_inner(la_tail);
2860            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2861            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2862            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2863            let new_la_rel = self.get_lookahead_rel(la_tail);
2864            let new_la_tail = self.get_lookahead_tail(la_tail);
2865            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2866        }
2867
2868        // _*\z -> too expensive to keep; null it
2869        if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2870            let la_body_right = la_body.right(self);
2871            if self.is_always_nullable(la_body_right) {
2872                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2873            }
2874            if la_body.right(self) == NodeId::END {
2875                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2876            }
2877            let bodyright = la_body.right(self);
2878            if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2879                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2880                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2881            }
2882        }
2883
2884        if la_tail != NodeId::MISSING {
2885            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2886                let lpred = la_body.left(self);
2887                if self.get_kind(lpred) == Kind::Pred {
2888                    let l = lpred.pred_tset(self);
2889                    let r = la_tail.pred_tset(self);
2890                    let psi_and = self.solver().and_id(l, r);
2891                    let rewrite = self.mk_pred(psi_and);
2892                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2893                    let new_right =
2894                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2895                    return self.mk_concat(rewrite, new_right);
2896                }
2897            }
2898        }
2899
2900        self.get_node_id(key)
2901    }
2902
2903    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2904        let has_match = (packed >> 16) > 0;
2905        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2906            return NodeId::BOT;
2907        }
2908        debug_assert!(
2909            body == NodeId::BOT || !self.is_infinite(body),
2910            "Counted body must have finite max length"
2911        );
2912        let chain = self.prune_counted_chain(body, chain);
2913        let key = NodeKey {
2914            kind: Kind::Counted,
2915            left: body,
2916            right: chain,
2917            extra: packed,
2918        };
2919        if let Some(id) = self.key_is_created(&key) {
2920            return *id;
2921        }
2922        self.get_node_id(key)
2923    }
2924
2925    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2926        if chain == NodeId::MISSING || body == NodeId::BOT {
2927            return chain;
2928        }
2929        // nullable body → body·_* = _*, all chain candidates subsumed
2930        if self.nullability(body) != Nullability::NEVER {
2931            return NodeId::MISSING;
2932        }
2933        let chain_body = chain.left(self);
2934        if chain_body == NodeId::BOT {
2935            return chain; // pending match, keep
2936        }
2937        let not_begins = self.mk_not_begins_with(body);
2938        let inter = self.mk_inter(chain_body, not_begins);
2939        let is_empty = inter == NodeId::BOT;
2940        if is_empty {
2941            self.prune_counted_chain(body, chain.right(self))
2942        } else {
2943            chain
2944        }
2945    }
2946
2947    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2948        let neg_inner = self.mk_concat(body, NodeId::TS);
2949        let neg_part = self.mk_compl(neg_inner);
2950        let conc = self.mk_concat(neg_part, NodeId::END);
2951        self.mk_lookahead(conc, NodeId::MISSING, rel)
2952    }
2953
2954    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2955        match self.get_node(body).kind {
2956            Kind::Pred => {
2957                let psi = body.pred_tset(self);
2958                let negated = self.mk_pred_not(psi);
2959                let union = self.mk_union(NodeId::BEGIN, negated);
2960                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2961                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2962            }
2963            _ => {
2964                // ~body ∩ utf8_char: non-nullable (utf8_char requires ≥1 byte),
2965                // so \A | negated won't be stripped by strip_prefix_safe
2966                let uc = crate::unicode_classes::utf8_char(self);
2967                let neg = self.mk_compl(body);
2968                let negated = self.mk_inter(neg, uc);
2969                let union = self.mk_union(NodeId::BEGIN, negated);
2970                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2971                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2972            }
2973        }
2974    }
2975
2976    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2977        debug_assert!(left != NodeId::MISSING);
2978        debug_assert!(right != NodeId::MISSING);
2979        if left > right {
2980            return self.mk_union(right, left);
2981        }
2982        let key = NodeKey {
2983            kind: Kind::Union,
2984            left,
2985            right,
2986            extra: u32::MAX,
2987        };
2988        if let Some(id) = self.key_is_created(&key) {
2989            return *id;
2990        }
2991
2992        if left == right {
2993            return left;
2994        }
2995        if left == NodeId::BOT {
2996            return right;
2997        }
2998        if right == NodeId::BOT {
2999            return left;
3000        }
3001        if right == NodeId::TS {
3002            return right;
3003        }
3004        if left == NodeId::TS {
3005            return left;
3006        }
3007
3008        match (self.get_kind(left), self.get_kind(right)) {
3009            (Kind::Union, _) => {
3010                self.iter_unions_b(left, &mut |b, v| {
3011                    b.temp_vec.push(v);
3012                });
3013                self.iter_unions_b(right, &mut |b, v| {
3014                    b.temp_vec.push(v);
3015                });
3016                self.temp_vec.sort();
3017                let tree = self.temp_vec.clone();
3018                self.temp_vec.clear();
3019                let newnode = tree
3020                    .iter()
3021                    .rev()
3022                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3023                return self.init_as(key, newnode);
3024            }
3025            (_, Kind::Union) => {
3026                let rleft = right.left(self);
3027                // if left_node id is smaller than rleft, just create a new union
3028                if left > rleft {
3029                    self.iter_unions_b(left, &mut |b, v| {
3030                        b.temp_vec.push(v);
3031                    });
3032                    self.iter_unions_b(right, &mut |b, v| {
3033                        b.temp_vec.push(v);
3034                    });
3035                    self.temp_vec.sort();
3036                    let tree = self.temp_vec.clone();
3037                    self.temp_vec.clear();
3038                    let newnode = tree
3039                        .iter()
3040                        .rev()
3041                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3042                    return self.init_as(key, newnode);
3043                } else {
3044                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3045                        return self.init_as(key, rw);
3046                    }
3047                }
3048            }
3049            _ => {}
3050        }
3051
3052        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3053            return self.init_as(key, rw);
3054        }
3055        self.init(key)
3056    }
3057
3058    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3059        debug_assert!(left_id != NodeId::MISSING);
3060        debug_assert!(right_id != NodeId::MISSING);
3061        if left_id == right_id {
3062            return left_id;
3063        }
3064        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3065            return NodeId::BOT;
3066        }
3067        if left_id == NodeId::TS {
3068            return right_id;
3069        }
3070        if right_id == NodeId::TS {
3071            return left_id;
3072        }
3073        if left_id > right_id {
3074            return self.mk_inter(right_id, left_id);
3075        }
3076        let key = NodeKey {
3077            kind: Kind::Inter,
3078            left: left_id,
3079            right: right_id,
3080            extra: u32::MAX,
3081        };
3082        if let Some(id) = self.key_is_created(&key) {
3083            return *id;
3084        }
3085
3086        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3087            return self.init_as(key, rw);
3088        }
3089
3090        self.init(key)
3091    }
3092
3093    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3094        let node = NodeKey {
3095            kind,
3096            left: NodeId::MISSING,
3097            right: NodeId::MISSING,
3098            extra: u32::MAX,
3099        };
3100        self.init(node)
3101    }
3102
3103    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3104        let star = self.mk_star(body_id);
3105        self.mk_concat(body_id, star)
3106    }
3107    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3108        let opt = self.mk_opt(body_id);
3109        let mut nodes1 = vec![];
3110        for _ in lower..upper {
3111            nodes1.push(opt);
3112        }
3113        for _ in 0..lower {
3114            nodes1.push(body_id);
3115        }
3116        self.mk_concats(nodes1.into_iter())
3117    }
3118    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3119        self.mk_union(NodeId::EPS, body_id)
3120    }
3121
3122    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3123        let key = NodeKey {
3124            kind: Kind::Star,
3125            left: body_id,
3126            right: NodeId::MISSING,
3127            extra: 0,
3128        };
3129        if let Some(id) = self.key_is_created(&key) {
3130            return *id;
3131        }
3132        // _*{500} is still _*
3133        if body_id.is_kind(self, Kind::Star) {
3134            return body_id;
3135        }
3136        self.get_node_id(key)
3137    }
3138
3139    /// it's cheaper to check this once as an edge-case
3140    /// than to compute a 4th nullability bit for every node
3141    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3142        match self.get_kind(node_id) {
3143            Kind::End => Nullability::EMPTYSTRING,
3144            Kind::Begin => Nullability::EMPTYSTRING,
3145            Kind::Pred => Nullability::NEVER,
3146            Kind::Star => Nullability::ALWAYS,
3147            Kind::Inter | Kind::Concat => {
3148                let lnull = self.nullability_emptystring(node_id.left(self));
3149                let rnull = self.nullability_emptystring(node_id.right(self));
3150                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3151            }
3152            Kind::Union => {
3153                let lnull = self.nullability_emptystring(node_id.left(self));
3154                let rnull = self.nullability_emptystring(node_id.right(self));
3155                lnull.or(rnull)
3156            }
3157            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3158            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3159            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3160            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3161        }
3162    }
3163
3164    #[inline(always)]
3165    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3166        self.get_meta(node_id)
3167            .flags
3168            .nullability()
3169            .has(Nullability::CENTER.or(Nullability::END))
3170    }
3171
3172    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3173        self.get_only_nullability(node_id)
3174    }
3175
3176    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3177        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3178    }
3179
3180    pub fn pp(&self, node_id: NodeId) -> String {
3181        let mut s = String::new();
3182        self.ppw(&mut s, node_id).unwrap();
3183        s
3184    }
3185
3186    #[allow(dead_code)]
3187    pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3188        let nu = self.get_nulls_id(node_id);
3189        let nr = self.mb.nb.get_set_ref(nu);
3190        let s1 = format!("{:?}", nr);
3191        s1
3192    }
3193
3194    #[allow(dead_code)]
3195    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3196        match self.get_tregex(term_id) {
3197            TRegex::Leaf(node_id) => {
3198                let mut s = String::new();
3199                self.ppw(&mut s, *node_id).unwrap();
3200                s
3201            }
3202            TRegex::ITE(cond, then_id, else_id) => {
3203                format!(
3204                    "ITE({},{},{})",
3205                    self.solver_ref().pp(*cond),
3206                    self.ppt(*then_id),
3207                    self.ppt(*else_id)
3208                )
3209            }
3210        }
3211    }
3212
3213    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3214        if cfg!(feature = "graphviz") {
3215            match node_id {
3216                NodeId::MISSING => return write!(s, "MISSING"),
3217                NodeId::BOT => return write!(s, "⊥"),
3218                NodeId::TS => return write!(s, "_*"),
3219                NodeId::TOP => return write!(s, "_"),
3220                NodeId::EPS => return write!(s, "ε"),
3221                _ => {}
3222            }
3223        }
3224
3225        match node_id {
3226            NodeId::MISSING => return write!(s, "MISSING"),
3227            NodeId::BOT => return write!(s, "⊥"),
3228            NodeId::TS => return write!(s, "_*"),
3229            NodeId::TOP => return write!(s, "_"),
3230            NodeId::EPS => return write!(s, ""),
3231            _ => {}
3232        }
3233
3234        match self.get_kind(node_id) {
3235            Kind::End => write!(s, r"\z"),
3236            Kind::Begin => write!(s, r"\A"),
3237            Kind::Pred => {
3238                let psi = node_id.pred_tset(self);
3239                if psi == TSetId::EMPTY {
3240                    write!(s, r"⊥")
3241                } else if psi == TSetId::FULL {
3242                    write!(s, r"_")
3243                } else {
3244                    write!(s, "{}", self.solver_ref().pp(psi))
3245                }
3246            }
3247            Kind::Inter => {
3248                write!(s, "(")?;
3249                self.ppw(s, node_id.left(self))?;
3250                write!(s, "&")?;
3251                let mut curr = node_id.right(self);
3252                while self.get_kind(curr) == Kind::Inter {
3253                    let n = curr.left(self);
3254                    self.ppw(s, n)?;
3255                    write!(s, "&")?;
3256                    curr = curr.right(self);
3257                }
3258                self.ppw(s, curr)?;
3259                write!(s, ")")
3260            }
3261            Kind::Union => {
3262                let left = node_id.left(self);
3263                let right = node_id.right(self);
3264                write!(s, "(")?;
3265                self.ppw(s, left)?;
3266                write!(s, "|")?;
3267                let mut curr = right;
3268                while self.get_kind(curr) == Kind::Union {
3269                    let n = curr.left(self);
3270                    self.ppw(s, n)?;
3271                    write!(s, "|")?;
3272                    curr = curr.right(self);
3273                }
3274                self.ppw(s, curr)?;
3275                write!(s, ")")
3276            }
3277            Kind::Concat => {
3278                let left = node_id.left(self);
3279                let right = node_id.right(self);
3280                if right.is_star(self) && right.left(self) == left {
3281                    self.ppw(s, left)?;
3282                    write!(s, "+")?;
3283                    return Ok(());
3284                }
3285                if right.is_concat(self) {
3286                    let rl = right.left(self);
3287                    if rl.is_star(self) && rl.left(self) == left {
3288                        self.ppw(s, left)?;
3289                        write!(s, "+")?;
3290                        return self.ppw(s, right.right(self));
3291                    }
3292                }
3293                if right.is_concat(self) && right.left(self) == left {
3294                    let mut num = 1;
3295                    let mut right = right;
3296                    while right.is_concat(self) && right.left(self) == left {
3297                        num += 1;
3298                        right = right.right(self);
3299                    }
3300                    // (|X){n} followed by X{m} -> X{m,m+n}
3301                    if let Some(inner) = left.is_opt_v(self) {
3302                        let mut inner_count = 0;
3303                        let mut right2 = right;
3304                        while right2.is_concat(self) && right2.left(self) == inner {
3305                            inner_count += 1;
3306                            right2 = right2.right(self);
3307                        }
3308                        if right2 == inner {
3309                            inner_count += 1;
3310                            self.ppw(s, inner)?;
3311                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3312                        }
3313                        if inner_count > 0 {
3314                            self.ppw(s, inner)?;
3315                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3316                            return self.ppw(s, right2);
3317                        }
3318                    }
3319                    self.ppw(s, left)?;
3320                    if right == left {
3321                        num += 1;
3322                        return write!(s, "{{{}}}", num);
3323                    }
3324                    if num <= 3 && left.is_pred(self) {
3325                        for _ in 1..num {
3326                            self.ppw(s, left)?;
3327                        }
3328                        return self.ppw(s, right);
3329                    } else {
3330                        write!(s, "{{{}}}", num)?;
3331                        return self.ppw(s, right);
3332                    }
3333                }
3334                self.ppw(s, left)?;
3335                self.ppw(s, right)
3336            }
3337            Kind::Star => {
3338                let left = node_id.left(self);
3339                let leftkind = self.get_kind(left);
3340                match leftkind {
3341                    Kind::Concat | Kind::Star | Kind::Compl => {
3342                        write!(s, "(")?;
3343                        self.ppw(s, left)?;
3344                        write!(s, ")")?;
3345                    }
3346                    _ => {
3347                        self.ppw(s, left)?;
3348                    }
3349                };
3350                write!(s, "*")
3351            }
3352            Kind::Compl => {
3353                write!(s, "~(")?;
3354                self.ppw(s, node_id.left(self))?;
3355                write!(s, ")")
3356            }
3357            Kind::Lookbehind => {
3358                let lbleft = self.get_lookbehind_prev(node_id);
3359                let lbinner = self.get_lookbehind_inner(node_id);
3360                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3361                if lbleft != NodeId::MISSING {
3362                    write!(s, "「")?;
3363                    self.ppw(s, lbleft)?;
3364                    write!(s, "」")?;
3365                }
3366
3367                write!(s, "(?<=")?;
3368                self.ppw(s, lbinner)?;
3369                write!(s, ")")
3370            }
3371            Kind::Lookahead => {
3372                let inner = self.get_lookahead_inner(node_id);
3373                write!(s, "(?=")?;
3374                self.ppw(s, inner)?;
3375                write!(s, ")")?;
3376                if self.get_lookahead_rel(node_id) != 0 {
3377                    write!(s, "{{")?;
3378                    let rel = self.get_lookahead_rel(node_id);
3379                    if rel == u32::MAX {
3380                        write!(s, "∅")?;
3381                    } else {
3382                        write!(s, "{}", rel)?;
3383                    }
3384                    write!(s, "}}")?;
3385                }
3386                if node_id.right(self) == NodeId::MISSING {
3387                    Ok(())
3388                } else {
3389                    write!(s, "『")?;
3390                    self.ppw(s, node_id.right(self))?;
3391                    write!(s, "』")
3392                }
3393            }
3394            Kind::Counted => {
3395                let body = node_id.left(self);
3396                let packed = self.get_extra(node_id);
3397                let step = packed & 0xFFFF;
3398                let best = packed >> 16;
3399                write!(s, "#(")?;
3400                self.ppw(s, body)?;
3401                write!(s, ")s{}b{}", step, best)
3402            }
3403        }
3404    }
3405
3406    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3407        self.mk_concat(node, NodeId::TS)
3408    }
3409
3410    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3411        let node_ts = self.mk_concat(node, NodeId::TS);
3412        self.mk_compl(node_ts)
3413    }
3414
3415    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3416        let notset = self.solver().not_id(set);
3417        self.mk_pred(notset)
3418    }
3419
3420    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3421        let set_id = self.solver().u8_to_set_id(char);
3422        self.mk_pred(set_id)
3423    }
3424
3425    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3426        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3427        self.mk_pred(rangeset)
3428    }
3429
3430    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3431        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3432        for &(lo, hi) in &ranges[1..] {
3433            let r = self.mk_range_u8(lo, hi);
3434            node = self.mk_union(node, r);
3435        }
3436        node
3437    }
3438
3439    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3440        let mut prefix = Vec::new();
3441        let mut curr = node;
3442        loop {
3443            if curr == NodeId::EPS {
3444                let full = !prefix.is_empty();
3445                return (prefix, full);
3446            }
3447            if curr == NodeId::BOT {
3448                break;
3449            }
3450            if self.get_kind(curr) == Kind::Pred {
3451                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3452                    Some(byte) => {
3453                        prefix.push(byte);
3454                        return (prefix, true);
3455                    }
3456                    None => break, // multi-byte pred: pattern not fully consumed
3457                }
3458            }
3459            if self.get_kind(curr) != Kind::Concat {
3460                break;
3461            }
3462            let left = curr.left(self);
3463            if self.get_kind(left) != Kind::Pred {
3464                break;
3465            }
3466            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3467                Some(byte) => prefix.push(byte),
3468                None => break,
3469            }
3470            curr = curr.right(self);
3471        }
3472        (prefix, false)
3473    }
3474
3475    #[allow(dead_code)]
3476    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3477        let mut result = NodeId::EPS;
3478        for byte in raw_str.iter().rev() {
3479            let node = self.mk_u8(*byte);
3480            result = self.mk_concat(node, result);
3481        }
3482        result
3483    }
3484
3485    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3486        let mut result = NodeId::EPS;
3487        for byte in raw_str.bytes().rev() {
3488            let node = self.mk_u8(byte);
3489            result = self.mk_concat(node, result);
3490        }
3491        result
3492    }
3493
3494    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3495        let mut sorted: Vec<NodeId> = nodes.collect();
3496        if sorted.len() <= 1 {
3497            return sorted.pop().unwrap_or(NodeId::BOT);
3498        }
3499        sorted.sort();
3500        sorted.dedup();
3501        sorted.retain(|&x| x != NodeId::BOT);
3502        if sorted.is_empty() {
3503            return NodeId::BOT;
3504        }
3505        if sorted.len() > 16 {
3506            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3507            let mut non_concat: Vec<NodeId> = Vec::new();
3508            for &n in &sorted {
3509                if self.get_kind(n) == Kind::Concat {
3510                    by_head.entry(self.get_left(n)).or_default().push(n);
3511                } else {
3512                    non_concat.push(n);
3513                }
3514            }
3515            // absorb non-concat nodes whose id matches a by_head key:
3516            // x | x·t1 | x·t2 → x·(ε|t1|t2)
3517            // without this, factoring reproduces identical groups and
3518            // recurses infinitely.
3519            let mut absorbed: Vec<NodeId> = Vec::new();
3520            for &n in &non_concat {
3521                if by_head.contains_key(&n) {
3522                    absorbed.push(n);
3523                }
3524            }
3525            if !absorbed.is_empty() {
3526                non_concat.retain(|n| !absorbed.contains(n));
3527            }
3528            if by_head.len() < sorted.len() {
3529                let mut groups: Vec<NodeId> = non_concat;
3530                for (head, tails) in by_head {
3531                    let mut tail_nodes: Vec<NodeId> =
3532                        tails.iter().map(|&n| self.get_right(n)).collect();
3533                    if absorbed.contains(&head) {
3534                        tail_nodes.push(NodeId::EPS);
3535                    }
3536                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3537                    let factored = self.mk_concat(head, tail_union);
3538                    groups.push(factored);
3539                }
3540                groups.sort();
3541                groups.dedup();
3542                return self.mk_unions_balanced(&groups);
3543            }
3544        }
3545        self.mk_unions_balanced(&sorted)
3546    }
3547
3548    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3549        match nodes.len() {
3550            0 => NodeId::BOT,
3551            1 => nodes[0],
3552            n => {
3553                let mid = n / 2;
3554                let left = self.mk_unions_balanced(&nodes[..mid]);
3555                let right = self.mk_unions_balanced(&nodes[mid..]);
3556                self.mk_union(left, right)
3557            }
3558        }
3559    }
3560
3561    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3562        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3563    }
3564
3565    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3566        nodes
3567            .rev()
3568            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3569    }
3570}
3571
3572impl RegexBuilder {
3573    #[allow(dead_code)]
3574    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3575        match self.get_tregex(term_id).clone() {
3576            TRegex::Leaf(node_id) => {
3577                if NodeId::BOT == node_id {
3578                    vec![]
3579                } else {
3580                    vec![node_id]
3581                }
3582            }
3583            TRegex::ITE(_, then_id, else_id) => {
3584                let mut then_nodes = self.extract_sat(then_id);
3585                let mut else_nodes = self.extract_sat(else_id);
3586                then_nodes.append(&mut else_nodes);
3587                then_nodes
3588            }
3589        }
3590    }
3591
3592    pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3593        debug_assert!(self.get_kind(start) == Kind::Union);
3594        let mut curr = start;
3595        while self.get_kind(curr) == Kind::Union {
3596            f(curr.left(self));
3597            curr = curr.right(self);
3598        }
3599        f(curr);
3600    }
3601
3602    pub(crate) fn iter_unions_b(
3603        &mut self,
3604        curr: NodeId,
3605        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3606    ) {
3607        let mut curr = curr;
3608        while self.get_kind(curr) == Kind::Union {
3609            f(self, curr.left(self));
3610            curr = curr.right(self);
3611        }
3612        f(self, curr);
3613    }
3614
3615    pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3616        if !self.contains_look(node_id) {
3617            return Some(node_id);
3618        }
3619        match self.get_kind(node_id) {
3620            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3621            Kind::Concat => {
3622                let left = node_id.left(self);
3623                let right = node_id.right(self);
3624                let elim_l = self.try_elim_lookarounds(left)?;
3625                let elim_r = self.try_elim_lookarounds(right)?;
3626                let rw = self.mk_concat(elim_l, elim_r);
3627                Some(rw)
3628            }
3629            Kind::Union => {
3630                let left = node_id.left(self);
3631                let right = node_id.right(self);
3632                let elim_l = self.try_elim_lookarounds(left)?;
3633                let elim_r = self.try_elim_lookarounds(right)?;
3634                let rw = self.mk_union(elim_l, elim_r);
3635                Some(rw)
3636            }
3637
3638            Kind::Star => {
3639                let body = node_id.left(self);
3640                let elim_l = self.try_elim_lookarounds(body)?;
3641                Some(self.mk_star(elim_l))
3642            }
3643            Kind::Compl => {
3644                let left = node_id.left(self);
3645                let elim_l = self.try_elim_lookarounds(left)?;
3646                Some(self.mk_compl(elim_l))
3647            }
3648            Kind::Lookahead => {
3649                let rel = self.get_lookahead_rel(node_id);
3650                if rel != 0 {
3651                    return None;
3652                }
3653                let lbody = self.get_lookahead_inner(node_id);
3654                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3655                let elim_l = self.try_elim_lookarounds(lbody)?;
3656                let elim_r = self.try_elim_lookarounds(ltail)?;
3657                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3658                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3659                let rw = self.mk_inter(lbody_ts, ltail_ts);
3660                Some(rw)
3661            }
3662            Kind::Lookbehind => {
3663                let linner = self.get_lookbehind_inner(node_id);
3664                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3665                let elim_l = self.try_elim_lookarounds(linner)?;
3666                let elim_r = self.try_elim_lookarounds(lprev)?;
3667                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3668                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3669                let rw = self.mk_inter(lbody_ts, ltail_ts);
3670                Some(rw)
3671            }
3672            Kind::Inter => {
3673                let left = node_id.left(self);
3674                let right = node_id.right(self);
3675                let elim_l = self.try_elim_lookarounds(left)?;
3676                let elim_r = self.try_elim_lookarounds(right)?;
3677                let rw = self.mk_inter(elim_l, elim_r);
3678                Some(rw)
3679            }
3680            Kind::Counted => None,
3681        }
3682    }
3683
3684    /// R & _+ is a safe overapproximation of R that's nonempty
3685    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3686        if self.nullability(node) == Nullability::NEVER {
3687            node
3688        } else {
3689            self.mk_inter(NodeId::TOPPLUS, node)
3690        }
3691    }
3692
3693    fn iter_find_stack(
3694        &self,
3695        stack: &mut Vec<TRegexId>,
3696        mut f: impl FnMut(NodeId) -> bool,
3697    ) -> bool {
3698        loop {
3699            match stack.pop() {
3700                None => return false,
3701                Some(curr) => match self.get_tregex(curr) {
3702                    TRegex::Leaf(n) => {
3703                        let mut curr = *n;
3704                        while curr != NodeId::BOT {
3705                            match self.get_kind(curr) {
3706                                Kind::Union => {
3707                                    if f(curr.left(self)) {
3708                                        return true;
3709                                    }
3710                                    curr = curr.right(self);
3711                                }
3712                                _ => {
3713                                    if f(*n) {
3714                                        return true;
3715                                    }
3716                                    curr = NodeId::BOT;
3717                                }
3718                            }
3719                        }
3720                    }
3721                    TRegex::ITE(_, then_id, else_id) => {
3722                        if *else_id != TRegexId::BOT {
3723                            stack.push(*else_id);
3724                        }
3725                        stack.push(*then_id);
3726                    }
3727                },
3728            }
3729        }
3730    }
3731
3732    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3733        if node == NodeId::BOT {
3734            return Some(true);
3735        }
3736        if self.nullability(node) != Nullability::NEVER {
3737            return Some(false);
3738        }
3739        if let Some(cached) = self.cache_empty.get(&node) {
3740            if cached.is_checked() {
3741                return Some(cached.is_empty());
3742            }
3743        }
3744        let node = if !self.contains_look(node) {
3745            node
3746        } else {
3747            self.try_elim_lookarounds(node)?
3748        };
3749        let isempty_flag = self.is_empty_lang_internal(node);
3750
3751        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3752    }
3753
3754    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3755        // without inter, no need to check
3756        if !self.get_meta_flags(initial_node).contains_inter() {
3757            return Ok(NodeFlags::ZERO);
3758        }
3759
3760        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3761        let mut worklist: VecDeque<NodeId> = VecDeque::new();
3762        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3763        let mut stack = Vec::new();
3764        stack.push(begin_der);
3765        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3766            visited.insert(node, initial_node);
3767            let nullability = self.nullability(node);
3768            if nullability != Nullability::NEVER {
3769                true
3770            } else {
3771                worklist.push_back(node);
3772                false
3773            }
3774        });
3775        if found_nullable_right_away {
3776            return Ok(NodeFlags::ZERO);
3777        }
3778
3779        worklist.push_back(initial_node);
3780        let isempty_flag: NodeFlags;
3781        let mut found_node = NodeId::BOT;
3782
3783        loop {
3784            match worklist.pop_front() {
3785                None => {
3786                    isempty_flag = NodeFlags::IS_EMPTY;
3787                    break;
3788                }
3789                Some(outer) => {
3790                    if let Some(cached) = self.cache_empty.get(&outer) {
3791                        if cached.is_checked() {
3792                            if cached.is_empty() {
3793                                continue;
3794                            } else {
3795                                return Ok(NodeFlags::ZERO);
3796                            }
3797                        }
3798                    }
3799
3800                    let derivative = self.der(outer, Nullability::CENTER)?;
3801
3802                    stack.push(derivative);
3803
3804                    let found_null = self.iter_find_stack(&mut stack, |node| {
3805                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3806                            found_node = node;
3807                            if !self.get_meta_flags(node).contains_inter() {
3808                                true
3809                            } else {
3810                                e.insert(outer);
3811                                worklist.push_front(node);
3812                                self.any_nonbegin_nullable(node)
3813                            }
3814                        } else {
3815                            false
3816                        }
3817                    });
3818                    if found_null {
3819                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3820                        isempty_flag = NodeFlags::ZERO;
3821                        break;
3822                    }
3823                }
3824            }
3825        }
3826
3827        self.cache_empty.insert(
3828            initial_node,
3829            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3830        );
3831        Ok(isempty_flag)
3832    }
3833
3834    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3835    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3836        if larger_lang == smaller_lang {
3837            return Some(true);
3838        }
3839
3840        // assess initial nullability
3841        if self
3842            .nullability(larger_lang)
3843            .not()
3844            .and(self.nullability(smaller_lang))
3845            != Nullability::NEVER
3846        {
3847            return Some(false);
3848        }
3849
3850        // check language nullability
3851        // if (B &~ A) ≡ ⊥ then B ⊆ A
3852        // this means  L(B) \ L(A) = {}
3853        // eg. (a &~ .*) = ⊥ means .* subsumes a
3854        let (smaller_lang, larger_lang) =
3855            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3856                let wrap = |b: &mut Self, n: NodeId| {
3857                    let tmp = b.mk_concat(n, NodeId::TS);
3858                    b.mk_concat(NodeId::TS, tmp)
3859                };
3860                (wrap(self, smaller_lang), wrap(self, larger_lang))
3861            } else {
3862                (smaller_lang, larger_lang)
3863            };
3864
3865        let nota = self.mk_compl(larger_lang);
3866        let diff = self.mk_inter(smaller_lang, nota);
3867        self.is_empty_lang(diff)
3868    }
3869}