Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_lookaround(self) -> bool {
134        self.has(MetaFlags::CONTAINS_LOOKAROUND)
135    }
136    pub(crate) fn contains_inter(self) -> bool {
137        self.has(MetaFlags::CONTAINS_INTER)
138    }
139
140    pub(crate) fn all_contains_flags(self) -> MetaFlags {
141        self.and(
142            MetaFlags::CONTAINS_LOOKAROUND
143                .or(MetaFlags::CONTAINS_ANCHORS)
144                .or(MetaFlags::CONTAINS_INTER),
145        )
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152    #[default]
153    Pred,
154    Star,
155    Begin,
156    End,
157    Concat,
158    Union,
159    Compl,
160    Lookbehind,
161    Lookahead,
162    Inter,
163    Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168    flags: MetaFlags,
169    nulls: NullsId,
170}
171
172struct MetadataBuilder {
173    num_created: u32,
174    solver: Solver,
175    nb: NullsBuilder,
176    index: FxHashMap<Metadata, MetadataId>,
177    pub array: Vec<Metadata>,
178}
179
180mod helpers {
181    pub(crate) fn incr_rel(n1: u32) -> u32 {
182        match n1.overflowing_add(1) {
183            (_, true) => u32::MAX,
184            (res, false) => res,
185        }
186    }
187}
188
189impl MetadataBuilder {
190    fn new() -> MetadataBuilder {
191        Self {
192            index: FxHashMap::default(),
193            array: vec![Metadata {
194                flags: MetaFlags::ZERO,
195                nulls: NullsId::EMPTY,
196            }],
197            solver: Solver::new(),
198            num_created: 0,
199            nb: NullsBuilder::new(),
200        }
201    }
202
203    fn init(&mut self, inst: Metadata) -> MetadataId {
204        self.num_created += 1;
205        let new_id = MetadataId(self.num_created);
206        self.index.insert(inst.clone(), new_id);
207        self.array.push(inst);
208        new_id
209    }
210
211    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212        match self.index.get(&inst) {
213            Some(&id) => id,
214            None => self.init(inst),
215        }
216    }
217
218    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219        &self.array[inst.0 as usize]
220    }
221
222    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223        setflags.all_contains_flags()
224    }
225
226    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227        let left = &self.array[body.0 as usize].flags;
228        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229        // BOT* = EPS (empty string only), not infinite
230        let inf = if body_id == NodeId::BOT {
231            MetaFlags::ZERO
232        } else {
233            MetaFlags::INFINITE_LENGTH
234        };
235        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236    }
237
238    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239        let left = &self.array[left_id.0 as usize].flags;
240        let null = left.nullability().not().and(Nullability::ALWAYS);
241        let contains = self.get_contains(*left);
242        MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
243    }
244
245    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
246        let left = &self.array[left_id.0 as usize].flags;
247        let right = &self.array[right_id.0 as usize].flags;
248        let null = left.nullability().and(right.nullability());
249        let contains = self.get_contains(left.or(*right));
250        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
251        MetaFlags::with_nullability(null, contains.or(len))
252    }
253
254    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
255        let left = &self.array[left_id.0 as usize].flags;
256        let right = &self.array[right_id.0 as usize].flags;
257        let null = left.nullability().and(right.nullability());
258        let contains = self
259            .get_contains(left.or(*right))
260            .or(MetaFlags::CONTAINS_INTER);
261        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
262        MetaFlags::with_nullability(null, contains.or(len))
263    }
264
265    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
266        let left = &self.array[left_id.0 as usize].flags;
267        let right = &self.array[right_id.0 as usize].flags;
268        let null = left.nullability().or(right.nullability());
269        let contains = self.get_contains(left.or(*right));
270        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
271        MetaFlags::with_nullability(null, contains.or(len))
272    }
273}
274
275#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
276pub struct NodeKey {
277    pub(crate) kind: Kind,
278    pub(crate) left: NodeId,
279    pub(crate) right: NodeId,
280    pub(crate) extra: u32,
281}
282
283#[derive(Eq, Hash, PartialEq, Clone, Debug)]
284pub enum TRegex<TSet> {
285    Leaf(NodeId),
286    ITE(TSet, TRegexId, TRegexId),
287}
288
289#[derive(Eq, Hash, PartialEq, Clone, Copy)]
290pub(crate) struct NodeFlags(u8);
291impl NodeFlags {
292    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
293    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
294    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
295
296    fn is_checked(self) -> bool {
297        self.0 >= NodeFlags::IS_CHECKED.0
298    }
299    fn is_empty(self) -> bool {
300        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
301    }
302}
303
304#[derive(Eq, Hash, PartialEq, Clone, Copy)]
305pub(crate) struct BuilderFlags(u8);
306impl BuilderFlags {
307    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
308    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
309}
310
311pub struct RegexBuilder {
312    mb: MetadataBuilder,
313    temp_vec: Vec<NodeId>,
314    num_created: u32,
315    index: FxHashMap<NodeKey, NodeId>,
316    array: Vec<NodeKey>,
317    metadata: Vec<MetadataId>,
318    reversed: Vec<NodeId>,
319    cache_empty: FxHashMap<NodeId, NodeFlags>,
320    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
321    tr_array: Vec<TRegex<TSetId>>,
322    tr_der_center: Vec<TRegexId>,
323    tr_der_begin: Vec<TRegexId>,
324    flags: BuilderFlags,
325    /// maximum lookahead context distance before returning `AnchorLimit`.
326    pub lookahead_context_max: u32,
327    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
328    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
329}
330
331macro_rules! iter_inter {
332    ($compiler:ident, $start:ident, $expression:expr) => {
333        let mut curr = $start;
334        while $compiler.get_kind(curr) == Kind::Inter {
335            let left = $compiler.get_left(curr);
336            $expression(left);
337            curr = $compiler.get_right(curr);
338        }
339        $expression(curr);
340    };
341}
342
343impl NodeId {
344    fn is_missing(&self) -> bool {
345        *self == NodeId::MISSING
346    }
347    #[inline]
348    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
349        b.get_flags_contains(self)
350    }
351    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
352        if self == tail {
353            true
354        } else if self.is_kind(b, Kind::Concat) {
355            self.right(b).has_concat_tail(b, tail)
356        } else {
357            false
358        }
359    }
360    fn missing_to_eps(&self) -> NodeId {
361        if *self == NodeId::MISSING {
362            NodeId::EPS
363        } else {
364            *self
365        }
366    }
367
368    #[inline]
369    fn kind(self, b: &RegexBuilder) -> Kind {
370        b.get_kind(self)
371    }
372    #[inline]
373    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
374        b.get_kind(self) == k
375    }
376    #[inline]
377    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
378        b.nullability(self) == Nullability::NEVER
379    }
380    #[inline]
381    fn nullability(self, b: &RegexBuilder) -> Nullability {
382        b.nullability(self)
383    }
384
385    #[inline]
386    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
387        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
388    }
389    #[inline]
390    pub fn left(self, b: &RegexBuilder) -> NodeId {
391        b.get_left(self)
392    }
393
394    #[inline]
395    pub fn right(self, b: &RegexBuilder) -> NodeId {
396        b.get_right(self)
397    }
398
399    #[inline]
400    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
401        b.der(self, mask)
402    }
403
404    #[inline]
405    fn extra(self, b: &RegexBuilder) -> u32 {
406        b.get_extra(self)
407    }
408
409    #[inline]
410    fn is_pred(self, b: &RegexBuilder) -> bool {
411        b.get_kind(self) == Kind::Pred
412    }
413
414    #[inline]
415    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
416        debug_assert!(self.is_pred(b));
417        TSetId(b.get_extra(self))
418    }
419    #[inline]
420    fn is_star(self, b: &RegexBuilder) -> bool {
421        if NodeId::EPS == self {
422            return false;
423        }
424        b.get_kind(self) == Kind::Star
425    }
426
427    #[inline]
428    pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
429        b.get_kind(self) == Kind::Inter
430    }
431
432    #[inline]
433    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
434        if self.is_concat(b) {
435            let r = self.right(b);
436            return r.is_star(b) && r.left(b) == self.left(b);
437        }
438        false
439    }
440
441    #[inline]
442    fn is_concat(self, b: &RegexBuilder) -> bool {
443        b.get_kind(self) == Kind::Concat
444    }
445
446    #[inline]
447    fn is_lookahead(self, b: &RegexBuilder) -> bool {
448        b.get_kind(self) == Kind::Lookahead
449    }
450
451    #[inline]
452    fn is_lookbehind(self, b: &RegexBuilder) -> bool {
453        b.get_kind(self) == Kind::Lookbehind
454    }
455
456    #[inline]
457    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
458        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
459            Some(b.get_right(self))
460        } else {
461            None
462        }
463    }
464
465    #[inline]
466    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
467        if b.get_kind(self) == Kind::Concat {
468            let left = self.left(b);
469            let right = self.right(b);
470            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
471                return right == NodeId::END;
472            }
473        }
474        false
475    }
476
477    #[inline]
478    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
479        if NodeId::EPS == self {
480            return None;
481        }
482        if self.is_star(b) && self.left(b).is_pred(b) {
483            Some(self.left(b))
484        } else {
485            None
486        }
487    }
488
489    #[inline]
490    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
491        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
492            let middle = self.right(b);
493            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
494                return Some(middle.left(b));
495            }
496        }
497        None
498    }
499
500    #[inline]
501    pub(crate) fn iter_union(
502        self,
503        b: &mut RegexBuilder,
504        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
505    ) {
506        let mut curr: NodeId = self;
507        while curr.kind(b) == Kind::Union {
508            f(b, curr.left(b));
509            curr = curr.right(b);
510        }
511        f(b, curr);
512    }
513
514    #[inline]
515    pub(crate) fn iter_union_while(
516        self,
517        b: &mut RegexBuilder,
518        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
519    ) {
520        let mut curr: NodeId = self;
521        let mut continue_loop = true;
522        while continue_loop && curr.kind(b) == Kind::Union {
523            continue_loop = f(b, curr.left(b));
524            curr = curr.right(b);
525        }
526        if continue_loop {
527            f(b, curr);
528        }
529    }
530}
531
532impl Default for RegexBuilder {
533    fn default() -> Self {
534        Self::new()
535    }
536}
537
538impl RegexBuilder {
539    pub fn new() -> RegexBuilder {
540        let mut inst = Self {
541            mb: MetadataBuilder::new(),
542            array: Vec::new(),
543            index: FxHashMap::default(),
544            cache_empty: FxHashMap::default(),
545            tr_array: Vec::new(),
546            tr_cache: FxHashMap::default(),
547            flags: BuilderFlags::ZERO,
548            lookahead_context_max: 800,
549            num_created: 0,
550            metadata: Vec::new(),
551            reversed: Vec::new(),
552            tr_der_center: Vec::new(),
553            tr_der_begin: Vec::new(),
554            temp_vec: Vec::new(),
555            mk_binary_memo: FxHashMap::default(),
556            clean_cache: FxHashMap::default(),
557        };
558        inst.array.push(NodeKey::default());
559        inst.mk_pred(TSetId::EMPTY);
560        inst.mk_star(NodeId::BOT);
561        inst.mk_pred(TSetId::FULL);
562        inst.mk_star(NodeId::TOP);
563        let top_plus_id = inst.mk_plus(NodeId::TOP);
564        inst.mk_unset(Kind::Begin);
565        inst.mk_unset(Kind::End);
566        debug_assert!(top_plus_id == NodeId::TOPPLUS);
567
568        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
569        inst.mk_leaf(NodeId::BOT);
570        inst.mk_leaf(NodeId::EPS);
571        inst.mk_leaf(NodeId::TOP);
572        inst.mk_leaf(NodeId::TS);
573
574        inst.flags = BuilderFlags::SUBSUME;
575        inst
576    }
577
578    #[inline]
579    pub fn solver_ref(&self) -> &Solver {
580        &self.mb.solver
581    }
582
583    #[inline]
584    pub fn solver(&mut self) -> &mut Solver {
585        &mut self.mb.solver
586    }
587
588    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
589        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
590        self.tr_cache.insert(inst.clone(), new_id);
591        self.tr_array.push(inst);
592        new_id
593    }
594
595    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
596        match self.tr_cache.get(&inst) {
597            Some(&id) => id,
598            None => self.tr_init(inst),
599        }
600    }
601
602    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
603        &self.tr_array[inst.0 as usize]
604    }
605
606    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
607        let solv = self.solver();
608        !solv.is_sat_id(cond1, cond2)
609    }
610
611    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
612        let term = TRegex::<TSetId>::Leaf(node_id);
613        self.get_tregex_id(term)
614    }
615
616    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
617        let tmp_inst = TRegex::ITE(cond, _then, _else);
618        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
619            return *cached;
620        }
621        if _then == _else {
622            return _then;
623        }
624        if self.solver().is_full_id(cond) {
625            return _then;
626        }
627        if self.solver().is_empty_id(cond) {
628            return _else;
629        }
630        let _clean_then = match self.tr_array[_then.0 as usize] {
631            TRegex::Leaf(_) => _then,
632            _ => self.clean(cond, _then),
633        };
634        let notcond = self.solver().not_id(cond);
635        let _clean_else = match self.tr_array[_else.0 as usize] {
636            TRegex::Leaf(_) => _else,
637            _ => self.clean(notcond, _else),
638        };
639
640        if _clean_then == _clean_else {
641            return _clean_then;
642        }
643        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
644        match self.get_tregex(_clean_then) {
645            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
646                let _cond_then = *leftcond;
647                let new_then = *_inner_then;
648                let sand = self.solver().and_id(cond, _cond_then);
649                return self.mk_ite(sand, new_then, _clean_else);
650            }
651            _ => {}
652        }
653        // attempt right side flattening:
654        match self.get_tregex(_clean_else) {
655            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
656            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
657                let e2clone = *_e2;
658                let new_cond = self.mb.solver.or_id(cond, *_c2);
659                return self.mk_ite(new_cond, _clean_then, e2clone);
660            }
661            _ => {}
662        }
663
664        if _clean_then == TRegexId::BOT {
665            let flip_cond = self.solver().not_id(cond);
666            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
667            return cleaned_id;
668        }
669
670        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
671    }
672
673    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
674        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
675            return cached;
676        }
677        let result = match *self.get_tregex(tterm) {
678            TRegex::Leaf(_) => tterm,
679            TRegex::ITE(alpha, _then_id, _else_id) => {
680                let notalpha = self.mb.solver.not_id(alpha);
681                if self.mb.solver.unsat_id(beta, alpha) {
682                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
683                    self.clean(beta, _else_id)
684                } else if self.unsat(beta, notalpha) {
685                    // beta ⊆ alpha, so beta ∧ alpha = beta
686                    self.clean(beta, _then_id)
687                } else {
688                    let tc = self.mb.solver.and_id(beta, alpha);
689                    let ec = self.mb.solver.and_id(beta, notalpha);
690                    let new_then = self.clean(tc, _then_id);
691                    let new_else = self.clean(ec, _else_id);
692                    self.mk_ite(alpha, new_then, new_else)
693                }
694            }
695        };
696        self.clean_cache.insert((beta, tterm), result);
697        result
698    }
699
700    fn mk_unary(
701        &mut self,
702        term: TRegexId,
703        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
704    ) -> TRegexId {
705        match self.tr_array[term.0 as usize] {
706            TRegex::Leaf(node_id) => {
707                let applied = apply(self, node_id);
708                self.mk_leaf(applied)
709            }
710            TRegex::ITE(c1, _then, _else) => {
711                let _then1 = self.mk_unary(_then, apply);
712                let _else1 = self.mk_unary(_else, apply);
713                self.mk_ite(c1, _then1, _else1)
714            }
715        }
716    }
717
718    fn mk_binary_result(
719        &mut self,
720        left: TRegexId,
721        right: TRegexId,
722        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
723    ) -> Result<TRegexId, AlgebraError> {
724        match self.tr_array[left.0 as usize] {
725            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
726                TRegex::Leaf(right_node_id) => {
727                    let applied = apply(self, left_node_id, right_node_id)?;
728                    Ok(self.mk_leaf(applied))
729                }
730                TRegex::ITE(c2, _then, _else) => {
731                    let then2 = self.mk_binary_result(left, _then, apply)?;
732                    let else2 = self.mk_binary_result(left, _else, apply)?;
733                    Ok(self.mk_ite(c2, then2, else2))
734                }
735            },
736            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
737                TRegex::Leaf(_) => {
738                    let then2 = self.mk_binary_result(_then1, right, apply)?;
739                    let else2 = self.mk_binary_result(_else1, right, apply)?;
740                    Ok(self.mk_ite(c1, then2, else2))
741                }
742                TRegex::ITE(c2, _then2, _else2) => {
743                    if c1 == c2 {
744                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
745                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
746                        return Ok(self.mk_ite(c1, _then, _else));
747                    }
748                    if c1.0 > c2.0 {
749                        let _then = self.mk_binary_result(_then1, right, apply)?;
750                        let _else = self.mk_binary_result(_else1, right, apply)?;
751                        Ok(self.mk_ite(c1, _then, _else))
752                    } else {
753                        let _then = self.mk_binary_result(left, _then2, apply)?;
754                        let _else = self.mk_binary_result(left, _else2, apply)?;
755                        Ok(self.mk_ite(c2, _then, _else))
756                    }
757                }
758            },
759        }
760    }
761
762    fn mk_binary(
763        &mut self,
764        left: TRegexId,
765        right: TRegexId,
766        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
767    ) -> TRegexId {
768        self.mk_binary_memo.clear();
769        self.mk_binary_inner(left, right, apply)
770    }
771
772    fn mk_binary_inner(
773        &mut self,
774        left: TRegexId,
775        right: TRegexId,
776        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
777    ) -> TRegexId {
778        if left == right {
779            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
780        }
781        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
782            return cached;
783        }
784        let result = match self.tr_array[left.0 as usize] {
785            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
786                TRegex::Leaf(right_node_id) => {
787                    let applied = apply(self, left_node_id, right_node_id);
788                    self.mk_leaf(applied)
789                }
790                TRegex::ITE(c2, _then, _else) => {
791                    let then2 = self.mk_binary_inner(left, _then, apply);
792                    let else2 = self.mk_binary_inner(left, _else, apply);
793                    self.mk_ite(c2, then2, else2)
794                }
795            },
796            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
797                TRegex::Leaf(_) => {
798                    let then2 = self.mk_binary_inner(_then1, right, apply);
799                    let else2 = self.mk_binary_inner(_else1, right, apply);
800                    self.mk_ite(c1, then2, else2)
801                }
802                TRegex::ITE(c2, _then2, _else2) => {
803                    if c1 == c2 {
804                        let _then = self.mk_binary_inner(_then1, _then2, apply);
805                        let _else = self.mk_binary_inner(_else1, _else2, apply);
806                        self.mk_ite(c1, _then, _else)
807                    } else if c1.0 > c2.0 {
808                        let _then = self.mk_binary_inner(_then1, right, apply);
809                        let _else = self.mk_binary_inner(_else1, right, apply);
810                        self.mk_ite(c1, _then, _else)
811                    } else {
812                        let _then = self.mk_binary_inner(left, _then2, apply);
813                        let _else = self.mk_binary_inner(left, _else2, apply);
814                        self.mk_ite(c2, _then, _else)
815                    }
816                }
817            },
818        };
819        self.mk_binary_memo.insert((left, right), result);
820        result
821    }
822
823    pub fn get_nulls(
824        &mut self,
825        pending_rel: u32,
826        mask: Nullability,
827        acc: &mut BTreeSet<NullState>,
828        node_id: NodeId,
829    ) {
830        debug_assert!(node_id != NodeId::MISSING);
831        if !self.is_nullable(node_id, mask) {
832            return;
833        }
834        match self.get_kind(node_id) {
835            Kind::Pred => {}
836            Kind::End => {
837                if mask.has(Nullability::END) {
838                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
839                }
840            }
841            Kind::Begin => {
842                if mask.has(Nullability::BEGIN) {
843                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
844                }
845            }
846            Kind::Concat => {
847                let new_mask = self.nullability(node_id).and(mask);
848                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
849                if self.is_nullable(node_id.left(self), mask) {
850                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
851                }
852            }
853            Kind::Union => {
854                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
855                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
856            }
857            Kind::Inter => {
858                let new_mask = self.nullability(node_id).and(mask);
859                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
860                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
861            }
862            Kind::Star => {
863                acc.insert(NullState::new(mask, pending_rel));
864                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
865            }
866            Kind::Compl => {
867                if !self.is_nullable(node_id.left(self), mask) {
868                    acc.insert(NullState::new(mask, 0));
869                }
870            }
871            Kind::Lookbehind => {
872                let new_mask = self.nullability(node_id).and(mask);
873                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
874                if node_id.right(self) != NodeId::MISSING {
875                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
876                }
877            }
878            Kind::Lookahead => {
879                let la_inner = self.get_lookahead_inner(node_id);
880                if self.is_nullable(la_inner, mask) {
881                    let rel = self.get_lookahead_rel(node_id);
882                    if rel != u32::MAX {
883                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
884                    }
885                    // tail only contributes when body is sat
886                    let la_tail = self.get_lookahead_tail(node_id);
887                    if la_tail != NodeId::MISSING {
888                        self.get_nulls(pending_rel, mask, acc, la_tail);
889                    }
890                }
891            }
892            Kind::Counted => {
893                let packed = self.get_extra(node_id);
894                let best = packed >> 16;
895                if best > 0 {
896                    acc.insert(NullState::new(mask, pending_rel + best));
897                }
898            }
899        }
900    }
901
902    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
903        self.get_meta_flags(node_id)
904            .has(MetaFlags::CONTAINS_LOOKAROUND)
905    }
906
907    /// whether node contains `^`, `$`, `\A`, `\z` anchors.
908    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
909        self.get_meta_flags(node_id)
910            .has(MetaFlags::CONTAINS_ANCHORS)
911    }
912
913    pub fn is_infinite(&self, node_id: NodeId) -> bool {
914        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
915    }
916
917    /// returns (min_length, max_length). max = u32::MAX means unbounded.
918    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
919        if self.is_infinite(node_id) {
920            if node_id.is_inter(self) {
921                self.get_bounded_length(node_id)
922            } else {
923                (self.get_min_length_only(node_id), u32::MAX)
924            }
925        } else {
926            self.get_bounded_length(node_id)
927        }
928    }
929
930    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
931        if node_id == NodeId::EPS {
932            return (0, 0);
933        }
934        match self.get_kind(node_id) {
935            Kind::End | Kind::Begin => (0, 0),
936            Kind::Pred => (1, 1),
937            Kind::Concat => {
938                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
939                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
940                (lmin + rmin, lmax.saturating_add(rmax))
941            }
942            Kind::Union => {
943                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
944                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
945                (lmin.min(rmin), lmax.max(rmax))
946            }
947            Kind::Inter => {
948                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
949                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
950                (lmin.max(rmin), lmax.min(rmax))
951            }
952            Kind::Lookahead => {
953                let body = node_id.left(self);
954                if self.is_infinite(body) {
955                    return (0, u32::MAX);
956                }
957                let right = node_id.right(self);
958                if right.is_missing() {
959                    (0, 0)
960                } else {
961                    self.get_min_max_length(right)
962                }
963            }
964            Kind::Counted => (0, 0),
965            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
966        }
967    }
968
969    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
970        match self.get_kind(node_id) {
971            Kind::End | Kind::Begin => Some(0),
972            Kind::Pred => Some(1),
973            Kind::Concat => {
974                let l = self.get_fixed_length(node_id.left(self))?;
975                let r = self.get_fixed_length(node_id.right(self))?;
976                Some(l + r)
977            }
978            Kind::Union => {
979                let l = self.get_fixed_length(node_id.left(self))?;
980                let r = self.get_fixed_length(node_id.right(self))?;
981                if l == r {
982                    Some(l)
983                } else {
984                    None
985                }
986            }
987            Kind::Inter => {
988                let l = self.get_fixed_length(node_id.left(self))?;
989                let r = self.get_fixed_length(node_id.right(self))?;
990                if l == r {
991                    Some(l)
992                } else {
993                    None
994                }
995            }
996            Kind::Lookahead => {
997                let right = node_id.right(self);
998                if right.is_missing() {
999                    Some(0)
1000                } else {
1001                    self.get_fixed_length(right)
1002                }
1003            }
1004            Kind::Counted => Some(0),
1005            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1006        }
1007    }
1008
1009    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1010        match self.get_kind(node_id) {
1011            Kind::End | Kind::Begin => 0,
1012            Kind::Pred => 1,
1013            Kind::Concat => {
1014                self.get_min_length_only(node_id.left(self))
1015                    + self.get_min_length_only(node_id.right(self))
1016            }
1017            Kind::Union => self
1018                .get_min_length_only(node_id.left(self))
1019                .min(self.get_min_length_only(node_id.right(self))),
1020            Kind::Inter => self
1021                .get_min_length_only(node_id.left(self))
1022                .max(self.get_min_length_only(node_id.right(self))),
1023            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1024            Kind::Compl => {
1025                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1026                    0
1027                } else {
1028                    1
1029                }
1030            }
1031        }
1032    }
1033
1034    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1035        if node_id == NodeId::TS {
1036            return true;
1037        }
1038        match self.get_kind(node_id) {
1039            Kind::Inter => {
1040                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1041            }
1042            Kind::Union => {
1043                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1044            }
1045            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1046            _ => false,
1047        }
1048    }
1049
1050    #[inline]
1051    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1052        if node_id.is_concat(self) {
1053            self.ends_with_ts(node_id.right(self))
1054        } else {
1055            node_id == NodeId::TS
1056        }
1057    }
1058
1059    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1060        debug_assert!(node_id != NodeId::MISSING);
1061        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1062    }
1063
1064    pub(crate) fn cache_der(
1065        &mut self,
1066        node_id: NodeId,
1067        result: TRegexId,
1068        mask: Nullability,
1069    ) -> TRegexId {
1070        if mask == Nullability::CENTER {
1071            self.tr_der_center[node_id.0 as usize] = result
1072        } else {
1073            self.tr_der_begin[node_id.0 as usize] = result
1074        };
1075        result
1076    }
1077
1078    pub(crate) fn try_cached_der(
1079        &mut self,
1080        node_id: NodeId,
1081        mask: Nullability,
1082    ) -> Option<TRegexId> {
1083        let cache = if mask == Nullability::CENTER {
1084            &mut self.tr_der_center
1085        } else {
1086            &mut self.tr_der_begin
1087        };
1088        match cache.get(node_id.0 as usize) {
1089            Some(&TRegexId::MISSING) => {}
1090            Some(&result) => {
1091                return Some(result);
1092            }
1093            None => {
1094                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1095            }
1096        }
1097        None
1098    }
1099
1100    pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1101        let mut term = self.get_tregex(der);
1102        loop {
1103            match *term {
1104                TRegex::Leaf(node_id) => return node_id,
1105                TRegex::ITE(cond, _then, _else) => {
1106                    if self.solver().is_sat_id(set, cond) {
1107                        term = self.get_tregex(_then);
1108                    } else {
1109                        term = self.get_tregex(_else);
1110                    }
1111                }
1112            }
1113        }
1114    }
1115
1116    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1117        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1118        debug_assert!(
1119            node_id != NodeId::MISSING,
1120            "attempting to derive missing node"
1121        );
1122        if let Some(result) = self.try_cached_der(node_id, mask) {
1123            return Ok(result);
1124        }
1125
1126        let result = match node_id.kind(self) {
1127            Kind::Compl => {
1128                let leftd = node_id.left(self).der(self, mask)?;
1129                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1130            }
1131            Kind::Inter => {
1132                let leftd = node_id.left(self).der(self, mask)?;
1133                let rightd = node_id.right(self).der(self, mask)?;
1134                {
1135                    let this = &mut *self;
1136                    this.mk_binary(
1137                        leftd,
1138                        rightd,
1139                        &mut (|b, left, right| b.mk_inter(left, right)),
1140                    )
1141                }
1142            }
1143            Kind::Union => {
1144                let leftd = self.der(node_id.left(self), mask)?;
1145                let rightd = self.der(node_id.right(self), mask)?;
1146                {
1147                    let this = &mut *self;
1148                    this.mk_binary(
1149                        leftd,
1150                        rightd,
1151                        &mut (|b, left, right| b.mk_union(left, right)),
1152                    )
1153                }
1154            }
1155            Kind::Concat => {
1156                let head = node_id.left(self);
1157                let tail = node_id.right(self);
1158                let tail_leaf = self.mk_leaf(tail);
1159                let head_der = self.der(head, mask)?;
1160
1161                if self.is_nullable(head, mask) {
1162                    let rightd = self.der(tail, mask)?;
1163                    let ldr = {
1164                        let this = &mut *self;
1165                        this.mk_binary(
1166                            head_der,
1167                            tail_leaf,
1168                            &mut (|b, left, right| b.mk_concat(left, right)),
1169                        )
1170                    };
1171                    {
1172                        let this = &mut *self;
1173                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1174                    }
1175                } else {
1176                    let this = &mut *self;
1177                    this.mk_binary(
1178                        head_der,
1179                        tail_leaf,
1180                        &mut (|b, left, right| b.mk_concat(left, right)),
1181                    )
1182                }
1183            }
1184            Kind::Star => {
1185                if node_id == NodeId::EPS {
1186                    TRegexId::BOT
1187                } else {
1188                    let left = node_id.left(self);
1189                    let r_decr_leaf = self.mk_leaf(node_id);
1190                    let r_der = self.der(left, mask)?;
1191                    let this = &mut *self;
1192                    this.mk_binary(
1193                        r_der,
1194                        r_decr_leaf,
1195                        &mut (|b, left, right| b.mk_concat(left, right)),
1196                    )
1197                }
1198            }
1199            Kind::Lookbehind => {
1200                let lb_prev_der = {
1201                    let lb_prev = self.get_lookbehind_prev(node_id);
1202                    if lb_prev == NodeId::MISSING {
1203                        TRegexId::MISSING
1204                    } else {
1205                        self.der(lb_prev, mask)?
1206                    }
1207                };
1208                let lb_inner = self.get_lookbehind_inner(node_id);
1209                let lb_inner_der = self.der(lb_inner, mask)?;
1210                {
1211                    let this = &mut *self;
1212                    this.mk_binary_result(
1213                        lb_inner_der,
1214                        lb_prev_der,
1215                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1216                    )?
1217                }
1218            }
1219            Kind::Lookahead => {
1220                let la_tail = self.get_lookahead_tail(node_id);
1221                let la_body = node_id.left(self);
1222                let rel = self.get_lookahead_rel(node_id);
1223
1224                if self.is_nullable(la_body, mask) {
1225                    // nullabilty is taken once, just keep the body
1226                    let right = node_id.right(self).missing_to_eps();
1227                    let rder = self.der(right, mask).clone();
1228                    return rder;
1229                }
1230
1231                if rel == u32::MAX {
1232                    let la_body_der = self.der(la_body, mask)?;
1233                    if la_tail.is_kind(self, Kind::Pred) {
1234                        let transitioned =
1235                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1236                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1237                        let concated = self.mk_concat(la_tail, new_la);
1238                        return self.der(concated, mask);
1239                    }
1240                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1241                        let left = la_tail.left(self);
1242                        let tset = left.pred_tset(self);
1243                        let transitioned = self.transition_term(la_body_der, tset);
1244                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1245                        let tail_right = la_tail.right(self);
1246                        let concated = self.mk_concat(new_la, tail_right);
1247                        let concated = self.mk_concat(left, concated);
1248                        return self.der(concated, mask);
1249                    }
1250                }
1251
1252                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1253                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1254                    let concated = self.mk_concat(la_body, nulls_mask);
1255                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1256                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1257                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1258                    let new_union = self.mk_union(concated_look, new_look);
1259                    return self.der(new_union, mask);
1260                }
1261
1262                let la_tail_der = if la_tail == NodeId::MISSING {
1263                    TRegexId::MISSING
1264                } else {
1265                    if self.is_nullable(la_tail, mask) {
1266                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1267                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1268                        let la_union = self.mk_union(la_tail, nulls_la);
1269                        self.der(la_union, mask)?
1270                    } else {
1271                        self.der(la_tail, mask)?
1272                    }
1273                };
1274
1275                let la_body_der = self.der(la_body, mask)?;
1276
1277                if rel != u32::MAX && rel > self.lookahead_context_max {
1278                    return Err(AlgebraError::AnchorLimit);
1279                }
1280
1281                let la = {
1282                    let this = &mut *self;
1283                    let rel = helpers::incr_rel(rel);
1284                    this.mk_binary(
1285                        la_body_der,
1286                        la_tail_der,
1287                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1288                    )
1289                };
1290
1291                if rel != u32::MAX
1292                    && la_tail_der != TRegexId::MISSING
1293                    && self.is_nullable(la_tail, mask)
1294                {
1295                    let look_only = {
1296                        let this = &mut *self;
1297                        let right = TRegexId::MISSING;
1298                        let rel = helpers::incr_rel(rel);
1299                        this.mk_binary(
1300                            la_body_der,
1301                            right,
1302                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1303                        )
1304                    };
1305                    {
1306                        let this = &mut *self;
1307                        this.mk_binary(
1308                            look_only,
1309                            la,
1310                            &mut (|b, left, right| b.mk_union(left, right)),
1311                        )
1312                    }
1313                } else {
1314                    la
1315                }
1316            }
1317            Kind::Counted => {
1318                let body = node_id.left(self);
1319                let chain = node_id.right(self);
1320                let packed = self.get_extra(node_id);
1321                let step = (packed & 0xFFFF) as u16;
1322                let best = (packed >> 16) as u16;
1323
1324                let mid_best = if self.is_nullable(body, mask) && step >= best {
1325                    step
1326                } else {
1327                    best
1328                };
1329
1330                let body_der = self.der(body, mask)?;
1331                let new_step = step.saturating_add(1);
1332                self.mk_unary(body_der, &mut |b, new_body| {
1333                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1334                        new_step
1335                    } else {
1336                        mid_best
1337                    };
1338                    let packed = (final_best as u32) << 16 | new_step as u32;
1339                    b.mk_counted(new_body, chain, packed)
1340                })
1341            }
1342            Kind::Begin | Kind::End => TRegexId::BOT,
1343            Kind::Pred => {
1344                let psi = node_id.pred_tset(self);
1345                if psi == TSetId::EMPTY {
1346                    TRegexId::BOT
1347                } else {
1348                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1349                }
1350            }
1351        };
1352
1353        // println!("{} {}", node_id.0, self.pp(node_id));
1354        // println!("node: {} (total: {})", node_id.0, self.num_created);
1355
1356        self.cache_der(node_id, result, mask);
1357        Ok(result)
1358    }
1359
1360    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1361        debug_assert!(meta_id != MetadataId::MISSING);
1362        match self.metadata.get_mut(node_id.0 as usize) {
1363            Some(v) => *v = meta_id,
1364            None => {
1365                self.metadata
1366                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1367                self.metadata[node_id.0 as usize] = meta_id;
1368            }
1369        }
1370    }
1371
1372    fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1373        debug_assert!(reversed_id != NodeId::MISSING);
1374        match self.reversed.get_mut(node_id.0 as usize) {
1375            Some(v) => *v = reversed_id,
1376            None => {
1377                self.reversed
1378                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1379                self.reversed[node_id.0 as usize] = reversed_id;
1380            }
1381        }
1382        return reversed_id;
1383    }
1384
1385    fn init(&mut self, inst: NodeKey) -> NodeId {
1386        self.num_created += 1;
1387        let node_id = NodeId(self.num_created);
1388        self.index.insert(inst.clone(), node_id);
1389        match inst.kind {
1390            Kind::Pred => {
1391                let meta_id = self.mb.get_meta_id(Metadata {
1392                    flags: MetaFlags::ZERO,
1393                    nulls: NullsId::EMPTY,
1394                });
1395                self.init_metadata(node_id, meta_id);
1396            }
1397            Kind::Begin => {
1398                let meta_id = self.mb.get_meta_id(Metadata {
1399                    flags: MetaFlags::with_nullability(
1400                        Nullability::BEGIN,
1401                        MetaFlags::CONTAINS_ANCHORS,
1402                    ),
1403                    nulls: NullsId::BEGIN0,
1404                });
1405                self.init_metadata(node_id, meta_id);
1406            }
1407            Kind::End => {
1408                let meta_id = self.mb.get_meta_id(Metadata {
1409                    flags: MetaFlags::with_nullability(
1410                        Nullability::END,
1411                        MetaFlags::CONTAINS_ANCHORS,
1412                    ),
1413                    nulls: NullsId::END0,
1414                });
1415                self.init_metadata(node_id, meta_id);
1416            }
1417            Kind::Inter => {
1418                let m1 = self.get_node_meta_id(inst.left);
1419                let m2 = self.get_node_meta_id(inst.right);
1420                let meta_id = {
1421                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1422                    let mask_l = inst.left.nullability(self);
1423                    let mask_r = inst.right.nullability(self);
1424                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1425                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1426                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1427                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1428                    let new_meta = Metadata {
1429                        flags: self.mb.flags_inter(m1, m2),
1430                        nulls,
1431                    };
1432                    self.mb.get_meta_id(new_meta)
1433                };
1434                self.init_metadata(node_id, meta_id);
1435            }
1436            Kind::Union => {
1437                let m1 = self.get_node_meta_id(inst.left);
1438                let m2 = self.get_node_meta_id(inst.right);
1439                let meta_id = {
1440                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1441                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1442                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1443                    let new_meta = Metadata {
1444                        flags: self.mb.flags_union(m1, m2),
1445                        nulls,
1446                    };
1447                    self.mb.get_meta_id(new_meta)
1448                };
1449                self.init_metadata(node_id, meta_id);
1450            }
1451            Kind::Concat => {
1452                let flags = self.mb.flags_concat(
1453                    self.get_node_meta_id(inst.left),
1454                    self.get_node_meta_id(inst.right),
1455                );
1456
1457                let right_nullability = inst.right.nullability(self);
1458                let left_nullability = inst.left.nullability(self);
1459                let nulls_left = self.get_nulls_id(inst.left);
1460                let nulls_right = self.get_nulls_id(inst.right);
1461                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1462                let mask = right_nullability.and(left_nullability);
1463                nulls = self.mb.nb.and_mask(nulls, mask);
1464
1465                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1466                self.init_metadata(node_id, new_id);
1467            }
1468            Kind::Star => {
1469                let left_nulls = self.get_nulls_id(inst.left);
1470                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1471                let meta_id = self.mb.get_meta_id(Metadata {
1472                    flags: self
1473                        .mb
1474                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1475                    nulls,
1476                });
1477                self.init_metadata(node_id, meta_id);
1478            }
1479            Kind::Compl => {
1480                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1481                let meta_id = self.mb.get_meta_id(Metadata {
1482                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1483                    nulls,
1484                });
1485                self.init_metadata(node_id, meta_id);
1486            }
1487            Kind::Lookbehind => {
1488                let mut left_nullability = self.nullability(inst.left);
1489                let mut contains_flags = self.get_flags_contains(inst.left);
1490                let nulls = if inst.right.is_missing() {
1491                    self.get_nulls_id(inst.left)
1492                } else {
1493                    let right_nullability = self.nullability(inst.right);
1494                    let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1495                    let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1496                    left_nullability = left_nullability.and(right_nullability);
1497                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1498                    self.mb.nb.and_id(nulls_left, nulls_right)
1499                };
1500
1501                let meta_id = self.mb.get_meta_id(Metadata {
1502                    flags: MetaFlags::with_nullability(
1503                        left_nullability,
1504                        contains_flags
1505                            .or(MetaFlags::CONTAINS_LOOKAROUND)
1506                            .or(MetaFlags::CONTAINS_LOOKBEHIND),
1507                    ),
1508                    nulls,
1509                });
1510                self.init_metadata(node_id, meta_id);
1511            }
1512            Kind::Lookahead => {
1513                let mut nulls = self.get_nulls_id(inst.left);
1514                // lookahead nulls shifted by rel (distance to body match)
1515                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1516                let left_nullability = inst.left.nullability(self);
1517                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1518
1519                nulls = self.mb.nb.or_id(nulls, nulls_right);
1520
1521                let la_inner = inst.left;
1522                let la_tail = inst.right;
1523                let null = self
1524                    .get_meta_flags(la_inner)
1525                    .nullability()
1526                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1527                let contains_flags = self
1528                    .get_flags_contains(la_inner)
1529                    .or(self.get_flags_contains(la_tail));
1530
1531                let meta_id = self.mb.get_meta_id(Metadata {
1532                    flags: MetaFlags::with_nullability(
1533                        null,
1534                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1535                    ),
1536                    nulls,
1537                });
1538                self.init_metadata(node_id, meta_id);
1539            }
1540            Kind::Counted => {
1541                let best = inst.extra >> 16;
1542                let (null, nulls) = if best > 0 {
1543                    let mut ns = BTreeSet::new();
1544                    ns.insert(NullState::new(Nullability::CENTER, best));
1545                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1546                } else {
1547                    (Nullability::NEVER, NullsId::EMPTY)
1548                };
1549                let meta_id = self.mb.get_meta_id(Metadata {
1550                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1551                    nulls,
1552                });
1553                self.init_metadata(node_id, meta_id);
1554            }
1555        }
1556
1557        self.array.push(inst);
1558
1559        if let Some(rw) = self.post_init_simplify(node_id) {
1560            self.override_as(node_id, rw)
1561        } else {
1562            node_id
1563        }
1564    }
1565
1566    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1567        match self.get_kind(node_id) {
1568            Kind::Concat => {
1569                let lhs = node_id.left(self);
1570                let rhs = node_id.right(self);
1571                if lhs.is_pred_star(self).is_some() {
1572                    if let Some(opttail) = rhs.is_opt_v(self) {
1573                        if let Some(true) = self.subsumes(lhs, opttail) {
1574                            return Some(lhs);
1575                        }
1576                    }
1577                }
1578            }
1579            Kind::Union => {
1580                let lhs = node_id.left(self);
1581                let rhs = node_id.right(self);
1582                let mut subsumed = false;
1583                rhs.iter_union_while(self, &mut |b, branch| {
1584                    if b.nullable_subsumes(branch, lhs) {
1585                        subsumed = true;
1586                    }
1587                    !subsumed
1588                });
1589                if subsumed {
1590                    return Some(rhs);
1591                }
1592                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1593                    return Some(rhs);
1594                }
1595            }
1596            _ => {}
1597        }
1598
1599        None
1600    }
1601
1602    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1603    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1604        if self.get_kind(lhs) != Kind::Union {
1605            return false; // single branch already checked by nullable_subsumes
1606        }
1607        let mut rhs_branches = Vec::new();
1608        let mut curr = rhs;
1609        while self.get_kind(curr) == Kind::Union {
1610            rhs_branches.push(self.get_left(curr));
1611            curr = self.get_right(curr);
1612        }
1613        rhs_branches.push(curr);
1614
1615        curr = lhs;
1616        while self.get_kind(curr) == Kind::Union {
1617            if !rhs_branches.contains(&self.get_left(curr)) {
1618                return false;
1619            }
1620            curr = self.get_right(curr);
1621        }
1622        rhs_branches.contains(&curr)
1623    }
1624
1625    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1626    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1627        if node == target {
1628            return true;
1629        }
1630        match self.get_kind(node) {
1631            Kind::Union => {
1632                self.nullable_subsumes(self.get_left(node), target)
1633                    || self.nullable_subsumes(self.get_right(node), target)
1634            }
1635            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1636                self.nullable_subsumes(self.get_right(node), target)
1637            }
1638            _ => false,
1639        }
1640    }
1641
1642    pub fn num_nodes(&self) -> u32 {
1643        self.num_created
1644    }
1645    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1646        match self.index.get(&inst) {
1647            Some(&id) => id,
1648            None => self.init(inst),
1649        }
1650    }
1651    #[inline]
1652    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1653        self.index.get(inst)
1654    }
1655
1656    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1657        self.index.insert(key, subsumed);
1658        subsumed
1659    }
1660
1661    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1662        let key = &self.array[key.0 as usize];
1663        let entry = self.index.get_mut(key).unwrap();
1664        *entry = subsumed;
1665        subsumed
1666    }
1667
1668    #[inline]
1669    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1670        self.array[node_id.0 as usize].left
1671    }
1672
1673    #[inline]
1674    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1675        self.array[node_id.0 as usize].right
1676    }
1677
1678    #[inline]
1679    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1680        self.array[node_id.0 as usize].extra
1681    }
1682
1683    #[inline]
1684    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1685        debug_assert!(node_id.is_concat(self));
1686        let mut curr = node_id;
1687        while curr.is_concat(self) {
1688            curr = curr.right(self);
1689        }
1690        curr
1691    }
1692
1693    fn has_trailing_la(&self, node: NodeId) -> bool {
1694        let end = match self.get_kind(node) {
1695            Kind::Concat => self.get_concat_end(node),
1696            Kind::Lookahead => node,
1697            _ => return false,
1698        };
1699        end.is_lookahead(self) && end.right(self).is_missing()
1700    }
1701
1702    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1703        if node.is_lookahead(self) {
1704            return (NodeId::EPS, node);
1705        }
1706        debug_assert!(node.is_concat(self));
1707        let right = node.right(self);
1708        if !right.is_concat(self) {
1709            return (node.left(self), right);
1710        }
1711        let (stripped, la) = self.strip_trailing_la(right);
1712        (self.mk_concat(node.left(self), stripped), la)
1713    }
1714    #[inline]
1715    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1716        debug_assert!(matches!(
1717            self.get_kind(lookahead_node_id),
1718            Kind::Lookahead | Kind::Counted
1719        ));
1720        lookahead_node_id.left(self)
1721    }
1722    #[inline]
1723    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1724        debug_assert!(lookahead_node_id.is_lookahead(self));
1725        lookahead_node_id.right(self)
1726    }
1727    #[inline]
1728    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1729        debug_assert!(
1730            matches!(
1731                self.get_kind(lookahead_node_id),
1732                Kind::Lookahead | Kind::Counted
1733            ),
1734            "not lookahead/counted: {:?}",
1735            self.pp(lookahead_node_id)
1736        );
1737        self.get_extra(lookahead_node_id)
1738    }
1739    #[inline]
1740    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1741        debug_assert!(lookbehind_node_id.is_lookbehind(self));
1742        lookbehind_node_id.left(self)
1743    }
1744    #[inline]
1745    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1746        debug_assert!(lookbehind_node_id.is_lookbehind(self));
1747        lookbehind_node_id.right(self)
1748    }
1749
1750    #[inline]
1751    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1752        debug_assert!(
1753            self.array.len() > node_id.0 as usize,
1754            "array len: {:?}",
1755            node_id
1756        );
1757        self.array[node_id.0 as usize].kind
1758    }
1759
1760    #[inline]
1761    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1762        &self.array[node_id.0 as usize]
1763    }
1764
1765    #[inline]
1766    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1767        self.metadata[node_id.0 as usize]
1768    }
1769
1770    #[inline]
1771    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1772        debug_assert!(node_id.0 != u32::MAX);
1773        let meta_id = self.get_node_meta_id(node_id);
1774        debug_assert!(meta_id != MetadataId::MISSING);
1775        &self.mb.array[meta_id.0 as usize]
1776    }
1777
1778    #[inline]
1779    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1780        if node_id == NodeId::MISSING {
1781            NullsId::EMPTY
1782        } else {
1783            self.get_meta(node_id).nulls
1784        }
1785    }
1786
1787    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1788        self.mb
1789            .nb
1790            .array
1791            .iter()
1792            .map(|set| set.iter().cloned().collect())
1793            .collect()
1794    }
1795
1796    pub fn nulls_count(&self) -> usize {
1797        self.mb.nb.array.len()
1798    }
1799
1800    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1801        self.mb.nb.array[id as usize].iter().cloned().collect()
1802    }
1803
1804    #[inline]
1805    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1806        if node_id == NodeId::MISSING {
1807            NullsId::EMPTY
1808        } else {
1809            let nulls = self.get_meta(node_id).nulls;
1810            self.mb.nb.and_mask(nulls, mask)
1811        }
1812    }
1813
1814    #[inline]
1815    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1816        let meta_id = self.get_node_meta_id(node_id);
1817        let meta = &self.mb.array[meta_id.0 as usize];
1818        meta.flags
1819    }
1820
1821    #[inline]
1822    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1823        self.get_meta(node_id).flags.nullability()
1824    }
1825
1826    #[inline]
1827    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1828        let meta_id = self.get_node_meta_id(node_id);
1829        let meta = &self.mb.array[meta_id.0 as usize];
1830        meta.flags.all_contains_flags()
1831    }
1832
1833    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1834        if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
1835            return self.strip_lb(node_id.right(self));
1836        }
1837        self.strip_lb_inner(node_id)
1838    }
1839
1840    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1841        if !self.contains_look(node_id) {
1842            return Ok(node_id);
1843        }
1844        if node_id.is_concat(self) && node_id.left(self).is_lookbehind(self) {
1845            let lb = node_id.left(self);
1846            let prev = self.get_lookbehind_prev(lb);
1847            let tail = self.strip_lb_inner(node_id.right(self))?;
1848            if prev != NodeId::MISSING {
1849                let stripped_prev = self.strip_lb_inner(prev)?;
1850                return Ok(self.mk_concat(stripped_prev, tail));
1851            }
1852            return Ok(tail);
1853        }
1854        if node_id.is_inter(self) {
1855            let left = self.strip_lb_inner(node_id.left(self))?;
1856            let right = self.strip_lb_inner(node_id.right(self))?;
1857            return Ok(self.mk_inter(left, right));
1858        }
1859        if self.get_kind(node_id) == Kind::Union {
1860            let left = self.strip_lb_inner(node_id.left(self))?;
1861            let right = self.strip_lb_inner(node_id.right(self))?;
1862            return Ok(self.mk_union(left, right));
1863        }
1864        match self.get_kind(node_id) {
1865            Kind::Lookbehind => {
1866                let prev = self.get_lookbehind_prev(node_id);
1867                if prev != NodeId::MISSING {
1868                    self.strip_lb_inner(prev)
1869                } else {
1870                    Ok(NodeId::EPS)
1871                }
1872            }
1873            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1874                Err(AlgebraError::UnsupportedPattern)
1875            }
1876            _ => Ok(node_id),
1877        }
1878    }
1879
1880    // for prefix purposes we prune any \A leading paths
1881    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1882        if !self.contains_anchors(node_id) {
1883            return node_id;
1884        }
1885        match self.get_kind(node_id) {
1886            Kind::Begin => NodeId::BOT,
1887            Kind::Concat => {
1888                let left = self.nonbegins(node_id.left(self));
1889                if left == NodeId::BOT {
1890                    return NodeId::BOT;
1891                }
1892                self.mk_concat(left, node_id.right(self))
1893            }
1894            Kind::Union => {
1895                let left = self.nonbegins(node_id.left(self));
1896                let right = self.nonbegins(node_id.right(self));
1897                self.mk_union(left, right)
1898            }
1899            _ => node_id,
1900        }
1901    }
1902
1903    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1904        match self.get_kind(node_id) {
1905            Kind::Concat => {
1906                let head = node_id.left(self);
1907                match self.get_kind(head) {
1908                    _ if self.any_nonbegin_nullable(head) => {
1909                        self.strip_prefix_safe(node_id.right(self))
1910                    }
1911                    _ => node_id,
1912                }
1913            }
1914            _ => node_id,
1915        }
1916    }
1917    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1918        match self.get_kind(node_id) {
1919            Kind::Begin => NodeId::BOT,
1920            Kind::Concat => {
1921                let head = self.prune_begin(node_id.left(self));
1922                let tail = self.prune_begin(node_id.right(self));
1923                self.mk_concat(head, tail)
1924            }
1925            Kind::Lookbehind => {
1926                if !node_id.right(self).is_missing() {
1927                    return node_id;
1928                }
1929                let head = self.prune_begin(node_id.left(self));
1930                head
1931            }
1932            Kind::Union => {
1933                let left = self.prune_begin(node_id.left(self));
1934                let right = self.prune_begin(node_id.right(self));
1935                self.mk_union(left, right)
1936            }
1937            _ => node_id,
1938        }
1939    }
1940
1941    pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1942        if !self.contains_look(node_id) {
1943            return Ok(node_id);
1944        }
1945        let result = match self.get_kind(node_id) {
1946            Kind::Concat => {
1947                let left = self.normalize_rev(node_id.left(self))?;
1948                let right = self.normalize_rev(node_id.right(self))?;
1949                self.mk_concat(left, right)
1950            }
1951            Kind::Inter => {
1952                let left = self.normalize_rev(node_id.left(self))?;
1953                let right = self.normalize_rev(node_id.right(self))?;
1954                self.mk_inter(left, right)
1955            }
1956            Kind::Union => {
1957                let left = self.normalize_rev(node_id.left(self))?;
1958                let right = self.normalize_rev(node_id.right(self))?;
1959                self.mk_union(left, right)
1960            }
1961            Kind::Lookbehind => {
1962                let left = self.normalize_rev(node_id.left(self))?;
1963                let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
1964                let lbody_ts = self.mk_concat(NodeId::TS, left);
1965                let ltail_ts = self.mk_concat(NodeId::TS, right);
1966                let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1967                as_inter
1968            }
1969            Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
1970                return Err(AlgebraError::UnsupportedPattern)
1971            }
1972            _ => node_id,
1973        };
1974        Ok(result)
1975    }
1976
1977    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1978        debug_assert!(node_id != NodeId::MISSING);
1979        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1980            if *rev != NodeId::MISSING {
1981                return Ok(*rev);
1982            }
1983        }
1984        let rw = match self.get_kind(node_id) {
1985            Kind::End => NodeId::BEGIN,
1986            Kind::Begin => NodeId::END,
1987            Kind::Pred => node_id,
1988            Kind::Concat => {
1989                let left = self.reverse(node_id.left(self))?;
1990                let right = self.reverse(node_id.right(self))?;
1991                self.mk_concat(right, left)
1992            }
1993            Kind::Union => {
1994                let left = self.reverse(node_id.left(self))?;
1995                let right = self.reverse(node_id.right(self))?;
1996                self.mk_union(left, right)
1997            }
1998            Kind::Inter => {
1999                let left = self.reverse(node_id.left(self))?;
2000                let right = self.reverse(node_id.right(self))?;
2001                self.mk_inter(left, right)
2002            }
2003            Kind::Star => {
2004                let body = self.reverse(node_id.left(self))?;
2005                self.mk_star(body)
2006            }
2007            Kind::Compl => {
2008                if self.contains_look(node_id.left(self)) {
2009                    return Err(AlgebraError::UnsupportedPattern);
2010                }
2011                let body = self.reverse(node_id.left(self))?;
2012                self.mk_compl(body)
2013            }
2014            Kind::Lookbehind => {
2015                let prev = self.get_lookbehind_prev(node_id);
2016                let inner_id = self.get_lookbehind_inner(node_id);
2017                let rev_inner = self.reverse(inner_id)?;
2018                let rev_prev = if prev != NodeId::MISSING {
2019                    self.reverse(prev)?
2020                } else {
2021                    NodeId::MISSING
2022                };
2023                self.mk_lookahead(rev_inner, rev_prev, 0)
2024            }
2025            Kind::Lookahead => {
2026                let rel = self.get_lookahead_rel(node_id);
2027                // no nullability, we can write as intersection
2028                if rel == u32::MAX {
2029                    let lbody = self.get_lookahead_inner(node_id);
2030                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2031                    let rbody = self.reverse(lbody);
2032                    let rtail = self.reverse(ltail);
2033                    let rev = self.mk_lookbehind(rbody?, rtail?);
2034                    return Ok(self.cache_reversed(node_id, rev));
2035                }
2036                if rel != 0 {
2037                    return Err(AlgebraError::UnsupportedPattern);
2038                }
2039                let tail_node = self.get_lookahead_tail(node_id);
2040                let rev_tail = if tail_node != NodeId::MISSING {
2041                    self.reverse(tail_node)?
2042                } else {
2043                    NodeId::MISSING
2044                };
2045                let inner_id = self.get_lookahead_inner(node_id);
2046                let rev_inner = self.reverse(inner_id)?;
2047                self.mk_lookbehind(rev_inner, rev_tail)
2048            }
2049            Kind::Counted => {
2050                return Err(AlgebraError::UnsupportedPattern);
2051            }
2052        };
2053        self.cache_reversed(node_id, rw);
2054        Ok(rw)
2055    }
2056
2057    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2058        let node = NodeKey {
2059            kind: Kind::Pred,
2060            left: NodeId::MISSING,
2061            right: NodeId::MISSING,
2062            extra: pred.0,
2063        };
2064        self.get_node_id(node)
2065    }
2066
2067    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2068        let key = NodeKey {
2069            kind: Kind::Compl,
2070            left: body,
2071            right: NodeId::MISSING,
2072            extra: u32::MAX,
2073        };
2074        if let Some(id) = self.key_is_created(&key) {
2075            return *id;
2076        }
2077        if body == NodeId::BOT {
2078            return NodeId::TS;
2079        }
2080        if body == NodeId::TS {
2081            return NodeId::BOT;
2082        }
2083
2084        if let Some(contains_body) = body.is_contains(self) {
2085            if contains_body.is_pred(self) {
2086                let pred = contains_body.pred_tset(self);
2087                let notpred = self.mk_pred_not(pred);
2088                let node = self.mk_star(notpred);
2089                return self.init_as(key, node);
2090            } else if contains_body == NodeId::END {
2091                return self.init_as(key, NodeId::BOT);
2092            }
2093        };
2094
2095        if self.get_kind(body) == Kind::Compl {
2096            return self.get_node(body).left;
2097        }
2098
2099        self.get_node_id(key)
2100    }
2101
2102    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2103        let nid = self.get_nulls_id(body);
2104        let nref = self.mb.nb.get_set_ref(nid).clone();
2105        let mut futures = NodeId::BOT;
2106        for n in nref.iter() {
2107            if !n.is_mask_nullable(mask) {
2108                continue;
2109            }
2110
2111            let eff = if n.rel == 0 {
2112                NodeId::EPS
2113            } else {
2114                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2115            };
2116            futures = self.mk_union(futures, eff)
2117        }
2118        futures
2119    }
2120
2121    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2122        if cfg!(feature = "norewrite") {
2123            return None;
2124        }
2125        #[cfg(debug_assertions)]
2126        if std::env::var("TRACE_CONCAT").is_ok() {
2127            eprintln!("rw2 head={} tail={}", self.pp(head), self.pp(tail));
2128        }
2129
2130        if tail.is_lookbehind(self) {
2131            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2132            return self
2133                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2134                .ok();
2135        }
2136        if head.is_lookahead(self) {
2137            let la_tail = self.get_lookahead_tail(head);
2138            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2139            if new_la_tail.is_center_nullable(self) {
2140                let la_new = self.mk_lookahead_internal(
2141                    self.get_lookahead_inner(head),
2142                    new_la_tail,
2143                    u32::MAX,
2144                );
2145                return Some(la_new);
2146            }
2147            let la_body = self.get_lookahead_inner(head);
2148            let la_rel = self.get_lookahead_rel(head);
2149            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2150                let tail_rel = self.get_lookahead_rel(new_la_tail);
2151                tail_rel + la_rel
2152            } else {
2153                u32::MAX
2154            };
2155
2156            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2157        }
2158
2159        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2160            return Some(head);
2161        }
2162
2163        // breaks:rev
2164        // if head == NodeId::TS && tail == NodeId::END {
2165        //     return Some(head);
2166        // }
2167
2168        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2169            return Some(NodeId::TS);
2170        }
2171
2172        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2173            return Some(NodeId::TS);
2174        }
2175
2176        if head.is_inter(self) && tail == NodeId::TS {
2177            let mut alltopstar = true;
2178            iter_inter!(self, head, |v| {
2179                alltopstar = self.ends_with_ts(v);
2180            });
2181            if alltopstar {
2182                return Some(head);
2183            }
2184        }
2185
2186        if head.is_star(self) && head == tail {
2187            return Some(head);
2188        }
2189
2190        None
2191    }
2192
2193    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2194        use Kind::*;
2195
2196        if cfg!(feature = "norewrite") {
2197            return None;
2198        }
2199        if left == right {
2200            return Some(left);
2201        }
2202
2203        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2204            return Some(right);
2205        }
2206
2207        if left.is_pred(self) && right.is_pred(self) {
2208            let l = left.pred_tset(self);
2209            let r = right.pred_tset(self);
2210            let solver = self.solver();
2211            let psi = solver.or_id(l, r);
2212            let rewrite = self.mk_pred(psi);
2213            return Some(rewrite);
2214        }
2215
2216        if left == NodeId::EPS
2217            && self.get_extra(left) == 0
2218            && self.nullability(right) == Nullability::ALWAYS
2219        {
2220            return Some(right);
2221        }
2222
2223        if left.is_lookahead(self) && right.is_lookahead(self) {
2224            let lb = left.left(self);
2225            let lt = left.right(self);
2226            let lrel = left.extra(self);
2227
2228            let rb = right.left(self);
2229            let rt = right.right(self);
2230            let rrel = right.extra(self);
2231
2232            if lrel == rrel && lt.is_missing() && rt.is_missing() {
2233                let unioned = self.mk_union(lb, rb);
2234                let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2235                return Some(node);
2236            }
2237        }
2238
2239        if right.is_kind(self, Concat) {
2240            if left == NodeId::END
2241                && right.left(self) == NodeId::END
2242                && self.nullability(right).has(Nullability::END)
2243            {
2244                return Some(right);
2245            }
2246            // .*|.*a.* => .*(a.*|)
2247            if left == right.left(self) {
2248                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2249                let rw = self.mk_concat(left, rhs);
2250                return Some(rw);
2251            }
2252            if left.is_kind(self, Concat) {
2253                let head1 = left.left(self);
2254                let head2 = right.left(self);
2255
2256                if head1 == head2 {
2257                    let t1 = left.right(self);
2258                    let t2 = right.right(self);
2259                    // opportunistic rewrites
2260                    if head1 == NodeId::TS {
2261                        if t2.has_concat_tail(self, t1) {
2262                            return Some(left);
2263                        }
2264                        if t1.has_concat_tail(self, t2) {
2265                            return Some(right);
2266                        }
2267                    }
2268                    let un = self.mk_union(t1, t2);
2269                    return Some(self.mk_concat(left.left(self), un));
2270                }
2271
2272                // xa|ya => (x|y)a - suffix factoring via reverse
2273                // TODO: valid and looks prettier but .reverse is not good for builder perf,
2274                // leaving out unless i find a case where it helps significantly
2275                if false {
2276                    let end1 = self.get_concat_end(left);
2277                    let end2 = self.get_concat_end(right);
2278                    if end1 == end2 {
2279                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2280                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2281                            let rev1 = self.reverse(left).unwrap();
2282                            let rev2 = self.reverse(right).unwrap();
2283
2284                            let union_rev = self.mk_union(rev1, rev2);
2285                            return Some(self.reverse(union_rev).unwrap());
2286                        }
2287                    }
2288                }
2289            }
2290            if left.is_pred(self) && left == right.left(self) {
2291                let un = self.mk_opt(right.right(self));
2292                let conc = self.mk_concat(left, un);
2293                return Some(conc);
2294            }
2295        }
2296
2297        if left == NodeId::EPS && right.is_plus(self) {
2298            let result = self.mk_star(right.left(self));
2299            return Some(result);
2300        }
2301
2302        // (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2303        if left.is_inter(self) && right.is_inter(self) {
2304            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2305                return Some(rw);
2306            }
2307        }
2308
2309        None
2310    }
2311
2312    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2313        let mut curr = node;
2314        while curr.is_inter(self) {
2315            out.push(self.get_left(curr));
2316            curr = self.get_right(curr);
2317        }
2318        out.push(curr);
2319    }
2320
2321    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2322        let mut curr = node;
2323        let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2324        if has_prefix {
2325            curr = self.get_right(curr);
2326        }
2327        let mut count = 0u32;
2328        let mut pred_set = None;
2329        while curr.is_concat(self) {
2330            let head = self.get_left(curr);
2331            if !head.is_pred(self) {
2332                return None;
2333            }
2334            let ps = head.pred_tset(self);
2335            match pred_set {
2336                None => pred_set = Some(ps),
2337                Some(existing) => {
2338                    if existing != ps {
2339                        return None;
2340                    }
2341                }
2342            }
2343            curr = self.get_right(curr);
2344            count += 1;
2345        }
2346        if count == 0 || !curr.is_star(self) {
2347            return None;
2348        }
2349        Some((has_prefix, pred_set.unwrap(), curr, count))
2350    }
2351
2352    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2353        let mut j = 0;
2354        for &s in sub {
2355            while j < sup.len() && sup[j] < s {
2356                j += 1;
2357            }
2358            if j >= sup.len() || sup[j] != s {
2359                return false;
2360            }
2361            j += 1;
2362        }
2363        true
2364    }
2365
2366    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2367        if !left.is_inter(self) || !right.is_inter(self) {
2368            return None;
2369        }
2370
2371        let mut lc: Vec<NodeId> = Vec::new();
2372        let mut rc: Vec<NodeId> = Vec::new();
2373        self.collect_inter_components(left, &mut lc);
2374        self.collect_inter_components(right, &mut rc);
2375
2376        // component subset check: fewer constraints = larger language
2377        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2378            return Some(left);
2379        }
2380        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2381        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2382            return Some(right);
2383        }
2384
2385        if lc.len() == rc.len() {
2386            let mut diff_idx = None;
2387            for i in 0..lc.len() {
2388                if lc[i] != rc[i] {
2389                    if diff_idx.is_some() {
2390                        return None;
2391                    }
2392                    diff_idx = Some(i);
2393                }
2394            }
2395            if let Some(idx) = diff_idx {
2396                let a = lc[idx];
2397                let b = rc[idx];
2398                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2399                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2400                {
2401                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2402                        return if ca < cb { Some(left) } else { Some(right) };
2403                    }
2404                }
2405            }
2406        }
2407
2408        None
2409    }
2410
2411    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2412        if cfg!(feature = "norewrite") {
2413            return None;
2414        }
2415        if left == right {
2416            return Some(left);
2417        }
2418
2419        if self.get_kind(right) == Kind::Union {
2420            let mut result = NodeId::BOT;
2421            self.iter_unions_b(
2422                right,
2423                &mut (|b, v| {
2424                    let new_inter = b.mk_inter(v, left);
2425                    result = b.mk_union(result, new_inter);
2426                }),
2427            );
2428            return Some(result);
2429        }
2430        if self.get_kind(left) == Kind::Union {
2431            let mut result = NodeId::BOT;
2432            self.iter_unions_b(
2433                left,
2434                &mut (|b, v| {
2435                    let new_inter = b.mk_inter(v, right);
2436                    result = b.mk_union(result, new_inter);
2437                }),
2438            );
2439            return Some(result);
2440        }
2441
2442        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2443            return Some(NodeId::BOT);
2444        }
2445
2446        if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2447            let bodies = self.mk_union(left.left(self), right.left(self));
2448            return Some(self.mk_compl(bodies));
2449        }
2450
2451        if left == NodeId::TOPPLUS {
2452            if right.is_pred_star(self).is_some() {
2453                let newloop = self.mk_plus(right.left(self));
2454                return Some(newloop);
2455            }
2456            if right.is_never_nullable(self) {
2457                return Some(right);
2458            }
2459            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2460                return Some(NodeId::BOT);
2461            }
2462            if right.is_kind(self, Kind::Concat) {}
2463        }
2464
2465        {
2466            let l_is_la = left.is_lookahead(self);
2467            let r_is_la = right.is_lookahead(self);
2468            let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2469            let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2470            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2471                let (la_node, other, concat_body) = if r_is_la {
2472                    (right, left, NodeId::MISSING)
2473                } else if l_is_la {
2474                    (left, right, NodeId::MISSING)
2475                } else if r_is_cla {
2476                    (right.left(self), left, right.right(self))
2477                } else {
2478                    (left.left(self), right, left.right(self))
2479                };
2480                let la_body = la_node.left(self);
2481                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2482                let inter_right = if concat_body.is_missing() {
2483                    la_tail
2484                } else {
2485                    self.mk_concat(la_tail, concat_body)
2486                };
2487                let new_body = self.mk_inter(other, inter_right);
2488                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2489                return Some(self.mk_concat(la, new_body));
2490            }
2491        }
2492
2493        if self.get_kind(right) == Kind::Compl {
2494            let compl_body = right.left(self);
2495            if left == compl_body {
2496                return Some(NodeId::BOT);
2497            }
2498            if compl_body.is_concat(self) {
2499                let compl_head = compl_body.left(self);
2500                if compl_body.right(self) == NodeId::TS && compl_head == left {
2501                    return Some(NodeId::BOT);
2502                }
2503            }
2504        }
2505
2506        if let Some(pleft) = left.is_pred_star(self) {
2507            if let Some(pright) = right.is_pred_star(self) {
2508                let merged = self.mk_inter(pleft, pright);
2509                return Some(self.mk_star(merged));
2510            }
2511        }
2512
2513        {
2514            let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2515            let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2516            if l_is_clb || r_is_clb {
2517                let (lb, body) = if l_is_clb && r_is_clb {
2518                    let lb1 = left.left(self);
2519                    let lb2 = right.left(self);
2520                    let inner = self.mk_inter(
2521                        self.get_lookbehind_inner(lb1),
2522                        self.get_lookbehind_inner(lb2),
2523                    );
2524                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2525                    let body = self.mk_inter(left.right(self), right.right(self));
2526                    (lb, body)
2527                } else if l_is_clb {
2528                    let lb = left.left(self);
2529                    let body = self.mk_inter(left.right(self), right);
2530                    (lb, body)
2531                } else {
2532                    let lb = right.left(self);
2533                    let body = self.mk_inter(left, right.right(self));
2534                    (lb, body)
2535                };
2536                return Some(self.mk_concat(lb, body));
2537            }
2538        }
2539
2540        {
2541            let l_has_la = self.has_trailing_la(left);
2542            let r_has_la = self.has_trailing_la(right);
2543            if l_has_la || r_has_la {
2544                let (body, la) = if l_has_la && r_has_la {
2545                    let (lbody, l_la) = self.strip_trailing_la(left);
2546                    let (rbody, r_la) = self.strip_trailing_la(right);
2547                    let inner = self.mk_inter(
2548                        self.get_lookahead_inner(l_la),
2549                        self.get_lookahead_inner(r_la),
2550                    );
2551                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2552                    let body = self.mk_inter(lbody, rbody);
2553                    (body, la)
2554                } else if l_has_la {
2555                    let (lbody, la) = self.strip_trailing_la(left);
2556                    let body = self.mk_inter(lbody, right);
2557                    (body, la)
2558                } else {
2559                    let (rbody, la) = self.strip_trailing_la(right);
2560                    let body = self.mk_inter(left, rbody);
2561                    (body, la)
2562                };
2563                return Some(self.mk_concat(body, la));
2564            }
2565        }
2566
2567        None
2568    }
2569
2570    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2571        if cfg!(feature = "norewrite") {
2572            return None;
2573        }
2574        debug_assert!(self.get_kind(right_union) == Kind::Union);
2575
2576        let mut rewritten = None;
2577        right_union.iter_union_while(
2578            self,
2579            &mut (|b, v| {
2580                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2581                    rewritten = Some((v, rw));
2582                    false
2583                } else {
2584                    true
2585                }
2586            }),
2587        );
2588
2589        if let Some(rw) = rewritten {
2590            let mut new_union = NodeId::BOT;
2591            right_union.iter_union(
2592                self,
2593                &mut (|b, v| {
2594                    if v == rw.0 {
2595                        new_union = b.mk_union(rw.1, new_union)
2596                    } else {
2597                        new_union = b.mk_union(v, new_union)
2598                    }
2599                }),
2600            );
2601            return Some(new_union);
2602        };
2603
2604        None
2605    }
2606
2607    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2608        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2609        debug_assert!(tail != NodeId::MISSING);
2610        let key = NodeKey {
2611            kind: Kind::Concat,
2612            left: head,
2613            right: tail,
2614            extra: u32::MAX,
2615        };
2616        if let Some(id) = self.key_is_created(&key) {
2617            return *id;
2618        }
2619
2620        if head == NodeId::BOT || tail == NodeId::BOT {
2621            return NodeId::BOT;
2622        }
2623        if head == NodeId::EPS {
2624            return tail;
2625        }
2626        if tail == NodeId::EPS {
2627            return head;
2628        }
2629
2630        match tail {
2631            // breaks:rev
2632            NodeId::BEGIN => {
2633                if !self.is_nullable(head, Nullability::BEGIN) {
2634                    return NodeId::BOT;
2635                } else {
2636                    return NodeId::BEGIN;
2637                }
2638            }
2639            _ => {}
2640        }
2641
2642        // normalize concats to right
2643        if head.is_kind(self, Kind::Concat) {
2644            let left = head.left(self);
2645            let newright = self.mk_concat(head.right(self), tail);
2646            let updated = self.mk_concat(left, newright);
2647            return self.init_as(key, updated);
2648        }
2649
2650        if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2651            return NodeId::BOT;
2652        }
2653
2654        if tail.is_concat(self) {
2655            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2656                let upd = self.mk_concat(rw, tail.right(self));
2657                return self.init_as(key, upd);
2658            }
2659        }
2660
2661        if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2662            return self.init_as(key, new);
2663        }
2664
2665        match (self.get_kind(head), self.get_kind(tail)) {
2666            // merge stars
2667            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2668                let rl = tail.left(self);
2669                if head == rl {
2670                    return self.init_as(key, tail);
2671                }
2672            }
2673            // attempt longer concat rw
2674            (_, Kind::Concat) => {
2675                let curr = head;
2676                let h2 = self.mk_concat(curr, tail.left(self));
2677                let tr = tail.right(self);
2678                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2679                    return self.init_as(key, new);
2680                }
2681            }
2682            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2683                return self.init_as(key, tail);
2684            }
2685            _ => {}
2686        }
2687
2688        self.init(key)
2689    }
2690
2691    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2692        // LNF: lookbehind must start with ts
2693        let lb_body = {
2694            match self.starts_with_ts(lb_body) {
2695                true => lb_body,
2696                false => self.mk_concat(NodeId::TS, lb_body),
2697            }
2698        };
2699        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2700        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2701    }
2702
2703    fn mk_lookbehind_internal(
2704        &mut self,
2705        lb_body: NodeId,
2706        lb_prev: NodeId,
2707    ) -> Result<NodeId, AlgebraError> {
2708        debug_assert!(lb_body != NodeId::MISSING);
2709        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2710        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2711            return Ok(NodeId::BOT);
2712        }
2713        if lb_body == NodeId::TS {
2714            return Ok(lb_prev);
2715        }
2716        if lb_body == NodeId::EPS {
2717            match lb_prev {
2718                NodeId::MISSING => return Ok(NodeId::EPS),
2719                NodeId::EPS => return Ok(NodeId::EPS),
2720                _ => return Ok(lb_prev),
2721            }
2722        }
2723
2724        let key = NodeKey {
2725            kind: Kind::Lookbehind,
2726            left: lb_body,
2727            right: lb_prev,
2728            extra: u32::MAX,
2729        };
2730        match self.key_is_created(&key) {
2731            Some(id) => Ok(*id),
2732            None => {
2733                if lb_prev == NodeId::TS {
2734                    return Ok(self.mk_concat(lb_prev, lb_body));
2735                }
2736
2737                Ok(self.init(key))
2738            }
2739        }
2740    }
2741
2742    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2743        // LNF: lookahead must end with ts
2744        let la_body = {
2745            match self.ends_with_ts(la_body) {
2746                true => la_body,
2747                false => self.mk_concat(la_body, NodeId::TS),
2748            }
2749        };
2750        let rel = if NodeId::MISSING == la_tail {
2751            rel
2752        } else {
2753            match la_tail.is_center_nullable(self) {
2754                false => u32::MAX,
2755                true => rel,
2756            }
2757        };
2758
2759        self.mk_lookahead_internal(la_body, la_tail, rel)
2760    }
2761
2762    // rel max = carries no nullability, can potentially rw to intersection
2763    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2764        let key = NodeKey {
2765            kind: Kind::Lookahead,
2766            left: la_body,
2767            right: la_tail,
2768            extra: rel,
2769        };
2770        if let Some(id) = self.key_is_created(&key) {
2771            return *id;
2772        }
2773        if la_body == NodeId::TS {
2774            if rel == 0 {
2775                return la_tail.missing_to_eps();
2776            } else {
2777                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2778            }
2779        }
2780        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2781            return NodeId::BOT;
2782        }
2783        if la_tail.is_missing() && rel == u32::MAX {
2784            return NodeId::BOT;
2785        }
2786
2787        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2788            return la_body;
2789        }
2790
2791        if la_tail == NodeId::TS {
2792            if rel == 0 || rel == u32::MAX {
2793                return self.mk_concat(la_body, NodeId::TS);
2794            } else if rel == u32::MAX {
2795                return self.mk_begins_with(la_body);
2796            }
2797        }
2798
2799        if rel == u32::MAX {
2800            if la_tail.is_missing() {
2801                return NodeId::BOT;
2802            }
2803
2804            if self.is_always_nullable(la_body) {
2805                return la_tail.missing_to_eps();
2806            }
2807
2808            if la_tail != NodeId::MISSING {
2809                match self.get_kind(la_tail) {
2810                    _ => {
2811                        if la_body.is_compl_plus_end(self) {
2812                            let minlen = self.get_min_length_only(la_tail);
2813                            if minlen >= 1 {
2814                                return NodeId::BOT;
2815                            }
2816                        }
2817                    }
2818                }
2819            }
2820        }
2821
2822        if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
2823            let la_body2 = self.get_lookahead_inner(la_tail);
2824            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2825            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2826            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2827            let new_la_rel = self.get_lookahead_rel(la_tail);
2828            let new_la_tail = self.get_lookahead_tail(la_tail);
2829            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2830        }
2831
2832        if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
2833            let la_body_right = la_body.right(self);
2834            if self.is_always_nullable(la_body_right) {
2835                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2836            }
2837            if la_body.right(self) == NodeId::END {
2838                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2839            }
2840            let bodyright = la_body.right(self);
2841            if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
2842                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2843                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2844            }
2845        }
2846
2847        if la_tail != NodeId::MISSING {
2848            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2849                let lpred = la_body.left(self);
2850                if lpred.is_pred(self) {
2851                    let l = lpred.pred_tset(self);
2852                    let r = la_tail.pred_tset(self);
2853                    let psi_and = self.solver().and_id(l, r);
2854                    let rewrite = self.mk_pred(psi_and);
2855                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2856                    let new_right =
2857                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2858                    return self.mk_concat(rewrite, new_right);
2859                }
2860            }
2861        }
2862
2863        self.get_node_id(key)
2864    }
2865
2866    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2867        let has_match = (packed >> 16) > 0;
2868        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2869            return NodeId::BOT;
2870        }
2871        debug_assert!(
2872            body == NodeId::BOT || !self.is_infinite(body),
2873            "Counted body must have finite max length"
2874        );
2875        let chain = self.prune_counted_chain(body, chain);
2876        let key = NodeKey {
2877            kind: Kind::Counted,
2878            left: body,
2879            right: chain,
2880            extra: packed,
2881        };
2882        if let Some(id) = self.key_is_created(&key) {
2883            return *id;
2884        }
2885        self.get_node_id(key)
2886    }
2887
2888    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2889        if chain == NodeId::MISSING || body == NodeId::BOT {
2890            return chain;
2891        }
2892        if self.nullability(body) != Nullability::NEVER {
2893            return NodeId::MISSING;
2894        }
2895        let chain_body = chain.left(self);
2896        if chain_body == NodeId::BOT {
2897            return chain;
2898        }
2899        let not_begins = self.mk_not_begins_with(body);
2900        let inter = self.mk_inter(chain_body, not_begins);
2901        let is_empty = inter == NodeId::BOT;
2902        if is_empty {
2903            self.prune_counted_chain(body, chain.right(self))
2904        } else {
2905            chain
2906        }
2907    }
2908
2909    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2910        let neg_inner = self.mk_concat(body, NodeId::TS);
2911        let neg_part = self.mk_compl(neg_inner);
2912        let conc = self.mk_concat(neg_part, NodeId::END);
2913        self.mk_lookahead(conc, NodeId::MISSING, rel)
2914    }
2915
2916    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2917        match self.get_node(body).kind {
2918            Kind::Pred => {
2919                let psi = body.pred_tset(self);
2920                let negated = self.mk_pred_not(psi);
2921                let union = self.mk_union(NodeId::BEGIN, negated);
2922                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2923                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2924            }
2925            _ => {
2926                // ~body ∩ utf8_char: non-nullable (utf8_char requires ≥1 byte),
2927                // so \A | negated won't be stripped by strip_prefix_safe
2928                let uc = crate::unicode_classes::utf8_char(self);
2929                let neg = self.mk_compl(body);
2930                let negated = self.mk_inter(neg, uc);
2931                let union = self.mk_union(NodeId::BEGIN, negated);
2932                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2933                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2934            }
2935        }
2936    }
2937
2938    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2939        debug_assert!(left != NodeId::MISSING);
2940        debug_assert!(right != NodeId::MISSING);
2941        if left > right {
2942            return self.mk_union(right, left);
2943        }
2944        let key = NodeKey {
2945            kind: Kind::Union,
2946            left,
2947            right,
2948            extra: u32::MAX,
2949        };
2950        if let Some(id) = self.key_is_created(&key) {
2951            return *id;
2952        }
2953
2954        if left == right {
2955            return left;
2956        }
2957        if left == NodeId::BOT {
2958            return right;
2959        }
2960        if right == NodeId::BOT {
2961            return left;
2962        }
2963        if right == NodeId::TS {
2964            return right;
2965        }
2966        if left == NodeId::TS {
2967            return left;
2968        }
2969
2970        match (self.get_kind(left), self.get_kind(right)) {
2971            (Kind::Union, _) => {
2972                self.iter_unions_b(left, &mut |b, v| {
2973                    b.temp_vec.push(v);
2974                });
2975                self.iter_unions_b(right, &mut |b, v| {
2976                    b.temp_vec.push(v);
2977                });
2978                self.temp_vec.sort();
2979                let tree = self.temp_vec.clone();
2980                self.temp_vec.clear();
2981                let newnode = tree
2982                    .iter()
2983                    .rev()
2984                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2985                return self.init_as(key, newnode);
2986            }
2987            (_, Kind::Union) => {
2988                let rleft = right.left(self);
2989                // if left_node id is smaller than rleft, just create a new union
2990                if left > rleft {
2991                    self.iter_unions_b(left, &mut |b, v| {
2992                        b.temp_vec.push(v);
2993                    });
2994                    self.iter_unions_b(right, &mut |b, v| {
2995                        b.temp_vec.push(v);
2996                    });
2997                    self.temp_vec.sort();
2998                    let tree = self.temp_vec.clone();
2999                    self.temp_vec.clear();
3000                    let newnode = tree
3001                        .iter()
3002                        .rev()
3003                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3004                    return self.init_as(key, newnode);
3005                } else {
3006                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3007                        return self.init_as(key, rw);
3008                    }
3009                }
3010            }
3011            _ => {}
3012        }
3013
3014        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3015            return self.init_as(key, rw);
3016        }
3017        self.init(key)
3018    }
3019
3020    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3021        debug_assert!(left_id != NodeId::MISSING);
3022        debug_assert!(right_id != NodeId::MISSING);
3023        if left_id == right_id {
3024            return left_id;
3025        }
3026        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3027            return NodeId::BOT;
3028        }
3029        if left_id == NodeId::TS {
3030            return right_id;
3031        }
3032        if right_id == NodeId::TS {
3033            return left_id;
3034        }
3035        if left_id > right_id {
3036            return self.mk_inter(right_id, left_id);
3037        }
3038        let key = NodeKey {
3039            kind: Kind::Inter,
3040            left: left_id,
3041            right: right_id,
3042            extra: u32::MAX,
3043        };
3044        if let Some(id) = self.key_is_created(&key) {
3045            return *id;
3046        }
3047
3048        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3049            return self.init_as(key, rw);
3050        }
3051
3052        self.init(key)
3053    }
3054
3055    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3056        let node = NodeKey {
3057            kind,
3058            left: NodeId::MISSING,
3059            right: NodeId::MISSING,
3060            extra: u32::MAX,
3061        };
3062        self.init(node)
3063    }
3064
3065    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3066        let star = self.mk_star(body_id);
3067        self.mk_concat(body_id, star)
3068    }
3069    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3070        let opt = self.mk_opt(body_id);
3071        let mut nodes1 = vec![];
3072        for _ in lower..upper {
3073            nodes1.push(opt);
3074        }
3075        for _ in 0..lower {
3076            nodes1.push(body_id);
3077        }
3078        self.mk_concats(nodes1.into_iter())
3079    }
3080    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3081        self.mk_union(NodeId::EPS, body_id)
3082    }
3083
3084    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3085        let key = NodeKey {
3086            kind: Kind::Star,
3087            left: body_id,
3088            right: NodeId::MISSING,
3089            extra: 0,
3090        };
3091        if let Some(id) = self.key_is_created(&key) {
3092            return *id;
3093        }
3094        // _*{500} is still _*
3095        if body_id.is_kind(self, Kind::Star) {
3096            return body_id;
3097        }
3098        self.get_node_id(key)
3099    }
3100
3101    /// it's cheaper to check this once as an edge-case
3102    /// than to compute a 4th nullability bit for every node
3103    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3104        match self.get_kind(node_id) {
3105            Kind::End => Nullability::EMPTYSTRING,
3106            Kind::Begin => Nullability::EMPTYSTRING,
3107            Kind::Pred => Nullability::NEVER,
3108            Kind::Star => Nullability::ALWAYS,
3109            Kind::Inter | Kind::Concat => {
3110                let lnull = self.nullability_emptystring(node_id.left(self));
3111                let rnull = self.nullability_emptystring(node_id.right(self));
3112                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3113            }
3114            Kind::Union => {
3115                let lnull = self.nullability_emptystring(node_id.left(self));
3116                let rnull = self.nullability_emptystring(node_id.right(self));
3117                lnull.or(rnull)
3118            }
3119            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3120            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3121            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3122            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3123        }
3124    }
3125
3126    #[inline(always)]
3127    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3128        self.get_meta(node_id)
3129            .flags
3130            .nullability()
3131            .has(Nullability::CENTER.or(Nullability::END))
3132    }
3133
3134    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3135        self.get_only_nullability(node_id)
3136    }
3137
3138    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3139        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3140    }
3141
3142    pub fn pp(&self, node_id: NodeId) -> String {
3143        let mut s = String::new();
3144        self.ppw(&mut s, node_id).unwrap();
3145        s
3146    }
3147
3148    #[allow(dead_code)]
3149    pub fn pp_nulls(&self, node_id: NodeId) -> String {
3150        let nu = self.get_nulls_id(node_id);
3151        let nr = self.mb.nb.get_set_ref(nu);
3152        let s1 = format!("{:?}", nr);
3153        s1
3154    }
3155
3156    #[allow(dead_code)]
3157    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3158        match self.get_tregex(term_id) {
3159            TRegex::Leaf(node_id) => {
3160                let mut s = String::new();
3161                self.ppw(&mut s, *node_id).unwrap();
3162                s
3163            }
3164            TRegex::ITE(cond, then_id, else_id) => {
3165                format!(
3166                    "ITE({},{},{})",
3167                    self.solver_ref().pp(*cond),
3168                    self.ppt(*then_id),
3169                    self.ppt(*else_id)
3170                )
3171            }
3172        }
3173    }
3174
3175    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3176        if cfg!(feature = "graphviz") {
3177            match node_id {
3178                NodeId::MISSING => return write!(s, "MISSING"),
3179                NodeId::BOT => return write!(s, "⊥"),
3180                NodeId::TS => return write!(s, "_*"),
3181                NodeId::TOP => return write!(s, "_"),
3182                NodeId::EPS => return write!(s, "ε"),
3183                _ => {}
3184            }
3185        }
3186
3187        match node_id {
3188            NodeId::MISSING => return write!(s, "MISSING"),
3189            NodeId::BOT => return write!(s, "⊥"),
3190            NodeId::TS => return write!(s, "_*"),
3191            NodeId::TOP => return write!(s, "_"),
3192            NodeId::EPS => return write!(s, ""),
3193            _ => {}
3194        }
3195
3196        match self.get_kind(node_id) {
3197            Kind::End => write!(s, r"\z"),
3198            Kind::Begin => write!(s, r"\A"),
3199            Kind::Pred => {
3200                let psi = node_id.pred_tset(self);
3201                if psi == TSetId::EMPTY {
3202                    write!(s, r"⊥")
3203                } else if psi == TSetId::FULL {
3204                    write!(s, r"_")
3205                } else {
3206                    write!(s, "{}", self.solver_ref().pp(psi))
3207                }
3208            }
3209            Kind::Inter => {
3210                write!(s, "(")?;
3211                self.ppw(s, node_id.left(self))?;
3212                write!(s, "&")?;
3213                let mut curr = node_id.right(self);
3214                while curr.is_inter(self) {
3215                    let n = curr.left(self);
3216                    self.ppw(s, n)?;
3217                    write!(s, "&")?;
3218                    curr = curr.right(self);
3219                }
3220                self.ppw(s, curr)?;
3221                write!(s, ")")
3222            }
3223            Kind::Union => {
3224                let left = node_id.left(self);
3225                let right = node_id.right(self);
3226                write!(s, "(")?;
3227                self.ppw(s, left)?;
3228                write!(s, "|")?;
3229                let mut curr = right;
3230                while self.get_kind(curr) == Kind::Union {
3231                    let n = curr.left(self);
3232                    self.ppw(s, n)?;
3233                    write!(s, "|")?;
3234                    curr = curr.right(self);
3235                }
3236                self.ppw(s, curr)?;
3237                write!(s, ")")
3238            }
3239            Kind::Concat => {
3240                let left = node_id.left(self);
3241                let right = node_id.right(self);
3242                if right.is_star(self) && right.left(self) == left {
3243                    self.ppw(s, left)?;
3244                    write!(s, "+")?;
3245                    return Ok(());
3246                }
3247                if right.is_concat(self) {
3248                    let rl = right.left(self);
3249                    if rl.is_star(self) && rl.left(self) == left {
3250                        self.ppw(s, left)?;
3251                        write!(s, "+")?;
3252                        return self.ppw(s, right.right(self));
3253                    }
3254                }
3255                if right.is_concat(self) && right.left(self) == left {
3256                    let mut num = 1;
3257                    let mut right = right;
3258                    while right.is_concat(self) && right.left(self) == left {
3259                        num += 1;
3260                        right = right.right(self);
3261                    }
3262                    // (|X){n} followed by X{m} -> X{m,m+n}
3263                    if let Some(inner) = left.is_opt_v(self) {
3264                        let mut inner_count = 0;
3265                        let mut right2 = right;
3266                        while right2.is_concat(self) && right2.left(self) == inner {
3267                            inner_count += 1;
3268                            right2 = right2.right(self);
3269                        }
3270                        if right2 == inner {
3271                            inner_count += 1;
3272                            self.ppw(s, inner)?;
3273                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3274                        }
3275                        if inner_count > 0 {
3276                            self.ppw(s, inner)?;
3277                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3278                            return self.ppw(s, right2);
3279                        }
3280                    }
3281                    self.ppw(s, left)?;
3282                    if right == left {
3283                        num += 1;
3284                        return write!(s, "{{{}}}", num);
3285                    }
3286                    if num <= 3 && left.is_pred(self) {
3287                        for _ in 1..num {
3288                            self.ppw(s, left)?;
3289                        }
3290                        return self.ppw(s, right);
3291                    } else {
3292                        write!(s, "{{{}}}", num)?;
3293                        return self.ppw(s, right);
3294                    }
3295                }
3296                self.ppw(s, left)?;
3297                self.ppw(s, right)
3298            }
3299            Kind::Star => {
3300                let left = node_id.left(self);
3301                let leftkind = self.get_kind(left);
3302                match leftkind {
3303                    Kind::Concat | Kind::Star | Kind::Compl => {
3304                        write!(s, "(")?;
3305                        self.ppw(s, left)?;
3306                        write!(s, ")")?;
3307                    }
3308                    _ => {
3309                        self.ppw(s, left)?;
3310                    }
3311                };
3312                write!(s, "*")
3313            }
3314            Kind::Compl => {
3315                write!(s, "~(")?;
3316                self.ppw(s, node_id.left(self))?;
3317                write!(s, ")")
3318            }
3319            Kind::Lookbehind => {
3320                let lbleft = self.get_lookbehind_prev(node_id);
3321                let lbinner = self.get_lookbehind_inner(node_id);
3322                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3323                if lbleft != NodeId::MISSING {
3324                    write!(s, "❮")?;
3325                    self.ppw(s, lbleft)?;
3326                    write!(s, "❯")?;
3327                }
3328
3329                write!(s, "(?<=")?;
3330                self.ppw(s, lbinner)?;
3331                write!(s, ")")
3332            }
3333            Kind::Lookahead => {
3334                let inner = self.get_lookahead_inner(node_id);
3335                write!(s, "(?=")?;
3336                self.ppw(s, inner)?;
3337                write!(s, ")")?;
3338                if self.get_lookahead_rel(node_id) != 0 {
3339                    write!(s, "{{")?;
3340                    let rel = self.get_lookahead_rel(node_id);
3341                    if rel == u32::MAX {
3342                        write!(s, "∅")?;
3343                    } else {
3344                        write!(s, "{}", rel)?;
3345                    }
3346                    write!(s, "}}")?;
3347                }
3348                if node_id.right(self) == NodeId::MISSING {
3349                    Ok(())
3350                } else {
3351                    write!(s, "❮")?;
3352                    self.ppw(s, node_id.right(self))?;
3353                    write!(s, "❯")
3354                }
3355            }
3356            Kind::Counted => {
3357                let body = node_id.left(self);
3358                let packed = self.get_extra(node_id);
3359                let step = packed & 0xFFFF;
3360                let best = packed >> 16;
3361                write!(s, "#(")?;
3362                self.ppw(s, body)?;
3363                write!(s, ")s{}b{}", step, best)
3364            }
3365        }
3366    }
3367
3368    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3369        self.mk_concat(node, NodeId::TS)
3370    }
3371
3372    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3373        let node_ts = self.mk_concat(node, NodeId::TS);
3374        self.mk_compl(node_ts)
3375    }
3376
3377    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3378        let notset = self.solver().not_id(set);
3379        self.mk_pred(notset)
3380    }
3381
3382    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3383        let set_id = self.solver().u8_to_set_id(char);
3384        self.mk_pred(set_id)
3385    }
3386
3387    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3388        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3389        self.mk_pred(rangeset)
3390    }
3391
3392    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3393        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3394        for &(lo, hi) in &ranges[1..] {
3395            let r = self.mk_range_u8(lo, hi);
3396            node = self.mk_union(node, r);
3397        }
3398        node
3399    }
3400
3401    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3402        let mut prefix = Vec::new();
3403        let mut curr = node;
3404        loop {
3405            if curr == NodeId::EPS {
3406                let full = !prefix.is_empty();
3407                return (prefix, full);
3408            }
3409            if curr == NodeId::BOT {
3410                break;
3411            }
3412            if curr.is_pred(self) {
3413                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3414                    Some(byte) => {
3415                        prefix.push(byte);
3416                        return (prefix, true);
3417                    }
3418                    None => break, // multi-byte pred: pattern not fully consumed
3419                }
3420            }
3421            if !curr.is_concat(self) {
3422                break;
3423            }
3424            let left = curr.left(self);
3425            if !left.is_pred(self) {
3426                break;
3427            }
3428            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3429                Some(byte) => prefix.push(byte),
3430                None => break,
3431            }
3432            curr = curr.right(self);
3433        }
3434        (prefix, false)
3435    }
3436
3437    #[allow(dead_code)]
3438    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3439        let mut result = NodeId::EPS;
3440        for byte in raw_str.iter().rev() {
3441            let node = self.mk_u8(*byte);
3442            result = self.mk_concat(node, result);
3443        }
3444        result
3445    }
3446
3447    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3448        let mut result = NodeId::EPS;
3449        for byte in raw_str.bytes().rev() {
3450            let node = self.mk_u8(byte);
3451            result = self.mk_concat(node, result);
3452        }
3453        result
3454    }
3455
3456    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3457        let mut sorted: Vec<NodeId> = nodes.collect();
3458        if sorted.len() <= 1 {
3459            return sorted.pop().unwrap_or(NodeId::BOT);
3460        }
3461        sorted.sort();
3462        sorted.dedup();
3463        sorted.retain(|&x| x != NodeId::BOT);
3464        if sorted.is_empty() {
3465            return NodeId::BOT;
3466        }
3467        if sorted.len() > 16 {
3468            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3469            let mut non_concat: Vec<NodeId> = Vec::new();
3470            for &n in &sorted {
3471                if n.is_concat(self) {
3472                    by_head.entry(self.get_left(n)).or_default().push(n);
3473                } else {
3474                    non_concat.push(n);
3475                }
3476            }
3477            let mut absorbed: Vec<NodeId> = Vec::new();
3478            for &n in &non_concat {
3479                if by_head.contains_key(&n) {
3480                    absorbed.push(n);
3481                }
3482            }
3483            if !absorbed.is_empty() {
3484                non_concat.retain(|n| !absorbed.contains(n));
3485            }
3486            if by_head.len() < sorted.len() {
3487                let mut groups: Vec<NodeId> = non_concat;
3488                for (head, tails) in by_head {
3489                    let mut tail_nodes: Vec<NodeId> =
3490                        tails.iter().map(|&n| self.get_right(n)).collect();
3491                    if absorbed.contains(&head) {
3492                        tail_nodes.push(NodeId::EPS);
3493                    }
3494                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3495                    let factored = self.mk_concat(head, tail_union);
3496                    groups.push(factored);
3497                }
3498                groups.sort();
3499                groups.dedup();
3500                return self.mk_unions_balanced(&groups);
3501            }
3502        }
3503        self.mk_unions_balanced(&sorted)
3504    }
3505
3506    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3507        match nodes.len() {
3508            0 => NodeId::BOT,
3509            1 => nodes[0],
3510            n => {
3511                let mid = n / 2;
3512                let left = self.mk_unions_balanced(&nodes[..mid]);
3513                let right = self.mk_unions_balanced(&nodes[mid..]);
3514                self.mk_union(left, right)
3515            }
3516        }
3517    }
3518
3519    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3520        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3521    }
3522
3523    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3524        nodes
3525            .rev()
3526            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3527    }
3528}
3529
3530impl RegexBuilder {
3531    #[allow(dead_code)]
3532    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3533        match self.get_tregex(term_id).clone() {
3534            TRegex::Leaf(node_id) => {
3535                if NodeId::BOT == node_id {
3536                    vec![]
3537                } else {
3538                    vec![node_id]
3539                }
3540            }
3541            TRegex::ITE(_, then_id, else_id) => {
3542                let mut then_nodes = self.extract_sat(then_id);
3543                let mut else_nodes = self.extract_sat(else_id);
3544                then_nodes.append(&mut else_nodes);
3545                then_nodes
3546            }
3547        }
3548    }
3549
3550    pub(crate) fn iter_unions_b(
3551        &mut self,
3552        curr: NodeId,
3553        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3554    ) {
3555        let mut curr = curr;
3556        while self.get_kind(curr) == Kind::Union {
3557            f(self, curr.left(self));
3558            curr = curr.right(self);
3559        }
3560        f(self, curr);
3561    }
3562
3563    pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3564        if !self.contains_look(node_id) {
3565            return Some(node_id);
3566        }
3567        match self.get_kind(node_id) {
3568            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3569            Kind::Concat => {
3570                let left = node_id.left(self);
3571                let right = node_id.right(self);
3572                let elim_l = self.try_elim_lookarounds(left)?;
3573                let elim_r = self.try_elim_lookarounds(right)?;
3574                let rw = self.mk_concat(elim_l, elim_r);
3575                Some(rw)
3576            }
3577            Kind::Union => {
3578                let left = node_id.left(self);
3579                let right = node_id.right(self);
3580                let elim_l = self.try_elim_lookarounds(left)?;
3581                let elim_r = self.try_elim_lookarounds(right)?;
3582                let rw = self.mk_union(elim_l, elim_r);
3583                Some(rw)
3584            }
3585
3586            Kind::Star => {
3587                let body = node_id.left(self);
3588                let elim_l = self.try_elim_lookarounds(body)?;
3589                Some(self.mk_star(elim_l))
3590            }
3591            Kind::Compl => {
3592                let left = node_id.left(self);
3593                let elim_l = self.try_elim_lookarounds(left)?;
3594                Some(self.mk_compl(elim_l))
3595            }
3596            Kind::Lookahead => {
3597                let rel = self.get_lookahead_rel(node_id);
3598                if rel != 0 {
3599                    return None;
3600                }
3601                let lbody = self.get_lookahead_inner(node_id);
3602                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3603                let elim_l = self.try_elim_lookarounds(lbody)?;
3604                let elim_r = self.try_elim_lookarounds(ltail)?;
3605                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3606                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3607                let rw = self.mk_inter(lbody_ts, ltail_ts);
3608                Some(rw)
3609            }
3610            Kind::Lookbehind => {
3611                let linner = self.get_lookbehind_inner(node_id);
3612                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3613                let elim_l = self.try_elim_lookarounds(linner)?;
3614                let elim_r = self.try_elim_lookarounds(lprev)?;
3615                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3616                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3617                let rw = self.mk_inter(lbody_ts, ltail_ts);
3618                Some(rw)
3619            }
3620            Kind::Inter => {
3621                let left = node_id.left(self);
3622                let right = node_id.right(self);
3623                let elim_l = self.try_elim_lookarounds(left)?;
3624                let elim_r = self.try_elim_lookarounds(right)?;
3625                let rw = self.mk_inter(elim_l, elim_r);
3626                Some(rw)
3627            }
3628            Kind::Counted => None,
3629        }
3630    }
3631
3632    /// R & _+ is a safe overapproximation of R that's nonempty
3633    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3634        if self.nullability(node) == Nullability::NEVER {
3635            node
3636        } else {
3637            self.mk_inter(NodeId::TOPPLUS, node)
3638        }
3639    }
3640
3641    fn iter_find_stack(
3642        &self,
3643        stack: &mut Vec<TRegexId>,
3644        mut f: impl FnMut(NodeId) -> bool,
3645    ) -> bool {
3646        loop {
3647            match stack.pop() {
3648                None => return false,
3649                Some(curr) => match self.get_tregex(curr) {
3650                    TRegex::Leaf(n) => {
3651                        let mut curr = *n;
3652                        while curr != NodeId::BOT {
3653                            match self.get_kind(curr) {
3654                                Kind::Union => {
3655                                    if f(curr.left(self)) {
3656                                        return true;
3657                                    }
3658                                    curr = curr.right(self);
3659                                }
3660                                _ => {
3661                                    if f(*n) {
3662                                        return true;
3663                                    }
3664                                    curr = NodeId::BOT;
3665                                }
3666                            }
3667                        }
3668                    }
3669                    TRegex::ITE(_, then_id, else_id) => {
3670                        if *else_id != TRegexId::BOT {
3671                            stack.push(*else_id);
3672                        }
3673                        stack.push(*then_id);
3674                    }
3675                },
3676            }
3677        }
3678    }
3679
3680    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3681        if node == NodeId::BOT {
3682            return Some(true);
3683        }
3684        if self.nullability(node) != Nullability::NEVER {
3685            return Some(false);
3686        }
3687        if let Some(cached) = self.cache_empty.get(&node) {
3688            if cached.is_checked() {
3689                return Some(cached.is_empty());
3690            }
3691        }
3692        let node = if !self.contains_look(node) {
3693            node
3694        } else {
3695            self.try_elim_lookarounds(node)?
3696        };
3697        let isempty_flag = self.is_empty_lang_internal(node);
3698
3699        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3700    }
3701
3702    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3703        // without inter, no need to check
3704        if !self.get_meta_flags(initial_node).contains_inter() {
3705            return Ok(NodeFlags::ZERO);
3706        }
3707
3708        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3709        let mut worklist: VecDeque<NodeId> = VecDeque::new();
3710        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3711        let mut stack = Vec::new();
3712        stack.push(begin_der);
3713        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3714            visited.insert(node, initial_node);
3715            let nullability = self.nullability(node);
3716            if nullability != Nullability::NEVER {
3717                true
3718            } else {
3719                worklist.push_back(node);
3720                false
3721            }
3722        });
3723        if found_nullable_right_away {
3724            return Ok(NodeFlags::ZERO);
3725        }
3726
3727        worklist.push_back(initial_node);
3728        let isempty_flag: NodeFlags;
3729        let mut found_node = NodeId::BOT;
3730
3731        loop {
3732            match worklist.pop_front() {
3733                None => {
3734                    isempty_flag = NodeFlags::IS_EMPTY;
3735                    break;
3736                }
3737                Some(outer) => {
3738                    if let Some(cached) = self.cache_empty.get(&outer) {
3739                        if cached.is_checked() {
3740                            if cached.is_empty() {
3741                                continue;
3742                            } else {
3743                                return Ok(NodeFlags::ZERO);
3744                            }
3745                        }
3746                    }
3747
3748                    let derivative = self.der(outer, Nullability::CENTER)?;
3749
3750                    stack.push(derivative);
3751
3752                    let found_null = self.iter_find_stack(&mut stack, |node| {
3753                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3754                            found_node = node;
3755                            if !self.get_meta_flags(node).contains_inter() {
3756                                true
3757                            } else {
3758                                e.insert(outer);
3759                                worklist.push_front(node);
3760                                self.any_nonbegin_nullable(node)
3761                            }
3762                        } else {
3763                            false
3764                        }
3765                    });
3766                    if found_null {
3767                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3768                        isempty_flag = NodeFlags::ZERO;
3769                        break;
3770                    }
3771                }
3772            }
3773        }
3774
3775        self.cache_empty.insert(
3776            initial_node,
3777            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3778        );
3779        Ok(isempty_flag)
3780    }
3781
3782    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3783    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3784        if larger_lang == smaller_lang {
3785            return Some(true);
3786        }
3787
3788        // assess initial nullability
3789        if self
3790            .nullability(larger_lang)
3791            .not()
3792            .and(self.nullability(smaller_lang))
3793            != Nullability::NEVER
3794        {
3795            return Some(false);
3796        }
3797
3798        // check language nullability
3799        // if (B &~ A) ≡ ⊥ then B ⊆ A
3800        // this means  L(B) \ L(A) = {}
3801        // eg. (a &~ .*) = ⊥ means .* subsumes a
3802        let (smaller_lang, larger_lang) =
3803            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3804                let wrap = |b: &mut Self, n: NodeId| {
3805                    let tmp = b.mk_concat(n, NodeId::TS);
3806                    b.mk_concat(NodeId::TS, tmp)
3807                };
3808                (wrap(self, smaller_lang), wrap(self, larger_lang))
3809            } else {
3810                (smaller_lang, larger_lang)
3811            };
3812
3813        let nota = self.mk_compl(larger_lang);
3814        let diff = self.mk_inter(smaller_lang, nota);
3815        self.is_empty_lang(diff)
3816    }
3817}