Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_inter(self) -> bool {
134        self.has(MetaFlags::CONTAINS_INTER)
135    }
136
137    pub(crate) fn all_contains_flags(self) -> MetaFlags {
138        self.and(
139            MetaFlags::CONTAINS_LOOKAROUND
140                .or(MetaFlags::CONTAINS_ANCHORS)
141                .or(MetaFlags::CONTAINS_INTER)
142                .or(MetaFlags::CONTAINS_LOOKBEHIND),
143        )
144    }
145}
146
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
148#[repr(u8)]
149pub enum Kind {
150    #[default]
151    Pred,
152    Star,
153    Begin,
154    End,
155    Concat,
156    Union,
157    Compl,
158    Lookbehind,
159    Lookahead,
160    Inter,
161    Counted,
162}
163
164#[derive(Eq, Hash, PartialEq, Clone)]
165struct Metadata {
166    flags: MetaFlags,
167    nulls: NullsId,
168}
169
170struct MetadataBuilder {
171    num_created: u32,
172    solver: Solver,
173    nb: NullsBuilder,
174    index: FxHashMap<Metadata, MetadataId>,
175    pub array: Vec<Metadata>,
176}
177
178mod helpers {
179    pub(crate) fn incr_rel(n1: u32) -> u32 {
180        match n1.overflowing_add(1) {
181            (_, true) => u32::MAX,
182            (res, false) => res,
183        }
184    }
185}
186
187impl MetadataBuilder {
188    fn new() -> MetadataBuilder {
189        Self {
190            index: FxHashMap::default(),
191            array: vec![Metadata {
192                flags: MetaFlags::ZERO,
193                nulls: NullsId::EMPTY,
194            }],
195            solver: Solver::new(),
196            num_created: 0,
197            nb: NullsBuilder::new(),
198        }
199    }
200
201    fn init(&mut self, inst: Metadata) -> MetadataId {
202        self.num_created += 1;
203        let new_id = MetadataId(self.num_created);
204        self.index.insert(inst.clone(), new_id);
205        self.array.push(inst);
206        new_id
207    }
208
209    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210        match self.index.get(&inst) {
211            Some(&id) => id,
212            None => self.init(inst),
213        }
214    }
215
216    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217        &self.array[inst.0 as usize]
218    }
219
220    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221        setflags.all_contains_flags()
222    }
223
224    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
225        let left = &self.array[body.0 as usize].flags;
226        let contains = self.get_contains(*left);
227        // BOT* = EPS (empty string only), not infinite
228        let inf = if body_id == NodeId::BOT {
229            MetaFlags::ZERO
230        } else {
231            MetaFlags::INFINITE_LENGTH
232        };
233        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
234    }
235
236    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
237        let left = &self.array[left_id.0 as usize].flags;
238        let null = left.nullability().not().and(Nullability::ALWAYS);
239        let contains = self.get_contains(*left);
240        MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
241    }
242
243    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
244        let left = &self.array[left_id.0 as usize].flags;
245        let right = &self.array[right_id.0 as usize].flags;
246        let null = left.nullability().and(right.nullability());
247        let contains = self.get_contains(left.or(*right));
248        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
249        MetaFlags::with_nullability(null, contains.or(len))
250    }
251
252    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253        let left = &self.array[left_id.0 as usize].flags;
254        let right = &self.array[right_id.0 as usize].flags;
255        let null = left.nullability().and(right.nullability());
256        let contains = self
257            .get_contains(left.or(*right))
258            .or(MetaFlags::CONTAINS_INTER);
259        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
260        MetaFlags::with_nullability(null, contains.or(len))
261    }
262
263    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
264        let left = &self.array[left_id.0 as usize].flags;
265        let right = &self.array[right_id.0 as usize].flags;
266        let null = left.nullability().or(right.nullability());
267        let contains = self.get_contains(left.or(*right));
268        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
269        MetaFlags::with_nullability(null, contains.or(len))
270    }
271}
272
273#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
274pub struct NodeKey {
275    pub(crate) kind: Kind,
276    pub(crate) left: NodeId,
277    pub(crate) right: NodeId,
278    pub(crate) extra: u32,
279}
280
281#[derive(Eq, Hash, PartialEq, Clone, Debug)]
282pub enum TRegex<TSet> {
283    Leaf(NodeId),
284    ITE(TSet, TRegexId, TRegexId),
285}
286
287#[derive(Eq, Hash, PartialEq, Clone, Copy)]
288pub(crate) struct NodeFlags(u8);
289impl NodeFlags {
290    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
291    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
292    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
293
294    fn is_checked(self) -> bool {
295        self.0 >= NodeFlags::IS_CHECKED.0
296    }
297    fn is_empty(self) -> bool {
298        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
299    }
300}
301
302#[derive(Eq, Hash, PartialEq, Clone, Copy)]
303pub(crate) struct BuilderFlags(u8);
304impl BuilderFlags {
305    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
306    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
307}
308
309pub struct RegexBuilder {
310    mb: MetadataBuilder,
311    temp_vec: Vec<NodeId>,
312    num_created: u32,
313    index: FxHashMap<NodeKey, NodeId>,
314    array: Vec<NodeKey>,
315    metadata: Vec<MetadataId>,
316    reversed: Vec<NodeId>,
317    cache_empty: FxHashMap<NodeId, NodeFlags>,
318    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
319    tr_array: Vec<TRegex<TSetId>>,
320    tr_der_center: Vec<TRegexId>,
321    tr_der_begin: Vec<TRegexId>,
322    flags: BuilderFlags,
323    /// maximum lookahead context distance before returning `AnchorLimit`.
324    pub lookahead_context_max: u32,
325    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
326    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
327}
328
329impl NodeId {
330    fn is_missing(&self) -> bool {
331        *self == NodeId::MISSING
332    }
333    #[inline]
334    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
335        b.get_flags_contains(self)
336    }
337    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
338        if self == tail {
339            true
340        } else if self.is_kind(b, Kind::Concat) {
341            self.right(b).has_concat_tail(b, tail)
342        } else {
343            false
344        }
345    }
346    fn missing_to_eps(&self) -> NodeId {
347        if *self == NodeId::MISSING {
348            NodeId::EPS
349        } else {
350            *self
351        }
352    }
353
354    #[inline]
355    fn kind(self, b: &RegexBuilder) -> Kind {
356        b.get_kind(self)
357    }
358    #[inline]
359    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
360        b.get_kind(self) == k
361    }
362    #[inline]
363    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
364        b.nullability(self) == Nullability::NEVER
365    }
366    #[inline]
367    pub fn nullability(self, b: &RegexBuilder) -> Nullability {
368        b.nullability(self)
369    }
370
371    #[inline]
372    pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
373        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
374    }
375    #[inline]
376    pub fn left(self, b: &RegexBuilder) -> NodeId {
377        b.get_left(self)
378    }
379
380    #[inline]
381    pub fn right(self, b: &RegexBuilder) -> NodeId {
382        b.get_right(self)
383    }
384
385    #[inline]
386    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
387        b.der(self, mask)
388    }
389
390    #[inline]
391    fn extra(self, b: &RegexBuilder) -> u32 {
392        b.get_extra(self)
393    }
394
395    #[inline]
396    fn is_pred(self, b: &RegexBuilder) -> bool {
397        b.get_kind(self) == Kind::Pred
398    }
399
400    #[inline]
401    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
402        debug_assert!(self.is_pred(b));
403        TSetId(b.get_extra(self))
404    }
405    #[inline]
406    fn is_star(self, b: &RegexBuilder) -> bool {
407        if NodeId::EPS == self {
408            return false;
409        }
410        b.get_kind(self) == Kind::Star
411    }
412
413    pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
414        b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
415    }
416
417    pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
418        b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAROUND)
419    }
420
421    #[inline]
422    pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
423        b.get_kind(self) == Kind::Inter
424    }
425
426    #[inline]
427    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
428        if self.is_concat(b) {
429            let r = self.right(b);
430            return r.is_star(b) && r.left(b) == self.left(b);
431        }
432        false
433    }
434
435    #[inline]
436    fn is_union(self, b: &RegexBuilder) -> bool {
437        b.get_kind(self) == Kind::Union
438    }
439
440    #[inline]
441    fn is_concat(self, b: &RegexBuilder) -> bool {
442        b.get_kind(self) == Kind::Concat
443    }
444
445    #[inline]
446    fn is_lookahead(self, b: &RegexBuilder) -> bool {
447        b.get_kind(self) == Kind::Lookahead
448    }
449
450    #[inline]
451    fn is_lookbehind(self, b: &RegexBuilder) -> bool {
452        b.get_kind(self) == Kind::Lookbehind
453    }
454
455    #[inline]
456    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
457        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
458            Some(b.get_right(self))
459        } else {
460            None
461        }
462    }
463
464    #[inline]
465    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
466        if b.get_kind(self) == Kind::Concat {
467            let left = self.left(b);
468            let right = self.right(b);
469            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
470                return right == NodeId::END;
471            }
472        }
473        false
474    }
475
476    #[inline]
477    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
478        if NodeId::EPS == self {
479            return None;
480        }
481        if self.is_star(b) && self.left(b).is_pred(b) {
482            Some(self.left(b))
483        } else {
484            None
485        }
486    }
487
488    #[inline]
489    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
490        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
491            let middle = self.right(b);
492            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
493                return Some(middle.left(b));
494            }
495        }
496        None
497    }
498
499    #[inline]
500    pub(crate) fn iter_union(
501        self,
502        b: &mut RegexBuilder,
503        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
504    ) {
505        let mut curr: NodeId = self;
506        while curr.kind(b) == Kind::Union {
507            f(b, curr.left(b));
508            curr = curr.right(b);
509        }
510        f(b, curr);
511    }
512
513    #[inline]
514    pub(crate) fn iter_inter(
515        self,
516        b: &mut RegexBuilder,
517        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
518    ) {
519        let mut curr: NodeId = self;
520        while curr.kind(b) == Kind::Inter {
521            f(b, curr.left(b));
522            curr = curr.right(b);
523        }
524        f(b, curr);
525    }
526
527    #[inline]
528    pub(crate) fn iter_union_while(
529        self,
530        b: &mut RegexBuilder,
531        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
532    ) {
533        let mut curr: NodeId = self;
534        let mut continue_loop = true;
535        while continue_loop && curr.kind(b) == Kind::Union {
536            continue_loop = f(b, curr.left(b));
537            curr = curr.right(b);
538        }
539        if continue_loop {
540            f(b, curr);
541        }
542    }
543
544    #[inline]
545    pub(crate) fn any_inter_component(
546        self,
547        b: &RegexBuilder,
548        mut f: impl FnMut(NodeId) -> bool,
549    ) -> bool {
550        debug_assert!(self.kind(b) == Kind::Inter);
551        let mut cur = self;
552        while cur.kind(b) == Kind::Inter {
553            if f(cur.left(b)) {
554                return true;
555            }
556            cur = cur.right(b);
557        }
558        f(cur)
559    }
560
561    #[inline]
562    pub(crate) fn any_union_component(
563        self,
564        b: &RegexBuilder,
565        mut f: impl FnMut(NodeId) -> bool,
566    ) -> bool {
567        let mut cur = self;
568        while cur.kind(b) == Kind::Union {
569            if f(cur.left(b)) {
570                return true;
571            }
572            cur = cur.right(b);
573        }
574        f(cur)
575    }
576}
577
578impl Default for RegexBuilder {
579    fn default() -> Self {
580        Self::new()
581    }
582}
583
584impl RegexBuilder {
585    pub fn new() -> RegexBuilder {
586        let mut inst = Self {
587            mb: MetadataBuilder::new(),
588            array: Vec::new(),
589            index: FxHashMap::default(),
590            cache_empty: FxHashMap::default(),
591            tr_array: Vec::new(),
592            tr_cache: FxHashMap::default(),
593            flags: BuilderFlags::ZERO,
594            lookahead_context_max: 800,
595            num_created: 0,
596            metadata: Vec::new(),
597            reversed: Vec::new(),
598            tr_der_center: Vec::new(),
599            tr_der_begin: Vec::new(),
600            temp_vec: Vec::new(),
601            mk_binary_memo: FxHashMap::default(),
602            clean_cache: FxHashMap::default(),
603        };
604        inst.array.push(NodeKey::default());
605        inst.mk_pred(TSetId::EMPTY);
606        inst.mk_star(NodeId::BOT);
607        inst.mk_pred(TSetId::FULL);
608        inst.mk_star(NodeId::TOP);
609        let top_plus_id = inst.mk_plus(NodeId::TOP);
610        inst.mk_unset(Kind::Begin);
611        inst.mk_unset(Kind::End);
612        debug_assert!(top_plus_id == NodeId::TOPPLUS);
613
614        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
615        inst.mk_leaf(NodeId::BOT);
616        inst.mk_leaf(NodeId::EPS);
617        inst.mk_leaf(NodeId::TOP);
618        inst.mk_leaf(NodeId::TS);
619
620        inst.flags = BuilderFlags::SUBSUME;
621        inst
622    }
623
624    #[inline]
625    pub fn solver_ref(&self) -> &Solver {
626        &self.mb.solver
627    }
628
629    #[inline]
630    pub fn solver(&mut self) -> &mut Solver {
631        &mut self.mb.solver
632    }
633
634    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
635        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
636        self.tr_cache.insert(inst.clone(), new_id);
637        self.tr_array.push(inst);
638        new_id
639    }
640
641    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
642        match self.tr_cache.get(&inst) {
643            Some(&id) => id,
644            None => self.tr_init(inst),
645        }
646    }
647
648    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
649        &self.tr_array[inst.0 as usize]
650    }
651
652    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
653        let solv = self.solver();
654        !solv.is_sat_id(cond1, cond2)
655    }
656
657    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
658        let term = TRegex::<TSetId>::Leaf(node_id);
659        self.get_tregex_id(term)
660    }
661
662    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
663        let tmp_inst = TRegex::ITE(cond, _then, _else);
664        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
665            return *cached;
666        }
667        if _then == _else {
668            return _then;
669        }
670        if self.solver().is_full_id(cond) {
671            return _then;
672        }
673        if self.solver().is_empty_id(cond) {
674            return _else;
675        }
676        let _clean_then = match self.tr_array[_then.0 as usize] {
677            TRegex::Leaf(_) => _then,
678            _ => self.clean(cond, _then),
679        };
680        let notcond = self.solver().not_id(cond);
681        let _clean_else = match self.tr_array[_else.0 as usize] {
682            TRegex::Leaf(_) => _else,
683            _ => self.clean(notcond, _else),
684        };
685
686        if _clean_then == _clean_else {
687            return _clean_then;
688        }
689        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
690        match self.get_tregex(_clean_then) {
691            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
692                let _cond_then = *leftcond;
693                let new_then = *_inner_then;
694                let sand = self.solver().and_id(cond, _cond_then);
695                return self.mk_ite(sand, new_then, _clean_else);
696            }
697            _ => {}
698        }
699        // attempt right side flattening:
700        match self.get_tregex(_clean_else) {
701            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
702            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
703                let e2clone = *_e2;
704                let new_cond = self.mb.solver.or_id(cond, *_c2);
705                return self.mk_ite(new_cond, _clean_then, e2clone);
706            }
707            _ => {}
708        }
709
710        if _clean_then == TRegexId::BOT {
711            let flip_cond = self.solver().not_id(cond);
712            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
713            return cleaned_id;
714        }
715
716        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
717    }
718
719    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
720        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
721            return cached;
722        }
723        let result = match *self.get_tregex(tterm) {
724            TRegex::Leaf(_) => tterm,
725            TRegex::ITE(alpha, _then_id, _else_id) => {
726                let notalpha = self.mb.solver.not_id(alpha);
727                if self.mb.solver.unsat_id(beta, alpha) {
728                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
729                    self.clean(beta, _else_id)
730                } else if self.unsat(beta, notalpha) {
731                    // beta ⊆ alpha, so beta ∧ alpha = beta
732                    self.clean(beta, _then_id)
733                } else {
734                    let tc = self.mb.solver.and_id(beta, alpha);
735                    let ec = self.mb.solver.and_id(beta, notalpha);
736                    let new_then = self.clean(tc, _then_id);
737                    let new_else = self.clean(ec, _else_id);
738                    self.mk_ite(alpha, new_then, new_else)
739                }
740            }
741        };
742        self.clean_cache.insert((beta, tterm), result);
743        result
744    }
745
746    fn mk_unary(
747        &mut self,
748        term: TRegexId,
749        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
750    ) -> TRegexId {
751        match self.tr_array[term.0 as usize] {
752            TRegex::Leaf(node_id) => {
753                let applied = apply(self, node_id);
754                self.mk_leaf(applied)
755            }
756            TRegex::ITE(c1, _then, _else) => {
757                let _then1 = self.mk_unary(_then, apply);
758                let _else1 = self.mk_unary(_else, apply);
759                self.mk_ite(c1, _then1, _else1)
760            }
761        }
762    }
763
764    fn mk_binary_result(
765        &mut self,
766        left: TRegexId,
767        right: TRegexId,
768        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
769    ) -> Result<TRegexId, AlgebraError> {
770        match self.tr_array[left.0 as usize] {
771            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
772                TRegex::Leaf(right_node_id) => {
773                    let applied = apply(self, left_node_id, right_node_id)?;
774                    Ok(self.mk_leaf(applied))
775                }
776                TRegex::ITE(c2, _then, _else) => {
777                    let then2 = self.mk_binary_result(left, _then, apply)?;
778                    let else2 = self.mk_binary_result(left, _else, apply)?;
779                    Ok(self.mk_ite(c2, then2, else2))
780                }
781            },
782            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
783                TRegex::Leaf(_) => {
784                    let then2 = self.mk_binary_result(_then1, right, apply)?;
785                    let else2 = self.mk_binary_result(_else1, right, apply)?;
786                    Ok(self.mk_ite(c1, then2, else2))
787                }
788                TRegex::ITE(c2, _then2, _else2) => {
789                    if c1 == c2 {
790                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
791                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
792                        return Ok(self.mk_ite(c1, _then, _else));
793                    }
794                    if c1.0 > c2.0 {
795                        let _then = self.mk_binary_result(_then1, right, apply)?;
796                        let _else = self.mk_binary_result(_else1, right, apply)?;
797                        Ok(self.mk_ite(c1, _then, _else))
798                    } else {
799                        let _then = self.mk_binary_result(left, _then2, apply)?;
800                        let _else = self.mk_binary_result(left, _else2, apply)?;
801                        Ok(self.mk_ite(c2, _then, _else))
802                    }
803                }
804            },
805        }
806    }
807
808    fn mk_binary(
809        &mut self,
810        left: TRegexId,
811        right: TRegexId,
812        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
813    ) -> TRegexId {
814        self.mk_binary_memo.clear();
815        self.mk_binary_inner(left, right, apply)
816    }
817
818    fn mk_binary_inner(
819        &mut self,
820        left: TRegexId,
821        right: TRegexId,
822        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
823    ) -> TRegexId {
824        if left == right {
825            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
826        }
827        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
828            return cached;
829        }
830        let result = match self.tr_array[left.0 as usize] {
831            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
832                TRegex::Leaf(right_node_id) => {
833                    let applied = apply(self, left_node_id, right_node_id);
834                    self.mk_leaf(applied)
835                }
836                TRegex::ITE(c2, _then, _else) => {
837                    let then2 = self.mk_binary_inner(left, _then, apply);
838                    let else2 = self.mk_binary_inner(left, _else, apply);
839                    self.mk_ite(c2, then2, else2)
840                }
841            },
842            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
843                TRegex::Leaf(_) => {
844                    let then2 = self.mk_binary_inner(_then1, right, apply);
845                    let else2 = self.mk_binary_inner(_else1, right, apply);
846                    self.mk_ite(c1, then2, else2)
847                }
848                TRegex::ITE(c2, _then2, _else2) => {
849                    if c1 == c2 {
850                        let _then = self.mk_binary_inner(_then1, _then2, apply);
851                        let _else = self.mk_binary_inner(_else1, _else2, apply);
852                        self.mk_ite(c1, _then, _else)
853                    } else if c1.0 > c2.0 {
854                        let _then = self.mk_binary_inner(_then1, right, apply);
855                        let _else = self.mk_binary_inner(_else1, right, apply);
856                        self.mk_ite(c1, _then, _else)
857                    } else {
858                        let _then = self.mk_binary_inner(left, _then2, apply);
859                        let _else = self.mk_binary_inner(left, _else2, apply);
860                        self.mk_ite(c2, _then, _else)
861                    }
862                }
863            },
864        };
865        self.mk_binary_memo.insert((left, right), result);
866        result
867    }
868
869    pub fn get_nulls(
870        &mut self,
871        pending_rel: u32,
872        mask: Nullability,
873        acc: &mut BTreeSet<NullState>,
874        node_id: NodeId,
875    ) {
876        debug_assert!(node_id != NodeId::MISSING);
877        if !self.is_nullable(node_id, mask) {
878            return;
879        }
880        match self.get_kind(node_id) {
881            Kind::Pred => {}
882            Kind::End => {
883                if mask.has(Nullability::END) {
884                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
885                }
886            }
887            Kind::Begin => {
888                if mask.has(Nullability::BEGIN) {
889                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
890                }
891            }
892            Kind::Concat => {
893                let new_mask = self.nullability(node_id).and(mask);
894                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
895                if self.is_nullable(node_id.left(self), mask) {
896                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
897                }
898            }
899            Kind::Union => {
900                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
901                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
902            }
903            Kind::Inter => {
904                let new_mask = self.nullability(node_id).and(mask);
905                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
906                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
907            }
908            Kind::Star => {
909                acc.insert(NullState::new(mask, pending_rel));
910                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
911            }
912            Kind::Compl => {
913                if !self.is_nullable(node_id.left(self), mask) {
914                    acc.insert(NullState::new(mask, 0));
915                }
916            }
917            Kind::Lookbehind => {
918                let new_mask = self.nullability(node_id).and(mask);
919                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
920                if node_id.right(self) != NodeId::MISSING {
921                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
922                }
923            }
924            Kind::Lookahead => {
925                let la_inner = self.get_lookahead_inner(node_id);
926                if self.is_nullable(la_inner, mask) {
927                    let rel = self.get_lookahead_rel(node_id);
928                    if rel != u32::MAX {
929                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
930                    }
931                    // tail only contributes when body is sat
932                    let la_tail = self.get_lookahead_tail(node_id);
933                    if la_tail != NodeId::MISSING {
934                        self.get_nulls(pending_rel, mask, acc, la_tail);
935                    }
936                }
937            }
938            Kind::Counted => {
939                let packed = self.get_extra(node_id);
940                let best = packed >> 16;
941                if best > 0 {
942                    acc.insert(NullState::new(mask, pending_rel + best));
943                }
944            }
945        }
946    }
947
948    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
949        self.get_meta_flags(node_id)
950            .has(MetaFlags::CONTAINS_LOOKAROUND)
951    }
952
953    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
954        self.get_meta_flags(node_id)
955            .has(MetaFlags::CONTAINS_ANCHORS)
956    }
957
958    pub fn is_infinite(&self, node_id: NodeId) -> bool {
959        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
960    }
961
962    /// returns (min_length, max_length). max = u32::MAX means unbounded.
963    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
964        if self.is_infinite(node_id) {
965            if node_id.is_inter(self) {
966                self.get_bounded_length(node_id)
967            } else {
968                (self.get_min_length_only(node_id), u32::MAX)
969            }
970        } else {
971            self.get_bounded_length(node_id)
972        }
973    }
974
975    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
976        if node_id == NodeId::EPS {
977            return (0, 0);
978        }
979        match self.get_kind(node_id) {
980            Kind::End | Kind::Begin => (0, 0),
981            Kind::Pred => (1, 1),
982            Kind::Concat => {
983                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
984                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
985                (lmin + rmin, lmax.saturating_add(rmax))
986            }
987            Kind::Union => {
988                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
989                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
990                (lmin.min(rmin), lmax.max(rmax))
991            }
992            Kind::Inter => {
993                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
994                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
995                (lmin.max(rmin), lmax.min(rmax))
996            }
997            Kind::Lookahead => {
998                let body = node_id.left(self);
999                if self.is_infinite(body) {
1000                    return (0, u32::MAX);
1001                }
1002                let right = node_id.right(self);
1003                if right.is_missing() {
1004                    (0, 0)
1005                } else {
1006                    self.get_min_max_length(right)
1007                }
1008            }
1009            Kind::Counted => (0, 0),
1010            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
1011        }
1012    }
1013
1014    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1015        match self.get_kind(node_id) {
1016            Kind::End | Kind::Begin => Some(0),
1017            Kind::Pred => Some(1),
1018            Kind::Concat => {
1019                let l = self.get_fixed_length(node_id.left(self))?;
1020                let r = self.get_fixed_length(node_id.right(self))?;
1021                Some(l + r)
1022            }
1023            Kind::Union => {
1024                let l = self.get_fixed_length(node_id.left(self))?;
1025                let r = self.get_fixed_length(node_id.right(self))?;
1026                if l == r {
1027                    Some(l)
1028                } else {
1029                    None
1030                }
1031            }
1032            Kind::Inter => {
1033                let l = self.get_fixed_length(node_id.left(self))?;
1034                let r = self.get_fixed_length(node_id.right(self))?;
1035                if l == r {
1036                    Some(l)
1037                } else {
1038                    None
1039                }
1040            }
1041            Kind::Lookahead => {
1042                let right = node_id.right(self);
1043                if right.is_missing() {
1044                    Some(0)
1045                } else {
1046                    self.get_fixed_length(right)
1047                }
1048            }
1049            Kind::Counted => Some(0),
1050            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1051        }
1052    }
1053
1054    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1055        match self.get_kind(node_id) {
1056            Kind::End | Kind::Begin => 0,
1057            Kind::Pred => 1,
1058            Kind::Concat => {
1059                self.get_min_length_only(node_id.left(self))
1060                    + self.get_min_length_only(node_id.right(self))
1061            }
1062            Kind::Union => self
1063                .get_min_length_only(node_id.left(self))
1064                .min(self.get_min_length_only(node_id.right(self))),
1065            Kind::Inter => self
1066                .get_min_length_only(node_id.left(self))
1067                .max(self.get_min_length_only(node_id.right(self))),
1068            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1069            Kind::Compl => {
1070                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1071                    0
1072                } else {
1073                    1
1074                }
1075            }
1076        }
1077    }
1078
1079    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1080        if node_id == NodeId::TS {
1081            return true;
1082        }
1083        match self.get_kind(node_id) {
1084            Kind::Inter => {
1085                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1086            }
1087            Kind::Union => {
1088                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1089            }
1090            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1091            _ => false,
1092        }
1093    }
1094
1095    #[inline]
1096    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1097        if node_id.is_concat(self) {
1098            self.ends_with_ts(node_id.right(self))
1099        } else {
1100            node_id == NodeId::TS
1101        }
1102    }
1103
1104    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1105        debug_assert!(node_id != NodeId::MISSING);
1106        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1107    }
1108
1109    pub(crate) fn cache_der(
1110        &mut self,
1111        node_id: NodeId,
1112        result: TRegexId,
1113        mask: Nullability,
1114    ) -> TRegexId {
1115        if mask == Nullability::CENTER {
1116            self.tr_der_center[node_id.0 as usize] = result
1117        } else {
1118            self.tr_der_begin[node_id.0 as usize] = result
1119        };
1120        result
1121    }
1122
1123    pub(crate) fn try_cached_der(
1124        &mut self,
1125        node_id: NodeId,
1126        mask: Nullability,
1127    ) -> Option<TRegexId> {
1128        let cache = if mask == Nullability::CENTER {
1129            &mut self.tr_der_center
1130        } else {
1131            &mut self.tr_der_begin
1132        };
1133        match cache.get(node_id.0 as usize) {
1134            Some(&TRegexId::MISSING) => {}
1135            Some(&result) => {
1136                return Some(result);
1137            }
1138            None => {
1139                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1140            }
1141        }
1142        None
1143    }
1144
1145    pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1146        let mut term = self.get_tregex(der);
1147        loop {
1148            match *term {
1149                TRegex::Leaf(node_id) => return node_id,
1150                TRegex::ITE(cond, _then, _else) => {
1151                    if self.solver().is_sat_id(set, cond) {
1152                        term = self.get_tregex(_then);
1153                    } else {
1154                        term = self.get_tregex(_else);
1155                    }
1156                }
1157            }
1158        }
1159    }
1160
1161    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1162        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1163        debug_assert!(
1164            node_id != NodeId::MISSING,
1165            "attempting to derive missing node"
1166        );
1167        if let Some(result) = self.try_cached_der(node_id, mask) {
1168            return Ok(result);
1169        }
1170
1171        let result = match node_id.kind(self) {
1172            Kind::Compl => {
1173                let leftd = node_id.left(self).der(self, mask)?;
1174                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1175            }
1176            Kind::Inter => {
1177                let leftd = node_id.left(self).der(self, mask)?;
1178                let rightd = node_id.right(self).der(self, mask)?;
1179                {
1180                    let this = &mut *self;
1181                    this.mk_binary(
1182                        leftd,
1183                        rightd,
1184                        &mut (|b, left, right| b.mk_inter(left, right)),
1185                    )
1186                }
1187            }
1188            Kind::Union => {
1189                let leftd = self.der(node_id.left(self), mask)?;
1190                let rightd = self.der(node_id.right(self), mask)?;
1191                {
1192                    let this = &mut *self;
1193                    this.mk_binary(
1194                        leftd,
1195                        rightd,
1196                        &mut (|b, left, right| b.mk_union(left, right)),
1197                    )
1198                }
1199            }
1200            Kind::Concat => {
1201                let head = node_id.left(self);
1202                let tail = node_id.right(self);
1203                let tail_leaf = self.mk_leaf(tail);
1204                let head_der = self.der(head, mask)?;
1205
1206                if self.is_nullable(head, mask) {
1207                    let rightd = self.der(tail, mask)?;
1208                    let ldr = {
1209                        let this = &mut *self;
1210                        this.mk_binary(
1211                            head_der,
1212                            tail_leaf,
1213                            &mut (|b, left, right| b.mk_concat(left, right)),
1214                        )
1215                    };
1216                    {
1217                        let this = &mut *self;
1218                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1219                    }
1220                } else {
1221                    let this = &mut *self;
1222                    this.mk_binary(
1223                        head_der,
1224                        tail_leaf,
1225                        &mut (|b, left, right| b.mk_concat(left, right)),
1226                    )
1227                }
1228            }
1229            Kind::Star => {
1230                if node_id == NodeId::EPS {
1231                    TRegexId::BOT
1232                } else {
1233                    let left = node_id.left(self);
1234                    let r_decr_leaf = self.mk_leaf(node_id);
1235                    let r_der = self.der(left, mask)?;
1236                    let this = &mut *self;
1237                    this.mk_binary(
1238                        r_der,
1239                        r_decr_leaf,
1240                        &mut (|b, left, right| b.mk_concat(left, right)),
1241                    )
1242                }
1243            }
1244            Kind::Lookbehind => {
1245                let lb_prev_der = {
1246                    let lb_prev = self.get_lookbehind_prev(node_id);
1247                    if lb_prev == NodeId::MISSING {
1248                        TRegexId::MISSING
1249                    } else {
1250                        self.der(lb_prev, mask)?
1251                    }
1252                };
1253                let lb_inner = self.get_lookbehind_inner(node_id);
1254                let lb_inner_der = self.der(lb_inner, mask)?;
1255                {
1256                    let this = &mut *self;
1257                    this.mk_binary_result(
1258                        lb_inner_der,
1259                        lb_prev_der,
1260                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1261                    )?
1262                }
1263            }
1264            Kind::Lookahead => {
1265                let la_tail = self.get_lookahead_tail(node_id);
1266                let la_body = node_id.left(self);
1267                let rel = self.get_lookahead_rel(node_id);
1268
1269                if self.is_nullable(la_body, mask) {
1270                    // nullabilty is taken once, just keep the body
1271                    let right = node_id.right(self).missing_to_eps();
1272                    let rder = self.der(right, mask).clone();
1273                    return rder;
1274                }
1275
1276                if rel == u32::MAX {
1277                    let la_body_der = self.der(la_body, mask)?;
1278                    if la_tail.is_kind(self, Kind::Pred) {
1279                        let transitioned =
1280                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1281                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1282                        let concated = self.mk_concat(la_tail, new_la);
1283                        return self.der(concated, mask);
1284                    }
1285                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1286                        let left = la_tail.left(self);
1287                        let tset = left.pred_tset(self);
1288                        let transitioned = self.transition_term(la_body_der, tset);
1289                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1290                        let tail_right = la_tail.right(self);
1291                        let concated = self.mk_concat(new_la, tail_right);
1292                        let concated = self.mk_concat(left, concated);
1293                        return self.der(concated, mask);
1294                    }
1295                }
1296
1297                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1298                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1299                    let concated = self.mk_concat(la_body, nulls_mask);
1300                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1301                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1302                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1303                    let new_union = self.mk_union(concated_look, new_look);
1304                    return self.der(new_union, mask);
1305                }
1306
1307                let la_tail_der = if la_tail == NodeId::MISSING {
1308                    TRegexId::MISSING
1309                } else {
1310                    if self.is_nullable(la_tail, mask) {
1311                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1312                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1313                        let la_union = self.mk_union(la_tail, nulls_la);
1314                        self.der(la_union, mask)?
1315                    } else {
1316                        self.der(la_tail, mask)?
1317                    }
1318                };
1319
1320                let la_body_der = self.der(la_body, mask)?;
1321
1322                if rel != u32::MAX && rel > self.lookahead_context_max {
1323                    return Err(AlgebraError::AnchorLimit);
1324                }
1325
1326                let la = {
1327                    let this = &mut *self;
1328                    let rel = helpers::incr_rel(rel);
1329                    this.mk_binary(
1330                        la_body_der,
1331                        la_tail_der,
1332                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1333                    )
1334                };
1335
1336                if rel != u32::MAX
1337                    && la_tail_der != TRegexId::MISSING
1338                    && self.is_nullable(la_tail, mask)
1339                {
1340                    let look_only = {
1341                        let this = &mut *self;
1342                        let right = TRegexId::MISSING;
1343                        let rel = helpers::incr_rel(rel);
1344                        this.mk_binary(
1345                            la_body_der,
1346                            right,
1347                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1348                        )
1349                    };
1350                    {
1351                        let this = &mut *self;
1352                        this.mk_binary(
1353                            look_only,
1354                            la,
1355                            &mut (|b, left, right| b.mk_union(left, right)),
1356                        )
1357                    }
1358                } else {
1359                    la
1360                }
1361            }
1362            Kind::Counted => {
1363                let body = node_id.left(self);
1364                let chain = node_id.right(self);
1365                let packed = self.get_extra(node_id);
1366                let step = (packed & 0xFFFF) as u16;
1367                let best = (packed >> 16) as u16;
1368
1369                let mid_best = if self.is_nullable(body, mask) && step >= best {
1370                    step
1371                } else {
1372                    best
1373                };
1374
1375                let body_der = self.der(body, mask)?;
1376                let new_step = step.saturating_add(1);
1377                self.mk_unary(body_der, &mut |b, new_body| {
1378                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1379                        new_step
1380                    } else {
1381                        mid_best
1382                    };
1383                    let packed = (final_best as u32) << 16 | new_step as u32;
1384                    b.mk_counted(new_body, chain, packed)
1385                })
1386            }
1387            Kind::Begin | Kind::End => TRegexId::BOT,
1388            Kind::Pred => {
1389                let psi = node_id.pred_tset(self);
1390                if psi == TSetId::EMPTY {
1391                    TRegexId::BOT
1392                } else {
1393                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1394                }
1395            }
1396        };
1397
1398        // println!("{} {}", node_id.0, self.pp(node_id));
1399        // println!("node: {} (total: {})", node_id.0, self.num_created);
1400
1401        self.cache_der(node_id, result, mask);
1402        Ok(result)
1403    }
1404
1405    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1406        debug_assert!(meta_id != MetadataId::MISSING);
1407        match self.metadata.get_mut(node_id.0 as usize) {
1408            Some(v) => *v = meta_id,
1409            None => {
1410                self.metadata
1411                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1412                self.metadata[node_id.0 as usize] = meta_id;
1413            }
1414        }
1415    }
1416
1417    fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1418        debug_assert!(reversed_id != NodeId::MISSING);
1419        match self.reversed.get_mut(node_id.0 as usize) {
1420            Some(v) => *v = reversed_id,
1421            None => {
1422                self.reversed
1423                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1424                self.reversed[node_id.0 as usize] = reversed_id;
1425            }
1426        }
1427        return reversed_id;
1428    }
1429
1430    fn init(&mut self, inst: NodeKey) -> NodeId {
1431        self.num_created += 1;
1432        let node_id = NodeId(self.num_created);
1433        self.index.insert(inst.clone(), node_id);
1434        match inst.kind {
1435            Kind::Pred => {
1436                let meta_id = self.mb.get_meta_id(Metadata {
1437                    flags: MetaFlags::ZERO,
1438                    nulls: NullsId::EMPTY,
1439                });
1440                self.init_metadata(node_id, meta_id);
1441            }
1442            Kind::Begin => {
1443                let meta_id = self.mb.get_meta_id(Metadata {
1444                    flags: MetaFlags::with_nullability(
1445                        Nullability::BEGIN,
1446                        MetaFlags::CONTAINS_ANCHORS,
1447                    ),
1448                    nulls: NullsId::BEGIN0,
1449                });
1450                self.init_metadata(node_id, meta_id);
1451            }
1452            Kind::End => {
1453                let meta_id = self.mb.get_meta_id(Metadata {
1454                    flags: MetaFlags::with_nullability(
1455                        Nullability::END,
1456                        MetaFlags::CONTAINS_ANCHORS,
1457                    ),
1458                    nulls: NullsId::END0,
1459                });
1460                self.init_metadata(node_id, meta_id);
1461            }
1462            Kind::Inter => {
1463                let m1 = self.get_node_meta_id(inst.left);
1464                let m2 = self.get_node_meta_id(inst.right);
1465                let meta_id = {
1466                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1467                    let mask_l = inst.left.nullability(self);
1468                    let mask_r = inst.right.nullability(self);
1469                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1470                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1471                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1472                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1473                    let new_meta = Metadata {
1474                        flags: self.mb.flags_inter(m1, m2),
1475                        nulls,
1476                    };
1477                    self.mb.get_meta_id(new_meta)
1478                };
1479                self.init_metadata(node_id, meta_id);
1480            }
1481            Kind::Union => {
1482                let m1 = self.get_node_meta_id(inst.left);
1483                let m2 = self.get_node_meta_id(inst.right);
1484                let meta_id = {
1485                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1486                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1487                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1488                    let new_meta = Metadata {
1489                        flags: self.mb.flags_union(m1, m2),
1490                        nulls,
1491                    };
1492                    self.mb.get_meta_id(new_meta)
1493                };
1494                self.init_metadata(node_id, meta_id);
1495            }
1496            Kind::Concat => {
1497                let flags = self.mb.flags_concat(
1498                    self.get_node_meta_id(inst.left),
1499                    self.get_node_meta_id(inst.right),
1500                );
1501
1502                let right_nullability = inst.right.nullability(self);
1503                let left_nullability = inst.left.nullability(self);
1504                let nulls_left = self.get_nulls_id(inst.left);
1505                let nulls_right = self.get_nulls_id(inst.right);
1506                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1507                let mask = right_nullability.and(left_nullability);
1508                nulls = self.mb.nb.and_mask(nulls, mask);
1509
1510                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1511                self.init_metadata(node_id, new_id);
1512            }
1513            Kind::Star => {
1514                let left_nulls = self.get_nulls_id(inst.left);
1515                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1516                let meta_id = self.mb.get_meta_id(Metadata {
1517                    flags: self
1518                        .mb
1519                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1520                    nulls,
1521                });
1522                self.init_metadata(node_id, meta_id);
1523            }
1524            Kind::Compl => {
1525                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1526                let meta_id = self.mb.get_meta_id(Metadata {
1527                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1528                    nulls,
1529                });
1530                self.init_metadata(node_id, meta_id);
1531            }
1532            Kind::Lookbehind => {
1533                let mut left_nullability = self.nullability(inst.left);
1534                let mut contains_flags = self.get_flags_contains(inst.left);
1535                let nulls = if inst.right.is_missing() {
1536                    self.get_nulls_id(inst.left)
1537                } else {
1538                    let right_nullability = self.nullability(inst.right);
1539                    let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1540                    let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1541                    left_nullability = left_nullability.and(right_nullability);
1542                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1543                    self.mb.nb.and_id(nulls_left, nulls_right)
1544                };
1545
1546                let meta_id = self.mb.get_meta_id(Metadata {
1547                    flags: MetaFlags::with_nullability(
1548                        left_nullability,
1549                        contains_flags
1550                            .or(MetaFlags::CONTAINS_LOOKAROUND)
1551                            .or(MetaFlags::CONTAINS_LOOKBEHIND),
1552                    ),
1553                    nulls,
1554                });
1555                self.init_metadata(node_id, meta_id);
1556            }
1557            Kind::Lookahead => {
1558                let mut nulls = self.get_nulls_id(inst.left);
1559                // lookahead nulls shifted by rel (distance to body match)
1560                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1561                let left_nullability = inst.left.nullability(self);
1562                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1563
1564                nulls = self.mb.nb.or_id(nulls, nulls_right);
1565
1566                let la_inner = inst.left;
1567                let la_tail = inst.right;
1568                let null = self
1569                    .get_meta_flags(la_inner)
1570                    .nullability()
1571                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1572                let contains_flags = self
1573                    .get_flags_contains(la_inner)
1574                    .or(self.get_flags_contains(la_tail));
1575
1576                let meta_id = self.mb.get_meta_id(Metadata {
1577                    flags: MetaFlags::with_nullability(
1578                        null,
1579                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1580                    ),
1581                    nulls,
1582                });
1583                self.init_metadata(node_id, meta_id);
1584            }
1585            Kind::Counted => {
1586                let best = inst.extra >> 16;
1587                let (null, nulls) = if best > 0 {
1588                    let mut ns = BTreeSet::new();
1589                    ns.insert(NullState::new(Nullability::CENTER, best));
1590                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1591                } else {
1592                    (Nullability::NEVER, NullsId::EMPTY)
1593                };
1594                let meta_id = self.mb.get_meta_id(Metadata {
1595                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1596                    nulls,
1597                });
1598                self.init_metadata(node_id, meta_id);
1599            }
1600        }
1601
1602        self.array.push(inst);
1603
1604        if let Some(rw) = self.post_init_simplify(node_id) {
1605            self.override_as(node_id, rw)
1606        } else {
1607            node_id
1608        }
1609    }
1610
1611    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1612        match self.get_kind(node_id) {
1613            Kind::Concat => {
1614                let lhs = node_id.left(self);
1615                let rhs = node_id.right(self);
1616                if lhs.is_pred_star(self).is_some() {
1617                    if let Some(opttail) = rhs.is_opt_v(self) {
1618                        if let Some(true) = self.subsumes(lhs, opttail) {
1619                            return Some(lhs);
1620                        }
1621                    }
1622                }
1623            }
1624            Kind::Union => {
1625                let lhs = node_id.left(self);
1626                let rhs = node_id.right(self);
1627                let mut subsumed = false;
1628                rhs.iter_union_while(self, &mut |b, branch| {
1629                    if b.nullable_subsumes(branch, lhs) {
1630                        subsumed = true;
1631                    }
1632                    !subsumed
1633                });
1634                if subsumed {
1635                    return Some(rhs);
1636                }
1637                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1638                    return Some(rhs);
1639                }
1640            }
1641            _ => {}
1642        }
1643
1644        None
1645    }
1646
1647    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1648    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1649        if self.get_kind(lhs) != Kind::Union {
1650            return false; // single branch already checked by nullable_subsumes
1651        }
1652        let mut rhs_branches = Vec::new();
1653        let mut curr = rhs;
1654        while self.get_kind(curr) == Kind::Union {
1655            rhs_branches.push(self.get_left(curr));
1656            curr = self.get_right(curr);
1657        }
1658        rhs_branches.push(curr);
1659
1660        curr = lhs;
1661        while self.get_kind(curr) == Kind::Union {
1662            if !rhs_branches.contains(&self.get_left(curr)) {
1663                return false;
1664            }
1665            curr = self.get_right(curr);
1666        }
1667        rhs_branches.contains(&curr)
1668    }
1669
1670    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1671    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1672        if node == target {
1673            return true;
1674        }
1675        match self.get_kind(node) {
1676            Kind::Union => {
1677                self.nullable_subsumes(self.get_left(node), target)
1678                    || self.nullable_subsumes(self.get_right(node), target)
1679            }
1680            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1681                self.nullable_subsumes(self.get_right(node), target)
1682            }
1683            _ => false,
1684        }
1685    }
1686
1687    pub fn num_nodes(&self) -> u32 {
1688        self.num_created
1689    }
1690
1691    pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1692        let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1693        let mut stack: Vec<NodeId> = vec![root];
1694        while let Some(n) = stack.pop() {
1695            if n == NodeId::MISSING
1696                || n == NodeId::BOT
1697                || n == NodeId::EPS
1698                || n == NodeId::TS
1699                || n == NodeId::BEGIN
1700                || n == NodeId::END
1701            {
1702                continue;
1703            }
1704            if seen.insert(n, ()).is_some() {
1705                continue;
1706            }
1707            if seen.len() >= limit {
1708                return limit;
1709            }
1710            stack.push(self.get_left(n));
1711            stack.push(self.get_right(n));
1712        }
1713        seen.len()
1714    }
1715    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1716        match self.index.get(&inst) {
1717            Some(&id) => id,
1718            None => self.init(inst),
1719        }
1720    }
1721    #[inline]
1722    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1723        self.index.get(inst)
1724    }
1725
1726    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1727        self.index.insert(key, subsumed);
1728        subsumed
1729    }
1730
1731    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1732        let key = &self.array[key.0 as usize];
1733        let entry = self.index.get_mut(key).unwrap();
1734        *entry = subsumed;
1735        subsumed
1736    }
1737
1738    #[inline]
1739    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1740        self.array[node_id.0 as usize].left
1741    }
1742
1743    #[inline]
1744    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1745        self.array[node_id.0 as usize].right
1746    }
1747
1748    #[inline]
1749    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1750        self.array[node_id.0 as usize].extra
1751    }
1752
1753    #[inline]
1754    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1755        debug_assert!(node_id.is_concat(self));
1756        let mut curr = node_id;
1757        while curr.is_concat(self) {
1758            curr = curr.right(self);
1759        }
1760        curr
1761    }
1762
1763    fn has_trailing_la(&self, node: NodeId) -> bool {
1764        let end = match self.get_kind(node) {
1765            Kind::Concat => self.get_concat_end(node),
1766            Kind::Lookahead => node,
1767            _ => return false,
1768        };
1769        end.is_lookahead(self) && end.right(self).is_missing()
1770    }
1771
1772    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1773        if node.is_lookahead(self) {
1774            return (NodeId::EPS, node);
1775        }
1776        debug_assert!(node.is_concat(self));
1777        let right = node.right(self);
1778        if !right.is_concat(self) {
1779            return (node.left(self), right);
1780        }
1781        let (stripped, la) = self.strip_trailing_la(right);
1782        (self.mk_concat(node.left(self), stripped), la)
1783    }
1784    #[inline]
1785    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1786        debug_assert!(matches!(
1787            self.get_kind(lookahead_node_id),
1788            Kind::Lookahead | Kind::Counted
1789        ));
1790        lookahead_node_id.left(self)
1791    }
1792    #[inline]
1793    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1794        debug_assert!(lookahead_node_id.is_lookahead(self));
1795        lookahead_node_id.right(self)
1796    }
1797    #[inline]
1798    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1799        debug_assert!(
1800            matches!(
1801                self.get_kind(lookahead_node_id),
1802                Kind::Lookahead | Kind::Counted
1803            ),
1804            "not lookahead/counted: {:?}",
1805            self.pp(lookahead_node_id)
1806        );
1807        self.get_extra(lookahead_node_id)
1808    }
1809    #[inline]
1810    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1811        debug_assert!(lookbehind_node_id.is_lookbehind(self));
1812        lookbehind_node_id.left(self)
1813    }
1814    #[inline]
1815    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1816        debug_assert!(lookbehind_node_id.is_lookbehind(self));
1817        lookbehind_node_id.right(self)
1818    }
1819
1820    #[inline]
1821    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1822        debug_assert!(
1823            self.array.len() > node_id.0 as usize,
1824            "array len: {:?}",
1825            node_id
1826        );
1827        self.array[node_id.0 as usize].kind
1828    }
1829
1830    #[inline]
1831    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1832        &self.array[node_id.0 as usize]
1833    }
1834
1835    #[inline]
1836    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1837        self.metadata[node_id.0 as usize]
1838    }
1839
1840    #[inline]
1841    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1842        debug_assert!(node_id.0 != u32::MAX);
1843        let meta_id = self.get_node_meta_id(node_id);
1844        debug_assert!(meta_id != MetadataId::MISSING);
1845        &self.mb.array[meta_id.0 as usize]
1846    }
1847
1848    #[inline]
1849    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1850        if node_id == NodeId::MISSING {
1851            NullsId::EMPTY
1852        } else {
1853            self.get_meta(node_id).nulls
1854        }
1855    }
1856
1857    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1858        self.mb
1859            .nb
1860            .array
1861            .iter()
1862            .map(|set| set.iter().cloned().collect())
1863            .collect()
1864    }
1865
1866    pub fn nulls_count(&self) -> usize {
1867        self.mb.nb.array.len()
1868    }
1869
1870    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1871        self.mb.nb.array[id as usize].iter().cloned().collect()
1872    }
1873
1874    /// Return the interned NullsId whose entries are `nid` filtered to
1875    /// CENTER-nullability only (mask ANDed with CENTER, NEVER entries dropped).
1876    pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
1877        self.mb.nb.and_mask(nid, Nullability::CENTER)
1878    }
1879
1880    #[inline]
1881    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1882        if node_id == NodeId::MISSING {
1883            NullsId::EMPTY
1884        } else {
1885            let nulls = self.get_meta(node_id).nulls;
1886            self.mb.nb.and_mask(nulls, mask)
1887        }
1888    }
1889
1890    #[inline]
1891    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1892        let meta_id = self.get_node_meta_id(node_id);
1893        let meta = &self.mb.array[meta_id.0 as usize];
1894        meta.flags
1895    }
1896
1897    #[inline]
1898    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1899        self.get_meta(node_id).flags.nullability()
1900    }
1901
1902    #[inline]
1903    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1904        let meta_id = self.get_node_meta_id(node_id);
1905        let meta = &self.mb.array[meta_id.0 as usize];
1906        meta.flags.all_contains_flags()
1907    }
1908
1909    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1910        if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
1911            return self.strip_lb(node_id.right(self));
1912        }
1913        self.strip_lb_inner(node_id)
1914    }
1915
1916    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1917        if !self.contains_look(node_id) {
1918            return Ok(node_id);
1919        }
1920        if node_id.is_concat(self) && node_id.left(self).is_lookbehind(self) {
1921            let lb = node_id.left(self);
1922            let prev = self.get_lookbehind_prev(lb);
1923            let tail = self.strip_lb_inner(node_id.right(self))?;
1924            if prev != NodeId::MISSING {
1925                let stripped_prev = self.strip_lb_inner(prev)?;
1926                return Ok(self.mk_concat(stripped_prev, tail));
1927            }
1928            return Ok(tail);
1929        }
1930        if node_id.is_inter(self) {
1931            let left = self.strip_lb_inner(node_id.left(self))?;
1932            let right = self.strip_lb_inner(node_id.right(self))?;
1933            return Ok(self.mk_inter(left, right));
1934        }
1935        if self.get_kind(node_id) == Kind::Union {
1936            let left = self.strip_lb_inner(node_id.left(self))?;
1937            let right = self.strip_lb_inner(node_id.right(self))?;
1938            return Ok(self.mk_union(left, right));
1939        }
1940        match self.get_kind(node_id) {
1941            Kind::Lookbehind => {
1942                let prev = self.get_lookbehind_prev(node_id);
1943                if prev != NodeId::MISSING {
1944                    self.strip_lb_inner(prev)
1945                } else {
1946                    Ok(NodeId::EPS)
1947                }
1948            }
1949            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1950                Err(AlgebraError::UnsupportedPattern)
1951            }
1952            _ => Ok(node_id),
1953        }
1954    }
1955
1956    // for prefix purposes we prune any \A leading paths
1957    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1958        if !self.contains_anchors(node_id) {
1959            return node_id;
1960        }
1961        match self.get_kind(node_id) {
1962            Kind::Begin => NodeId::BOT,
1963            Kind::Concat => {
1964                let left = self.nonbegins(node_id.left(self));
1965                if left == NodeId::BOT {
1966                    return NodeId::BOT;
1967                }
1968                self.mk_concat(left, node_id.right(self))
1969            }
1970            Kind::Union => {
1971                let left = self.nonbegins(node_id.left(self));
1972                let right = self.nonbegins(node_id.right(self));
1973                self.mk_union(left, right)
1974            }
1975            _ => node_id,
1976        }
1977    }
1978
1979    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1980        match self.get_kind(node_id) {
1981            Kind::Concat => {
1982                let head = node_id.left(self);
1983                match self.get_kind(head) {
1984                    _ if self.any_nonbegin_nullable(head) => {
1985                        self.strip_prefix_safe(node_id.right(self))
1986                    }
1987                    _ => node_id,
1988                }
1989            }
1990            _ => node_id,
1991        }
1992    }
1993    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1994        match self.get_kind(node_id) {
1995            Kind::Begin => NodeId::BOT,
1996            Kind::Concat => {
1997                let head = self.prune_begin(node_id.left(self));
1998                let tail = self.prune_begin(node_id.right(self));
1999                self.mk_concat(head, tail)
2000            }
2001            Kind::Lookbehind => {
2002                if !node_id.right(self).is_missing() {
2003                    return node_id;
2004                }
2005                let head = self.prune_begin(node_id.left(self));
2006                head
2007            }
2008            Kind::Union => {
2009                let left = self.prune_begin(node_id.left(self));
2010                let right = self.prune_begin(node_id.right(self));
2011                self.mk_union(left, right)
2012            }
2013            _ => node_id,
2014        }
2015    }
2016
2017    pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2018        if !self.contains_look(node_id) {
2019            return Ok(node_id);
2020        }
2021        let result = match self.get_kind(node_id) {
2022            Kind::Concat => {
2023                let left = self.normalize_rev(node_id.left(self))?;
2024                let right = self.normalize_rev(node_id.right(self))?;
2025                self.mk_concat(left, right)
2026            }
2027            Kind::Inter => {
2028                let left = self.normalize_rev(node_id.left(self))?;
2029                let right = self.normalize_rev(node_id.right(self))?;
2030                self.mk_inter(left, right)
2031            }
2032            Kind::Union => {
2033                let left = self.normalize_rev(node_id.left(self))?;
2034                let right = self.normalize_rev(node_id.right(self))?;
2035                self.mk_union(left, right)
2036            }
2037            Kind::Lookbehind => {
2038                let left = self.normalize_rev(node_id.left(self))?;
2039                let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2040                let lbody_ts = self.mk_concat(NodeId::TS, left);
2041                let ltail_ts = self.mk_concat(NodeId::TS, right);
2042                let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2043                as_inter
2044            }
2045            Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2046                return Err(AlgebraError::UnsupportedPattern)
2047            }
2048            _ => node_id,
2049        };
2050        Ok(result)
2051    }
2052
2053    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2054        debug_assert!(node_id != NodeId::MISSING);
2055        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2056            if *rev != NodeId::MISSING {
2057                return Ok(*rev);
2058            }
2059        }
2060        let rw = match self.get_kind(node_id) {
2061            Kind::End => NodeId::BEGIN,
2062            Kind::Begin => NodeId::END,
2063            Kind::Pred => node_id,
2064            Kind::Concat => {
2065                let left = self.reverse(node_id.left(self))?;
2066                let right = self.reverse(node_id.right(self))?;
2067                self.mk_concat(right, left)
2068            }
2069            Kind::Union => {
2070                let left = self.reverse(node_id.left(self))?;
2071                let right = self.reverse(node_id.right(self))?;
2072                self.mk_union(left, right)
2073            }
2074            Kind::Inter => {
2075                let left = self.reverse(node_id.left(self))?;
2076                let right = self.reverse(node_id.right(self))?;
2077                self.mk_inter(left, right)
2078            }
2079            Kind::Star => {
2080                let body = self.reverse(node_id.left(self))?;
2081                self.mk_star(body)
2082            }
2083            Kind::Compl => {
2084                if self.contains_look(node_id.left(self)) {
2085                    return Err(AlgebraError::UnsupportedPattern);
2086                }
2087                let body = self.reverse(node_id.left(self))?;
2088                self.mk_compl(body)
2089            }
2090            Kind::Lookbehind => {
2091                let prev = self.get_lookbehind_prev(node_id);
2092                let inner_id = self.get_lookbehind_inner(node_id);
2093                let rev_inner = self.reverse(inner_id)?;
2094                let rev_prev = if prev != NodeId::MISSING {
2095                    self.reverse(prev)?
2096                } else {
2097                    NodeId::MISSING
2098                };
2099                self.mk_lookahead(rev_inner, rev_prev, 0)
2100            }
2101            Kind::Lookahead => {
2102                let rel = self.get_lookahead_rel(node_id);
2103                // no nullability, we can write as intersection
2104                if rel == u32::MAX {
2105                    let lbody = self.get_lookahead_inner(node_id);
2106                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2107                    let rbody = self.reverse(lbody);
2108                    let rtail = self.reverse(ltail);
2109                    let rev = self.mk_lookbehind(rbody?, rtail?);
2110                    return Ok(self.cache_reversed(node_id, rev));
2111                }
2112                if rel != 0 {
2113                    return Err(AlgebraError::UnsupportedPattern);
2114                }
2115                let tail_node = self.get_lookahead_tail(node_id);
2116                let rev_tail = if tail_node != NodeId::MISSING {
2117                    self.reverse(tail_node)?
2118                } else {
2119                    NodeId::MISSING
2120                };
2121                let inner_id = self.get_lookahead_inner(node_id);
2122                let rev_inner = self.reverse(inner_id)?;
2123                self.mk_lookbehind(rev_inner, rev_tail)
2124            }
2125            Kind::Counted => {
2126                return Err(AlgebraError::UnsupportedPattern);
2127            }
2128        };
2129        self.cache_reversed(node_id, rw);
2130        Ok(rw)
2131    }
2132
2133    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2134        let node = NodeKey {
2135            kind: Kind::Pred,
2136            left: NodeId::MISSING,
2137            right: NodeId::MISSING,
2138            extra: pred.0,
2139        };
2140        self.get_node_id(node)
2141    }
2142
2143    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2144        let key = NodeKey {
2145            kind: Kind::Compl,
2146            left: body,
2147            right: NodeId::MISSING,
2148            extra: u32::MAX,
2149        };
2150        if let Some(id) = self.key_is_created(&key) {
2151            return *id;
2152        }
2153        if body == NodeId::BOT {
2154            return NodeId::TS;
2155        }
2156        if body == NodeId::TS {
2157            return NodeId::BOT;
2158        }
2159
2160        if let Some(contains_body) = body.is_contains(self) {
2161            if contains_body.is_pred(self) {
2162                let pred = contains_body.pred_tset(self);
2163                let notpred = self.mk_pred_not(pred);
2164                let node = self.mk_star(notpred);
2165                return self.init_as(key, node);
2166            } else if contains_body == NodeId::END {
2167                return self.init_as(key, NodeId::BOT);
2168            }
2169        };
2170
2171        if self.get_kind(body) == Kind::Compl {
2172            return self.get_node(body).left;
2173        }
2174
2175        self.get_node_id(key)
2176    }
2177
2178    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2179        let nid = self.get_nulls_id(body);
2180        let nref = self.mb.nb.get_set_ref(nid).clone();
2181        let mut futures = NodeId::BOT;
2182        for n in nref.iter() {
2183            if !n.is_mask_nullable(mask) {
2184                continue;
2185            }
2186
2187            let eff = if n.rel == 0 {
2188                NodeId::EPS
2189            } else {
2190                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2191            };
2192            futures = self.mk_union(futures, eff)
2193        }
2194        futures
2195    }
2196
2197    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2198        if cfg!(feature = "norewrite") {
2199            return None;
2200        }
2201        #[cfg(debug_assertions)]
2202        if std::env::var("TRACE_CONCAT").is_ok() {
2203            eprintln!("rw2 head={} tail={}", self.pp(head), self.pp(tail));
2204        }
2205
2206        if tail.is_lookbehind(self) {
2207            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2208            return self
2209                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2210                .ok();
2211        }
2212        if head.is_lookahead(self) {
2213            let la_tail = self.get_lookahead_tail(head);
2214            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2215            if new_la_tail.is_center_nullable(self) {
2216                let la_new = self.mk_lookahead_internal(
2217                    self.get_lookahead_inner(head),
2218                    new_la_tail,
2219                    u32::MAX,
2220                );
2221                return Some(la_new);
2222            }
2223            let la_body = self.get_lookahead_inner(head);
2224            let la_rel = self.get_lookahead_rel(head);
2225            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2226                let tail_rel = self.get_lookahead_rel(new_la_tail);
2227                tail_rel + la_rel
2228            } else {
2229                u32::MAX
2230            };
2231
2232            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2233        }
2234
2235        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2236            return Some(head);
2237        }
2238
2239        // breaks:rev
2240        // if head == NodeId::TS && tail == NodeId::END {
2241        //     return Some(head);
2242        // }
2243
2244        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2245            return Some(NodeId::TS);
2246        }
2247
2248        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2249            return Some(NodeId::TS);
2250        }
2251
2252        if head.is_inter(self) && tail == NodeId::TS {
2253            let mut alltopstar = true;
2254            head.iter_inter(
2255                self,
2256                &mut (|b, v| {
2257                    alltopstar = b.ends_with_ts(v);
2258                }),
2259            );
2260
2261            if alltopstar {
2262                return Some(head);
2263            }
2264        }
2265
2266        if head.is_star(self) && head == tail {
2267            return Some(head);
2268        }
2269
2270        None
2271    }
2272
2273    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2274        use Kind::*;
2275
2276        if cfg!(feature = "norewrite") {
2277            return None;
2278        }
2279        if left == right {
2280            return Some(left);
2281        }
2282
2283        if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2284            return Some(left);
2285        }
2286        if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2287            return Some(right);
2288        }
2289
2290        if left.is_inter(self) && right.is_inter(self) {
2291            let mut lconj: Vec<NodeId> = Vec::new();
2292            left.any_inter_component(self, |v| {
2293                lconj.push(v);
2294                false
2295            });
2296            let lconj_initial = lconj.len();
2297            let mut common = NodeId::TS;
2298            let mut r_rest = NodeId::TS;
2299            let mut cur = right;
2300            loop {
2301                let (v, next) = if cur.kind(self) == Inter {
2302                    (cur.left(self), Some(cur.right(self)))
2303                } else {
2304                    (cur, None)
2305                };
2306                if let Some(pos) = lconj.iter().position(|&x| x == v) {
2307                    lconj.swap_remove(pos);
2308                    common = self.mk_inter(v, common);
2309                } else {
2310                    r_rest = self.mk_inter(v, r_rest);
2311                }
2312                match next {
2313                    Some(n) => cur = n,
2314                    None => break,
2315                }
2316            }
2317            if lconj.len() < lconj_initial {
2318                let l_rest = lconj
2319                    .iter()
2320                    .fold(NodeId::TS, |acc, &v| self.mk_inter(v, acc));
2321                let inner_union = self.mk_union(l_rest, r_rest);
2322                return Some(self.mk_inter(common, inner_union));
2323            }
2324        }
2325
2326        if left.is_pred(self) && right.is_pred(self) {
2327            let l = left.pred_tset(self);
2328            let r = right.pred_tset(self);
2329            let solver = self.solver();
2330            let psi = solver.or_id(l, r);
2331            let rewrite = self.mk_pred(psi);
2332            return Some(rewrite);
2333        }
2334
2335        if left == NodeId::EPS
2336            && self.get_extra(left) == 0
2337            && self.nullability(right) == Nullability::ALWAYS
2338        {
2339            return Some(right);
2340        }
2341
2342        if left.is_lookahead(self) && right.is_lookahead(self) {
2343            let lb = left.left(self);
2344            let lt = left.right(self);
2345            let lrel = left.extra(self);
2346
2347            let rb = right.left(self);
2348            let rt = right.right(self);
2349            let rrel = right.extra(self);
2350
2351            if lrel == rrel && lt.is_missing() && rt.is_missing() {
2352                let unioned = self.mk_union(lb, rb);
2353                let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2354                return Some(node);
2355            }
2356        }
2357
2358        if right.is_kind(self, Concat) {
2359            if left == NodeId::END
2360                && right.left(self) == NodeId::END
2361                && self.nullability(right).has(Nullability::END)
2362            {
2363                return Some(right);
2364            }
2365            // .*|.*a.* => .*(a.*|)
2366            if left == right.left(self) {
2367                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2368                let rw = self.mk_concat(left, rhs);
2369                return Some(rw);
2370            }
2371            if left.is_kind(self, Concat) {
2372                let head1 = left.left(self);
2373                let head2 = right.left(self);
2374
2375                if head1 == head2 {
2376                    let t1 = left.right(self);
2377                    let t2 = right.right(self);
2378                    // opportunistic rewrites
2379                    if head1 == NodeId::TS {
2380                        if t2.has_concat_tail(self, t1) {
2381                            return Some(left);
2382                        }
2383                        if t1.has_concat_tail(self, t2) {
2384                            return Some(right);
2385                        }
2386                    }
2387                    let un = self.mk_union(t1, t2);
2388                    return Some(self.mk_concat(left.left(self), un));
2389                }
2390
2391                // xa|ya => (x|y)a - suffix factoring via reverse
2392                // TODO: valid and looks prettier but .reverse is not good for builder perf,
2393                // leaving out unless i find a case where it helps significantly
2394                if false {
2395                    let end1 = self.get_concat_end(left);
2396                    let end2 = self.get_concat_end(right);
2397                    if end1 == end2 {
2398                        let contains_lookarounds =
2399                            left.contains_lookaround(self) || right.contains_lookaround(self);
2400                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2401                        if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2402                            let rev1 = self.reverse(left).unwrap();
2403                            let rev2 = self.reverse(right).unwrap();
2404
2405                            let union_rev = self.mk_union(rev1, rev2);
2406                            return Some(self.reverse(union_rev).unwrap());
2407                        }
2408                    }
2409                }
2410            }
2411            if left.is_pred(self) && left == right.left(self) {
2412                let un = self.mk_opt(right.right(self));
2413                let conc = self.mk_concat(left, un);
2414                return Some(conc);
2415            }
2416        }
2417
2418        if left == NodeId::EPS && right.is_plus(self) {
2419            let result = self.mk_star(right.left(self));
2420            return Some(result);
2421        }
2422
2423        // (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2424        if left.is_inter(self) && right.is_inter(self) {
2425            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2426                return Some(rw);
2427            }
2428        }
2429
2430        None
2431    }
2432
2433    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2434        let mut curr = node;
2435        while curr.is_inter(self) {
2436            out.push(self.get_left(curr));
2437            curr = self.get_right(curr);
2438        }
2439        out.push(curr);
2440    }
2441
2442    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2443        let mut curr = node;
2444        let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2445        if has_prefix {
2446            curr = self.get_right(curr);
2447        }
2448        let mut count = 0u32;
2449        let mut pred_set = None;
2450        while curr.is_concat(self) {
2451            let head = self.get_left(curr);
2452            if !head.is_pred(self) {
2453                return None;
2454            }
2455            let ps = head.pred_tset(self);
2456            match pred_set {
2457                None => pred_set = Some(ps),
2458                Some(existing) => {
2459                    if existing != ps {
2460                        return None;
2461                    }
2462                }
2463            }
2464            curr = self.get_right(curr);
2465            count += 1;
2466        }
2467        if count == 0 || !curr.is_star(self) {
2468            return None;
2469        }
2470        Some((has_prefix, pred_set.unwrap(), curr, count))
2471    }
2472
2473    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2474        let mut j = 0;
2475        for &s in sub {
2476            while j < sup.len() && sup[j] < s {
2477                j += 1;
2478            }
2479            if j >= sup.len() || sup[j] != s {
2480                return false;
2481            }
2482            j += 1;
2483        }
2484        true
2485    }
2486
2487    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2488        if !left.is_inter(self) || !right.is_inter(self) {
2489            return None;
2490        }
2491
2492        let mut lc: Vec<NodeId> = Vec::new();
2493        let mut rc: Vec<NodeId> = Vec::new();
2494        self.collect_inter_components(left, &mut lc);
2495        self.collect_inter_components(right, &mut rc);
2496
2497        // component subset check: fewer constraints = larger language
2498        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2499            return Some(left);
2500        }
2501        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2502        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2503            return Some(right);
2504        }
2505
2506        if lc.len() == rc.len() {
2507            let mut diff_idx = None;
2508            for i in 0..lc.len() {
2509                if lc[i] != rc[i] {
2510                    if diff_idx.is_some() {
2511                        return None;
2512                    }
2513                    diff_idx = Some(i);
2514                }
2515            }
2516            if let Some(idx) = diff_idx {
2517                let a = lc[idx];
2518                let b = rc[idx];
2519                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2520                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2521                {
2522                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2523                        return if ca < cb { Some(left) } else { Some(right) };
2524                    }
2525                }
2526            }
2527        }
2528
2529        None
2530    }
2531
2532    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2533        if cfg!(feature = "norewrite") {
2534            return None;
2535        }
2536        if left == right {
2537            return Some(left);
2538        }
2539        if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2540            return Some(right);
2541        }
2542        if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2543            return Some(left);
2544        }
2545
2546        if left.is_pred(self) && right.is_pred(self) {
2547            let l = left.pred_tset(self);
2548            let r = right.pred_tset(self);
2549            let solver = self.solver();
2550            let psi = solver.and_id(l, r);
2551            let rewrite = self.mk_pred(psi);
2552            return Some(rewrite);
2553        }
2554
2555        if left.is_concat(self) && right.is_concat(self) {
2556            if left.left(self) == right.left(self) {
2557                if left.left(self).is_pred(self) {
2558                    let new_right = self.mk_inter(left.right(self), right.right(self));
2559                    return Some(self.mk_concat(left.left(self), new_right));
2560                }
2561            }
2562        }
2563
2564        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2565            return Some(NodeId::BOT);
2566        }
2567
2568        if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2569            let bodies = self.mk_union(left.left(self), right.left(self));
2570            return Some(self.mk_compl(bodies));
2571        }
2572
2573        if left == NodeId::TOPPLUS {
2574            if right.is_pred_star(self).is_some() {
2575                let newloop = self.mk_plus(right.left(self));
2576                return Some(newloop);
2577            }
2578            if right.is_never_nullable(self) {
2579                return Some(right);
2580            }
2581            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2582                return Some(NodeId::BOT);
2583            }
2584            if right.is_kind(self, Kind::Concat) {}
2585        }
2586
2587        {
2588            let l_is_la = left.is_lookahead(self);
2589            let r_is_la = right.is_lookahead(self);
2590            let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2591            let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2592            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2593                let (la_node, other, concat_body) = if r_is_la {
2594                    (right, left, NodeId::MISSING)
2595                } else if l_is_la {
2596                    (left, right, NodeId::MISSING)
2597                } else if r_is_cla {
2598                    (right.left(self), left, right.right(self))
2599                } else {
2600                    (left.left(self), right, left.right(self))
2601                };
2602                let la_body = la_node.left(self);
2603                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2604                let inter_right = if concat_body.is_missing() {
2605                    la_tail
2606                } else {
2607                    self.mk_concat(la_tail, concat_body)
2608                };
2609                let new_body = self.mk_inter(other, inter_right);
2610                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2611                return Some(self.mk_concat(la, new_body));
2612            }
2613        }
2614
2615        if self.get_kind(right) == Kind::Compl {
2616            let compl_body = right.left(self);
2617            if left == compl_body {
2618                return Some(NodeId::BOT);
2619            }
2620            if compl_body.is_concat(self) {
2621                let compl_head = compl_body.left(self);
2622                if compl_body.right(self) == NodeId::TS && compl_head == left {
2623                    return Some(NodeId::BOT);
2624                }
2625            }
2626        }
2627
2628        if let Some(pleft) = left.is_pred_star(self) {
2629            if let Some(pright) = right.is_pred_star(self) {
2630                let merged = self.mk_inter(pleft, pright);
2631                return Some(self.mk_star(merged));
2632            }
2633        }
2634
2635        {
2636            let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2637            let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2638            if l_is_clb || r_is_clb {
2639                let (lb, body) = if l_is_clb && r_is_clb {
2640                    let lb1 = left.left(self);
2641                    let lb2 = right.left(self);
2642                    let inner = self.mk_inter(
2643                        self.get_lookbehind_inner(lb1),
2644                        self.get_lookbehind_inner(lb2),
2645                    );
2646                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2647                    let body = self.mk_inter(left.right(self), right.right(self));
2648                    (lb, body)
2649                } else if l_is_clb {
2650                    let lb = left.left(self);
2651                    let body = self.mk_inter(left.right(self), right);
2652                    (lb, body)
2653                } else {
2654                    let lb = right.left(self);
2655                    let body = self.mk_inter(left, right.right(self));
2656                    (lb, body)
2657                };
2658                return Some(self.mk_concat(lb, body));
2659            }
2660        }
2661
2662        {
2663            let l_has_la = self.has_trailing_la(left);
2664            let r_has_la = self.has_trailing_la(right);
2665            if l_has_la || r_has_la {
2666                let (body, la) = if l_has_la && r_has_la {
2667                    let (lbody, l_la) = self.strip_trailing_la(left);
2668                    let (rbody, r_la) = self.strip_trailing_la(right);
2669                    let inner = self.mk_inter(
2670                        self.get_lookahead_inner(l_la),
2671                        self.get_lookahead_inner(r_la),
2672                    );
2673                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2674                    let body = self.mk_inter(lbody, rbody);
2675                    (body, la)
2676                } else if l_has_la {
2677                    let (lbody, la) = self.strip_trailing_la(left);
2678                    let body = self.mk_inter(lbody, right);
2679                    (body, la)
2680                } else {
2681                    let (rbody, la) = self.strip_trailing_la(right);
2682                    let body = self.mk_inter(left, rbody);
2683                    (body, la)
2684                };
2685                return Some(self.mk_concat(body, la));
2686            }
2687        }
2688
2689        None
2690    }
2691
2692    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2693        if cfg!(feature = "norewrite") {
2694            return None;
2695        }
2696        debug_assert!(self.get_kind(right_union) == Kind::Union);
2697
2698        let mut rewritten = None;
2699        right_union.iter_union_while(
2700            self,
2701            &mut (|b, v| {
2702                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2703                    rewritten = Some((v, rw));
2704                    false
2705                } else {
2706                    true
2707                }
2708            }),
2709        );
2710
2711        if let Some(rw) = rewritten {
2712            let mut new_union = NodeId::BOT;
2713            right_union.iter_union(
2714                self,
2715                &mut (|b, v| {
2716                    if v == rw.0 {
2717                        new_union = b.mk_union(rw.1, new_union)
2718                    } else {
2719                        new_union = b.mk_union(v, new_union)
2720                    }
2721                }),
2722            );
2723            return Some(new_union);
2724        };
2725
2726        None
2727    }
2728
2729    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2730        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2731        debug_assert!(tail != NodeId::MISSING);
2732        let key = NodeKey {
2733            kind: Kind::Concat,
2734            left: head,
2735            right: tail,
2736            extra: u32::MAX,
2737        };
2738        if let Some(id) = self.key_is_created(&key) {
2739            return *id;
2740        }
2741
2742        if head == NodeId::BOT || tail == NodeId::BOT {
2743            return NodeId::BOT;
2744        }
2745        if head == NodeId::EPS {
2746            return tail;
2747        }
2748        if tail == NodeId::EPS {
2749            return head;
2750        }
2751
2752        match tail {
2753            // breaks:rev
2754            NodeId::BEGIN => {
2755                if !self.is_nullable(head, Nullability::BEGIN) {
2756                    return NodeId::BOT;
2757                } else {
2758                    return NodeId::BEGIN;
2759                }
2760            }
2761            _ => {}
2762        }
2763
2764        // normalize concats to right
2765        if head.is_kind(self, Kind::Concat) {
2766            let left = head.left(self);
2767            let newright = self.mk_concat(head.right(self), tail);
2768            let updated = self.mk_concat(left, newright);
2769            return self.init_as(key, updated);
2770        }
2771
2772        if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2773            return NodeId::BOT;
2774        }
2775
2776        if tail.is_concat(self) {
2777            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2778                let upd = self.mk_concat(rw, tail.right(self));
2779                return self.init_as(key, upd);
2780            }
2781        }
2782
2783        if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2784            return self.init_as(key, new);
2785        }
2786
2787        match (self.get_kind(head), self.get_kind(tail)) {
2788            // merge stars
2789            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2790                let rl = tail.left(self);
2791                if head == rl {
2792                    return self.init_as(key, tail);
2793                }
2794            }
2795            // attempt longer concat rw
2796            (_, Kind::Concat) => {
2797                let curr = head;
2798                let h2 = self.mk_concat(curr, tail.left(self));
2799                let tr = tail.right(self);
2800                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2801                    return self.init_as(key, new);
2802                }
2803            }
2804            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2805                return self.init_as(key, tail);
2806            }
2807            _ => {}
2808        }
2809
2810        self.init(key)
2811    }
2812
2813    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2814        // LNF: lookbehind must start with ts
2815        let lb_body = {
2816            match self.starts_with_ts(lb_body) {
2817                true => lb_body,
2818                false => self.mk_concat(NodeId::TS, lb_body),
2819            }
2820        };
2821        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2822        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2823    }
2824
2825    fn mk_lookbehind_internal(
2826        &mut self,
2827        lb_body: NodeId,
2828        lb_prev: NodeId,
2829    ) -> Result<NodeId, AlgebraError> {
2830        debug_assert!(lb_body != NodeId::MISSING);
2831        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2832        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2833            return Ok(NodeId::BOT);
2834        }
2835        if lb_body == NodeId::TS {
2836            return Ok(lb_prev);
2837        }
2838        if lb_body == NodeId::EPS {
2839            match lb_prev {
2840                NodeId::MISSING => return Ok(NodeId::EPS),
2841                NodeId::EPS => return Ok(NodeId::EPS),
2842                _ => return Ok(lb_prev),
2843            }
2844        }
2845
2846        let key = NodeKey {
2847            kind: Kind::Lookbehind,
2848            left: lb_body,
2849            right: lb_prev,
2850            extra: u32::MAX,
2851        };
2852        match self.key_is_created(&key) {
2853            Some(id) => Ok(*id),
2854            None => {
2855                if lb_prev == NodeId::TS {
2856                    return Ok(self.mk_concat(lb_prev, lb_body));
2857                }
2858
2859                Ok(self.init(key))
2860            }
2861        }
2862    }
2863
2864    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2865        // LNF: lookahead must end with ts
2866        let la_body = {
2867            match self.ends_with_ts(la_body) {
2868                true => la_body,
2869                false => self.mk_concat(la_body, NodeId::TS),
2870            }
2871        };
2872        let rel = if NodeId::MISSING == la_tail {
2873            rel
2874        } else {
2875            match la_tail.is_center_nullable(self) {
2876                false => u32::MAX,
2877                true => rel,
2878            }
2879        };
2880
2881        self.mk_lookahead_internal(la_body, la_tail, rel)
2882    }
2883
2884    // rel max = carries no nullability, can potentially rw to intersection
2885    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2886        let key = NodeKey {
2887            kind: Kind::Lookahead,
2888            left: la_body,
2889            right: la_tail,
2890            extra: rel,
2891        };
2892        if let Some(id) = self.key_is_created(&key) {
2893            return *id;
2894        }
2895        if la_body == NodeId::TS {
2896            if rel == 0 {
2897                return la_tail.missing_to_eps();
2898            } else {
2899                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2900            }
2901        }
2902        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2903            return NodeId::BOT;
2904        }
2905        if la_tail.is_missing() && rel == u32::MAX {
2906            return NodeId::BOT;
2907        }
2908
2909        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2910            return la_body;
2911        }
2912
2913        if la_tail == NodeId::TS {
2914            if rel == 0 || rel == u32::MAX {
2915                return self.mk_concat(la_body, NodeId::TS);
2916            } else if rel == u32::MAX {
2917                return self.mk_begins_with(la_body);
2918            }
2919        }
2920
2921        if rel == u32::MAX {
2922            if la_tail.is_missing() {
2923                return NodeId::BOT;
2924            }
2925
2926            if self.is_always_nullable(la_body) {
2927                return la_tail.missing_to_eps();
2928            }
2929
2930            if la_tail != NodeId::MISSING {
2931                match self.get_kind(la_tail) {
2932                    _ => {
2933                        if la_body.is_compl_plus_end(self) {
2934                            let minlen = self.get_min_length_only(la_tail);
2935                            if minlen >= 1 {
2936                                return NodeId::BOT;
2937                            }
2938                        }
2939                    }
2940                }
2941            }
2942        }
2943
2944        if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
2945            let la_body2 = self.get_lookahead_inner(la_tail);
2946            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2947            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2948            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2949            let new_la_rel = self.get_lookahead_rel(la_tail);
2950            let new_la_tail = self.get_lookahead_tail(la_tail);
2951            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2952        }
2953
2954        if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
2955            let la_body_right = la_body.right(self);
2956            if self.is_always_nullable(la_body_right) {
2957                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2958            }
2959            if la_body.right(self) == NodeId::END {
2960                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2961            }
2962            let bodyright = la_body.right(self);
2963            if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
2964                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2965                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2966            }
2967        }
2968
2969        if la_tail != NodeId::MISSING {
2970            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2971                let lpred = la_body.left(self);
2972                if lpred.is_pred(self) {
2973                    let l = lpred.pred_tset(self);
2974                    let r = la_tail.pred_tset(self);
2975                    let psi_and = self.solver().and_id(l, r);
2976                    let rewrite = self.mk_pred(psi_and);
2977                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2978                    let new_right =
2979                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2980                    return self.mk_concat(rewrite, new_right);
2981                }
2982            }
2983        }
2984
2985        self.get_node_id(key)
2986    }
2987
2988    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2989        let has_match = (packed >> 16) > 0;
2990        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2991            return NodeId::BOT;
2992        }
2993        debug_assert!(
2994            body == NodeId::BOT || !self.is_infinite(body),
2995            "Counted body must have finite max length"
2996        );
2997        let chain = self.prune_counted_chain(body, chain);
2998        let key = NodeKey {
2999            kind: Kind::Counted,
3000            left: body,
3001            right: chain,
3002            extra: packed,
3003        };
3004        if let Some(id) = self.key_is_created(&key) {
3005            return *id;
3006        }
3007        self.get_node_id(key)
3008    }
3009
3010    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3011        if chain == NodeId::MISSING || body == NodeId::BOT {
3012            return chain;
3013        }
3014        if self.nullability(body) != Nullability::NEVER {
3015            return NodeId::MISSING;
3016        }
3017        let chain_body = chain.left(self);
3018        if chain_body == NodeId::BOT {
3019            return chain;
3020        }
3021        let not_begins = self.mk_not_begins_with(body);
3022        let inter = self.mk_inter(chain_body, not_begins);
3023        let is_empty = inter == NodeId::BOT;
3024        if is_empty {
3025            self.prune_counted_chain(body, chain.right(self))
3026        } else {
3027            chain
3028        }
3029    }
3030
3031    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3032        let neg_inner = self.mk_concat(body, NodeId::TS);
3033        let neg_part = self.mk_compl(neg_inner);
3034        let conc = self.mk_concat(neg_part, NodeId::END);
3035        self.mk_lookahead(conc, NodeId::MISSING, rel)
3036    }
3037
3038    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3039        match self.get_node(body).kind {
3040            Kind::Pred => {
3041                let psi = body.pred_tset(self);
3042                let negated = self.mk_pred_not(psi);
3043                let union = self.mk_union(NodeId::BEGIN, negated);
3044                // lb_prev is MISSING - cannot trigger UnsupportedPattern
3045                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3046            }
3047            _ => {
3048                // ~body ∩ utf8_char: non-nullable (utf8_char requires ≥1 byte),
3049                // so \A | negated won't be stripped by strip_prefix_safe
3050                let uc = crate::unicode_classes::utf8_char(self);
3051                let neg = self.mk_compl(body);
3052                let negated = self.mk_inter(neg, uc);
3053                let union = self.mk_union(NodeId::BEGIN, negated);
3054                // lb_prev is MISSING - cannot trigger UnsupportedPattern
3055                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3056            }
3057        }
3058    }
3059
3060    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3061        debug_assert!(left != NodeId::MISSING);
3062        debug_assert!(right != NodeId::MISSING);
3063        if left > right {
3064            return self.mk_union(right, left);
3065        }
3066        let key = NodeKey {
3067            kind: Kind::Union,
3068            left,
3069            right,
3070            extra: u32::MAX,
3071        };
3072        if let Some(id) = self.key_is_created(&key) {
3073            return *id;
3074        }
3075        if left == right {
3076            return self.init_as(key, left);
3077        }
3078        if left == NodeId::BOT {
3079            return self.init_as(key, right);
3080        }
3081        if right == NodeId::BOT {
3082            return self.init_as(key, left);
3083        }
3084        if right == NodeId::TS {
3085            return self.init_as(key, right);
3086        }
3087        if left == NodeId::TS {
3088            return self.init_as(key, left);
3089        }
3090        match (self.get_kind(left), self.get_kind(right)) {
3091            (Kind::Union, _) => {
3092                self.iter_unions_b(left, &mut |b, v| {
3093                    b.temp_vec.push(v);
3094                });
3095                self.iter_unions_b(right, &mut |b, v| {
3096                    b.temp_vec.push(v);
3097                });
3098                self.temp_vec.sort();
3099                let tree = self.temp_vec.clone();
3100                self.temp_vec.clear();
3101                let newnode = tree
3102                    .iter()
3103                    .rev()
3104                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3105                return self.init_as(key, newnode);
3106            }
3107            (_, Kind::Union) => {
3108                let rleft = right.left(self);
3109                // if left_node id is smaller than rleft, just create a new union
3110                if left > rleft {
3111                    self.iter_unions_b(left, &mut |b, v| {
3112                        b.temp_vec.push(v);
3113                    });
3114                    self.iter_unions_b(right, &mut |b, v| {
3115                        b.temp_vec.push(v);
3116                    });
3117                    self.temp_vec.sort();
3118                    let tree = self.temp_vec.clone();
3119                    self.temp_vec.clear();
3120                    let newnode = tree
3121                        .iter()
3122                        .rev()
3123                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3124                    return self.init_as(key, newnode);
3125                } else {
3126                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3127                        return self.init_as(key, rw);
3128                    }
3129                }
3130            }
3131            _ => {}
3132        }
3133
3134        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3135            return self.init_as(key, rw);
3136        }
3137        self.init(key)
3138    }
3139
3140    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3141        debug_assert!(left_id != NodeId::MISSING);
3142        debug_assert!(right_id != NodeId::MISSING);
3143        if left_id == right_id {
3144            return left_id;
3145        }
3146        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3147            return NodeId::BOT;
3148        }
3149        if left_id == NodeId::TS {
3150            return right_id;
3151        }
3152        if right_id == NodeId::TS {
3153            return left_id;
3154        }
3155        if left_id > right_id {
3156            return self.mk_inter(right_id, left_id);
3157        }
3158        let key = NodeKey {
3159            kind: Kind::Inter,
3160            left: left_id,
3161            right: right_id,
3162            extra: u32::MAX,
3163        };
3164        if let Some(id) = self.key_is_created(&key) {
3165            return *id;
3166        }
3167
3168        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3169            return self.init_as(key, rw);
3170        }
3171
3172        self.init(key)
3173    }
3174
3175    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3176        let node = NodeKey {
3177            kind,
3178            left: NodeId::MISSING,
3179            right: NodeId::MISSING,
3180            extra: u32::MAX,
3181        };
3182        self.init(node)
3183    }
3184
3185    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3186        let star = self.mk_star(body_id);
3187        self.mk_concat(body_id, star)
3188    }
3189    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3190        let opt = self.mk_opt(body_id);
3191        let mut nodes1 = vec![];
3192        for _ in lower..upper {
3193            nodes1.push(opt);
3194        }
3195        for _ in 0..lower {
3196            nodes1.push(body_id);
3197        }
3198        self.mk_concats(nodes1.into_iter())
3199    }
3200    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3201        self.mk_union(NodeId::EPS, body_id)
3202    }
3203
3204    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3205        let key = NodeKey {
3206            kind: Kind::Star,
3207            left: body_id,
3208            right: NodeId::MISSING,
3209            extra: 0,
3210        };
3211        if let Some(id) = self.key_is_created(&key) {
3212            return *id;
3213        }
3214        // _*{500} is still _*
3215        if body_id.is_kind(self, Kind::Star) {
3216            return body_id;
3217        }
3218        self.get_node_id(key)
3219    }
3220
3221    /// it's cheaper to check this once as an edge-case
3222    /// than to compute a 4th nullability bit for every node
3223    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3224        match self.get_kind(node_id) {
3225            Kind::End => Nullability::EMPTYSTRING,
3226            Kind::Begin => Nullability::EMPTYSTRING,
3227            Kind::Pred => Nullability::NEVER,
3228            Kind::Star => Nullability::ALWAYS,
3229            Kind::Inter | Kind::Concat => {
3230                let lnull = self.nullability_emptystring(node_id.left(self));
3231                let rnull = self.nullability_emptystring(node_id.right(self));
3232                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3233            }
3234            Kind::Union => {
3235                let lnull = self.nullability_emptystring(node_id.left(self));
3236                let rnull = self.nullability_emptystring(node_id.right(self));
3237                lnull.or(rnull)
3238            }
3239            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3240            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3241            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3242            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3243        }
3244    }
3245
3246    #[inline(always)]
3247    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3248        self.get_meta(node_id)
3249            .flags
3250            .nullability()
3251            .has(Nullability::CENTER.or(Nullability::END))
3252    }
3253
3254    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3255        self.get_only_nullability(node_id)
3256    }
3257
3258    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3259        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3260    }
3261
3262    pub fn pp(&self, node_id: NodeId) -> String {
3263        let mut s = String::new();
3264        self.ppw(&mut s, node_id).unwrap();
3265        s
3266    }
3267
3268    #[allow(dead_code)]
3269    pub fn pp_nulls(&self, node_id: NodeId) -> String {
3270        let nu = self.get_nulls_id(node_id);
3271        let nr = self.mb.nb.get_set_ref(nu);
3272        let s1 = format!("{:?}", nr);
3273        s1
3274    }
3275
3276    #[allow(dead_code)]
3277    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3278        match self.get_tregex(term_id) {
3279            TRegex::Leaf(node_id) => {
3280                let mut s = String::new();
3281                self.ppw(&mut s, *node_id).unwrap();
3282                s
3283            }
3284            TRegex::ITE(cond, then_id, else_id) => {
3285                format!(
3286                    "ITE({},{},{})",
3287                    self.solver_ref().pp(*cond),
3288                    self.ppt(*then_id),
3289                    self.ppt(*else_id)
3290                )
3291            }
3292        }
3293    }
3294
3295    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3296        if cfg!(feature = "graphviz") {
3297            match node_id {
3298                NodeId::MISSING => return write!(s, "MISSING"),
3299                NodeId::BOT => return write!(s, "⊥"),
3300                NodeId::TS => return write!(s, "_*"),
3301                NodeId::TOP => return write!(s, "_"),
3302                NodeId::EPS => return write!(s, "ε"),
3303                _ => {}
3304            }
3305        }
3306
3307        match node_id {
3308            NodeId::MISSING => return write!(s, "MISSING"),
3309            NodeId::BOT => return write!(s, "⊥"),
3310            NodeId::TS => return write!(s, "_*"),
3311            NodeId::TOP => return write!(s, "_"),
3312            NodeId::EPS => return write!(s, ""),
3313            _ => {}
3314        }
3315
3316        match self.get_kind(node_id) {
3317            Kind::End => write!(s, r"\z"),
3318            Kind::Begin => write!(s, r"\A"),
3319            Kind::Pred => {
3320                let psi = node_id.pred_tset(self);
3321                if psi == TSetId::EMPTY {
3322                    write!(s, r"⊥")
3323                } else if psi == TSetId::FULL {
3324                    write!(s, r"_")
3325                } else {
3326                    write!(s, "{}", self.solver_ref().pp(psi))
3327                }
3328            }
3329            Kind::Inter => {
3330                write!(s, "(")?;
3331                self.ppw(s, node_id.left(self))?;
3332                write!(s, "&")?;
3333                let mut curr = node_id.right(self);
3334                while curr.is_inter(self) {
3335                    let n = curr.left(self);
3336                    self.ppw(s, n)?;
3337                    write!(s, "&")?;
3338                    curr = curr.right(self);
3339                }
3340                self.ppw(s, curr)?;
3341                write!(s, ")")
3342            }
3343            Kind::Union => {
3344                let left = node_id.left(self);
3345                let right = node_id.right(self);
3346                write!(s, "(")?;
3347                self.ppw(s, left)?;
3348                write!(s, "|")?;
3349                let mut curr = right;
3350                while self.get_kind(curr) == Kind::Union {
3351                    let n = curr.left(self);
3352                    self.ppw(s, n)?;
3353                    write!(s, "|")?;
3354                    curr = curr.right(self);
3355                }
3356                self.ppw(s, curr)?;
3357                write!(s, ")")
3358            }
3359            Kind::Concat => {
3360                let left = node_id.left(self);
3361                let right = node_id.right(self);
3362                if right.is_star(self) && right.left(self) == left {
3363                    self.ppw(s, left)?;
3364                    write!(s, "+")?;
3365                    return Ok(());
3366                }
3367                if right.is_concat(self) {
3368                    let rl = right.left(self);
3369                    if rl.is_star(self) && rl.left(self) == left {
3370                        self.ppw(s, left)?;
3371                        write!(s, "+")?;
3372                        return self.ppw(s, right.right(self));
3373                    }
3374                }
3375                if right.is_concat(self) && right.left(self) == left {
3376                    let mut num = 1;
3377                    let mut right = right;
3378                    while right.is_concat(self) && right.left(self) == left {
3379                        num += 1;
3380                        right = right.right(self);
3381                    }
3382                    // (|X){n} followed by X{m} -> X{m,m+n}
3383                    if let Some(inner) = left.is_opt_v(self) {
3384                        let mut inner_count = 0;
3385                        let mut right2 = right;
3386                        while right2.is_concat(self) && right2.left(self) == inner {
3387                            inner_count += 1;
3388                            right2 = right2.right(self);
3389                        }
3390                        if right2 == inner {
3391                            inner_count += 1;
3392                            self.ppw(s, inner)?;
3393                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3394                        }
3395                        if inner_count > 0 {
3396                            self.ppw(s, inner)?;
3397                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3398                            return self.ppw(s, right2);
3399                        }
3400                    }
3401                    self.ppw(s, left)?;
3402                    if right == left {
3403                        num += 1;
3404                        return write!(s, "{{{}}}", num);
3405                    }
3406                    if num <= 3 && left.is_pred(self) {
3407                        for _ in 1..num {
3408                            self.ppw(s, left)?;
3409                        }
3410                        return self.ppw(s, right);
3411                    } else {
3412                        write!(s, "{{{}}}", num)?;
3413                        return self.ppw(s, right);
3414                    }
3415                }
3416                self.ppw(s, left)?;
3417                self.ppw(s, right)
3418            }
3419            Kind::Star => {
3420                let left = node_id.left(self);
3421                let leftkind = self.get_kind(left);
3422                match leftkind {
3423                    Kind::Concat | Kind::Star | Kind::Compl => {
3424                        write!(s, "(")?;
3425                        self.ppw(s, left)?;
3426                        write!(s, ")")?;
3427                    }
3428                    _ => {
3429                        self.ppw(s, left)?;
3430                    }
3431                };
3432                write!(s, "*")
3433            }
3434            Kind::Compl => {
3435                write!(s, "~(")?;
3436                self.ppw(s, node_id.left(self))?;
3437                write!(s, ")")
3438            }
3439            Kind::Lookbehind => {
3440                let lbleft = self.get_lookbehind_prev(node_id);
3441                let lbinner = self.get_lookbehind_inner(node_id);
3442                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3443                if lbleft != NodeId::MISSING {
3444                    write!(s, "❮")?;
3445                    self.ppw(s, lbleft)?;
3446                    write!(s, "❯")?;
3447                }
3448
3449                write!(s, "(?<=")?;
3450                self.ppw(s, lbinner)?;
3451                write!(s, ")")
3452            }
3453            Kind::Lookahead => {
3454                let inner = self.get_lookahead_inner(node_id);
3455                write!(s, "(?=")?;
3456                self.ppw(s, inner)?;
3457                write!(s, ")")?;
3458                if self.get_lookahead_rel(node_id) != 0 {
3459                    write!(s, "{{")?;
3460                    let rel = self.get_lookahead_rel(node_id);
3461                    if rel == u32::MAX {
3462                        write!(s, "∅")?;
3463                    } else {
3464                        write!(s, "{}", rel)?;
3465                    }
3466                    write!(s, "}}")?;
3467                }
3468                if node_id.right(self) == NodeId::MISSING {
3469                    Ok(())
3470                } else {
3471                    write!(s, "❮")?;
3472                    self.ppw(s, node_id.right(self))?;
3473                    write!(s, "❯")
3474                }
3475            }
3476            Kind::Counted => {
3477                let body = node_id.left(self);
3478                let packed = self.get_extra(node_id);
3479                let step = packed & 0xFFFF;
3480                let best = packed >> 16;
3481                write!(s, "#(")?;
3482                self.ppw(s, body)?;
3483                write!(s, ")s{}b{}", step, best)
3484            }
3485        }
3486    }
3487
3488    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3489        self.mk_concat(node, NodeId::TS)
3490    }
3491
3492    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3493        let node_ts = self.mk_concat(node, NodeId::TS);
3494        self.mk_compl(node_ts)
3495    }
3496
3497    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3498        let notset = self.solver().not_id(set);
3499        self.mk_pred(notset)
3500    }
3501
3502    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3503        let set_id = self.solver().u8_to_set_id(char);
3504        self.mk_pred(set_id)
3505    }
3506
3507    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3508        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3509        self.mk_pred(rangeset)
3510    }
3511
3512    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3513        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3514        for &(lo, hi) in &ranges[1..] {
3515            let r = self.mk_range_u8(lo, hi);
3516            node = self.mk_union(node, r);
3517        }
3518        node
3519    }
3520
3521    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3522        let mut prefix = Vec::new();
3523        let mut curr = node;
3524        loop {
3525            if curr == NodeId::EPS {
3526                let full = !prefix.is_empty();
3527                return (prefix, full);
3528            }
3529            if curr == NodeId::BOT {
3530                break;
3531            }
3532            if curr.is_pred(self) {
3533                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3534                    Some(byte) => {
3535                        prefix.push(byte);
3536                        return (prefix, true);
3537                    }
3538                    None => break, // multi-byte pred: pattern not fully consumed
3539                }
3540            }
3541            if !curr.is_concat(self) {
3542                break;
3543            }
3544            let left = curr.left(self);
3545            if !left.is_pred(self) {
3546                break;
3547            }
3548            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3549                Some(byte) => prefix.push(byte),
3550                None => break,
3551            }
3552            curr = curr.right(self);
3553        }
3554        (prefix, false)
3555    }
3556
3557    #[allow(dead_code)]
3558    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3559        let mut result = NodeId::EPS;
3560        for byte in raw_str.iter().rev() {
3561            let node = self.mk_u8(*byte);
3562            result = self.mk_concat(node, result);
3563        }
3564        result
3565    }
3566
3567    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3568        let mut result = NodeId::EPS;
3569        for byte in raw_str.bytes().rev() {
3570            let node = self.mk_u8(byte);
3571            result = self.mk_concat(node, result);
3572        }
3573        result
3574    }
3575
3576    pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3577        self.prune_rec::<true>(node_id, memo)
3578    }
3579
3580    pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3581        self.prune_rec::<false>(node_id, memo)
3582    }
3583
3584    pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
3585        let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3586        self.simplify_rev_initial_rec(node_id, &mut memo)
3587    }
3588
3589    fn simplify_rev_initial_rec(
3590        &mut self,
3591        node_id: NodeId,
3592        memo: &mut FxHashMap<NodeId, NodeId>,
3593    ) -> NodeId {
3594        if let Some(&v) = memo.get(&node_id) {
3595            return v;
3596        }
3597        let out = match self.get_kind(node_id) {
3598            Kind::Union => {
3599                let mut parts: Vec<NodeId> = Vec::new();
3600                self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
3601                for p in &mut parts {
3602                    *p = self.simplify_rev_initial_rec(*p, memo);
3603                }
3604                parts
3605                    .iter()
3606                    .rev()
3607                    .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3608            }
3609            Kind::Concat => {
3610                let l = node_id.left(self);
3611                let r = node_id.right(self);
3612                let l_new = self.simplify_rev_initial_rec(l, memo);
3613                let r_new = self.simplify_rev_initial_rec(r, memo);
3614                let rebuilt = if l_new == l && r_new == r {
3615                    node_id
3616                } else {
3617                    self.mk_concat(l_new, r_new)
3618                };
3619                if self.get_kind(rebuilt) != Kind::Concat {
3620                    return rebuilt;
3621                }
3622                let cl = rebuilt.left(self);
3623                let cr = rebuilt.right(self);
3624                if cl != NodeId::TS {
3625                    return rebuilt;
3626                }
3627                let (head, tail) = if self.get_kind(cr) == Kind::Concat {
3628                    (cr.left(self), cr.right(self))
3629                } else {
3630                    (cr, NodeId::EPS)
3631                };
3632                if self.nullability(tail) != Nullability::ALWAYS {
3633                    return rebuilt;
3634                }
3635                if self.get_kind(head) != Kind::Union {
3636                    return rebuilt;
3637                }
3638                let mut has_begin = false;
3639                self.iter_unions_b(head, &mut |_, v| {
3640                    if v == NodeId::BEGIN {
3641                        has_begin = true;
3642                    }
3643                });
3644                if has_begin {
3645                    NodeId::TS
3646                } else {
3647                    rebuilt
3648                }
3649            }
3650            _ => node_id,
3651        };
3652        memo.insert(node_id, out);
3653        out
3654    }
3655
3656    fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
3657        if self.get_kind(n) != Kind::Concat {
3658            return n;
3659        }
3660        let l = n.left(self);
3661        let r = n.right(self);
3662        if l == NodeId::TS && r == NodeId::END {
3663            return NodeId::TS;
3664        }
3665        let new_r = self.strip_la_body_end(r);
3666        if new_r == r {
3667            n
3668        } else {
3669            self.mk_concat(l, new_r)
3670        }
3671    }
3672
3673    fn prune_rec<const FWD: bool>(
3674        &mut self,
3675        node_id: NodeId,
3676        memo: &mut FxHashMap<NodeId, NodeId>,
3677    ) -> NodeId {
3678        if node_id == NodeId::MISSING {
3679            return node_id;
3680        }
3681        if let Some(&v) = memo.get(&node_id) {
3682            return v;
3683        }
3684        let (l, r) = (node_id.left(self), node_id.right(self));
3685        let out = match node_id.kind(self) {
3686            Kind::Union => {
3687                let l = self.prune_rec::<FWD>(l, memo);
3688                let r = self.prune_rec::<FWD>(r, memo);
3689                let mut parts: Vec<NodeId> = Vec::new();
3690                parts.push(l);
3691                self.iter_unions_b(r, &mut |_, v| parts.push(v));
3692                for p in &mut parts {
3693                    *p = self.prune_rec::<FWD>(*p, memo);
3694                }
3695
3696                if FWD {
3697                    // min rel per body for pure lookaheads (la_tail MISSING)
3698                    let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
3699                    for &p in &parts {
3700                        if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
3701                            let body = self.get_lookahead_inner(p);
3702                            let rel = self.get_lookahead_rel(p);
3703                            best.entry(body)
3704                                .and_modify(|r| *r = (*r).min(rel))
3705                                .or_insert(rel);
3706                        }
3707                    }
3708                    parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
3709                        if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
3710                            let body = self.get_lookahead_inner(p);
3711                            if self.get_lookahead_rel(p) != best[&body] {
3712                                return acc;
3713                            }
3714                        }
3715                        self.mk_union(p, acc)
3716                    })
3717                } else {
3718                    parts
3719                        .iter()
3720                        .rev()
3721                        .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3722                }
3723            }
3724            Kind::Concat => {
3725                // Rev-only collapse:  `TS · head · tail ≡ TS` when `head`
3726                // is a union containing `BEGIN` (= `\A`) as a branch and
3727                // `tail` is always-nullable.  Reasoning: the leading `TS`
3728                // can consume zero bytes, letting `BEGIN` fire at input
3729                // position 0; the always-nullable tail then accepts ε,
3730                // yielding an empty match at pos 0 for every string.  So
3731                // `L(TS · head · tail) ⊇ Σ*`.  Must run before the default
3732                // `Kind::Begin => BOT` rewrite destroys the BEGIN branch.
3733                if !FWD && l == NodeId::TS {
3734                    // decompose r as head . tail (tail may be ε if r is just head)
3735                    let (head, tail) = if self.get_kind(r) == Kind::Concat {
3736                        (r.left(self), r.right(self))
3737                    } else {
3738                        (r, NodeId::EPS)
3739                    };
3740
3741                    if self.nullability(tail) == Nullability::ALWAYS
3742                        && self.get_kind(head) == Kind::Union
3743                    {
3744                        let mut has_begin = false;
3745                        self.iter_unions_b(head, &mut |_, v| {
3746                            if v == NodeId::BEGIN {
3747                                has_begin = true;
3748                            }
3749                        });
3750                        if has_begin {
3751                            let out = NodeId::TS;
3752                            memo.insert(node_id, out);
3753                            return out;
3754                        }
3755                    }
3756                }
3757                let l = self.prune_rec::<FWD>(l, memo);
3758                let r = self.prune_rec::<FWD>(r, memo);
3759                self.mk_concat(l, r)
3760            }
3761            Kind::Inter => {
3762                let l = self.prune_rec::<FWD>(l, memo);
3763                let r = self.prune_rec::<FWD>(r, memo);
3764                self.mk_inter(l, r)
3765            }
3766            Kind::Compl => {
3767                let l = self.prune_rec::<FWD>(l, memo);
3768                self.mk_compl(l)
3769            }
3770            Kind::Lookahead => {
3771                let lrel = self.get_lookahead_rel(node_id);
3772                let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
3773                let body = self.prune_rec::<FWD>(body, memo);
3774                let tail = self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo);
3775                self.mk_lookahead_internal(body, tail, lrel)
3776            }
3777            Kind::Begin => NodeId::BOT,
3778            Kind::Counted => {
3779                let body = self.prune_rec::<FWD>(l, memo);
3780                let chain = self.prune_rec::<FWD>(r, memo);
3781                self.mk_counted(body, chain, self.get_extra(node_id))
3782            }
3783            Kind::End | Kind::Pred => node_id,
3784            Kind::Star => {
3785                let l = self.prune_rec::<FWD>(l, memo);
3786                self.mk_star(l)
3787            }
3788            Kind::Lookbehind => {
3789                let l = self.prune_rec::<FWD>(l, memo);
3790                if r.is_missing() {
3791                    l
3792                } else {
3793                    node_id
3794                }
3795            }
3796        };
3797        memo.insert(node_id, out);
3798        out
3799    }
3800
3801    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3802        let mut sorted: Vec<NodeId> = nodes.collect();
3803        if sorted.len() <= 1 {
3804            return sorted.pop().unwrap_or(NodeId::BOT);
3805        }
3806        sorted.sort();
3807        sorted.dedup();
3808        sorted.retain(|&x| x != NodeId::BOT);
3809        if sorted.is_empty() {
3810            return NodeId::BOT;
3811        }
3812        if sorted.len() > 16 {
3813            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3814            let mut non_concat: Vec<NodeId> = Vec::new();
3815            for &n in &sorted {
3816                if n.is_concat(self) {
3817                    by_head.entry(self.get_left(n)).or_default().push(n);
3818                } else {
3819                    non_concat.push(n);
3820                }
3821            }
3822            let mut absorbed: Vec<NodeId> = Vec::new();
3823            for &n in &non_concat {
3824                if by_head.contains_key(&n) {
3825                    absorbed.push(n);
3826                }
3827            }
3828            if !absorbed.is_empty() {
3829                non_concat.retain(|n| !absorbed.contains(n));
3830            }
3831            if by_head.len() < sorted.len() {
3832                let mut groups: Vec<NodeId> = non_concat;
3833                for (head, tails) in by_head {
3834                    let mut tail_nodes: Vec<NodeId> =
3835                        tails.iter().map(|&n| self.get_right(n)).collect();
3836                    if absorbed.contains(&head) {
3837                        tail_nodes.push(NodeId::EPS);
3838                    }
3839                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3840                    let factored = self.mk_concat(head, tail_union);
3841                    groups.push(factored);
3842                }
3843                groups.sort();
3844                groups.dedup();
3845                return self.mk_unions_balanced(&groups);
3846            }
3847        }
3848        self.mk_unions_balanced(&sorted)
3849    }
3850
3851    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3852        match nodes.len() {
3853            0 => NodeId::BOT,
3854            1 => nodes[0],
3855            n => {
3856                let mid = n / 2;
3857                let left = self.mk_unions_balanced(&nodes[..mid]);
3858                let right = self.mk_unions_balanced(&nodes[mid..]);
3859                self.mk_union(left, right)
3860            }
3861        }
3862    }
3863
3864    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3865        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3866    }
3867
3868    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3869        nodes
3870            .rev()
3871            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3872    }
3873}
3874
3875impl RegexBuilder {
3876    pub fn iter_sat(
3877        &mut self,
3878        stack: &mut Vec<(TRegexId, TSetId)>,
3879        f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
3880    ) {
3881        debug_assert!(!stack.is_empty());
3882        loop {
3883            match stack.pop() {
3884                None => return,
3885                Some((curr, curr_set)) => match *self.get_tregex(curr) {
3886                    TRegex::Leaf(n) => {
3887                        let mut curr = n;
3888                        while curr != NodeId::BOT {
3889                            match self.get_kind(curr) {
3890                                _ => {
3891                                    f(self, n, curr_set);
3892                                    curr = NodeId::BOT;
3893                                }
3894                            }
3895                        }
3896                    }
3897                    TRegex::ITE(cnd, then_id, else_id) => {
3898                        if else_id != TRegexId::BOT {
3899                            let notcnd = self.solver().not_id(cnd);
3900                            let interset1 = self.solver().and_id(curr_set, notcnd);
3901                            stack.push((else_id, interset1));
3902                        }
3903                        let interset2 = self.solver().and_id(curr_set, cnd);
3904                        stack.push((then_id, interset2));
3905                    }
3906                },
3907            }
3908        }
3909    }
3910
3911    #[allow(dead_code)]
3912    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3913        match self.get_tregex(term_id).clone() {
3914            TRegex::Leaf(node_id) => {
3915                if NodeId::BOT == node_id {
3916                    vec![]
3917                } else {
3918                    vec![node_id]
3919                }
3920            }
3921            TRegex::ITE(_, then_id, else_id) => {
3922                let mut then_nodes = self.extract_sat(then_id);
3923                let mut else_nodes = self.extract_sat(else_id);
3924                then_nodes.append(&mut else_nodes);
3925                then_nodes
3926            }
3927        }
3928    }
3929
3930    pub(crate) fn iter_unions_b(
3931        &mut self,
3932        curr: NodeId,
3933        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3934    ) {
3935        let mut curr = curr;
3936        while self.get_kind(curr) == Kind::Union {
3937            f(self, curr.left(self));
3938            curr = curr.right(self);
3939        }
3940        f(self, curr);
3941    }
3942
3943    pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3944        if !self.contains_look(node_id) {
3945            return Some(node_id);
3946        }
3947        match self.get_kind(node_id) {
3948            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3949            Kind::Concat => {
3950                let left = node_id.left(self);
3951                let right = node_id.right(self);
3952                let elim_l = self.try_elim_lookarounds(left)?;
3953                let elim_r = self.try_elim_lookarounds(right)?;
3954                let rw = self.mk_concat(elim_l, elim_r);
3955                Some(rw)
3956            }
3957            Kind::Union => {
3958                let left = node_id.left(self);
3959                let right = node_id.right(self);
3960                let elim_l = self.try_elim_lookarounds(left)?;
3961                let elim_r = self.try_elim_lookarounds(right)?;
3962                let rw = self.mk_union(elim_l, elim_r);
3963                Some(rw)
3964            }
3965
3966            Kind::Star => {
3967                let body = node_id.left(self);
3968                let elim_l = self.try_elim_lookarounds(body)?;
3969                Some(self.mk_star(elim_l))
3970            }
3971            Kind::Compl => {
3972                let left = node_id.left(self);
3973                let elim_l = self.try_elim_lookarounds(left)?;
3974                Some(self.mk_compl(elim_l))
3975            }
3976            Kind::Lookahead => {
3977                let rel = self.get_lookahead_rel(node_id);
3978                if rel != 0 {
3979                    return None;
3980                }
3981                let lbody = self.get_lookahead_inner(node_id);
3982                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3983                let elim_l = self.try_elim_lookarounds(lbody)?;
3984                let elim_r = self.try_elim_lookarounds(ltail)?;
3985                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3986                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3987                let rw = self.mk_inter(lbody_ts, ltail_ts);
3988                Some(rw)
3989            }
3990            Kind::Lookbehind => {
3991                let linner = self.get_lookbehind_inner(node_id);
3992                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3993                let elim_l = self.try_elim_lookarounds(linner)?;
3994                let elim_r = self.try_elim_lookarounds(lprev)?;
3995                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3996                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3997                let rw = self.mk_inter(lbody_ts, ltail_ts);
3998                Some(rw)
3999            }
4000            Kind::Inter => {
4001                let left = node_id.left(self);
4002                let right = node_id.right(self);
4003                let elim_l = self.try_elim_lookarounds(left)?;
4004                let elim_r = self.try_elim_lookarounds(right)?;
4005                let rw = self.mk_inter(elim_l, elim_r);
4006                Some(rw)
4007            }
4008            Kind::Counted => None,
4009        }
4010    }
4011
4012    /// R & _+ is a safe overapproximation of R that's nonempty
4013    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4014        if self.nullability(node) == Nullability::NEVER {
4015            node
4016        } else {
4017            self.mk_inter(NodeId::TOPPLUS, node)
4018        }
4019    }
4020
4021    pub fn iter_find_stack(
4022        &self,
4023        stack: &mut Vec<TRegexId>,
4024        mut f: impl FnMut(NodeId) -> bool,
4025    ) -> bool {
4026        loop {
4027            match stack.pop() {
4028                None => return false,
4029                Some(curr) => match self.get_tregex(curr) {
4030                    TRegex::Leaf(n) => {
4031                        let mut curr = *n;
4032                        while curr != NodeId::BOT {
4033                            match self.get_kind(curr) {
4034                                Kind::Union => {
4035                                    if f(curr.left(self)) {
4036                                        return true;
4037                                    }
4038                                    curr = curr.right(self);
4039                                }
4040                                _ => {
4041                                    if f(*n) {
4042                                        return true;
4043                                    }
4044                                    curr = NodeId::BOT;
4045                                }
4046                            }
4047                        }
4048                    }
4049                    TRegex::ITE(_, then_id, else_id) => {
4050                        if *else_id != TRegexId::BOT {
4051                            stack.push(*else_id);
4052                        }
4053                        stack.push(*then_id);
4054                    }
4055                },
4056            }
4057        }
4058    }
4059
4060    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
4061        if node == NodeId::BOT {
4062            return Some(true);
4063        }
4064        if self.nullability(node) != Nullability::NEVER {
4065            return Some(false);
4066        }
4067        if let Some(cached) = self.cache_empty.get(&node) {
4068            if cached.is_checked() {
4069                return Some(cached.is_empty());
4070            }
4071        }
4072        let node = if !self.contains_look(node) {
4073            node
4074        } else {
4075            self.try_elim_lookarounds(node)?
4076        };
4077        let isempty_flag = self.is_empty_lang_internal(node);
4078
4079        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
4080    }
4081
4082    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
4083        // without inter, no need to check
4084        if !self.get_meta_flags(initial_node).contains_inter() {
4085            return Ok(NodeFlags::ZERO);
4086        }
4087
4088        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4089        let mut worklist: VecDeque<NodeId> = VecDeque::new();
4090        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
4091        let mut stack = Vec::new();
4092        stack.push(begin_der);
4093        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
4094            visited.insert(node, initial_node);
4095            let nullability = self.nullability(node);
4096            if nullability != Nullability::NEVER {
4097                true
4098            } else {
4099                worklist.push_back(node);
4100                false
4101            }
4102        });
4103        if found_nullable_right_away {
4104            return Ok(NodeFlags::ZERO);
4105        }
4106
4107        worklist.push_back(initial_node);
4108        let isempty_flag: NodeFlags;
4109        let mut found_node = NodeId::BOT;
4110
4111        loop {
4112            match worklist.pop_front() {
4113                None => {
4114                    isempty_flag = NodeFlags::IS_EMPTY;
4115                    break;
4116                }
4117                Some(outer) => {
4118                    if let Some(cached) = self.cache_empty.get(&outer) {
4119                        if cached.is_checked() {
4120                            if cached.is_empty() {
4121                                continue;
4122                            } else {
4123                                return Ok(NodeFlags::ZERO);
4124                            }
4125                        }
4126                    }
4127
4128                    let derivative = self.der(outer, Nullability::CENTER)?;
4129
4130                    stack.push(derivative);
4131
4132                    let found_null = self.iter_find_stack(&mut stack, |node| {
4133                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
4134                            found_node = node;
4135                            if !self.get_meta_flags(node).contains_inter() {
4136                                true
4137                            } else {
4138                                e.insert(outer);
4139                                worklist.push_front(node);
4140                                self.any_nonbegin_nullable(node)
4141                            }
4142                        } else {
4143                            false
4144                        }
4145                    });
4146                    if found_null {
4147                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
4148                        isempty_flag = NodeFlags::ZERO;
4149                        break;
4150                    }
4151                }
4152            }
4153        }
4154
4155        self.cache_empty.insert(
4156            initial_node,
4157            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
4158        );
4159        Ok(isempty_flag)
4160    }
4161
4162    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
4163    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
4164        if larger_lang == smaller_lang {
4165            return Some(true);
4166        }
4167
4168        // assess initial nullability
4169        if self
4170            .nullability(larger_lang)
4171            .not()
4172            .and(self.nullability(smaller_lang))
4173            != Nullability::NEVER
4174        {
4175            return Some(false);
4176        }
4177
4178        // check language nullability
4179        // if (B &~ A) ≡ ⊥ then B ⊆ A
4180        // this means  L(B) \ L(A) = {}
4181        // eg. (a &~ .*) = ⊥ means .* subsumes a
4182        let (smaller_lang, larger_lang) =
4183            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
4184                let wrap = |b: &mut Self, n: NodeId| {
4185                    let tmp = b.mk_concat(n, NodeId::TS);
4186                    b.mk_concat(NodeId::TS, tmp)
4187                };
4188                (wrap(self, smaller_lang), wrap(self, larger_lang))
4189            } else {
4190                (smaller_lang, larger_lang)
4191            };
4192
4193        let nota = self.mk_compl(larger_lang);
4194        let diff = self.mk_inter(smaller_lang, nota);
4195        self.is_empty_lang(diff)
4196    }
4197}