Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_inter(self) -> bool {
134        self.has(MetaFlags::CONTAINS_INTER)
135    }
136
137    pub(crate) fn all_contains_flags(self) -> MetaFlags {
138        self.and(
139            MetaFlags::CONTAINS_LOOKAROUND
140                .or(MetaFlags::CONTAINS_ANCHORS)
141                .or(MetaFlags::CONTAINS_INTER)
142                .or(MetaFlags::CONTAINS_LOOKBEHIND),
143        )
144    }
145}
146
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
148#[repr(u8)]
149pub enum Kind {
150    #[default]
151    Pred,
152    Star,
153    Begin,
154    End,
155    Concat,
156    Union,
157    Compl,
158    Lookbehind,
159    Lookahead,
160    Inter,
161    Counted,
162}
163
164#[derive(Eq, Hash, PartialEq, Clone)]
165struct Metadata {
166    flags: MetaFlags,
167    nulls: NullsId,
168}
169
170struct MetadataBuilder {
171    num_created: u32,
172    solver: Solver,
173    nb: NullsBuilder,
174    index: FxHashMap<Metadata, MetadataId>,
175    pub array: Vec<Metadata>,
176}
177
178mod helpers {
179    pub(crate) fn incr_rel(n1: u32) -> u32 {
180        match n1.overflowing_add(1) {
181            (_, true) => u32::MAX,
182            (res, false) => res,
183        }
184    }
185}
186
187impl MetadataBuilder {
188    fn new() -> MetadataBuilder {
189        Self {
190            index: FxHashMap::default(),
191            array: vec![Metadata {
192                flags: MetaFlags::ZERO,
193                nulls: NullsId::EMPTY,
194            }],
195            solver: Solver::new(),
196            num_created: 0,
197            nb: NullsBuilder::new(),
198        }
199    }
200
201    fn init(&mut self, inst: Metadata) -> MetadataId {
202        self.num_created += 1;
203        let new_id = MetadataId(self.num_created);
204        self.index.insert(inst.clone(), new_id);
205        self.array.push(inst);
206        new_id
207    }
208
209    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210        match self.index.get(&inst) {
211            Some(&id) => id,
212            None => self.init(inst),
213        }
214    }
215
216    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217        &self.array[inst.0 as usize]
218    }
219
220    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221        setflags.all_contains_flags()
222    }
223
224    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
225        let left = &self.array[body.0 as usize].flags;
226        let contains = self.get_contains(*left);
227        // BOT* = EPS (empty string only), not infinite
228        let inf = if body_id == NodeId::BOT {
229            MetaFlags::ZERO
230        } else {
231            MetaFlags::INFINITE_LENGTH
232        };
233        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
234    }
235
236    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
237        let left = &self.array[left_id.0 as usize].flags;
238        let null = left.nullability().not().and(Nullability::ALWAYS);
239        let contains = self.get_contains(*left);
240        MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
241    }
242
243    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
244        let left = &self.array[left_id.0 as usize].flags;
245        let right = &self.array[right_id.0 as usize].flags;
246        let null = left.nullability().and(right.nullability());
247        let contains = self.get_contains(left.or(*right));
248        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
249        MetaFlags::with_nullability(null, contains.or(len))
250    }
251
252    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253        let left = &self.array[left_id.0 as usize].flags;
254        let right = &self.array[right_id.0 as usize].flags;
255        let null = left.nullability().and(right.nullability());
256        let contains = self
257            .get_contains(left.or(*right))
258            .or(MetaFlags::CONTAINS_INTER);
259        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
260        MetaFlags::with_nullability(null, contains.or(len))
261    }
262
263    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
264        let left = &self.array[left_id.0 as usize].flags;
265        let right = &self.array[right_id.0 as usize].flags;
266        let null = left.nullability().or(right.nullability());
267        let contains = self.get_contains(left.or(*right));
268        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
269        MetaFlags::with_nullability(null, contains.or(len))
270    }
271}
272
273#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
274pub struct NodeKey {
275    pub(crate) kind: Kind,
276    pub(crate) left: NodeId,
277    pub(crate) right: NodeId,
278    pub(crate) extra: u32,
279}
280
281#[derive(Eq, Hash, PartialEq, Clone, Debug)]
282pub enum TRegex<TSet> {
283    Leaf(NodeId),
284    ITE(TSet, TRegexId, TRegexId),
285}
286
287#[derive(Eq, Hash, PartialEq, Clone, Copy)]
288pub(crate) struct NodeFlags(u8);
289impl NodeFlags {
290    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
291    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
292    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
293
294    fn is_checked(self) -> bool {
295        self.0 >= NodeFlags::IS_CHECKED.0
296    }
297    fn is_empty(self) -> bool {
298        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
299    }
300}
301
302#[derive(Eq, Hash, PartialEq, Clone, Copy)]
303pub(crate) struct BuilderFlags(u8);
304impl BuilderFlags {
305    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
306    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
307}
308
309pub struct RegexBuilder {
310    mb: MetadataBuilder,
311    temp_vec: Vec<NodeId>,
312    num_created: u32,
313    index: FxHashMap<NodeKey, NodeId>,
314    array: Vec<NodeKey>,
315    metadata: Vec<MetadataId>,
316    reversed: Vec<NodeId>,
317    cache_empty: FxHashMap<NodeId, NodeFlags>,
318    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
319    tr_array: Vec<TRegex<TSetId>>,
320    tr_der_center: Vec<TRegexId>,
321    tr_der_begin: Vec<TRegexId>,
322    flags: BuilderFlags,
323    /// maximum lookahead context distance before returning `AnchorLimit`.
324    pub lookahead_context_max: u32,
325    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
326    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
327}
328
329impl NodeId {
330    fn is_missing(&self) -> bool {
331        *self == NodeId::MISSING
332    }
333    #[inline]
334    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
335        b.get_flags_contains(self)
336    }
337    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
338        if self == tail {
339            true
340        } else if self.is_kind(b, Kind::Concat) {
341            self.right(b).has_concat_tail(b, tail)
342        } else {
343            false
344        }
345    }
346    fn missing_to_eps(&self) -> NodeId {
347        if *self == NodeId::MISSING {
348            NodeId::EPS
349        } else {
350            *self
351        }
352    }
353
354    #[inline]
355    fn kind(self, b: &RegexBuilder) -> Kind {
356        b.get_kind(self)
357    }
358    #[inline]
359    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
360        b.get_kind(self) == k
361    }
362    #[inline]
363    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
364        b.nullability(self) == Nullability::NEVER
365    }
366    #[inline]
367    pub fn nullability(self, b: &RegexBuilder) -> Nullability {
368        b.nullability(self)
369    }
370
371    #[inline]
372    pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
373        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
374    }
375    #[inline]
376    pub fn left(self, b: &RegexBuilder) -> NodeId {
377        b.get_left(self)
378    }
379
380    #[inline]
381    pub fn right(self, b: &RegexBuilder) -> NodeId {
382        b.get_right(self)
383    }
384
385    #[inline]
386    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
387        b.der(self, mask)
388    }
389
390    #[inline]
391    fn extra(self, b: &RegexBuilder) -> u32 {
392        b.get_extra(self)
393    }
394
395    #[inline]
396    fn is_pred(self, b: &RegexBuilder) -> bool {
397        b.get_kind(self) == Kind::Pred
398    }
399
400    #[inline]
401    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
402        debug_assert!(self.is_pred(b));
403        TSetId(b.get_extra(self))
404    }
405    #[inline]
406    fn is_star(self, b: &RegexBuilder) -> bool {
407        if NodeId::EPS == self {
408            return false;
409        }
410        b.get_kind(self) == Kind::Star
411    }
412
413    pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
414        b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
415    }
416
417    pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
418        b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAROUND)
419    }
420
421    #[inline]
422    pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
423        b.get_kind(self) == Kind::Inter
424    }
425
426    #[inline]
427    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
428        if self.is_concat(b) {
429            let r = self.right(b);
430            return r.is_star(b) && r.left(b) == self.left(b);
431        }
432        false
433    }
434
435    #[inline]
436    fn is_begin(self) -> bool {
437        self == NodeId::BEGIN
438    }
439
440    #[inline]
441    fn is_union(self, b: &RegexBuilder) -> bool {
442        b.get_kind(self) == Kind::Union
443    }
444
445    #[inline]
446    fn is_concat(self, b: &RegexBuilder) -> bool {
447        b.get_kind(self) == Kind::Concat
448    }
449
450    #[inline]
451    fn is_lookahead(self, b: &RegexBuilder) -> bool {
452        b.get_kind(self) == Kind::Lookahead
453    }
454
455    #[inline]
456    fn is_lookbehind(self, b: &RegexBuilder) -> bool {
457        b.get_kind(self) == Kind::Lookbehind
458    }
459
460    #[inline]
461    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
462        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
463            Some(b.get_right(self))
464        } else {
465            None
466        }
467    }
468
469    #[inline]
470    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
471        if b.get_kind(self) == Kind::Concat {
472            let left = self.left(b);
473            let right = self.right(b);
474            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
475                return right == NodeId::END;
476            }
477        }
478        false
479    }
480
481    #[inline]
482    fn is_ts(self) -> bool {
483        NodeId::TS == self
484    }
485
486    #[inline]
487    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
488        if NodeId::EPS == self {
489            return None;
490        }
491        if self.is_star(b) && self.left(b).is_pred(b) {
492            Some(self.left(b))
493        } else {
494            None
495        }
496    }
497
498    #[inline]
499    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
500        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
501            let middle = self.right(b);
502            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
503                return Some(middle.left(b));
504            }
505        }
506        None
507    }
508
509    #[inline]
510    pub(crate) fn iter_union(
511        self,
512        b: &mut RegexBuilder,
513        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
514    ) {
515        let mut curr: NodeId = self;
516        while curr.kind(b) == Kind::Union {
517            f(b, curr.left(b));
518            curr = curr.right(b);
519        }
520        f(b, curr);
521    }
522
523    #[inline]
524    pub(crate) fn iter_inter(
525        self,
526        b: &mut RegexBuilder,
527        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
528    ) {
529        let mut curr: NodeId = self;
530        while curr.kind(b) == Kind::Inter {
531            f(b, curr.left(b));
532            curr = curr.right(b);
533        }
534        f(b, curr);
535    }
536
537    #[inline]
538    pub(crate) fn iter_union_while(
539        self,
540        b: &mut RegexBuilder,
541        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
542    ) {
543        let mut curr: NodeId = self;
544        let mut continue_loop = true;
545        while continue_loop && curr.kind(b) == Kind::Union {
546            continue_loop = f(b, curr.left(b));
547            curr = curr.right(b);
548        }
549        if continue_loop {
550            f(b, curr);
551        }
552    }
553
554    #[inline]
555    pub(crate) fn any_inter_component(
556        self,
557        b: &RegexBuilder,
558        mut f: impl FnMut(NodeId) -> bool,
559    ) -> bool {
560        debug_assert!(self.kind(b) == Kind::Inter);
561        let mut cur = self;
562        while cur.kind(b) == Kind::Inter {
563            if f(cur.left(b)) {
564                return true;
565            }
566            cur = cur.right(b);
567        }
568        f(cur)
569    }
570
571    #[inline]
572    pub(crate) fn any_union_component(
573        self,
574        b: &RegexBuilder,
575        mut f: impl FnMut(NodeId) -> bool,
576    ) -> bool {
577        let mut cur = self;
578        while cur.kind(b) == Kind::Union {
579            if f(cur.left(b)) {
580                return true;
581            }
582            cur = cur.right(b);
583        }
584        f(cur)
585    }
586}
587
588impl Default for RegexBuilder {
589    fn default() -> Self {
590        Self::new()
591    }
592}
593
594impl RegexBuilder {
595    pub fn new() -> RegexBuilder {
596        let mut inst = Self {
597            mb: MetadataBuilder::new(),
598            array: Vec::new(),
599            index: FxHashMap::default(),
600            cache_empty: FxHashMap::default(),
601            tr_array: Vec::new(),
602            tr_cache: FxHashMap::default(),
603            flags: BuilderFlags::ZERO,
604            lookahead_context_max: 800,
605            num_created: 0,
606            metadata: Vec::new(),
607            reversed: Vec::new(),
608            tr_der_center: Vec::new(),
609            tr_der_begin: Vec::new(),
610            temp_vec: Vec::new(),
611            mk_binary_memo: FxHashMap::default(),
612            clean_cache: FxHashMap::default(),
613        };
614        inst.array.push(NodeKey::default());
615        inst.mk_pred(TSetId::EMPTY);
616        inst.mk_star(NodeId::BOT);
617        inst.mk_pred(TSetId::FULL);
618        inst.mk_star(NodeId::TOP);
619        let top_plus_id = inst.mk_plus(NodeId::TOP);
620        inst.mk_unset(Kind::Begin);
621        inst.mk_unset(Kind::End);
622        debug_assert!(top_plus_id == NodeId::TOPPLUS);
623
624        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
625        inst.mk_leaf(NodeId::BOT);
626        inst.mk_leaf(NodeId::EPS);
627        inst.mk_leaf(NodeId::TOP);
628        inst.mk_leaf(NodeId::TS);
629
630        inst.flags = BuilderFlags::SUBSUME;
631        inst
632    }
633
634    #[inline]
635    pub fn solver_ref(&self) -> &Solver {
636        &self.mb.solver
637    }
638
639    #[inline]
640    pub fn solver(&mut self) -> &mut Solver {
641        &mut self.mb.solver
642    }
643
644    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
645        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
646        self.tr_cache.insert(inst.clone(), new_id);
647        self.tr_array.push(inst);
648        new_id
649    }
650
651    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
652        match self.tr_cache.get(&inst) {
653            Some(&id) => id,
654            None => self.tr_init(inst),
655        }
656    }
657
658    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
659        &self.tr_array[inst.0 as usize]
660    }
661
662    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
663        let solv = self.solver();
664        !solv.is_sat_id(cond1, cond2)
665    }
666
667    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
668        let term = TRegex::<TSetId>::Leaf(node_id);
669        self.get_tregex_id(term)
670    }
671
672    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
673        let tmp_inst = TRegex::ITE(cond, _then, _else);
674        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
675            return *cached;
676        }
677        if _then == _else {
678            return _then;
679        }
680        if self.solver().is_full_id(cond) {
681            return _then;
682        }
683        if self.solver().is_empty_id(cond) {
684            return _else;
685        }
686        let _clean_then = match self.tr_array[_then.0 as usize] {
687            TRegex::Leaf(_) => _then,
688            _ => self.clean(cond, _then),
689        };
690        let notcond = self.solver().not_id(cond);
691        let _clean_else = match self.tr_array[_else.0 as usize] {
692            TRegex::Leaf(_) => _else,
693            _ => self.clean(notcond, _else),
694        };
695
696        if _clean_then == _clean_else {
697            return _clean_then;
698        }
699        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
700        match self.get_tregex(_clean_then) {
701            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
702                let _cond_then = *leftcond;
703                let new_then = *_inner_then;
704                let sand = self.solver().and_id(cond, _cond_then);
705                return self.mk_ite(sand, new_then, _clean_else);
706            }
707            _ => {}
708        }
709        // attempt right side flattening:
710        match self.get_tregex(_clean_else) {
711            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
712            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
713                let e2clone = *_e2;
714                let new_cond = self.mb.solver.or_id(cond, *_c2);
715                return self.mk_ite(new_cond, _clean_then, e2clone);
716            }
717            _ => {}
718        }
719
720        if _clean_then == TRegexId::BOT {
721            let flip_cond = self.solver().not_id(cond);
722            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
723            return cleaned_id;
724        }
725
726        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
727    }
728
729    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
730        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
731            return cached;
732        }
733        let result = match *self.get_tregex(tterm) {
734            TRegex::Leaf(_) => tterm,
735            TRegex::ITE(alpha, _then_id, _else_id) => {
736                let notalpha = self.mb.solver.not_id(alpha);
737                if self.mb.solver.unsat_id(beta, alpha) {
738                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
739                    self.clean(beta, _else_id)
740                } else if self.unsat(beta, notalpha) {
741                    // beta ⊆ alpha, so beta ∧ alpha = beta
742                    self.clean(beta, _then_id)
743                } else {
744                    let tc = self.mb.solver.and_id(beta, alpha);
745                    let ec = self.mb.solver.and_id(beta, notalpha);
746                    let new_then = self.clean(tc, _then_id);
747                    let new_else = self.clean(ec, _else_id);
748                    self.mk_ite(alpha, new_then, new_else)
749                }
750            }
751        };
752        self.clean_cache.insert((beta, tterm), result);
753        result
754    }
755
756    fn mk_unary(
757        &mut self,
758        term: TRegexId,
759        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
760    ) -> TRegexId {
761        match self.tr_array[term.0 as usize] {
762            TRegex::Leaf(node_id) => {
763                let applied = apply(self, node_id);
764                self.mk_leaf(applied)
765            }
766            TRegex::ITE(c1, _then, _else) => {
767                let _then1 = self.mk_unary(_then, apply);
768                let _else1 = self.mk_unary(_else, apply);
769                self.mk_ite(c1, _then1, _else1)
770            }
771        }
772    }
773
774    fn mk_binary_result(
775        &mut self,
776        left: TRegexId,
777        right: TRegexId,
778        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
779    ) -> Result<TRegexId, AlgebraError> {
780        match self.tr_array[left.0 as usize] {
781            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
782                TRegex::Leaf(right_node_id) => {
783                    let applied = apply(self, left_node_id, right_node_id)?;
784                    Ok(self.mk_leaf(applied))
785                }
786                TRegex::ITE(c2, _then, _else) => {
787                    let then2 = self.mk_binary_result(left, _then, apply)?;
788                    let else2 = self.mk_binary_result(left, _else, apply)?;
789                    Ok(self.mk_ite(c2, then2, else2))
790                }
791            },
792            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
793                TRegex::Leaf(_) => {
794                    let then2 = self.mk_binary_result(_then1, right, apply)?;
795                    let else2 = self.mk_binary_result(_else1, right, apply)?;
796                    Ok(self.mk_ite(c1, then2, else2))
797                }
798                TRegex::ITE(c2, _then2, _else2) => {
799                    if c1 == c2 {
800                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
801                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
802                        return Ok(self.mk_ite(c1, _then, _else));
803                    }
804                    if c1.0 > c2.0 {
805                        let _then = self.mk_binary_result(_then1, right, apply)?;
806                        let _else = self.mk_binary_result(_else1, right, apply)?;
807                        Ok(self.mk_ite(c1, _then, _else))
808                    } else {
809                        let _then = self.mk_binary_result(left, _then2, apply)?;
810                        let _else = self.mk_binary_result(left, _else2, apply)?;
811                        Ok(self.mk_ite(c2, _then, _else))
812                    }
813                }
814            },
815        }
816    }
817
818    fn mk_binary(
819        &mut self,
820        left: TRegexId,
821        right: TRegexId,
822        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
823    ) -> TRegexId {
824        self.mk_binary_memo.clear();
825        self.mk_binary_inner(left, right, apply)
826    }
827
828    fn mk_binary_inner(
829        &mut self,
830        left: TRegexId,
831        right: TRegexId,
832        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
833    ) -> TRegexId {
834        if left == right {
835            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
836        }
837        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
838            return cached;
839        }
840        let result = match self.tr_array[left.0 as usize] {
841            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
842                TRegex::Leaf(right_node_id) => {
843                    let applied = apply(self, left_node_id, right_node_id);
844                    self.mk_leaf(applied)
845                }
846                TRegex::ITE(c2, _then, _else) => {
847                    let then2 = self.mk_binary_inner(left, _then, apply);
848                    let else2 = self.mk_binary_inner(left, _else, apply);
849                    self.mk_ite(c2, then2, else2)
850                }
851            },
852            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
853                TRegex::Leaf(_) => {
854                    let then2 = self.mk_binary_inner(_then1, right, apply);
855                    let else2 = self.mk_binary_inner(_else1, right, apply);
856                    self.mk_ite(c1, then2, else2)
857                }
858                TRegex::ITE(c2, _then2, _else2) => {
859                    if c1 == c2 {
860                        let _then = self.mk_binary_inner(_then1, _then2, apply);
861                        let _else = self.mk_binary_inner(_else1, _else2, apply);
862                        self.mk_ite(c1, _then, _else)
863                    } else if c1.0 > c2.0 {
864                        let _then = self.mk_binary_inner(_then1, right, apply);
865                        let _else = self.mk_binary_inner(_else1, right, apply);
866                        self.mk_ite(c1, _then, _else)
867                    } else {
868                        let _then = self.mk_binary_inner(left, _then2, apply);
869                        let _else = self.mk_binary_inner(left, _else2, apply);
870                        self.mk_ite(c2, _then, _else)
871                    }
872                }
873            },
874        };
875        self.mk_binary_memo.insert((left, right), result);
876        result
877    }
878
879    pub fn get_nulls(
880        &mut self,
881        pending_rel: u32,
882        mask: Nullability,
883        acc: &mut BTreeSet<NullState>,
884        node_id: NodeId,
885    ) {
886        debug_assert!(node_id != NodeId::MISSING);
887        if !self.is_nullable(node_id, mask) {
888            return;
889        }
890        match self.get_kind(node_id) {
891            Kind::Pred => {}
892            Kind::End => {
893                if mask.has(Nullability::END) {
894                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
895                }
896            }
897            Kind::Begin => {
898                if mask.has(Nullability::BEGIN) {
899                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
900                }
901            }
902            Kind::Concat => {
903                let new_mask = self.nullability(node_id).and(mask);
904                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
905                if self.is_nullable(node_id.left(self), mask) {
906                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
907                }
908            }
909            Kind::Union => {
910                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
911                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
912            }
913            Kind::Inter => {
914                let new_mask = self.nullability(node_id).and(mask);
915                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
916                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
917            }
918            Kind::Star => {
919                acc.insert(NullState::new(mask, pending_rel));
920                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
921            }
922            Kind::Compl => {
923                if !self.is_nullable(node_id.left(self), mask) {
924                    acc.insert(NullState::new(mask, 0));
925                }
926            }
927            Kind::Lookbehind => {
928                let new_mask = self.nullability(node_id).and(mask);
929                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
930                if node_id.right(self) != NodeId::MISSING {
931                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
932                }
933            }
934            Kind::Lookahead => {
935                let la_inner = self.get_lookahead_inner(node_id);
936                if self.is_nullable(la_inner, mask) {
937                    let rel = self.get_lookahead_rel(node_id);
938                    if rel != u32::MAX {
939                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
940                    }
941                    // tail only contributes when body is sat
942                    let la_tail = self.get_lookahead_tail(node_id);
943                    if la_tail != NodeId::MISSING {
944                        self.get_nulls(pending_rel, mask, acc, la_tail);
945                    }
946                }
947            }
948            Kind::Counted => {
949                let packed = self.get_extra(node_id);
950                let best = packed >> 16;
951                if best > 0 {
952                    acc.insert(NullState::new(mask, pending_rel + best));
953                }
954            }
955        }
956    }
957
958    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
959        self.get_meta_flags(node_id)
960            .has(MetaFlags::CONTAINS_LOOKAROUND)
961    }
962
963    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
964        self.get_meta_flags(node_id)
965            .has(MetaFlags::CONTAINS_ANCHORS)
966    }
967
968    pub fn is_infinite(&self, node_id: NodeId) -> bool {
969        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
970    }
971
972    /// returns (min_length, max_length). max = u32::MAX means unbounded.
973    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
974        if self.is_infinite(node_id) {
975            if node_id.is_inter(self) {
976                self.get_bounded_length(node_id)
977            } else {
978                (self.get_min_length_only(node_id), u32::MAX)
979            }
980        } else {
981            self.get_bounded_length(node_id)
982        }
983    }
984
985    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
986        if node_id == NodeId::EPS {
987            return (0, 0);
988        }
989        match self.get_kind(node_id) {
990            Kind::End | Kind::Begin => (0, 0),
991            Kind::Pred => (1, 1),
992            Kind::Concat => {
993                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
994                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
995                (lmin + rmin, lmax.saturating_add(rmax))
996            }
997            Kind::Union => {
998                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
999                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1000                (lmin.min(rmin), lmax.max(rmax))
1001            }
1002            Kind::Inter => {
1003                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
1004                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
1005                (lmin.max(rmin), lmax.min(rmax))
1006            }
1007            Kind::Lookahead => {
1008                let body = node_id.left(self);
1009                if self.is_infinite(body) {
1010                    return (0, u32::MAX);
1011                }
1012                let right = node_id.right(self);
1013                if right.is_missing() {
1014                    (0, 0)
1015                } else {
1016                    self.get_min_max_length(right)
1017                }
1018            }
1019            Kind::Counted => (0, 0),
1020            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
1021        }
1022    }
1023
1024    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1025        match self.get_kind(node_id) {
1026            Kind::End | Kind::Begin => Some(0),
1027            Kind::Pred => Some(1),
1028            Kind::Concat => {
1029                let l = self.get_fixed_length(node_id.left(self))?;
1030                let r = self.get_fixed_length(node_id.right(self))?;
1031                Some(l + r)
1032            }
1033            Kind::Union => {
1034                let l = self.get_fixed_length(node_id.left(self))?;
1035                let r = self.get_fixed_length(node_id.right(self))?;
1036                if l == r {
1037                    Some(l)
1038                } else {
1039                    None
1040                }
1041            }
1042            Kind::Inter => {
1043                let l = self.get_fixed_length(node_id.left(self))?;
1044                let r = self.get_fixed_length(node_id.right(self))?;
1045                if l == r {
1046                    Some(l)
1047                } else {
1048                    None
1049                }
1050            }
1051            Kind::Lookahead => {
1052                let right = node_id.right(self);
1053                if right.is_missing() {
1054                    Some(0)
1055                } else {
1056                    self.get_fixed_length(right)
1057                }
1058            }
1059            Kind::Counted => Some(0),
1060            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1061        }
1062    }
1063
1064    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1065        match self.get_kind(node_id) {
1066            Kind::End | Kind::Begin => 0,
1067            Kind::Pred => 1,
1068            Kind::Concat => {
1069                self.get_min_length_only(node_id.left(self))
1070                    + self.get_min_length_only(node_id.right(self))
1071            }
1072            Kind::Union => self
1073                .get_min_length_only(node_id.left(self))
1074                .min(self.get_min_length_only(node_id.right(self))),
1075            Kind::Inter => self
1076                .get_min_length_only(node_id.left(self))
1077                .max(self.get_min_length_only(node_id.right(self))),
1078            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1079            Kind::Compl => {
1080                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1081                    0
1082                } else {
1083                    1
1084                }
1085            }
1086        }
1087    }
1088
1089    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1090        if node_id == NodeId::TS {
1091            return true;
1092        }
1093        match self.get_kind(node_id) {
1094            Kind::Inter => {
1095                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1096            }
1097            Kind::Union => {
1098                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1099            }
1100            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1101            _ => false,
1102        }
1103    }
1104
1105    #[inline]
1106    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1107        if node_id.is_concat(self) {
1108            self.ends_with_ts(node_id.right(self))
1109        } else {
1110            node_id == NodeId::TS
1111        }
1112    }
1113
1114    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1115        debug_assert!(node_id != NodeId::MISSING);
1116        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1117    }
1118
1119    pub(crate) fn cache_der(
1120        &mut self,
1121        node_id: NodeId,
1122        result: TRegexId,
1123        mask: Nullability,
1124    ) -> TRegexId {
1125        if mask == Nullability::CENTER {
1126            self.tr_der_center[node_id.0 as usize] = result
1127        } else {
1128            self.tr_der_begin[node_id.0 as usize] = result
1129        };
1130        result
1131    }
1132
1133    pub(crate) fn try_cached_der(
1134        &mut self,
1135        node_id: NodeId,
1136        mask: Nullability,
1137    ) -> Option<TRegexId> {
1138        let cache = if mask == Nullability::CENTER {
1139            &mut self.tr_der_center
1140        } else {
1141            &mut self.tr_der_begin
1142        };
1143        match cache.get(node_id.0 as usize) {
1144            Some(&TRegexId::MISSING) => {}
1145            Some(&result) => {
1146                return Some(result);
1147            }
1148            None => {
1149                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1150            }
1151        }
1152        None
1153    }
1154
1155    pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1156        let mut term = self.get_tregex(der);
1157        loop {
1158            match *term {
1159                TRegex::Leaf(node_id) => return node_id,
1160                TRegex::ITE(cond, _then, _else) => {
1161                    if self.solver().is_sat_id(set, cond) {
1162                        term = self.get_tregex(_then);
1163                    } else {
1164                        term = self.get_tregex(_else);
1165                    }
1166                }
1167            }
1168        }
1169    }
1170
1171    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1172        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1173        debug_assert!(
1174            node_id != NodeId::MISSING,
1175            "attempting to derive missing node"
1176        );
1177        if let Some(result) = self.try_cached_der(node_id, mask) {
1178            return Ok(result);
1179        }
1180
1181        let result = match node_id.kind(self) {
1182            Kind::Compl => {
1183                let leftd = node_id.left(self).der(self, mask)?;
1184                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1185            }
1186            Kind::Inter => {
1187                let leftd = node_id.left(self).der(self, mask)?;
1188                let rightd = node_id.right(self).der(self, mask)?;
1189                {
1190                    let this = &mut *self;
1191                    this.mk_binary(
1192                        leftd,
1193                        rightd,
1194                        &mut (|b, left, right| b.mk_inter(left, right)),
1195                    )
1196                }
1197            }
1198            Kind::Union => {
1199                let leftd = self.der(node_id.left(self), mask)?;
1200                let rightd = self.der(node_id.right(self), mask)?;
1201                {
1202                    let this = &mut *self;
1203                    this.mk_binary(
1204                        leftd,
1205                        rightd,
1206                        &mut (|b, left, right| b.mk_union(left, right)),
1207                    )
1208                }
1209            }
1210            Kind::Concat => {
1211                let head = node_id.left(self);
1212                let tail = node_id.right(self);
1213                let tail_leaf = self.mk_leaf(tail);
1214                let head_der = self.der(head, mask)?;
1215
1216                if self.is_nullable(head, mask) {
1217                    let rightd = self.der(tail, mask)?;
1218                    let ldr = {
1219                        let this = &mut *self;
1220                        this.mk_binary(
1221                            head_der,
1222                            tail_leaf,
1223                            &mut (|b, left, right| b.mk_concat(left, right)),
1224                        )
1225                    };
1226                    {
1227                        let this = &mut *self;
1228                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1229                    }
1230                } else {
1231                    let this = &mut *self;
1232                    this.mk_binary(
1233                        head_der,
1234                        tail_leaf,
1235                        &mut (|b, left, right| b.mk_concat(left, right)),
1236                    )
1237                }
1238            }
1239            Kind::Star => {
1240                if node_id == NodeId::EPS {
1241                    TRegexId::BOT
1242                } else {
1243                    let left = node_id.left(self);
1244                    let r_decr_leaf = self.mk_leaf(node_id);
1245                    let r_der = self.der(left, mask)?;
1246                    let this = &mut *self;
1247                    this.mk_binary(
1248                        r_der,
1249                        r_decr_leaf,
1250                        &mut (|b, left, right| b.mk_concat(left, right)),
1251                    )
1252                }
1253            }
1254            Kind::Lookbehind => {
1255                let lb_prev_der = {
1256                    let lb_prev = self.get_lookbehind_prev(node_id);
1257                    if lb_prev == NodeId::MISSING {
1258                        TRegexId::MISSING
1259                    } else {
1260                        self.der(lb_prev, mask)?
1261                    }
1262                };
1263                let lb_inner = self.get_lookbehind_inner(node_id);
1264                let lb_inner_der = self.der(lb_inner, mask)?;
1265                {
1266                    let this = &mut *self;
1267                    this.mk_binary_result(
1268                        lb_inner_der,
1269                        lb_prev_der,
1270                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1271                    )?
1272                }
1273            }
1274            Kind::Lookahead => {
1275                let la_tail = self.get_lookahead_tail(node_id);
1276                let la_body = node_id.left(self);
1277                let rel = self.get_lookahead_rel(node_id);
1278
1279                if self.is_nullable(la_body, mask) {
1280                    // nullabilty is taken once, just keep the body
1281                    let right = node_id.right(self).missing_to_eps();
1282                    let rder = self.der(right, mask).clone();
1283                    return rder;
1284                }
1285
1286                if rel == u32::MAX {
1287                    let la_body_der = self.der(la_body, mask)?;
1288                    if la_tail.is_kind(self, Kind::Pred) {
1289                        let transitioned =
1290                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1291                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1292                        let concated = self.mk_concat(la_tail, new_la);
1293                        return self.der(concated, mask);
1294                    }
1295                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1296                        let left = la_tail.left(self);
1297                        let tset = left.pred_tset(self);
1298                        let transitioned = self.transition_term(la_body_der, tset);
1299                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1300                        let tail_right = la_tail.right(self);
1301                        let concated = self.mk_concat(new_la, tail_right);
1302                        let concated = self.mk_concat(left, concated);
1303                        return self.der(concated, mask);
1304                    }
1305                }
1306
1307                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1308                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1309                    let concated = self.mk_concat(la_body, nulls_mask);
1310                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1311                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1312                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1313                    let new_union = self.mk_union(concated_look, new_look);
1314                    return self.der(new_union, mask);
1315                }
1316
1317                let la_tail_der = if la_tail == NodeId::MISSING {
1318                    TRegexId::MISSING
1319                } else {
1320                    if self.is_nullable(la_tail, mask) {
1321                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1322                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1323                        let la_union = self.mk_union(la_tail, nulls_la);
1324                        self.der(la_union, mask)?
1325                    } else {
1326                        self.der(la_tail, mask)?
1327                    }
1328                };
1329
1330                let la_body_der = self.der(la_body, mask)?;
1331
1332                if rel != u32::MAX && rel > self.lookahead_context_max {
1333                    return Err(AlgebraError::AnchorLimit);
1334                }
1335
1336                let la = {
1337                    let this = &mut *self;
1338                    let rel = helpers::incr_rel(rel);
1339                    this.mk_binary(
1340                        la_body_der,
1341                        la_tail_der,
1342                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1343                    )
1344                };
1345
1346                if rel != u32::MAX
1347                    && la_tail_der != TRegexId::MISSING
1348                    && self.is_nullable(la_tail, mask)
1349                {
1350                    let look_only = {
1351                        let this = &mut *self;
1352                        let right = TRegexId::MISSING;
1353                        let rel = helpers::incr_rel(rel);
1354                        this.mk_binary(
1355                            la_body_der,
1356                            right,
1357                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1358                        )
1359                    };
1360                    {
1361                        let this = &mut *self;
1362                        this.mk_binary(
1363                            look_only,
1364                            la,
1365                            &mut (|b, left, right| b.mk_union(left, right)),
1366                        )
1367                    }
1368                } else {
1369                    la
1370                }
1371            }
1372            Kind::Counted => {
1373                let body = node_id.left(self);
1374                let chain = node_id.right(self);
1375                let packed = self.get_extra(node_id);
1376                let step = (packed & 0xFFFF) as u16;
1377                let best = (packed >> 16) as u16;
1378
1379                let mid_best = if self.is_nullable(body, mask) && step >= best {
1380                    step
1381                } else {
1382                    best
1383                };
1384
1385                let body_der = self.der(body, mask)?;
1386                let new_step = step.saturating_add(1);
1387                self.mk_unary(body_der, &mut |b, new_body| {
1388                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1389                        new_step
1390                    } else {
1391                        mid_best
1392                    };
1393                    let packed = (final_best as u32) << 16 | new_step as u32;
1394                    b.mk_counted(new_body, chain, packed)
1395                })
1396            }
1397            Kind::Begin | Kind::End => TRegexId::BOT,
1398            Kind::Pred => {
1399                let psi = node_id.pred_tset(self);
1400                if psi == TSetId::EMPTY {
1401                    TRegexId::BOT
1402                } else {
1403                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1404                }
1405            }
1406        };
1407
1408        // println!("{} {}", node_id.0, self.pp(node_id));
1409        // println!("node: {} (total: {})", node_id.0, self.num_created);
1410
1411        self.cache_der(node_id, result, mask);
1412        Ok(result)
1413    }
1414
1415    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1416        debug_assert!(meta_id != MetadataId::MISSING);
1417        match self.metadata.get_mut(node_id.0 as usize) {
1418            Some(v) => *v = meta_id,
1419            None => {
1420                self.metadata
1421                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1422                self.metadata[node_id.0 as usize] = meta_id;
1423            }
1424        }
1425    }
1426
1427    fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1428        debug_assert!(reversed_id != NodeId::MISSING);
1429        match self.reversed.get_mut(node_id.0 as usize) {
1430            Some(v) => *v = reversed_id,
1431            None => {
1432                self.reversed
1433                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1434                self.reversed[node_id.0 as usize] = reversed_id;
1435            }
1436        }
1437        return reversed_id;
1438    }
1439
1440    fn init(&mut self, inst: NodeKey) -> NodeId {
1441        self.num_created += 1;
1442        let node_id = NodeId(self.num_created);
1443        self.index.insert(inst.clone(), node_id);
1444        match inst.kind {
1445            Kind::Pred => {
1446                let meta_id = self.mb.get_meta_id(Metadata {
1447                    flags: MetaFlags::ZERO,
1448                    nulls: NullsId::EMPTY,
1449                });
1450                self.init_metadata(node_id, meta_id);
1451            }
1452            Kind::Begin => {
1453                let meta_id = self.mb.get_meta_id(Metadata {
1454                    flags: MetaFlags::with_nullability(
1455                        Nullability::BEGIN,
1456                        MetaFlags::CONTAINS_ANCHORS,
1457                    ),
1458                    nulls: NullsId::BEGIN0,
1459                });
1460                self.init_metadata(node_id, meta_id);
1461            }
1462            Kind::End => {
1463                let meta_id = self.mb.get_meta_id(Metadata {
1464                    flags: MetaFlags::with_nullability(
1465                        Nullability::END,
1466                        MetaFlags::CONTAINS_ANCHORS,
1467                    ),
1468                    nulls: NullsId::END0,
1469                });
1470                self.init_metadata(node_id, meta_id);
1471            }
1472            Kind::Inter => {
1473                let m1 = self.get_node_meta_id(inst.left);
1474                let m2 = self.get_node_meta_id(inst.right);
1475                let meta_id = {
1476                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1477                    let mask_l = inst.left.nullability(self);
1478                    let mask_r = inst.right.nullability(self);
1479                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1480                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1481                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1482                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1483                    let new_meta = Metadata {
1484                        flags: self.mb.flags_inter(m1, m2),
1485                        nulls,
1486                    };
1487                    self.mb.get_meta_id(new_meta)
1488                };
1489                self.init_metadata(node_id, meta_id);
1490            }
1491            Kind::Union => {
1492                let m1 = self.get_node_meta_id(inst.left);
1493                let m2 = self.get_node_meta_id(inst.right);
1494                let meta_id = {
1495                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1496                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1497                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1498                    let new_meta = Metadata {
1499                        flags: self.mb.flags_union(m1, m2),
1500                        nulls,
1501                    };
1502                    self.mb.get_meta_id(new_meta)
1503                };
1504                self.init_metadata(node_id, meta_id);
1505            }
1506            Kind::Concat => {
1507                let flags = self.mb.flags_concat(
1508                    self.get_node_meta_id(inst.left),
1509                    self.get_node_meta_id(inst.right),
1510                );
1511
1512                let right_nullability = inst.right.nullability(self);
1513                let left_nullability = inst.left.nullability(self);
1514                let nulls_left = self.get_nulls_id(inst.left);
1515                let nulls_right = self.get_nulls_id(inst.right);
1516                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1517                let mask = right_nullability.and(left_nullability);
1518                nulls = self.mb.nb.and_mask(nulls, mask);
1519
1520                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1521                self.init_metadata(node_id, new_id);
1522            }
1523            Kind::Star => {
1524                let left_nulls = self.get_nulls_id(inst.left);
1525                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1526                let meta_id = self.mb.get_meta_id(Metadata {
1527                    flags: self
1528                        .mb
1529                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1530                    nulls,
1531                });
1532                self.init_metadata(node_id, meta_id);
1533            }
1534            Kind::Compl => {
1535                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1536                let meta_id = self.mb.get_meta_id(Metadata {
1537                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1538                    nulls,
1539                });
1540                self.init_metadata(node_id, meta_id);
1541            }
1542            Kind::Lookbehind => {
1543                let mut left_nullability = self.nullability(inst.left);
1544                let mut contains_flags = self.get_flags_contains(inst.left);
1545                let nulls = if inst.right.is_missing() {
1546                    self.get_nulls_id(inst.left)
1547                } else {
1548                    let right_nullability = self.nullability(inst.right);
1549                    let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1550                    let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1551                    left_nullability = left_nullability.and(right_nullability);
1552                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1553                    self.mb.nb.and_id(nulls_left, nulls_right)
1554                };
1555
1556                let meta_id = self.mb.get_meta_id(Metadata {
1557                    flags: MetaFlags::with_nullability(
1558                        left_nullability,
1559                        contains_flags
1560                            .or(MetaFlags::CONTAINS_LOOKAROUND)
1561                            .or(MetaFlags::CONTAINS_LOOKBEHIND),
1562                    ),
1563                    nulls,
1564                });
1565                self.init_metadata(node_id, meta_id);
1566            }
1567            Kind::Lookahead => {
1568                let mut nulls = self.get_nulls_id(inst.left);
1569                // lookahead nulls shifted by rel (distance to body match)
1570                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1571                let left_nullability = inst.left.nullability(self);
1572                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1573
1574                nulls = self.mb.nb.or_id(nulls, nulls_right);
1575
1576                let la_inner = inst.left;
1577                let la_tail = inst.right;
1578                let null = self
1579                    .get_meta_flags(la_inner)
1580                    .nullability()
1581                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1582                let contains_flags = self
1583                    .get_flags_contains(la_inner)
1584                    .or(self.get_flags_contains(la_tail));
1585
1586                let meta_id = self.mb.get_meta_id(Metadata {
1587                    flags: MetaFlags::with_nullability(
1588                        null,
1589                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1590                    ),
1591                    nulls,
1592                });
1593                self.init_metadata(node_id, meta_id);
1594            }
1595            Kind::Counted => {
1596                let best = inst.extra >> 16;
1597                let (null, nulls) = if best > 0 {
1598                    let mut ns = BTreeSet::new();
1599                    ns.insert(NullState::new(Nullability::CENTER, best));
1600                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1601                } else {
1602                    (Nullability::NEVER, NullsId::EMPTY)
1603                };
1604                let meta_id = self.mb.get_meta_id(Metadata {
1605                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1606                    nulls,
1607                });
1608                self.init_metadata(node_id, meta_id);
1609            }
1610        }
1611
1612        self.array.push(inst);
1613
1614        if let Some(rw) = self.post_init_simplify(node_id) {
1615            self.override_as(node_id, rw)
1616        } else {
1617            node_id
1618        }
1619    }
1620
1621    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1622        match self.get_kind(node_id) {
1623            Kind::Concat => {
1624                let lhs = node_id.left(self);
1625                let rhs = node_id.right(self);
1626                if lhs.is_pred_star(self).is_some() {
1627                    if let Some(opttail) = rhs.is_opt_v(self) {
1628                        if let Some(true) = self.subsumes(lhs, opttail) {
1629                            return Some(lhs);
1630                        }
1631                    }
1632                }
1633            }
1634            Kind::Union => {
1635                let lhs = node_id.left(self);
1636                let rhs = node_id.right(self);
1637                let mut subsumed = false;
1638                rhs.iter_union_while(self, &mut |b, branch| {
1639                    if b.nullable_subsumes(branch, lhs) {
1640                        subsumed = true;
1641                    }
1642                    !subsumed
1643                });
1644                if subsumed {
1645                    return Some(rhs);
1646                }
1647                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1648                    return Some(rhs);
1649                }
1650            }
1651            _ => {}
1652        }
1653
1654        None
1655    }
1656
1657    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1658    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1659        if self.get_kind(lhs) != Kind::Union {
1660            return false; // single branch already checked by nullable_subsumes
1661        }
1662        let mut rhs_branches = Vec::new();
1663        let mut curr = rhs;
1664        while self.get_kind(curr) == Kind::Union {
1665            rhs_branches.push(self.get_left(curr));
1666            curr = self.get_right(curr);
1667        }
1668        rhs_branches.push(curr);
1669
1670        curr = lhs;
1671        while self.get_kind(curr) == Kind::Union {
1672            if !rhs_branches.contains(&self.get_left(curr)) {
1673                return false;
1674            }
1675            curr = self.get_right(curr);
1676        }
1677        rhs_branches.contains(&curr)
1678    }
1679
1680    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1681    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1682        if node == target {
1683            return true;
1684        }
1685        match self.get_kind(node) {
1686            Kind::Union => {
1687                self.nullable_subsumes(self.get_left(node), target)
1688                    || self.nullable_subsumes(self.get_right(node), target)
1689            }
1690            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1691                self.nullable_subsumes(self.get_right(node), target)
1692            }
1693            _ => false,
1694        }
1695    }
1696
1697    pub fn num_nodes(&self) -> u32 {
1698        self.num_created
1699    }
1700
1701    pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1702        let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1703        let mut stack: Vec<NodeId> = vec![root];
1704        while let Some(n) = stack.pop() {
1705            if n == NodeId::MISSING
1706                || n == NodeId::BOT
1707                || n == NodeId::EPS
1708                || n == NodeId::TS
1709                || n == NodeId::BEGIN
1710                || n == NodeId::END
1711            {
1712                continue;
1713            }
1714            if seen.insert(n, ()).is_some() {
1715                continue;
1716            }
1717            if seen.len() >= limit {
1718                return limit;
1719            }
1720            stack.push(self.get_left(n));
1721            stack.push(self.get_right(n));
1722        }
1723        seen.len()
1724    }
1725    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1726        match self.index.get(&inst) {
1727            Some(&id) => id,
1728            None => self.init(inst),
1729        }
1730    }
1731    #[inline]
1732    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1733        self.index.get(inst)
1734    }
1735
1736    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1737        self.index.insert(key, subsumed);
1738        subsumed
1739    }
1740
1741    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1742        let key = &self.array[key.0 as usize];
1743        let entry = self.index.get_mut(key).unwrap();
1744        *entry = subsumed;
1745        subsumed
1746    }
1747
1748    #[inline]
1749    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1750        self.array[node_id.0 as usize].left
1751    }
1752
1753    #[inline]
1754    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1755        self.array[node_id.0 as usize].right
1756    }
1757
1758    #[inline]
1759    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1760        self.array[node_id.0 as usize].extra
1761    }
1762
1763    #[inline]
1764    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1765        debug_assert!(node_id.is_concat(self));
1766        let mut curr = node_id;
1767        while curr.is_concat(self) {
1768            curr = curr.right(self);
1769        }
1770        curr
1771    }
1772
1773    fn has_trailing_la(&self, node: NodeId) -> bool {
1774        let end = match self.get_kind(node) {
1775            Kind::Concat => self.get_concat_end(node),
1776            Kind::Lookahead => node,
1777            _ => return false,
1778        };
1779        end.is_lookahead(self) && end.right(self).is_missing()
1780    }
1781
1782    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1783        if node.is_lookahead(self) {
1784            return (NodeId::EPS, node);
1785        }
1786        debug_assert!(node.is_concat(self));
1787        let right = node.right(self);
1788        if !right.is_concat(self) {
1789            return (node.left(self), right);
1790        }
1791        let (stripped, la) = self.strip_trailing_la(right);
1792        (self.mk_concat(node.left(self), stripped), la)
1793    }
1794    #[inline]
1795    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1796        debug_assert!(matches!(
1797            self.get_kind(lookahead_node_id),
1798            Kind::Lookahead | Kind::Counted
1799        ));
1800        lookahead_node_id.left(self)
1801    }
1802    #[inline]
1803    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1804        debug_assert!(lookahead_node_id.is_lookahead(self));
1805        lookahead_node_id.right(self)
1806    }
1807    #[inline]
1808    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1809        debug_assert!(
1810            matches!(
1811                self.get_kind(lookahead_node_id),
1812                Kind::Lookahead | Kind::Counted
1813            ),
1814            "not lookahead/counted: {:?}",
1815            self.pp(lookahead_node_id)
1816        );
1817        self.get_extra(lookahead_node_id)
1818    }
1819    #[inline]
1820    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1821        debug_assert!(lookbehind_node_id.is_lookbehind(self));
1822        lookbehind_node_id.left(self)
1823    }
1824    #[inline]
1825    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1826        debug_assert!(lookbehind_node_id.is_lookbehind(self));
1827        lookbehind_node_id.right(self)
1828    }
1829
1830    #[inline]
1831    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1832        debug_assert!(
1833            self.array.len() > node_id.0 as usize,
1834            "array len: {:?}",
1835            node_id
1836        );
1837        self.array[node_id.0 as usize].kind
1838    }
1839
1840    #[inline]
1841    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1842        &self.array[node_id.0 as usize]
1843    }
1844
1845    #[inline]
1846    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1847        self.metadata[node_id.0 as usize]
1848    }
1849
1850    #[inline]
1851    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1852        debug_assert!(node_id.0 != u32::MAX);
1853        let meta_id = self.get_node_meta_id(node_id);
1854        debug_assert!(meta_id != MetadataId::MISSING);
1855        &self.mb.array[meta_id.0 as usize]
1856    }
1857
1858    #[inline]
1859    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1860        if node_id == NodeId::MISSING {
1861            NullsId::EMPTY
1862        } else {
1863            self.get_meta(node_id).nulls
1864        }
1865    }
1866
1867    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1868        self.mb
1869            .nb
1870            .array
1871            .iter()
1872            .map(|set| set.iter().cloned().collect())
1873            .collect()
1874    }
1875
1876    pub fn nulls_count(&self) -> usize {
1877        self.mb.nb.array.len()
1878    }
1879
1880    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1881        self.mb.nb.array[id as usize].iter().cloned().collect()
1882    }
1883
1884    /// Return the interned NullsId whose entries are `nid` filtered to
1885    /// CENTER-nullability only (mask ANDed with CENTER, NEVER entries dropped).
1886    pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
1887        self.mb.nb.and_mask(nid, Nullability::CENTER)
1888    }
1889
1890    #[inline]
1891    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1892        if node_id == NodeId::MISSING {
1893            NullsId::EMPTY
1894        } else {
1895            let nulls = self.get_meta(node_id).nulls;
1896            self.mb.nb.and_mask(nulls, mask)
1897        }
1898    }
1899
1900    #[inline]
1901    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1902        let meta_id = self.get_node_meta_id(node_id);
1903        let meta = &self.mb.array[meta_id.0 as usize];
1904        meta.flags
1905    }
1906
1907    #[inline]
1908    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1909        self.get_meta(node_id).flags.nullability()
1910    }
1911
1912    #[inline]
1913    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1914        let meta_id = self.get_node_meta_id(node_id);
1915        let meta = &self.mb.array[meta_id.0 as usize];
1916        meta.flags.all_contains_flags()
1917    }
1918
1919    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1920        if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
1921            return self.strip_lb(node_id.right(self));
1922        }
1923        self.strip_lb_inner(node_id)
1924    }
1925
1926    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1927        if !self.contains_look(node_id) {
1928            return Ok(node_id);
1929        }
1930        if node_id.is_concat(self) && node_id.left(self).is_lookbehind(self) {
1931            let lb = node_id.left(self);
1932            let prev = self.get_lookbehind_prev(lb);
1933            let tail = self.strip_lb_inner(node_id.right(self))?;
1934            if prev != NodeId::MISSING {
1935                let stripped_prev = self.strip_lb_inner(prev)?;
1936                return Ok(self.mk_concat(stripped_prev, tail));
1937            }
1938            return Ok(tail);
1939        }
1940        if node_id.is_inter(self) {
1941            let left = self.strip_lb_inner(node_id.left(self))?;
1942            let right = self.strip_lb_inner(node_id.right(self))?;
1943            return Ok(self.mk_inter(left, right));
1944        }
1945        if self.get_kind(node_id) == Kind::Union {
1946            let left = self.strip_lb_inner(node_id.left(self))?;
1947            let right = self.strip_lb_inner(node_id.right(self))?;
1948            return Ok(self.mk_union(left, right));
1949        }
1950        match self.get_kind(node_id) {
1951            Kind::Lookbehind => {
1952                let prev = self.get_lookbehind_prev(node_id);
1953                if prev != NodeId::MISSING {
1954                    self.strip_lb_inner(prev)
1955                } else {
1956                    Ok(NodeId::EPS)
1957                }
1958            }
1959            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1960                Err(AlgebraError::UnsupportedPattern)
1961            }
1962            _ => Ok(node_id),
1963        }
1964    }
1965
1966    // for prefix purposes we prune any \A leading paths
1967    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1968        if !self.contains_anchors(node_id) {
1969            return node_id;
1970        }
1971        match self.get_kind(node_id) {
1972            Kind::Begin => NodeId::BOT,
1973            Kind::Concat => {
1974                let left = self.nonbegins(node_id.left(self));
1975                if left == NodeId::BOT {
1976                    return NodeId::BOT;
1977                }
1978                self.mk_concat(left, node_id.right(self))
1979            }
1980            Kind::Union => {
1981                let left = self.nonbegins(node_id.left(self));
1982                let right = self.nonbegins(node_id.right(self));
1983                self.mk_union(left, right)
1984            }
1985            _ => node_id,
1986        }
1987    }
1988
1989    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1990        match self.get_kind(node_id) {
1991            Kind::Concat => {
1992                let head = node_id.left(self);
1993                match self.get_kind(head) {
1994                    _ if self.any_nonbegin_nullable(head) => {
1995                        self.strip_prefix_safe(node_id.right(self))
1996                    }
1997                    _ => node_id,
1998                }
1999            }
2000            _ => node_id,
2001        }
2002    }
2003    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2004        match self.get_kind(node_id) {
2005            Kind::Begin => NodeId::BOT,
2006            Kind::Concat => {
2007                let head = self.prune_begin(node_id.left(self));
2008                let tail = self.prune_begin(node_id.right(self));
2009                self.mk_concat(head, tail)
2010            }
2011            Kind::Lookbehind => {
2012                if !node_id.right(self).is_missing() {
2013                    return node_id;
2014                }
2015                let head = self.prune_begin(node_id.left(self));
2016                head
2017            }
2018            Kind::Union => {
2019                let left = self.prune_begin(node_id.left(self));
2020                let right = self.prune_begin(node_id.right(self));
2021                self.mk_union(left, right)
2022            }
2023            _ => node_id,
2024        }
2025    }
2026    pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2027        match self.get_kind(node_id) {
2028            Kind::Begin => NodeId::BOT,
2029            Kind::Concat => {
2030                let l = node_id.left(self);
2031                let r = node_id.right(self);
2032                if r.is_concat(self) {
2033                    let r_left = r.left(self);
2034                    if l.is_ts() && r_left.is_begin() {
2035                        return self.prune_begin_eps(r.right(self));
2036                    }
2037                }
2038                let head = self.prune_begin_eps(l);
2039                let tail = self.prune_begin_eps(r);
2040                self.mk_concat(head, tail)
2041            }
2042            Kind::Lookbehind => {
2043                if !node_id.right(self).is_missing() {
2044                    return node_id;
2045                }
2046                let head = self.prune_begin_eps(node_id.left(self));
2047                head
2048            }
2049            Kind::Union => {
2050                let left = self.prune_begin_eps(node_id.left(self));
2051                let right = self.prune_begin_eps(node_id.right(self));
2052                self.mk_union(left, right)
2053            }
2054            _ => node_id,
2055        }
2056    }
2057
2058    pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2059        if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2060            return Ok(node_id);
2061        }
2062        let result = match self.get_kind(node_id) {
2063            Kind::Concat => {
2064                let left = self.normalize_rev(node_id.left(self))?;
2065                let right = self.normalize_rev(node_id.right(self))?;
2066                self.mk_concat(left, right)
2067            }
2068            Kind::Inter => {
2069                let left = self.normalize_rev(node_id.left(self))?;
2070                let right = self.normalize_rev(node_id.right(self))?;
2071                self.mk_inter(left, right)
2072            }
2073            Kind::Union => {
2074                let left = self.normalize_rev(node_id.left(self))?;
2075                let right = self.normalize_rev(node_id.right(self))?;
2076                self.mk_union(left, right)
2077            }
2078            Kind::Lookbehind => {
2079                let left = self.normalize_rev(node_id.left(self))?;
2080                let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2081                let lbody_ts = self.mk_concat(NodeId::TS, left);
2082                let ltail_ts = self.mk_concat(NodeId::TS, right);
2083                let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2084                as_inter
2085            }
2086            Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2087                return Err(AlgebraError::UnsupportedPattern)
2088            }
2089            _ => node_id,
2090        };
2091        Ok(result)
2092    }
2093
2094    pub fn normalize_rev_prune(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2095        if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2096            return Ok(node_id);
2097        }
2098        let result = match self.get_kind(node_id) {
2099            Kind::Concat => {
2100                let left = self.normalize_rev(node_id.left(self))?;
2101                let right = self.normalize_rev(node_id.right(self))?;
2102                self.mk_concat(left, right)
2103            }
2104            Kind::Inter => {
2105                let left = self.normalize_rev(node_id.left(self))?;
2106                let right = self.normalize_rev(node_id.right(self))?;
2107                self.mk_inter(left, right)
2108            }
2109            Kind::Union => {
2110                let left = self.normalize_rev(node_id.left(self))?;
2111                let right = self.normalize_rev(node_id.right(self))?;
2112                self.mk_union(left, right)
2113            }
2114            Kind::Lookbehind => {
2115                let left = self.normalize_rev(node_id.left(self))?;
2116                let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2117                let lbody_ts = self.mk_concat(NodeId::TS, left);
2118                let ltail_ts = self.mk_concat(NodeId::TS, right);
2119                let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2120                as_inter
2121            }
2122            Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2123                return Err(AlgebraError::UnsupportedPattern)
2124            }
2125            _ => node_id,
2126        };
2127        Ok(result)
2128    }
2129
2130    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2131        debug_assert!(node_id != NodeId::MISSING);
2132        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2133            if *rev != NodeId::MISSING {
2134                return Ok(*rev);
2135            }
2136        }
2137        let rw = match self.get_kind(node_id) {
2138            Kind::End => NodeId::BEGIN,
2139            Kind::Begin => NodeId::END,
2140            Kind::Pred => node_id,
2141            Kind::Concat => {
2142                let left = self.reverse(node_id.left(self))?;
2143                let right = self.reverse(node_id.right(self))?;
2144                self.mk_concat(right, left)
2145            }
2146            Kind::Union => {
2147                let left = self.reverse(node_id.left(self))?;
2148                let right = self.reverse(node_id.right(self))?;
2149                self.mk_union(left, right)
2150            }
2151            Kind::Inter => {
2152                let left = self.reverse(node_id.left(self))?;
2153                let right = self.reverse(node_id.right(self))?;
2154                self.mk_inter(left, right)
2155            }
2156            Kind::Star => {
2157                let body = self.reverse(node_id.left(self))?;
2158                self.mk_star(body)
2159            }
2160            Kind::Compl => {
2161                if self.contains_look(node_id.left(self)) {
2162                    return Err(AlgebraError::UnsupportedPattern);
2163                }
2164                let body = self.reverse(node_id.left(self))?;
2165                self.mk_compl(body)
2166            }
2167            Kind::Lookbehind => {
2168                let prev = self.get_lookbehind_prev(node_id);
2169                let inner_id = self.get_lookbehind_inner(node_id);
2170                let rev_inner = self.reverse(inner_id)?;
2171                let rev_prev = if prev != NodeId::MISSING {
2172                    self.reverse(prev)?
2173                } else {
2174                    NodeId::MISSING
2175                };
2176                self.mk_lookahead(rev_inner, rev_prev, 0)
2177            }
2178            Kind::Lookahead => {
2179                let rel = self.get_lookahead_rel(node_id);
2180                // no nullability, we can write as intersection
2181                if rel == u32::MAX {
2182                    let lbody = self.get_lookahead_inner(node_id);
2183                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2184                    let rbody = self.reverse(lbody);
2185                    let rtail = self.reverse(ltail);
2186                    let rev = self.mk_lookbehind(rbody?, rtail?);
2187                    return Ok(self.cache_reversed(node_id, rev));
2188                }
2189                if rel != 0 {
2190                    return Err(AlgebraError::UnsupportedPattern);
2191                }
2192                let tail_node = self.get_lookahead_tail(node_id);
2193                let rev_tail = if tail_node != NodeId::MISSING {
2194                    self.reverse(tail_node)?
2195                } else {
2196                    NodeId::MISSING
2197                };
2198                let inner_id = self.get_lookahead_inner(node_id);
2199                let rev_inner = self.reverse(inner_id)?;
2200                self.mk_lookbehind(rev_inner, rev_tail)
2201            }
2202            Kind::Counted => {
2203                return Err(AlgebraError::UnsupportedPattern);
2204            }
2205        };
2206        self.cache_reversed(node_id, rw);
2207        Ok(rw)
2208    }
2209
2210    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2211        let node = NodeKey {
2212            kind: Kind::Pred,
2213            left: NodeId::MISSING,
2214            right: NodeId::MISSING,
2215            extra: pred.0,
2216        };
2217        self.get_node_id(node)
2218    }
2219
2220    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2221        let key = NodeKey {
2222            kind: Kind::Compl,
2223            left: body,
2224            right: NodeId::MISSING,
2225            extra: u32::MAX,
2226        };
2227        if let Some(id) = self.key_is_created(&key) {
2228            return *id;
2229        }
2230        if body == NodeId::BOT {
2231            return NodeId::TS;
2232        }
2233        if body == NodeId::TS {
2234            return NodeId::BOT;
2235        }
2236
2237        if let Some(contains_body) = body.is_contains(self) {
2238            if contains_body.is_pred(self) {
2239                let pred = contains_body.pred_tset(self);
2240                let notpred = self.mk_pred_not(pred);
2241                let node = self.mk_star(notpred);
2242                return self.init_as(key, node);
2243            } else if contains_body == NodeId::END {
2244                return self.init_as(key, NodeId::BOT);
2245            }
2246        };
2247
2248        if self.get_kind(body) == Kind::Compl {
2249            return self.get_node(body).left;
2250        }
2251
2252        self.get_node_id(key)
2253    }
2254
2255    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2256        let nid = self.get_nulls_id(body);
2257        let nref = self.mb.nb.get_set_ref(nid).clone();
2258        let mut futures = NodeId::BOT;
2259        for n in nref.iter() {
2260            if !n.is_mask_nullable(mask) {
2261                continue;
2262            }
2263
2264            let eff = if n.rel == 0 {
2265                NodeId::EPS
2266            } else {
2267                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2268            };
2269            futures = self.mk_union(futures, eff)
2270        }
2271        futures
2272    }
2273
2274    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2275        if cfg!(feature = "norewrite") {
2276            return None;
2277        }
2278        #[cfg(debug_assertions)]
2279        if std::env::var("TRACE_CONCAT").is_ok() {
2280            eprintln!("rw2 head={} tail={}", self.pp(head), self.pp(tail));
2281        }
2282
2283        if tail.is_lookbehind(self) {
2284            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2285            return self
2286                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2287                .ok();
2288        }
2289        if head.is_lookahead(self) {
2290            let la_tail = self.get_lookahead_tail(head);
2291            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2292            if new_la_tail.is_center_nullable(self) {
2293                let la_new = self.mk_lookahead_internal(
2294                    self.get_lookahead_inner(head),
2295                    new_la_tail,
2296                    u32::MAX,
2297                );
2298                return Some(la_new);
2299            }
2300            let la_body = self.get_lookahead_inner(head);
2301            let la_rel = self.get_lookahead_rel(head);
2302            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2303                let tail_rel = self.get_lookahead_rel(new_la_tail);
2304                tail_rel + la_rel
2305            } else {
2306                u32::MAX
2307            };
2308
2309            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2310        }
2311
2312        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2313            return Some(head);
2314        }
2315
2316        // breaks:rev
2317        // if head == NodeId::TS && tail == NodeId::END {
2318        //     return Some(head);
2319        // }
2320
2321        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2322            return Some(NodeId::TS);
2323        }
2324
2325        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2326            return Some(NodeId::TS);
2327        }
2328
2329        if head.is_inter(self) && tail == NodeId::TS {
2330            let mut alltopstar = true;
2331            head.iter_inter(
2332                self,
2333                &mut (|b, v| {
2334                    alltopstar = b.ends_with_ts(v);
2335                }),
2336            );
2337
2338            if alltopstar {
2339                return Some(head);
2340            }
2341        }
2342
2343        if head.is_star(self) && head == tail {
2344            return Some(head);
2345        }
2346
2347        None
2348    }
2349
2350    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2351        use Kind::*;
2352
2353        if cfg!(feature = "norewrite") {
2354            return None;
2355        }
2356        if left == right {
2357            return Some(left);
2358        }
2359
2360        if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2361            return Some(left);
2362        }
2363        if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2364            return Some(right);
2365        }
2366
2367        if left.is_inter(self) && right.is_inter(self) {
2368            let mut lconj: Vec<NodeId> = Vec::new();
2369            left.any_inter_component(self, |v| {
2370                lconj.push(v);
2371                false
2372            });
2373            let lconj_initial = lconj.len();
2374            let mut common = NodeId::TS;
2375            let mut r_rest = NodeId::TS;
2376            let mut cur = right;
2377            loop {
2378                let (v, next) = if cur.kind(self) == Inter {
2379                    (cur.left(self), Some(cur.right(self)))
2380                } else {
2381                    (cur, None)
2382                };
2383                if let Some(pos) = lconj.iter().position(|&x| x == v) {
2384                    lconj.swap_remove(pos);
2385                    common = self.mk_inter(v, common);
2386                } else {
2387                    r_rest = self.mk_inter(v, r_rest);
2388                }
2389                match next {
2390                    Some(n) => cur = n,
2391                    None => break,
2392                }
2393            }
2394            if lconj.len() < lconj_initial {
2395                let l_rest = lconj
2396                    .iter()
2397                    .fold(NodeId::TS, |acc, &v| self.mk_inter(v, acc));
2398                let inner_union = self.mk_union(l_rest, r_rest);
2399                return Some(self.mk_inter(common, inner_union));
2400            }
2401        }
2402
2403        if left.is_pred(self) && right.is_pred(self) {
2404            let l = left.pred_tset(self);
2405            let r = right.pred_tset(self);
2406            let solver = self.solver();
2407            let psi = solver.or_id(l, r);
2408            let rewrite = self.mk_pred(psi);
2409            return Some(rewrite);
2410        }
2411
2412        if left == NodeId::EPS
2413            && self.get_extra(left) == 0
2414            && self.nullability(right) == Nullability::ALWAYS
2415        {
2416            return Some(right);
2417        }
2418
2419        if left.is_lookahead(self) && right.is_lookahead(self) {
2420            let lb = left.left(self);
2421            let lt = left.right(self);
2422            let lrel = left.extra(self);
2423
2424            let rb = right.left(self);
2425            let rt = right.right(self);
2426            let rrel = right.extra(self);
2427
2428            if lrel == rrel && lt.is_missing() && rt.is_missing() {
2429                let unioned = self.mk_union(lb, rb);
2430                let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2431                return Some(node);
2432            }
2433        }
2434
2435        if right.is_kind(self, Concat) {
2436            if left == NodeId::END
2437                && right.left(self) == NodeId::END
2438                && self.nullability(right).has(Nullability::END)
2439            {
2440                return Some(right);
2441            }
2442            // .*|.*a.* => .*(a.*|)
2443            if left == right.left(self) {
2444                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2445                let rw = self.mk_concat(left, rhs);
2446                return Some(rw);
2447            }
2448            if left.is_kind(self, Concat) {
2449                let head1 = left.left(self);
2450                let head2 = right.left(self);
2451
2452                if head1 == head2 {
2453                    let t1 = left.right(self);
2454                    let t2 = right.right(self);
2455                    // opportunistic rewrites
2456                    if head1 == NodeId::TS {
2457                        if t2.has_concat_tail(self, t1) {
2458                            return Some(left);
2459                        }
2460                        if t1.has_concat_tail(self, t2) {
2461                            return Some(right);
2462                        }
2463                    }
2464                    let un = self.mk_union(t1, t2);
2465                    return Some(self.mk_concat(left.left(self), un));
2466                }
2467
2468                // xa|ya => (x|y)a - suffix factoring via reverse
2469                // TODO: valid and looks prettier but .reverse is not good for builder perf,
2470                // leaving out unless i find a case where it helps significantly
2471                if false {
2472                    let end1 = self.get_concat_end(left);
2473                    let end2 = self.get_concat_end(right);
2474                    if end1 == end2 {
2475                        let contains_lookarounds =
2476                            left.contains_lookaround(self) || right.contains_lookaround(self);
2477                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2478                        if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2479                            let rev1 = self.reverse(left).unwrap();
2480                            let rev2 = self.reverse(right).unwrap();
2481
2482                            let union_rev = self.mk_union(rev1, rev2);
2483                            return Some(self.reverse(union_rev).unwrap());
2484                        }
2485                    }
2486                }
2487            }
2488            if left.is_pred(self) && left == right.left(self) {
2489                let un = self.mk_opt(right.right(self));
2490                let conc = self.mk_concat(left, un);
2491                return Some(conc);
2492            }
2493        }
2494
2495        if left == NodeId::EPS && right.is_plus(self) {
2496            let result = self.mk_star(right.left(self));
2497            return Some(result);
2498        }
2499
2500        // (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2501        if left.is_inter(self) && right.is_inter(self) {
2502            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2503                return Some(rw);
2504            }
2505        }
2506
2507        None
2508    }
2509
2510    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2511        let mut curr = node;
2512        while curr.is_inter(self) {
2513            out.push(self.get_left(curr));
2514            curr = self.get_right(curr);
2515        }
2516        out.push(curr);
2517    }
2518
2519    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2520        let mut curr = node;
2521        let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2522        if has_prefix {
2523            curr = self.get_right(curr);
2524        }
2525        let mut count = 0u32;
2526        let mut pred_set = None;
2527        while curr.is_concat(self) {
2528            let head = self.get_left(curr);
2529            if !head.is_pred(self) {
2530                return None;
2531            }
2532            let ps = head.pred_tset(self);
2533            match pred_set {
2534                None => pred_set = Some(ps),
2535                Some(existing) => {
2536                    if existing != ps {
2537                        return None;
2538                    }
2539                }
2540            }
2541            curr = self.get_right(curr);
2542            count += 1;
2543        }
2544        if count == 0 || !curr.is_star(self) {
2545            return None;
2546        }
2547        Some((has_prefix, pred_set.unwrap(), curr, count))
2548    }
2549
2550    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2551        let mut j = 0;
2552        for &s in sub {
2553            while j < sup.len() && sup[j] < s {
2554                j += 1;
2555            }
2556            if j >= sup.len() || sup[j] != s {
2557                return false;
2558            }
2559            j += 1;
2560        }
2561        true
2562    }
2563
2564    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2565        if !left.is_inter(self) || !right.is_inter(self) {
2566            return None;
2567        }
2568
2569        let mut lc: Vec<NodeId> = Vec::new();
2570        let mut rc: Vec<NodeId> = Vec::new();
2571        self.collect_inter_components(left, &mut lc);
2572        self.collect_inter_components(right, &mut rc);
2573
2574        // component subset check: fewer constraints = larger language
2575        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2576            return Some(left);
2577        }
2578        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2579        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2580            return Some(right);
2581        }
2582
2583        if lc.len() == rc.len() {
2584            let mut diff_idx = None;
2585            for i in 0..lc.len() {
2586                if lc[i] != rc[i] {
2587                    if diff_idx.is_some() {
2588                        return None;
2589                    }
2590                    diff_idx = Some(i);
2591                }
2592            }
2593            if let Some(idx) = diff_idx {
2594                let a = lc[idx];
2595                let b = rc[idx];
2596                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2597                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2598                {
2599                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2600                        return if ca < cb { Some(left) } else { Some(right) };
2601                    }
2602                }
2603            }
2604        }
2605
2606        None
2607    }
2608
2609    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2610        if cfg!(feature = "norewrite") {
2611            return None;
2612        }
2613        if left == right {
2614            return Some(left);
2615        }
2616        if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2617            return Some(right);
2618        }
2619        if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2620            return Some(left);
2621        }
2622
2623        if left.is_pred(self) && right.is_pred(self) {
2624            let l = left.pred_tset(self);
2625            let r = right.pred_tset(self);
2626            let solver = self.solver();
2627            let psi = solver.and_id(l, r);
2628            let rewrite = self.mk_pred(psi);
2629            return Some(rewrite);
2630        }
2631
2632        if left.is_concat(self) && right.is_concat(self) {
2633            if left.left(self) == right.left(self) {
2634                if left.left(self).is_pred(self) {
2635                    let new_right = self.mk_inter(left.right(self), right.right(self));
2636                    return Some(self.mk_concat(left.left(self), new_right));
2637                }
2638            }
2639        }
2640
2641        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2642            return Some(NodeId::BOT);
2643        }
2644
2645        if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2646            let bodies = self.mk_union(left.left(self), right.left(self));
2647            return Some(self.mk_compl(bodies));
2648        }
2649
2650        if left == NodeId::TOPPLUS {
2651            if right.is_pred_star(self).is_some() {
2652                let newloop = self.mk_plus(right.left(self));
2653                return Some(newloop);
2654            }
2655            if right.is_never_nullable(self) {
2656                return Some(right);
2657            }
2658            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2659                return Some(NodeId::BOT);
2660            }
2661            if right.is_kind(self, Kind::Concat) {}
2662        }
2663
2664        {
2665            let l_is_la = left.is_lookahead(self);
2666            let r_is_la = right.is_lookahead(self);
2667            let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2668            let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2669            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2670                let (la_node, other, concat_body) = if r_is_la {
2671                    (right, left, NodeId::MISSING)
2672                } else if l_is_la {
2673                    (left, right, NodeId::MISSING)
2674                } else if r_is_cla {
2675                    (right.left(self), left, right.right(self))
2676                } else {
2677                    (left.left(self), right, left.right(self))
2678                };
2679                let la_body = la_node.left(self);
2680                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2681                let inter_right = if concat_body.is_missing() {
2682                    la_tail
2683                } else {
2684                    self.mk_concat(la_tail, concat_body)
2685                };
2686                let new_body = self.mk_inter(other, inter_right);
2687                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2688                return Some(self.mk_concat(la, new_body));
2689            }
2690        }
2691
2692        if self.get_kind(right) == Kind::Compl {
2693            let compl_body = right.left(self);
2694            if left == compl_body {
2695                return Some(NodeId::BOT);
2696            }
2697            if compl_body.is_concat(self) {
2698                let compl_head = compl_body.left(self);
2699                if compl_body.right(self) == NodeId::TS && compl_head == left {
2700                    return Some(NodeId::BOT);
2701                }
2702            }
2703        }
2704
2705        if let Some(pleft) = left.is_pred_star(self) {
2706            if let Some(pright) = right.is_pred_star(self) {
2707                let merged = self.mk_inter(pleft, pright);
2708                return Some(self.mk_star(merged));
2709            }
2710        }
2711
2712        {
2713            let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2714            let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2715            if l_is_clb || r_is_clb {
2716                let (lb, body) = if l_is_clb && r_is_clb {
2717                    let lb1 = left.left(self);
2718                    let lb2 = right.left(self);
2719                    let inner = self.mk_inter(
2720                        self.get_lookbehind_inner(lb1),
2721                        self.get_lookbehind_inner(lb2),
2722                    );
2723                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2724                    let body = self.mk_inter(left.right(self), right.right(self));
2725                    (lb, body)
2726                } else if l_is_clb {
2727                    let lb = left.left(self);
2728                    let body = self.mk_inter(left.right(self), right);
2729                    (lb, body)
2730                } else {
2731                    let lb = right.left(self);
2732                    let body = self.mk_inter(left, right.right(self));
2733                    (lb, body)
2734                };
2735                return Some(self.mk_concat(lb, body));
2736            }
2737        }
2738
2739        {
2740            let l_has_la = self.has_trailing_la(left);
2741            let r_has_la = self.has_trailing_la(right);
2742            if l_has_la || r_has_la {
2743                let (body, la) = if l_has_la && r_has_la {
2744                    let (lbody, l_la) = self.strip_trailing_la(left);
2745                    let (rbody, r_la) = self.strip_trailing_la(right);
2746                    let inner = self.mk_inter(
2747                        self.get_lookahead_inner(l_la),
2748                        self.get_lookahead_inner(r_la),
2749                    );
2750                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2751                    let body = self.mk_inter(lbody, rbody);
2752                    (body, la)
2753                } else if l_has_la {
2754                    let (lbody, la) = self.strip_trailing_la(left);
2755                    let body = self.mk_inter(lbody, right);
2756                    (body, la)
2757                } else {
2758                    let (rbody, la) = self.strip_trailing_la(right);
2759                    let body = self.mk_inter(left, rbody);
2760                    (body, la)
2761                };
2762                return Some(self.mk_concat(body, la));
2763            }
2764        }
2765
2766        None
2767    }
2768
2769    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2770        if cfg!(feature = "norewrite") {
2771            return None;
2772        }
2773        debug_assert!(self.get_kind(right_union) == Kind::Union);
2774
2775        let mut rewritten = None;
2776        right_union.iter_union_while(
2777            self,
2778            &mut (|b, v| {
2779                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2780                    rewritten = Some((v, rw));
2781                    false
2782                } else {
2783                    true
2784                }
2785            }),
2786        );
2787
2788        if let Some(rw) = rewritten {
2789            let mut new_union = NodeId::BOT;
2790            right_union.iter_union(
2791                self,
2792                &mut (|b, v| {
2793                    if v == rw.0 {
2794                        new_union = b.mk_union(rw.1, new_union)
2795                    } else {
2796                        new_union = b.mk_union(v, new_union)
2797                    }
2798                }),
2799            );
2800            return Some(new_union);
2801        };
2802
2803        None
2804    }
2805
2806    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2807        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2808        debug_assert!(tail != NodeId::MISSING);
2809        let key = NodeKey {
2810            kind: Kind::Concat,
2811            left: head,
2812            right: tail,
2813            extra: u32::MAX,
2814        };
2815        if let Some(id) = self.key_is_created(&key) {
2816            return *id;
2817        }
2818
2819        if head == NodeId::BOT || tail == NodeId::BOT {
2820            return NodeId::BOT;
2821        }
2822        if head == NodeId::EPS {
2823            return tail;
2824        }
2825        if tail == NodeId::EPS {
2826            return head;
2827        }
2828
2829        match tail {
2830            // breaks:rev
2831            NodeId::BEGIN => {
2832                if !self.is_nullable(head, Nullability::BEGIN) {
2833                    return NodeId::BOT;
2834                } else {
2835                    return NodeId::BEGIN;
2836                }
2837            }
2838            _ => {}
2839        }
2840
2841        // normalize concats to right
2842        if head.is_kind(self, Kind::Concat) {
2843            let left = head.left(self);
2844            let newright = self.mk_concat(head.right(self), tail);
2845            let updated = self.mk_concat(left, newright);
2846            return self.init_as(key, updated);
2847        }
2848
2849        if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2850            return NodeId::BOT;
2851        }
2852
2853        if tail.is_concat(self) {
2854            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2855                let upd = self.mk_concat(rw, tail.right(self));
2856                return self.init_as(key, upd);
2857            }
2858        }
2859
2860        if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2861            return self.init_as(key, new);
2862        }
2863
2864        match (self.get_kind(head), self.get_kind(tail)) {
2865            // merge stars
2866            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2867                let rl = tail.left(self);
2868                if head == rl {
2869                    return self.init_as(key, tail);
2870                }
2871            }
2872            // attempt longer concat rw
2873            (_, Kind::Concat) => {
2874                let curr = head;
2875                let h2 = self.mk_concat(curr, tail.left(self));
2876                let tr = tail.right(self);
2877                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2878                    return self.init_as(key, new);
2879                }
2880            }
2881            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2882                return self.init_as(key, tail);
2883            }
2884            _ => {}
2885        }
2886
2887        self.init(key)
2888    }
2889
2890    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2891        // LNF: lookbehind must start with ts
2892        let lb_body = {
2893            match self.starts_with_ts(lb_body) {
2894                true => lb_body,
2895                false => self.mk_concat(NodeId::TS, lb_body),
2896            }
2897        };
2898        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2899        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2900    }
2901
2902    fn mk_lookbehind_internal(
2903        &mut self,
2904        lb_body: NodeId,
2905        lb_prev: NodeId,
2906    ) -> Result<NodeId, AlgebraError> {
2907        debug_assert!(lb_body != NodeId::MISSING);
2908        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2909        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2910            return Ok(NodeId::BOT);
2911        }
2912        if lb_body == NodeId::TS {
2913            return Ok(lb_prev);
2914        }
2915        if lb_body == NodeId::EPS {
2916            match lb_prev {
2917                NodeId::MISSING => return Ok(NodeId::EPS),
2918                NodeId::EPS => return Ok(NodeId::EPS),
2919                _ => return Ok(lb_prev),
2920            }
2921        }
2922
2923        let key = NodeKey {
2924            kind: Kind::Lookbehind,
2925            left: lb_body,
2926            right: lb_prev,
2927            extra: u32::MAX,
2928        };
2929        match self.key_is_created(&key) {
2930            Some(id) => Ok(*id),
2931            None => {
2932                if lb_prev == NodeId::TS {
2933                    return Ok(self.mk_concat(lb_prev, lb_body));
2934                }
2935
2936                Ok(self.init(key))
2937            }
2938        }
2939    }
2940
2941    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2942        // LNF: lookahead must end with ts
2943        let la_body = {
2944            match self.ends_with_ts(la_body) {
2945                true => la_body,
2946                false => self.mk_concat(la_body, NodeId::TS),
2947            }
2948        };
2949        let rel = if NodeId::MISSING == la_tail {
2950            rel
2951        } else {
2952            match la_tail.is_center_nullable(self) {
2953                false => u32::MAX,
2954                true => rel,
2955            }
2956        };
2957
2958        self.mk_lookahead_internal(la_body, la_tail, rel)
2959    }
2960
2961    // rel max = carries no nullability, can potentially rw to intersection
2962    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2963        let key = NodeKey {
2964            kind: Kind::Lookahead,
2965            left: la_body,
2966            right: la_tail,
2967            extra: rel,
2968        };
2969        if let Some(id) = self.key_is_created(&key) {
2970            return *id;
2971        }
2972        if la_body == NodeId::TS {
2973            if rel == 0 {
2974                return la_tail.missing_to_eps();
2975            } else {
2976                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2977            }
2978        }
2979        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2980            return NodeId::BOT;
2981        }
2982        if la_tail.is_missing() && rel == u32::MAX {
2983            return NodeId::BOT;
2984        }
2985
2986        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2987            return la_body;
2988        }
2989
2990        if la_tail == NodeId::TS {
2991            if rel == 0 || rel == u32::MAX {
2992                return self.mk_concat(la_body, NodeId::TS);
2993            } else if rel == u32::MAX {
2994                return self.mk_begins_with(la_body);
2995            }
2996        }
2997
2998        if rel == u32::MAX {
2999            if la_tail.is_missing() {
3000                return NodeId::BOT;
3001            }
3002
3003            if self.is_always_nullable(la_body) {
3004                return la_tail.missing_to_eps();
3005            }
3006
3007            if la_tail != NodeId::MISSING {
3008                match self.get_kind(la_tail) {
3009                    _ => {
3010                        if la_body.is_compl_plus_end(self) {
3011                            let minlen = self.get_min_length_only(la_tail);
3012                            if minlen >= 1 {
3013                                return NodeId::BOT;
3014                            }
3015                        }
3016                    }
3017                }
3018            }
3019        }
3020
3021        if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3022            let la_body2 = self.get_lookahead_inner(la_tail);
3023            let body1_ts = self.mk_concat(la_body, NodeId::TS);
3024            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3025            let new_la_body = self.mk_inter(body1_ts, body2_ts);
3026            let new_la_rel = self.get_lookahead_rel(la_tail);
3027            let new_la_tail = self.get_lookahead_tail(la_tail);
3028            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3029        }
3030
3031        if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3032            let la_body_right = la_body.right(self);
3033            if self.is_always_nullable(la_body_right) {
3034                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3035            }
3036            if la_body.right(self) == NodeId::END {
3037                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3038            }
3039            let bodyright = la_body.right(self);
3040            if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3041                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3042                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3043            }
3044        }
3045
3046        if la_tail != NodeId::MISSING {
3047            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3048                let lpred = la_body.left(self);
3049                if lpred.is_pred(self) {
3050                    let l = lpred.pred_tset(self);
3051                    let r = la_tail.pred_tset(self);
3052                    let psi_and = self.solver().and_id(l, r);
3053                    let rewrite = self.mk_pred(psi_and);
3054                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3055                    let new_right =
3056                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3057                    return self.mk_concat(rewrite, new_right);
3058                }
3059            }
3060        }
3061
3062        self.get_node_id(key)
3063    }
3064
3065    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3066        let has_match = (packed >> 16) > 0;
3067        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3068            return NodeId::BOT;
3069        }
3070        debug_assert!(
3071            body == NodeId::BOT || !self.is_infinite(body),
3072            "Counted body must have finite max length"
3073        );
3074        let chain = self.prune_counted_chain(body, chain);
3075        let key = NodeKey {
3076            kind: Kind::Counted,
3077            left: body,
3078            right: chain,
3079            extra: packed,
3080        };
3081        if let Some(id) = self.key_is_created(&key) {
3082            return *id;
3083        }
3084        self.get_node_id(key)
3085    }
3086
3087    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3088        if chain == NodeId::MISSING || body == NodeId::BOT {
3089            return chain;
3090        }
3091        if self.nullability(body) != Nullability::NEVER {
3092            return NodeId::MISSING;
3093        }
3094        let chain_body = chain.left(self);
3095        if chain_body == NodeId::BOT {
3096            return chain;
3097        }
3098        let not_begins = self.mk_not_begins_with(body);
3099        let inter = self.mk_inter(chain_body, not_begins);
3100        let is_empty = inter == NodeId::BOT;
3101        if is_empty {
3102            self.prune_counted_chain(body, chain.right(self))
3103        } else {
3104            chain
3105        }
3106    }
3107
3108    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3109        let neg_inner = self.mk_concat(body, NodeId::TS);
3110        let neg_part = self.mk_compl(neg_inner);
3111        let conc = self.mk_concat(neg_part, NodeId::END);
3112        self.mk_lookahead(conc, NodeId::MISSING, rel)
3113    }
3114
3115    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3116        match self.get_node(body).kind {
3117            Kind::Pred => {
3118                let psi = body.pred_tset(self);
3119                let negated = self.mk_pred_not(psi);
3120                let union = self.mk_union(NodeId::BEGIN, negated);
3121                // lb_prev is MISSING - cannot trigger UnsupportedPattern
3122                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3123            }
3124            _ => {
3125                // ~body ∩ utf8_char: non-nullable (utf8_char requires ≥1 byte),
3126                // so \A | negated won't be stripped by strip_prefix_safe
3127                let uc = crate::unicode_classes::utf8_char(self);
3128                let neg = self.mk_compl(body);
3129                let negated = self.mk_inter(neg, uc);
3130                let union = self.mk_union(NodeId::BEGIN, negated);
3131                // lb_prev is MISSING - cannot trigger UnsupportedPattern
3132                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3133            }
3134        }
3135    }
3136
3137    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3138        debug_assert!(left != NodeId::MISSING);
3139        debug_assert!(right != NodeId::MISSING);
3140        if left > right {
3141            return self.mk_union(right, left);
3142        }
3143        let key = NodeKey {
3144            kind: Kind::Union,
3145            left,
3146            right,
3147            extra: u32::MAX,
3148        };
3149        if let Some(id) = self.key_is_created(&key) {
3150            return *id;
3151        }
3152        if left == right {
3153            return self.init_as(key, left);
3154        }
3155        if left == NodeId::BOT {
3156            return self.init_as(key, right);
3157        }
3158        if right == NodeId::BOT {
3159            return self.init_as(key, left);
3160        }
3161        if right == NodeId::TS {
3162            return self.init_as(key, right);
3163        }
3164        if left == NodeId::TS {
3165            return self.init_as(key, left);
3166        }
3167        match (self.get_kind(left), self.get_kind(right)) {
3168            (Kind::Union, _) => {
3169                self.iter_unions_b(left, &mut |b, v| {
3170                    b.temp_vec.push(v);
3171                });
3172                self.iter_unions_b(right, &mut |b, v| {
3173                    b.temp_vec.push(v);
3174                });
3175                self.temp_vec.sort();
3176                let tree = self.temp_vec.clone();
3177                self.temp_vec.clear();
3178                let newnode = tree
3179                    .iter()
3180                    .rev()
3181                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3182                return self.init_as(key, newnode);
3183            }
3184            (_, Kind::Union) => {
3185                let rleft = right.left(self);
3186                // if left_node id is smaller than rleft, just create a new union
3187                if left > rleft {
3188                    self.iter_unions_b(left, &mut |b, v| {
3189                        b.temp_vec.push(v);
3190                    });
3191                    self.iter_unions_b(right, &mut |b, v| {
3192                        b.temp_vec.push(v);
3193                    });
3194                    self.temp_vec.sort();
3195                    let tree = self.temp_vec.clone();
3196                    self.temp_vec.clear();
3197                    let newnode = tree
3198                        .iter()
3199                        .rev()
3200                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3201                    return self.init_as(key, newnode);
3202                } else {
3203                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3204                        return self.init_as(key, rw);
3205                    }
3206                }
3207            }
3208            _ => {}
3209        }
3210
3211        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3212            return self.init_as(key, rw);
3213        }
3214        self.init(key)
3215    }
3216
3217    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3218        debug_assert!(left_id != NodeId::MISSING);
3219        debug_assert!(right_id != NodeId::MISSING);
3220        if left_id == right_id {
3221            return left_id;
3222        }
3223        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3224            return NodeId::BOT;
3225        }
3226        if left_id == NodeId::TS {
3227            return right_id;
3228        }
3229        if right_id == NodeId::TS {
3230            return left_id;
3231        }
3232        if left_id > right_id {
3233            return self.mk_inter(right_id, left_id);
3234        }
3235        let key = NodeKey {
3236            kind: Kind::Inter,
3237            left: left_id,
3238            right: right_id,
3239            extra: u32::MAX,
3240        };
3241        if let Some(id) = self.key_is_created(&key) {
3242            return *id;
3243        }
3244
3245        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3246            return self.init_as(key, rw);
3247        }
3248
3249        self.init(key)
3250    }
3251
3252    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3253        let node = NodeKey {
3254            kind,
3255            left: NodeId::MISSING,
3256            right: NodeId::MISSING,
3257            extra: u32::MAX,
3258        };
3259        self.init(node)
3260    }
3261
3262    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3263        let star = self.mk_star(body_id);
3264        self.mk_concat(body_id, star)
3265    }
3266    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3267        let opt = self.mk_opt(body_id);
3268        let mut nodes1 = vec![];
3269        for _ in lower..upper {
3270            nodes1.push(opt);
3271        }
3272        for _ in 0..lower {
3273            nodes1.push(body_id);
3274        }
3275        self.mk_concats(nodes1.into_iter())
3276    }
3277    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3278        self.mk_union(NodeId::EPS, body_id)
3279    }
3280
3281    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3282        let key = NodeKey {
3283            kind: Kind::Star,
3284            left: body_id,
3285            right: NodeId::MISSING,
3286            extra: 0,
3287        };
3288        if let Some(id) = self.key_is_created(&key) {
3289            return *id;
3290        }
3291        // _*{500} is still _*
3292        if body_id.is_kind(self, Kind::Star) {
3293            return body_id;
3294        }
3295        self.get_node_id(key)
3296    }
3297
3298    /// it's cheaper to check this once as an edge-case
3299    /// than to compute a 4th nullability bit for every node
3300    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3301        match self.get_kind(node_id) {
3302            Kind::End => Nullability::EMPTYSTRING,
3303            Kind::Begin => Nullability::EMPTYSTRING,
3304            Kind::Pred => Nullability::NEVER,
3305            Kind::Star => Nullability::ALWAYS,
3306            Kind::Inter | Kind::Concat => {
3307                let lnull = self.nullability_emptystring(node_id.left(self));
3308                let rnull = self.nullability_emptystring(node_id.right(self));
3309                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3310            }
3311            Kind::Union => {
3312                let lnull = self.nullability_emptystring(node_id.left(self));
3313                let rnull = self.nullability_emptystring(node_id.right(self));
3314                lnull.or(rnull)
3315            }
3316            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3317            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3318            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3319            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3320        }
3321    }
3322
3323    #[inline(always)]
3324    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3325        self.get_meta(node_id)
3326            .flags
3327            .nullability()
3328            .has(Nullability::CENTER.or(Nullability::END))
3329    }
3330
3331    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3332        self.get_only_nullability(node_id)
3333    }
3334
3335    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3336        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3337    }
3338
3339    pub fn pp(&self, node_id: NodeId) -> String {
3340        let mut s = String::new();
3341        self.ppw(&mut s, node_id).unwrap();
3342        s
3343    }
3344
3345    #[allow(dead_code)]
3346    pub fn pp_nulls(&self, node_id: NodeId) -> String {
3347        let nu = self.get_nulls_id(node_id);
3348        let nr = self.mb.nb.get_set_ref(nu);
3349        let s1 = format!("{:?}", nr);
3350        s1
3351    }
3352
3353    #[allow(dead_code)]
3354    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3355        match self.get_tregex(term_id) {
3356            TRegex::Leaf(node_id) => {
3357                let mut s = String::new();
3358                self.ppw(&mut s, *node_id).unwrap();
3359                s
3360            }
3361            TRegex::ITE(cond, then_id, else_id) => {
3362                format!(
3363                    "ITE({},{},{})",
3364                    self.solver_ref().pp(*cond),
3365                    self.ppt(*then_id),
3366                    self.ppt(*else_id)
3367                )
3368            }
3369        }
3370    }
3371
3372    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3373        if cfg!(feature = "graphviz") {
3374            match node_id {
3375                NodeId::MISSING => return write!(s, "MISSING"),
3376                NodeId::BOT => return write!(s, "⊥"),
3377                NodeId::TS => return write!(s, "_*"),
3378                NodeId::TOP => return write!(s, "_"),
3379                NodeId::EPS => return write!(s, "ε"),
3380                _ => {}
3381            }
3382        }
3383
3384        match node_id {
3385            NodeId::MISSING => return write!(s, "MISSING"),
3386            NodeId::BOT => return write!(s, "⊥"),
3387            NodeId::TS => return write!(s, "_*"),
3388            NodeId::TOP => return write!(s, "_"),
3389            NodeId::EPS => return write!(s, ""),
3390            _ => {}
3391        }
3392
3393        match self.get_kind(node_id) {
3394            Kind::End => write!(s, r"\z"),
3395            Kind::Begin => write!(s, r"\A"),
3396            Kind::Pred => {
3397                let psi = node_id.pred_tset(self);
3398                if psi == TSetId::EMPTY {
3399                    write!(s, r"⊥")
3400                } else if psi == TSetId::FULL {
3401                    write!(s, r"_")
3402                } else {
3403                    write!(s, "{}", self.solver_ref().pp(psi))
3404                }
3405            }
3406            Kind::Inter => {
3407                write!(s, "(")?;
3408                self.ppw(s, node_id.left(self))?;
3409                write!(s, "&")?;
3410                let mut curr = node_id.right(self);
3411                while curr.is_inter(self) {
3412                    let n = curr.left(self);
3413                    self.ppw(s, n)?;
3414                    write!(s, "&")?;
3415                    curr = curr.right(self);
3416                }
3417                self.ppw(s, curr)?;
3418                write!(s, ")")
3419            }
3420            Kind::Union => {
3421                let left = node_id.left(self);
3422                let right = node_id.right(self);
3423                write!(s, "(")?;
3424                self.ppw(s, left)?;
3425                write!(s, "|")?;
3426                let mut curr = right;
3427                while self.get_kind(curr) == Kind::Union {
3428                    let n = curr.left(self);
3429                    self.ppw(s, n)?;
3430                    write!(s, "|")?;
3431                    curr = curr.right(self);
3432                }
3433                self.ppw(s, curr)?;
3434                write!(s, ")")
3435            }
3436            Kind::Concat => {
3437                let left = node_id.left(self);
3438                let right = node_id.right(self);
3439                if right.is_star(self) && right.left(self) == left {
3440                    self.ppw(s, left)?;
3441                    write!(s, "+")?;
3442                    return Ok(());
3443                }
3444                if right.is_concat(self) {
3445                    let rl = right.left(self);
3446                    if rl.is_star(self) && rl.left(self) == left {
3447                        self.ppw(s, left)?;
3448                        write!(s, "+")?;
3449                        return self.ppw(s, right.right(self));
3450                    }
3451                }
3452                if right.is_concat(self) && right.left(self) == left {
3453                    let mut num = 1;
3454                    let mut right = right;
3455                    while right.is_concat(self) && right.left(self) == left {
3456                        num += 1;
3457                        right = right.right(self);
3458                    }
3459                    // (|X){n} followed by X{m} -> X{m,m+n}
3460                    if let Some(inner) = left.is_opt_v(self) {
3461                        let mut inner_count = 0;
3462                        let mut right2 = right;
3463                        while right2.is_concat(self) && right2.left(self) == inner {
3464                            inner_count += 1;
3465                            right2 = right2.right(self);
3466                        }
3467                        if right2 == inner {
3468                            inner_count += 1;
3469                            self.ppw(s, inner)?;
3470                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3471                        }
3472                        if inner_count > 0 {
3473                            self.ppw(s, inner)?;
3474                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3475                            return self.ppw(s, right2);
3476                        }
3477                    }
3478                    self.ppw(s, left)?;
3479                    if right == left {
3480                        num += 1;
3481                        return write!(s, "{{{}}}", num);
3482                    }
3483                    if num <= 3 && left.is_pred(self) {
3484                        for _ in 1..num {
3485                            self.ppw(s, left)?;
3486                        }
3487                        return self.ppw(s, right);
3488                    } else {
3489                        write!(s, "{{{}}}", num)?;
3490                        return self.ppw(s, right);
3491                    }
3492                }
3493                self.ppw(s, left)?;
3494                self.ppw(s, right)
3495            }
3496            Kind::Star => {
3497                let left = node_id.left(self);
3498                let leftkind = self.get_kind(left);
3499                match leftkind {
3500                    Kind::Concat | Kind::Star | Kind::Compl => {
3501                        write!(s, "(")?;
3502                        self.ppw(s, left)?;
3503                        write!(s, ")")?;
3504                    }
3505                    _ => {
3506                        self.ppw(s, left)?;
3507                    }
3508                };
3509                write!(s, "*")
3510            }
3511            Kind::Compl => {
3512                write!(s, "~(")?;
3513                self.ppw(s, node_id.left(self))?;
3514                write!(s, ")")
3515            }
3516            Kind::Lookbehind => {
3517                let lbleft = self.get_lookbehind_prev(node_id);
3518                let lbinner = self.get_lookbehind_inner(node_id);
3519                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3520                if lbleft != NodeId::MISSING {
3521                    write!(s, "❮")?;
3522                    self.ppw(s, lbleft)?;
3523                    write!(s, "❯")?;
3524                }
3525
3526                write!(s, "(?<=")?;
3527                self.ppw(s, lbinner)?;
3528                write!(s, ")")
3529            }
3530            Kind::Lookahead => {
3531                let inner = self.get_lookahead_inner(node_id);
3532                write!(s, "(?=")?;
3533                self.ppw(s, inner)?;
3534                write!(s, ")")?;
3535                if self.get_lookahead_rel(node_id) != 0 {
3536                    write!(s, "{{")?;
3537                    let rel = self.get_lookahead_rel(node_id);
3538                    if rel == u32::MAX {
3539                        write!(s, "∅")?;
3540                    } else {
3541                        write!(s, "{}", rel)?;
3542                    }
3543                    write!(s, "}}")?;
3544                }
3545                if node_id.right(self) == NodeId::MISSING {
3546                    Ok(())
3547                } else {
3548                    write!(s, "❮")?;
3549                    self.ppw(s, node_id.right(self))?;
3550                    write!(s, "❯")
3551                }
3552            }
3553            Kind::Counted => {
3554                let body = node_id.left(self);
3555                let packed = self.get_extra(node_id);
3556                let step = packed & 0xFFFF;
3557                let best = packed >> 16;
3558                write!(s, "#(")?;
3559                self.ppw(s, body)?;
3560                write!(s, ")s{}b{}", step, best)
3561            }
3562        }
3563    }
3564
3565    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3566        self.mk_concat(node, NodeId::TS)
3567    }
3568
3569    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3570        let node_ts = self.mk_concat(node, NodeId::TS);
3571        self.mk_compl(node_ts)
3572    }
3573
3574    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3575        let notset = self.solver().not_id(set);
3576        self.mk_pred(notset)
3577    }
3578
3579    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3580        let set_id = self.solver().u8_to_set_id(char);
3581        self.mk_pred(set_id)
3582    }
3583
3584    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3585        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3586        self.mk_pred(rangeset)
3587    }
3588
3589    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3590        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3591        for &(lo, hi) in &ranges[1..] {
3592            let r = self.mk_range_u8(lo, hi);
3593            node = self.mk_union(node, r);
3594        }
3595        node
3596    }
3597
3598    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3599        let mut prefix = Vec::new();
3600        let mut curr = node;
3601        loop {
3602            if curr == NodeId::EPS {
3603                let full = !prefix.is_empty();
3604                return (prefix, full);
3605            }
3606            if curr == NodeId::BOT {
3607                break;
3608            }
3609            if curr.is_pred(self) {
3610                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3611                    Some(byte) => {
3612                        prefix.push(byte);
3613                        return (prefix, true);
3614                    }
3615                    None => break, // multi-byte pred: pattern not fully consumed
3616                }
3617            }
3618            if !curr.is_concat(self) {
3619                break;
3620            }
3621            let left = curr.left(self);
3622            if !left.is_pred(self) {
3623                break;
3624            }
3625            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3626                Some(byte) => prefix.push(byte),
3627                None => break,
3628            }
3629            curr = curr.right(self);
3630        }
3631        (prefix, false)
3632    }
3633
3634    #[allow(dead_code)]
3635    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3636        let mut result = NodeId::EPS;
3637        for byte in raw_str.iter().rev() {
3638            let node = self.mk_u8(*byte);
3639            result = self.mk_concat(node, result);
3640        }
3641        result
3642    }
3643
3644    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3645        let mut result = NodeId::EPS;
3646        for byte in raw_str.bytes().rev() {
3647            let node = self.mk_u8(byte);
3648            result = self.mk_concat(node, result);
3649        }
3650        result
3651    }
3652
3653    pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3654        self.prune_rec::<true>(node_id, memo)
3655    }
3656
3657    pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3658        self.prune_rec::<false>(node_id, memo)
3659    }
3660
3661    pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
3662        let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3663        self.simplify_rev_initial_rec(node_id, &mut memo)
3664    }
3665
3666    fn simplify_rev_initial_rec(
3667        &mut self,
3668        node_id: NodeId,
3669        memo: &mut FxHashMap<NodeId, NodeId>,
3670    ) -> NodeId {
3671        if let Some(&v) = memo.get(&node_id) {
3672            return v;
3673        }
3674        let out = match self.get_kind(node_id) {
3675            Kind::Union => {
3676                let mut parts: Vec<NodeId> = Vec::new();
3677                self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
3678                for p in &mut parts {
3679                    *p = self.simplify_rev_initial_rec(*p, memo);
3680                }
3681                parts
3682                    .iter()
3683                    .rev()
3684                    .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3685            }
3686            Kind::Concat => {
3687                let l = node_id.left(self);
3688                let r = node_id.right(self);
3689                let l_new = self.simplify_rev_initial_rec(l, memo);
3690                let r_new = self.simplify_rev_initial_rec(r, memo);
3691                let rebuilt = if l_new == l && r_new == r {
3692                    node_id
3693                } else {
3694                    self.mk_concat(l_new, r_new)
3695                };
3696                if self.get_kind(rebuilt) != Kind::Concat {
3697                    return rebuilt;
3698                }
3699                let cl = rebuilt.left(self);
3700                let cr = rebuilt.right(self);
3701                if cl != NodeId::TS {
3702                    return rebuilt;
3703                }
3704                let (head, tail) = if self.get_kind(cr) == Kind::Concat {
3705                    (cr.left(self), cr.right(self))
3706                } else {
3707                    (cr, NodeId::EPS)
3708                };
3709                if self.nullability(tail) != Nullability::ALWAYS {
3710                    return rebuilt;
3711                }
3712                if self.get_kind(head) != Kind::Union {
3713                    return rebuilt;
3714                }
3715                let mut has_begin = false;
3716                self.iter_unions_b(head, &mut |_, v| {
3717                    if v == NodeId::BEGIN {
3718                        has_begin = true;
3719                    }
3720                });
3721                if has_begin {
3722                    NodeId::TS
3723                } else {
3724                    rebuilt
3725                }
3726            }
3727            _ => node_id,
3728        };
3729        memo.insert(node_id, out);
3730        out
3731    }
3732
3733    fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
3734        if self.get_kind(n) != Kind::Concat {
3735            return n;
3736        }
3737        let l = n.left(self);
3738        let r = n.right(self);
3739        if l == NodeId::TS && r == NodeId::END {
3740            return NodeId::TS;
3741        }
3742        let new_r = self.strip_la_body_end(r);
3743        if new_r == r {
3744            n
3745        } else {
3746            self.mk_concat(l, new_r)
3747        }
3748    }
3749
3750    fn prune_rec<const FWD: bool>(
3751        &mut self,
3752        node_id: NodeId,
3753        memo: &mut FxHashMap<NodeId, NodeId>,
3754    ) -> NodeId {
3755        if node_id == NodeId::MISSING {
3756            return node_id;
3757        }
3758        if let Some(&v) = memo.get(&node_id) {
3759            return v;
3760        }
3761        let (l, r) = (node_id.left(self), node_id.right(self));
3762        let out = match node_id.kind(self) {
3763            Kind::Union => {
3764                let l = self.prune_rec::<FWD>(l, memo);
3765                let r = self.prune_rec::<FWD>(r, memo);
3766                let mut parts: Vec<NodeId> = Vec::new();
3767                parts.push(l);
3768                self.iter_unions_b(r, &mut |_, v| parts.push(v));
3769                for p in &mut parts {
3770                    *p = self.prune_rec::<FWD>(*p, memo);
3771                }
3772
3773                if FWD {
3774                    // min rel per body for pure lookaheads (la_tail MISSING)
3775                    let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
3776                    for &p in &parts {
3777                        if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
3778                            let body = self.get_lookahead_inner(p);
3779                            let rel = self.get_lookahead_rel(p);
3780                            best.entry(body)
3781                                .and_modify(|r| *r = (*r).min(rel))
3782                                .or_insert(rel);
3783                        }
3784                    }
3785                    parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
3786                        if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
3787                            let body = self.get_lookahead_inner(p);
3788                            if self.get_lookahead_rel(p) != best[&body] {
3789                                return acc;
3790                            }
3791                        }
3792                        self.mk_union(p, acc)
3793                    })
3794                } else {
3795                    parts
3796                        .iter()
3797                        .rev()
3798                        .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3799                }
3800            }
3801            Kind::Concat => {
3802                // Rev-only collapse:  `TS · head · tail ≡ TS` when `head`
3803                // is a union containing `BEGIN` (= `\A`) as a branch and
3804                // `tail` is always-nullable.  Reasoning: the leading `TS`
3805                // can consume zero bytes, letting `BEGIN` fire at input
3806                // position 0; the always-nullable tail then accepts ε,
3807                // yielding an empty match at pos 0 for every string.  So
3808                // `L(TS · head · tail) ⊇ Σ*`.  Must run before the default
3809                // `Kind::Begin => BOT` rewrite destroys the BEGIN branch.
3810                if !FWD && l == NodeId::TS {
3811                    // decompose r as head . tail (tail may be ε if r is just head)
3812                    let (head, tail) = if self.get_kind(r) == Kind::Concat {
3813                        (r.left(self), r.right(self))
3814                    } else {
3815                        (r, NodeId::EPS)
3816                    };
3817
3818                    if self.nullability(tail) == Nullability::ALWAYS
3819                        && self.get_kind(head) == Kind::Union
3820                    {
3821                        let mut has_begin = false;
3822                        self.iter_unions_b(head, &mut |_, v| {
3823                            if v == NodeId::BEGIN {
3824                                has_begin = true;
3825                            }
3826                        });
3827                        if has_begin {
3828                            let out = NodeId::TS;
3829                            memo.insert(node_id, out);
3830                            return out;
3831                        }
3832                    }
3833                }
3834                let l = self.prune_rec::<FWD>(l, memo);
3835                let r = self.prune_rec::<FWD>(r, memo);
3836                self.mk_concat(l, r)
3837            }
3838            Kind::Inter => {
3839                let l = self.prune_rec::<FWD>(l, memo);
3840                let r = self.prune_rec::<FWD>(r, memo);
3841                self.mk_inter(l, r)
3842            }
3843            Kind::Compl => {
3844                let l = self.prune_rec::<FWD>(l, memo);
3845                self.mk_compl(l)
3846            }
3847            Kind::Lookahead => {
3848                let lrel = self.get_lookahead_rel(node_id);
3849                let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
3850                let body = self.prune_rec::<FWD>(body, memo);
3851                let tail = self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo);
3852                self.mk_lookahead_internal(body, tail, lrel)
3853            }
3854            Kind::Begin => NodeId::BOT,
3855            Kind::Counted => {
3856                let body = self.prune_rec::<FWD>(l, memo);
3857                let chain = self.prune_rec::<FWD>(r, memo);
3858                self.mk_counted(body, chain, self.get_extra(node_id))
3859            }
3860            Kind::End | Kind::Pred => node_id,
3861            Kind::Star => {
3862                let l = self.prune_rec::<FWD>(l, memo);
3863                self.mk_star(l)
3864            }
3865            Kind::Lookbehind => {
3866                let l = self.prune_rec::<FWD>(l, memo);
3867                if r.is_missing() {
3868                    l
3869                } else {
3870                    node_id
3871                }
3872            }
3873        };
3874        memo.insert(node_id, out);
3875        out
3876    }
3877
3878    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3879        let mut sorted: Vec<NodeId> = nodes.collect();
3880        if sorted.len() <= 1 {
3881            return sorted.pop().unwrap_or(NodeId::BOT);
3882        }
3883        sorted.sort();
3884        sorted.dedup();
3885        sorted.retain(|&x| x != NodeId::BOT);
3886        if sorted.is_empty() {
3887            return NodeId::BOT;
3888        }
3889        if sorted.len() > 16 {
3890            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3891            let mut non_concat: Vec<NodeId> = Vec::new();
3892            for &n in &sorted {
3893                if n.is_concat(self) {
3894                    by_head.entry(self.get_left(n)).or_default().push(n);
3895                } else {
3896                    non_concat.push(n);
3897                }
3898            }
3899            let mut absorbed: Vec<NodeId> = Vec::new();
3900            for &n in &non_concat {
3901                if by_head.contains_key(&n) {
3902                    absorbed.push(n);
3903                }
3904            }
3905            if !absorbed.is_empty() {
3906                non_concat.retain(|n| !absorbed.contains(n));
3907            }
3908            if by_head.len() < sorted.len() {
3909                let mut groups: Vec<NodeId> = non_concat;
3910                for (head, tails) in by_head {
3911                    let mut tail_nodes: Vec<NodeId> =
3912                        tails.iter().map(|&n| self.get_right(n)).collect();
3913                    if absorbed.contains(&head) {
3914                        tail_nodes.push(NodeId::EPS);
3915                    }
3916                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3917                    let factored = self.mk_concat(head, tail_union);
3918                    groups.push(factored);
3919                }
3920                groups.sort();
3921                groups.dedup();
3922                return self.mk_unions_balanced(&groups);
3923            }
3924        }
3925        self.mk_unions_balanced(&sorted)
3926    }
3927
3928    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3929        match nodes.len() {
3930            0 => NodeId::BOT,
3931            1 => nodes[0],
3932            n => {
3933                let mid = n / 2;
3934                let left = self.mk_unions_balanced(&nodes[..mid]);
3935                let right = self.mk_unions_balanced(&nodes[mid..]);
3936                self.mk_union(left, right)
3937            }
3938        }
3939    }
3940
3941    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3942        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3943    }
3944
3945    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3946        nodes
3947            .rev()
3948            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3949    }
3950}
3951
3952impl RegexBuilder {
3953    pub fn iter_sat(
3954        &mut self,
3955        stack: &mut Vec<(TRegexId, TSetId)>,
3956        f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
3957    ) {
3958        debug_assert!(!stack.is_empty());
3959        loop {
3960            match stack.pop() {
3961                None => return,
3962                Some((curr, curr_set)) => match *self.get_tregex(curr) {
3963                    TRegex::Leaf(n) => {
3964                        let mut curr = n;
3965                        while curr != NodeId::BOT {
3966                            match self.get_kind(curr) {
3967                                _ => {
3968                                    f(self, n, curr_set);
3969                                    curr = NodeId::BOT;
3970                                }
3971                            }
3972                        }
3973                    }
3974                    TRegex::ITE(cnd, then_id, else_id) => {
3975                        if else_id != TRegexId::BOT {
3976                            let notcnd = self.solver().not_id(cnd);
3977                            let interset1 = self.solver().and_id(curr_set, notcnd);
3978                            stack.push((else_id, interset1));
3979                        }
3980                        let interset2 = self.solver().and_id(curr_set, cnd);
3981                        stack.push((then_id, interset2));
3982                    }
3983                },
3984            }
3985        }
3986    }
3987
3988    #[allow(dead_code)]
3989    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3990        match self.get_tregex(term_id).clone() {
3991            TRegex::Leaf(node_id) => {
3992                if NodeId::BOT == node_id {
3993                    vec![]
3994                } else {
3995                    vec![node_id]
3996                }
3997            }
3998            TRegex::ITE(_, then_id, else_id) => {
3999                let mut then_nodes = self.extract_sat(then_id);
4000                let mut else_nodes = self.extract_sat(else_id);
4001                then_nodes.append(&mut else_nodes);
4002                then_nodes
4003            }
4004        }
4005    }
4006
4007    pub(crate) fn iter_unions_b(
4008        &mut self,
4009        curr: NodeId,
4010        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4011    ) {
4012        let mut curr = curr;
4013        while self.get_kind(curr) == Kind::Union {
4014            f(self, curr.left(self));
4015            curr = curr.right(self);
4016        }
4017        f(self, curr);
4018    }
4019
4020    pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4021        if !self.contains_look(node_id) {
4022            return Some(node_id);
4023        }
4024        match self.get_kind(node_id) {
4025            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4026            Kind::Concat => {
4027                let left = node_id.left(self);
4028                let right = node_id.right(self);
4029                let elim_l = self.try_elim_lookarounds(left)?;
4030                let elim_r = self.try_elim_lookarounds(right)?;
4031                let rw = self.mk_concat(elim_l, elim_r);
4032                Some(rw)
4033            }
4034            Kind::Union => {
4035                let left = node_id.left(self);
4036                let right = node_id.right(self);
4037                let elim_l = self.try_elim_lookarounds(left)?;
4038                let elim_r = self.try_elim_lookarounds(right)?;
4039                let rw = self.mk_union(elim_l, elim_r);
4040                Some(rw)
4041            }
4042
4043            Kind::Star => {
4044                let body = node_id.left(self);
4045                let elim_l = self.try_elim_lookarounds(body)?;
4046                Some(self.mk_star(elim_l))
4047            }
4048            Kind::Compl => {
4049                let left = node_id.left(self);
4050                let elim_l = self.try_elim_lookarounds(left)?;
4051                Some(self.mk_compl(elim_l))
4052            }
4053            Kind::Lookahead => {
4054                let rel = self.get_lookahead_rel(node_id);
4055                if rel != 0 {
4056                    return None;
4057                }
4058                let lbody = self.get_lookahead_inner(node_id);
4059                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4060                let elim_l = self.try_elim_lookarounds(lbody)?;
4061                let elim_r = self.try_elim_lookarounds(ltail)?;
4062                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4063                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4064                let rw = self.mk_inter(lbody_ts, ltail_ts);
4065                Some(rw)
4066            }
4067            Kind::Lookbehind => {
4068                let linner = self.get_lookbehind_inner(node_id);
4069                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4070                let elim_l = self.try_elim_lookarounds(linner)?;
4071                let elim_r = self.try_elim_lookarounds(lprev)?;
4072                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4073                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4074                let rw = self.mk_inter(lbody_ts, ltail_ts);
4075                Some(rw)
4076            }
4077            Kind::Inter => {
4078                let left = node_id.left(self);
4079                let right = node_id.right(self);
4080                let elim_l = self.try_elim_lookarounds(left)?;
4081                let elim_r = self.try_elim_lookarounds(right)?;
4082                let rw = self.mk_inter(elim_l, elim_r);
4083                Some(rw)
4084            }
4085            Kind::Counted => None,
4086        }
4087    }
4088
4089    /// R & _+ is a safe overapproximation of R that's nonempty
4090    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4091        if self.nullability(node) == Nullability::NEVER {
4092            node
4093        } else {
4094            self.mk_inter(NodeId::TOPPLUS, node)
4095        }
4096    }
4097
4098    pub fn iter_find_stack(
4099        &self,
4100        stack: &mut Vec<TRegexId>,
4101        mut f: impl FnMut(NodeId) -> bool,
4102    ) -> bool {
4103        loop {
4104            match stack.pop() {
4105                None => return false,
4106                Some(curr) => match self.get_tregex(curr) {
4107                    TRegex::Leaf(n) => {
4108                        let mut curr = *n;
4109                        while curr != NodeId::BOT {
4110                            match self.get_kind(curr) {
4111                                Kind::Union => {
4112                                    if f(curr.left(self)) {
4113                                        return true;
4114                                    }
4115                                    curr = curr.right(self);
4116                                }
4117                                _ => {
4118                                    if f(*n) {
4119                                        return true;
4120                                    }
4121                                    curr = NodeId::BOT;
4122                                }
4123                            }
4124                        }
4125                    }
4126                    TRegex::ITE(_, then_id, else_id) => {
4127                        if *else_id != TRegexId::BOT {
4128                            stack.push(*else_id);
4129                        }
4130                        stack.push(*then_id);
4131                    }
4132                },
4133            }
4134        }
4135    }
4136
4137    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
4138        if node == NodeId::BOT {
4139            return Some(true);
4140        }
4141        if self.nullability(node) != Nullability::NEVER {
4142            return Some(false);
4143        }
4144        if let Some(cached) = self.cache_empty.get(&node) {
4145            if cached.is_checked() {
4146                return Some(cached.is_empty());
4147            }
4148        }
4149        let node = if !self.contains_look(node) {
4150            node
4151        } else {
4152            self.try_elim_lookarounds(node)?
4153        };
4154        let isempty_flag = self.is_empty_lang_internal(node);
4155
4156        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
4157    }
4158
4159    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
4160        // without inter, no need to check
4161        if !self.get_meta_flags(initial_node).contains_inter() {
4162            return Ok(NodeFlags::ZERO);
4163        }
4164
4165        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4166        let mut worklist: VecDeque<NodeId> = VecDeque::new();
4167        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
4168        let mut stack = Vec::new();
4169        stack.push(begin_der);
4170        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
4171            visited.insert(node, initial_node);
4172            let nullability = self.nullability(node);
4173            if nullability != Nullability::NEVER {
4174                true
4175            } else {
4176                worklist.push_back(node);
4177                false
4178            }
4179        });
4180        if found_nullable_right_away {
4181            return Ok(NodeFlags::ZERO);
4182        }
4183
4184        worklist.push_back(initial_node);
4185        let isempty_flag: NodeFlags;
4186        let mut found_node = NodeId::BOT;
4187
4188        loop {
4189            match worklist.pop_front() {
4190                None => {
4191                    isempty_flag = NodeFlags::IS_EMPTY;
4192                    break;
4193                }
4194                Some(outer) => {
4195                    if let Some(cached) = self.cache_empty.get(&outer) {
4196                        if cached.is_checked() {
4197                            if cached.is_empty() {
4198                                continue;
4199                            } else {
4200                                return Ok(NodeFlags::ZERO);
4201                            }
4202                        }
4203                    }
4204
4205                    let derivative = self.der(outer, Nullability::CENTER)?;
4206
4207                    stack.push(derivative);
4208
4209                    let found_null = self.iter_find_stack(&mut stack, |node| {
4210                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
4211                            found_node = node;
4212                            if !self.get_meta_flags(node).contains_inter() {
4213                                true
4214                            } else {
4215                                e.insert(outer);
4216                                worklist.push_front(node);
4217                                self.any_nonbegin_nullable(node)
4218                            }
4219                        } else {
4220                            false
4221                        }
4222                    });
4223                    if found_null {
4224                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
4225                        isempty_flag = NodeFlags::ZERO;
4226                        break;
4227                    }
4228                }
4229            }
4230        }
4231
4232        self.cache_empty.insert(
4233            initial_node,
4234            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
4235        );
4236        Ok(isempty_flag)
4237    }
4238
4239    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
4240    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
4241        if larger_lang == smaller_lang {
4242            return Some(true);
4243        }
4244
4245        // assess initial nullability
4246        if self
4247            .nullability(larger_lang)
4248            .not()
4249            .and(self.nullability(smaller_lang))
4250            != Nullability::NEVER
4251        {
4252            return Some(false);
4253        }
4254
4255        // check language nullability
4256        // if (B &~ A) ≡ ⊥ then B ⊆ A
4257        // this means  L(B) \ L(A) = {}
4258        // eg. (a &~ .*) = ⊥ means .* subsumes a
4259        let (smaller_lang, larger_lang) =
4260            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
4261                let wrap = |b: &mut Self, n: NodeId| {
4262                    let tmp = b.mk_concat(n, NodeId::TS);
4263                    b.mk_concat(NodeId::TS, tmp)
4264                };
4265                (wrap(self, smaller_lang), wrap(self, larger_lang))
4266            } else {
4267                (smaller_lang, larger_lang)
4268            };
4269
4270        let nota = self.mk_compl(larger_lang);
4271        let diff = self.mk_inter(smaller_lang, nota);
4272        self.is_empty_lang(diff)
4273    }
4274}