Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9pub use unicode_classes::UnicodeClassCache;
10use solver::{Solver, TSetId};
11use rustc_hash::FxHashMap;
12use std::collections::{BTreeSet, VecDeque};
13use std::fmt::Debug;
14use std::fmt::Write;
15use std::hash::Hash;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_lookaround(self) -> bool {
134        self.has(MetaFlags::CONTAINS_LOOKAROUND)
135    }
136    pub(crate) fn contains_inter(self) -> bool {
137        self.has(MetaFlags::CONTAINS_INTER)
138    }
139
140    pub(crate) fn all_contains_flags(self) -> MetaFlags {
141        self.and(
142            MetaFlags::CONTAINS_LOOKAROUND
143                .or(MetaFlags::CONTAINS_ANCHORS)
144                .or(MetaFlags::CONTAINS_INTER),
145        )
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152    #[default]
153    Pred,
154    Star,
155    Begin,
156    End,
157    Concat,
158    Union,
159    Compl,
160    Lookbehind,
161    Lookahead,
162    Inter,
163    Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168    flags: MetaFlags,
169    nulls: NullsId,
170}
171
172struct MetadataBuilder {
173    num_created: u32,
174    solver: Solver,
175    nb: NullsBuilder,
176    index: FxHashMap<Metadata, MetadataId>,
177    pub array: Vec<Metadata>,
178}
179
180mod helpers {
181    pub(crate) fn incr_rel(n1: u32) -> u32 {
182        match n1.overflowing_add(1) {
183            (_, true) => u32::MAX,
184            (res, false) => res,
185        }
186    }
187}
188
189impl MetadataBuilder {
190    fn new() -> MetadataBuilder {
191        Self {
192            index: FxHashMap::default(),
193            array: vec![Metadata {
194                flags: MetaFlags::ZERO,
195                nulls: NullsId::EMPTY,
196            }],
197            solver: Solver::new(),
198            num_created: 0,
199            nb: NullsBuilder::new(),
200        }
201    }
202
203    fn init(&mut self, inst: Metadata) -> MetadataId {
204        self.num_created += 1;
205        let new_id = MetadataId(self.num_created);
206        self.index.insert(inst.clone(), new_id);
207        self.array.push(inst);
208        new_id
209    }
210
211    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212        match self.index.get(&inst) {
213            Some(&id) => id,
214            None => self.init(inst),
215        }
216    }
217
218    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219        &self.array[inst.0 as usize]
220    }
221
222    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223        setflags.all_contains_flags()
224    }
225
226    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227        let left = &self.array[body.0 as usize].flags;
228        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229        // BOT* = EPS (empty string only), not infinite
230        let inf = if body_id == NodeId::BOT {
231            MetaFlags::ZERO
232        } else {
233            MetaFlags::INFINITE_LENGTH
234        };
235        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236    }
237
238    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239        let left = &self.array[left_id.0 as usize].flags;
240        let null = left.nullability().not().and(Nullability::ALWAYS);
241        let contains = self.get_contains(*left);
242        MetaFlags::with_nullability(
243            null,
244            contains
245                .or(MetaFlags::INFINITE_LENGTH)
246                .or(MetaFlags::CONTAINS_COMPL),
247        )
248    }
249
250    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251        let left = &self.array[left_id.0 as usize].flags;
252        let right = &self.array[right_id.0 as usize].flags;
253        let null = left.nullability().and(right.nullability());
254        let contains = self.get_contains(left.or(*right));
255        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256        MetaFlags::with_nullability(null, contains.or(len))
257    }
258
259    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260        let left = &self.array[left_id.0 as usize].flags;
261        let right = &self.array[right_id.0 as usize].flags;
262        let null = left.nullability().and(right.nullability());
263        let contains = self
264            .get_contains(left.or(*right))
265            .or(MetaFlags::CONTAINS_INTER);
266        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267        MetaFlags::with_nullability(null, contains.or(len))
268    }
269
270    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271        let left = &self.array[left_id.0 as usize].flags;
272        let right = &self.array[right_id.0 as usize].flags;
273        let null = left.nullability().or(right.nullability());
274        let contains = self.get_contains(left.or(*right));
275        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276        MetaFlags::with_nullability(null, contains.or(len))
277    }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282    pub(crate) kind: Kind,
283    pub(crate) left: NodeId,
284    pub(crate) right: NodeId,
285    pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290    Leaf(NodeId),
291    ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301    fn is_checked(self) -> bool {
302        self.0 >= NodeFlags::IS_CHECKED.0
303    }
304    fn is_empty(self) -> bool {
305        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306    }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317    mb: MetadataBuilder,
318    temp_vec: Vec<NodeId>,
319    num_created: u32,
320    index: FxHashMap<NodeKey, NodeId>,
321    array: Vec<NodeKey>,
322    metadata: Vec<MetadataId>,
323    reversed: Vec<NodeId>,
324    cache_empty: FxHashMap<NodeId, NodeFlags>,
325    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326    tr_array: Vec<TRegex<TSetId>>,
327    tr_der_center: Vec<TRegexId>,
328    tr_der_begin: Vec<TRegexId>,
329    flags: BuilderFlags,
330    /// maximum lookahead context distance before returning `AnchorLimit`.
331    pub lookahead_context_max: u32,
332    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336
337macro_rules! iter_inter {
338    ($compiler:ident, $start:ident, $expression:expr) => {
339        let mut curr = $start;
340        while $compiler.get_kind(curr) == Kind::Inter {
341            let left = $compiler.get_left(curr);
342            $expression(left);
343            curr = $compiler.get_right(curr);
344        }
345        $expression(curr);
346    };
347}
348
349impl NodeId {
350    fn is_missing(&self) -> bool {
351        *self == NodeId::MISSING
352    }
353    #[inline]
354    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
355        b.get_flags_contains(self)
356    }
357    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
358        if self == tail {
359            true
360        } else if self.is_kind(b, Kind::Concat) {
361            self.right(b).has_concat_tail(b, tail)
362        } else {
363            false
364        }
365    }
366    fn missing_to_eps(&self) -> NodeId {
367        if *self == NodeId::MISSING {
368            NodeId::EPS
369        } else {
370            *self
371        }
372    }
373
374    #[inline]
375    fn kind(self, b: &RegexBuilder) -> Kind {
376        b.get_kind(self)
377    }
378    #[inline]
379    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
380        b.get_kind(self) == k
381    }
382    #[inline]
383    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
384        b.nullability(self) == Nullability::NEVER
385    }
386    #[inline]
387    fn nullability(self, b: &RegexBuilder) -> Nullability {
388        b.nullability(self)
389    }
390
391    #[inline]
392    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
393        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
394    }
395    #[inline]
396    pub fn left(self, b: &RegexBuilder) -> NodeId {
397        b.get_left(self)
398    }
399
400    #[inline]
401    pub fn right(self, b: &RegexBuilder) -> NodeId {
402        b.get_right(self)
403    }
404
405    #[inline]
406    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
407        b.der(self, mask)
408    }
409
410    #[inline]
411    fn extra(self, b: &RegexBuilder) -> u32 {
412        b.get_extra(self)
413    }
414
415    #[inline]
416    fn is_pred(self, b: &RegexBuilder) -> bool {
417        b.get_kind(self) == Kind::Pred
418    }
419    #[inline]
420    fn is_lookahead(self, b: &RegexBuilder) -> bool {
421        b.get_kind(self) == Kind::Lookahead
422    }
423    #[inline]
424    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
425        debug_assert!(self.is_pred(b));
426        TSetId(b.get_extra(self))
427    }
428    #[inline]
429    fn is_star(self, b: &RegexBuilder) -> bool {
430        if NodeId::EPS == self {
431            return false;
432        }
433        b.get_kind(self) == Kind::Star
434    }
435
436    #[inline]
437    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
438        if self.is_concat(b) {
439            let r = self.right(b);
440            return r.is_star(b) && r.left(b) == self.left(b);
441        }
442        false
443    }
444
445    #[inline]
446    fn is_concat(self, b: &RegexBuilder) -> bool {
447        b.get_kind(self) == Kind::Concat
448    }
449
450    #[inline]
451    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
452        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
453            Some(b.get_right(self))
454        } else {
455            None
456        }
457    }
458
459    #[inline]
460    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
461        if b.get_kind(self) == Kind::Concat {
462            let left = self.left(b);
463            let right = self.right(b);
464            if left.is_kind(b, Kind::Compl)
465                && left.left(b) == NodeId::TOPPLUS {
466                    return right == NodeId::END;
467                }
468        }
469        false
470    }
471
472    #[inline]
473    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
474        if NodeId::EPS == self {
475            return None;
476        }
477        if self.is_star(b) && self.left(b).is_pred(b) {
478            Some(self.left(b))
479        } else {
480            None
481        }
482    }
483
484    #[inline]
485    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
486        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
487            let middle = self.right(b);
488            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
489                return Some(middle.left(b));
490            }
491        }
492        None
493    }
494
495    #[inline]
496    pub(crate) fn iter_union(
497        self,
498        b: &mut RegexBuilder,
499        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
500    ) {
501        let mut curr: NodeId = self;
502        while curr.kind(b) == Kind::Union {
503            f(b, curr.left(b));
504            curr = curr.right(b);
505        }
506        f(b, curr);
507    }
508
509    #[inline]
510    pub(crate) fn iter_union_while(
511        self,
512        b: &mut RegexBuilder,
513        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
514    ) {
515        let mut curr: NodeId = self;
516        let mut continue_loop = true;
517        while continue_loop && curr.kind(b) == Kind::Union {
518            continue_loop = f(b, curr.left(b));
519            curr = curr.right(b);
520        }
521        if continue_loop {
522            f(b, curr);
523        }
524    }
525}
526
527impl Default for RegexBuilder {
528    fn default() -> Self {
529        Self::new()
530    }
531}
532
533impl RegexBuilder {
534    pub fn new() -> RegexBuilder {
535        let mut inst = Self {
536            mb: MetadataBuilder::new(),
537            array: Vec::new(),
538            index: FxHashMap::default(),
539            cache_empty: FxHashMap::default(),
540            tr_array: Vec::new(),
541            tr_cache: FxHashMap::default(),
542            flags: BuilderFlags::ZERO,
543            lookahead_context_max: 800,
544            num_created: 0,
545            metadata: Vec::new(),
546            reversed: Vec::new(),
547            tr_der_center: Vec::new(),
548            tr_der_begin: Vec::new(),
549            temp_vec: Vec::new(),
550            mk_binary_memo: FxHashMap::default(),
551            clean_cache: FxHashMap::default(),
552        };
553        inst.array.push(NodeKey::default());
554        inst.mk_pred(TSetId::EMPTY);
555        inst.mk_star(NodeId::BOT);
556        inst.mk_pred(TSetId::FULL);
557        inst.mk_star(NodeId::TOP);
558        let top_plus_id = inst.mk_plus(NodeId::TOP);
559        inst.mk_unset(Kind::Begin);
560        inst.mk_unset(Kind::End);
561        debug_assert!(top_plus_id == NodeId::TOPPLUS);
562
563        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
564        inst.mk_leaf(NodeId::BOT);
565        inst.mk_leaf(NodeId::EPS);
566        inst.mk_leaf(NodeId::TOP);
567        inst.mk_leaf(NodeId::TS);
568
569        inst.flags = BuilderFlags::SUBSUME;
570        inst
571    }
572
573    #[inline]
574    pub fn solver_ref(&self) -> &Solver {
575        &self.mb.solver
576    }
577
578    #[inline]
579    pub fn solver(&mut self) -> &mut Solver {
580        &mut self.mb.solver
581    }
582
583    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
584        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
585        self.tr_cache.insert(inst.clone(), new_id);
586        self.tr_array.push(inst);
587        new_id
588    }
589
590    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
591        match self.tr_cache.get(&inst) {
592            Some(&id) => id,
593            None => self.tr_init(inst),
594        }
595    }
596
597    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
598        &self.tr_array[inst.0 as usize]
599    }
600
601    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
602        let solv = self.solver();
603        !solv.is_sat_id(cond1, cond2)
604    }
605
606    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
607        let term = TRegex::<TSetId>::Leaf(node_id);
608        self.get_tregex_id(term)
609    }
610
611    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
612        let tmp_inst = TRegex::ITE(cond, _then, _else);
613        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
614            return *cached;
615        }
616        if _then == _else {
617            return _then;
618        }
619        if self.solver().is_full_id(cond) {
620            return _then;
621        }
622        if self.solver().is_empty_id(cond) {
623            return _else;
624        }
625        let _clean_then = match self.tr_array[_then.0 as usize] {
626            TRegex::Leaf(_) => _then,
627            _ => self.clean(cond, _then),
628        };
629        let notcond = self.solver().not_id(cond);
630        let _clean_else = match self.tr_array[_else.0 as usize] {
631            TRegex::Leaf(_) => _else,
632            _ => self.clean(notcond, _else),
633        };
634
635        if _clean_then == _clean_else {
636            return _clean_then;
637        }
638        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
639        match self.get_tregex(_clean_then) {
640            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
641                let _cond_then = *leftcond;
642                let new_then = *_inner_then;
643                let sand = self.solver().and_id(cond, _cond_then);
644                return self.mk_ite(sand, new_then, _clean_else);
645            }
646            _ => {}
647        }
648        // attempt right side flattening:
649        match self.get_tregex(_clean_else) {
650            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
651            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
652                let e2clone = *_e2;
653                let new_cond = self.mb.solver.or_id(cond, *_c2);
654                return self.mk_ite(new_cond, _clean_then, e2clone);
655            }
656            _ => {}
657        }
658
659        if _clean_then == TRegexId::BOT {
660            let flip_cond = self.solver().not_id(cond);
661            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
662            return cleaned_id;
663        }
664
665        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
666    }
667
668    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
669        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
670            return cached;
671        }
672        let result = match *self.get_tregex(tterm) {
673            TRegex::Leaf(_) => tterm,
674            TRegex::ITE(alpha, _then_id, _else_id) => {
675                let notalpha = self.mb.solver.not_id(alpha);
676                if self.mb.solver.unsat_id(beta, alpha) {
677                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
678                    self.clean(beta, _else_id)
679                } else if self.unsat(beta, notalpha) {
680                    // beta ⊆ alpha, so beta ∧ alpha = beta
681                    self.clean(beta, _then_id)
682                } else {
683                    let tc = self.mb.solver.and_id(beta, alpha);
684                    let ec = self.mb.solver.and_id(beta, notalpha);
685                    let new_then = self.clean(tc, _then_id);
686                    let new_else = self.clean(ec, _else_id);
687                    self.mk_ite(alpha, new_then, new_else)
688                }
689            }
690        };
691        self.clean_cache.insert((beta, tterm), result);
692        result
693    }
694
695    fn mk_unary(
696        &mut self,
697        term: TRegexId,
698        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
699    ) -> TRegexId {
700        match self.tr_array[term.0 as usize] {
701            TRegex::Leaf(node_id) => {
702                let applied = apply(self, node_id);
703                self.mk_leaf(applied)
704            }
705            TRegex::ITE(c1, _then, _else) => {
706                let _then1 = self.mk_unary(_then, apply);
707                let _else1 = self.mk_unary(_else, apply);
708                self.mk_ite(c1, _then1, _else1)
709            }
710        }
711    }
712
713    fn mk_binary_result(
714        &mut self,
715        left: TRegexId,
716        right: TRegexId,
717        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
718    ) -> Result<TRegexId, AlgebraError> {
719        match self.tr_array[left.0 as usize] {
720            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
721                TRegex::Leaf(right_node_id) => {
722                    let applied = apply(self, left_node_id, right_node_id)?;
723                    Ok(self.mk_leaf(applied))
724                }
725                TRegex::ITE(c2, _then, _else) => {
726                    let then2 = self.mk_binary_result(left, _then, apply)?;
727                    let else2 = self.mk_binary_result(left, _else, apply)?;
728                    Ok(self.mk_ite(c2, then2, else2))
729                }
730            },
731            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
732                TRegex::Leaf(_) => {
733                    let then2 = self.mk_binary_result(_then1, right, apply)?;
734                    let else2 = self.mk_binary_result(_else1, right, apply)?;
735                    Ok(self.mk_ite(c1, then2, else2))
736                }
737                TRegex::ITE(c2, _then2, _else2) => {
738                    if c1 == c2 {
739                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
740                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
741                        return Ok(self.mk_ite(c1, _then, _else));
742                    }
743                    if c1.0 > c2.0 {
744                        let _then = self.mk_binary_result(_then1, right, apply)?;
745                        let _else = self.mk_binary_result(_else1, right, apply)?;
746                        Ok(self.mk_ite(c1, _then, _else))
747                    } else {
748                        let _then = self.mk_binary_result(left, _then2, apply)?;
749                        let _else = self.mk_binary_result(left, _else2, apply)?;
750                        Ok(self.mk_ite(c2, _then, _else))
751                    }
752                }
753            },
754        }
755    }
756
757    fn mk_binary(
758        &mut self,
759        left: TRegexId,
760        right: TRegexId,
761        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
762    ) -> TRegexId {
763        self.mk_binary_memo.clear();
764        self.mk_binary_inner(left, right, apply)
765    }
766
767    fn mk_binary_inner(
768        &mut self,
769        left: TRegexId,
770        right: TRegexId,
771        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
772    ) -> TRegexId {
773        if left == right {
774            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
775        }
776        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
777            return cached;
778        }
779        let result = match self.tr_array[left.0 as usize] {
780            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
781                TRegex::Leaf(right_node_id) => {
782                    let applied = apply(self, left_node_id, right_node_id);
783                    self.mk_leaf(applied)
784                }
785                TRegex::ITE(c2, _then, _else) => {
786                    let then2 = self.mk_binary_inner(left, _then, apply);
787                    let else2 = self.mk_binary_inner(left, _else, apply);
788                    self.mk_ite(c2, then2, else2)
789                }
790            },
791            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
792                TRegex::Leaf(_) => {
793                    let then2 = self.mk_binary_inner(_then1, right, apply);
794                    let else2 = self.mk_binary_inner(_else1, right, apply);
795                    self.mk_ite(c1, then2, else2)
796                }
797                TRegex::ITE(c2, _then2, _else2) => {
798                    if c1 == c2 {
799                        let _then = self.mk_binary_inner(_then1, _then2, apply);
800                        let _else = self.mk_binary_inner(_else1, _else2, apply);
801                        self.mk_ite(c1, _then, _else)
802                    } else if c1.0 > c2.0 {
803                        let _then = self.mk_binary_inner(_then1, right, apply);
804                        let _else = self.mk_binary_inner(_else1, right, apply);
805                        self.mk_ite(c1, _then, _else)
806                    } else {
807                        let _then = self.mk_binary_inner(left, _then2, apply);
808                        let _else = self.mk_binary_inner(left, _else2, apply);
809                        self.mk_ite(c2, _then, _else)
810                    }
811                }
812            },
813        };
814        self.mk_binary_memo.insert((left, right), result);
815        result
816    }
817
818    pub fn get_nulls(
819        &mut self,
820        pending_rel: u32,
821        mask: Nullability,
822        acc: &mut BTreeSet<NullState>,
823        node_id: NodeId,
824    ) {
825        debug_assert!(node_id != NodeId::MISSING);
826        if !self.is_nullable(node_id, mask) {
827            return;
828        }
829        match self.get_kind(node_id) {
830            Kind::Pred => {}
831            Kind::End => {
832                if mask.has(Nullability::END) {
833                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
834                }
835            }
836            Kind::Begin => {
837                if mask.has(Nullability::BEGIN) {
838                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
839                }
840            }
841            Kind::Concat => {
842                let new_mask = self.nullability(node_id).and(mask);
843                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
844                if self.is_nullable(node_id.left(self), mask) {
845                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
846                }
847            }
848            Kind::Union => {
849                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
850                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
851            }
852            Kind::Inter => {
853                let new_mask = self.nullability(node_id).and(mask);
854                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
855                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
856            }
857            Kind::Star => {
858                acc.insert(NullState::new(mask, pending_rel));
859                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
860            }
861            Kind::Compl => {
862                if !self.is_nullable(node_id.left(self), mask) {
863                    acc.insert(NullState::new(mask, 0));
864                }
865            }
866            Kind::Lookbehind => {
867                let new_mask = self.nullability(node_id).and(mask);
868                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
869                if node_id.right(self) != NodeId::MISSING {
870                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
871                }
872            }
873            Kind::Lookahead => {
874                let la_inner = self.get_lookahead_inner(node_id);
875                if self.is_nullable(la_inner, mask) {
876                    let rel = self.get_lookahead_rel(node_id);
877                    if rel != u32::MAX {
878                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
879                    }
880                    // tail only contributes when body is satisfied
881                    let la_tail = self.get_lookahead_tail(node_id);
882                    if la_tail != NodeId::MISSING {
883                        self.get_nulls(pending_rel, mask, acc, la_tail);
884                    }
885                }
886            }
887            Kind::Counted => {
888                let packed = self.get_extra(node_id);
889                let best = packed >> 16;
890                if best > 0 {
891                    acc.insert(NullState::new(mask, pending_rel + best));
892                }
893            }
894        }
895    }
896
897    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
898        self.get_meta_flags(node_id)
899            .has(MetaFlags::CONTAINS_LOOKAROUND)
900    }
901
902    /// whether node contains `^`, `$`, `\A`, `\z` anchors.
903    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
904        self.get_meta_flags(node_id)
905            .has(MetaFlags::CONTAINS_ANCHORS)
906    }
907
908
909    pub fn is_infinite(&self, node_id: NodeId) -> bool {
910        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
911    }
912
913    /// returns (min_length, max_length). max = u32::MAX means unbounded.
914    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
915        if self.is_infinite(node_id) {
916            if self.get_kind(node_id) == Kind::Inter {
917                self.get_bounded_length(node_id)
918            } else {
919                (self.get_min_length_only(node_id), u32::MAX)
920            }
921        } else {
922            self.get_bounded_length(node_id)
923        }
924    }
925
926    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
927        if node_id == NodeId::EPS {
928            return (0, 0);
929        }
930        match self.get_kind(node_id) {
931            Kind::End | Kind::Begin => (0, 0),
932            Kind::Pred => (1, 1),
933            Kind::Concat => {
934                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
935                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
936                (lmin + rmin, lmax.saturating_add(rmax))
937            }
938            Kind::Union => {
939                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
940                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
941                (lmin.min(rmin), lmax.max(rmax))
942            }
943            Kind::Inter => {
944                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
945                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
946                (lmin.max(rmin), lmax.min(rmax))
947            }
948            Kind::Lookahead => {
949                let body = node_id.left(self);
950                if self.is_infinite(body) {
951                    return (0, u32::MAX);
952                }
953                let right = node_id.right(self);
954                if right.is_missing() {
955                    (0, 0)
956                } else {
957                    self.get_min_max_length(right)
958                }
959            }
960            Kind::Counted => (0, 0),
961            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
962        }
963    }
964
965    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
966        match self.get_kind(node_id) {
967            Kind::End | Kind::Begin => Some(0),
968            Kind::Pred => Some(1),
969            Kind::Concat => {
970                let l = self.get_fixed_length(node_id.left(self))?;
971                let r = self.get_fixed_length(node_id.right(self))?;
972                Some(l + r)
973            }
974            Kind::Union => {
975                let l = self.get_fixed_length(node_id.left(self))?;
976                let r = self.get_fixed_length(node_id.right(self))?;
977                if l == r {
978                    Some(l)
979                } else {
980                    None
981                }
982            }
983            Kind::Inter => {
984                let l = self.get_fixed_length(node_id.left(self))?;
985                let r = self.get_fixed_length(node_id.right(self))?;
986                if l == r {
987                    Some(l)
988                } else {
989                    None
990                }
991            }
992            Kind::Lookahead => {
993                let right = node_id.right(self);
994                if right.is_missing() {
995                    Some(0)
996                } else {
997                    self.get_fixed_length(right)
998                }
999            }
1000            Kind::Counted => Some(0),
1001            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1002        }
1003    }
1004
1005    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1006        match self.get_kind(node_id) {
1007            Kind::End | Kind::Begin => 0,
1008            Kind::Pred => 1,
1009            Kind::Concat => {
1010                self.get_min_length_only(node_id.left(self))
1011                    + self.get_min_length_only(node_id.right(self))
1012            }
1013            Kind::Union => self
1014                .get_min_length_only(node_id.left(self))
1015                .min(self.get_min_length_only(node_id.right(self))),
1016            Kind::Inter => self
1017                .get_min_length_only(node_id.left(self))
1018                .max(self.get_min_length_only(node_id.right(self))),
1019            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1020            Kind::Compl => {
1021                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1022                    0
1023                } else {
1024                    1
1025                }
1026            }
1027        }
1028    }
1029
1030    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1031        if node_id == NodeId::TS {
1032            return true;
1033        }
1034        match self.get_kind(node_id) {
1035            Kind::Inter => {
1036                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1037            }
1038            Kind::Union => {
1039                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1040            }
1041            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1042            _ => false,
1043        }
1044    }
1045
1046    #[inline]
1047    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1048        if self.get_kind(node_id) == Kind::Concat {
1049            self.ends_with_ts(node_id.right(self))
1050        } else {
1051            node_id == NodeId::TS
1052        }
1053    }
1054
1055    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1056        debug_assert!(node_id != NodeId::MISSING);
1057        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1058    }
1059
1060    pub(crate) fn cache_der(
1061        &mut self,
1062        node_id: NodeId,
1063        result: TRegexId,
1064        mask: Nullability,
1065    ) -> TRegexId {
1066        if mask == Nullability::CENTER {
1067            self.tr_der_center[node_id.0 as usize] = result
1068        } else {
1069            self.tr_der_begin[node_id.0 as usize] = result
1070        };
1071        result
1072    }
1073
1074    pub(crate) fn try_cached_der(
1075        &mut self,
1076        node_id: NodeId,
1077        mask: Nullability,
1078    ) -> Option<TRegexId> {
1079        let cache = if mask == Nullability::CENTER {
1080            &mut self.tr_der_center
1081        } else {
1082            &mut self.tr_der_begin
1083        };
1084        match cache.get(node_id.0 as usize) {
1085            Some(&TRegexId::MISSING) => {}
1086            Some(&result) => {
1087                return Some(result);
1088            }
1089            None => {
1090                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1091            }
1092        }
1093        None
1094    }
1095
1096    pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1097        let mut term = self.get_tregex(der);
1098        loop {
1099            match *term {
1100                TRegex::Leaf(node_id) => return node_id,
1101                TRegex::ITE(cond, _then, _else) => {
1102                    if self.solver().is_sat_id(set, cond) {
1103                        term = self.get_tregex(_then);
1104                    } else {
1105                        term = self.get_tregex(_else);
1106                    }
1107                }
1108            }
1109        }
1110    }
1111
1112    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1113        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1114        debug_assert!(
1115            node_id != NodeId::MISSING,
1116            "attempting to derive missing node"
1117        );
1118        if let Some(result) = self.try_cached_der(node_id, mask) {
1119            return Ok(result);
1120        }
1121
1122        let result = match node_id.kind(self) {
1123            Kind::Compl => {
1124                let leftd = node_id.left(self).der(self, mask)?;
1125                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1126            }
1127            Kind::Inter => {
1128                let leftd = node_id.left(self).der(self, mask)?;
1129                let rightd = node_id.right(self).der(self, mask)?;
1130                {
1131                    let this = &mut *self;
1132                    this.mk_binary(
1133                        leftd,
1134                        rightd,
1135                        &mut (|b, left, right| b.mk_inter(left, right)),
1136                    )
1137                }
1138            }
1139            Kind::Union => {
1140                let leftd = self.der(node_id.left(self), mask)?;
1141                let rightd = self.der(node_id.right(self), mask)?;
1142                {
1143                    let this = &mut *self;
1144                    this.mk_binary(
1145                        leftd,
1146                        rightd,
1147                        &mut (|b, left, right| b.mk_union(left, right)),
1148                    )
1149                }
1150            }
1151            Kind::Concat => {
1152                let head = node_id.left(self);
1153                let tail = node_id.right(self);
1154                let tail_leaf = self.mk_leaf(tail);
1155                let head_der = self.der(head, mask)?;
1156
1157                if self.is_nullable(head, mask) {
1158                    let rightd = self.der(tail, mask)?;
1159                    let ldr = {
1160                        let this = &mut *self;
1161                        this.mk_binary(
1162                            head_der,
1163                            tail_leaf,
1164                            &mut (|b, left, right| b.mk_concat(left, right)),
1165                        )
1166                    };
1167                    {
1168                        let this = &mut *self;
1169                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1170                    }
1171                } else {
1172                    let this = &mut *self;
1173                    this.mk_binary(
1174                        head_der,
1175                        tail_leaf,
1176                        &mut (|b, left, right| b.mk_concat(left, right)),
1177                    )
1178                }
1179            }
1180            Kind::Star => {
1181                if node_id == NodeId::EPS {
1182                    TRegexId::BOT
1183                } else {
1184                    let left = node_id.left(self);
1185                    let r_decr_leaf = self.mk_leaf(node_id);
1186                    let r_der = self.der(left, mask)?;
1187                    let this = &mut *self;
1188                    this.mk_binary(
1189                        r_der,
1190                        r_decr_leaf,
1191                        &mut (|b, left, right| b.mk_concat(left, right)),
1192                    )
1193                }
1194            }
1195            Kind::Lookbehind => {
1196                let lb_prev_der = {
1197                    let lb_prev = self.get_lookbehind_prev(node_id);
1198                    if lb_prev == NodeId::MISSING {
1199                        TRegexId::MISSING
1200                    } else {
1201                        self.der(lb_prev, mask)?
1202                    }
1203                };
1204                let lb_inner = self.get_lookbehind_inner(node_id);
1205                let lb_inner_der = self.der(lb_inner, mask)?;
1206                {
1207                    let this = &mut *self;
1208                    this.mk_binary_result(
1209                        lb_inner_der,
1210                        lb_prev_der,
1211                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1212                    )?
1213                }
1214            }
1215            Kind::Lookahead => {
1216                let la_tail = self.get_lookahead_tail(node_id);
1217                let la_body = node_id.left(self);
1218                let rel = self.get_lookahead_rel(node_id);
1219
1220                if self.is_nullable(la_body, mask) {
1221                    // nullabilty is taken once, just keep the body
1222                    let right = node_id.right(self).missing_to_eps();
1223                    let rder = self.der(right, mask).clone();
1224                    return rder;
1225                }
1226
1227                if rel == u32::MAX {
1228                    let la_body_der = self.der(la_body, mask)?;
1229                    if la_tail.is_kind(self, Kind::Pred) {
1230                        let transitioned =
1231                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1232                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1233                        let concated = self.mk_concat(la_tail, new_la);
1234                        return self.der(concated, mask);
1235                    }
1236                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1237                        let left = la_tail.left(self);
1238                        let tset = left.pred_tset(self);
1239                        let transitioned = self.transition_term(la_body_der, tset);
1240                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1241                        let tail_right = la_tail.right(self);
1242                        let concated = self.mk_concat(new_la, tail_right);
1243                        let concated = self.mk_concat(left, concated);
1244                        return self.der(concated, mask);
1245                    }
1246                }
1247
1248                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1249                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1250                    let concated = self.mk_concat(la_body, nulls_mask);
1251                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1252                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1253                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1254                    let new_union = self.mk_union(concated_look, new_look);
1255                    return self.der(new_union, mask);
1256                }
1257
1258                let la_tail_der = if la_tail == NodeId::MISSING {
1259                    TRegexId::MISSING
1260                } else {
1261                    if self.is_nullable(la_tail, mask) {
1262                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1263                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1264                        let la_union = self.mk_union(la_tail, nulls_la);
1265                        self.der(la_union, mask)?
1266                    } else {
1267                        self.der(la_tail, mask)?
1268                    }
1269                };
1270
1271                let la_body_der = self.der(la_body, mask)?;
1272
1273                if rel != u32::MAX && rel > self.lookahead_context_max {
1274                    return Err(AlgebraError::AnchorLimit);
1275                }
1276
1277                let la = {
1278                    let this = &mut *self;
1279                    let rel = helpers::incr_rel(rel);
1280                    this.mk_binary(
1281                        la_body_der,
1282                        la_tail_der,
1283                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1284                    )
1285                };
1286
1287                if rel != u32::MAX
1288                    && la_tail_der != TRegexId::MISSING
1289                    && self.is_nullable(la_tail, mask)
1290                {
1291                    let look_only = {
1292                        let this = &mut *self;
1293                        let right = TRegexId::MISSING;
1294                        let rel = helpers::incr_rel(rel);
1295                        this.mk_binary(
1296                            la_body_der,
1297                            right,
1298                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1299                        )
1300                    };
1301                    {
1302                        let this = &mut *self;
1303                        this.mk_binary(
1304                            look_only,
1305                            la,
1306                            &mut (|b, left, right| b.mk_union(left, right)),
1307                        )
1308                    }
1309                } else {
1310                    la
1311                }
1312            }
1313            Kind::Counted => {
1314                let body = node_id.left(self);
1315                let chain = node_id.right(self);
1316                let packed = self.get_extra(node_id);
1317                let step = (packed & 0xFFFF) as u16;
1318                let best = (packed >> 16) as u16;
1319
1320                let mid_best = if self.is_nullable(body, mask) && step >= best {
1321                    step
1322                } else {
1323                    best
1324                };
1325
1326                let body_der = self.der(body, mask)?;
1327                let new_step = step.saturating_add(1);
1328                self.mk_unary(body_der, &mut |b, new_body| {
1329                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1330                        new_step
1331                    } else {
1332                        mid_best
1333                    };
1334                    let packed = (final_best as u32) << 16 | new_step as u32;
1335                    b.mk_counted(new_body, chain, packed)
1336                })
1337            }
1338            Kind::Begin | Kind::End => TRegexId::BOT,
1339            Kind::Pred => {
1340                let psi = node_id.pred_tset(self);
1341                if psi == TSetId::EMPTY {
1342                    TRegexId::BOT
1343                } else {
1344                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1345                }
1346            }
1347        };
1348
1349        // println!("{} {}", node_id.0, self.pp(node_id));
1350        // println!("node: {} (total: {})", node_id.0, self.num_created);
1351
1352        self.cache_der(node_id, result, mask);
1353        Ok(result)
1354    }
1355
1356    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1357        debug_assert!(meta_id != MetadataId::MISSING);
1358        match self.metadata.get_mut(node_id.0 as usize) {
1359            Some(v) => *v = meta_id,
1360            None => {
1361                self.metadata
1362                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1363                self.metadata[node_id.0 as usize] = meta_id;
1364            }
1365        }
1366    }
1367
1368    fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1369        debug_assert!(reversed_id != NodeId::MISSING);
1370        match self.reversed.get_mut(node_id.0 as usize) {
1371            Some(v) => *v = reversed_id,
1372            None => {
1373                self.reversed
1374                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1375                self.reversed[node_id.0 as usize] = reversed_id;
1376            }
1377        }
1378    }
1379
1380    fn init(&mut self, inst: NodeKey) -> NodeId {
1381        self.num_created += 1;
1382        let node_id = NodeId(self.num_created);
1383        self.index.insert(inst.clone(), node_id);
1384        match inst.kind {
1385            Kind::Pred => {
1386                let meta_id = self.mb.get_meta_id(Metadata {
1387                    flags: MetaFlags::ZERO,
1388                    nulls: NullsId::EMPTY,
1389                });
1390                self.init_metadata(node_id, meta_id);
1391            }
1392            Kind::Begin => {
1393                let meta_id = self.mb.get_meta_id(Metadata {
1394                    flags: MetaFlags::with_nullability(
1395                        Nullability::BEGIN,
1396                        MetaFlags::CONTAINS_ANCHORS,
1397                    ),
1398                    nulls: NullsId::BEGIN0,
1399                });
1400                self.init_metadata(node_id, meta_id);
1401            }
1402            Kind::End => {
1403                let meta_id = self.mb.get_meta_id(Metadata {
1404                    flags: MetaFlags::with_nullability(
1405                        Nullability::END,
1406                        MetaFlags::CONTAINS_ANCHORS,
1407                    ),
1408                    nulls: NullsId::END0,
1409                });
1410                self.init_metadata(node_id, meta_id);
1411            }
1412            Kind::Inter => {
1413                let m1 = self.get_node_meta_id(inst.left);
1414                let m2 = self.get_node_meta_id(inst.right);
1415                let meta_id = {
1416                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1417                    let mask_l = inst.left.nullability(self);
1418                    let mask_r = inst.right.nullability(self);
1419                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1420                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1421                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1422                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1423                    let new_meta = Metadata {
1424                        flags: self.mb.flags_inter(m1, m2),
1425                        nulls,
1426                    };
1427                    self.mb.get_meta_id(new_meta)
1428                };
1429                self.init_metadata(node_id, meta_id);
1430            }
1431            Kind::Union => {
1432                let m1 = self.get_node_meta_id(inst.left);
1433                let m2 = self.get_node_meta_id(inst.right);
1434                let meta_id = {
1435                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1436                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1437                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1438                    let new_meta = Metadata {
1439                        flags: self.mb.flags_union(m1, m2),
1440                        nulls,
1441                    };
1442                    self.mb.get_meta_id(new_meta)
1443                };
1444                self.init_metadata(node_id, meta_id);
1445            }
1446            Kind::Concat => {
1447                let flags = self.mb.flags_concat(
1448                    self.get_node_meta_id(inst.left),
1449                    self.get_node_meta_id(inst.right),
1450                );
1451
1452                let right_nullability = inst.right.nullability(self);
1453                let left_nullability = inst.left.nullability(self);
1454                let nulls_left = self.get_nulls_id(inst.left);
1455                let nulls_right = self.get_nulls_id(inst.right);
1456                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1457                let mask = right_nullability.and(left_nullability);
1458                nulls = self.mb.nb.and_mask(nulls, mask);
1459
1460                let new_id = self.mb.get_meta_id(Metadata {
1461                    flags,
1462                    nulls,
1463                });
1464                self.init_metadata(node_id, new_id);
1465            }
1466            Kind::Star => {
1467                let left_nulls = self.get_nulls_id(inst.left);
1468                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1469                let meta_id = self.mb.get_meta_id(Metadata {
1470                    flags: self
1471                        .mb
1472                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1473                    nulls,
1474                });
1475                self.init_metadata(node_id, meta_id);
1476            }
1477            Kind::Compl => {
1478                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1479                let meta_id = self.mb.get_meta_id(Metadata {
1480                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1481                    nulls,
1482                });
1483                self.init_metadata(node_id, meta_id);
1484            }
1485            Kind::Lookbehind => {
1486                let mut null = self.get_meta_flags(inst.left).nullability();
1487                let mut contains_flags = self.get_flags_contains(inst.left);
1488                if !inst.right.is_missing() {
1489                    null = null.and(self.get_meta_flags(inst.right).nullability());
1490                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1491                }
1492
1493                let left_nulls = self.get_nulls_id(inst.left);
1494                let right_nulls = self.get_nulls_id(inst.right);
1495                let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1496                let meta_id = self.mb.get_meta_id(Metadata {
1497                    flags: MetaFlags::with_nullability(
1498                        null,
1499                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1500                    ),
1501                    nulls,
1502                });
1503                self.init_metadata(node_id, meta_id);
1504            }
1505            Kind::Lookahead => {
1506                let mut nulls = self.get_nulls_id(inst.left);
1507                let left_nullability = inst.left.nullability(self);
1508                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1509                nulls = self.mb.nb.or_id(nulls, nulls_right);
1510                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1511
1512                let la_inner = inst.left;
1513                let la_tail = inst.right;
1514                let null = self
1515                    .get_meta_flags(la_inner)
1516                    .nullability()
1517                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1518                let contains_flags = self
1519                    .get_flags_contains(la_inner)
1520                    .or(self.get_flags_contains(la_tail));
1521
1522                let meta_id = self.mb.get_meta_id(Metadata {
1523                    flags: MetaFlags::with_nullability(
1524                        null,
1525                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1526                    ),
1527                    nulls,
1528                });
1529                self.init_metadata(node_id, meta_id);
1530            }
1531            Kind::Counted => {
1532                let best = inst.extra >> 16;
1533                let (null, nulls) = if best > 0 {
1534                    let mut ns = BTreeSet::new();
1535                    ns.insert(NullState::new(Nullability::CENTER, best));
1536                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1537                } else {
1538                    (Nullability::NEVER, NullsId::EMPTY)
1539                };
1540                let meta_id = self.mb.get_meta_id(Metadata {
1541                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1542                    nulls,
1543                });
1544                self.init_metadata(node_id, meta_id);
1545            }
1546        }
1547
1548        self.array.push(inst);
1549
1550        if let Some(rw) = self.post_init_simplify(node_id) {
1551            self.override_as(node_id, rw)
1552        } else {
1553            node_id
1554        }
1555    }
1556
1557    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1558        match self.get_kind(node_id) {
1559            Kind::Concat => {
1560                let lhs = node_id.left(self);
1561                let rhs = node_id.right(self);
1562                if lhs.is_pred_star(self).is_some() {
1563                    if let Some(opttail) = rhs.is_opt_v(self) {
1564                        if let Some(true) = self.subsumes(lhs, opttail) {
1565                            // return self.override_as(node_id, lhs);
1566                            return Some(lhs);
1567                        }
1568                    }
1569                }
1570            }
1571            Kind::Union => {
1572                let lhs = node_id.left(self);
1573                let rhs = node_id.right(self);
1574                let mut subsumed = false;
1575                rhs.iter_union_while(self, &mut |b, branch| {
1576                    if b.nullable_subsumes(branch, lhs) {
1577                        subsumed = true;
1578                    }
1579                    !subsumed
1580                });
1581                if subsumed {
1582                    return Some(rhs);
1583                }
1584                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1585                    return Some(rhs);
1586                }
1587            }
1588            _ => {}
1589        }
1590
1591        None
1592    }
1593
1594    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1595    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1596        if self.get_kind(lhs) != Kind::Union {
1597            return false; // single branch already checked by nullable_subsumes
1598        }
1599        let mut rhs_branches = Vec::new();
1600        let mut curr = rhs;
1601        while self.get_kind(curr) == Kind::Union {
1602            rhs_branches.push(self.get_left(curr));
1603            curr = self.get_right(curr);
1604        }
1605        rhs_branches.push(curr);
1606
1607        curr = lhs;
1608        while self.get_kind(curr) == Kind::Union {
1609            if !rhs_branches.contains(&self.get_left(curr)) {
1610                return false;
1611            }
1612            curr = self.get_right(curr);
1613        }
1614        rhs_branches.contains(&curr)
1615    }
1616
1617    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1618    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1619        if node == target {
1620            return true;
1621        }
1622        match self.get_kind(node) {
1623            Kind::Union => {
1624                self.nullable_subsumes(self.get_left(node), target)
1625                    || self.nullable_subsumes(self.get_right(node), target)
1626            }
1627            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1628                self.nullable_subsumes(self.get_right(node), target)
1629            }
1630            _ => false,
1631        }
1632    }
1633
1634    pub fn num_nodes(&self) -> u32 {
1635        self.num_created
1636    }
1637    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1638        match self.index.get(&inst) {
1639            Some(&id) => id,
1640            None => self.init(inst),
1641        }
1642    }
1643    #[inline]
1644    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1645        self.index.get(inst)
1646    }
1647
1648    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1649        self.index.insert(key, subsumed);
1650        subsumed
1651    }
1652
1653    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1654        let key = &self.array[key.0 as usize];
1655        let entry = self.index.get_mut(key).unwrap();
1656        *entry = subsumed;
1657        subsumed
1658    }
1659
1660    #[inline]
1661    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1662        self.array[node_id.0 as usize].left
1663    }
1664
1665    #[inline]
1666    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1667        self.array[node_id.0 as usize].right
1668    }
1669
1670    #[inline]
1671    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1672        self.array[node_id.0 as usize].extra
1673    }
1674
1675    #[inline]
1676    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1677        debug_assert!(self.get_kind(node_id) == Kind::Concat);
1678        let mut curr = node_id;
1679        while self.get_kind(curr) == Kind::Concat {
1680            curr = curr.right(self);
1681        }
1682        curr
1683    }
1684
1685    /// true if node ends with a trailing Lookahead(_, MISSING)
1686    fn has_trailing_la(&self, node: NodeId) -> bool {
1687        let end = match self.get_kind(node) {
1688            Kind::Concat => self.get_concat_end(node),
1689            Kind::Lookahead => node,
1690            _ => return false,
1691        };
1692        self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1693    }
1694
1695    /// strip the trailing Lookahead from a node, returning (body, la).
1696    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1697        if self.get_kind(node) == Kind::Lookahead {
1698            return (NodeId::EPS, node);
1699        }
1700        debug_assert!(self.get_kind(node) == Kind::Concat);
1701        let right = node.right(self);
1702        if self.get_kind(right) != Kind::Concat {
1703            // Concat(body, LA) → (body, LA)
1704            return (node.left(self), right);
1705        }
1706        let (stripped, la) = self.strip_trailing_la(right);
1707        (self.mk_concat(node.left(self), stripped), la)
1708    }
1709    #[inline]
1710    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1711        debug_assert!(matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted));
1712        lookahead_node_id.left(self)
1713    }
1714    #[inline]
1715    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1716        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1717        lookahead_node_id.right(self)
1718    }
1719    #[inline]
1720    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1721        debug_assert!(
1722            matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted),
1723            "not lookahead/counted: {:?}",
1724            self.pp(lookahead_node_id)
1725        );
1726        self.get_extra(lookahead_node_id)
1727    }
1728    #[inline]
1729    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1730        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1731        lookbehind_node_id.left(self)
1732    }
1733    #[inline]
1734    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1735        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1736        lookbehind_node_id.right(self)
1737    }
1738
1739    #[inline]
1740    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1741        debug_assert!(
1742            self.array.len() > node_id.0 as usize,
1743            "array len: {:?}",
1744            node_id
1745        );
1746        self.array[node_id.0 as usize].kind
1747    }
1748
1749    #[inline]
1750    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1751        &self.array[node_id.0 as usize]
1752    }
1753
1754    #[inline]
1755    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1756        self.metadata[node_id.0 as usize]
1757    }
1758
1759    #[inline]
1760    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1761        debug_assert!(node_id.0 != u32::MAX);
1762        let meta_id = self.get_node_meta_id(node_id);
1763        debug_assert!(meta_id != MetadataId::MISSING);
1764        &self.mb.array[meta_id.0 as usize]
1765    }
1766
1767    #[inline]
1768    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1769        if node_id == NodeId::MISSING {
1770            NullsId::EMPTY
1771        } else {
1772            self.get_meta(node_id).nulls
1773        }
1774    }
1775
1776    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1777        self.mb
1778            .nb
1779            .array
1780            .iter()
1781            .map(|set| set.iter().cloned().collect())
1782            .collect()
1783    }
1784
1785    pub fn nulls_count(&self) -> usize {
1786        self.mb.nb.array.len()
1787    }
1788
1789    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1790        self.mb.nb.array[id as usize].iter().cloned().collect()
1791    }
1792
1793    #[inline]
1794    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1795        if node_id == NodeId::MISSING {
1796            NullsId::EMPTY
1797        } else {
1798            let nulls = self.get_meta(node_id).nulls;
1799            self.mb.nb.and_mask(nulls, mask)
1800        }
1801    }
1802
1803    #[inline]
1804    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1805        let meta_id = self.get_node_meta_id(node_id);
1806        let meta = &self.mb.array[meta_id.0 as usize];
1807        meta.flags
1808    }
1809
1810    #[inline]
1811    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1812        self.get_meta(node_id).flags.nullability()
1813    }
1814
1815    #[inline]
1816    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1817        let meta_id = self.get_node_meta_id(node_id);
1818        let meta = &self.mb.array[meta_id.0 as usize];
1819        meta.flags.all_contains_flags()
1820    }
1821
1822    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1823        if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1824            return self.strip_lb(node_id.right(self));
1825        }
1826        self.strip_lb_inner(node_id)
1827    }
1828
1829    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1830        if !self.contains_look(node_id) {
1831            return Ok(node_id);
1832        }
1833        if self.get_kind(node_id) == Kind::Concat
1834            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1835        {
1836            return self.strip_lb_inner(node_id.right(self));
1837        }
1838        if self.get_kind(node_id) == Kind::Inter {
1839            let left = self.strip_lb_inner(node_id.left(self))?;
1840            let right = self.strip_lb_inner(node_id.right(self))?;
1841            return Ok(self.mk_inter(left, right));
1842        }
1843        if self.get_kind(node_id) == Kind::Union {
1844            let left = self.strip_lb_inner(node_id.left(self))?;
1845            let right = self.strip_lb_inner(node_id.right(self))?;
1846            return Ok(self.mk_union(left, right));
1847        }
1848        match self.get_kind(node_id) {
1849            Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1850            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1851                Err(AlgebraError::UnsupportedPattern)
1852            }
1853            _ => Ok(node_id),
1854        }
1855    }
1856
1857    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1858        debug_assert!(node_id != NodeId::MISSING);
1859        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1860            if *rev != NodeId::MISSING {
1861                return Ok(*rev);
1862            }
1863        }
1864        let rw = match self.get_kind(node_id) {
1865            Kind::End => NodeId::BEGIN,
1866            Kind::Begin => NodeId::END,
1867            Kind::Pred => node_id,
1868            Kind::Concat => {
1869                let left = self.reverse(node_id.left(self))?;
1870                let right = self.reverse(node_id.right(self))?;
1871                self.mk_concat(right, left)
1872            }
1873            Kind::Union => {
1874                let left = self.reverse(node_id.left(self))?;
1875                let right = self.reverse(node_id.right(self))?;
1876                self.mk_union(left, right)
1877            }
1878            Kind::Inter => {
1879                let left = self.reverse(node_id.left(self))?;
1880                let right = self.reverse(node_id.right(self))?;
1881                self.mk_inter(left, right)
1882            }
1883            Kind::Star => {
1884                let body = self.reverse(node_id.left(self))?;
1885                self.mk_star(body)
1886            }
1887            Kind::Compl => {
1888                if self.contains_look(node_id.left(self)) {
1889                    return Err(AlgebraError::UnsupportedPattern);
1890                }
1891                let body = self.reverse(node_id.left(self))?;
1892                self.mk_compl(body)
1893            }
1894            Kind::Lookbehind => {
1895                // lookbehind → lookahead when reversed (separate fwd/rev DFAs)
1896                let prev = self.get_lookbehind_prev(node_id);
1897                let inner_id = self.get_lookbehind_inner(node_id);
1898                let rev_inner = self.reverse(inner_id)?;
1899                let rev_prev = if prev != NodeId::MISSING {
1900                    self.reverse(prev)?
1901                } else {
1902                    NodeId::MISSING
1903                };
1904                self.mk_lookahead(rev_inner, rev_prev, 0)
1905            }
1906            Kind::Lookahead => {
1907                let rel = self.get_lookahead_rel(node_id);
1908                if rel == u32::MAX {
1909                    // rel MAX holds no nullability - rewrite to intersection
1910                    let lbody = self.get_lookahead_inner(node_id);
1911                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
1912                    let lbody_ts = self.mk_concat(lbody, NodeId::TS);
1913                    let ltail_ts = self.mk_concat(ltail, NodeId::TS);
1914                    let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1915                    let rev = self.reverse(as_inter)?;
1916                    self.init_reversed(node_id, rev);
1917                    return Ok(rev);
1918                }
1919                if rel != 0 {
1920                    return Err(AlgebraError::UnsupportedPattern);
1921                }
1922                // lookahead → lookbehind when reversed (separate fwd/rev DFAs)
1923                let tail_node = self.get_lookahead_tail(node_id);
1924                let rev_tail = if tail_node != NodeId::MISSING {
1925                    self.reverse(tail_node)?
1926                } else {
1927                    NodeId::MISSING
1928                };
1929                let inner_id = self.get_lookahead_inner(node_id);
1930                let rev_inner = self.reverse(inner_id)?;
1931                self.mk_lookbehind(rev_inner, rev_tail)
1932            }
1933            Kind::Counted => {
1934                return Err(AlgebraError::UnsupportedPattern);
1935            }
1936        };
1937        self.init_reversed(node_id, rw);
1938        Ok(rw)
1939    }
1940
1941    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
1942        let node = NodeKey {
1943            kind: Kind::Pred,
1944            left: NodeId::MISSING,
1945            right: NodeId::MISSING,
1946            extra: pred.0,
1947        };
1948        self.get_node_id(node)
1949    }
1950
1951    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
1952        let key = NodeKey {
1953            kind: Kind::Compl,
1954            left: body,
1955            right: NodeId::MISSING,
1956            extra: u32::MAX,
1957        };
1958        if let Some(id) = self.key_is_created(&key) {
1959            return *id;
1960        }
1961        if body == NodeId::BOT {
1962            return NodeId::TS;
1963        }
1964        if body == NodeId::TS {
1965            return NodeId::BOT;
1966        }
1967
1968        if let Some(contains_body) = body.is_contains(self) {
1969            if contains_body.is_pred(self) {
1970                let pred = contains_body.pred_tset(self);
1971                let notpred = self.mk_pred_not(pred);
1972                let node = self.mk_star(notpred);
1973                return self.init_as(key, node);
1974            } else if contains_body == NodeId::END {
1975                return self.init_as(key, NodeId::BOT);
1976            }
1977        };
1978
1979        if self.get_kind(body) == Kind::Compl {
1980            return self.get_node(body).left;
1981        }
1982
1983        self.get_node_id(key)
1984    }
1985
1986    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
1987        let nid = self.get_nulls_id(body);
1988        let nref = self.mb.nb.get_set_ref(nid).clone();
1989        let mut futures = NodeId::BOT;
1990        for n in nref.iter() {
1991            if !n.is_mask_nullable(mask) {
1992                continue;
1993            }
1994
1995            let eff = if n.rel == 0 {
1996                NodeId::EPS
1997            } else {
1998                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
1999            };
2000            futures = self.mk_union(futures, eff)
2001        }
2002        futures
2003    }
2004
2005    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2006        if cfg!(feature = "norewrite") {
2007            return None;
2008        }
2009
2010        if self.get_kind(tail) == Kind::Lookbehind {
2011            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2012            return self
2013                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft).ok();
2014        }
2015        if self.get_kind(head) == Kind::Lookahead {
2016            let la_tail = self.get_lookahead_tail(head);
2017            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2018            if new_la_tail.is_center_nullable(self) {
2019                let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2020                if non_null_tail == NodeId::BOT {
2021                    return None;
2022                }
2023                let la_body = self.get_lookahead_inner(head);
2024                return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2025            }
2026            let la_body = self.get_lookahead_inner(head);
2027            let la_rel = self.get_lookahead_rel(head);
2028            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2029                let tail_rel = self.get_lookahead_rel(new_la_tail);
2030                tail_rel + la_rel
2031            } else {
2032                u32::MAX
2033            };
2034
2035            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2036        }
2037
2038        if head.is_kind(self, Kind::End)
2039            && tail == NodeId::TS {
2040                return Some(head);
2041            }
2042        if head == NodeId::TS
2043            && tail == NodeId::END {
2044                return Some(head);
2045            }
2046
2047        if head == NodeId::TS
2048            && self.nullability(tail) == Nullability::ALWAYS {
2049                return Some(NodeId::TS);
2050            }
2051
2052        if tail == NodeId::TS
2053            && self.nullability(head) == Nullability::ALWAYS {
2054                return Some(NodeId::TS);
2055            }
2056
2057        if self.get_kind(tail) == Kind::Union
2058            && head == NodeId::TS {
2059                let mut should_distribute_top = false;
2060                self.iter_unions(tail, |v| {
2061                    if v == NodeId::BEGIN || self.starts_with_ts(v) {
2062                        should_distribute_top = true;
2063                    }
2064                });
2065                if should_distribute_top {
2066                    let mut new_union = NodeId::BOT;
2067                    let mut curr = tail;
2068                    while self.get_kind(curr) == Kind::Union {
2069                        let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2070                        new_union = self.mk_union(new_node, new_union);
2071                        curr = curr.right(self);
2072                    }
2073                    let new_node = self.mk_concat(NodeId::TS, curr);
2074                    new_union = self.mk_union(new_union, new_node);
2075                    return Some(new_union);
2076                }
2077            }
2078
2079        if self.get_kind(head) == Kind::Union
2080            && head == NodeId::TS {
2081                let mut should_distribute_top = false;
2082                self.iter_unions(head, |v| {
2083                    if v == NodeId::END || self.starts_with_ts(v) {
2084                        should_distribute_top = true;
2085                    }
2086                });
2087                if should_distribute_top {
2088                    let mut new_union = NodeId::BOT;
2089                    let mut curr = head;
2090                    while self.get_kind(curr) == Kind::Union {
2091                        let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2092                        new_union = self.mk_union(new_node, new_union);
2093                        curr = curr.right(self);
2094                    }
2095                    let new_node = self.mk_concat(curr, NodeId::TS);
2096                    new_union = self.mk_union(new_union, new_node);
2097                    return Some(new_union);
2098                }
2099            }
2100
2101        if self.get_kind(head) == Kind::Inter
2102            && tail == NodeId::TS {
2103                let mut alltopstar = true;
2104                iter_inter!(self, head, |v| {
2105                    alltopstar = self.ends_with_ts(v);
2106                });
2107                if alltopstar {
2108                    return Some(head);
2109                }
2110            }
2111
2112        if head.is_star(self) && head == tail {
2113            return Some(head);
2114        }
2115
2116        None
2117    }
2118
2119    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2120        use Kind::*;
2121
2122        if cfg!(feature = "norewrite") {
2123            return None;
2124        }
2125        if left == right {
2126            return Some(left);
2127        }
2128
2129        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2130            return Some(right);
2131        }
2132
2133        if self.get_kind(left) == Kind::Pred
2134            && self.get_kind(right) == Kind::Pred {
2135                let l = left.pred_tset(self);
2136                let r = right.pred_tset(self);
2137                let solver = self.solver();
2138                let psi = solver.or_id(l, r);
2139                let rewrite = self.mk_pred(psi);
2140                return Some(rewrite);
2141            }
2142
2143        if left == NodeId::EPS
2144            && self.get_extra(left) == 0
2145                && self.nullability(right) == Nullability::ALWAYS {
2146                    return Some(right);
2147                }
2148
2149        if self.get_kind(left) == Kind::Lookahead
2150            && self.get_kind(right) == Kind::Lookahead {
2151                let lb = left.left(self);
2152                let lt = left.right(self);
2153                let lrel = left.extra(self);
2154
2155                let rb = right.left(self);
2156                let rt = right.right(self);
2157                let rrel = right.extra(self);
2158
2159                if lrel == rrel
2160                    
2161                    && lt.is_missing()
2162                        && rt.is_missing() {
2163                            let unioned = self.mk_union(lb, rb);
2164                            let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2165                            return Some(node);
2166                        }
2167            }
2168
2169        if right.is_kind(self, Concat) {
2170            if left == NodeId::END
2171                && right.left(self) == NodeId::END
2172                    && self.nullability(right).has(Nullability::END) {
2173                        return Some(right);
2174                    }
2175            // .*|.*a.* => .*(a.*|)
2176            if left == right.left(self) {
2177                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2178                let rw = self.mk_concat(left, rhs);
2179                return Some(rw);
2180            }
2181            if left.is_kind(self, Concat) {
2182                let head1 = left.left(self);
2183                let head2 = right.left(self);
2184
2185                if head1 == head2 {
2186                    let t1 = left.right(self);
2187                    let t2 = right.right(self);
2188                    // opportunistic rewrites
2189                    if head1 == NodeId::TS {
2190                        if t2.has_concat_tail(self, t1) {
2191                            return Some(left);
2192                        }
2193                        if t1.has_concat_tail(self, t2) {
2194                            return Some(right);
2195                        }
2196                    }
2197                    let un = self.mk_union(t1, t2);
2198                    return Some(self.mk_concat(left.left(self), un));
2199                }
2200
2201                // xa|ya => (x|y)a - suffix factoring via reverse
2202                // disabled: introduces reversed intermediate patterns that bloat derivative DFA
2203                if false {
2204                    let end1 = self.get_concat_end(left);
2205                    let end2 = self.get_concat_end(right);
2206                    if end1 == end2 {
2207                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2208                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2209                            let rev1 = self.reverse(left).unwrap();
2210                            let rev2 = self.reverse(right).unwrap();
2211
2212                            let union_rev = self.mk_union(rev1, rev2);
2213                            return Some(self.reverse(union_rev).unwrap());
2214                        }
2215                    }
2216                }
2217            }
2218            if self.get_kind(left) == Kind::Pred
2219                && left == right.left(self) {
2220                    let un = self.mk_opt(right.right(self));
2221                    let conc = self.mk_concat(left, un);
2222                    return Some(conc);
2223                }
2224        }
2225
2226        if left == NodeId::EPS
2227            && right.is_plus(self) {
2228                let result = self.mk_star(right.left(self));
2229                return Some(result);
2230            }
2231
2232        // e.g. (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2233        if self.flags == BuilderFlags::SUBSUME {
2234            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2235                return Some(rw);
2236            }
2237        }
2238
2239        None
2240    }
2241
2242    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2243        let mut curr = node;
2244        while self.get_kind(curr) == Kind::Inter {
2245            out.push(self.get_left(curr));
2246            curr = self.get_right(curr);
2247        }
2248        out.push(curr);
2249    }
2250
2251    /// check if node is [_*] Pred(X)^count . Star(Y)
2252    /// returns (has_prefix_star, pred_tset, star_node, count)
2253    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2254        let mut curr = node;
2255        let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2256        if has_prefix {
2257            curr = self.get_right(curr);
2258        }
2259        let mut count = 0u32;
2260        let mut pred_set = None;
2261        while self.get_kind(curr) == Kind::Concat {
2262            let head = self.get_left(curr);
2263            if self.get_kind(head) != Kind::Pred {
2264                return None;
2265            }
2266            let ps = head.pred_tset(self);
2267            match pred_set {
2268                None => pred_set = Some(ps),
2269                Some(existing) => {
2270                    if existing != ps {
2271                        return None;
2272                    }
2273                }
2274            }
2275            curr = self.get_right(curr);
2276            count += 1;
2277        }
2278        if count == 0 || self.get_kind(curr) != Kind::Star {
2279            return None;
2280        }
2281        Some((has_prefix, pred_set.unwrap(), curr, count))
2282    }
2283
2284    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2285        let mut j = 0;
2286        for &s in sub {
2287            while j < sup.len() && sup[j] < s {
2288                j += 1;
2289            }
2290            if j >= sup.len() || sup[j] != s {
2291                return false;
2292            }
2293            j += 1;
2294        }
2295        true
2296    }
2297
2298    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2299        if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2300            return None;
2301        }
2302
2303        let mut lc: Vec<NodeId> = Vec::new();
2304        let mut rc: Vec<NodeId> = Vec::new();
2305        self.collect_inter_components(left, &mut lc);
2306        self.collect_inter_components(right, &mut rc);
2307
2308        // component subset check: fewer constraints = larger language
2309        // if lc ⊆ rc then L(left) ⊇ L(right), keep left
2310        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2311            return Some(left);
2312        }
2313        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2314        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2315            return Some(right);
2316        }
2317
2318        if lc.len() == rc.len() {
2319            let mut diff_idx = None;
2320            for i in 0..lc.len() {
2321                if lc[i] != rc[i] {
2322                    if diff_idx.is_some() {
2323                        return None;
2324                    }
2325                    diff_idx = Some(i);
2326                }
2327            }
2328            if let Some(idx) = diff_idx {
2329                let a = lc[idx];
2330                let b = rc[idx];
2331                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2332                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2333                {
2334                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2335                        return if ca < cb { Some(left) } else { Some(right) };
2336                    }
2337                }
2338            }
2339        }
2340
2341        None
2342    }
2343
2344    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2345        if cfg!(feature = "norewrite") {
2346            return None;
2347        }
2348        if left == right {
2349            return Some(left);
2350        }
2351
2352        if self.get_kind(right) == Kind::Union {
2353            let mut result = NodeId::BOT;
2354            self.iter_unions_b(
2355                right,
2356                &mut (|b, v| {
2357                    let new_inter = b.mk_inter(v, left);
2358                    result = b.mk_union(result, new_inter);
2359                }),
2360            );
2361            return Some(result);
2362        }
2363        if self.get_kind(left) == Kind::Union {
2364            let mut result = NodeId::BOT;
2365            self.iter_unions_b(
2366                left,
2367                &mut (|b, v| {
2368                    let new_inter = b.mk_inter(v, right);
2369                    result = b.mk_union(result, new_inter);
2370                }),
2371            );
2372            return Some(result);
2373        }
2374
2375        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2376            return Some(NodeId::BOT);
2377        }
2378
2379        if left.kind(self) == Kind::Compl
2380            && right.kind(self) == Kind::Compl {
2381                let bodies = self.mk_union(left.left(self), right.left(self));
2382                return Some(self.mk_compl(bodies));
2383            }
2384
2385        if left == NodeId::TOPPLUS {
2386            if right.is_pred_star(self).is_some() {
2387                let newloop = self.mk_plus(right.left(self));
2388                return Some(newloop);
2389            }
2390            if right.is_never_nullable(self) {
2391                return Some(right);
2392            }
2393            if right.is_kind(self, Kind::Lookahead)
2394                && self.get_lookahead_tail(right).is_missing() {
2395                    return Some(NodeId::BOT);
2396                }
2397            if right.is_kind(self, Kind::Concat) {}
2398        }
2399
2400        // hoist lookahead out of intersection:
2401        //   L & (?=X)T  → (?=X)(L & T)                  [bare Lookahead]
2402        //   L & ((?=X)·body) → (?=X)(L & body)           [Concat with leading Lookahead]
2403        {
2404            let l_is_la = left.is_lookahead(self);
2405            let r_is_la = right.is_lookahead(self);
2406            let l_is_cla = !l_is_la
2407                && self.get_kind(left) == Kind::Concat
2408                && self.get_kind(left.left(self)) == Kind::Lookahead;
2409            let r_is_cla = !r_is_la
2410                && self.get_kind(right) == Kind::Concat
2411                && self.get_kind(right.left(self)) == Kind::Lookahead;
2412            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2413                let (la_node, other, concat_body) = if r_is_la {
2414                    (right, left, NodeId::MISSING)
2415                } else if l_is_la {
2416                    (left, right, NodeId::MISSING)
2417                } else if r_is_cla {
2418                    (right.left(self), left, right.right(self))
2419                } else {
2420                    (left.left(self), right, left.right(self))
2421                };
2422                let la_body = la_node.left(self);
2423                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2424                let inter_right = if concat_body.is_missing() {
2425                    la_tail
2426                } else {
2427                    self.mk_concat(la_tail, concat_body)
2428                };
2429                let new_body = self.mk_inter(other, inter_right);
2430                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2431                return Some(self.mk_concat(la, new_body));
2432            }
2433        }
2434
2435        if self.get_kind(right) == Kind::Compl {
2436            let compl_body = right.left(self);
2437            if left == compl_body {
2438                return Some(NodeId::BOT);
2439            }
2440            if self.get_kind(compl_body) == Kind::Concat {
2441                let compl_head = compl_body.left(self);
2442                // L & ~(R·_*) = BOT when L ⊂ R·_*
2443                if compl_body.right(self) == NodeId::TS
2444                    && compl_head == left {
2445                        return Some(NodeId::BOT);
2446                    }
2447            }
2448        }
2449
2450        if let Some(pleft) = left.is_pred_star(self) {
2451            if let Some(pright) = right.is_pred_star(self) {
2452                let merged = self.mk_inter(pleft, pright);
2453                return Some(self.mk_star(merged));
2454            }
2455        }
2456
2457        // factor lookbehinds out of intersections
2458        // Concat(LB1, body1) & Concat(LB2, body2) → Concat(LB(inner1&inner2), body1&body2)
2459        // Concat(LB, body1) & body2 → Concat(LB, body1&body2)
2460        {
2461            let l_is_clb = self.get_kind(left) == Kind::Concat
2462                && self.get_kind(left.left(self)) == Kind::Lookbehind;
2463            let r_is_clb = self.get_kind(right) == Kind::Concat
2464                && self.get_kind(right.left(self)) == Kind::Lookbehind;
2465            if l_is_clb || r_is_clb {
2466                let (lb, body) = if l_is_clb && r_is_clb {
2467                    let lb1 = left.left(self);
2468                    let lb2 = right.left(self);
2469                    let inner = self.mk_inter(
2470                        self.get_lookbehind_inner(lb1),
2471                        self.get_lookbehind_inner(lb2),
2472                    );
2473                    // lb_prev is MISSING - cannot trigger UnsupportedPattern
2474                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2475                    let body = self.mk_inter(left.right(self), right.right(self));
2476                    (lb, body)
2477                } else if l_is_clb {
2478                    let lb = left.left(self);
2479                    let body = self.mk_inter(left.right(self), right);
2480                    (lb, body)
2481                } else {
2482                    let lb = right.left(self);
2483                    let body = self.mk_inter(left, right.right(self));
2484                    (lb, body)
2485                };
2486                return Some(self.mk_concat(lb, body));
2487            }
2488        }
2489
2490        // factor trailing lookaheads out of intersections
2491        // body1·LA & body2 → (body1&body2)·LA
2492        // body1·LA1 & body2·LA2 → (body1&body2)·LA(inner1&inner2)
2493        {
2494            let l_has_la = self.has_trailing_la(left);
2495            let r_has_la = self.has_trailing_la(right);
2496            if l_has_la || r_has_la {
2497                let (body, la) = if l_has_la && r_has_la {
2498                    let (lbody, l_la) = self.strip_trailing_la(left);
2499                    let (rbody, r_la) = self.strip_trailing_la(right);
2500                    let inner = self.mk_inter(
2501                        self.get_lookahead_inner(l_la),
2502                        self.get_lookahead_inner(r_la),
2503                    );
2504                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2505                    let body = self.mk_inter(lbody, rbody);
2506                    (body, la)
2507                } else if l_has_la {
2508                    let (lbody, la) = self.strip_trailing_la(left);
2509                    let body = self.mk_inter(lbody, right);
2510                    (body, la)
2511                } else {
2512                    let (rbody, la) = self.strip_trailing_la(right);
2513                    let body = self.mk_inter(left, rbody);
2514                    (body, la)
2515                };
2516                return Some(self.mk_concat(body, la));
2517            }
2518        }
2519
2520        None
2521    }
2522
2523    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2524        if cfg!(feature = "norewrite") {
2525            return None;
2526        }
2527        debug_assert!(self.get_kind(right_union) == Kind::Union);
2528
2529        let mut rewritten = None;
2530        right_union.iter_union_while(
2531            self,
2532            &mut (|b, v| {
2533                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2534                    rewritten = Some((v, rw));
2535                    false
2536                } else {
2537                    true
2538                }
2539            }),
2540        );
2541
2542        if let Some(rw) = rewritten {
2543            let mut new_union = NodeId::BOT;
2544            right_union.iter_union(
2545                self,
2546                &mut (|b, v| {
2547                    if v == rw.0 {
2548                        new_union = b.mk_union(rw.1, new_union)
2549                    } else {
2550                        new_union = b.mk_union(v, new_union)
2551                    }
2552                }),
2553            );
2554            return Some(new_union);
2555        };
2556
2557        None
2558    }
2559
2560    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2561        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2562        debug_assert!(tail != NodeId::MISSING);
2563        let key = NodeKey {
2564            kind: Kind::Concat,
2565            left: head,
2566            right: tail,
2567            extra: u32::MAX,
2568        };
2569        if let Some(id) = self.key_is_created(&key) {
2570            return *id;
2571        }
2572
2573        if head == NodeId::BOT || tail == NodeId::BOT {
2574            return NodeId::BOT;
2575        }
2576        if head == NodeId::EPS {
2577            return tail;
2578        }
2579        if tail == NodeId::EPS {
2580            return head;
2581        }
2582
2583        match tail {
2584            // this is only valid if direction known;
2585            NodeId::BEGIN => {
2586                if !self.is_nullable(head, Nullability::BEGIN) {
2587                    return NodeId::BOT;
2588                } else {
2589                    return NodeId::BEGIN;
2590                }
2591            }
2592            _ => {}
2593        }
2594
2595        // normalize concats to right
2596        if head.is_kind(self, Kind::Concat) {
2597            let left = head.left(self);
2598            let newright = self.mk_concat(head.right(self), tail);
2599            let updated = self.mk_concat(left, newright);
2600            return self.init_as(key, updated);
2601        }
2602
2603        if head == NodeId::TS && tail == NodeId::END {
2604            return NodeId::TS;
2605        }
2606
2607        if self.get_kind(head) == Kind::End
2608            && !self.is_nullable(tail, Nullability::END) {
2609                return NodeId::BOT;
2610            }
2611
2612        if self.get_kind(tail) == Kind::Concat {
2613            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2614                let upd = self.mk_concat(rw, tail.right(self));
2615                return self.init_as(key, upd);
2616            }
2617        }
2618
2619        if let Some(new) = self.attempt_rw_concat_2(head, tail) { return self.init_as(key, new) }
2620
2621        match (self.get_kind(head), self.get_kind(tail)) {
2622            // merge stars
2623            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2624                let rl = tail.left(self);
2625                if head == rl {
2626                    return self.init_as(key, tail);
2627                }
2628            }
2629            // attempt longer concat rw
2630            (_, Kind::Concat) => {
2631                let curr = head;
2632                let h2 = self.mk_concat(curr, tail.left(self));
2633                let tr = tail.right(self);
2634                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2635                    return self.init_as(key, new);
2636                }
2637            }
2638            _ if head == NodeId::TS
2639                && self.starts_with_ts(tail) => {
2640                    return self.init_as(key, tail);
2641                }
2642            _ => {}
2643        }
2644
2645        self.init(key)
2646    }
2647
2648    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2649        // LNF: lookbehind must start with ts
2650        let lb_body = {
2651            match self.starts_with_ts(lb_body) {
2652                true => lb_body,
2653                false => self.mk_concat(NodeId::TS, lb_body),
2654            }
2655        };
2656        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2657        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2658    }
2659
2660    fn mk_lookbehind_internal(
2661        &mut self,
2662        lb_body: NodeId,
2663        lb_prev: NodeId,
2664    ) -> Result<NodeId, AlgebraError> {
2665        debug_assert!(lb_body != NodeId::MISSING);
2666        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2667        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2668            return Ok(NodeId::BOT);
2669        }
2670        if lb_body == NodeId::TS {
2671            return Ok(lb_prev);
2672        }
2673        if lb_body == NodeId::EPS {
2674            match lb_prev {
2675                NodeId::MISSING => return Ok(NodeId::EPS),
2676                NodeId::EPS => return Ok(NodeId::EPS),
2677                _ => return Err(AlgebraError::UnsupportedPattern),
2678            }
2679        }
2680
2681        let key = NodeKey {
2682            kind: Kind::Lookbehind,
2683            left: lb_body,
2684            right: lb_prev,
2685            extra: u32::MAX,
2686        };
2687        match self.key_is_created(&key) {
2688            Some(id) => Ok(*id),
2689            None => {
2690                if lb_prev == NodeId::TS {
2691                    return Ok(self.mk_concat(lb_prev, lb_body));
2692                }
2693
2694                Ok(self.init(key))
2695            }
2696        }
2697    }
2698
2699    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2700        // LNF: lookahead must end with ts
2701        let la_body = {
2702            match self.ends_with_ts(la_body) {
2703                true => la_body,
2704                false => self.mk_concat(la_body, NodeId::TS),
2705            }
2706        };
2707        let rel = if NodeId::MISSING == la_tail {
2708            rel
2709        } else {
2710            match la_tail.is_center_nullable(self) {
2711                false => u32::MAX,
2712                true => rel,
2713            }
2714        };
2715
2716        self.mk_lookahead_internal(la_body, la_tail, rel)
2717    }
2718
2719    // rel max = carries no nullability, can potentially rw to intersection
2720    pub fn mk_lookahead_internal(
2721        &mut self,
2722        la_body: NodeId,
2723        la_tail: NodeId,
2724        rel: u32,
2725    ) -> NodeId {
2726        let key = NodeKey {
2727            kind: Kind::Lookahead,
2728            left: la_body,
2729            right: la_tail,
2730            extra: rel,
2731        };
2732        if let Some(id) = self.key_is_created(&key) {
2733            return *id;
2734        }
2735        if la_body == NodeId::TS {
2736            if rel == 0 {
2737                return la_tail.missing_to_eps();
2738            } else {
2739                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2740            }
2741        }
2742        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2743            return NodeId::BOT;
2744        }
2745        if la_tail.is_missing() && rel == u32::MAX {
2746            return NodeId::BOT;
2747        }
2748
2749        if la_body == NodeId::EPS && la_tail.is_missing()
2750            && rel == 0 {
2751                return la_body;
2752            }
2753
2754        if la_tail == NodeId::TS {
2755            if rel == 0 || rel == u32::MAX {
2756                return self.mk_concat(la_body, NodeId::TS);
2757            } else if rel == u32::MAX {
2758                return self.mk_begins_with(la_body);
2759            }
2760        }
2761
2762        if rel == u32::MAX {
2763            if la_tail.is_missing() {
2764                return NodeId::BOT;
2765            }
2766
2767            if self.is_always_nullable(la_body) {
2768                return la_tail.missing_to_eps();
2769            }
2770
2771            if la_tail != NodeId::MISSING {
2772                match self.get_kind(la_tail) {
2773                    _ => {
2774                        if la_body.is_compl_plus_end(self) {
2775                            let minlen = self.get_min_length_only(la_tail);
2776                            if minlen >= 1 {
2777                                return NodeId::BOT;
2778                            }
2779                        }
2780                    }
2781                }
2782            }
2783        }
2784
2785        // flatten consecutive lookaheads: (?=A, tail=(?=B, tail=C)) → (?=A._* & B._*, tail=C)
2786        if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2787            let la_body2 = self.get_lookahead_inner(la_tail);
2788            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2789            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2790            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2791            let new_la_rel = self.get_lookahead_rel(la_tail);
2792            let new_la_tail = self.get_lookahead_tail(la_tail);
2793            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2794        }
2795
2796        // _*\z -> too expensive to keep; null it
2797        if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2798            let la_body_right = la_body.right(self);
2799            if self.is_always_nullable(la_body_right) {
2800                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2801            }
2802            if la_body.right(self) == NodeId::END {
2803                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2804            }
2805            let bodyright = la_body.right(self);
2806            if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2807                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2808                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2809            }
2810        }
2811
2812        if la_tail != NodeId::MISSING {
2813            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2814                let lpred = la_body.left(self);
2815                if self.get_kind(lpred) == Kind::Pred {
2816                    let l = lpred.pred_tset(self);
2817                    let r = la_tail.pred_tset(self);
2818                    let psi_and = self.solver().and_id(l, r);
2819                    let rewrite = self.mk_pred(psi_and);
2820                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2821                    let new_right = self.mk_lookahead_internal(
2822                        la_body.right(self),
2823                        NodeId::MISSING,
2824                        new_rel,
2825                    );
2826                    return self.mk_concat(rewrite, new_right);
2827                }
2828            }
2829        }
2830
2831        self.get_node_id(key)
2832    }
2833
2834    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2835        let has_match = (packed >> 16) > 0;
2836        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2837            return NodeId::BOT;
2838        }
2839        debug_assert!(
2840            body == NodeId::BOT || !self.is_infinite(body),
2841            "Counted body must have finite max length"
2842        );
2843        let chain = self.prune_counted_chain(body, chain);
2844        let key = NodeKey {
2845            kind: Kind::Counted,
2846            left: body,
2847            right: chain,
2848            extra: packed,
2849        };
2850        if let Some(id) = self.key_is_created(&key) {
2851            return *id;
2852        }
2853        self.get_node_id(key)
2854    }
2855
2856    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2857        if chain == NodeId::MISSING || body == NodeId::BOT {
2858            return chain;
2859        }
2860        // nullable body → body·_* = _*, all chain candidates subsumed
2861        if self.nullability(body) != Nullability::NEVER {
2862            return NodeId::MISSING;
2863        }
2864        let chain_body = chain.left(self);
2865        if chain_body == NodeId::BOT {
2866            return chain; // pending match, keep
2867        }
2868        let not_begins = self.mk_not_begins_with(body);
2869        let inter = self.mk_inter(chain_body, not_begins);
2870        let is_empty = inter == NodeId::BOT;
2871        if is_empty {
2872            self.prune_counted_chain(body, chain.right(self))
2873        } else {
2874            chain
2875        }
2876    }
2877
2878    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2879        let neg_inner = self.mk_concat(body, NodeId::TS);
2880        let neg_part = self.mk_compl(neg_inner);
2881        let conc = self.mk_concat(neg_part, NodeId::END);
2882        self.mk_lookahead(conc, NodeId::MISSING, rel)
2883    }
2884
2885    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2886        match self.get_node(body).kind {
2887            Kind::Pred => {
2888                let psi = body.pred_tset(self);
2889                let negated = self.mk_pred_not(psi);
2890                let union = self.mk_union(NodeId::BEGIN, negated);
2891                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2892                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2893            }
2894            _ => {
2895                let neg_inner = self.mk_concat(NodeId::TS, body);
2896                let neg_part = self.mk_compl(neg_inner);
2897                let conc = self.mk_concat(NodeId::BEGIN, neg_part);
2898                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2899                self.mk_lookbehind_internal(conc, NodeId::MISSING).unwrap()
2900            }
2901        }
2902    }
2903
2904    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2905        debug_assert!(left != NodeId::MISSING);
2906        debug_assert!(right != NodeId::MISSING);
2907        if left > right {
2908            return self.mk_union(right, left);
2909        }
2910        let key = NodeKey {
2911            kind: Kind::Union,
2912            left,
2913            right,
2914            extra: u32::MAX,
2915        };
2916        if let Some(id) = self.key_is_created(&key) {
2917            return *id;
2918        }
2919
2920        if left == right {
2921            return left;
2922        }
2923        if left == NodeId::BOT {
2924            return right;
2925        }
2926        if right == NodeId::BOT {
2927            return left;
2928        }
2929        if right == NodeId::TS {
2930            return right;
2931        }
2932        if left == NodeId::TS {
2933            return left;
2934        }
2935
2936        match (self.get_kind(left), self.get_kind(right)) {
2937            (Kind::Union, _) => {
2938                self.iter_unions_b(left, &mut |b, v| {
2939                    b.temp_vec.push(v);
2940                });
2941                self.iter_unions_b(right, &mut |b, v| {
2942                    b.temp_vec.push(v);
2943                });
2944                self.temp_vec.sort();
2945                let tree = self.temp_vec.clone();
2946                self.temp_vec.clear();
2947                let newnode = tree
2948                    .iter()
2949                    .rev()
2950                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2951                return self.init_as(key, newnode);
2952            }
2953            (_, Kind::Union) => {
2954                let rleft = right.left(self);
2955                // if left_node id is smaller than rleft, just create a new union
2956                if left > rleft {
2957                    self.iter_unions_b(left, &mut |b, v| {
2958                        b.temp_vec.push(v);
2959                    });
2960                    self.iter_unions_b(right, &mut |b, v| {
2961                        b.temp_vec.push(v);
2962                    });
2963                    self.temp_vec.sort();
2964                    let tree = self.temp_vec.clone();
2965                    self.temp_vec.clear();
2966                    let newnode = tree
2967                        .iter()
2968                        .rev()
2969                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2970                    return self.init_as(key, newnode);
2971                } else {
2972                    if let Some(rw) = self.attempt_rw_unions(left, right) {
2973                        return self.init_as(key, rw);
2974                    }
2975                }
2976            }
2977            _ => {}
2978        }
2979
2980        if let Some(rw) = self.attempt_rw_union_2(left, right) {
2981            return self.init_as(key, rw);
2982        }
2983        self.init(key)
2984    }
2985
2986    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
2987        debug_assert!(left_id != NodeId::MISSING);
2988        debug_assert!(right_id != NodeId::MISSING);
2989        if left_id == right_id {
2990            return left_id;
2991        }
2992        if left_id == NodeId::BOT || right_id == NodeId::BOT {
2993            return NodeId::BOT;
2994        }
2995        if left_id == NodeId::TS {
2996            return right_id;
2997        }
2998        if right_id == NodeId::TS {
2999            return left_id;
3000        }
3001        if left_id > right_id {
3002            return self.mk_inter(right_id, left_id);
3003        }
3004        let key = NodeKey {
3005            kind: Kind::Inter,
3006            left: left_id,
3007            right: right_id,
3008            extra: u32::MAX,
3009        };
3010        if let Some(id) = self.key_is_created(&key) {
3011            return *id;
3012        }
3013
3014        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3015            return self.init_as(key, rw);
3016        }
3017
3018        self.init(key)
3019    }
3020
3021    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3022        let node = NodeKey {
3023            kind,
3024            left: NodeId::MISSING,
3025            right: NodeId::MISSING,
3026            extra: u32::MAX,
3027        };
3028        self.init(node)
3029    }
3030
3031    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3032        let star = self.mk_star(body_id);
3033        self.mk_concat(body_id, star)
3034    }
3035    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3036        let opt = self.mk_opt(body_id);
3037        let mut nodes1 = vec![];
3038        for _ in lower..upper {
3039            nodes1.push(opt);
3040        }
3041        for _ in 0..lower {
3042            nodes1.push(body_id);
3043        }
3044        self.mk_concats(nodes1.into_iter())
3045    }
3046    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3047        self.mk_union(NodeId::EPS, body_id)
3048    }
3049
3050    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3051        let key = NodeKey {
3052            kind: Kind::Star,
3053            left: body_id,
3054            right: NodeId::MISSING,
3055            extra: 0,
3056        };
3057        if let Some(id) = self.key_is_created(&key) {
3058            return *id;
3059        }
3060        // _*{500} is still _*
3061        if body_id.is_kind(self, Kind::Star) {
3062            return body_id;
3063        }
3064        self.get_node_id(key)
3065    }
3066
3067    /// it's cheaper to check this once as an edge-case
3068    /// than to compute a 4th nullability bit for every node
3069    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3070        match self.get_kind(node_id) {
3071            Kind::End => Nullability::EMPTYSTRING,
3072            Kind::Begin => Nullability::EMPTYSTRING,
3073            Kind::Pred => Nullability::NEVER,
3074            Kind::Star => Nullability::ALWAYS,
3075            Kind::Inter | Kind::Concat => {
3076                let lnull = self.nullability_emptystring(node_id.left(self));
3077                let rnull = self.nullability_emptystring(node_id.right(self));
3078                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3079            }
3080            Kind::Union => {
3081                let lnull = self.nullability_emptystring(node_id.left(self));
3082                let rnull = self.nullability_emptystring(node_id.right(self));
3083                lnull.or(rnull)
3084            }
3085            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3086            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3087            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3088            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3089        }
3090    }
3091
3092    #[inline(always)]
3093    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3094        self.get_meta(node_id)
3095            .flags
3096            .nullability()
3097            .has(Nullability::CENTER.or(Nullability::END))
3098    }
3099
3100    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3101        self.get_only_nullability(node_id)
3102    }
3103
3104    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3105        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3106    }
3107
3108    pub fn pp(&self, node_id: NodeId) -> String {
3109        let mut s = String::new();
3110        self.ppw(&mut s, node_id).unwrap();
3111        s
3112    }
3113
3114    #[allow(dead_code)]
3115    pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3116        let nu = self.get_nulls_id(node_id);
3117        let nr = self.mb.nb.get_set_ref(nu);
3118        let s1 = format!("{:?}", nr);
3119        s1
3120    }
3121
3122    #[allow(dead_code)]
3123    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3124        match self.get_tregex(term_id) {
3125            TRegex::Leaf(node_id) => {
3126                let mut s = String::new();
3127                self.ppw(&mut s, *node_id).unwrap();
3128                s
3129            }
3130            TRegex::ITE(cond, then_id, else_id) => {
3131                format!(
3132                    "ITE({},{},{})",
3133                    self.solver_ref().pp(*cond),
3134                    self.ppt(*then_id),
3135                    self.ppt(*else_id)
3136                )
3137            }
3138        }
3139    }
3140
3141    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3142        if cfg!(feature = "graphviz") {
3143            match node_id {
3144                NodeId::MISSING => return write!(s, "MISSING"),
3145                NodeId::BOT => return write!(s, "⊥"),
3146                NodeId::TS => return write!(s, "_*"),
3147                NodeId::TOP => return write!(s, "_"),
3148                NodeId::EPS => return write!(s, "ε"),
3149                _ => {}
3150            }
3151        }
3152
3153        match node_id {
3154            NodeId::MISSING => return write!(s, "MISSING"),
3155            NodeId::BOT => return write!(s, "⊥"),
3156            NodeId::TS => return write!(s, "_*"),
3157            NodeId::TOP => return write!(s, "_"),
3158            NodeId::EPS => return write!(s, ""),
3159            _ => {}
3160        }
3161
3162        match self.get_kind(node_id) {
3163            Kind::End => write!(s, r"\z"),
3164            Kind::Begin => write!(s, r"\A"),
3165            Kind::Pred => {
3166                let psi = node_id.pred_tset(self);
3167                if psi == TSetId::EMPTY {
3168                    write!(s, r"⊥")
3169                } else if psi == TSetId::FULL {
3170                    write!(s, r"_")
3171                } else {
3172                    write!(s, "{}", self.solver_ref().pp(psi))
3173                }
3174            }
3175            Kind::Inter => {
3176                write!(s, "(")?;
3177                self.ppw(s, node_id.left(self))?;
3178                write!(s, "&")?;
3179                let mut curr = node_id.right(self);
3180                while self.get_kind(curr) == Kind::Inter {
3181                    let n = curr.left(self);
3182                    self.ppw(s, n)?;
3183                    write!(s, "&")?;
3184                    curr = curr.right(self);
3185                }
3186                self.ppw(s, curr)?;
3187                write!(s, ")")
3188            }
3189            Kind::Union => {
3190                let left = node_id.left(self);
3191                let right = node_id.right(self);
3192                write!(s, "(")?;
3193                self.ppw(s, left)?;
3194                write!(s, "|")?;
3195                let mut curr = right;
3196                while self.get_kind(curr) == Kind::Union {
3197                    let n = curr.left(self);
3198                    self.ppw(s, n)?;
3199                    write!(s, "|")?;
3200                    curr = curr.right(self);
3201                }
3202                self.ppw(s, curr)?;
3203                write!(s, ")")
3204            }
3205            Kind::Concat => {
3206                let left = node_id.left(self);
3207                let right = node_id.right(self);
3208                if right.is_star(self) && right.left(self) == left {
3209                    self.ppw(s, left)?;
3210                    write!(s, "+")?;
3211                    return Ok(());
3212                }
3213                if right.is_concat(self) {
3214                    let rl = right.left(self);
3215                    if rl.is_star(self) && rl.left(self) == left {
3216                        self.ppw(s, left)?;
3217                        write!(s, "+")?;
3218                        return self.ppw(s, right.right(self));
3219                    }
3220                }
3221                if right.is_concat(self) && right.left(self) == left {
3222                    let mut num = 1;
3223                    let mut right = right;
3224                    while right.is_concat(self) && right.left(self) == left {
3225                        num += 1;
3226                        right = right.right(self);
3227                    }
3228                    // (|X){n} followed by X{m} -> X{m,m+n}
3229                    if let Some(inner) = left.is_opt_v(self) {
3230                        let mut inner_count = 0;
3231                        let mut right2 = right;
3232                        while right2.is_concat(self) && right2.left(self) == inner {
3233                            inner_count += 1;
3234                            right2 = right2.right(self);
3235                        }
3236                        if right2 == inner {
3237                            inner_count += 1;
3238                            self.ppw(s, inner)?;
3239                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3240                        }
3241                        if inner_count > 0 {
3242                            self.ppw(s, inner)?;
3243                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3244                            return self.ppw(s, right2);
3245                        }
3246                    }
3247                    self.ppw(s, left)?;
3248                    if right == left {
3249                        num += 1;
3250                        return write!(s, "{{{}}}", num);
3251                    }
3252                    if num <= 3 && left.is_pred(self) {
3253                        for _ in 1..num {
3254                            self.ppw(s, left)?;
3255                        }
3256                        return self.ppw(s, right);
3257                    } else {
3258                        write!(s, "{{{}}}", num)?;
3259                        return self.ppw(s, right);
3260                    }
3261                }
3262                self.ppw(s, left)?;
3263                self.ppw(s, right)
3264            }
3265            Kind::Star => {
3266                let left = node_id.left(self);
3267                let leftkind = self.get_kind(left);
3268                match leftkind {
3269                    Kind::Concat | Kind::Star | Kind::Compl => {
3270                        write!(s, "(")?;
3271                        self.ppw(s, left)?;
3272                        write!(s, ")")?;
3273                    }
3274                    _ => {
3275                        self.ppw(s, left)?;
3276                    }
3277                };
3278                write!(s, "*")
3279            }
3280            Kind::Compl => {
3281                write!(s, "~(")?;
3282                self.ppw(s, node_id.left(self))?;
3283                write!(s, ")")
3284            }
3285            Kind::Lookbehind => {
3286                let lbleft = self.get_lookbehind_prev(node_id);
3287                let lbinner = self.get_lookbehind_inner(node_id);
3288                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3289                if lbleft != NodeId::MISSING {
3290                    write!(s, "「")?;
3291                    self.ppw(s, lbleft)?;
3292                    write!(s, "」")?;
3293                }
3294
3295                write!(s, "(?<=")?;
3296                self.ppw(s, lbinner)?;
3297                write!(s, ")")
3298            }
3299            Kind::Lookahead => {
3300                let inner = self.get_lookahead_inner(node_id);
3301                write!(s, "(?=")?;
3302                self.ppw(s, inner)?;
3303                write!(s, ")")?;
3304                if self.get_lookahead_rel(node_id) != 0 {
3305                    write!(s, "{{")?;
3306                    let rel = self.get_lookahead_rel(node_id);
3307                    if rel == u32::MAX {
3308                        write!(s, "∅")?;
3309                    } else {
3310                        write!(s, "{}", rel)?;
3311                    }
3312                    write!(s, "}}")?;
3313                }
3314                if node_id.right(self) == NodeId::MISSING {
3315                    Ok(())
3316                } else {
3317                    write!(s, "『")?;
3318                    self.ppw(s, node_id.right(self))?;
3319                    write!(s, "』")
3320                }
3321            }
3322            Kind::Counted => {
3323                let body = node_id.left(self);
3324                let packed = self.get_extra(node_id);
3325                let step = packed & 0xFFFF;
3326                let best = packed >> 16;
3327                write!(s, "#(")?;
3328                self.ppw(s, body)?;
3329                write!(s, ")s{}b{}", step, best)
3330            }
3331        }
3332    }
3333
3334    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3335        self.mk_concat(node, NodeId::TS)
3336    }
3337
3338    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3339        let node_ts = self.mk_concat(node, NodeId::TS);
3340        self.mk_compl(node_ts)
3341    }
3342
3343
3344    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3345        let notset = self.solver().not_id(set);
3346        self.mk_pred(notset)
3347    }
3348
3349    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3350        let set_id = self.solver().u8_to_set_id(char);
3351        self.mk_pred(set_id)
3352    }
3353
3354    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3355        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3356        self.mk_pred(rangeset)
3357    }
3358
3359    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3360        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3361        for &(lo, hi) in &ranges[1..] {
3362            let r = self.mk_range_u8(lo, hi);
3363            node = self.mk_union(node, r);
3364        }
3365        node
3366    }
3367
3368    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3369        let mut prefix = Vec::new();
3370        let mut curr = node;
3371        loop {
3372            if curr == NodeId::EPS {
3373                let full = !prefix.is_empty();
3374                return (prefix, full);
3375            }
3376            if curr == NodeId::BOT {
3377                break;
3378            }
3379            if self.get_kind(curr) == Kind::Pred {
3380                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3381                    Some(byte) => {
3382                        prefix.push(byte);
3383                        return (prefix, true);
3384                    }
3385                    None => break, // multi-byte pred: pattern not fully consumed
3386                }
3387            }
3388            if self.get_kind(curr) != Kind::Concat {
3389                break;
3390            }
3391            let left = curr.left(self);
3392            if self.get_kind(left) != Kind::Pred {
3393                break;
3394            }
3395            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3396                Some(byte) => prefix.push(byte),
3397                None => break,
3398            }
3399            curr = curr.right(self);
3400        }
3401        (prefix, false)
3402    }
3403
3404    #[allow(dead_code)]
3405    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3406        let mut result = NodeId::EPS;
3407        for byte in raw_str.iter().rev() {
3408            let node = self.mk_u8(*byte);
3409            result = self.mk_concat(node, result);
3410        }
3411        result
3412    }
3413
3414    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3415        let mut result = NodeId::EPS;
3416        for byte in raw_str.bytes().rev() {
3417            let node = self.mk_u8(byte);
3418            result = self.mk_concat(node, result);
3419        }
3420        result
3421    }
3422
3423    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3424        let mut sorted: Vec<NodeId> = nodes.collect();
3425        if sorted.len() <= 1 {
3426            return sorted.pop().unwrap_or(NodeId::BOT);
3427        }
3428        sorted.sort();
3429        sorted.dedup();
3430        sorted.retain(|&x| x != NodeId::BOT);
3431        if sorted.is_empty() {
3432            return NodeId::BOT;
3433        }
3434        if sorted.len() > 16 {
3435            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3436            let mut non_concat: Vec<NodeId> = Vec::new();
3437            for &n in &sorted {
3438                if self.get_kind(n) == Kind::Concat {
3439                    by_head.entry(self.get_left(n)).or_default().push(n);
3440                } else {
3441                    non_concat.push(n);
3442                }
3443            }
3444            // absorb non-concat nodes whose id matches a by_head key:
3445            // x | x·t1 | x·t2 → x·(ε|t1|t2)
3446            // without this, factoring reproduces identical groups and
3447            // recurses infinitely.
3448            let mut absorbed: Vec<NodeId> = Vec::new();
3449            for &n in &non_concat {
3450                if by_head.contains_key(&n) {
3451                    absorbed.push(n);
3452                }
3453            }
3454            if !absorbed.is_empty() {
3455                non_concat.retain(|n| !absorbed.contains(n));
3456            }
3457            if by_head.len() < sorted.len() {
3458                let mut groups: Vec<NodeId> = non_concat;
3459                for (head, tails) in by_head {
3460                    let mut tail_nodes: Vec<NodeId> =
3461                        tails.iter().map(|&n| self.get_right(n)).collect();
3462                    if absorbed.contains(&head) {
3463                        tail_nodes.push(NodeId::EPS);
3464                    }
3465                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3466                    let factored = self.mk_concat(head, tail_union);
3467                    groups.push(factored);
3468                }
3469                groups.sort();
3470                groups.dedup();
3471                return self.mk_unions_balanced(&groups);
3472            }
3473        }
3474        self.mk_unions_balanced(&sorted)
3475    }
3476
3477    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3478        match nodes.len() {
3479            0 => NodeId::BOT,
3480            1 => nodes[0],
3481            n => {
3482                let mid = n / 2;
3483                let left = self.mk_unions_balanced(&nodes[..mid]);
3484                let right = self.mk_unions_balanced(&nodes[mid..]);
3485                self.mk_union(left, right)
3486            }
3487        }
3488    }
3489
3490    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3491        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3492    }
3493
3494    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3495        nodes
3496            .rev()
3497            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3498    }
3499}
3500
3501impl RegexBuilder {
3502    #[allow(dead_code)]
3503    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3504        match self.get_tregex(term_id).clone() {
3505            TRegex::Leaf(node_id) => {
3506                if NodeId::BOT == node_id {
3507                    vec![]
3508                } else {
3509                    vec![node_id]
3510                }
3511            }
3512            TRegex::ITE(_, then_id, else_id) => {
3513                let mut then_nodes = self.extract_sat(then_id);
3514                let mut else_nodes = self.extract_sat(else_id);
3515                then_nodes.append(&mut else_nodes);
3516                then_nodes
3517            }
3518        }
3519    }
3520
3521    pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3522        debug_assert!(self.get_kind(start) == Kind::Union);
3523        let mut curr = start;
3524        while self.get_kind(curr) == Kind::Union {
3525            f(curr.left(self));
3526            curr = curr.right(self);
3527        }
3528        f(curr);
3529    }
3530
3531    pub(crate) fn iter_unions_b(
3532        &mut self,
3533        curr: NodeId,
3534        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3535    ) {
3536        let mut curr = curr;
3537        while self.get_kind(curr) == Kind::Union {
3538            f(self, curr.left(self));
3539            curr = curr.right(self);
3540        }
3541        f(self, curr);
3542    }
3543
3544    pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3545        if !self.contains_look(node_id) {
3546            return Some(node_id);
3547        }
3548        match self.get_kind(node_id) {
3549            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3550            Kind::Concat => {
3551                let left = node_id.left(self);
3552                let right = node_id.right(self);
3553                let elim_l = self.try_elim_lookarounds(left)?;
3554                let elim_r = self.try_elim_lookarounds(right)?;
3555                let rw = self.mk_concat(elim_l, elim_r);
3556                Some(rw)
3557            }
3558            Kind::Union => {
3559                let left = node_id.left(self);
3560                let right = node_id.right(self);
3561                let elim_l = self.try_elim_lookarounds(left)?;
3562                let elim_r = self.try_elim_lookarounds(right)?;
3563                let rw = self.mk_union(elim_l, elim_r);
3564                Some(rw)
3565            }
3566
3567            Kind::Star => {
3568                let body = node_id.left(self);
3569                let elim_l = self.try_elim_lookarounds(body)?;
3570                Some(self.mk_star(elim_l))
3571            }
3572            Kind::Compl => {
3573                let left = node_id.left(self);
3574                let elim_l = self.try_elim_lookarounds(left)?;
3575                Some(self.mk_compl(elim_l))
3576            }
3577            Kind::Lookahead => {
3578                let rel = self.get_lookahead_rel(node_id);
3579                if rel != 0 {
3580                    return None;
3581                }
3582                let lbody = self.get_lookahead_inner(node_id);
3583                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3584                let elim_l = self.try_elim_lookarounds(lbody)?;
3585                let elim_r = self.try_elim_lookarounds(ltail)?;
3586                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3587                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3588                let rw = self.mk_inter(lbody_ts, ltail_ts);
3589                Some(rw)
3590            }
3591            Kind::Lookbehind => {
3592                let linner = self.get_lookbehind_inner(node_id);
3593                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3594                let elim_l = self.try_elim_lookarounds(linner)?;
3595                let elim_r = self.try_elim_lookarounds(lprev)?;
3596                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3597                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3598                let rw = self.mk_inter(lbody_ts, ltail_ts);
3599                Some(rw)
3600            }
3601            Kind::Inter => {
3602                let left = node_id.left(self);
3603                let right = node_id.right(self);
3604                let elim_l = self.try_elim_lookarounds(left)?;
3605                let elim_r = self.try_elim_lookarounds(right)?;
3606                let rw = self.mk_inter(elim_l, elim_r);
3607                Some(rw)
3608            }
3609            Kind::Counted => None,
3610        }
3611    }
3612
3613    /// R & _+ is a safe overapproximation of R that's nonempty
3614    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3615        if self.nullability(node) == Nullability::NEVER {
3616            node
3617        } else {
3618            self.mk_inter(NodeId::TOPPLUS, node)
3619        }
3620    }
3621
3622    fn iter_find_stack(
3623        &self,
3624        stack: &mut Vec<TRegexId>,
3625        mut f: impl FnMut(NodeId) -> bool,
3626    ) -> bool {
3627        loop {
3628            match stack.pop() {
3629                None => return false,
3630                Some(curr) => match self.get_tregex(curr) {
3631                    TRegex::Leaf(n) => {
3632                        let mut curr = *n;
3633                        while curr != NodeId::BOT {
3634                            match self.get_kind(curr) {
3635                                Kind::Union => {
3636                                    if f(curr.left(self)) {
3637                                        return true;
3638                                    }
3639                                    curr = curr.right(self);
3640                                }
3641                                _ => {
3642                                    if f(*n) {
3643                                        return true;
3644                                    }
3645                                    curr = NodeId::BOT;
3646                                }
3647                            }
3648                        }
3649                    }
3650                    TRegex::ITE(_, then_id, else_id) => {
3651                        if *else_id != TRegexId::BOT {
3652                            stack.push(*else_id);
3653                        }
3654                        stack.push(*then_id);
3655                    }
3656                },
3657            }
3658        }
3659    }
3660
3661    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3662        if node == NodeId::BOT {
3663            return Some(true);
3664        }
3665        if self.nullability(node) != Nullability::NEVER {
3666            return Some(false);
3667        }
3668        if let Some(cached) = self.cache_empty.get(&node) {
3669            if cached.is_checked() {
3670                return Some(cached.is_empty());
3671            }
3672        }
3673        let node = if !self.contains_look(node) {
3674            node
3675        } else {
3676            self.try_elim_lookarounds(node)?
3677        };
3678        let isempty_flag = self.is_empty_lang_internal(node);
3679
3680        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3681    }
3682
3683    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3684        // without inter, no need to check
3685        if !self.get_meta_flags(initial_node).contains_inter() {
3686            return Ok(NodeFlags::ZERO);
3687        }
3688
3689        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3690        let mut todo: VecDeque<NodeId> = VecDeque::new();
3691        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3692        let mut stack = Vec::new();
3693        stack.push(begin_der);
3694        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3695            visited.insert(node, initial_node);
3696            let nullability = self.nullability(node);
3697            if nullability != Nullability::NEVER {
3698                true
3699            } else {
3700                todo.push_back(node);
3701                false
3702            }
3703        });
3704        if found_nullable_right_away {
3705            return Ok(NodeFlags::ZERO);
3706        }
3707
3708        todo.push_back(initial_node);
3709        let isempty_flag: NodeFlags;
3710        let mut found_node = NodeId::BOT;
3711
3712        loop {
3713            match todo.pop_front() {
3714                None => {
3715                    isempty_flag = NodeFlags::IS_EMPTY;
3716                    break;
3717                }
3718                Some(outer) => {
3719                    if let Some(cached) = self.cache_empty.get(&outer) {
3720                        if cached.is_checked() {
3721                            if cached.is_empty() {
3722                                continue;
3723                            } else {
3724                                return Ok(NodeFlags::ZERO);
3725                            }
3726                        }
3727                    }
3728
3729                    let derivative = self.der(outer, Nullability::CENTER)?;
3730
3731                    stack.push(derivative);
3732
3733                    let found_null = self.iter_find_stack(&mut stack, |node| {
3734                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3735                            found_node = node;
3736                            if !self.get_meta_flags(node).contains_inter() {
3737                                true
3738                            } else {
3739                                e.insert(outer);
3740                                todo.push_front(node);
3741                                self.any_nonbegin_nullable(node)
3742                            }
3743                        } else {
3744                            false
3745                        }
3746                    });
3747                    if found_null {
3748                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3749                        isempty_flag = NodeFlags::ZERO;
3750                        break;
3751                    }
3752                }
3753            }
3754        }
3755
3756        self.cache_empty.insert(
3757            initial_node,
3758            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3759        );
3760        Ok(isempty_flag)
3761    }
3762
3763    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3764    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3765        if larger_lang == smaller_lang {
3766            return Some(true);
3767        }
3768
3769        // assess initial nullability
3770        if self
3771            .nullability(larger_lang)
3772            .not()
3773            .and(self.nullability(smaller_lang))
3774            != Nullability::NEVER
3775        {
3776            return Some(false);
3777        }
3778
3779        // check language nullability
3780        // if (B &~ A) ≡ ⊥ then B ⊆ A
3781        // this means  L(B) \ L(A) = {}
3782        // eg. (a &~ .*) = ⊥ means .* subsumes a
3783        let (smaller_lang, larger_lang) =
3784            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3785                let wrap = |b: &mut Self, n: NodeId| {
3786                    let tmp = b.mk_concat(n, NodeId::TS);
3787                    b.mk_concat(NodeId::TS, tmp)
3788                };
3789                (wrap(self, smaller_lang), wrap(self, larger_lang))
3790            } else {
3791                (smaller_lang, larger_lang)
3792            };
3793
3794        let nota = self.mk_compl(larger_lang);
3795        let diff = self.mk_inter(smaller_lang, nota);
3796        self.is_empty_lang(diff)
3797    }
3798}