Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9pub use unicode_classes::UnicodeClassCache;
10use solver::{Solver, TSetId};
11use rustc_hash::FxHashMap;
12use std::collections::{BTreeSet, VecDeque};
13use std::fmt::Debug;
14use std::fmt::Write;
15use std::hash::Hash;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 as u8 & Self::NULL_MASK as u8)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_lookaround(self) -> bool {
134        self.has(MetaFlags::CONTAINS_LOOKAROUND)
135    }
136    pub(crate) fn contains_inter(self) -> bool {
137        self.has(MetaFlags::CONTAINS_INTER)
138    }
139
140    pub(crate) fn all_contains_flags(self) -> MetaFlags {
141        self.and(
142            MetaFlags::CONTAINS_LOOKAROUND
143                .or(MetaFlags::CONTAINS_ANCHORS)
144                .or(MetaFlags::CONTAINS_INTER),
145        )
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152    #[default]
153    Pred,
154    Star,
155    Begin,
156    End,
157    Concat,
158    Union,
159    Compl,
160    Lookbehind,
161    Lookahead,
162    Inter,
163    Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168    flags: MetaFlags,
169    nulls: NullsId,
170}
171
172struct MetadataBuilder {
173    num_created: u32,
174    solver: Solver,
175    nb: NullsBuilder,
176    index: FxHashMap<Metadata, MetadataId>,
177    pub array: Vec<Metadata>,
178}
179
180mod helpers {
181    pub(crate) fn incr_rel(n1: u32) -> u32 {
182        match n1.overflowing_add(1) {
183            (_, true) => u32::MAX,
184            (res, false) => res,
185        }
186    }
187}
188
189impl MetadataBuilder {
190    fn new() -> MetadataBuilder {
191        let mut arr = Vec::new();
192        arr.push(Metadata {
193            flags: MetaFlags::ZERO,
194            nulls: NullsId::EMPTY,
195        });
196        Self {
197            index: FxHashMap::default(),
198            array: arr,
199            solver: Solver::new(),
200            num_created: 0,
201            nb: NullsBuilder::new(),
202        }
203    }
204
205    fn init(&mut self, inst: Metadata) -> MetadataId {
206        self.num_created += 1;
207        let new_id = MetadataId(self.num_created);
208        self.index.insert(inst.clone(), new_id);
209        self.array.push(inst);
210        new_id
211    }
212
213    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
214        match self.index.get(&inst) {
215            Some(&id) => id,
216            None => self.init(inst),
217        }
218    }
219
220    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
221        &self.array[inst.0 as usize]
222    }
223
224    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
225        setflags.all_contains_flags()
226    }
227
228    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
229        let left = &self.array[body.0 as usize].flags;
230        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
231        // BOT* = EPS (empty string only), not infinite
232        let inf = if body_id == NodeId::BOT {
233            MetaFlags::ZERO
234        } else {
235            MetaFlags::INFINITE_LENGTH
236        };
237        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
238    }
239
240    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
241        let left = &self.array[left_id.0 as usize].flags;
242        let null = left.nullability().not().and(Nullability::ALWAYS);
243        let contains = self.get_contains(*left);
244        MetaFlags::with_nullability(
245            null,
246            contains
247                .or(MetaFlags::INFINITE_LENGTH)
248                .or(MetaFlags::CONTAINS_COMPL),
249        )
250    }
251
252    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253        let left = &self.array[left_id.0 as usize].flags;
254        let right = &self.array[right_id.0 as usize].flags;
255        let null = left.nullability().and(right.nullability());
256        let contains = self.get_contains(left.or(*right));
257        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
258        MetaFlags::with_nullability(null, contains.or(len))
259    }
260
261    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
262        let left = &self.array[left_id.0 as usize].flags;
263        let right = &self.array[right_id.0 as usize].flags;
264        let null = left.nullability().and(right.nullability());
265        let contains = self
266            .get_contains(left.or(*right))
267            .or(MetaFlags::CONTAINS_INTER);
268        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
269        MetaFlags::with_nullability(null, contains.or(len))
270    }
271
272    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
273        let left = &self.array[left_id.0 as usize].flags;
274        let right = &self.array[right_id.0 as usize].flags;
275        let null = left.nullability().or(right.nullability());
276        let contains = self.get_contains(left.or(*right));
277        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
278        MetaFlags::with_nullability(null, contains.or(len))
279    }
280}
281
282#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
283pub struct NodeKey {
284    pub(crate) kind: Kind,
285    pub(crate) left: NodeId,
286    pub(crate) right: NodeId,
287    pub(crate) extra: u32,
288}
289
290#[derive(Eq, Hash, PartialEq, Clone, Debug)]
291pub enum TRegex<TSet> {
292    Leaf(NodeId),
293    ITE(TSet, TRegexId, TRegexId),
294}
295
296#[derive(Eq, Hash, PartialEq, Clone, Copy)]
297pub(crate) struct NodeFlags(u8);
298impl NodeFlags {
299    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
300    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
301    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
302
303    fn is_checked(self) -> bool {
304        self.0 >= NodeFlags::IS_CHECKED.0
305    }
306    fn is_empty(self) -> bool {
307        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
308    }
309}
310
311#[derive(Eq, Hash, PartialEq, Clone, Copy)]
312pub(crate) struct BuilderFlags(u8);
313impl BuilderFlags {
314    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
315    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
316}
317
318pub struct RegexBuilder {
319    mb: MetadataBuilder,
320    temp_vec: Vec<NodeId>,
321    num_created: u32,
322    index: FxHashMap<NodeKey, NodeId>,
323    array: Vec<NodeKey>,
324    metadata: Vec<MetadataId>,
325    reversed: Vec<NodeId>,
326    cache_empty: FxHashMap<NodeId, NodeFlags>,
327    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
328    tr_array: Vec<TRegex<TSetId>>,
329    tr_der_center: Vec<TRegexId>,
330    tr_der_begin: Vec<TRegexId>,
331    flags: BuilderFlags,
332    /// maximum lookahead context distance before returning `AnchorLimit`.
333    pub lookahead_context_max: u32,
334    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
335    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
336}
337
338
339macro_rules! iter_inter {
340    ($compiler:ident, $start:ident, $expression:expr) => {
341        let mut curr = $start;
342        while $compiler.get_kind(curr) == Kind::Inter {
343            let left = $compiler.get_left(curr);
344            $expression(left);
345            curr = $compiler.get_right(curr);
346        }
347        $expression(curr);
348    };
349}
350
351impl NodeId {
352    fn is_missing(&self) -> bool {
353        *self == NodeId::MISSING
354    }
355    #[inline]
356    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
357        b.get_flags_contains(self)
358    }
359    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
360        if self == tail {
361            true
362        } else if self.is_kind(b, Kind::Concat) {
363            self.right(b).has_concat_tail(b, tail)
364        } else {
365            false
366        }
367    }
368    fn missing_to_eps(&self) -> NodeId {
369        if *self == NodeId::MISSING {
370            NodeId::EPS
371        } else {
372            *self
373        }
374    }
375
376    #[inline]
377    fn kind(self, b: &RegexBuilder) -> Kind {
378        b.get_kind(self)
379    }
380    #[inline]
381    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
382        b.get_kind(self) == k
383    }
384    #[inline]
385    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
386        b.nullability(self) == Nullability::NEVER
387    }
388    #[inline]
389    fn nullability(self, b: &RegexBuilder) -> Nullability {
390        b.nullability(self)
391    }
392
393    #[inline]
394    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
395        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
396    }
397    #[inline]
398    pub fn left(self, b: &RegexBuilder) -> NodeId {
399        b.get_left(self)
400    }
401
402    #[inline]
403    pub fn right(self, b: &RegexBuilder) -> NodeId {
404        b.get_right(self)
405    }
406
407    #[inline]
408    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
409        b.der(self, mask)
410    }
411
412    #[inline]
413    fn extra(self, b: &RegexBuilder) -> u32 {
414        b.get_extra(self)
415    }
416
417    #[inline]
418    fn is_pred(self, b: &RegexBuilder) -> bool {
419        b.get_kind(self) == Kind::Pred
420    }
421    #[inline]
422    fn is_lookahead(self, b: &RegexBuilder) -> bool {
423        b.get_kind(self) == Kind::Lookahead
424    }
425    #[inline]
426    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
427        debug_assert!(self.is_pred(b));
428        TSetId(b.get_extra(self))
429    }
430    #[inline]
431    fn is_star(self, b: &RegexBuilder) -> bool {
432        if NodeId::EPS == self {
433            return false;
434        }
435        b.get_kind(self) == Kind::Star
436    }
437
438    #[inline]
439    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
440        if self.is_concat(b) {
441            let r = self.right(b);
442            return r.is_star(b) && r.left(b) == self.left(b);
443        }
444        false
445    }
446
447    #[inline]
448    fn is_concat(self, b: &RegexBuilder) -> bool {
449        b.get_kind(self) == Kind::Concat
450    }
451
452    #[inline]
453    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
454        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
455            Some(b.get_right(self))
456        } else {
457            None
458        }
459    }
460
461    #[inline]
462    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
463        if b.get_kind(self) == Kind::Concat {
464            let left = self.left(b);
465            let right = self.right(b);
466            if left.is_kind(b, Kind::Compl) {
467                if left.left(b) == NodeId::TOPPLUS {
468                    return right == NodeId::END;
469                }
470            }
471        }
472        false
473    }
474
475    #[inline]
476    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
477        if NodeId::EPS == self {
478            return None;
479        }
480        if self.is_star(b) && self.left(b).is_pred(b) {
481            Some(self.left(b))
482        } else {
483            None
484        }
485    }
486
487    #[inline]
488    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
489        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
490            let middle = self.right(b);
491            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
492                return Some(middle.left(b));
493            }
494        }
495        None
496    }
497
498    #[inline]
499    pub(crate) fn iter_union(
500        self,
501        b: &mut RegexBuilder,
502        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
503    ) {
504        let mut curr: NodeId = self;
505        while curr.kind(b) == Kind::Union {
506            f(b, curr.left(b));
507            curr = curr.right(b);
508        }
509        f(b, curr);
510    }
511
512    #[inline]
513    pub(crate) fn iter_union_while(
514        self,
515        b: &mut RegexBuilder,
516        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
517    ) {
518        let mut curr: NodeId = self;
519        let mut continue_loop = true;
520        while continue_loop && curr.kind(b) == Kind::Union {
521            continue_loop = f(b, curr.left(b));
522            curr = curr.right(b);
523        }
524        if continue_loop {
525            f(b, curr);
526        }
527    }
528}
529
530impl RegexBuilder {
531    pub fn new() -> RegexBuilder {
532        let mut inst = Self {
533            mb: MetadataBuilder::new(),
534            array: Vec::new(),
535            index: FxHashMap::default(),
536            cache_empty: FxHashMap::default(),
537            tr_array: Vec::new(),
538            tr_cache: FxHashMap::default(),
539            flags: BuilderFlags::ZERO,
540            lookahead_context_max: 800,
541            num_created: 0,
542            metadata: Vec::new(),
543            reversed: Vec::new(),
544            tr_der_center: Vec::new(),
545            tr_der_begin: Vec::new(),
546            temp_vec: Vec::new(),
547            mk_binary_memo: FxHashMap::default(),
548            clean_cache: FxHashMap::default(),
549        };
550        inst.array.push(NodeKey::default());
551        inst.mk_pred(TSetId::EMPTY);
552        inst.mk_star(NodeId::BOT);
553        inst.mk_pred(TSetId::FULL);
554        inst.mk_star(NodeId::TOP);
555        let top_plus_id = inst.mk_plus(NodeId::TOP);
556        inst.mk_unset(Kind::Begin);
557        inst.mk_unset(Kind::End);
558        debug_assert!(top_plus_id == NodeId::TOPPLUS);
559
560        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
561        inst.mk_leaf(NodeId::BOT);
562        inst.mk_leaf(NodeId::EPS);
563        inst.mk_leaf(NodeId::TOP);
564        inst.mk_leaf(NodeId::TS);
565
566        inst.flags = BuilderFlags::SUBSUME;
567        inst
568    }
569
570    #[inline]
571    pub fn solver_ref(&self) -> &Solver {
572        &self.mb.solver
573    }
574
575    #[inline]
576    pub fn solver(&mut self) -> &mut Solver {
577        &mut self.mb.solver
578    }
579
580    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
581        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
582        self.tr_cache.insert(inst.clone(), new_id);
583        self.tr_array.push(inst);
584        new_id
585    }
586
587    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
588        match self.tr_cache.get(&inst) {
589            Some(&id) => id,
590            None => self.tr_init(inst),
591        }
592    }
593
594    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
595        &self.tr_array[inst.0 as usize]
596    }
597
598    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
599        let solv = self.solver();
600        !solv.is_sat_id(cond1, cond2)
601    }
602
603    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
604        let term = TRegex::<TSetId>::Leaf(node_id);
605        self.get_tregex_id(term)
606    }
607
608    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
609        let tmp_inst = TRegex::ITE(cond, _then, _else);
610        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
611            return *cached;
612        }
613        if _then == _else {
614            return _then;
615        }
616        if self.solver().is_full_id(cond) {
617            return _then;
618        }
619        if self.solver().is_empty_id(cond) {
620            return _else;
621        }
622        let _clean_then = match self.tr_array[_then.0 as usize] {
623            TRegex::Leaf(_) => _then,
624            _ => self.clean(cond, _then),
625        };
626        let notcond = self.solver().not_id(cond);
627        let _clean_else = match self.tr_array[_else.0 as usize] {
628            TRegex::Leaf(_) => _else,
629            _ => self.clean(notcond, _else),
630        };
631
632        if _clean_then == _clean_else {
633            return _clean_then;
634        }
635        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
636        match self.get_tregex(_clean_then) {
637            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
638                let _cond_then = leftcond.clone();
639                let new_then = _inner_then.clone();
640                let sand = self.solver().and_id(cond, _cond_then);
641                return self.mk_ite(sand, new_then, _clean_else);
642            }
643            _ => {}
644        }
645        // attempt right side flattening:
646        match self.get_tregex(_clean_else) {
647            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
648            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
649                let e2clone = _e2.clone();
650                let new_cond = self.mb.solver.or_id(cond, _c2.clone());
651                return self.mk_ite(new_cond, _clean_then, e2clone);
652            }
653            _ => {}
654        }
655
656        if _clean_then == TRegexId::BOT {
657            let flip_cond = self.solver().not_id(cond);
658            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
659            return cleaned_id;
660        }
661
662        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
663    }
664
665    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
666        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
667            return cached;
668        }
669        let result = match *self.get_tregex(tterm) {
670            TRegex::Leaf(_) => tterm,
671            TRegex::ITE(alpha, _then_id, _else_id) => {
672                let notalpha = self.mb.solver.not_id(alpha);
673                if self.mb.solver.unsat_id(beta, alpha) {
674                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
675                    self.clean(beta, _else_id)
676                } else if self.unsat(beta, notalpha) {
677                    // beta ⊆ alpha, so beta ∧ alpha = beta
678                    self.clean(beta, _then_id)
679                } else {
680                    let tc = self.mb.solver.and_id(beta, alpha);
681                    let ec = self.mb.solver.and_id(beta, notalpha);
682                    let new_then = self.clean(tc, _then_id);
683                    let new_else = self.clean(ec, _else_id);
684                    self.mk_ite(alpha, new_then, new_else)
685                }
686            }
687        };
688        self.clean_cache.insert((beta, tterm), result);
689        result
690    }
691
692    fn mk_unary(
693        &mut self,
694        term: TRegexId,
695        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
696    ) -> TRegexId {
697        match self.tr_array[term.0 as usize] {
698            TRegex::Leaf(node_id) => {
699                let applied = apply(self, node_id);
700                self.mk_leaf(applied)
701            }
702            TRegex::ITE(c1, _then, _else) => {
703                let _then1 = self.mk_unary(_then, apply);
704                let _else1 = self.mk_unary(_else, apply);
705                self.mk_ite(c1, _then1, _else1)
706            }
707        }
708    }
709
710    fn mk_binary_result(
711        &mut self,
712        left: TRegexId,
713        right: TRegexId,
714        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
715    ) -> Result<TRegexId, AlgebraError> {
716        match self.tr_array[left.0 as usize] {
717            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
718                TRegex::Leaf(right_node_id) => {
719                    let applied = apply(self, left_node_id, right_node_id)?;
720                    Ok(self.mk_leaf(applied))
721                }
722                TRegex::ITE(c2, _then, _else) => {
723                    let then2 = self.mk_binary_result(left, _then, apply)?;
724                    let else2 = self.mk_binary_result(left, _else, apply)?;
725                    Ok(self.mk_ite(c2, then2, else2))
726                }
727            },
728            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
729                TRegex::Leaf(_) => {
730                    let then2 = self.mk_binary_result(_then1, right, apply)?;
731                    let else2 = self.mk_binary_result(_else1, right, apply)?;
732                    Ok(self.mk_ite(c1, then2, else2))
733                }
734                TRegex::ITE(c2, _then2, _else2) => {
735                    if c1 == c2 {
736                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
737                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
738                        return Ok(self.mk_ite(c1, _then, _else));
739                    }
740                    if c1.0 > c2.0 {
741                        let _then = self.mk_binary_result(_then1, right, apply)?;
742                        let _else = self.mk_binary_result(_else1, right, apply)?;
743                        return Ok(self.mk_ite(c1, _then, _else));
744                    } else {
745                        let _then = self.mk_binary_result(left, _then2, apply)?;
746                        let _else = self.mk_binary_result(left, _else2, apply)?;
747                        return Ok(self.mk_ite(c2, _then, _else));
748                    }
749                }
750            },
751        }
752    }
753
754    fn mk_binary(
755        &mut self,
756        left: TRegexId,
757        right: TRegexId,
758        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
759    ) -> TRegexId {
760        self.mk_binary_memo.clear();
761        self.mk_binary_inner(left, right, apply)
762    }
763
764    fn mk_binary_inner(
765        &mut self,
766        left: TRegexId,
767        right: TRegexId,
768        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
769    ) -> TRegexId {
770        if left == right {
771            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
772        }
773        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
774            return cached;
775        }
776        let result = match self.tr_array[left.0 as usize] {
777            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
778                TRegex::Leaf(right_node_id) => {
779                    let applied = apply(self, left_node_id, right_node_id);
780                    self.mk_leaf(applied)
781                }
782                TRegex::ITE(c2, _then, _else) => {
783                    let then2 = self.mk_binary_inner(left, _then, apply);
784                    let else2 = self.mk_binary_inner(left, _else, apply);
785                    self.mk_ite(c2, then2, else2)
786                }
787            },
788            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
789                TRegex::Leaf(_) => {
790                    let then2 = self.mk_binary_inner(_then1, right, apply);
791                    let else2 = self.mk_binary_inner(_else1, right, apply);
792                    self.mk_ite(c1, then2, else2)
793                }
794                TRegex::ITE(c2, _then2, _else2) => {
795                    if c1 == c2 {
796                        let _then = self.mk_binary_inner(_then1, _then2, apply);
797                        let _else = self.mk_binary_inner(_else1, _else2, apply);
798                        self.mk_ite(c1, _then, _else)
799                    } else if c1.0 > c2.0 {
800                        let _then = self.mk_binary_inner(_then1, right, apply);
801                        let _else = self.mk_binary_inner(_else1, right, apply);
802                        self.mk_ite(c1, _then, _else)
803                    } else {
804                        let _then = self.mk_binary_inner(left, _then2, apply);
805                        let _else = self.mk_binary_inner(left, _else2, apply);
806                        self.mk_ite(c2, _then, _else)
807                    }
808                }
809            },
810        };
811        self.mk_binary_memo.insert((left, right), result);
812        result
813    }
814
815    pub fn get_nulls(
816        &mut self,
817        pending_rel: u32,
818        mask: Nullability,
819        acc: &mut BTreeSet<NullState>,
820        node_id: NodeId,
821    ) {
822        debug_assert!(node_id != NodeId::MISSING);
823        if !self.is_nullable(node_id, mask) {
824            return;
825        }
826        match self.get_kind(node_id) {
827            Kind::Pred => {}
828            Kind::End => {
829                if mask.has(Nullability::END) {
830                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
831                }
832            }
833            Kind::Begin => {
834                if mask.has(Nullability::BEGIN) {
835                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
836                }
837            }
838            Kind::Concat => {
839                let new_mask = self.nullability(node_id).and(mask);
840                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
841                if self.is_nullable(node_id.left(self), mask) {
842                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
843                }
844            }
845            Kind::Union => {
846                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
847                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
848            }
849            Kind::Inter => {
850                let new_mask = self.nullability(node_id).and(mask);
851                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
852                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
853            }
854            Kind::Star => {
855                acc.insert(NullState::new(mask, pending_rel));
856                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
857            }
858            Kind::Compl => {
859                if !self.is_nullable(node_id.left(self), mask) {
860                    acc.insert(NullState::new(mask, 0));
861                }
862            }
863            Kind::Lookbehind => {
864                let new_mask = self.nullability(node_id).and(mask);
865                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
866                if node_id.right(self) != NodeId::MISSING {
867                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
868                }
869            }
870            Kind::Lookahead => {
871                let la_inner = self.get_lookahead_inner(node_id);
872                if self.is_nullable(la_inner, mask) {
873                    let rel = self.get_lookahead_rel(node_id);
874                    if rel != u32::MAX {
875                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
876                    }
877                }
878                let la_tail = self.get_lookahead_tail(node_id);
879                if la_tail != NodeId::MISSING {
880                    self.get_nulls(pending_rel, mask, acc, la_tail);
881                }
882            }
883            Kind::Counted => {
884                let packed = self.get_extra(node_id);
885                let best = (packed >> 16) as u32;
886                if best > 0 {
887                    acc.insert(NullState::new(mask, pending_rel + best));
888                }
889            }
890        }
891    }
892
893    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
894        self.get_meta_flags(node_id)
895            .has(MetaFlags::CONTAINS_LOOKAROUND)
896    }
897
898    /// whether node contains `^`, `$`, `\A`, `\z` anchors.
899    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
900        self.get_meta_flags(node_id)
901            .has(MetaFlags::CONTAINS_ANCHORS)
902    }
903
904
905    pub fn is_infinite(&self, node_id: NodeId) -> bool {
906        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
907    }
908
909    /// returns (min_length, max_length). max = u32::MAX means unbounded.
910    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
911        if self.is_infinite(node_id) {
912            if self.get_kind(node_id) == Kind::Inter {
913                self.get_bounded_length(node_id)
914            } else {
915                (self.get_min_length_only(node_id), u32::MAX)
916            }
917        } else {
918            self.get_bounded_length(node_id)
919        }
920    }
921
922    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
923        if node_id == NodeId::EPS {
924            return (0, 0);
925        }
926        match self.get_kind(node_id) {
927            Kind::End | Kind::Begin => (0, 0),
928            Kind::Pred => (1, 1),
929            Kind::Concat => {
930                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
931                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
932                (lmin + rmin, lmax.saturating_add(rmax))
933            }
934            Kind::Union => {
935                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
936                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
937                (lmin.min(rmin), lmax.max(rmax))
938            }
939            Kind::Inter => {
940                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
941                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
942                (lmin.max(rmin), lmax.min(rmax))
943            }
944            Kind::Lookahead => {
945                let body = node_id.left(self);
946                if self.is_infinite(body) {
947                    return (0, u32::MAX);
948                }
949                let right = node_id.right(self);
950                if right.is_missing() {
951                    (0, 0)
952                } else {
953                    self.get_min_max_length(right)
954                }
955            }
956            Kind::Counted => (0, 0),
957            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
958        }
959    }
960
961    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
962        match self.get_kind(node_id) {
963            Kind::End | Kind::Begin => Some(0),
964            Kind::Pred => Some(1),
965            Kind::Concat => {
966                let l = self.get_fixed_length(node_id.left(self))?;
967                let r = self.get_fixed_length(node_id.right(self))?;
968                Some(l + r)
969            }
970            Kind::Union => {
971                let l = self.get_fixed_length(node_id.left(self))?;
972                let r = self.get_fixed_length(node_id.right(self))?;
973                if l == r {
974                    Some(l)
975                } else {
976                    None
977                }
978            }
979            Kind::Inter => {
980                let l = self.get_fixed_length(node_id.left(self))?;
981                let r = self.get_fixed_length(node_id.right(self))?;
982                if l == r {
983                    Some(l)
984                } else {
985                    None
986                }
987            }
988            Kind::Lookahead => {
989                let right = node_id.right(self);
990                if right.is_missing() {
991                    Some(0)
992                } else {
993                    self.get_fixed_length(right)
994                }
995            }
996            Kind::Counted => Some(0),
997            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
998        }
999    }
1000
1001    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1002        match self.get_kind(node_id) {
1003            Kind::End | Kind::Begin => 0,
1004            Kind::Pred => 1,
1005            Kind::Concat => {
1006                self.get_min_length_only(node_id.left(self))
1007                    + self.get_min_length_only(node_id.right(self))
1008            }
1009            Kind::Union => self
1010                .get_min_length_only(node_id.left(self))
1011                .min(self.get_min_length_only(node_id.right(self))),
1012            Kind::Inter => self
1013                .get_min_length_only(node_id.left(self))
1014                .max(self.get_min_length_only(node_id.right(self))),
1015            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1016            Kind::Compl => {
1017                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1018                    0
1019                } else {
1020                    1
1021                }
1022            }
1023        }
1024    }
1025
1026    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1027        if node_id == NodeId::TS {
1028            return true;
1029        }
1030        match self.get_kind(node_id) {
1031            Kind::Inter => {
1032                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1033            }
1034            Kind::Union => {
1035                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1036            }
1037            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1038            _ => false,
1039        }
1040    }
1041
1042    #[inline]
1043    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1044        if self.get_kind(node_id) == Kind::Concat {
1045            self.ends_with_ts(node_id.right(self))
1046        } else {
1047            node_id == NodeId::TS
1048        }
1049    }
1050
1051    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1052        debug_assert!(node_id != NodeId::MISSING);
1053        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1054    }
1055
1056    pub(crate) fn cache_der(
1057        &mut self,
1058        node_id: NodeId,
1059        result: TRegexId,
1060        mask: Nullability,
1061    ) -> TRegexId {
1062        if mask == Nullability::CENTER {
1063            self.tr_der_center[node_id.0 as usize] = result
1064        } else {
1065            self.tr_der_begin[node_id.0 as usize] = result
1066        };
1067        result
1068    }
1069
1070    pub(crate) fn try_cached_der(
1071        &mut self,
1072        node_id: NodeId,
1073        mask: Nullability,
1074    ) -> Option<TRegexId> {
1075        let cache = if mask == Nullability::CENTER {
1076            &mut self.tr_der_center
1077        } else {
1078            &mut self.tr_der_begin
1079        };
1080        match cache.get(node_id.0 as usize) {
1081            Some(&TRegexId::MISSING) => {}
1082            Some(&result) => {
1083                return Some(result);
1084            }
1085            None => {
1086                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1087            }
1088        }
1089        return None;
1090    }
1091
1092    pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1093        let mut term = self.get_tregex(der);
1094        loop {
1095            match *term {
1096                TRegex::Leaf(node_id) => return node_id,
1097                TRegex::ITE(cond, _then, _else) => {
1098                    if self.solver().is_sat_id(set, cond) {
1099                        term = self.get_tregex(_then);
1100                    } else {
1101                        term = self.get_tregex(_else);
1102                    }
1103                }
1104            }
1105        }
1106    }
1107
1108    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1109        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1110        debug_assert!(
1111            node_id != NodeId::MISSING,
1112            "attempting to derive missing node"
1113        );
1114        if let Some(result) = self.try_cached_der(node_id, mask) {
1115            return Ok(result);
1116        }
1117
1118        let result = match node_id.kind(self) {
1119            Kind::Compl => {
1120                let leftd = node_id.left(self).der(self, mask)?;
1121                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1122            }
1123            Kind::Inter => {
1124                let leftd = node_id.left(self).der(self, mask)?;
1125                let rightd = node_id.right(self).der(self, mask)?;
1126                {
1127                    let this = &mut *self;
1128                    this.mk_binary(
1129                        leftd,
1130                        rightd,
1131                        &mut (|b, left, right| b.mk_inter(left, right)),
1132                    )
1133                }
1134            }
1135            Kind::Union => {
1136                let leftd = self.der(node_id.left(self), mask)?;
1137                let rightd = self.der(node_id.right(self), mask)?;
1138                {
1139                    let this = &mut *self;
1140                    this.mk_binary(
1141                        leftd,
1142                        rightd,
1143                        &mut (|b, left, right| b.mk_union(left, right)),
1144                    )
1145                }
1146            }
1147            Kind::Concat => {
1148                let head = node_id.left(self);
1149                let tail = node_id.right(self);
1150                let tail_leaf = self.mk_leaf(tail);
1151                let head_der = self.der(head, mask)?;
1152
1153                if self.is_nullable(head, mask) {
1154                    let rightd = self.der(tail, mask)?;
1155                    let ldr = {
1156                        let this = &mut *self;
1157                        this.mk_binary(
1158                            head_der,
1159                            tail_leaf,
1160                            &mut (|b, left, right| b.mk_concat(left, right)),
1161                        )
1162                    };
1163                    {
1164                        let this = &mut *self;
1165                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1166                    }
1167                } else {
1168                    let ldr = {
1169                        let this = &mut *self;
1170                        this.mk_binary(
1171                            head_der,
1172                            tail_leaf,
1173                            &mut (|b, left, right| b.mk_concat(left, right)),
1174                        )
1175                    };
1176                    ldr
1177                }
1178            }
1179            Kind::Star => {
1180                if node_id == NodeId::EPS {
1181                    TRegexId::BOT
1182                } else {
1183                    let left = node_id.left(self);
1184                    let r_decr_leaf = self.mk_leaf(node_id);
1185                    let r_der = self.der(left, mask)?;
1186                    let result = {
1187                        let this = &mut *self;
1188                        this.mk_binary(
1189                            r_der,
1190                            r_decr_leaf,
1191                            &mut (|b, left, right| b.mk_concat(left, right)),
1192                        )
1193                    };
1194                    result
1195                }
1196            }
1197            Kind::Lookbehind => {
1198                let lb_prev_der = {
1199                    let lb_prev = self.get_lookbehind_prev(node_id);
1200                    if lb_prev == NodeId::MISSING {
1201                        TRegexId::MISSING
1202                    } else {
1203                        self.der(lb_prev, mask)?
1204                    }
1205                };
1206                let lb_inner = self.get_lookbehind_inner(node_id);
1207                let lb_inner_der = self.der(lb_inner, mask)?;
1208                {
1209                    let this = &mut *self;
1210                    this.mk_binary_result(
1211                        lb_inner_der,
1212                        lb_prev_der,
1213                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1214                    )?
1215                }
1216            }
1217            Kind::Lookahead => {
1218                let la_tail = self.get_lookahead_tail(node_id);
1219                let la_body = node_id.left(self);
1220                let rel = self.get_lookahead_rel(node_id);
1221
1222                if self.is_nullable(la_body, mask) {
1223                    // nullabilty is taken once, just keep the body
1224                    let right = node_id.right(self).missing_to_eps();
1225                    let rder = self.der(right, mask).clone();
1226                    return rder;
1227                }
1228
1229                if rel == u32::MAX {
1230                    let la_body_der = self.der(la_body, mask)?;
1231                    if la_tail.is_kind(self, Kind::Pred) {
1232                        let transitioned =
1233                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1234                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1235                        let concated = self.mk_concat(la_tail, new_la);
1236                        return self.der(concated, mask);
1237                    }
1238                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1239                        let left = la_tail.left(self);
1240                        let tset = left.pred_tset(self);
1241                        let transitioned = self.transition_term(la_body_der, tset);
1242                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1243                        let tail_right = la_tail.right(self);
1244                        let concated = self.mk_concat(new_la, tail_right);
1245                        let concated = self.mk_concat(left, concated);
1246                        return self.der(concated, mask);
1247                    }
1248                }
1249
1250                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1251                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1252                    let concated = self.mk_concat(la_body, nulls_mask);
1253                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1254                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1255                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1256                    let new_union = self.mk_union(concated_look, new_look);
1257                    return self.der(new_union, mask);
1258                }
1259
1260                let la_tail_der = if la_tail == NodeId::MISSING {
1261                    TRegexId::MISSING
1262                } else {
1263                    if self.is_nullable(la_tail, mask) {
1264                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1265                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1266                        let la_union = self.mk_union(la_tail, nulls_la);
1267                        self.der(la_union, mask)?
1268                    } else {
1269                        self.der(la_tail, mask)?
1270                    }
1271                };
1272
1273                let la_body_der = self.der(la_body, mask)?;
1274
1275                if rel != u32::MAX && rel > self.lookahead_context_max {
1276                    return Err(AlgebraError::AnchorLimit);
1277                }
1278
1279                let la = {
1280                    let this = &mut *self;
1281                    let rel = helpers::incr_rel(rel);
1282                    this.mk_binary(
1283                        la_body_der,
1284                        la_tail_der,
1285                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1286                    )
1287                };
1288
1289                if rel != u32::MAX
1290                    && la_tail_der != TRegexId::MISSING
1291                    && self.is_nullable(la_tail, mask)
1292                {
1293                    let look_only = {
1294                        let this = &mut *self;
1295                        let right = TRegexId::MISSING;
1296                        let rel = helpers::incr_rel(rel);
1297                        this.mk_binary(
1298                            la_body_der,
1299                            right,
1300                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1301                        )
1302                    };
1303                    {
1304                        let this = &mut *self;
1305                        this.mk_binary(
1306                            look_only,
1307                            la,
1308                            &mut (|b, left, right| b.mk_union(left, right)),
1309                        )
1310                    }
1311                } else {
1312                    la
1313                }
1314            }
1315            Kind::Counted => {
1316                let body = node_id.left(self);
1317                let chain = node_id.right(self);
1318                let packed = self.get_extra(node_id);
1319                let step = (packed & 0xFFFF) as u16;
1320                let best = (packed >> 16) as u16;
1321
1322                let mid_best = if self.is_nullable(body, mask) && step >= best {
1323                    step
1324                } else {
1325                    best
1326                };
1327
1328                let body_der = self.der(body, mask)?;
1329                let new_step = step.saturating_add(1);
1330                self.mk_unary(body_der, &mut |b, new_body| {
1331                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1332                        new_step
1333                    } else {
1334                        mid_best
1335                    };
1336                    let packed = (final_best as u32) << 16 | new_step as u32;
1337                    b.mk_counted(new_body, chain, packed)
1338                })
1339            }
1340            Kind::Begin | Kind::End => TRegexId::BOT,
1341            Kind::Pred => {
1342                let psi = node_id.pred_tset(&self);
1343                if psi == TSetId::EMPTY {
1344                    TRegexId::BOT
1345                } else {
1346                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1347                }
1348            }
1349        };
1350
1351        // println!("{} {}", node_id.0, self.pp(node_id));
1352        // println!("node: {} (total: {})", node_id.0, self.num_created);
1353
1354        self.cache_der(node_id, result, mask);
1355        Ok(result)
1356    }
1357
1358    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1359        debug_assert!(meta_id != MetadataId::MISSING);
1360        match self.metadata.get_mut(node_id.0 as usize) {
1361            Some(v) => *v = meta_id,
1362            None => {
1363                self.metadata
1364                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1365                self.metadata[node_id.0 as usize] = meta_id;
1366            }
1367        }
1368    }
1369
1370    fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1371        debug_assert!(reversed_id != NodeId::MISSING);
1372        match self.reversed.get_mut(node_id.0 as usize) {
1373            Some(v) => *v = reversed_id,
1374            None => {
1375                self.reversed
1376                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1377                self.reversed[node_id.0 as usize] = reversed_id;
1378            }
1379        }
1380    }
1381
1382    fn init(&mut self, inst: NodeKey) -> NodeId {
1383        self.num_created += 1;
1384        let node_id = NodeId(self.num_created);
1385        self.index.insert(inst.clone(), node_id);
1386        match inst.kind {
1387            Kind::Pred => {
1388                let meta_id = self.mb.get_meta_id(Metadata {
1389                    flags: MetaFlags::ZERO,
1390                    nulls: NullsId::EMPTY,
1391                });
1392                self.init_metadata(node_id, meta_id);
1393            }
1394            Kind::Begin => {
1395                let meta_id = self.mb.get_meta_id(Metadata {
1396                    flags: MetaFlags::with_nullability(
1397                        Nullability::BEGIN,
1398                        MetaFlags::CONTAINS_ANCHORS,
1399                    ),
1400                    nulls: NullsId::BEGIN0,
1401                });
1402                self.init_metadata(node_id, meta_id);
1403            }
1404            Kind::End => {
1405                let meta_id = self.mb.get_meta_id(Metadata {
1406                    flags: MetaFlags::with_nullability(
1407                        Nullability::END,
1408                        MetaFlags::CONTAINS_ANCHORS,
1409                    ),
1410                    nulls: NullsId::END0,
1411                });
1412                self.init_metadata(node_id, meta_id);
1413            }
1414            Kind::Inter => {
1415                let m1 = self.get_node_meta_id(inst.left);
1416                let m2 = self.get_node_meta_id(inst.right);
1417                let meta_id = {
1418                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1419                    let mask_l = inst.left.nullability(self);
1420                    let mask_r = inst.right.nullability(self);
1421                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1422                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1423                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1424                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1425                    let new_meta = Metadata {
1426                        flags: self.mb.flags_inter(m1, m2),
1427                        nulls,
1428                    };
1429                    let new_id = self.mb.get_meta_id(new_meta);
1430
1431                    new_id
1432                };
1433                self.init_metadata(node_id, meta_id);
1434            }
1435            Kind::Union => {
1436                let m1 = self.get_node_meta_id(inst.left);
1437                let m2 = self.get_node_meta_id(inst.right);
1438                let meta_id = {
1439                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1440                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1441                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1442                    let new_meta = Metadata {
1443                        flags: self.mb.flags_union(m1, m2),
1444                        nulls,
1445                    };
1446                    let new_id = self.mb.get_meta_id(new_meta);
1447                    new_id
1448                };
1449                self.init_metadata(node_id, meta_id);
1450            }
1451            Kind::Concat => {
1452                let flags = self.mb.flags_concat(
1453                    self.get_node_meta_id(inst.left),
1454                    self.get_node_meta_id(inst.right),
1455                );
1456
1457                let right_nullability = inst.right.nullability(self);
1458                let left_nullability = inst.left.nullability(self);
1459                let nulls_left = self.get_nulls_id(inst.left);
1460                let nulls_right = self.get_nulls_id(inst.right);
1461                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1462                let mask = right_nullability.and(left_nullability);
1463                nulls = self.mb.nb.and_mask(nulls, mask);
1464
1465                let new_id = self.mb.get_meta_id(Metadata {
1466                    flags,
1467                    nulls: nulls,
1468                });
1469                self.init_metadata(node_id, new_id);
1470            }
1471            Kind::Star => {
1472                let left_nulls = self.get_nulls_id(inst.left);
1473                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1474                let meta_id = self.mb.get_meta_id(Metadata {
1475                    flags: self
1476                        .mb
1477                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1478                    nulls: nulls,
1479                });
1480                self.init_metadata(node_id, meta_id);
1481            }
1482            Kind::Compl => {
1483                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1484                let meta_id = self.mb.get_meta_id(Metadata {
1485                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1486                    nulls: nulls,
1487                });
1488                self.init_metadata(node_id, meta_id);
1489            }
1490            Kind::Lookbehind => {
1491                let mut null = self.get_meta_flags(inst.left).nullability();
1492                let mut contains_flags = self.get_flags_contains(inst.left);
1493                if !inst.right.is_missing() {
1494                    null = null.and(self.get_meta_flags(inst.right).nullability());
1495                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1496                }
1497
1498                let left_nulls = self.get_nulls_id(inst.left);
1499                let right_nulls = self.get_nulls_id(inst.right);
1500                let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1501                let meta_id = self.mb.get_meta_id(Metadata {
1502                    flags: MetaFlags::with_nullability(
1503                        null,
1504                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1505                    ),
1506                    nulls: nulls,
1507                });
1508                self.init_metadata(node_id, meta_id);
1509            }
1510            Kind::Lookahead => {
1511                let mut nulls = self.get_nulls_id(inst.left);
1512                let left_nullability = inst.left.nullability(self);
1513                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1514                nulls = self.mb.nb.or_id(nulls, nulls_right);
1515                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1516
1517                let la_inner = inst.left;
1518                let la_tail = inst.right;
1519                let null = self
1520                    .get_meta_flags(la_inner)
1521                    .nullability()
1522                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1523                let contains_flags = self
1524                    .get_flags_contains(la_inner)
1525                    .or(self.get_flags_contains(la_tail));
1526
1527                let meta_id = self.mb.get_meta_id(Metadata {
1528                    flags: MetaFlags::with_nullability(
1529                        null,
1530                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1531                    ),
1532                    nulls: nulls,
1533                });
1534                self.init_metadata(node_id, meta_id);
1535            }
1536            Kind::Counted => {
1537                let best = (inst.extra >> 16) as u32;
1538                let (null, nulls) = if best > 0 {
1539                    let mut ns = BTreeSet::new();
1540                    ns.insert(NullState::new(Nullability::CENTER, best));
1541                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1542                } else {
1543                    (Nullability::NEVER, NullsId::EMPTY)
1544                };
1545                let meta_id = self.mb.get_meta_id(Metadata {
1546                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1547                    nulls,
1548                });
1549                self.init_metadata(node_id, meta_id);
1550            }
1551        }
1552
1553        self.array.push(inst);
1554
1555        if let Some(rw) = self.post_init_simplify(node_id) {
1556            self.override_as(node_id, rw)
1557        } else {
1558            node_id
1559        }
1560    }
1561
1562    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1563        match self.get_kind(node_id) {
1564            Kind::Concat => {
1565                let lhs = node_id.left(self);
1566                let rhs = node_id.right(self);
1567                if let Some(_) = lhs.is_pred_star(self) {
1568                    if let Some(opttail) = rhs.is_opt_v(self) {
1569                        if let Some(true) = self.subsumes(lhs, opttail) {
1570                            // return self.override_as(node_id, lhs);
1571                            return Some(lhs);
1572                        }
1573                    }
1574                }
1575            }
1576            Kind::Union => {
1577                let lhs = node_id.left(self);
1578                let rhs = node_id.right(self);
1579                let mut subsumed = false;
1580                rhs.iter_union_while(self, &mut |b, branch| {
1581                    if b.nullable_subsumes(branch, lhs) {
1582                        subsumed = true;
1583                    }
1584                    !subsumed
1585                });
1586                if subsumed {
1587                    return Some(rhs);
1588                }
1589                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1590                    return Some(rhs);
1591                }
1592            }
1593            _ => {}
1594        }
1595
1596        None
1597    }
1598
1599    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1600    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1601        if self.get_kind(lhs) != Kind::Union {
1602            return false; // single branch already checked by nullable_subsumes
1603        }
1604        let mut rhs_branches = Vec::new();
1605        let mut curr = rhs;
1606        while self.get_kind(curr) == Kind::Union {
1607            rhs_branches.push(self.get_left(curr));
1608            curr = self.get_right(curr);
1609        }
1610        rhs_branches.push(curr);
1611
1612        curr = lhs;
1613        while self.get_kind(curr) == Kind::Union {
1614            if !rhs_branches.contains(&self.get_left(curr)) {
1615                return false;
1616            }
1617            curr = self.get_right(curr);
1618        }
1619        rhs_branches.contains(&curr)
1620    }
1621
1622    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1623    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1624        if node == target {
1625            return true;
1626        }
1627        match self.get_kind(node) {
1628            Kind::Union => {
1629                self.nullable_subsumes(self.get_left(node), target)
1630                    || self.nullable_subsumes(self.get_right(node), target)
1631            }
1632            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1633                self.nullable_subsumes(self.get_right(node), target)
1634            }
1635            _ => false,
1636        }
1637    }
1638
1639    pub fn num_nodes(&self) -> u32 {
1640        self.num_created
1641    }
1642    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1643        match self.index.get(&inst) {
1644            Some(&id) => id,
1645            None => self.init(inst),
1646        }
1647    }
1648    #[inline]
1649    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1650        self.index.get(inst)
1651    }
1652
1653    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1654        self.index.insert(key, subsumed);
1655        subsumed
1656    }
1657
1658    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1659        let key = &self.array[key.0 as usize];
1660        let entry = self.index.get_mut(key).unwrap();
1661        *entry = subsumed;
1662        subsumed
1663    }
1664
1665    #[inline]
1666    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1667        self.array[node_id.0 as usize].left
1668    }
1669
1670    #[inline]
1671    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1672        self.array[node_id.0 as usize].right
1673    }
1674
1675    #[inline]
1676    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1677        self.array[node_id.0 as usize].extra
1678    }
1679
1680    #[inline]
1681    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1682        debug_assert!(self.get_kind(node_id) == Kind::Concat);
1683        let mut curr = node_id;
1684        while self.get_kind(curr) == Kind::Concat {
1685            curr = curr.right(self);
1686        }
1687        curr
1688    }
1689
1690    /// true if node ends with a trailing Lookahead(_, MISSING)
1691    fn has_trailing_la(&self, node: NodeId) -> bool {
1692        let end = match self.get_kind(node) {
1693            Kind::Concat => self.get_concat_end(node),
1694            Kind::Lookahead => node,
1695            _ => return false,
1696        };
1697        self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1698    }
1699
1700    /// strip the trailing Lookahead from a node, returning (body, la).
1701    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1702        if self.get_kind(node) == Kind::Lookahead {
1703            return (NodeId::EPS, node);
1704        }
1705        debug_assert!(self.get_kind(node) == Kind::Concat);
1706        let right = node.right(self);
1707        if self.get_kind(right) != Kind::Concat {
1708            // Concat(body, LA) → (body, LA)
1709            return (node.left(self), right);
1710        }
1711        let (stripped, la) = self.strip_trailing_la(right);
1712        (self.mk_concat(node.left(self), stripped), la)
1713    }
1714    #[inline]
1715    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1716        debug_assert!(matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted));
1717        lookahead_node_id.left(self)
1718    }
1719    #[inline]
1720    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1721        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1722        lookahead_node_id.right(self)
1723    }
1724    #[inline]
1725    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1726        debug_assert!(
1727            matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted),
1728            "not lookahead/counted: {:?}",
1729            self.pp(lookahead_node_id)
1730        );
1731        self.get_extra(lookahead_node_id)
1732    }
1733    #[inline]
1734    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1735        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1736        lookbehind_node_id.left(self)
1737    }
1738    #[inline]
1739    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1740        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1741        lookbehind_node_id.right(self)
1742    }
1743
1744    #[inline]
1745    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1746        debug_assert!(
1747            self.array.len() > node_id.0 as usize,
1748            "array len: {:?}",
1749            node_id
1750        );
1751        self.array[node_id.0 as usize].kind
1752    }
1753
1754    #[inline]
1755    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1756        &self.array[node_id.0 as usize]
1757    }
1758
1759    #[inline]
1760    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1761        self.metadata[node_id.0 as usize]
1762    }
1763
1764    #[inline]
1765    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1766        debug_assert!(node_id.0 != u32::MAX);
1767        let meta_id = self.get_node_meta_id(node_id);
1768        debug_assert!(meta_id != MetadataId::MISSING);
1769        &self.mb.array[meta_id.0 as usize]
1770    }
1771
1772    #[inline]
1773    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1774        if node_id == NodeId::MISSING {
1775            NullsId::EMPTY
1776        } else {
1777            self.get_meta(node_id).nulls
1778        }
1779    }
1780
1781    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1782        self.mb
1783            .nb
1784            .array
1785            .iter()
1786            .map(|set| set.iter().cloned().collect())
1787            .collect()
1788    }
1789
1790    pub fn nulls_count(&self) -> usize {
1791        self.mb.nb.array.len()
1792    }
1793
1794    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1795        self.mb.nb.array[id as usize].iter().cloned().collect()
1796    }
1797
1798    #[inline]
1799    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1800        if node_id == NodeId::MISSING {
1801            NullsId::EMPTY
1802        } else {
1803            let nulls = self.get_meta(node_id).nulls;
1804            self.mb.nb.and_mask(nulls, mask)
1805        }
1806    }
1807
1808    #[inline]
1809    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1810        let meta_id = self.get_node_meta_id(node_id);
1811        let meta = &self.mb.array[meta_id.0 as usize];
1812        meta.flags
1813    }
1814
1815    #[inline]
1816    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1817        self.get_meta(node_id).flags.nullability()
1818    }
1819
1820    #[inline]
1821    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1822        let meta_id = self.get_node_meta_id(node_id);
1823        let meta = &self.mb.array[meta_id.0 as usize];
1824        meta.flags.all_contains_flags()
1825    }
1826
1827    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1828        if !self.contains_look(node_id) {
1829            return Ok(node_id);
1830        }
1831        if self.get_kind(node_id) == Kind::Concat
1832            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1833        {
1834            return self.strip_lb(node_id.right(self));
1835        }
1836        match self.get_kind(node_id) {
1837            Kind::Lookbehind | Kind::Lookahead => Err(AlgebraError::UnsupportedPattern),
1838            _ => Ok(node_id),
1839        }
1840    }
1841
1842    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1843        debug_assert!(node_id != NodeId::MISSING);
1844        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1845            if *rev != NodeId::MISSING {
1846                return Ok(*rev);
1847            }
1848        }
1849        let rw = match self.get_kind(node_id) {
1850            Kind::End => NodeId::BEGIN,
1851            Kind::Begin => NodeId::END,
1852            Kind::Pred => node_id,
1853            Kind::Concat => {
1854                let left = self.reverse(node_id.left(self))?;
1855                let right = self.reverse(node_id.right(self))?;
1856                self.mk_concat(right, left)
1857            }
1858            Kind::Union => {
1859                let left = self.reverse(node_id.left(self))?;
1860                let right = self.reverse(node_id.right(self))?;
1861                self.mk_union(left, right)
1862            }
1863            Kind::Inter => {
1864                let left = self.reverse(node_id.left(self))?;
1865                let right = self.reverse(node_id.right(self))?;
1866                self.mk_inter(left, right)
1867            }
1868            Kind::Star => {
1869                let body = self.reverse(node_id.left(self))?;
1870                self.mk_star(body)
1871            }
1872            Kind::Compl => {
1873                let body = self.reverse(node_id.left(self))?;
1874                self.mk_compl(body)
1875            }
1876            Kind::Lookbehind => {
1877                // lookbehind → lookahead when reversed (separate fwd/rev DFAs)
1878                let prev = self.get_lookbehind_prev(node_id);
1879                let inner_id = self.get_lookbehind_inner(node_id);
1880                let rev_inner = self.reverse(inner_id)?;
1881                let rev_prev = if prev != NodeId::MISSING {
1882                    self.reverse(prev)?
1883                } else {
1884                    NodeId::MISSING
1885                };
1886                self.mk_lookahead(rev_inner, rev_prev, 0)
1887            }
1888            Kind::Lookahead => {
1889                let rel = self.get_lookahead_rel(node_id);
1890                if rel == u32::MAX {
1891                    // rel MAX holds no nullability - rewrite to intersection
1892                    let lbody = self.get_lookahead_inner(node_id);
1893                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
1894                    let lbody_ts = self.mk_concat(lbody, NodeId::TS);
1895                    let ltail_ts = self.mk_concat(ltail, NodeId::TS);
1896                    let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1897                    let rev = self.reverse(as_inter)?;
1898                    self.init_reversed(node_id, rev);
1899                    return Ok(rev);
1900                }
1901                if rel != 0 {
1902                    return Err(AlgebraError::UnsupportedPattern);
1903                }
1904                // lookahead → lookbehind when reversed (separate fwd/rev DFAs)
1905                let tail_node = self.get_lookahead_tail(node_id);
1906                let rev_tail = if tail_node != NodeId::MISSING {
1907                    self.reverse(tail_node)?
1908                } else {
1909                    NodeId::MISSING
1910                };
1911                let inner_id = self.get_lookahead_inner(node_id);
1912                let rev_inner = self.reverse(inner_id)?;
1913                self.mk_lookbehind(rev_inner, rev_tail)
1914            }
1915            Kind::Counted => {
1916                return Err(AlgebraError::UnsupportedPattern);
1917            }
1918        };
1919        self.init_reversed(node_id, rw);
1920        Ok(rw)
1921    }
1922
1923    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
1924        let node = NodeKey {
1925            kind: Kind::Pred,
1926            left: NodeId::MISSING,
1927            right: NodeId::MISSING,
1928            extra: pred.0,
1929        };
1930        self.get_node_id(node)
1931    }
1932
1933    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
1934        let key = NodeKey {
1935            kind: Kind::Compl,
1936            left: body,
1937            right: NodeId::MISSING,
1938            extra: u32::MAX,
1939        };
1940        if let Some(id) = self.key_is_created(&key) {
1941            return *id;
1942        }
1943        if body == NodeId::BOT {
1944            return NodeId::TS;
1945        }
1946        if body == NodeId::TS {
1947            return NodeId::BOT;
1948        }
1949
1950        if let Some(contains_body) = body.is_contains(&self) {
1951            if contains_body.is_pred(&self) {
1952                let pred = contains_body.pred_tset(&self);
1953                let notpred = self.mk_pred_not(pred);
1954                let node = self.mk_star(notpred);
1955                return self.init_as(key, node);
1956            } else if contains_body == NodeId::END {
1957                return self.init_as(key, NodeId::BOT);
1958            } else {
1959            }
1960        };
1961
1962        if self.get_kind(body) == Kind::Compl {
1963            return self.get_node(body).left;
1964        }
1965
1966        let node_id = self.get_node_id(key);
1967        node_id
1968    }
1969
1970    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
1971        let nid = self.get_nulls_id(body);
1972        let nref = self.mb.nb.get_set_ref(nid).clone();
1973        let mut futures = NodeId::BOT;
1974        for n in nref.iter() {
1975            if !n.is_mask_nullable(mask) {
1976                continue;
1977            }
1978
1979            let eff = if n.rel == 0 {
1980                NodeId::EPS
1981            } else {
1982                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
1983            };
1984            futures = self.mk_union(futures, eff)
1985        }
1986        futures
1987    }
1988
1989    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
1990        if cfg!(feature = "norewrite") {
1991            return None;
1992        }
1993
1994        if self.get_kind(tail) == Kind::Lookbehind {
1995            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
1996            return match self
1997                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
1998            {
1999                Ok(node) => Some(node),
2000                Err(_) => None,
2001            };
2002        }
2003        if self.get_kind(head) == Kind::Lookahead {
2004            let la_body = self.get_lookahead_inner(head);
2005            let la_tail = self.get_lookahead_tail(head);
2006            let la_rel = self.get_lookahead_rel(head);
2007            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2008            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2009                let tail_rel = self.get_lookahead_rel(new_la_tail);
2010                tail_rel + la_rel
2011            } else if new_la_tail.is_center_nullable(self) {
2012                la_rel
2013            } else {
2014                u32::MAX
2015            };
2016
2017            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2018        }
2019
2020        if head.is_kind(self, Kind::End) {
2021            if tail == NodeId::TS {
2022                return Some(head);
2023            }
2024        }
2025        if head == NodeId::TS {
2026            if tail == NodeId::END {
2027                return Some(head);
2028            }
2029        }
2030
2031        if head == NodeId::TS {
2032            if self.nullability(tail) == Nullability::ALWAYS {
2033                return Some(NodeId::TS);
2034            }
2035        }
2036
2037        if tail == NodeId::TS {
2038            if self.nullability(head) == Nullability::ALWAYS {
2039                return Some(NodeId::TS);
2040            }
2041        }
2042
2043        if self.get_kind(tail) == Kind::Union {
2044            if head == NodeId::TS {
2045                let mut should_distribute_top = false;
2046                self.iter_unions(tail, |v| {
2047                    if v == NodeId::BEGIN || self.starts_with_ts(v) {
2048                        should_distribute_top = true;
2049                    }
2050                });
2051                if should_distribute_top {
2052                    let mut new_union = NodeId::BOT;
2053                    let mut curr = tail;
2054                    while self.get_kind(curr) == Kind::Union {
2055                        let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2056                        new_union = self.mk_union(new_node, new_union);
2057                        curr = curr.right(self);
2058                    }
2059                    let new_node = self.mk_concat(NodeId::TS, curr);
2060                    new_union = self.mk_union(new_union, new_node);
2061                    return Some(new_union);
2062                }
2063            }
2064        }
2065
2066        if self.get_kind(head) == Kind::Union {
2067            if head == NodeId::TS {
2068                let mut should_distribute_top = false;
2069                self.iter_unions(head, |v| {
2070                    if v == NodeId::END || self.starts_with_ts(v) {
2071                        should_distribute_top = true;
2072                    }
2073                });
2074                if should_distribute_top {
2075                    let mut new_union = NodeId::BOT;
2076                    let mut curr = head;
2077                    while self.get_kind(curr) == Kind::Union {
2078                        let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2079                        new_union = self.mk_union(new_node, new_union);
2080                        curr = curr.right(self);
2081                    }
2082                    let new_node = self.mk_concat(curr, NodeId::TS);
2083                    new_union = self.mk_union(new_union, new_node);
2084                    return Some(new_union);
2085                }
2086            }
2087        }
2088
2089        if self.get_kind(head) == Kind::Inter {
2090            if tail == NodeId::TS {
2091                let mut alltopstar = true;
2092                iter_inter!(self, head, |v| {
2093                    alltopstar = self.ends_with_ts(v);
2094                });
2095                if alltopstar {
2096                    return Some(head);
2097                }
2098            }
2099        }
2100
2101        if head.is_star(self) && head == tail {
2102            return Some(head);
2103        }
2104
2105        None
2106    }
2107
2108    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2109        use Kind::*;
2110
2111        if cfg!(feature = "norewrite") {
2112            return None;
2113        }
2114        if left == right {
2115            return Some(left);
2116        }
2117
2118        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2119            return Some(right);
2120        }
2121
2122        if self.get_kind(left) == Kind::Pred {
2123            if self.get_kind(right) == Kind::Pred {
2124                let l = left.pred_tset(&self);
2125                let r = right.pred_tset(&self);
2126                let solver = self.solver();
2127                let psi = solver.or_id(l, r);
2128                let rewrite = self.mk_pred(psi);
2129                return Some(rewrite);
2130            }
2131        }
2132
2133        if left == NodeId::EPS {
2134            if self.get_extra(left) == 0 {
2135                if self.nullability(right) == Nullability::ALWAYS {
2136                    return Some(right);
2137                }
2138            }
2139        }
2140
2141        if self.get_kind(left) == Kind::Lookahead {
2142            if self.get_kind(right) == Kind::Lookahead {
2143                let lb = left.left(self);
2144                let lt = left.right(self);
2145                let lrel = left.extra(self);
2146
2147                let rb = right.left(self);
2148                let rt = right.right(self);
2149                let rrel = right.extra(self);
2150
2151                if lrel == rrel {
2152                    if lb == rb {}
2153                    if lt.is_missing() {
2154                        if rt.is_missing() {
2155                            let unioned = self.mk_union(lb, rb);
2156                            let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2157                            return Some(node);
2158                        }
2159                    }
2160                }
2161            }
2162        }
2163
2164        if right.is_kind(self, Concat) {
2165            if left == NodeId::END {
2166                if right.left(self) == NodeId::END {
2167                    if self.nullability(right).has(Nullability::END) {
2168                        return Some(right);
2169                    }
2170                }
2171            }
2172            // .*|.*a.* => .*(a.*|)
2173            if left == right.left(self) {
2174                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2175                let rw = self.mk_concat(left, rhs);
2176                return Some(rw);
2177            }
2178            if left.is_kind(self, Concat) {
2179                let head1 = left.left(self);
2180                let head2 = right.left(self);
2181
2182                if head1 == head2 {
2183                    let t1 = left.right(self);
2184                    let t2 = right.right(self);
2185                    // opportunistic rewrites
2186                    if head1 == NodeId::TS {
2187                        if t2.has_concat_tail(self, t1) {
2188                            return Some(left);
2189                        }
2190                        if t1.has_concat_tail(self, t2) {
2191                            return Some(right);
2192                        }
2193                    }
2194                    let un = self.mk_union(t1, t2);
2195                    return Some(self.mk_concat(left.left(self), un));
2196                }
2197
2198                // xa|ya => (x|y)a - suffix factoring via reverse
2199                // disabled: introduces reversed intermediate patterns that bloat derivative DFA
2200                if false {
2201                    let end1 = self.get_concat_end(left);
2202                    let end2 = self.get_concat_end(right);
2203                    if end1 == end2 {
2204                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2205                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2206                            let rev1 = self.reverse(left).unwrap();
2207                            let rev2 = self.reverse(right).unwrap();
2208
2209                            let union_rev = self.mk_union(rev1, rev2);
2210                            return Some(self.reverse(union_rev).unwrap());
2211                        }
2212                    }
2213                }
2214            }
2215            if self.get_kind(left) == Kind::Pred {
2216                if left == right.left(self) {
2217                    let un = self.mk_opt(right.right(self));
2218                    let conc = self.mk_concat(left, un);
2219                    return Some(conc);
2220                }
2221            }
2222        }
2223
2224        if left == NodeId::EPS {
2225            if right.is_plus(self) {
2226                let result = self.mk_star(right.left(self));
2227                return Some(result);
2228            }
2229        }
2230
2231        // e.g. (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2232        if self.flags == BuilderFlags::SUBSUME {
2233            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2234                return Some(rw);
2235            }
2236        }
2237
2238        None
2239    }
2240
2241    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2242        let mut curr = node;
2243        while self.get_kind(curr) == Kind::Inter {
2244            out.push(self.get_left(curr));
2245            curr = self.get_right(curr);
2246        }
2247        out.push(curr);
2248    }
2249
2250    /// check if node is [_*] Pred(X)^count . Star(Y)
2251    /// returns (has_prefix_star, pred_tset, star_node, count)
2252    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2253        let mut curr = node;
2254        let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2255        if has_prefix {
2256            curr = self.get_right(curr);
2257        }
2258        let mut count = 0u32;
2259        let mut pred_set = None;
2260        while self.get_kind(curr) == Kind::Concat {
2261            let head = self.get_left(curr);
2262            if self.get_kind(head) != Kind::Pred {
2263                return None;
2264            }
2265            let ps = head.pred_tset(self);
2266            match pred_set {
2267                None => pred_set = Some(ps),
2268                Some(existing) => {
2269                    if existing != ps {
2270                        return None;
2271                    }
2272                }
2273            }
2274            curr = self.get_right(curr);
2275            count += 1;
2276        }
2277        if count == 0 || self.get_kind(curr) != Kind::Star {
2278            return None;
2279        }
2280        Some((has_prefix, pred_set.unwrap(), curr, count))
2281    }
2282
2283    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2284        let mut j = 0;
2285        for &s in sub {
2286            while j < sup.len() && sup[j] < s {
2287                j += 1;
2288            }
2289            if j >= sup.len() || sup[j] != s {
2290                return false;
2291            }
2292            j += 1;
2293        }
2294        true
2295    }
2296
2297    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2298        if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2299            return None;
2300        }
2301
2302        let mut lc: Vec<NodeId> = Vec::new();
2303        let mut rc: Vec<NodeId> = Vec::new();
2304        self.collect_inter_components(left, &mut lc);
2305        self.collect_inter_components(right, &mut rc);
2306
2307        // component subset check: fewer constraints = larger language
2308        // if lc ⊆ rc then L(left) ⊇ L(right), keep left
2309        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2310            return Some(left);
2311        }
2312        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2313        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2314            return Some(right);
2315        }
2316
2317        if lc.len() == rc.len() {
2318            let mut diff_idx = None;
2319            for i in 0..lc.len() {
2320                if lc[i] != rc[i] {
2321                    if diff_idx.is_some() {
2322                        return None;
2323                    }
2324                    diff_idx = Some(i);
2325                }
2326            }
2327            if let Some(idx) = diff_idx {
2328                let a = lc[idx];
2329                let b = rc[idx];
2330                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2331                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2332                {
2333                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2334                        return if ca < cb { Some(left) } else { Some(right) };
2335                    }
2336                }
2337            }
2338        }
2339
2340        None
2341    }
2342
2343    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2344        if cfg!(feature = "norewrite") {
2345            return None;
2346        }
2347        if left == right {
2348            return Some(left);
2349        }
2350
2351        if self.get_kind(right) == Kind::Union {
2352            let mut result = NodeId::BOT;
2353            self.iter_unions_b(
2354                right,
2355                &mut (|b, v| {
2356                    let new_inter = b.mk_inter(v, left);
2357                    result = b.mk_union(result, new_inter);
2358                }),
2359            );
2360            return Some(result);
2361        }
2362        if self.get_kind(left) == Kind::Union {
2363            let mut result = NodeId::BOT;
2364            self.iter_unions_b(
2365                left,
2366                &mut (|b, v| {
2367                    let new_inter = b.mk_inter(v, right);
2368                    result = b.mk_union(result, new_inter);
2369                }),
2370            );
2371            return Some(result);
2372        }
2373
2374        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2375            return Some(NodeId::BOT);
2376        }
2377
2378        if left.kind(&self) == Kind::Compl {
2379            if right.kind(&self) == Kind::Compl {
2380                let bodies = self.mk_union(left.left(self), right.left(self));
2381                return Some(self.mk_compl(bodies));
2382            }
2383        }
2384
2385        if left == NodeId::TOPPLUS {
2386            if let Some(_) = right.is_pred_star(self) {
2387                let newloop = self.mk_plus(right.left(self));
2388                return Some(newloop);
2389            }
2390            if right.is_never_nullable(&self) {
2391                return Some(right);
2392            }
2393            if right.is_kind(self, Kind::Lookahead) {
2394                if self.get_lookahead_tail(right).is_missing() {
2395                    return Some(NodeId::BOT);
2396                }
2397            }
2398            if right.is_kind(self, Kind::Concat) {}
2399        }
2400
2401        if right.is_lookahead(&self) {
2402            if left.is_never_nullable(&self) {
2403                return Some(NodeId::BOT);
2404            }
2405        }
2406
2407        if self.get_kind(right) == Kind::Compl {
2408            let compl_body = right.left(self);
2409            if left == compl_body {
2410                return Some(NodeId::BOT);
2411            }
2412            if self.get_kind(compl_body) == Kind::Concat {
2413                let compl_head = compl_body.left(self);
2414                // L & ~(R·_*) = BOT when L ⊂ R·_*
2415                if compl_body.right(self) == NodeId::TS {
2416                    if compl_head == left {
2417                        return Some(NodeId::BOT);
2418                    }
2419                }
2420            }
2421        }
2422
2423        if let Some(pleft) = left.is_pred_star(&self) {
2424            if let Some(pright) = right.is_pred_star(&self) {
2425                let merged = self.mk_inter(pleft, pright);
2426                return Some(self.mk_star(merged));
2427            }
2428        }
2429
2430        // factor lookbehinds out of intersections
2431        // Concat(LB1, body1) & Concat(LB2, body2) → Concat(LB(inner1&inner2), body1&body2)
2432        // Concat(LB, body1) & body2 → Concat(LB, body1&body2)
2433        {
2434            let l_is_clb = self.get_kind(left) == Kind::Concat
2435                && self.get_kind(left.left(self)) == Kind::Lookbehind;
2436            let r_is_clb = self.get_kind(right) == Kind::Concat
2437                && self.get_kind(right.left(self)) == Kind::Lookbehind;
2438            if l_is_clb || r_is_clb {
2439                let (lb, body) = if l_is_clb && r_is_clb {
2440                    let lb1 = left.left(self);
2441                    let lb2 = right.left(self);
2442                    let inner = self.mk_inter(
2443                        self.get_lookbehind_inner(lb1),
2444                        self.get_lookbehind_inner(lb2),
2445                    );
2446                    // lb_prev is MISSING - cannot trigger UnsupportedPattern
2447                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2448                    let body = self.mk_inter(left.right(self), right.right(self));
2449                    (lb, body)
2450                } else if l_is_clb {
2451                    let lb = left.left(self);
2452                    let body = self.mk_inter(left.right(self), right);
2453                    (lb, body)
2454                } else {
2455                    let lb = right.left(self);
2456                    let body = self.mk_inter(left, right.right(self));
2457                    (lb, body)
2458                };
2459                return Some(self.mk_concat(lb, body));
2460            }
2461        }
2462
2463        // factor trailing lookaheads out of intersections
2464        // body1·LA & body2 → (body1&body2)·LA
2465        // body1·LA1 & body2·LA2 → (body1&body2)·LA(inner1&inner2)
2466        {
2467            let l_has_la = self.has_trailing_la(left);
2468            let r_has_la = self.has_trailing_la(right);
2469            if l_has_la || r_has_la {
2470                let (body, la) = if l_has_la && r_has_la {
2471                    let (lbody, l_la) = self.strip_trailing_la(left);
2472                    let (rbody, r_la) = self.strip_trailing_la(right);
2473                    let inner = self.mk_inter(
2474                        self.get_lookahead_inner(l_la),
2475                        self.get_lookahead_inner(r_la),
2476                    );
2477                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2478                    let body = self.mk_inter(lbody, rbody);
2479                    (body, la)
2480                } else if l_has_la {
2481                    let (lbody, la) = self.strip_trailing_la(left);
2482                    let body = self.mk_inter(lbody, right);
2483                    (body, la)
2484                } else {
2485                    let (rbody, la) = self.strip_trailing_la(right);
2486                    let body = self.mk_inter(left, rbody);
2487                    (body, la)
2488                };
2489                return Some(self.mk_concat(body, la));
2490            }
2491        }
2492
2493        return None;
2494    }
2495
2496    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2497        if cfg!(feature = "norewrite") {
2498            return None;
2499        }
2500        debug_assert!(self.get_kind(right_union) == Kind::Union);
2501
2502        let mut rewritten = None;
2503        right_union.iter_union_while(
2504            self,
2505            &mut (|b, v| {
2506                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2507                    rewritten = Some((v, rw));
2508                    false
2509                } else {
2510                    true
2511                }
2512            }),
2513        );
2514
2515        if let Some(rw) = rewritten {
2516            let mut new_union = NodeId::BOT;
2517            right_union.iter_union(
2518                self,
2519                &mut (|b, v| {
2520                    if v == rw.0 {
2521                        new_union = b.mk_union(rw.1, new_union)
2522                    } else {
2523                        new_union = b.mk_union(v, new_union)
2524                    }
2525                }),
2526            );
2527            return Some(new_union);
2528        };
2529
2530        return None;
2531    }
2532
2533    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2534        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2535        debug_assert!(tail != NodeId::MISSING);
2536        let key = NodeKey {
2537            kind: Kind::Concat,
2538            left: head,
2539            right: tail,
2540            extra: u32::MAX,
2541        };
2542        if let Some(id) = self.key_is_created(&key) {
2543            return *id;
2544        }
2545
2546        if head == NodeId::BOT || tail == NodeId::BOT {
2547            return NodeId::BOT;
2548        }
2549        if head == NodeId::EPS {
2550            return tail;
2551        }
2552        if tail == NodeId::EPS {
2553            return head;
2554        }
2555
2556        match tail {
2557            // this is only valid if direction known;
2558            NodeId::BEGIN => {
2559                if !self.is_nullable(head, Nullability::BEGIN) {
2560                    return NodeId::BOT;
2561                } else {
2562                    return NodeId::BEGIN;
2563                }
2564            }
2565            _ => {}
2566        }
2567
2568        // normalize concats to right
2569        if head.is_kind(self, Kind::Concat) {
2570            let left = head.left(self);
2571            let newright = self.mk_concat(head.right(self), tail);
2572            let updated = self.mk_concat(left, newright);
2573            return self.init_as(key, updated);
2574        }
2575
2576        if head == NodeId::TS && tail == NodeId::END {
2577            return NodeId::TS;
2578        }
2579
2580        if self.get_kind(head) == Kind::End {
2581            if !self.is_nullable(tail, Nullability::END) {
2582                return NodeId::BOT;
2583            }
2584        }
2585
2586        if self.get_kind(tail) == Kind::Concat {
2587            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2588                let upd = self.mk_concat(rw, tail.right(self));
2589                return self.init_as(key, upd);
2590            }
2591        }
2592
2593        match self.attempt_rw_concat_2(head, tail) {
2594            Some(new) => return self.init_as(key, new),
2595            None => {}
2596        }
2597
2598        match (self.get_kind(head), self.get_kind(tail)) {
2599            // merge stars
2600            (Kind::Star, Kind::Concat) if head.is_star(&self) => {
2601                let rl = tail.left(self);
2602                if head == rl {
2603                    return self.init_as(key, tail);
2604                }
2605            }
2606            // attempt longer concat rw
2607            (_, Kind::Concat) => {
2608                let curr = head;
2609                let h2 = self.mk_concat(curr, tail.left(self));
2610                let tr = tail.right(self);
2611                match self.attempt_rw_concat_2(h2, tr) {
2612                    Some(new) => {
2613                        return self.init_as(key, new);
2614                    }
2615                    None => {}
2616                }
2617            }
2618            _ if head == NodeId::TS => {
2619                if self.starts_with_ts(tail) {
2620                    return self.init_as(key, tail);
2621                }
2622            }
2623            _ => {}
2624        }
2625
2626        self.init(key)
2627    }
2628
2629    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2630        // LNF: lookbehind must start with ts
2631        let lb_body = {
2632            match self.starts_with_ts(lb_body) {
2633                true => lb_body,
2634                false => self.mk_concat(NodeId::TS, lb_body),
2635            }
2636        };
2637        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2638        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2639    }
2640
2641    fn mk_lookbehind_internal(
2642        &mut self,
2643        lb_body: NodeId,
2644        lb_prev: NodeId,
2645    ) -> Result<NodeId, AlgebraError> {
2646        debug_assert!(lb_body != NodeId::MISSING);
2647        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2648        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2649            return Ok(NodeId::BOT);
2650        }
2651        if lb_body == NodeId::TS {
2652            return Ok(lb_prev);
2653        }
2654        if lb_body == NodeId::EPS {
2655            match lb_prev {
2656                NodeId::MISSING => return Ok(NodeId::EPS),
2657                NodeId::EPS => return Ok(NodeId::EPS),
2658                _ => return Err(AlgebraError::UnsupportedPattern),
2659            }
2660        }
2661
2662        let key = NodeKey {
2663            kind: Kind::Lookbehind,
2664            left: lb_body,
2665            right: lb_prev,
2666            extra: u32::MAX,
2667        };
2668        match self.key_is_created(&key) {
2669            Some(id) => return Ok(*id),
2670            None => {
2671                if lb_prev == NodeId::TS {
2672                    return Ok(self.mk_concat(lb_prev, lb_body));
2673                }
2674
2675                Ok(self.init(key))
2676            }
2677        }
2678    }
2679
2680    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2681        // LNF: lookahead must end with ts
2682        let la_body = {
2683            match self.ends_with_ts(la_body) {
2684                true => la_body,
2685                false => self.mk_concat(la_body, NodeId::TS),
2686            }
2687        };
2688        let rel = if NodeId::MISSING == la_tail {
2689            rel
2690        } else {
2691            match la_tail.is_center_nullable(self) {
2692                false => u32::MAX,
2693                true => rel,
2694            }
2695        };
2696
2697        self.mk_lookahead_internal(la_body, la_tail, rel)
2698    }
2699
2700    // rel max = carries no nullability, can potentially rw to intersection
2701    pub fn mk_lookahead_internal(
2702        &mut self,
2703        la_body: NodeId,
2704        la_tail: NodeId,
2705        rel: u32,
2706    ) -> NodeId {
2707        let key = NodeKey {
2708            kind: Kind::Lookahead,
2709            left: la_body,
2710            right: la_tail,
2711            extra: rel,
2712        };
2713        if let Some(id) = self.key_is_created(&key) {
2714            return *id;
2715        }
2716        if la_body == NodeId::TS {
2717            if rel == 0 {
2718                return la_tail.missing_to_eps();
2719            } else {
2720                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2721            }
2722        }
2723        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2724            return NodeId::BOT;
2725        }
2726        if la_tail.is_missing() && rel == u32::MAX {
2727            return NodeId::BOT;
2728        }
2729
2730        if la_body == NodeId::EPS && la_tail.is_missing() {
2731            if rel == 0 {
2732                return la_body;
2733            }
2734        }
2735
2736        if la_tail == NodeId::TS {
2737            if rel == 0 || rel == u32::MAX {
2738                return self.mk_concat(la_body, NodeId::TS);
2739            } else if rel == u32::MAX {
2740                return self.mk_begins_with(la_body);
2741            }
2742        }
2743
2744        if rel == u32::MAX {
2745            if la_tail.is_missing() {
2746                return NodeId::BOT;
2747            }
2748
2749            if self.is_always_nullable(la_body) {
2750                return la_tail.missing_to_eps();
2751            }
2752
2753            if la_tail != NodeId::MISSING {
2754                match self.get_kind(la_tail) {
2755                    _ => {
2756                        if la_body.is_compl_plus_end(&self) {
2757                            let minlen = self.get_min_length_only(la_tail);
2758                            if minlen >= 1 {
2759                                return NodeId::BOT;
2760                            }
2761                        }
2762                    }
2763                }
2764            }
2765        }
2766
2767        // flatten consecutive lookaheads: (?=A, tail=(?=B, tail=C)) → (?=A._* & B._*, tail=C)
2768        if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2769            let la_body2 = self.get_lookahead_inner(la_tail);
2770            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2771            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2772            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2773            let new_la_rel = self.get_lookahead_rel(la_tail);
2774            let new_la_tail = self.get_lookahead_tail(la_tail);
2775            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2776        }
2777
2778        // _*\z -> too expensive to keep; null it
2779        if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2780            let la_body_right = la_body.right(self);
2781            if self.is_always_nullable(la_body_right) {
2782                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2783            }
2784            if la_body.right(self) == NodeId::END {
2785                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2786            }
2787            let bodyright = la_body.right(self);
2788            if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2789                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2790                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2791            }
2792        }
2793
2794        if la_tail != NodeId::MISSING {
2795            match (self.get_kind(la_body), self.get_kind(la_tail)) {
2796                (Kind::Concat, Kind::Pred) => {
2797                    let lpred = la_body.left(self);
2798                    if self.get_kind(lpred) == Kind::Pred {
2799                        let l = lpred.pred_tset(&self);
2800                        let r = la_tail.pred_tset(&self);
2801                        let psi_and = self.solver().and_id(l, r);
2802                        let rewrite = self.mk_pred(psi_and);
2803                        let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2804                        let new_right = self.mk_lookahead_internal(
2805                            la_body.right(self),
2806                            NodeId::MISSING,
2807                            new_rel,
2808                        );
2809                        return self.mk_concat(rewrite, new_right);
2810                    }
2811                }
2812                _ => {}
2813            }
2814        }
2815
2816        self.get_node_id(key)
2817    }
2818
2819    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2820        let has_match = (packed >> 16) > 0;
2821        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2822            return NodeId::BOT;
2823        }
2824        let chain = self.prune_counted_chain(body, chain);
2825        let key = NodeKey {
2826            kind: Kind::Counted,
2827            left: body,
2828            right: chain,
2829            extra: packed,
2830        };
2831        if let Some(id) = self.key_is_created(&key) {
2832            return *id;
2833        }
2834        self.get_node_id(key)
2835    }
2836
2837    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2838        if chain == NodeId::MISSING || body == NodeId::BOT {
2839            return chain;
2840        }
2841        // nullable body → body·_* = _*, all chain candidates subsumed
2842        if self.nullability(body) != Nullability::NEVER {
2843            return NodeId::MISSING;
2844        }
2845        let chain_body = chain.left(self);
2846        if chain_body == NodeId::BOT {
2847            return chain; // pending match, keep
2848        }
2849        // min_length(body) <= min_length(chain_body) → chain_body ⊂ body·_*
2850        let (body_min, _) = self.get_min_max_length(body);
2851        let (chain_min, _) = self.get_min_max_length(chain_body);
2852        if body_min > 0 && body_min <= chain_min {
2853            return self.prune_counted_chain(body, chain.right(self));
2854        }
2855        let not_begins = self.mk_not_begins_with(body);
2856        let inter = self.mk_inter(chain_body, not_begins);
2857        let is_empty = inter == NodeId::BOT;
2858        if is_empty {
2859            self.prune_counted_chain(body, chain.right(self))
2860        } else {
2861            chain
2862        }
2863    }
2864
2865    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2866        match self.get_node(body).kind {
2867            Kind::Pred => {
2868                let psi = body.pred_tset(&self);
2869                let negated = self.mk_pred_not(psi);
2870                let union = self.mk_union(NodeId::END, negated);
2871                let la = self.mk_lookahead(union, NodeId::MISSING, rel);
2872                la
2873            }
2874            _ => {
2875                let neg_inner = self.mk_concat(body, NodeId::TS);
2876                let neg_part = self.mk_compl(neg_inner);
2877                let conc = self.mk_concat(neg_part, NodeId::END);
2878                let look = self.mk_lookahead(conc, NodeId::MISSING, rel);
2879                look
2880            }
2881        }
2882    }
2883
2884    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2885        match self.get_node(body).kind {
2886            Kind::Pred => {
2887                let psi = body.pred_tset(&self);
2888                let negated = self.mk_pred_not(psi);
2889                let union = self.mk_union(NodeId::BEGIN, negated);
2890                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2891                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2892            }
2893            _ => {
2894                let neg_inner = self.mk_concat(NodeId::TS, body);
2895                let neg_part = self.mk_compl(neg_inner);
2896                let conc = self.mk_concat(NodeId::BEGIN, neg_part);
2897                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2898                self.mk_lookbehind_internal(conc, NodeId::MISSING).unwrap()
2899            }
2900        }
2901    }
2902
2903    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2904        debug_assert!(left != NodeId::MISSING);
2905        debug_assert!(right != NodeId::MISSING);
2906        if left > right {
2907            return self.mk_union(right, left);
2908        }
2909        let key = NodeKey {
2910            kind: Kind::Union,
2911            left,
2912            right,
2913            extra: u32::MAX,
2914        };
2915        if let Some(id) = self.key_is_created(&key) {
2916            return *id;
2917        }
2918
2919        if left == right {
2920            return left;
2921        }
2922        if left == NodeId::BOT {
2923            return right;
2924        }
2925        if right == NodeId::BOT {
2926            return left;
2927        }
2928        if right == NodeId::TS {
2929            return right;
2930        }
2931        if left == NodeId::TS {
2932            return left;
2933        }
2934
2935        match (self.get_kind(left), self.get_kind(right)) {
2936            (Kind::Union, _) => {
2937                self.iter_unions_b(left, &mut |b, v| {
2938                    b.temp_vec.push(v);
2939                });
2940                self.iter_unions_b(right, &mut |b, v| {
2941                    b.temp_vec.push(v);
2942                });
2943                self.temp_vec.sort();
2944                let tree = self.temp_vec.clone();
2945                self.temp_vec.clear();
2946                let newnode = tree
2947                    .iter()
2948                    .rev()
2949                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2950                return self.init_as(key, newnode);
2951            }
2952            (_, Kind::Union) => {
2953                let rleft = right.left(&self);
2954                // if left_node id is smaller than rleft, just create a new union
2955                if left > rleft {
2956                    self.iter_unions_b(left, &mut |b, v| {
2957                        b.temp_vec.push(v);
2958                    });
2959                    self.iter_unions_b(right, &mut |b, v| {
2960                        b.temp_vec.push(v);
2961                    });
2962                    self.temp_vec.sort();
2963                    let tree = self.temp_vec.clone();
2964                    self.temp_vec.clear();
2965                    let newnode = tree
2966                        .iter()
2967                        .rev()
2968                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2969                    return self.init_as(key, newnode);
2970                } else {
2971                    if let Some(rw) = self.attempt_rw_unions(left, right) {
2972                        return self.init_as(key, rw);
2973                    }
2974                }
2975            }
2976            _ => {}
2977        }
2978
2979        if let Some(rw) = self.attempt_rw_union_2(left, right) {
2980            return self.init_as(key, rw);
2981        }
2982        self.init(key)
2983    }
2984
2985    pub(crate) fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
2986        debug_assert!(left_id != NodeId::MISSING);
2987        debug_assert!(right_id != NodeId::MISSING);
2988        if left_id == right_id {
2989            return left_id;
2990        }
2991        if left_id == NodeId::BOT || right_id == NodeId::BOT {
2992            return NodeId::BOT;
2993        }
2994        if left_id == NodeId::TS {
2995            return right_id;
2996        }
2997        if right_id == NodeId::TS {
2998            return left_id;
2999        }
3000        if left_id > right_id {
3001            return self.mk_inter(right_id, left_id);
3002        }
3003        let key = NodeKey {
3004            kind: Kind::Inter,
3005            left: left_id,
3006            right: right_id,
3007            extra: u32::MAX,
3008        };
3009        if let Some(id) = self.key_is_created(&key) {
3010            return *id;
3011        }
3012
3013        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3014            return self.init_as(key, rw);
3015        }
3016
3017        self.init(key)
3018    }
3019
3020    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3021        let node = NodeKey {
3022            kind,
3023            left: NodeId::MISSING,
3024            right: NodeId::MISSING,
3025            extra: u32::MAX,
3026        };
3027        self.init(node)
3028    }
3029
3030    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3031        let star = self.mk_star(body_id);
3032        self.mk_concat(body_id, star)
3033    }
3034    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3035        let opt = self.mk_opt(body_id);
3036        let mut nodes1 = vec![];
3037        for _ in lower..upper {
3038            nodes1.push(opt);
3039        }
3040        for _ in 0..lower {
3041            nodes1.push(body_id);
3042        }
3043        self.mk_concats(nodes1.into_iter())
3044    }
3045    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3046        self.mk_union(NodeId::EPS, body_id)
3047    }
3048
3049    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3050        let key = NodeKey {
3051            kind: Kind::Star,
3052            left: body_id,
3053            right: NodeId::MISSING,
3054            extra: 0,
3055        };
3056        if let Some(id) = self.key_is_created(&key) {
3057            return *id;
3058        }
3059        // _*{500} is still _*
3060        if body_id.is_kind(self, Kind::Star) {
3061            return body_id;
3062        }
3063        self.get_node_id(key)
3064    }
3065
3066    /// it's cheaper to check this once as an edge-case
3067    /// than to compute a 4th nullability bit for every node
3068    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3069        match self.get_kind(node_id) {
3070            Kind::End => Nullability::EMPTYSTRING,
3071            Kind::Begin => Nullability::EMPTYSTRING,
3072            Kind::Pred => Nullability::NEVER,
3073            Kind::Star => Nullability::ALWAYS,
3074            Kind::Inter | Kind::Concat => {
3075                let lnull = self.nullability_emptystring(node_id.left(self));
3076                let rnull = self.nullability_emptystring(node_id.right(self));
3077                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3078            }
3079            Kind::Union => {
3080                let lnull = self.nullability_emptystring(node_id.left(self));
3081                let rnull = self.nullability_emptystring(node_id.right(self));
3082                lnull.or(rnull)
3083            }
3084            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3085            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3086            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3087            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3088        }
3089    }
3090
3091    #[inline(always)]
3092    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3093        self.get_meta(node_id)
3094            .flags
3095            .nullability()
3096            .has(Nullability::CENTER.or(Nullability::END))
3097    }
3098
3099    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3100        self.get_only_nullability(node_id)
3101    }
3102
3103    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3104        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3105    }
3106
3107    pub fn pp(&self, node_id: NodeId) -> String {
3108        let mut s = String::new();
3109        self.ppw(&mut s, node_id).unwrap();
3110        s
3111    }
3112
3113    #[allow(dead_code)]
3114    pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3115        let nu = self.get_nulls_id(node_id);
3116        let nr = self.mb.nb.get_set_ref(nu);
3117        let s1 = format!("{:?}", nr);
3118        s1
3119    }
3120
3121    #[allow(dead_code)]
3122    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3123        match self.get_tregex(term_id) {
3124            TRegex::Leaf(node_id) => {
3125                let mut s = String::new();
3126                self.ppw(&mut s, *node_id).unwrap();
3127                s
3128            }
3129            TRegex::ITE(cond, then_id, else_id) => {
3130                format!(
3131                    "ITE({},{},{})",
3132                    self.solver_ref().pp(*cond),
3133                    self.ppt(*then_id),
3134                    self.ppt(*else_id)
3135                )
3136            }
3137        }
3138    }
3139
3140    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3141        if cfg!(feature = "graphviz") {
3142            match node_id {
3143                NodeId::MISSING => return write!(s, "MISSING"),
3144                NodeId::BOT => return write!(s, "⊥"),
3145                NodeId::TS => return write!(s, "_*"),
3146                NodeId::TOP => return write!(s, "_"),
3147                NodeId::EPS => return write!(s, "ε"),
3148                _ => {}
3149            }
3150        }
3151
3152        match node_id {
3153            NodeId::MISSING => return write!(s, "MISSING"),
3154            NodeId::BOT => return write!(s, "⊥"),
3155            NodeId::TS => return write!(s, "_*"),
3156            NodeId::TOP => return write!(s, "_"),
3157            NodeId::EPS => return write!(s, ""),
3158            _ => {}
3159        }
3160
3161        match self.get_kind(node_id) {
3162            Kind::End => return write!(s, r"\z"),
3163            Kind::Begin => return write!(s, r"\A"),
3164            Kind::Pred => {
3165                let psi = node_id.pred_tset(self);
3166                if psi == TSetId::EMPTY {
3167                    return write!(s, r"⊥");
3168                } else if psi == TSetId::FULL {
3169                    return write!(s, r"_");
3170                } else {
3171                    return write!(s, "{}", self.solver_ref().pp(psi));
3172                }
3173            }
3174            Kind::Inter => {
3175                write!(s, "(")?;
3176                self.ppw(s, node_id.left(self))?;
3177                write!(s, "&")?;
3178                let mut curr = node_id.right(self);
3179                while self.get_kind(curr) == Kind::Inter {
3180                    let n = curr.left(self);
3181                    self.ppw(s, n)?;
3182                    write!(s, "&")?;
3183                    curr = curr.right(self);
3184                }
3185                self.ppw(s, curr)?;
3186                write!(s, ")")
3187            }
3188            Kind::Union => {
3189                let left = node_id.left(self);
3190                let right = node_id.right(self);
3191                write!(s, "(")?;
3192                self.ppw(s, left)?;
3193                write!(s, "|")?;
3194                let mut curr = right;
3195                while self.get_kind(curr) == Kind::Union {
3196                    let n = curr.left(self);
3197                    self.ppw(s, n)?;
3198                    write!(s, "|")?;
3199                    curr = curr.right(self);
3200                }
3201                self.ppw(s, curr)?;
3202                write!(s, ")")
3203            }
3204            Kind::Concat => {
3205                let left = node_id.left(self);
3206                let right = node_id.right(self);
3207                if right.is_star(self) && right.left(self) == left {
3208                    self.ppw(s, left)?;
3209                    write!(s, "+")?;
3210                    return Ok(());
3211                }
3212                if right.is_concat(self) {
3213                    let rl = right.left(self);
3214                    if rl.is_star(self) && rl.left(self) == left {
3215                        self.ppw(s, left)?;
3216                        write!(s, "+")?;
3217                        return self.ppw(s, right.right(self));
3218                    }
3219                }
3220                if right.is_concat(self) && right.left(self) == left {
3221                    let mut num = 1;
3222                    let mut right = right;
3223                    while right.is_concat(self) && right.left(self) == left {
3224                        num = num + 1;
3225                        right = right.right(self);
3226                    }
3227                    // (|X){n} followed by X{m} -> X{m,m+n}
3228                    if let Some(inner) = left.is_opt_v(self) {
3229                        let mut inner_count = 0;
3230                        let mut right2 = right;
3231                        while right2.is_concat(self) && right2.left(self) == inner {
3232                            inner_count += 1;
3233                            right2 = right2.right(self);
3234                        }
3235                        if right2 == inner {
3236                            inner_count += 1;
3237                            self.ppw(s, inner)?;
3238                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3239                        }
3240                        if inner_count > 0 {
3241                            self.ppw(s, inner)?;
3242                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3243                            return self.ppw(s, right2);
3244                        }
3245                    }
3246                    self.ppw(s, left)?;
3247                    if right == left {
3248                        num = num + 1;
3249                        return write!(s, "{{{}}}", num);
3250                    }
3251                    if num <= 3 && left.is_pred(self) {
3252                        for _ in 1..num {
3253                            self.ppw(s, left)?;
3254                        }
3255                        return self.ppw(s, right);
3256                    } else {
3257                        write!(s, "{{{}}}", num)?;
3258                        return self.ppw(s, right);
3259                    }
3260                }
3261                self.ppw(s, left)?;
3262                self.ppw(s, right)
3263            }
3264            Kind::Star => {
3265                let left = node_id.left(self);
3266                let leftkind = self.get_kind(left);
3267                match leftkind {
3268                    Kind::Concat | Kind::Star | Kind::Compl => {
3269                        write!(s, "(")?;
3270                        self.ppw(s, left)?;
3271                        write!(s, ")")?;
3272                    }
3273                    _ => {
3274                        self.ppw(s, left)?;
3275                    }
3276                };
3277                write!(s, "*")
3278            }
3279            Kind::Compl => {
3280                write!(s, "~(")?;
3281                self.ppw(s, node_id.left(self))?;
3282                write!(s, ")")
3283            }
3284            Kind::Lookbehind => {
3285                let lbleft = self.get_lookbehind_prev(node_id);
3286                let lbinner = self.get_lookbehind_inner(node_id);
3287                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3288                if lbleft != NodeId::MISSING {
3289                    write!(s, "「")?;
3290                    self.ppw(s, lbleft)?;
3291                    write!(s, "」")?;
3292                }
3293
3294                write!(s, "(?<=")?;
3295                self.ppw(s, lbinner)?;
3296                write!(s, ")")
3297            }
3298            Kind::Lookahead => {
3299                let inner = self.get_lookahead_inner(node_id);
3300                write!(s, "(?=")?;
3301                self.ppw(s, inner)?;
3302                write!(s, ")")?;
3303                if self.get_lookahead_rel(node_id) != 0 {
3304                    write!(s, "{{")?;
3305                    let rel = self.get_lookahead_rel(node_id);
3306                    if rel == u32::MAX {
3307                        write!(s, "∅")?;
3308                    } else {
3309                        write!(s, "{}", rel)?;
3310                    }
3311                    write!(s, "}}")?;
3312                }
3313                if node_id.right(self) == NodeId::MISSING {
3314                    Ok(())
3315                } else {
3316                    write!(s, "『")?;
3317                    self.ppw(s, node_id.right(self))?;
3318                    write!(s, "』")
3319                }
3320            }
3321            Kind::Counted => {
3322                let body = node_id.left(self);
3323                let packed = self.get_extra(node_id);
3324                let step = packed & 0xFFFF;
3325                let best = packed >> 16;
3326                write!(s, "#(")?;
3327                self.ppw(s, body)?;
3328                write!(s, ")s{}b{}", step, best)
3329            }
3330        }
3331    }
3332
3333    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3334        self.mk_concat(node, NodeId::TS)
3335    }
3336
3337    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3338        let node_ts = self.mk_concat(node, NodeId::TS);
3339        self.mk_compl(node_ts)
3340    }
3341
3342
3343    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3344        let notset = self.solver().not_id(set);
3345        self.mk_pred(notset)
3346    }
3347
3348    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3349        let set_id = self.solver().u8_to_set_id(char as u8);
3350        self.mk_pred(set_id)
3351    }
3352
3353    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3354        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3355        self.mk_pred(rangeset)
3356    }
3357
3358    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3359        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3360        for &(lo, hi) in &ranges[1..] {
3361            let r = self.mk_range_u8(lo, hi);
3362            node = self.mk_union(node, r);
3363        }
3364        node
3365    }
3366
3367    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3368        let mut prefix = Vec::new();
3369        let mut curr = node;
3370        loop {
3371            if curr == NodeId::EPS {
3372                let full = !prefix.is_empty();
3373                return (prefix, full);
3374            }
3375            if curr == NodeId::BOT {
3376                break;
3377            }
3378            if self.get_kind(curr) == Kind::Pred {
3379                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3380                    Some(byte) => {
3381                        prefix.push(byte);
3382                        return (prefix, true);
3383                    }
3384                    None => break, // multi-byte pred: pattern not fully consumed
3385                }
3386            }
3387            if self.get_kind(curr) != Kind::Concat {
3388                break;
3389            }
3390            let left = curr.left(self);
3391            if self.get_kind(left) != Kind::Pred {
3392                break;
3393            }
3394            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3395                Some(byte) => prefix.push(byte),
3396                None => break,
3397            }
3398            curr = curr.right(self);
3399        }
3400        (prefix, false)
3401    }
3402
3403    #[allow(dead_code)]
3404    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3405        let mut result = NodeId::EPS;
3406        for byte in raw_str.into_iter().rev() {
3407            let node = self.mk_u8(*byte);
3408            result = self.mk_concat(node, result);
3409        }
3410        result
3411    }
3412
3413    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3414        let mut result = NodeId::EPS;
3415        for byte in raw_str.bytes().rev() {
3416            let node = self.mk_u8(byte);
3417            result = self.mk_concat(node, result);
3418        }
3419        result
3420    }
3421
3422    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3423        let mut sorted: Vec<NodeId> = nodes.collect();
3424        if sorted.len() <= 1 {
3425            return sorted.pop().unwrap_or(NodeId::BOT);
3426        }
3427        sorted.sort();
3428        sorted.dedup();
3429        sorted.retain(|&x| x != NodeId::BOT);
3430        if sorted.is_empty() {
3431            return NodeId::BOT;
3432        }
3433        if sorted.len() > 16 {
3434            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3435            let mut non_concat: Vec<NodeId> = Vec::new();
3436            for &n in &sorted {
3437                if self.get_kind(n) == Kind::Concat {
3438                    by_head.entry(self.get_left(n)).or_default().push(n);
3439                } else {
3440                    non_concat.push(n);
3441                }
3442            }
3443            // absorb non-concat nodes whose id matches a by_head key:
3444            // x | x·t1 | x·t2 → x·(ε|t1|t2)
3445            // without this, factoring reproduces identical groups and
3446            // recurses infinitely.
3447            let mut absorbed: Vec<NodeId> = Vec::new();
3448            for &n in &non_concat {
3449                if by_head.contains_key(&n) {
3450                    absorbed.push(n);
3451                }
3452            }
3453            if !absorbed.is_empty() {
3454                non_concat.retain(|n| !absorbed.contains(n));
3455            }
3456            if by_head.len() < sorted.len() {
3457                let mut groups: Vec<NodeId> = non_concat;
3458                for (head, tails) in by_head {
3459                    let mut tail_nodes: Vec<NodeId> =
3460                        tails.iter().map(|&n| self.get_right(n)).collect();
3461                    if absorbed.contains(&head) {
3462                        tail_nodes.push(NodeId::EPS);
3463                    }
3464                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3465                    let factored = self.mk_concat(head, tail_union);
3466                    groups.push(factored);
3467                }
3468                groups.sort();
3469                groups.dedup();
3470                return self.mk_unions_balanced(&groups);
3471            }
3472        }
3473        self.mk_unions_balanced(&sorted)
3474    }
3475
3476    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3477        match nodes.len() {
3478            0 => NodeId::BOT,
3479            1 => nodes[0],
3480            n => {
3481                let mid = n / 2;
3482                let left = self.mk_unions_balanced(&nodes[..mid]);
3483                let right = self.mk_unions_balanced(&nodes[mid..]);
3484                self.mk_union(left, right)
3485            }
3486        }
3487    }
3488
3489    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3490        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3491    }
3492
3493    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3494        nodes
3495            .rev()
3496            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3497    }
3498}
3499
3500impl RegexBuilder {
3501    #[allow(dead_code)]
3502    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3503        match self.get_tregex(term_id).clone() {
3504            TRegex::Leaf(node_id) => {
3505                if NodeId::BOT == node_id {
3506                    vec![]
3507                } else {
3508                    vec![node_id]
3509                }
3510            }
3511            TRegex::ITE(_, then_id, else_id) => {
3512                let mut then_nodes = self.extract_sat(then_id);
3513                let mut else_nodes = self.extract_sat(else_id);
3514                then_nodes.append(&mut else_nodes);
3515                then_nodes
3516            }
3517        }
3518    }
3519
3520    pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3521        debug_assert!(self.get_kind(start) == Kind::Union);
3522        let mut curr = start;
3523        while self.get_kind(curr) == Kind::Union {
3524            f(curr.left(self));
3525            curr = curr.right(self);
3526        }
3527        f(curr);
3528    }
3529
3530    pub(crate) fn iter_unions_b(
3531        &mut self,
3532        curr: NodeId,
3533        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3534    ) {
3535        let mut curr = curr;
3536        while self.get_kind(curr) == Kind::Union {
3537            f(self, curr.left(self));
3538            curr = curr.right(self);
3539        }
3540        f(self, curr);
3541    }
3542
3543    pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3544        if !self.contains_look(node_id) {
3545            return Some(node_id);
3546        }
3547        match self.get_kind(node_id) {
3548            Kind::Pred | Kind::Begin | Kind::End => return Some(node_id),
3549            Kind::Concat => {
3550                let left = node_id.left(self);
3551                let right = node_id.right(self);
3552                let elim_l = self.try_elim_lookarounds(left)?;
3553                let elim_r = self.try_elim_lookarounds(right)?;
3554                let rw = self.mk_concat(elim_l, elim_r);
3555                Some(rw)
3556            }
3557            Kind::Union => {
3558                let left = node_id.left(self);
3559                let right = node_id.right(self);
3560                let elim_l = self.try_elim_lookarounds(left)?;
3561                let elim_r = self.try_elim_lookarounds(right)?;
3562                let rw = self.mk_union(elim_l, elim_r);
3563                Some(rw)
3564            }
3565
3566            Kind::Star => {
3567                let body = node_id.left(self);
3568                let elim_l = self.try_elim_lookarounds(body)?;
3569                Some(self.mk_star(elim_l))
3570            }
3571            Kind::Compl => {
3572                let left = node_id.left(self);
3573                let elim_l = self.try_elim_lookarounds(left)?;
3574                Some(self.mk_compl(elim_l))
3575            }
3576            Kind::Lookahead => {
3577                let rel = self.get_lookahead_rel(node_id);
3578                if rel != 0 {
3579                    return None;
3580                }
3581                let lbody = self.get_lookahead_inner(node_id);
3582                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3583                let elim_l = self.try_elim_lookarounds(lbody)?;
3584                let elim_r = self.try_elim_lookarounds(ltail)?;
3585                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3586                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3587                let rw = self.mk_inter(lbody_ts, ltail_ts);
3588                Some(rw)
3589            }
3590            Kind::Lookbehind => {
3591                let linner = self.get_lookbehind_inner(node_id);
3592                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3593                let elim_l = self.try_elim_lookarounds(linner)?;
3594                let elim_r = self.try_elim_lookarounds(lprev)?;
3595                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3596                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3597                let rw = self.mk_inter(lbody_ts, ltail_ts);
3598                Some(rw)
3599            }
3600            Kind::Inter => {
3601                let left = node_id.left(self);
3602                let right = node_id.right(self);
3603                let elim_l = self.try_elim_lookarounds(left)?;
3604                let elim_r = self.try_elim_lookarounds(right)?;
3605                let rw = self.mk_inter(elim_l, elim_r);
3606                Some(rw)
3607            }
3608            Kind::Counted => None,
3609        }
3610    }
3611
3612    /// R & _+ is a safe overapproximation of R that's nonempty
3613    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3614        if self.nullability(node) == Nullability::NEVER {
3615            node
3616        } else {
3617            self.mk_inter(NodeId::TOPPLUS, node)
3618        }
3619    }
3620
3621    fn iter_find_stack(
3622        &self,
3623        stack: &mut Vec<TRegexId>,
3624        mut f: impl FnMut(NodeId) -> bool,
3625    ) -> bool {
3626        loop {
3627            match stack.pop() {
3628                None => return false,
3629                Some(curr) => match self.get_tregex(curr) {
3630                    TRegex::Leaf(n) => {
3631                        let mut curr = *n;
3632                        while curr != NodeId::BOT {
3633                            match self.get_kind(curr) {
3634                                Kind::Union => {
3635                                    if f(curr.left(self)) {
3636                                        return true;
3637                                    }
3638                                    curr = curr.right(self);
3639                                }
3640                                _ => {
3641                                    if f(*n) {
3642                                        return true;
3643                                    }
3644                                    curr = NodeId::BOT;
3645                                }
3646                            }
3647                        }
3648                    }
3649                    TRegex::ITE(_, then_id, else_id) => {
3650                        if *else_id != TRegexId::BOT {
3651                            stack.push(*else_id);
3652                        }
3653                        stack.push(*then_id);
3654                    }
3655                },
3656            }
3657        }
3658    }
3659
3660    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3661        if node == NodeId::BOT {
3662            return Some(true);
3663        }
3664        if self.nullability(node) != Nullability::NEVER {
3665            return Some(false);
3666        }
3667        if let Some(cached) = self.cache_empty.get(&node) {
3668            if cached.is_checked() {
3669                return Some(cached.is_empty());
3670            }
3671        }
3672        let node = if !self.contains_look(node) {
3673            node
3674        } else {
3675            let elim = self.try_elim_lookarounds(node)?;
3676            elim
3677        };
3678        let isempty_flag = self.is_empty_lang_internal(node);
3679
3680        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3681    }
3682
3683    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3684        // without inter, no need to check
3685        if !self.get_meta_flags(initial_node).contains_inter() {
3686            return Ok(NodeFlags::ZERO);
3687        }
3688
3689        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3690        let mut todo: VecDeque<NodeId> = VecDeque::new();
3691        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3692        let mut stack = Vec::new();
3693        stack.push(begin_der);
3694        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3695            visited.insert(node, initial_node);
3696            let nullability = self.nullability(node);
3697            if nullability != Nullability::NEVER {
3698                true
3699            } else {
3700                todo.push_back(node);
3701                false
3702            }
3703        });
3704        if found_nullable_right_away {
3705            return Ok(NodeFlags::ZERO);
3706        }
3707
3708        todo.push_back(initial_node);
3709        let isempty_flag: NodeFlags;
3710        let mut found_node = NodeId::BOT;
3711
3712        loop {
3713            match todo.pop_front() {
3714                None => {
3715                    isempty_flag = NodeFlags::IS_EMPTY;
3716                    break;
3717                }
3718                Some(outer) => {
3719                    if let Some(cached) = self.cache_empty.get(&outer) {
3720                        if cached.is_checked() {
3721                            if cached.is_empty() {
3722                                continue;
3723                            } else {
3724                                return Ok(NodeFlags::ZERO);
3725                            }
3726                        }
3727                    }
3728
3729                    let derivative = self.der(outer, Nullability::CENTER)?;
3730
3731                    stack.push(derivative);
3732
3733                    let found_null = self.iter_find_stack(&mut stack, |node| {
3734                        if visited.contains_key(&node) {
3735                            false
3736                        } else {
3737                            found_node = node;
3738                            if !self.get_meta_flags(node).contains_inter() {
3739                                return true;
3740                            } else {
3741                                visited.insert(node, outer);
3742                                todo.push_front(node);
3743                                self.any_nonbegin_nullable(node)
3744                            }
3745                        }
3746                    });
3747                    if found_null {
3748                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3749                        isempty_flag = NodeFlags::ZERO;
3750                        break;
3751                    }
3752                }
3753            }
3754        }
3755
3756        self.cache_empty.insert(
3757            initial_node,
3758            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3759        );
3760        Ok(isempty_flag)
3761    }
3762
3763    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3764    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3765        if larger_lang == smaller_lang {
3766            return Some(true);
3767        }
3768
3769        // assess initial nullability
3770        if self
3771            .nullability(larger_lang)
3772            .not()
3773            .and(self.nullability(smaller_lang))
3774            != Nullability::NEVER
3775        {
3776            return Some(false);
3777        }
3778
3779        // check language nullability
3780        // if (B &~ A) ≡ ⊥ then B ⊆ A
3781        // this means  L(B) \ L(A) = {}
3782        // eg. (a &~ .*) = ⊥ means .* subsumes a
3783        let (smaller_lang, larger_lang) =
3784            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3785                let wrap = |b: &mut Self, n: NodeId| {
3786                    let tmp = b.mk_concat(n, NodeId::TS);
3787                    b.mk_concat(NodeId::TS, tmp)
3788                };
3789                (wrap(self, smaller_lang), wrap(self, larger_lang))
3790            } else {
3791                (smaller_lang, larger_lang)
3792            };
3793
3794        let nota = self.mk_compl(larger_lang);
3795        let diff = self.mk_inter(smaller_lang, nota);
3796        self.is_empty_lang(diff)
3797    }
3798}