Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_lookaround(self) -> bool {
134        self.has(MetaFlags::CONTAINS_LOOKAROUND)
135    }
136    pub(crate) fn contains_inter(self) -> bool {
137        self.has(MetaFlags::CONTAINS_INTER)
138    }
139
140    pub(crate) fn all_contains_flags(self) -> MetaFlags {
141        self.and(
142            MetaFlags::CONTAINS_LOOKAROUND
143                .or(MetaFlags::CONTAINS_ANCHORS)
144                .or(MetaFlags::CONTAINS_INTER),
145        )
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152    #[default]
153    Pred,
154    Star,
155    Begin,
156    End,
157    Concat,
158    Union,
159    Compl,
160    Lookbehind,
161    Lookahead,
162    Inter,
163    Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168    flags: MetaFlags,
169    nulls: NullsId,
170}
171
172struct MetadataBuilder {
173    num_created: u32,
174    solver: Solver,
175    nb: NullsBuilder,
176    index: FxHashMap<Metadata, MetadataId>,
177    pub array: Vec<Metadata>,
178}
179
180mod helpers {
181    pub(crate) fn incr_rel(n1: u32) -> u32 {
182        match n1.overflowing_add(1) {
183            (_, true) => u32::MAX,
184            (res, false) => res,
185        }
186    }
187}
188
189impl MetadataBuilder {
190    fn new() -> MetadataBuilder {
191        Self {
192            index: FxHashMap::default(),
193            array: vec![Metadata {
194                flags: MetaFlags::ZERO,
195                nulls: NullsId::EMPTY,
196            }],
197            solver: Solver::new(),
198            num_created: 0,
199            nb: NullsBuilder::new(),
200        }
201    }
202
203    fn init(&mut self, inst: Metadata) -> MetadataId {
204        self.num_created += 1;
205        let new_id = MetadataId(self.num_created);
206        self.index.insert(inst.clone(), new_id);
207        self.array.push(inst);
208        new_id
209    }
210
211    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212        match self.index.get(&inst) {
213            Some(&id) => id,
214            None => self.init(inst),
215        }
216    }
217
218    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219        &self.array[inst.0 as usize]
220    }
221
222    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223        setflags.all_contains_flags()
224    }
225
226    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227        let left = &self.array[body.0 as usize].flags;
228        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229        // BOT* = EPS (empty string only), not infinite
230        let inf = if body_id == NodeId::BOT {
231            MetaFlags::ZERO
232        } else {
233            MetaFlags::INFINITE_LENGTH
234        };
235        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236    }
237
238    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239        let left = &self.array[left_id.0 as usize].flags;
240        let null = left.nullability().not().and(Nullability::ALWAYS);
241        let contains = self.get_contains(*left);
242        MetaFlags::with_nullability(
243            null,
244            contains
245                .or(MetaFlags::INFINITE_LENGTH)
246                .or(MetaFlags::CONTAINS_COMPL),
247        )
248    }
249
250    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251        let left = &self.array[left_id.0 as usize].flags;
252        let right = &self.array[right_id.0 as usize].flags;
253        let null = left.nullability().and(right.nullability());
254        let contains = self.get_contains(left.or(*right));
255        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256        MetaFlags::with_nullability(null, contains.or(len))
257    }
258
259    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260        let left = &self.array[left_id.0 as usize].flags;
261        let right = &self.array[right_id.0 as usize].flags;
262        let null = left.nullability().and(right.nullability());
263        let contains = self
264            .get_contains(left.or(*right))
265            .or(MetaFlags::CONTAINS_INTER);
266        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267        MetaFlags::with_nullability(null, contains.or(len))
268    }
269
270    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271        let left = &self.array[left_id.0 as usize].flags;
272        let right = &self.array[right_id.0 as usize].flags;
273        let null = left.nullability().or(right.nullability());
274        let contains = self.get_contains(left.or(*right));
275        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276        MetaFlags::with_nullability(null, contains.or(len))
277    }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282    pub(crate) kind: Kind,
283    pub(crate) left: NodeId,
284    pub(crate) right: NodeId,
285    pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290    Leaf(NodeId),
291    ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301    fn is_checked(self) -> bool {
302        self.0 >= NodeFlags::IS_CHECKED.0
303    }
304    fn is_empty(self) -> bool {
305        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306    }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317    mb: MetadataBuilder,
318    temp_vec: Vec<NodeId>,
319    num_created: u32,
320    index: FxHashMap<NodeKey, NodeId>,
321    array: Vec<NodeKey>,
322    metadata: Vec<MetadataId>,
323    reversed: Vec<NodeId>,
324    cache_empty: FxHashMap<NodeId, NodeFlags>,
325    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326    tr_array: Vec<TRegex<TSetId>>,
327    tr_der_center: Vec<TRegexId>,
328    tr_der_begin: Vec<TRegexId>,
329    flags: BuilderFlags,
330    /// maximum lookahead context distance before returning `AnchorLimit`.
331    pub lookahead_context_max: u32,
332    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336macro_rules! iter_inter {
337    ($compiler:ident, $start:ident, $expression:expr) => {
338        let mut curr = $start;
339        while $compiler.get_kind(curr) == Kind::Inter {
340            let left = $compiler.get_left(curr);
341            $expression(left);
342            curr = $compiler.get_right(curr);
343        }
344        $expression(curr);
345    };
346}
347
348impl NodeId {
349    fn is_missing(&self) -> bool {
350        *self == NodeId::MISSING
351    }
352    #[inline]
353    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
354        b.get_flags_contains(self)
355    }
356    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
357        if self == tail {
358            true
359        } else if self.is_kind(b, Kind::Concat) {
360            self.right(b).has_concat_tail(b, tail)
361        } else {
362            false
363        }
364    }
365    fn missing_to_eps(&self) -> NodeId {
366        if *self == NodeId::MISSING {
367            NodeId::EPS
368        } else {
369            *self
370        }
371    }
372
373    #[inline]
374    fn kind(self, b: &RegexBuilder) -> Kind {
375        b.get_kind(self)
376    }
377    #[inline]
378    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
379        b.get_kind(self) == k
380    }
381    #[inline]
382    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
383        b.nullability(self) == Nullability::NEVER
384    }
385    #[inline]
386    fn nullability(self, b: &RegexBuilder) -> Nullability {
387        b.nullability(self)
388    }
389
390    #[inline]
391    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
392        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
393    }
394    #[inline]
395    pub fn left(self, b: &RegexBuilder) -> NodeId {
396        b.get_left(self)
397    }
398
399    #[inline]
400    pub fn right(self, b: &RegexBuilder) -> NodeId {
401        b.get_right(self)
402    }
403
404    #[inline]
405    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
406        b.der(self, mask)
407    }
408
409    #[inline]
410    fn extra(self, b: &RegexBuilder) -> u32 {
411        b.get_extra(self)
412    }
413
414    #[inline]
415    fn is_pred(self, b: &RegexBuilder) -> bool {
416        b.get_kind(self) == Kind::Pred
417    }
418    #[inline]
419    fn is_lookahead(self, b: &RegexBuilder) -> bool {
420        b.get_kind(self) == Kind::Lookahead
421    }
422    #[inline]
423    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
424        debug_assert!(self.is_pred(b));
425        TSetId(b.get_extra(self))
426    }
427    #[inline]
428    fn is_star(self, b: &RegexBuilder) -> bool {
429        if NodeId::EPS == self {
430            return false;
431        }
432        b.get_kind(self) == Kind::Star
433    }
434
435    #[inline]
436    pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
437        b.get_kind(self) == Kind::Inter
438    }
439
440    #[inline]
441    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
442        if self.is_concat(b) {
443            let r = self.right(b);
444            return r.is_star(b) && r.left(b) == self.left(b);
445        }
446        false
447    }
448
449    #[inline]
450    fn is_concat(self, b: &RegexBuilder) -> bool {
451        b.get_kind(self) == Kind::Concat
452    }
453
454    #[inline]
455    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
456        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
457            Some(b.get_right(self))
458        } else {
459            None
460        }
461    }
462
463    #[inline]
464    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
465        if b.get_kind(self) == Kind::Concat {
466            let left = self.left(b);
467            let right = self.right(b);
468            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
469                return right == NodeId::END;
470            }
471        }
472        false
473    }
474
475    #[inline]
476    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
477        if NodeId::EPS == self {
478            return None;
479        }
480        if self.is_star(b) && self.left(b).is_pred(b) {
481            Some(self.left(b))
482        } else {
483            None
484        }
485    }
486
487    #[inline]
488    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
489        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
490            let middle = self.right(b);
491            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
492                return Some(middle.left(b));
493            }
494        }
495        None
496    }
497
498    #[inline]
499    pub(crate) fn iter_union(
500        self,
501        b: &mut RegexBuilder,
502        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
503    ) {
504        let mut curr: NodeId = self;
505        while curr.kind(b) == Kind::Union {
506            f(b, curr.left(b));
507            curr = curr.right(b);
508        }
509        f(b, curr);
510    }
511
512    #[inline]
513    pub(crate) fn iter_union_while(
514        self,
515        b: &mut RegexBuilder,
516        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
517    ) {
518        let mut curr: NodeId = self;
519        let mut continue_loop = true;
520        while continue_loop && curr.kind(b) == Kind::Union {
521            continue_loop = f(b, curr.left(b));
522            curr = curr.right(b);
523        }
524        if continue_loop {
525            f(b, curr);
526        }
527    }
528}
529
530impl Default for RegexBuilder {
531    fn default() -> Self {
532        Self::new()
533    }
534}
535
536impl RegexBuilder {
537    pub fn new() -> RegexBuilder {
538        let mut inst = Self {
539            mb: MetadataBuilder::new(),
540            array: Vec::new(),
541            index: FxHashMap::default(),
542            cache_empty: FxHashMap::default(),
543            tr_array: Vec::new(),
544            tr_cache: FxHashMap::default(),
545            flags: BuilderFlags::ZERO,
546            lookahead_context_max: 800,
547            num_created: 0,
548            metadata: Vec::new(),
549            reversed: Vec::new(),
550            tr_der_center: Vec::new(),
551            tr_der_begin: Vec::new(),
552            temp_vec: Vec::new(),
553            mk_binary_memo: FxHashMap::default(),
554            clean_cache: FxHashMap::default(),
555        };
556        inst.array.push(NodeKey::default());
557        inst.mk_pred(TSetId::EMPTY);
558        inst.mk_star(NodeId::BOT);
559        inst.mk_pred(TSetId::FULL);
560        inst.mk_star(NodeId::TOP);
561        let top_plus_id = inst.mk_plus(NodeId::TOP);
562        inst.mk_unset(Kind::Begin);
563        inst.mk_unset(Kind::End);
564        debug_assert!(top_plus_id == NodeId::TOPPLUS);
565
566        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
567        inst.mk_leaf(NodeId::BOT);
568        inst.mk_leaf(NodeId::EPS);
569        inst.mk_leaf(NodeId::TOP);
570        inst.mk_leaf(NodeId::TS);
571
572        inst.flags = BuilderFlags::SUBSUME;
573        inst
574    }
575
576    #[inline]
577    pub fn solver_ref(&self) -> &Solver {
578        &self.mb.solver
579    }
580
581    #[inline]
582    pub fn solver(&mut self) -> &mut Solver {
583        &mut self.mb.solver
584    }
585
586    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
587        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
588        self.tr_cache.insert(inst.clone(), new_id);
589        self.tr_array.push(inst);
590        new_id
591    }
592
593    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
594        match self.tr_cache.get(&inst) {
595            Some(&id) => id,
596            None => self.tr_init(inst),
597        }
598    }
599
600    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
601        &self.tr_array[inst.0 as usize]
602    }
603
604    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
605        let solv = self.solver();
606        !solv.is_sat_id(cond1, cond2)
607    }
608
609    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
610        let term = TRegex::<TSetId>::Leaf(node_id);
611        self.get_tregex_id(term)
612    }
613
614    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
615        let tmp_inst = TRegex::ITE(cond, _then, _else);
616        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
617            return *cached;
618        }
619        if _then == _else {
620            return _then;
621        }
622        if self.solver().is_full_id(cond) {
623            return _then;
624        }
625        if self.solver().is_empty_id(cond) {
626            return _else;
627        }
628        let _clean_then = match self.tr_array[_then.0 as usize] {
629            TRegex::Leaf(_) => _then,
630            _ => self.clean(cond, _then),
631        };
632        let notcond = self.solver().not_id(cond);
633        let _clean_else = match self.tr_array[_else.0 as usize] {
634            TRegex::Leaf(_) => _else,
635            _ => self.clean(notcond, _else),
636        };
637
638        if _clean_then == _clean_else {
639            return _clean_then;
640        }
641        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
642        match self.get_tregex(_clean_then) {
643            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
644                let _cond_then = *leftcond;
645                let new_then = *_inner_then;
646                let sand = self.solver().and_id(cond, _cond_then);
647                return self.mk_ite(sand, new_then, _clean_else);
648            }
649            _ => {}
650        }
651        // attempt right side flattening:
652        match self.get_tregex(_clean_else) {
653            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
654            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
655                let e2clone = *_e2;
656                let new_cond = self.mb.solver.or_id(cond, *_c2);
657                return self.mk_ite(new_cond, _clean_then, e2clone);
658            }
659            _ => {}
660        }
661
662        if _clean_then == TRegexId::BOT {
663            let flip_cond = self.solver().not_id(cond);
664            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
665            return cleaned_id;
666        }
667
668        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
669    }
670
671    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
672        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
673            return cached;
674        }
675        let result = match *self.get_tregex(tterm) {
676            TRegex::Leaf(_) => tterm,
677            TRegex::ITE(alpha, _then_id, _else_id) => {
678                let notalpha = self.mb.solver.not_id(alpha);
679                if self.mb.solver.unsat_id(beta, alpha) {
680                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
681                    self.clean(beta, _else_id)
682                } else if self.unsat(beta, notalpha) {
683                    // beta ⊆ alpha, so beta ∧ alpha = beta
684                    self.clean(beta, _then_id)
685                } else {
686                    let tc = self.mb.solver.and_id(beta, alpha);
687                    let ec = self.mb.solver.and_id(beta, notalpha);
688                    let new_then = self.clean(tc, _then_id);
689                    let new_else = self.clean(ec, _else_id);
690                    self.mk_ite(alpha, new_then, new_else)
691                }
692            }
693        };
694        self.clean_cache.insert((beta, tterm), result);
695        result
696    }
697
698    fn mk_unary(
699        &mut self,
700        term: TRegexId,
701        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
702    ) -> TRegexId {
703        match self.tr_array[term.0 as usize] {
704            TRegex::Leaf(node_id) => {
705                let applied = apply(self, node_id);
706                self.mk_leaf(applied)
707            }
708            TRegex::ITE(c1, _then, _else) => {
709                let _then1 = self.mk_unary(_then, apply);
710                let _else1 = self.mk_unary(_else, apply);
711                self.mk_ite(c1, _then1, _else1)
712            }
713        }
714    }
715
716    fn mk_binary_result(
717        &mut self,
718        left: TRegexId,
719        right: TRegexId,
720        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
721    ) -> Result<TRegexId, AlgebraError> {
722        match self.tr_array[left.0 as usize] {
723            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
724                TRegex::Leaf(right_node_id) => {
725                    let applied = apply(self, left_node_id, right_node_id)?;
726                    Ok(self.mk_leaf(applied))
727                }
728                TRegex::ITE(c2, _then, _else) => {
729                    let then2 = self.mk_binary_result(left, _then, apply)?;
730                    let else2 = self.mk_binary_result(left, _else, apply)?;
731                    Ok(self.mk_ite(c2, then2, else2))
732                }
733            },
734            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
735                TRegex::Leaf(_) => {
736                    let then2 = self.mk_binary_result(_then1, right, apply)?;
737                    let else2 = self.mk_binary_result(_else1, right, apply)?;
738                    Ok(self.mk_ite(c1, then2, else2))
739                }
740                TRegex::ITE(c2, _then2, _else2) => {
741                    if c1 == c2 {
742                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
743                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
744                        return Ok(self.mk_ite(c1, _then, _else));
745                    }
746                    if c1.0 > c2.0 {
747                        let _then = self.mk_binary_result(_then1, right, apply)?;
748                        let _else = self.mk_binary_result(_else1, right, apply)?;
749                        Ok(self.mk_ite(c1, _then, _else))
750                    } else {
751                        let _then = self.mk_binary_result(left, _then2, apply)?;
752                        let _else = self.mk_binary_result(left, _else2, apply)?;
753                        Ok(self.mk_ite(c2, _then, _else))
754                    }
755                }
756            },
757        }
758    }
759
760    fn mk_binary(
761        &mut self,
762        left: TRegexId,
763        right: TRegexId,
764        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
765    ) -> TRegexId {
766        self.mk_binary_memo.clear();
767        self.mk_binary_inner(left, right, apply)
768    }
769
770    fn mk_binary_inner(
771        &mut self,
772        left: TRegexId,
773        right: TRegexId,
774        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
775    ) -> TRegexId {
776        if left == right {
777            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
778        }
779        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
780            return cached;
781        }
782        let result = match self.tr_array[left.0 as usize] {
783            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
784                TRegex::Leaf(right_node_id) => {
785                    let applied = apply(self, left_node_id, right_node_id);
786                    self.mk_leaf(applied)
787                }
788                TRegex::ITE(c2, _then, _else) => {
789                    let then2 = self.mk_binary_inner(left, _then, apply);
790                    let else2 = self.mk_binary_inner(left, _else, apply);
791                    self.mk_ite(c2, then2, else2)
792                }
793            },
794            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
795                TRegex::Leaf(_) => {
796                    let then2 = self.mk_binary_inner(_then1, right, apply);
797                    let else2 = self.mk_binary_inner(_else1, right, apply);
798                    self.mk_ite(c1, then2, else2)
799                }
800                TRegex::ITE(c2, _then2, _else2) => {
801                    if c1 == c2 {
802                        let _then = self.mk_binary_inner(_then1, _then2, apply);
803                        let _else = self.mk_binary_inner(_else1, _else2, apply);
804                        self.mk_ite(c1, _then, _else)
805                    } else if c1.0 > c2.0 {
806                        let _then = self.mk_binary_inner(_then1, right, apply);
807                        let _else = self.mk_binary_inner(_else1, right, apply);
808                        self.mk_ite(c1, _then, _else)
809                    } else {
810                        let _then = self.mk_binary_inner(left, _then2, apply);
811                        let _else = self.mk_binary_inner(left, _else2, apply);
812                        self.mk_ite(c2, _then, _else)
813                    }
814                }
815            },
816        };
817        self.mk_binary_memo.insert((left, right), result);
818        result
819    }
820
821    pub fn get_nulls(
822        &mut self,
823        pending_rel: u32,
824        mask: Nullability,
825        acc: &mut BTreeSet<NullState>,
826        node_id: NodeId,
827    ) {
828        debug_assert!(node_id != NodeId::MISSING);
829        if !self.is_nullable(node_id, mask) {
830            return;
831        }
832        match self.get_kind(node_id) {
833            Kind::Pred => {}
834            Kind::End => {
835                if mask.has(Nullability::END) {
836                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
837                }
838            }
839            Kind::Begin => {
840                if mask.has(Nullability::BEGIN) {
841                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
842                }
843            }
844            Kind::Concat => {
845                let new_mask = self.nullability(node_id).and(mask);
846                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
847                if self.is_nullable(node_id.left(self), mask) {
848                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
849                }
850            }
851            Kind::Union => {
852                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
853                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
854            }
855            Kind::Inter => {
856                let new_mask = self.nullability(node_id).and(mask);
857                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
858                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
859            }
860            Kind::Star => {
861                acc.insert(NullState::new(mask, pending_rel));
862                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
863            }
864            Kind::Compl => {
865                if !self.is_nullable(node_id.left(self), mask) {
866                    acc.insert(NullState::new(mask, 0));
867                }
868            }
869            Kind::Lookbehind => {
870                let new_mask = self.nullability(node_id).and(mask);
871                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
872                if node_id.right(self) != NodeId::MISSING {
873                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
874                }
875            }
876            Kind::Lookahead => {
877                let la_inner = self.get_lookahead_inner(node_id);
878                if self.is_nullable(la_inner, mask) {
879                    let rel = self.get_lookahead_rel(node_id);
880                    if rel != u32::MAX {
881                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
882                    }
883                    // tail only contributes when body is satisfied
884                    let la_tail = self.get_lookahead_tail(node_id);
885                    if la_tail != NodeId::MISSING {
886                        self.get_nulls(pending_rel, mask, acc, la_tail);
887                    }
888                }
889            }
890            Kind::Counted => {
891                let packed = self.get_extra(node_id);
892                let best = packed >> 16;
893                if best > 0 {
894                    acc.insert(NullState::new(mask, pending_rel + best));
895                }
896            }
897        }
898    }
899
900    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
901        self.get_meta_flags(node_id)
902            .has(MetaFlags::CONTAINS_LOOKAROUND)
903    }
904
905    /// whether node contains `^`, `$`, `\A`, `\z` anchors.
906    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
907        self.get_meta_flags(node_id)
908            .has(MetaFlags::CONTAINS_ANCHORS)
909    }
910
911    pub fn is_infinite(&self, node_id: NodeId) -> bool {
912        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
913    }
914
915    /// returns (min_length, max_length). max = u32::MAX means unbounded.
916    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
917        if self.is_infinite(node_id) {
918            if self.get_kind(node_id) == Kind::Inter {
919                self.get_bounded_length(node_id)
920            } else {
921                (self.get_min_length_only(node_id), u32::MAX)
922            }
923        } else {
924            self.get_bounded_length(node_id)
925        }
926    }
927
928    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
929        if node_id == NodeId::EPS {
930            return (0, 0);
931        }
932        match self.get_kind(node_id) {
933            Kind::End | Kind::Begin => (0, 0),
934            Kind::Pred => (1, 1),
935            Kind::Concat => {
936                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
937                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
938                (lmin + rmin, lmax.saturating_add(rmax))
939            }
940            Kind::Union => {
941                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
942                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
943                (lmin.min(rmin), lmax.max(rmax))
944            }
945            Kind::Inter => {
946                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
947                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
948                (lmin.max(rmin), lmax.min(rmax))
949            }
950            Kind::Lookahead => {
951                let body = node_id.left(self);
952                if self.is_infinite(body) {
953                    return (0, u32::MAX);
954                }
955                let right = node_id.right(self);
956                if right.is_missing() {
957                    (0, 0)
958                } else {
959                    self.get_min_max_length(right)
960                }
961            }
962            Kind::Counted => (0, 0),
963            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
964        }
965    }
966
967    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
968        match self.get_kind(node_id) {
969            Kind::End | Kind::Begin => Some(0),
970            Kind::Pred => Some(1),
971            Kind::Concat => {
972                let l = self.get_fixed_length(node_id.left(self))?;
973                let r = self.get_fixed_length(node_id.right(self))?;
974                Some(l + r)
975            }
976            Kind::Union => {
977                let l = self.get_fixed_length(node_id.left(self))?;
978                let r = self.get_fixed_length(node_id.right(self))?;
979                if l == r {
980                    Some(l)
981                } else {
982                    None
983                }
984            }
985            Kind::Inter => {
986                let l = self.get_fixed_length(node_id.left(self))?;
987                let r = self.get_fixed_length(node_id.right(self))?;
988                if l == r {
989                    Some(l)
990                } else {
991                    None
992                }
993            }
994            Kind::Lookahead => {
995                let right = node_id.right(self);
996                if right.is_missing() {
997                    Some(0)
998                } else {
999                    self.get_fixed_length(right)
1000                }
1001            }
1002            Kind::Counted => Some(0),
1003            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1004        }
1005    }
1006
1007    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1008        match self.get_kind(node_id) {
1009            Kind::End | Kind::Begin => 0,
1010            Kind::Pred => 1,
1011            Kind::Concat => {
1012                self.get_min_length_only(node_id.left(self))
1013                    + self.get_min_length_only(node_id.right(self))
1014            }
1015            Kind::Union => self
1016                .get_min_length_only(node_id.left(self))
1017                .min(self.get_min_length_only(node_id.right(self))),
1018            Kind::Inter => self
1019                .get_min_length_only(node_id.left(self))
1020                .max(self.get_min_length_only(node_id.right(self))),
1021            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1022            Kind::Compl => {
1023                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1024                    0
1025                } else {
1026                    1
1027                }
1028            }
1029        }
1030    }
1031
1032    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1033        if node_id == NodeId::TS {
1034            return true;
1035        }
1036        match self.get_kind(node_id) {
1037            Kind::Inter => {
1038                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1039            }
1040            Kind::Union => {
1041                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1042            }
1043            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1044            _ => false,
1045        }
1046    }
1047
1048    #[inline]
1049    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1050        if self.get_kind(node_id) == Kind::Concat {
1051            self.ends_with_ts(node_id.right(self))
1052        } else {
1053            node_id == NodeId::TS
1054        }
1055    }
1056
1057    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1058        debug_assert!(node_id != NodeId::MISSING);
1059        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1060    }
1061
1062    pub(crate) fn cache_der(
1063        &mut self,
1064        node_id: NodeId,
1065        result: TRegexId,
1066        mask: Nullability,
1067    ) -> TRegexId {
1068        if mask == Nullability::CENTER {
1069            self.tr_der_center[node_id.0 as usize] = result
1070        } else {
1071            self.tr_der_begin[node_id.0 as usize] = result
1072        };
1073        result
1074    }
1075
1076    pub(crate) fn try_cached_der(
1077        &mut self,
1078        node_id: NodeId,
1079        mask: Nullability,
1080    ) -> Option<TRegexId> {
1081        let cache = if mask == Nullability::CENTER {
1082            &mut self.tr_der_center
1083        } else {
1084            &mut self.tr_der_begin
1085        };
1086        match cache.get(node_id.0 as usize) {
1087            Some(&TRegexId::MISSING) => {}
1088            Some(&result) => {
1089                return Some(result);
1090            }
1091            None => {
1092                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1093            }
1094        }
1095        None
1096    }
1097
1098    pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1099        let mut term = self.get_tregex(der);
1100        loop {
1101            match *term {
1102                TRegex::Leaf(node_id) => return node_id,
1103                TRegex::ITE(cond, _then, _else) => {
1104                    if self.solver().is_sat_id(set, cond) {
1105                        term = self.get_tregex(_then);
1106                    } else {
1107                        term = self.get_tregex(_else);
1108                    }
1109                }
1110            }
1111        }
1112    }
1113
1114    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1115        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1116        debug_assert!(
1117            node_id != NodeId::MISSING,
1118            "attempting to derive missing node"
1119        );
1120        if let Some(result) = self.try_cached_der(node_id, mask) {
1121            return Ok(result);
1122        }
1123
1124        let result = match node_id.kind(self) {
1125            Kind::Compl => {
1126                let leftd = node_id.left(self).der(self, mask)?;
1127                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1128            }
1129            Kind::Inter => {
1130                let leftd = node_id.left(self).der(self, mask)?;
1131                let rightd = node_id.right(self).der(self, mask)?;
1132                {
1133                    let this = &mut *self;
1134                    this.mk_binary(
1135                        leftd,
1136                        rightd,
1137                        &mut (|b, left, right| b.mk_inter(left, right)),
1138                    )
1139                }
1140            }
1141            Kind::Union => {
1142                let leftd = self.der(node_id.left(self), mask)?;
1143                let rightd = self.der(node_id.right(self), mask)?;
1144                {
1145                    let this = &mut *self;
1146                    this.mk_binary(
1147                        leftd,
1148                        rightd,
1149                        &mut (|b, left, right| b.mk_union(left, right)),
1150                    )
1151                }
1152            }
1153            Kind::Concat => {
1154                let head = node_id.left(self);
1155                let tail = node_id.right(self);
1156                let tail_leaf = self.mk_leaf(tail);
1157                let head_der = self.der(head, mask)?;
1158
1159                if self.is_nullable(head, mask) {
1160                    let rightd = self.der(tail, mask)?;
1161                    let ldr = {
1162                        let this = &mut *self;
1163                        this.mk_binary(
1164                            head_der,
1165                            tail_leaf,
1166                            &mut (|b, left, right| b.mk_concat(left, right)),
1167                        )
1168                    };
1169                    {
1170                        let this = &mut *self;
1171                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1172                    }
1173                } else {
1174                    let this = &mut *self;
1175                    this.mk_binary(
1176                        head_der,
1177                        tail_leaf,
1178                        &mut (|b, left, right| b.mk_concat(left, right)),
1179                    )
1180                }
1181            }
1182            Kind::Star => {
1183                if node_id == NodeId::EPS {
1184                    TRegexId::BOT
1185                } else {
1186                    let left = node_id.left(self);
1187                    let r_decr_leaf = self.mk_leaf(node_id);
1188                    let r_der = self.der(left, mask)?;
1189                    let this = &mut *self;
1190                    this.mk_binary(
1191                        r_der,
1192                        r_decr_leaf,
1193                        &mut (|b, left, right| b.mk_concat(left, right)),
1194                    )
1195                }
1196            }
1197            Kind::Lookbehind => {
1198                let lb_prev_der = {
1199                    let lb_prev = self.get_lookbehind_prev(node_id);
1200                    if lb_prev == NodeId::MISSING {
1201                        TRegexId::MISSING
1202                    } else {
1203                        self.der(lb_prev, mask)?
1204                    }
1205                };
1206                let lb_inner = self.get_lookbehind_inner(node_id);
1207                let lb_inner_der = self.der(lb_inner, mask)?;
1208                {
1209                    let this = &mut *self;
1210                    this.mk_binary_result(
1211                        lb_inner_der,
1212                        lb_prev_der,
1213                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1214                    )?
1215                }
1216            }
1217            Kind::Lookahead => {
1218                let la_tail = self.get_lookahead_tail(node_id);
1219                let la_body = node_id.left(self);
1220                let rel = self.get_lookahead_rel(node_id);
1221
1222                if self.is_nullable(la_body, mask) {
1223                    // nullabilty is taken once, just keep the body
1224                    let right = node_id.right(self).missing_to_eps();
1225                    let rder = self.der(right, mask).clone();
1226                    return rder;
1227                }
1228
1229                if rel == u32::MAX {
1230                    let la_body_der = self.der(la_body, mask)?;
1231                    if la_tail.is_kind(self, Kind::Pred) {
1232                        let transitioned =
1233                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1234                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1235                        let concated = self.mk_concat(la_tail, new_la);
1236                        return self.der(concated, mask);
1237                    }
1238                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1239                        let left = la_tail.left(self);
1240                        let tset = left.pred_tset(self);
1241                        let transitioned = self.transition_term(la_body_der, tset);
1242                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1243                        let tail_right = la_tail.right(self);
1244                        let concated = self.mk_concat(new_la, tail_right);
1245                        let concated = self.mk_concat(left, concated);
1246                        return self.der(concated, mask);
1247                    }
1248                }
1249
1250                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1251                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1252                    let concated = self.mk_concat(la_body, nulls_mask);
1253                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1254                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1255                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1256                    let new_union = self.mk_union(concated_look, new_look);
1257                    return self.der(new_union, mask);
1258                }
1259
1260                let la_tail_der = if la_tail == NodeId::MISSING {
1261                    TRegexId::MISSING
1262                } else {
1263                    if self.is_nullable(la_tail, mask) {
1264                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1265                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1266                        let la_union = self.mk_union(la_tail, nulls_la);
1267                        self.der(la_union, mask)?
1268                    } else {
1269                        self.der(la_tail, mask)?
1270                    }
1271                };
1272
1273                let la_body_der = self.der(la_body, mask)?;
1274
1275                if rel != u32::MAX && rel > self.lookahead_context_max {
1276                    return Err(AlgebraError::AnchorLimit);
1277                }
1278
1279                let la = {
1280                    let this = &mut *self;
1281                    let rel = helpers::incr_rel(rel);
1282                    this.mk_binary(
1283                        la_body_der,
1284                        la_tail_der,
1285                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1286                    )
1287                };
1288
1289                if rel != u32::MAX
1290                    && la_tail_der != TRegexId::MISSING
1291                    && self.is_nullable(la_tail, mask)
1292                {
1293                    let look_only = {
1294                        let this = &mut *self;
1295                        let right = TRegexId::MISSING;
1296                        let rel = helpers::incr_rel(rel);
1297                        this.mk_binary(
1298                            la_body_der,
1299                            right,
1300                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1301                        )
1302                    };
1303                    {
1304                        let this = &mut *self;
1305                        this.mk_binary(
1306                            look_only,
1307                            la,
1308                            &mut (|b, left, right| b.mk_union(left, right)),
1309                        )
1310                    }
1311                } else {
1312                    la
1313                }
1314            }
1315            Kind::Counted => {
1316                let body = node_id.left(self);
1317                let chain = node_id.right(self);
1318                let packed = self.get_extra(node_id);
1319                let step = (packed & 0xFFFF) as u16;
1320                let best = (packed >> 16) as u16;
1321
1322                let mid_best = if self.is_nullable(body, mask) && step >= best {
1323                    step
1324                } else {
1325                    best
1326                };
1327
1328                let body_der = self.der(body, mask)?;
1329                let new_step = step.saturating_add(1);
1330                self.mk_unary(body_der, &mut |b, new_body| {
1331                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1332                        new_step
1333                    } else {
1334                        mid_best
1335                    };
1336                    let packed = (final_best as u32) << 16 | new_step as u32;
1337                    b.mk_counted(new_body, chain, packed)
1338                })
1339            }
1340            Kind::Begin | Kind::End => TRegexId::BOT,
1341            Kind::Pred => {
1342                let psi = node_id.pred_tset(self);
1343                if psi == TSetId::EMPTY {
1344                    TRegexId::BOT
1345                } else {
1346                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1347                }
1348            }
1349        };
1350
1351        // println!("{} {}", node_id.0, self.pp(node_id));
1352        // println!("node: {} (total: {})", node_id.0, self.num_created);
1353
1354        self.cache_der(node_id, result, mask);
1355        Ok(result)
1356    }
1357
1358    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1359        debug_assert!(meta_id != MetadataId::MISSING);
1360        match self.metadata.get_mut(node_id.0 as usize) {
1361            Some(v) => *v = meta_id,
1362            None => {
1363                self.metadata
1364                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1365                self.metadata[node_id.0 as usize] = meta_id;
1366            }
1367        }
1368    }
1369
1370    fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1371        debug_assert!(reversed_id != NodeId::MISSING);
1372        match self.reversed.get_mut(node_id.0 as usize) {
1373            Some(v) => *v = reversed_id,
1374            None => {
1375                self.reversed
1376                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1377                self.reversed[node_id.0 as usize] = reversed_id;
1378            }
1379        }
1380    }
1381
1382    fn init(&mut self, inst: NodeKey) -> NodeId {
1383        self.num_created += 1;
1384        let node_id = NodeId(self.num_created);
1385        self.index.insert(inst.clone(), node_id);
1386        match inst.kind {
1387            Kind::Pred => {
1388                let meta_id = self.mb.get_meta_id(Metadata {
1389                    flags: MetaFlags::ZERO,
1390                    nulls: NullsId::EMPTY,
1391                });
1392                self.init_metadata(node_id, meta_id);
1393            }
1394            Kind::Begin => {
1395                let meta_id = self.mb.get_meta_id(Metadata {
1396                    flags: MetaFlags::with_nullability(
1397                        Nullability::BEGIN,
1398                        MetaFlags::CONTAINS_ANCHORS,
1399                    ),
1400                    nulls: NullsId::BEGIN0,
1401                });
1402                self.init_metadata(node_id, meta_id);
1403            }
1404            Kind::End => {
1405                let meta_id = self.mb.get_meta_id(Metadata {
1406                    flags: MetaFlags::with_nullability(
1407                        Nullability::END,
1408                        MetaFlags::CONTAINS_ANCHORS,
1409                    ),
1410                    nulls: NullsId::END0,
1411                });
1412                self.init_metadata(node_id, meta_id);
1413            }
1414            Kind::Inter => {
1415                let m1 = self.get_node_meta_id(inst.left);
1416                let m2 = self.get_node_meta_id(inst.right);
1417                let meta_id = {
1418                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1419                    let mask_l = inst.left.nullability(self);
1420                    let mask_r = inst.right.nullability(self);
1421                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1422                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1423                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1424                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1425                    let new_meta = Metadata {
1426                        flags: self.mb.flags_inter(m1, m2),
1427                        nulls,
1428                    };
1429                    self.mb.get_meta_id(new_meta)
1430                };
1431                self.init_metadata(node_id, meta_id);
1432            }
1433            Kind::Union => {
1434                let m1 = self.get_node_meta_id(inst.left);
1435                let m2 = self.get_node_meta_id(inst.right);
1436                let meta_id = {
1437                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1438                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1439                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1440                    let new_meta = Metadata {
1441                        flags: self.mb.flags_union(m1, m2),
1442                        nulls,
1443                    };
1444                    self.mb.get_meta_id(new_meta)
1445                };
1446                self.init_metadata(node_id, meta_id);
1447            }
1448            Kind::Concat => {
1449                let flags = self.mb.flags_concat(
1450                    self.get_node_meta_id(inst.left),
1451                    self.get_node_meta_id(inst.right),
1452                );
1453
1454                let right_nullability = inst.right.nullability(self);
1455                let left_nullability = inst.left.nullability(self);
1456                let nulls_left = self.get_nulls_id(inst.left);
1457                let nulls_right = self.get_nulls_id(inst.right);
1458                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1459                let mask = right_nullability.and(left_nullability);
1460                nulls = self.mb.nb.and_mask(nulls, mask);
1461
1462                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1463                self.init_metadata(node_id, new_id);
1464            }
1465            Kind::Star => {
1466                let left_nulls = self.get_nulls_id(inst.left);
1467                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1468                let meta_id = self.mb.get_meta_id(Metadata {
1469                    flags: self
1470                        .mb
1471                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1472                    nulls,
1473                });
1474                self.init_metadata(node_id, meta_id);
1475            }
1476            Kind::Compl => {
1477                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1478                let meta_id = self.mb.get_meta_id(Metadata {
1479                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1480                    nulls,
1481                });
1482                self.init_metadata(node_id, meta_id);
1483            }
1484            Kind::Lookbehind => {
1485                let mut null = self.get_meta_flags(inst.left).nullability();
1486                let mut contains_flags = self.get_flags_contains(inst.left);
1487                if !inst.right.is_missing() {
1488                    null = null.and(self.get_meta_flags(inst.right).nullability());
1489                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1490                }
1491
1492                let left_nulls = self.get_nulls_id(inst.left);
1493                let right_nulls = self.get_nulls_id(inst.right);
1494                let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1495                let meta_id = self.mb.get_meta_id(Metadata {
1496                    flags: MetaFlags::with_nullability(
1497                        null,
1498                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1499                    ),
1500                    nulls,
1501                });
1502                self.init_metadata(node_id, meta_id);
1503            }
1504            Kind::Lookahead => {
1505                let mut nulls = self.get_nulls_id(inst.left);
1506                let left_nullability = inst.left.nullability(self);
1507                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1508                nulls = self.mb.nb.or_id(nulls, nulls_right);
1509                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1510
1511                let la_inner = inst.left;
1512                let la_tail = inst.right;
1513                let null = self
1514                    .get_meta_flags(la_inner)
1515                    .nullability()
1516                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1517                let contains_flags = self
1518                    .get_flags_contains(la_inner)
1519                    .or(self.get_flags_contains(la_tail));
1520
1521                let meta_id = self.mb.get_meta_id(Metadata {
1522                    flags: MetaFlags::with_nullability(
1523                        null,
1524                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1525                    ),
1526                    nulls,
1527                });
1528                self.init_metadata(node_id, meta_id);
1529            }
1530            Kind::Counted => {
1531                let best = inst.extra >> 16;
1532                let (null, nulls) = if best > 0 {
1533                    let mut ns = BTreeSet::new();
1534                    ns.insert(NullState::new(Nullability::CENTER, best));
1535                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1536                } else {
1537                    (Nullability::NEVER, NullsId::EMPTY)
1538                };
1539                let meta_id = self.mb.get_meta_id(Metadata {
1540                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1541                    nulls,
1542                });
1543                self.init_metadata(node_id, meta_id);
1544            }
1545        }
1546
1547        self.array.push(inst);
1548
1549        if let Some(rw) = self.post_init_simplify(node_id) {
1550            self.override_as(node_id, rw)
1551        } else {
1552            node_id
1553        }
1554    }
1555
1556    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1557        match self.get_kind(node_id) {
1558            Kind::Concat => {
1559                let lhs = node_id.left(self);
1560                let rhs = node_id.right(self);
1561                if lhs.is_pred_star(self).is_some() {
1562                    if let Some(opttail) = rhs.is_opt_v(self) {
1563                        if let Some(true) = self.subsumes(lhs, opttail) {
1564                            // return self.override_as(node_id, lhs);
1565                            return Some(lhs);
1566                        }
1567                    }
1568                }
1569            }
1570            Kind::Union => {
1571                let lhs = node_id.left(self);
1572                let rhs = node_id.right(self);
1573                let mut subsumed = false;
1574                rhs.iter_union_while(self, &mut |b, branch| {
1575                    if b.nullable_subsumes(branch, lhs) {
1576                        subsumed = true;
1577                    }
1578                    !subsumed
1579                });
1580                if subsumed {
1581                    return Some(rhs);
1582                }
1583                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1584                    return Some(rhs);
1585                }
1586            }
1587            _ => {}
1588        }
1589
1590        None
1591    }
1592
1593    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1594    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1595        if self.get_kind(lhs) != Kind::Union {
1596            return false; // single branch already checked by nullable_subsumes
1597        }
1598        let mut rhs_branches = Vec::new();
1599        let mut curr = rhs;
1600        while self.get_kind(curr) == Kind::Union {
1601            rhs_branches.push(self.get_left(curr));
1602            curr = self.get_right(curr);
1603        }
1604        rhs_branches.push(curr);
1605
1606        curr = lhs;
1607        while self.get_kind(curr) == Kind::Union {
1608            if !rhs_branches.contains(&self.get_left(curr)) {
1609                return false;
1610            }
1611            curr = self.get_right(curr);
1612        }
1613        rhs_branches.contains(&curr)
1614    }
1615
1616    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1617    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1618        if node == target {
1619            return true;
1620        }
1621        match self.get_kind(node) {
1622            Kind::Union => {
1623                self.nullable_subsumes(self.get_left(node), target)
1624                    || self.nullable_subsumes(self.get_right(node), target)
1625            }
1626            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1627                self.nullable_subsumes(self.get_right(node), target)
1628            }
1629            _ => false,
1630        }
1631    }
1632
1633    pub fn num_nodes(&self) -> u32 {
1634        self.num_created
1635    }
1636    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1637        match self.index.get(&inst) {
1638            Some(&id) => id,
1639            None => self.init(inst),
1640        }
1641    }
1642    #[inline]
1643    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1644        self.index.get(inst)
1645    }
1646
1647    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1648        self.index.insert(key, subsumed);
1649        subsumed
1650    }
1651
1652    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1653        let key = &self.array[key.0 as usize];
1654        let entry = self.index.get_mut(key).unwrap();
1655        *entry = subsumed;
1656        subsumed
1657    }
1658
1659    #[inline]
1660    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1661        self.array[node_id.0 as usize].left
1662    }
1663
1664    #[inline]
1665    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1666        self.array[node_id.0 as usize].right
1667    }
1668
1669    #[inline]
1670    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1671        self.array[node_id.0 as usize].extra
1672    }
1673
1674    #[inline]
1675    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1676        debug_assert!(self.get_kind(node_id) == Kind::Concat);
1677        let mut curr = node_id;
1678        while self.get_kind(curr) == Kind::Concat {
1679            curr = curr.right(self);
1680        }
1681        curr
1682    }
1683
1684    fn has_trailing_la(&self, node: NodeId) -> bool {
1685        let end = match self.get_kind(node) {
1686            Kind::Concat => self.get_concat_end(node),
1687            Kind::Lookahead => node,
1688            _ => return false,
1689        };
1690        self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1691    }
1692
1693    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1694        if self.get_kind(node) == Kind::Lookahead {
1695            return (NodeId::EPS, node);
1696        }
1697        debug_assert!(self.get_kind(node) == Kind::Concat);
1698        let right = node.right(self);
1699        if self.get_kind(right) != Kind::Concat {
1700            return (node.left(self), right);
1701        }
1702        let (stripped, la) = self.strip_trailing_la(right);
1703        (self.mk_concat(node.left(self), stripped), la)
1704    }
1705    #[inline]
1706    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1707        debug_assert!(matches!(
1708            self.get_kind(lookahead_node_id),
1709            Kind::Lookahead | Kind::Counted
1710        ));
1711        lookahead_node_id.left(self)
1712    }
1713    #[inline]
1714    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1715        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1716        lookahead_node_id.right(self)
1717    }
1718    #[inline]
1719    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1720        debug_assert!(
1721            matches!(
1722                self.get_kind(lookahead_node_id),
1723                Kind::Lookahead | Kind::Counted
1724            ),
1725            "not lookahead/counted: {:?}",
1726            self.pp(lookahead_node_id)
1727        );
1728        self.get_extra(lookahead_node_id)
1729    }
1730    #[inline]
1731    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1732        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1733        lookbehind_node_id.left(self)
1734    }
1735    #[inline]
1736    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1737        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1738        lookbehind_node_id.right(self)
1739    }
1740
1741    #[inline]
1742    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1743        debug_assert!(
1744            self.array.len() > node_id.0 as usize,
1745            "array len: {:?}",
1746            node_id
1747        );
1748        self.array[node_id.0 as usize].kind
1749    }
1750
1751    #[inline]
1752    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1753        &self.array[node_id.0 as usize]
1754    }
1755
1756    #[inline]
1757    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1758        self.metadata[node_id.0 as usize]
1759    }
1760
1761    #[inline]
1762    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1763        debug_assert!(node_id.0 != u32::MAX);
1764        let meta_id = self.get_node_meta_id(node_id);
1765        debug_assert!(meta_id != MetadataId::MISSING);
1766        &self.mb.array[meta_id.0 as usize]
1767    }
1768
1769    #[inline]
1770    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1771        if node_id == NodeId::MISSING {
1772            NullsId::EMPTY
1773        } else {
1774            self.get_meta(node_id).nulls
1775        }
1776    }
1777
1778    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1779        self.mb
1780            .nb
1781            .array
1782            .iter()
1783            .map(|set| set.iter().cloned().collect())
1784            .collect()
1785    }
1786
1787    pub fn nulls_count(&self) -> usize {
1788        self.mb.nb.array.len()
1789    }
1790
1791    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1792        self.mb.nb.array[id as usize].iter().cloned().collect()
1793    }
1794
1795    #[inline]
1796    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1797        if node_id == NodeId::MISSING {
1798            NullsId::EMPTY
1799        } else {
1800            let nulls = self.get_meta(node_id).nulls;
1801            self.mb.nb.and_mask(nulls, mask)
1802        }
1803    }
1804
1805    #[inline]
1806    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1807        let meta_id = self.get_node_meta_id(node_id);
1808        let meta = &self.mb.array[meta_id.0 as usize];
1809        meta.flags
1810    }
1811
1812    #[inline]
1813    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1814        self.get_meta(node_id).flags.nullability()
1815    }
1816
1817    #[inline]
1818    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1819        let meta_id = self.get_node_meta_id(node_id);
1820        let meta = &self.mb.array[meta_id.0 as usize];
1821        meta.flags.all_contains_flags()
1822    }
1823
1824    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1825        if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1826            return self.strip_lb(node_id.right(self));
1827        }
1828        self.strip_lb_inner(node_id)
1829    }
1830
1831    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1832        if !self.contains_look(node_id) {
1833            return Ok(node_id);
1834        }
1835        if self.get_kind(node_id) == Kind::Concat
1836            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1837        {
1838            let lb = node_id.left(self);
1839            let prev = self.get_lookbehind_prev(lb);
1840            let tail = self.strip_lb_inner(node_id.right(self))?;
1841            if prev != NodeId::MISSING {
1842                let stripped_prev = self.strip_lb_inner(prev)?;
1843                return Ok(self.mk_concat(stripped_prev, tail));
1844            }
1845            return Ok(tail);
1846        }
1847        if self.get_kind(node_id) == Kind::Inter {
1848            let left = self.strip_lb_inner(node_id.left(self))?;
1849            let right = self.strip_lb_inner(node_id.right(self))?;
1850            return Ok(self.mk_inter(left, right));
1851        }
1852        if self.get_kind(node_id) == Kind::Union {
1853            let left = self.strip_lb_inner(node_id.left(self))?;
1854            let right = self.strip_lb_inner(node_id.right(self))?;
1855            return Ok(self.mk_union(left, right));
1856        }
1857        match self.get_kind(node_id) {
1858            Kind::Lookbehind => {
1859                let prev = self.get_lookbehind_prev(node_id);
1860                if prev != NodeId::MISSING {
1861                    self.strip_lb_inner(prev)
1862                } else {
1863                    Ok(NodeId::EPS)
1864                }
1865            }
1866            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1867                Err(AlgebraError::UnsupportedPattern)
1868            }
1869            _ => Ok(node_id),
1870        }
1871    }
1872
1873    // for prefix purposes we prune any \A leading paths
1874    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1875        if !self.contains_anchors(node_id) {
1876            return node_id;
1877        }
1878        match self.get_kind(node_id) {
1879            Kind::Begin => NodeId::BOT,
1880            Kind::Concat => {
1881                let left = self.nonbegins(node_id.left(self));
1882                if left == NodeId::BOT {
1883                    return NodeId::BOT;
1884                }
1885                self.mk_concat(left, node_id.right(self))
1886            }
1887            Kind::Union => {
1888                let left = self.nonbegins(node_id.left(self));
1889                let right = self.nonbegins(node_id.right(self));
1890                self.mk_union(left, right)
1891            }
1892            _ => node_id,
1893        }
1894    }
1895
1896    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1897        match self.get_kind(node_id) {
1898            Kind::Concat => {
1899                let head = node_id.left(self);
1900                match self.get_kind(head) {
1901                    _ if self.any_nonbegin_nullable(head) => {
1902                        self.strip_prefix_safe(node_id.right(self))
1903                    }
1904                    _ => node_id,
1905                }
1906            }
1907            _ => node_id,
1908        }
1909    }
1910    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1911        match self.get_kind(node_id) {
1912            Kind::Begin => NodeId::BOT,
1913            Kind::Concat => {
1914                let head = self.prune_begin(node_id.left(self));
1915                let tail = self.prune_begin(node_id.right(self));
1916                self.mk_concat(head, tail)
1917            }
1918            Kind::Lookbehind => {
1919                if !node_id.right(self).is_missing() {
1920                    return node_id;
1921                }
1922                let head = self.prune_begin(node_id.left(self));
1923                head
1924            }
1925            Kind::Union => {
1926                let left = self.prune_begin(node_id.left(self));
1927                let right = self.prune_begin(node_id.right(self));
1928                self.mk_union(left, right)
1929            }
1930            _ => node_id,
1931        }
1932    }
1933
1934    pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1935        if !self.contains_look(node_id) {
1936            return Ok(node_id);
1937        }
1938        if self.get_kind(node_id) == Kind::Concat
1939            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1940        {
1941            let left = node_id.left(self);
1942            let ll = left.left(self).missing_to_eps();
1943            let lr = left.right(self).missing_to_eps();
1944            let new_l = self.mk_concat(ll, lr);
1945            let new = self.mk_concat(new_l, node_id.right(self));
1946            return Ok(new);
1947        }
1948        if self.get_kind(node_id) == Kind::Inter {
1949            let left = self.normalize_rev(node_id.left(self))?;
1950            let right = self.normalize_rev(node_id.right(self))?;
1951            return Ok(self.mk_inter(left, right));
1952        }
1953        if self.get_kind(node_id) == Kind::Union {
1954            let left = self.normalize_rev(node_id.left(self))?;
1955            let right = self.normalize_rev(node_id.right(self))?;
1956            return Ok(self.mk_union(left, right));
1957        }
1958        match self.get_kind(node_id) {
1959            Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1960            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1961                Err(AlgebraError::UnsupportedPattern)
1962            }
1963            _ => Ok(node_id),
1964        }
1965    }
1966
1967    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1968        debug_assert!(node_id != NodeId::MISSING);
1969        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1970            if *rev != NodeId::MISSING {
1971                return Ok(*rev);
1972            }
1973        }
1974        let rw = match self.get_kind(node_id) {
1975            Kind::End => NodeId::BEGIN,
1976            Kind::Begin => NodeId::END,
1977            Kind::Pred => node_id,
1978            Kind::Concat => {
1979                let left = self.reverse(node_id.left(self))?;
1980                let right = self.reverse(node_id.right(self))?;
1981                self.mk_concat(right, left)
1982            }
1983            Kind::Union => {
1984                let left = self.reverse(node_id.left(self))?;
1985                let right = self.reverse(node_id.right(self))?;
1986                self.mk_union(left, right)
1987            }
1988            Kind::Inter => {
1989                let left = self.reverse(node_id.left(self))?;
1990                let right = self.reverse(node_id.right(self))?;
1991                self.mk_inter(left, right)
1992            }
1993            Kind::Star => {
1994                let body = self.reverse(node_id.left(self))?;
1995                self.mk_star(body)
1996            }
1997            Kind::Compl => {
1998                if self.contains_look(node_id.left(self)) {
1999                    return Err(AlgebraError::UnsupportedPattern);
2000                }
2001                let body = self.reverse(node_id.left(self))?;
2002                self.mk_compl(body)
2003            }
2004            Kind::Lookbehind => {
2005                let prev = self.get_lookbehind_prev(node_id);
2006                let inner_id = self.get_lookbehind_inner(node_id);
2007                let rev_inner = self.reverse(inner_id)?;
2008                let rev_prev = if prev != NodeId::MISSING {
2009                    self.reverse(prev)?
2010                } else {
2011                    NodeId::MISSING
2012                };
2013                self.mk_lookahead(rev_inner, rev_prev, 0)
2014            }
2015            Kind::Lookahead => {
2016                let rel = self.get_lookahead_rel(node_id);
2017                if rel == u32::MAX {
2018                    // rel MAX holds no nullability - rewrite to intersection
2019                    let lbody = self.get_lookahead_inner(node_id);
2020                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2021                    let lbody_ts = self.mk_concat(lbody, NodeId::TS);
2022                    let ltail_ts = self.mk_concat(ltail, NodeId::TS);
2023                    let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2024                    let rev = self.reverse(as_inter)?;
2025                    self.init_reversed(node_id, rev);
2026                    return Ok(rev);
2027                }
2028                if rel != 0 {
2029                    return Err(AlgebraError::UnsupportedPattern);
2030                }
2031                let tail_node = self.get_lookahead_tail(node_id);
2032                let rev_tail = if tail_node != NodeId::MISSING {
2033                    self.reverse(tail_node)?
2034                } else {
2035                    NodeId::MISSING
2036                };
2037                let inner_id = self.get_lookahead_inner(node_id);
2038                let rev_inner = self.reverse(inner_id)?;
2039                self.mk_lookbehind(rev_inner, rev_tail)
2040            }
2041            Kind::Counted => {
2042                return Err(AlgebraError::UnsupportedPattern);
2043            }
2044        };
2045        self.init_reversed(node_id, rw);
2046        Ok(rw)
2047    }
2048
2049    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2050        let node = NodeKey {
2051            kind: Kind::Pred,
2052            left: NodeId::MISSING,
2053            right: NodeId::MISSING,
2054            extra: pred.0,
2055        };
2056        self.get_node_id(node)
2057    }
2058
2059    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2060        let key = NodeKey {
2061            kind: Kind::Compl,
2062            left: body,
2063            right: NodeId::MISSING,
2064            extra: u32::MAX,
2065        };
2066        if let Some(id) = self.key_is_created(&key) {
2067            return *id;
2068        }
2069        if body == NodeId::BOT {
2070            return NodeId::TS;
2071        }
2072        if body == NodeId::TS {
2073            return NodeId::BOT;
2074        }
2075
2076        if let Some(contains_body) = body.is_contains(self) {
2077            if contains_body.is_pred(self) {
2078                let pred = contains_body.pred_tset(self);
2079                let notpred = self.mk_pred_not(pred);
2080                let node = self.mk_star(notpred);
2081                return self.init_as(key, node);
2082            } else if contains_body == NodeId::END {
2083                return self.init_as(key, NodeId::BOT);
2084            }
2085        };
2086
2087        if self.get_kind(body) == Kind::Compl {
2088            return self.get_node(body).left;
2089        }
2090
2091        self.get_node_id(key)
2092    }
2093
2094    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2095        let nid = self.get_nulls_id(body);
2096        let nref = self.mb.nb.get_set_ref(nid).clone();
2097        let mut futures = NodeId::BOT;
2098        for n in nref.iter() {
2099            if !n.is_mask_nullable(mask) {
2100                continue;
2101            }
2102
2103            let eff = if n.rel == 0 {
2104                NodeId::EPS
2105            } else {
2106                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2107            };
2108            futures = self.mk_union(futures, eff)
2109        }
2110        futures
2111    }
2112
2113    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2114        if cfg!(feature = "norewrite") {
2115            return None;
2116        }
2117
2118        if self.get_kind(tail) == Kind::Lookbehind {
2119            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2120            return self
2121                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2122                .ok();
2123        }
2124        if self.get_kind(head) == Kind::Lookahead {
2125            let la_tail = self.get_lookahead_tail(head);
2126            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2127            if new_la_tail.is_center_nullable(self) {
2128                let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2129                if non_null_tail == NodeId::BOT {
2130                    return None;
2131                }
2132                let la_body = self.get_lookahead_inner(head);
2133                return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2134            }
2135            let la_body = self.get_lookahead_inner(head);
2136            let la_rel = self.get_lookahead_rel(head);
2137            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2138                let tail_rel = self.get_lookahead_rel(new_la_tail);
2139                tail_rel + la_rel
2140            } else {
2141                u32::MAX
2142            };
2143
2144            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2145        }
2146
2147        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2148            return Some(head);
2149        }
2150        if head == NodeId::TS && tail == NodeId::END {
2151            return Some(head);
2152        }
2153
2154        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2155            return Some(NodeId::TS);
2156        }
2157
2158        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2159            return Some(NodeId::TS);
2160        }
2161
2162        if self.get_kind(tail) == Kind::Union && head == NodeId::TS {
2163            let mut should_distribute_top = false;
2164            self.iter_unions(tail, |v| {
2165                if v == NodeId::BEGIN || self.starts_with_ts(v) {
2166                    should_distribute_top = true;
2167                }
2168            });
2169            if should_distribute_top {
2170                let mut new_union = NodeId::BOT;
2171                let mut curr = tail;
2172                while self.get_kind(curr) == Kind::Union {
2173                    let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2174                    new_union = self.mk_union(new_node, new_union);
2175                    curr = curr.right(self);
2176                }
2177                let new_node = self.mk_concat(NodeId::TS, curr);
2178                new_union = self.mk_union(new_union, new_node);
2179                return Some(new_union);
2180            }
2181        }
2182
2183        if self.get_kind(head) == Kind::Union && head == NodeId::TS {
2184            let mut should_distribute_top = false;
2185            self.iter_unions(head, |v| {
2186                if v == NodeId::END || self.starts_with_ts(v) {
2187                    should_distribute_top = true;
2188                }
2189            });
2190            if should_distribute_top {
2191                let mut new_union = NodeId::BOT;
2192                let mut curr = head;
2193                while self.get_kind(curr) == Kind::Union {
2194                    let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2195                    new_union = self.mk_union(new_node, new_union);
2196                    curr = curr.right(self);
2197                }
2198                let new_node = self.mk_concat(curr, NodeId::TS);
2199                new_union = self.mk_union(new_union, new_node);
2200                return Some(new_union);
2201            }
2202        }
2203
2204        if self.get_kind(head) == Kind::Inter && tail == NodeId::TS {
2205            let mut alltopstar = true;
2206            iter_inter!(self, head, |v| {
2207                alltopstar = self.ends_with_ts(v);
2208            });
2209            if alltopstar {
2210                return Some(head);
2211            }
2212        }
2213
2214        if head.is_star(self) && head == tail {
2215            return Some(head);
2216        }
2217
2218        None
2219    }
2220
2221    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2222        use Kind::*;
2223
2224        if cfg!(feature = "norewrite") {
2225            return None;
2226        }
2227        if left == right {
2228            return Some(left);
2229        }
2230
2231        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2232            return Some(right);
2233        }
2234
2235        if self.get_kind(left) == Kind::Pred && self.get_kind(right) == Kind::Pred {
2236            let l = left.pred_tset(self);
2237            let r = right.pred_tset(self);
2238            let solver = self.solver();
2239            let psi = solver.or_id(l, r);
2240            let rewrite = self.mk_pred(psi);
2241            return Some(rewrite);
2242        }
2243
2244        if left == NodeId::EPS
2245            && self.get_extra(left) == 0
2246            && self.nullability(right) == Nullability::ALWAYS
2247        {
2248            return Some(right);
2249        }
2250
2251        if self.get_kind(left) == Kind::Lookahead && self.get_kind(right) == Kind::Lookahead {
2252            let lb = left.left(self);
2253            let lt = left.right(self);
2254            let lrel = left.extra(self);
2255
2256            let rb = right.left(self);
2257            let rt = right.right(self);
2258            let rrel = right.extra(self);
2259
2260            if lrel == rrel && lt.is_missing() && rt.is_missing() {
2261                let unioned = self.mk_union(lb, rb);
2262                let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2263                return Some(node);
2264            }
2265        }
2266
2267        if right.is_kind(self, Concat) {
2268            if left == NodeId::END
2269                && right.left(self) == NodeId::END
2270                && self.nullability(right).has(Nullability::END)
2271            {
2272                return Some(right);
2273            }
2274            // .*|.*a.* => .*(a.*|)
2275            if left == right.left(self) {
2276                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2277                let rw = self.mk_concat(left, rhs);
2278                return Some(rw);
2279            }
2280            if left.is_kind(self, Concat) {
2281                let head1 = left.left(self);
2282                let head2 = right.left(self);
2283
2284                if head1 == head2 {
2285                    let t1 = left.right(self);
2286                    let t2 = right.right(self);
2287                    // opportunistic rewrites
2288                    if head1 == NodeId::TS {
2289                        if t2.has_concat_tail(self, t1) {
2290                            return Some(left);
2291                        }
2292                        if t1.has_concat_tail(self, t2) {
2293                            return Some(right);
2294                        }
2295                    }
2296                    let un = self.mk_union(t1, t2);
2297                    return Some(self.mk_concat(left.left(self), un));
2298                }
2299
2300                // xa|ya => (x|y)a - suffix factoring via reverse
2301                // TODO: valid and looks prettier but .reverse is not good for builder perf,
2302                // leaving out unless i find a case where it helps significantly
2303                if false {
2304                    let end1 = self.get_concat_end(left);
2305                    let end2 = self.get_concat_end(right);
2306                    if end1 == end2 {
2307                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2308                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2309                            let rev1 = self.reverse(left).unwrap();
2310                            let rev2 = self.reverse(right).unwrap();
2311
2312                            let union_rev = self.mk_union(rev1, rev2);
2313                            return Some(self.reverse(union_rev).unwrap());
2314                        }
2315                    }
2316                }
2317            }
2318            if left.is_pred(self) && left == right.left(self) {
2319                let un = self.mk_opt(right.right(self));
2320                let conc = self.mk_concat(left, un);
2321                return Some(conc);
2322            }
2323        }
2324
2325        if left == NodeId::EPS && right.is_plus(self) {
2326            let result = self.mk_star(right.left(self));
2327            return Some(result);
2328        }
2329
2330        // (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2331        if left.is_inter(self) && right.is_inter(self) {
2332            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2333                return Some(rw);
2334            }
2335        }
2336
2337        None
2338    }
2339
2340    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2341        let mut curr = node;
2342        while self.get_kind(curr) == Kind::Inter {
2343            out.push(self.get_left(curr));
2344            curr = self.get_right(curr);
2345        }
2346        out.push(curr);
2347    }
2348
2349    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2350        let mut curr = node;
2351        let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2352        if has_prefix {
2353            curr = self.get_right(curr);
2354        }
2355        let mut count = 0u32;
2356        let mut pred_set = None;
2357        while self.get_kind(curr) == Kind::Concat {
2358            let head = self.get_left(curr);
2359            if self.get_kind(head) != Kind::Pred {
2360                return None;
2361            }
2362            let ps = head.pred_tset(self);
2363            match pred_set {
2364                None => pred_set = Some(ps),
2365                Some(existing) => {
2366                    if existing != ps {
2367                        return None;
2368                    }
2369                }
2370            }
2371            curr = self.get_right(curr);
2372            count += 1;
2373        }
2374        if count == 0 || self.get_kind(curr) != Kind::Star {
2375            return None;
2376        }
2377        Some((has_prefix, pred_set.unwrap(), curr, count))
2378    }
2379
2380    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2381        let mut j = 0;
2382        for &s in sub {
2383            while j < sup.len() && sup[j] < s {
2384                j += 1;
2385            }
2386            if j >= sup.len() || sup[j] != s {
2387                return false;
2388            }
2389            j += 1;
2390        }
2391        true
2392    }
2393
2394    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2395        if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2396            return None;
2397        }
2398
2399        let mut lc: Vec<NodeId> = Vec::new();
2400        let mut rc: Vec<NodeId> = Vec::new();
2401        self.collect_inter_components(left, &mut lc);
2402        self.collect_inter_components(right, &mut rc);
2403
2404        // component subset check: fewer constraints = larger language
2405        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2406            return Some(left);
2407        }
2408        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2409        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2410            return Some(right);
2411        }
2412
2413        if lc.len() == rc.len() {
2414            let mut diff_idx = None;
2415            for i in 0..lc.len() {
2416                if lc[i] != rc[i] {
2417                    if diff_idx.is_some() {
2418                        return None;
2419                    }
2420                    diff_idx = Some(i);
2421                }
2422            }
2423            if let Some(idx) = diff_idx {
2424                let a = lc[idx];
2425                let b = rc[idx];
2426                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2427                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2428                {
2429                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2430                        return if ca < cb { Some(left) } else { Some(right) };
2431                    }
2432                }
2433            }
2434        }
2435
2436        None
2437    }
2438
2439    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2440        if cfg!(feature = "norewrite") {
2441            return None;
2442        }
2443        if left == right {
2444            return Some(left);
2445        }
2446
2447        if self.get_kind(right) == Kind::Union {
2448            let mut result = NodeId::BOT;
2449            self.iter_unions_b(
2450                right,
2451                &mut (|b, v| {
2452                    let new_inter = b.mk_inter(v, left);
2453                    result = b.mk_union(result, new_inter);
2454                }),
2455            );
2456            return Some(result);
2457        }
2458        if self.get_kind(left) == Kind::Union {
2459            let mut result = NodeId::BOT;
2460            self.iter_unions_b(
2461                left,
2462                &mut (|b, v| {
2463                    let new_inter = b.mk_inter(v, right);
2464                    result = b.mk_union(result, new_inter);
2465                }),
2466            );
2467            return Some(result);
2468        }
2469
2470        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2471            return Some(NodeId::BOT);
2472        }
2473
2474        if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2475            let bodies = self.mk_union(left.left(self), right.left(self));
2476            return Some(self.mk_compl(bodies));
2477        }
2478
2479        if left == NodeId::TOPPLUS {
2480            if right.is_pred_star(self).is_some() {
2481                let newloop = self.mk_plus(right.left(self));
2482                return Some(newloop);
2483            }
2484            if right.is_never_nullable(self) {
2485                return Some(right);
2486            }
2487            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2488                return Some(NodeId::BOT);
2489            }
2490            if right.is_kind(self, Kind::Concat) {}
2491        }
2492
2493        {
2494            let l_is_la = left.is_lookahead(self);
2495            let r_is_la = right.is_lookahead(self);
2496            let l_is_cla = !l_is_la
2497                && self.get_kind(left) == Kind::Concat
2498                && self.get_kind(left.left(self)) == Kind::Lookahead;
2499            let r_is_cla = !r_is_la
2500                && self.get_kind(right) == Kind::Concat
2501                && self.get_kind(right.left(self)) == Kind::Lookahead;
2502            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2503                let (la_node, other, concat_body) = if r_is_la {
2504                    (right, left, NodeId::MISSING)
2505                } else if l_is_la {
2506                    (left, right, NodeId::MISSING)
2507                } else if r_is_cla {
2508                    (right.left(self), left, right.right(self))
2509                } else {
2510                    (left.left(self), right, left.right(self))
2511                };
2512                let la_body = la_node.left(self);
2513                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2514                let inter_right = if concat_body.is_missing() {
2515                    la_tail
2516                } else {
2517                    self.mk_concat(la_tail, concat_body)
2518                };
2519                let new_body = self.mk_inter(other, inter_right);
2520                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2521                return Some(self.mk_concat(la, new_body));
2522            }
2523        }
2524
2525        if self.get_kind(right) == Kind::Compl {
2526            let compl_body = right.left(self);
2527            if left == compl_body {
2528                return Some(NodeId::BOT);
2529            }
2530            if self.get_kind(compl_body) == Kind::Concat {
2531                let compl_head = compl_body.left(self);
2532                if compl_body.right(self) == NodeId::TS && compl_head == left {
2533                    return Some(NodeId::BOT);
2534                }
2535            }
2536        }
2537
2538        if let Some(pleft) = left.is_pred_star(self) {
2539            if let Some(pright) = right.is_pred_star(self) {
2540                let merged = self.mk_inter(pleft, pright);
2541                return Some(self.mk_star(merged));
2542            }
2543        }
2544
2545        {
2546            let l_is_clb = self.get_kind(left) == Kind::Concat
2547                && self.get_kind(left.left(self)) == Kind::Lookbehind;
2548            let r_is_clb = self.get_kind(right) == Kind::Concat
2549                && self.get_kind(right.left(self)) == Kind::Lookbehind;
2550            if l_is_clb || r_is_clb {
2551                let (lb, body) = if l_is_clb && r_is_clb {
2552                    let lb1 = left.left(self);
2553                    let lb2 = right.left(self);
2554                    let inner = self.mk_inter(
2555                        self.get_lookbehind_inner(lb1),
2556                        self.get_lookbehind_inner(lb2),
2557                    );
2558                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2559                    let body = self.mk_inter(left.right(self), right.right(self));
2560                    (lb, body)
2561                } else if l_is_clb {
2562                    let lb = left.left(self);
2563                    let body = self.mk_inter(left.right(self), right);
2564                    (lb, body)
2565                } else {
2566                    let lb = right.left(self);
2567                    let body = self.mk_inter(left, right.right(self));
2568                    (lb, body)
2569                };
2570                return Some(self.mk_concat(lb, body));
2571            }
2572        }
2573
2574        {
2575            let l_has_la = self.has_trailing_la(left);
2576            let r_has_la = self.has_trailing_la(right);
2577            if l_has_la || r_has_la {
2578                let (body, la) = if l_has_la && r_has_la {
2579                    let (lbody, l_la) = self.strip_trailing_la(left);
2580                    let (rbody, r_la) = self.strip_trailing_la(right);
2581                    let inner = self.mk_inter(
2582                        self.get_lookahead_inner(l_la),
2583                        self.get_lookahead_inner(r_la),
2584                    );
2585                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2586                    let body = self.mk_inter(lbody, rbody);
2587                    (body, la)
2588                } else if l_has_la {
2589                    let (lbody, la) = self.strip_trailing_la(left);
2590                    let body = self.mk_inter(lbody, right);
2591                    (body, la)
2592                } else {
2593                    let (rbody, la) = self.strip_trailing_la(right);
2594                    let body = self.mk_inter(left, rbody);
2595                    (body, la)
2596                };
2597                return Some(self.mk_concat(body, la));
2598            }
2599        }
2600
2601        None
2602    }
2603
2604    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2605        if cfg!(feature = "norewrite") {
2606            return None;
2607        }
2608        debug_assert!(self.get_kind(right_union) == Kind::Union);
2609
2610        let mut rewritten = None;
2611        right_union.iter_union_while(
2612            self,
2613            &mut (|b, v| {
2614                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2615                    rewritten = Some((v, rw));
2616                    false
2617                } else {
2618                    true
2619                }
2620            }),
2621        );
2622
2623        if let Some(rw) = rewritten {
2624            let mut new_union = NodeId::BOT;
2625            right_union.iter_union(
2626                self,
2627                &mut (|b, v| {
2628                    if v == rw.0 {
2629                        new_union = b.mk_union(rw.1, new_union)
2630                    } else {
2631                        new_union = b.mk_union(v, new_union)
2632                    }
2633                }),
2634            );
2635            return Some(new_union);
2636        };
2637
2638        None
2639    }
2640
2641    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2642        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2643        debug_assert!(tail != NodeId::MISSING);
2644        let key = NodeKey {
2645            kind: Kind::Concat,
2646            left: head,
2647            right: tail,
2648            extra: u32::MAX,
2649        };
2650        if let Some(id) = self.key_is_created(&key) {
2651            return *id;
2652        }
2653
2654        if head == NodeId::BOT || tail == NodeId::BOT {
2655            return NodeId::BOT;
2656        }
2657        if head == NodeId::EPS {
2658            return tail;
2659        }
2660        if tail == NodeId::EPS {
2661            return head;
2662        }
2663
2664        match tail {
2665            // this is only valid if direction known;
2666            NodeId::BEGIN => {
2667                if !self.is_nullable(head, Nullability::BEGIN) {
2668                    return NodeId::BOT;
2669                } else {
2670                    return NodeId::BEGIN;
2671                }
2672            }
2673            _ => {}
2674        }
2675
2676        // normalize concats to right
2677        if head.is_kind(self, Kind::Concat) {
2678            let left = head.left(self);
2679            let newright = self.mk_concat(head.right(self), tail);
2680            let updated = self.mk_concat(left, newright);
2681            return self.init_as(key, updated);
2682        }
2683
2684        if head == NodeId::TS && tail == NodeId::END {
2685            return NodeId::TS;
2686        }
2687
2688        if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2689            return NodeId::BOT;
2690        }
2691
2692        if self.get_kind(tail) == Kind::Concat {
2693            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2694                let upd = self.mk_concat(rw, tail.right(self));
2695                return self.init_as(key, upd);
2696            }
2697        }
2698
2699        if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2700            return self.init_as(key, new);
2701        }
2702
2703        match (self.get_kind(head), self.get_kind(tail)) {
2704            // merge stars
2705            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2706                let rl = tail.left(self);
2707                if head == rl {
2708                    return self.init_as(key, tail);
2709                }
2710            }
2711            // attempt longer concat rw
2712            (_, Kind::Concat) => {
2713                let curr = head;
2714                let h2 = self.mk_concat(curr, tail.left(self));
2715                let tr = tail.right(self);
2716                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2717                    return self.init_as(key, new);
2718                }
2719            }
2720            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2721                return self.init_as(key, tail);
2722            }
2723            _ => {}
2724        }
2725
2726        self.init(key)
2727    }
2728
2729    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2730        // LNF: lookbehind must start with ts
2731        let lb_body = {
2732            match self.starts_with_ts(lb_body) {
2733                true => lb_body,
2734                false => self.mk_concat(NodeId::TS, lb_body),
2735            }
2736        };
2737        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2738        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2739    }
2740
2741    fn mk_lookbehind_internal(
2742        &mut self,
2743        lb_body: NodeId,
2744        lb_prev: NodeId,
2745    ) -> Result<NodeId, AlgebraError> {
2746        debug_assert!(lb_body != NodeId::MISSING);
2747        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2748        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2749            return Ok(NodeId::BOT);
2750        }
2751        if lb_body == NodeId::TS {
2752            return Ok(lb_prev);
2753        }
2754        if lb_body == NodeId::EPS {
2755            match lb_prev {
2756                NodeId::MISSING => return Ok(NodeId::EPS),
2757                NodeId::EPS => return Ok(NodeId::EPS),
2758                _ => return Ok(lb_prev),
2759            }
2760        }
2761
2762        let key = NodeKey {
2763            kind: Kind::Lookbehind,
2764            left: lb_body,
2765            right: lb_prev,
2766            extra: u32::MAX,
2767        };
2768        match self.key_is_created(&key) {
2769            Some(id) => Ok(*id),
2770            None => {
2771                if lb_prev == NodeId::TS {
2772                    return Ok(self.mk_concat(lb_prev, lb_body));
2773                }
2774
2775                Ok(self.init(key))
2776            }
2777        }
2778    }
2779
2780    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2781        // LNF: lookahead must end with ts
2782        let la_body = {
2783            match self.ends_with_ts(la_body) {
2784                true => la_body,
2785                false => self.mk_concat(la_body, NodeId::TS),
2786            }
2787        };
2788        let rel = if NodeId::MISSING == la_tail {
2789            rel
2790        } else {
2791            match la_tail.is_center_nullable(self) {
2792                false => u32::MAX,
2793                true => rel,
2794            }
2795        };
2796
2797        self.mk_lookahead_internal(la_body, la_tail, rel)
2798    }
2799
2800    // rel max = carries no nullability, can potentially rw to intersection
2801    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2802        let key = NodeKey {
2803            kind: Kind::Lookahead,
2804            left: la_body,
2805            right: la_tail,
2806            extra: rel,
2807        };
2808        if let Some(id) = self.key_is_created(&key) {
2809            return *id;
2810        }
2811        if la_body == NodeId::TS {
2812            if rel == 0 {
2813                return la_tail.missing_to_eps();
2814            } else {
2815                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2816            }
2817        }
2818        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2819            return NodeId::BOT;
2820        }
2821        if la_tail.is_missing() && rel == u32::MAX {
2822            return NodeId::BOT;
2823        }
2824
2825        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2826            return la_body;
2827        }
2828
2829        if la_tail == NodeId::TS {
2830            if rel == 0 || rel == u32::MAX {
2831                return self.mk_concat(la_body, NodeId::TS);
2832            } else if rel == u32::MAX {
2833                return self.mk_begins_with(la_body);
2834            }
2835        }
2836
2837        if rel == u32::MAX {
2838            if la_tail.is_missing() {
2839                return NodeId::BOT;
2840            }
2841
2842            if self.is_always_nullable(la_body) {
2843                return la_tail.missing_to_eps();
2844            }
2845
2846            if la_tail != NodeId::MISSING {
2847                match self.get_kind(la_tail) {
2848                    _ => {
2849                        if la_body.is_compl_plus_end(self) {
2850                            let minlen = self.get_min_length_only(la_tail);
2851                            if minlen >= 1 {
2852                                return NodeId::BOT;
2853                            }
2854                        }
2855                    }
2856                }
2857            }
2858        }
2859
2860        if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2861            let la_body2 = self.get_lookahead_inner(la_tail);
2862            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2863            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2864            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2865            let new_la_rel = self.get_lookahead_rel(la_tail);
2866            let new_la_tail = self.get_lookahead_tail(la_tail);
2867            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2868        }
2869
2870        if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2871            let la_body_right = la_body.right(self);
2872            if self.is_always_nullable(la_body_right) {
2873                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2874            }
2875            if la_body.right(self) == NodeId::END {
2876                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2877            }
2878            let bodyright = la_body.right(self);
2879            if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2880                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2881                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2882            }
2883        }
2884
2885        if la_tail != NodeId::MISSING {
2886            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2887                let lpred = la_body.left(self);
2888                if self.get_kind(lpred) == Kind::Pred {
2889                    let l = lpred.pred_tset(self);
2890                    let r = la_tail.pred_tset(self);
2891                    let psi_and = self.solver().and_id(l, r);
2892                    let rewrite = self.mk_pred(psi_and);
2893                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2894                    let new_right =
2895                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2896                    return self.mk_concat(rewrite, new_right);
2897                }
2898            }
2899        }
2900
2901        self.get_node_id(key)
2902    }
2903
2904    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2905        let has_match = (packed >> 16) > 0;
2906        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2907            return NodeId::BOT;
2908        }
2909        debug_assert!(
2910            body == NodeId::BOT || !self.is_infinite(body),
2911            "Counted body must have finite max length"
2912        );
2913        let chain = self.prune_counted_chain(body, chain);
2914        let key = NodeKey {
2915            kind: Kind::Counted,
2916            left: body,
2917            right: chain,
2918            extra: packed,
2919        };
2920        if let Some(id) = self.key_is_created(&key) {
2921            return *id;
2922        }
2923        self.get_node_id(key)
2924    }
2925
2926    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2927        if chain == NodeId::MISSING || body == NodeId::BOT {
2928            return chain;
2929        }
2930        if self.nullability(body) != Nullability::NEVER {
2931            return NodeId::MISSING;
2932        }
2933        let chain_body = chain.left(self);
2934        if chain_body == NodeId::BOT {
2935            return chain;
2936        }
2937        let not_begins = self.mk_not_begins_with(body);
2938        let inter = self.mk_inter(chain_body, not_begins);
2939        let is_empty = inter == NodeId::BOT;
2940        if is_empty {
2941            self.prune_counted_chain(body, chain.right(self))
2942        } else {
2943            chain
2944        }
2945    }
2946
2947    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2948        let neg_inner = self.mk_concat(body, NodeId::TS);
2949        let neg_part = self.mk_compl(neg_inner);
2950        let conc = self.mk_concat(neg_part, NodeId::END);
2951        self.mk_lookahead(conc, NodeId::MISSING, rel)
2952    }
2953
2954    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2955        match self.get_node(body).kind {
2956            Kind::Pred => {
2957                let psi = body.pred_tset(self);
2958                let negated = self.mk_pred_not(psi);
2959                let union = self.mk_union(NodeId::BEGIN, negated);
2960                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2961                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2962            }
2963            _ => {
2964                // ~body ∩ utf8_char: non-nullable (utf8_char requires ≥1 byte),
2965                // so \A | negated won't be stripped by strip_prefix_safe
2966                let uc = crate::unicode_classes::utf8_char(self);
2967                let neg = self.mk_compl(body);
2968                let negated = self.mk_inter(neg, uc);
2969                let union = self.mk_union(NodeId::BEGIN, negated);
2970                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2971                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2972            }
2973        }
2974    }
2975
2976    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2977        debug_assert!(left != NodeId::MISSING);
2978        debug_assert!(right != NodeId::MISSING);
2979        if left > right {
2980            return self.mk_union(right, left);
2981        }
2982        let key = NodeKey {
2983            kind: Kind::Union,
2984            left,
2985            right,
2986            extra: u32::MAX,
2987        };
2988        if let Some(id) = self.key_is_created(&key) {
2989            return *id;
2990        }
2991
2992        if left == right {
2993            return left;
2994        }
2995        if left == NodeId::BOT {
2996            return right;
2997        }
2998        if right == NodeId::BOT {
2999            return left;
3000        }
3001        if right == NodeId::TS {
3002            return right;
3003        }
3004        if left == NodeId::TS {
3005            return left;
3006        }
3007
3008        match (self.get_kind(left), self.get_kind(right)) {
3009            (Kind::Union, _) => {
3010                self.iter_unions_b(left, &mut |b, v| {
3011                    b.temp_vec.push(v);
3012                });
3013                self.iter_unions_b(right, &mut |b, v| {
3014                    b.temp_vec.push(v);
3015                });
3016                self.temp_vec.sort();
3017                let tree = self.temp_vec.clone();
3018                self.temp_vec.clear();
3019                let newnode = tree
3020                    .iter()
3021                    .rev()
3022                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3023                return self.init_as(key, newnode);
3024            }
3025            (_, Kind::Union) => {
3026                let rleft = right.left(self);
3027                // if left_node id is smaller than rleft, just create a new union
3028                if left > rleft {
3029                    self.iter_unions_b(left, &mut |b, v| {
3030                        b.temp_vec.push(v);
3031                    });
3032                    self.iter_unions_b(right, &mut |b, v| {
3033                        b.temp_vec.push(v);
3034                    });
3035                    self.temp_vec.sort();
3036                    let tree = self.temp_vec.clone();
3037                    self.temp_vec.clear();
3038                    let newnode = tree
3039                        .iter()
3040                        .rev()
3041                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3042                    return self.init_as(key, newnode);
3043                } else {
3044                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3045                        return self.init_as(key, rw);
3046                    }
3047                }
3048            }
3049            _ => {}
3050        }
3051
3052        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3053            return self.init_as(key, rw);
3054        }
3055        self.init(key)
3056    }
3057
3058    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3059        debug_assert!(left_id != NodeId::MISSING);
3060        debug_assert!(right_id != NodeId::MISSING);
3061        if left_id == right_id {
3062            return left_id;
3063        }
3064        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3065            return NodeId::BOT;
3066        }
3067        if left_id == NodeId::TS {
3068            return right_id;
3069        }
3070        if right_id == NodeId::TS {
3071            return left_id;
3072        }
3073        if left_id > right_id {
3074            return self.mk_inter(right_id, left_id);
3075        }
3076        let key = NodeKey {
3077            kind: Kind::Inter,
3078            left: left_id,
3079            right: right_id,
3080            extra: u32::MAX,
3081        };
3082        if let Some(id) = self.key_is_created(&key) {
3083            return *id;
3084        }
3085
3086        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3087            return self.init_as(key, rw);
3088        }
3089
3090        self.init(key)
3091    }
3092
3093    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3094        let node = NodeKey {
3095            kind,
3096            left: NodeId::MISSING,
3097            right: NodeId::MISSING,
3098            extra: u32::MAX,
3099        };
3100        self.init(node)
3101    }
3102
3103    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3104        let star = self.mk_star(body_id);
3105        self.mk_concat(body_id, star)
3106    }
3107    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3108        let opt = self.mk_opt(body_id);
3109        let mut nodes1 = vec![];
3110        for _ in lower..upper {
3111            nodes1.push(opt);
3112        }
3113        for _ in 0..lower {
3114            nodes1.push(body_id);
3115        }
3116        self.mk_concats(nodes1.into_iter())
3117    }
3118    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3119        self.mk_union(NodeId::EPS, body_id)
3120    }
3121
3122    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3123        let key = NodeKey {
3124            kind: Kind::Star,
3125            left: body_id,
3126            right: NodeId::MISSING,
3127            extra: 0,
3128        };
3129        if let Some(id) = self.key_is_created(&key) {
3130            return *id;
3131        }
3132        // _*{500} is still _*
3133        if body_id.is_kind(self, Kind::Star) {
3134            return body_id;
3135        }
3136        self.get_node_id(key)
3137    }
3138
3139    /// it's cheaper to check this once as an edge-case
3140    /// than to compute a 4th nullability bit for every node
3141    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3142        match self.get_kind(node_id) {
3143            Kind::End => Nullability::EMPTYSTRING,
3144            Kind::Begin => Nullability::EMPTYSTRING,
3145            Kind::Pred => Nullability::NEVER,
3146            Kind::Star => Nullability::ALWAYS,
3147            Kind::Inter | Kind::Concat => {
3148                let lnull = self.nullability_emptystring(node_id.left(self));
3149                let rnull = self.nullability_emptystring(node_id.right(self));
3150                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3151            }
3152            Kind::Union => {
3153                let lnull = self.nullability_emptystring(node_id.left(self));
3154                let rnull = self.nullability_emptystring(node_id.right(self));
3155                lnull.or(rnull)
3156            }
3157            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3158            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3159            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3160            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3161        }
3162    }
3163
3164    #[inline(always)]
3165    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3166        self.get_meta(node_id)
3167            .flags
3168            .nullability()
3169            .has(Nullability::CENTER.or(Nullability::END))
3170    }
3171
3172    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3173        self.get_only_nullability(node_id)
3174    }
3175
3176    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3177        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3178    }
3179
3180    pub fn pp(&self, node_id: NodeId) -> String {
3181        let mut s = String::new();
3182        self.ppw(&mut s, node_id).unwrap();
3183        s
3184    }
3185
3186    #[allow(dead_code)]
3187    pub fn pp_nulls(&self, node_id: NodeId) -> String {
3188        let nu = self.get_nulls_id(node_id);
3189        let nr = self.mb.nb.get_set_ref(nu);
3190        let s1 = format!("{:?}", nr);
3191        s1
3192    }
3193
3194    #[allow(dead_code)]
3195    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3196        match self.get_tregex(term_id) {
3197            TRegex::Leaf(node_id) => {
3198                let mut s = String::new();
3199                self.ppw(&mut s, *node_id).unwrap();
3200                s
3201            }
3202            TRegex::ITE(cond, then_id, else_id) => {
3203                format!(
3204                    "ITE({},{},{})",
3205                    self.solver_ref().pp(*cond),
3206                    self.ppt(*then_id),
3207                    self.ppt(*else_id)
3208                )
3209            }
3210        }
3211    }
3212
3213    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3214        if cfg!(feature = "graphviz") {
3215            match node_id {
3216                NodeId::MISSING => return write!(s, "MISSING"),
3217                NodeId::BOT => return write!(s, "⊥"),
3218                NodeId::TS => return write!(s, "_*"),
3219                NodeId::TOP => return write!(s, "_"),
3220                NodeId::EPS => return write!(s, "ε"),
3221                _ => {}
3222            }
3223        }
3224
3225        match node_id {
3226            NodeId::MISSING => return write!(s, "MISSING"),
3227            NodeId::BOT => return write!(s, "⊥"),
3228            NodeId::TS => return write!(s, "_*"),
3229            NodeId::TOP => return write!(s, "_"),
3230            NodeId::EPS => return write!(s, ""),
3231            _ => {}
3232        }
3233
3234        match self.get_kind(node_id) {
3235            Kind::End => write!(s, r"\z"),
3236            Kind::Begin => write!(s, r"\A"),
3237            Kind::Pred => {
3238                let psi = node_id.pred_tset(self);
3239                if psi == TSetId::EMPTY {
3240                    write!(s, r"⊥")
3241                } else if psi == TSetId::FULL {
3242                    write!(s, r"_")
3243                } else {
3244                    write!(s, "{}", self.solver_ref().pp(psi))
3245                }
3246            }
3247            Kind::Inter => {
3248                write!(s, "(")?;
3249                self.ppw(s, node_id.left(self))?;
3250                write!(s, "&")?;
3251                let mut curr = node_id.right(self);
3252                while self.get_kind(curr) == Kind::Inter {
3253                    let n = curr.left(self);
3254                    self.ppw(s, n)?;
3255                    write!(s, "&")?;
3256                    curr = curr.right(self);
3257                }
3258                self.ppw(s, curr)?;
3259                write!(s, ")")
3260            }
3261            Kind::Union => {
3262                let left = node_id.left(self);
3263                let right = node_id.right(self);
3264                write!(s, "(")?;
3265                self.ppw(s, left)?;
3266                write!(s, "|")?;
3267                let mut curr = right;
3268                while self.get_kind(curr) == Kind::Union {
3269                    let n = curr.left(self);
3270                    self.ppw(s, n)?;
3271                    write!(s, "|")?;
3272                    curr = curr.right(self);
3273                }
3274                self.ppw(s, curr)?;
3275                write!(s, ")")
3276            }
3277            Kind::Concat => {
3278                let left = node_id.left(self);
3279                let right = node_id.right(self);
3280                if right.is_star(self) && right.left(self) == left {
3281                    self.ppw(s, left)?;
3282                    write!(s, "+")?;
3283                    return Ok(());
3284                }
3285                if right.is_concat(self) {
3286                    let rl = right.left(self);
3287                    if rl.is_star(self) && rl.left(self) == left {
3288                        self.ppw(s, left)?;
3289                        write!(s, "+")?;
3290                        return self.ppw(s, right.right(self));
3291                    }
3292                }
3293                if right.is_concat(self) && right.left(self) == left {
3294                    let mut num = 1;
3295                    let mut right = right;
3296                    while right.is_concat(self) && right.left(self) == left {
3297                        num += 1;
3298                        right = right.right(self);
3299                    }
3300                    // (|X){n} followed by X{m} -> X{m,m+n}
3301                    if let Some(inner) = left.is_opt_v(self) {
3302                        let mut inner_count = 0;
3303                        let mut right2 = right;
3304                        while right2.is_concat(self) && right2.left(self) == inner {
3305                            inner_count += 1;
3306                            right2 = right2.right(self);
3307                        }
3308                        if right2 == inner {
3309                            inner_count += 1;
3310                            self.ppw(s, inner)?;
3311                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3312                        }
3313                        if inner_count > 0 {
3314                            self.ppw(s, inner)?;
3315                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3316                            return self.ppw(s, right2);
3317                        }
3318                    }
3319                    self.ppw(s, left)?;
3320                    if right == left {
3321                        num += 1;
3322                        return write!(s, "{{{}}}", num);
3323                    }
3324                    if num <= 3 && left.is_pred(self) {
3325                        for _ in 1..num {
3326                            self.ppw(s, left)?;
3327                        }
3328                        return self.ppw(s, right);
3329                    } else {
3330                        write!(s, "{{{}}}", num)?;
3331                        return self.ppw(s, right);
3332                    }
3333                }
3334                self.ppw(s, left)?;
3335                self.ppw(s, right)
3336            }
3337            Kind::Star => {
3338                let left = node_id.left(self);
3339                let leftkind = self.get_kind(left);
3340                match leftkind {
3341                    Kind::Concat | Kind::Star | Kind::Compl => {
3342                        write!(s, "(")?;
3343                        self.ppw(s, left)?;
3344                        write!(s, ")")?;
3345                    }
3346                    _ => {
3347                        self.ppw(s, left)?;
3348                    }
3349                };
3350                write!(s, "*")
3351            }
3352            Kind::Compl => {
3353                write!(s, "~(")?;
3354                self.ppw(s, node_id.left(self))?;
3355                write!(s, ")")
3356            }
3357            Kind::Lookbehind => {
3358                let lbleft = self.get_lookbehind_prev(node_id);
3359                let lbinner = self.get_lookbehind_inner(node_id);
3360                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3361                if lbleft != NodeId::MISSING {
3362                    write!(s, "「")?;
3363                    self.ppw(s, lbleft)?;
3364                    write!(s, "」")?;
3365                }
3366
3367                write!(s, "(?<=")?;
3368                self.ppw(s, lbinner)?;
3369                write!(s, ")")
3370            }
3371            Kind::Lookahead => {
3372                let inner = self.get_lookahead_inner(node_id);
3373                write!(s, "(?=")?;
3374                self.ppw(s, inner)?;
3375                write!(s, ")")?;
3376                if self.get_lookahead_rel(node_id) != 0 {
3377                    write!(s, "{{")?;
3378                    let rel = self.get_lookahead_rel(node_id);
3379                    if rel == u32::MAX {
3380                        write!(s, "∅")?;
3381                    } else {
3382                        write!(s, "{}", rel)?;
3383                    }
3384                    write!(s, "}}")?;
3385                }
3386                if node_id.right(self) == NodeId::MISSING {
3387                    Ok(())
3388                } else {
3389                    write!(s, "『")?;
3390                    self.ppw(s, node_id.right(self))?;
3391                    write!(s, "』")
3392                }
3393            }
3394            Kind::Counted => {
3395                let body = node_id.left(self);
3396                let packed = self.get_extra(node_id);
3397                let step = packed & 0xFFFF;
3398                let best = packed >> 16;
3399                write!(s, "#(")?;
3400                self.ppw(s, body)?;
3401                write!(s, ")s{}b{}", step, best)
3402            }
3403        }
3404    }
3405
3406    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3407        self.mk_concat(node, NodeId::TS)
3408    }
3409
3410    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3411        let node_ts = self.mk_concat(node, NodeId::TS);
3412        self.mk_compl(node_ts)
3413    }
3414
3415    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3416        let notset = self.solver().not_id(set);
3417        self.mk_pred(notset)
3418    }
3419
3420    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3421        let set_id = self.solver().u8_to_set_id(char);
3422        self.mk_pred(set_id)
3423    }
3424
3425    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3426        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3427        self.mk_pred(rangeset)
3428    }
3429
3430    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3431        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3432        for &(lo, hi) in &ranges[1..] {
3433            let r = self.mk_range_u8(lo, hi);
3434            node = self.mk_union(node, r);
3435        }
3436        node
3437    }
3438
3439    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3440        let mut prefix = Vec::new();
3441        let mut curr = node;
3442        loop {
3443            if curr == NodeId::EPS {
3444                let full = !prefix.is_empty();
3445                return (prefix, full);
3446            }
3447            if curr == NodeId::BOT {
3448                break;
3449            }
3450            if self.get_kind(curr) == Kind::Pred {
3451                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3452                    Some(byte) => {
3453                        prefix.push(byte);
3454                        return (prefix, true);
3455                    }
3456                    None => break, // multi-byte pred: pattern not fully consumed
3457                }
3458            }
3459            if self.get_kind(curr) != Kind::Concat {
3460                break;
3461            }
3462            let left = curr.left(self);
3463            if self.get_kind(left) != Kind::Pred {
3464                break;
3465            }
3466            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3467                Some(byte) => prefix.push(byte),
3468                None => break,
3469            }
3470            curr = curr.right(self);
3471        }
3472        (prefix, false)
3473    }
3474
3475    #[allow(dead_code)]
3476    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3477        let mut result = NodeId::EPS;
3478        for byte in raw_str.iter().rev() {
3479            let node = self.mk_u8(*byte);
3480            result = self.mk_concat(node, result);
3481        }
3482        result
3483    }
3484
3485    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3486        let mut result = NodeId::EPS;
3487        for byte in raw_str.bytes().rev() {
3488            let node = self.mk_u8(byte);
3489            result = self.mk_concat(node, result);
3490        }
3491        result
3492    }
3493
3494    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3495        let mut sorted: Vec<NodeId> = nodes.collect();
3496        if sorted.len() <= 1 {
3497            return sorted.pop().unwrap_or(NodeId::BOT);
3498        }
3499        sorted.sort();
3500        sorted.dedup();
3501        sorted.retain(|&x| x != NodeId::BOT);
3502        if sorted.is_empty() {
3503            return NodeId::BOT;
3504        }
3505        if sorted.len() > 16 {
3506            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3507            let mut non_concat: Vec<NodeId> = Vec::new();
3508            for &n in &sorted {
3509                if self.get_kind(n) == Kind::Concat {
3510                    by_head.entry(self.get_left(n)).or_default().push(n);
3511                } else {
3512                    non_concat.push(n);
3513                }
3514            }
3515            let mut absorbed: Vec<NodeId> = Vec::new();
3516            for &n in &non_concat {
3517                if by_head.contains_key(&n) {
3518                    absorbed.push(n);
3519                }
3520            }
3521            if !absorbed.is_empty() {
3522                non_concat.retain(|n| !absorbed.contains(n));
3523            }
3524            if by_head.len() < sorted.len() {
3525                let mut groups: Vec<NodeId> = non_concat;
3526                for (head, tails) in by_head {
3527                    let mut tail_nodes: Vec<NodeId> =
3528                        tails.iter().map(|&n| self.get_right(n)).collect();
3529                    if absorbed.contains(&head) {
3530                        tail_nodes.push(NodeId::EPS);
3531                    }
3532                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3533                    let factored = self.mk_concat(head, tail_union);
3534                    groups.push(factored);
3535                }
3536                groups.sort();
3537                groups.dedup();
3538                return self.mk_unions_balanced(&groups);
3539            }
3540        }
3541        self.mk_unions_balanced(&sorted)
3542    }
3543
3544    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3545        match nodes.len() {
3546            0 => NodeId::BOT,
3547            1 => nodes[0],
3548            n => {
3549                let mid = n / 2;
3550                let left = self.mk_unions_balanced(&nodes[..mid]);
3551                let right = self.mk_unions_balanced(&nodes[mid..]);
3552                self.mk_union(left, right)
3553            }
3554        }
3555    }
3556
3557    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3558        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3559    }
3560
3561    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3562        nodes
3563            .rev()
3564            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3565    }
3566}
3567
3568impl RegexBuilder {
3569    #[allow(dead_code)]
3570    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3571        match self.get_tregex(term_id).clone() {
3572            TRegex::Leaf(node_id) => {
3573                if NodeId::BOT == node_id {
3574                    vec![]
3575                } else {
3576                    vec![node_id]
3577                }
3578            }
3579            TRegex::ITE(_, then_id, else_id) => {
3580                let mut then_nodes = self.extract_sat(then_id);
3581                let mut else_nodes = self.extract_sat(else_id);
3582                then_nodes.append(&mut else_nodes);
3583                then_nodes
3584            }
3585        }
3586    }
3587
3588    pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3589        debug_assert!(self.get_kind(start) == Kind::Union);
3590        let mut curr = start;
3591        while self.get_kind(curr) == Kind::Union {
3592            f(curr.left(self));
3593            curr = curr.right(self);
3594        }
3595        f(curr);
3596    }
3597
3598    pub(crate) fn iter_unions_b(
3599        &mut self,
3600        curr: NodeId,
3601        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3602    ) {
3603        let mut curr = curr;
3604        while self.get_kind(curr) == Kind::Union {
3605            f(self, curr.left(self));
3606            curr = curr.right(self);
3607        }
3608        f(self, curr);
3609    }
3610
3611    pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3612        if !self.contains_look(node_id) {
3613            return Some(node_id);
3614        }
3615        match self.get_kind(node_id) {
3616            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3617            Kind::Concat => {
3618                let left = node_id.left(self);
3619                let right = node_id.right(self);
3620                let elim_l = self.try_elim_lookarounds(left)?;
3621                let elim_r = self.try_elim_lookarounds(right)?;
3622                let rw = self.mk_concat(elim_l, elim_r);
3623                Some(rw)
3624            }
3625            Kind::Union => {
3626                let left = node_id.left(self);
3627                let right = node_id.right(self);
3628                let elim_l = self.try_elim_lookarounds(left)?;
3629                let elim_r = self.try_elim_lookarounds(right)?;
3630                let rw = self.mk_union(elim_l, elim_r);
3631                Some(rw)
3632            }
3633
3634            Kind::Star => {
3635                let body = node_id.left(self);
3636                let elim_l = self.try_elim_lookarounds(body)?;
3637                Some(self.mk_star(elim_l))
3638            }
3639            Kind::Compl => {
3640                let left = node_id.left(self);
3641                let elim_l = self.try_elim_lookarounds(left)?;
3642                Some(self.mk_compl(elim_l))
3643            }
3644            Kind::Lookahead => {
3645                let rel = self.get_lookahead_rel(node_id);
3646                if rel != 0 {
3647                    return None;
3648                }
3649                let lbody = self.get_lookahead_inner(node_id);
3650                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3651                let elim_l = self.try_elim_lookarounds(lbody)?;
3652                let elim_r = self.try_elim_lookarounds(ltail)?;
3653                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3654                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3655                let rw = self.mk_inter(lbody_ts, ltail_ts);
3656                Some(rw)
3657            }
3658            Kind::Lookbehind => {
3659                let linner = self.get_lookbehind_inner(node_id);
3660                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3661                let elim_l = self.try_elim_lookarounds(linner)?;
3662                let elim_r = self.try_elim_lookarounds(lprev)?;
3663                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3664                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3665                let rw = self.mk_inter(lbody_ts, ltail_ts);
3666                Some(rw)
3667            }
3668            Kind::Inter => {
3669                let left = node_id.left(self);
3670                let right = node_id.right(self);
3671                let elim_l = self.try_elim_lookarounds(left)?;
3672                let elim_r = self.try_elim_lookarounds(right)?;
3673                let rw = self.mk_inter(elim_l, elim_r);
3674                Some(rw)
3675            }
3676            Kind::Counted => None,
3677        }
3678    }
3679
3680    /// R & _+ is a safe overapproximation of R that's nonempty
3681    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3682        if self.nullability(node) == Nullability::NEVER {
3683            node
3684        } else {
3685            self.mk_inter(NodeId::TOPPLUS, node)
3686        }
3687    }
3688
3689    fn iter_find_stack(
3690        &self,
3691        stack: &mut Vec<TRegexId>,
3692        mut f: impl FnMut(NodeId) -> bool,
3693    ) -> bool {
3694        loop {
3695            match stack.pop() {
3696                None => return false,
3697                Some(curr) => match self.get_tregex(curr) {
3698                    TRegex::Leaf(n) => {
3699                        let mut curr = *n;
3700                        while curr != NodeId::BOT {
3701                            match self.get_kind(curr) {
3702                                Kind::Union => {
3703                                    if f(curr.left(self)) {
3704                                        return true;
3705                                    }
3706                                    curr = curr.right(self);
3707                                }
3708                                _ => {
3709                                    if f(*n) {
3710                                        return true;
3711                                    }
3712                                    curr = NodeId::BOT;
3713                                }
3714                            }
3715                        }
3716                    }
3717                    TRegex::ITE(_, then_id, else_id) => {
3718                        if *else_id != TRegexId::BOT {
3719                            stack.push(*else_id);
3720                        }
3721                        stack.push(*then_id);
3722                    }
3723                },
3724            }
3725        }
3726    }
3727
3728    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3729        if node == NodeId::BOT {
3730            return Some(true);
3731        }
3732        if self.nullability(node) != Nullability::NEVER {
3733            return Some(false);
3734        }
3735        if let Some(cached) = self.cache_empty.get(&node) {
3736            if cached.is_checked() {
3737                return Some(cached.is_empty());
3738            }
3739        }
3740        let node = if !self.contains_look(node) {
3741            node
3742        } else {
3743            self.try_elim_lookarounds(node)?
3744        };
3745        let isempty_flag = self.is_empty_lang_internal(node);
3746
3747        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3748    }
3749
3750    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3751        // without inter, no need to check
3752        if !self.get_meta_flags(initial_node).contains_inter() {
3753            return Ok(NodeFlags::ZERO);
3754        }
3755
3756        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3757        let mut worklist: VecDeque<NodeId> = VecDeque::new();
3758        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3759        let mut stack = Vec::new();
3760        stack.push(begin_der);
3761        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3762            visited.insert(node, initial_node);
3763            let nullability = self.nullability(node);
3764            if nullability != Nullability::NEVER {
3765                true
3766            } else {
3767                worklist.push_back(node);
3768                false
3769            }
3770        });
3771        if found_nullable_right_away {
3772            return Ok(NodeFlags::ZERO);
3773        }
3774
3775        worklist.push_back(initial_node);
3776        let isempty_flag: NodeFlags;
3777        let mut found_node = NodeId::BOT;
3778
3779        loop {
3780            match worklist.pop_front() {
3781                None => {
3782                    isempty_flag = NodeFlags::IS_EMPTY;
3783                    break;
3784                }
3785                Some(outer) => {
3786                    if let Some(cached) = self.cache_empty.get(&outer) {
3787                        if cached.is_checked() {
3788                            if cached.is_empty() {
3789                                continue;
3790                            } else {
3791                                return Ok(NodeFlags::ZERO);
3792                            }
3793                        }
3794                    }
3795
3796                    let derivative = self.der(outer, Nullability::CENTER)?;
3797
3798                    stack.push(derivative);
3799
3800                    let found_null = self.iter_find_stack(&mut stack, |node| {
3801                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3802                            found_node = node;
3803                            if !self.get_meta_flags(node).contains_inter() {
3804                                true
3805                            } else {
3806                                e.insert(outer);
3807                                worklist.push_front(node);
3808                                self.any_nonbegin_nullable(node)
3809                            }
3810                        } else {
3811                            false
3812                        }
3813                    });
3814                    if found_null {
3815                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3816                        isempty_flag = NodeFlags::ZERO;
3817                        break;
3818                    }
3819                }
3820            }
3821        }
3822
3823        self.cache_empty.insert(
3824            initial_node,
3825            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3826        );
3827        Ok(isempty_flag)
3828    }
3829
3830    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3831    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3832        if larger_lang == smaller_lang {
3833            return Some(true);
3834        }
3835
3836        // assess initial nullability
3837        if self
3838            .nullability(larger_lang)
3839            .not()
3840            .and(self.nullability(smaller_lang))
3841            != Nullability::NEVER
3842        {
3843            return Some(false);
3844        }
3845
3846        // check language nullability
3847        // if (B &~ A) ≡ ⊥ then B ⊆ A
3848        // this means  L(B) \ L(A) = {}
3849        // eg. (a &~ .*) = ⊥ means .* subsumes a
3850        let (smaller_lang, larger_lang) =
3851            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3852                let wrap = |b: &mut Self, n: NodeId| {
3853                    let tmp = b.mk_concat(n, NodeId::TS);
3854                    b.mk_concat(NodeId::TS, tmp)
3855                };
3856                (wrap(self, smaller_lang), wrap(self, larger_lang))
3857            } else {
3858                (smaller_lang, larger_lang)
3859            };
3860
3861        let nota = self.mk_compl(larger_lang);
3862        let diff = self.mk_inter(smaller_lang, nota);
3863        self.is_empty_lang(diff)
3864    }
3865}