Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23    AnchorLimit,
24    StateSpaceExplosion,
25    UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30        match self {
31            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32            AlgebraError::StateSpaceExplosion => {
33                write!(f, "too many states, likely infinite state space")
34            }
35            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36        }
37    }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43    pub const MISSING: u32 = 0;
44    pub const BOT: u32 = 1;
45    pub const EPS: u32 = 2;
46    pub const TOP: u32 = 3;
47    pub const TOPSTAR: u32 = 4;
48    pub const TOPPLUS: u32 = 5;
49    pub const BEGIN: u32 = 6;
50    pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62    pub const MISSING: NodeId = NodeId(id::MISSING);
63    pub const BOT: NodeId = NodeId(id::BOT);
64    pub const EPS: NodeId = NodeId(id::EPS);
65    pub const TOP: NodeId = NodeId(id::TOP);
66    pub const TS: NodeId = NodeId(id::TOPSTAR);
67    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68    pub const BEGIN: NodeId = NodeId(id::BEGIN);
69    pub const END: NodeId = NodeId(id::END);
70
71    #[inline]
72    pub fn as_u32(self) -> u32 {
73        self.0
74    }
75
76    #[inline]
77    pub fn from_u32(v: u32) -> NodeId {
78        NodeId(v)
79    }
80}
81impl Debug for NodeId {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        let num = &self.0;
84        f.write_str(format!("{num}").as_str())
85    }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91    pub const MISSING: TRegexId = TRegexId(id::MISSING);
92    pub const EPS: TRegexId = TRegexId(id::EPS);
93    pub const BOT: TRegexId = TRegexId(id::BOT);
94    pub const TOP: TRegexId = TRegexId(id::TOP);
95    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
102
103    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106    pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110    #[inline]
111    pub(crate) fn nullability(self) -> Nullability {
112        Nullability(self.0 & Self::NULL_MASK)
113    }
114
115    #[inline]
116    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118    }
119
120    #[inline]
121    pub(crate) fn has(self, flag: MetaFlags) -> bool {
122        self.0 & flag.0 != 0
123    }
124    #[inline]
125    const fn and(self, other: MetaFlags) -> MetaFlags {
126        MetaFlags(self.0 & other.0)
127    }
128    #[inline]
129    const fn or(self, other: MetaFlags) -> MetaFlags {
130        MetaFlags(self.0 | other.0)
131    }
132
133    pub(crate) fn contains_lookaround(self) -> bool {
134        self.has(MetaFlags::CONTAINS_LOOKAROUND)
135    }
136    pub(crate) fn contains_inter(self) -> bool {
137        self.has(MetaFlags::CONTAINS_INTER)
138    }
139
140    pub(crate) fn all_contains_flags(self) -> MetaFlags {
141        self.and(
142            MetaFlags::CONTAINS_LOOKAROUND
143                .or(MetaFlags::CONTAINS_ANCHORS)
144                .or(MetaFlags::CONTAINS_INTER),
145        )
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152    #[default]
153    Pred,
154    Star,
155    Begin,
156    End,
157    Concat,
158    Union,
159    Compl,
160    Lookbehind,
161    Lookahead,
162    Inter,
163    Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168    flags: MetaFlags,
169    nulls: NullsId,
170}
171
172struct MetadataBuilder {
173    num_created: u32,
174    solver: Solver,
175    nb: NullsBuilder,
176    index: FxHashMap<Metadata, MetadataId>,
177    pub array: Vec<Metadata>,
178}
179
180mod helpers {
181    pub(crate) fn incr_rel(n1: u32) -> u32 {
182        match n1.overflowing_add(1) {
183            (_, true) => u32::MAX,
184            (res, false) => res,
185        }
186    }
187}
188
189impl MetadataBuilder {
190    fn new() -> MetadataBuilder {
191        Self {
192            index: FxHashMap::default(),
193            array: vec![Metadata {
194                flags: MetaFlags::ZERO,
195                nulls: NullsId::EMPTY,
196            }],
197            solver: Solver::new(),
198            num_created: 0,
199            nb: NullsBuilder::new(),
200        }
201    }
202
203    fn init(&mut self, inst: Metadata) -> MetadataId {
204        self.num_created += 1;
205        let new_id = MetadataId(self.num_created);
206        self.index.insert(inst.clone(), new_id);
207        self.array.push(inst);
208        new_id
209    }
210
211    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212        match self.index.get(&inst) {
213            Some(&id) => id,
214            None => self.init(inst),
215        }
216    }
217
218    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219        &self.array[inst.0 as usize]
220    }
221
222    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223        setflags.all_contains_flags()
224    }
225
226    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227        let left = &self.array[body.0 as usize].flags;
228        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229        // BOT* = EPS (empty string only), not infinite
230        let inf = if body_id == NodeId::BOT {
231            MetaFlags::ZERO
232        } else {
233            MetaFlags::INFINITE_LENGTH
234        };
235        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236    }
237
238    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239        let left = &self.array[left_id.0 as usize].flags;
240        let null = left.nullability().not().and(Nullability::ALWAYS);
241        let contains = self.get_contains(*left);
242        MetaFlags::with_nullability(
243            null,
244            contains
245                .or(MetaFlags::INFINITE_LENGTH)
246                .or(MetaFlags::CONTAINS_COMPL),
247        )
248    }
249
250    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251        let left = &self.array[left_id.0 as usize].flags;
252        let right = &self.array[right_id.0 as usize].flags;
253        let null = left.nullability().and(right.nullability());
254        let contains = self.get_contains(left.or(*right));
255        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256        MetaFlags::with_nullability(null, contains.or(len))
257    }
258
259    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260        let left = &self.array[left_id.0 as usize].flags;
261        let right = &self.array[right_id.0 as usize].flags;
262        let null = left.nullability().and(right.nullability());
263        let contains = self
264            .get_contains(left.or(*right))
265            .or(MetaFlags::CONTAINS_INTER);
266        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267        MetaFlags::with_nullability(null, contains.or(len))
268    }
269
270    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271        let left = &self.array[left_id.0 as usize].flags;
272        let right = &self.array[right_id.0 as usize].flags;
273        let null = left.nullability().or(right.nullability());
274        let contains = self.get_contains(left.or(*right));
275        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276        MetaFlags::with_nullability(null, contains.or(len))
277    }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282    pub(crate) kind: Kind,
283    pub(crate) left: NodeId,
284    pub(crate) right: NodeId,
285    pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290    Leaf(NodeId),
291    ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301    fn is_checked(self) -> bool {
302        self.0 >= NodeFlags::IS_CHECKED.0
303    }
304    fn is_empty(self) -> bool {
305        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306    }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317    mb: MetadataBuilder,
318    temp_vec: Vec<NodeId>,
319    num_created: u32,
320    index: FxHashMap<NodeKey, NodeId>,
321    array: Vec<NodeKey>,
322    metadata: Vec<MetadataId>,
323    reversed: Vec<NodeId>,
324    cache_empty: FxHashMap<NodeId, NodeFlags>,
325    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326    tr_array: Vec<TRegex<TSetId>>,
327    tr_der_center: Vec<TRegexId>,
328    tr_der_begin: Vec<TRegexId>,
329    flags: BuilderFlags,
330    /// maximum lookahead context distance before returning `AnchorLimit`.
331    pub lookahead_context_max: u32,
332    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336macro_rules! iter_inter {
337    ($compiler:ident, $start:ident, $expression:expr) => {
338        let mut curr = $start;
339        while $compiler.get_kind(curr) == Kind::Inter {
340            let left = $compiler.get_left(curr);
341            $expression(left);
342            curr = $compiler.get_right(curr);
343        }
344        $expression(curr);
345    };
346}
347
348impl NodeId {
349    fn is_missing(&self) -> bool {
350        *self == NodeId::MISSING
351    }
352    #[inline]
353    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
354        b.get_flags_contains(self)
355    }
356    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
357        if self == tail {
358            true
359        } else if self.is_kind(b, Kind::Concat) {
360            self.right(b).has_concat_tail(b, tail)
361        } else {
362            false
363        }
364    }
365    fn missing_to_eps(&self) -> NodeId {
366        if *self == NodeId::MISSING {
367            NodeId::EPS
368        } else {
369            *self
370        }
371    }
372
373    #[inline]
374    fn kind(self, b: &RegexBuilder) -> Kind {
375        b.get_kind(self)
376    }
377    #[inline]
378    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
379        b.get_kind(self) == k
380    }
381    #[inline]
382    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
383        b.nullability(self) == Nullability::NEVER
384    }
385    #[inline]
386    fn nullability(self, b: &RegexBuilder) -> Nullability {
387        b.nullability(self)
388    }
389
390    #[inline]
391    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
392        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
393    }
394    #[inline]
395    pub fn left(self, b: &RegexBuilder) -> NodeId {
396        b.get_left(self)
397    }
398
399    #[inline]
400    pub fn right(self, b: &RegexBuilder) -> NodeId {
401        b.get_right(self)
402    }
403
404    #[inline]
405    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
406        b.der(self, mask)
407    }
408
409    #[inline]
410    fn extra(self, b: &RegexBuilder) -> u32 {
411        b.get_extra(self)
412    }
413
414    #[inline]
415    fn is_pred(self, b: &RegexBuilder) -> bool {
416        b.get_kind(self) == Kind::Pred
417    }
418    #[inline]
419    fn is_lookahead(self, b: &RegexBuilder) -> bool {
420        b.get_kind(self) == Kind::Lookahead
421    }
422    #[inline]
423    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
424        debug_assert!(self.is_pred(b));
425        TSetId(b.get_extra(self))
426    }
427    #[inline]
428    fn is_star(self, b: &RegexBuilder) -> bool {
429        if NodeId::EPS == self {
430            return false;
431        }
432        b.get_kind(self) == Kind::Star
433    }
434
435    #[inline]
436    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
437        if self.is_concat(b) {
438            let r = self.right(b);
439            return r.is_star(b) && r.left(b) == self.left(b);
440        }
441        false
442    }
443
444    #[inline]
445    fn is_concat(self, b: &RegexBuilder) -> bool {
446        b.get_kind(self) == Kind::Concat
447    }
448
449    #[inline]
450    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
451        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
452            Some(b.get_right(self))
453        } else {
454            None
455        }
456    }
457
458    #[inline]
459    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
460        if b.get_kind(self) == Kind::Concat {
461            let left = self.left(b);
462            let right = self.right(b);
463            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
464                return right == NodeId::END;
465            }
466        }
467        false
468    }
469
470    #[inline]
471    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
472        if NodeId::EPS == self {
473            return None;
474        }
475        if self.is_star(b) && self.left(b).is_pred(b) {
476            Some(self.left(b))
477        } else {
478            None
479        }
480    }
481
482    #[inline]
483    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
484        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
485            let middle = self.right(b);
486            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
487                return Some(middle.left(b));
488            }
489        }
490        None
491    }
492
493    #[inline]
494    pub(crate) fn iter_union(
495        self,
496        b: &mut RegexBuilder,
497        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
498    ) {
499        let mut curr: NodeId = self;
500        while curr.kind(b) == Kind::Union {
501            f(b, curr.left(b));
502            curr = curr.right(b);
503        }
504        f(b, curr);
505    }
506
507    #[inline]
508    pub(crate) fn iter_union_while(
509        self,
510        b: &mut RegexBuilder,
511        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
512    ) {
513        let mut curr: NodeId = self;
514        let mut continue_loop = true;
515        while continue_loop && curr.kind(b) == Kind::Union {
516            continue_loop = f(b, curr.left(b));
517            curr = curr.right(b);
518        }
519        if continue_loop {
520            f(b, curr);
521        }
522    }
523}
524
525impl Default for RegexBuilder {
526    fn default() -> Self {
527        Self::new()
528    }
529}
530
531impl RegexBuilder {
532    pub fn new() -> RegexBuilder {
533        let mut inst = Self {
534            mb: MetadataBuilder::new(),
535            array: Vec::new(),
536            index: FxHashMap::default(),
537            cache_empty: FxHashMap::default(),
538            tr_array: Vec::new(),
539            tr_cache: FxHashMap::default(),
540            flags: BuilderFlags::ZERO,
541            lookahead_context_max: 800,
542            num_created: 0,
543            metadata: Vec::new(),
544            reversed: Vec::new(),
545            tr_der_center: Vec::new(),
546            tr_der_begin: Vec::new(),
547            temp_vec: Vec::new(),
548            mk_binary_memo: FxHashMap::default(),
549            clean_cache: FxHashMap::default(),
550        };
551        inst.array.push(NodeKey::default());
552        inst.mk_pred(TSetId::EMPTY);
553        inst.mk_star(NodeId::BOT);
554        inst.mk_pred(TSetId::FULL);
555        inst.mk_star(NodeId::TOP);
556        let top_plus_id = inst.mk_plus(NodeId::TOP);
557        inst.mk_unset(Kind::Begin);
558        inst.mk_unset(Kind::End);
559        debug_assert!(top_plus_id == NodeId::TOPPLUS);
560
561        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
562        inst.mk_leaf(NodeId::BOT);
563        inst.mk_leaf(NodeId::EPS);
564        inst.mk_leaf(NodeId::TOP);
565        inst.mk_leaf(NodeId::TS);
566
567        inst.flags = BuilderFlags::SUBSUME;
568        inst
569    }
570
571    #[inline]
572    pub fn solver_ref(&self) -> &Solver {
573        &self.mb.solver
574    }
575
576    #[inline]
577    pub fn solver(&mut self) -> &mut Solver {
578        &mut self.mb.solver
579    }
580
581    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
582        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
583        self.tr_cache.insert(inst.clone(), new_id);
584        self.tr_array.push(inst);
585        new_id
586    }
587
588    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
589        match self.tr_cache.get(&inst) {
590            Some(&id) => id,
591            None => self.tr_init(inst),
592        }
593    }
594
595    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
596        &self.tr_array[inst.0 as usize]
597    }
598
599    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
600        let solv = self.solver();
601        !solv.is_sat_id(cond1, cond2)
602    }
603
604    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
605        let term = TRegex::<TSetId>::Leaf(node_id);
606        self.get_tregex_id(term)
607    }
608
609    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
610        let tmp_inst = TRegex::ITE(cond, _then, _else);
611        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
612            return *cached;
613        }
614        if _then == _else {
615            return _then;
616        }
617        if self.solver().is_full_id(cond) {
618            return _then;
619        }
620        if self.solver().is_empty_id(cond) {
621            return _else;
622        }
623        let _clean_then = match self.tr_array[_then.0 as usize] {
624            TRegex::Leaf(_) => _then,
625            _ => self.clean(cond, _then),
626        };
627        let notcond = self.solver().not_id(cond);
628        let _clean_else = match self.tr_array[_else.0 as usize] {
629            TRegex::Leaf(_) => _else,
630            _ => self.clean(notcond, _else),
631        };
632
633        if _clean_then == _clean_else {
634            return _clean_then;
635        }
636        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
637        match self.get_tregex(_clean_then) {
638            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
639                let _cond_then = *leftcond;
640                let new_then = *_inner_then;
641                let sand = self.solver().and_id(cond, _cond_then);
642                return self.mk_ite(sand, new_then, _clean_else);
643            }
644            _ => {}
645        }
646        // attempt right side flattening:
647        match self.get_tregex(_clean_else) {
648            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
649            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
650                let e2clone = *_e2;
651                let new_cond = self.mb.solver.or_id(cond, *_c2);
652                return self.mk_ite(new_cond, _clean_then, e2clone);
653            }
654            _ => {}
655        }
656
657        if _clean_then == TRegexId::BOT {
658            let flip_cond = self.solver().not_id(cond);
659            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
660            return cleaned_id;
661        }
662
663        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
664    }
665
666    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
667        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
668            return cached;
669        }
670        let result = match *self.get_tregex(tterm) {
671            TRegex::Leaf(_) => tterm,
672            TRegex::ITE(alpha, _then_id, _else_id) => {
673                let notalpha = self.mb.solver.not_id(alpha);
674                if self.mb.solver.unsat_id(beta, alpha) {
675                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
676                    self.clean(beta, _else_id)
677                } else if self.unsat(beta, notalpha) {
678                    // beta ⊆ alpha, so beta ∧ alpha = beta
679                    self.clean(beta, _then_id)
680                } else {
681                    let tc = self.mb.solver.and_id(beta, alpha);
682                    let ec = self.mb.solver.and_id(beta, notalpha);
683                    let new_then = self.clean(tc, _then_id);
684                    let new_else = self.clean(ec, _else_id);
685                    self.mk_ite(alpha, new_then, new_else)
686                }
687            }
688        };
689        self.clean_cache.insert((beta, tterm), result);
690        result
691    }
692
693    fn mk_unary(
694        &mut self,
695        term: TRegexId,
696        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
697    ) -> TRegexId {
698        match self.tr_array[term.0 as usize] {
699            TRegex::Leaf(node_id) => {
700                let applied = apply(self, node_id);
701                self.mk_leaf(applied)
702            }
703            TRegex::ITE(c1, _then, _else) => {
704                let _then1 = self.mk_unary(_then, apply);
705                let _else1 = self.mk_unary(_else, apply);
706                self.mk_ite(c1, _then1, _else1)
707            }
708        }
709    }
710
711    fn mk_binary_result(
712        &mut self,
713        left: TRegexId,
714        right: TRegexId,
715        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
716    ) -> Result<TRegexId, AlgebraError> {
717        match self.tr_array[left.0 as usize] {
718            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
719                TRegex::Leaf(right_node_id) => {
720                    let applied = apply(self, left_node_id, right_node_id)?;
721                    Ok(self.mk_leaf(applied))
722                }
723                TRegex::ITE(c2, _then, _else) => {
724                    let then2 = self.mk_binary_result(left, _then, apply)?;
725                    let else2 = self.mk_binary_result(left, _else, apply)?;
726                    Ok(self.mk_ite(c2, then2, else2))
727                }
728            },
729            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
730                TRegex::Leaf(_) => {
731                    let then2 = self.mk_binary_result(_then1, right, apply)?;
732                    let else2 = self.mk_binary_result(_else1, right, apply)?;
733                    Ok(self.mk_ite(c1, then2, else2))
734                }
735                TRegex::ITE(c2, _then2, _else2) => {
736                    if c1 == c2 {
737                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
738                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
739                        return Ok(self.mk_ite(c1, _then, _else));
740                    }
741                    if c1.0 > c2.0 {
742                        let _then = self.mk_binary_result(_then1, right, apply)?;
743                        let _else = self.mk_binary_result(_else1, right, apply)?;
744                        Ok(self.mk_ite(c1, _then, _else))
745                    } else {
746                        let _then = self.mk_binary_result(left, _then2, apply)?;
747                        let _else = self.mk_binary_result(left, _else2, apply)?;
748                        Ok(self.mk_ite(c2, _then, _else))
749                    }
750                }
751            },
752        }
753    }
754
755    fn mk_binary(
756        &mut self,
757        left: TRegexId,
758        right: TRegexId,
759        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
760    ) -> TRegexId {
761        self.mk_binary_memo.clear();
762        self.mk_binary_inner(left, right, apply)
763    }
764
765    fn mk_binary_inner(
766        &mut self,
767        left: TRegexId,
768        right: TRegexId,
769        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
770    ) -> TRegexId {
771        if left == right {
772            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
773        }
774        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
775            return cached;
776        }
777        let result = match self.tr_array[left.0 as usize] {
778            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
779                TRegex::Leaf(right_node_id) => {
780                    let applied = apply(self, left_node_id, right_node_id);
781                    self.mk_leaf(applied)
782                }
783                TRegex::ITE(c2, _then, _else) => {
784                    let then2 = self.mk_binary_inner(left, _then, apply);
785                    let else2 = self.mk_binary_inner(left, _else, apply);
786                    self.mk_ite(c2, then2, else2)
787                }
788            },
789            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
790                TRegex::Leaf(_) => {
791                    let then2 = self.mk_binary_inner(_then1, right, apply);
792                    let else2 = self.mk_binary_inner(_else1, right, apply);
793                    self.mk_ite(c1, then2, else2)
794                }
795                TRegex::ITE(c2, _then2, _else2) => {
796                    if c1 == c2 {
797                        let _then = self.mk_binary_inner(_then1, _then2, apply);
798                        let _else = self.mk_binary_inner(_else1, _else2, apply);
799                        self.mk_ite(c1, _then, _else)
800                    } else if c1.0 > c2.0 {
801                        let _then = self.mk_binary_inner(_then1, right, apply);
802                        let _else = self.mk_binary_inner(_else1, right, apply);
803                        self.mk_ite(c1, _then, _else)
804                    } else {
805                        let _then = self.mk_binary_inner(left, _then2, apply);
806                        let _else = self.mk_binary_inner(left, _else2, apply);
807                        self.mk_ite(c2, _then, _else)
808                    }
809                }
810            },
811        };
812        self.mk_binary_memo.insert((left, right), result);
813        result
814    }
815
816    pub fn get_nulls(
817        &mut self,
818        pending_rel: u32,
819        mask: Nullability,
820        acc: &mut BTreeSet<NullState>,
821        node_id: NodeId,
822    ) {
823        debug_assert!(node_id != NodeId::MISSING);
824        if !self.is_nullable(node_id, mask) {
825            return;
826        }
827        match self.get_kind(node_id) {
828            Kind::Pred => {}
829            Kind::End => {
830                if mask.has(Nullability::END) {
831                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
832                }
833            }
834            Kind::Begin => {
835                if mask.has(Nullability::BEGIN) {
836                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
837                }
838            }
839            Kind::Concat => {
840                let new_mask = self.nullability(node_id).and(mask);
841                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
842                if self.is_nullable(node_id.left(self), mask) {
843                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
844                }
845            }
846            Kind::Union => {
847                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
848                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
849            }
850            Kind::Inter => {
851                let new_mask = self.nullability(node_id).and(mask);
852                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
853                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
854            }
855            Kind::Star => {
856                acc.insert(NullState::new(mask, pending_rel));
857                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
858            }
859            Kind::Compl => {
860                if !self.is_nullable(node_id.left(self), mask) {
861                    acc.insert(NullState::new(mask, 0));
862                }
863            }
864            Kind::Lookbehind => {
865                let new_mask = self.nullability(node_id).and(mask);
866                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
867                if node_id.right(self) != NodeId::MISSING {
868                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
869                }
870            }
871            Kind::Lookahead => {
872                let la_inner = self.get_lookahead_inner(node_id);
873                if self.is_nullable(la_inner, mask) {
874                    let rel = self.get_lookahead_rel(node_id);
875                    if rel != u32::MAX {
876                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
877                    }
878                    // tail only contributes when body is satisfied
879                    let la_tail = self.get_lookahead_tail(node_id);
880                    if la_tail != NodeId::MISSING {
881                        self.get_nulls(pending_rel, mask, acc, la_tail);
882                    }
883                }
884            }
885            Kind::Counted => {
886                let packed = self.get_extra(node_id);
887                let best = packed >> 16;
888                if best > 0 {
889                    acc.insert(NullState::new(mask, pending_rel + best));
890                }
891            }
892        }
893    }
894
895    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
896        self.get_meta_flags(node_id)
897            .has(MetaFlags::CONTAINS_LOOKAROUND)
898    }
899
900    /// whether node contains `^`, `$`, `\A`, `\z` anchors.
901    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
902        self.get_meta_flags(node_id)
903            .has(MetaFlags::CONTAINS_ANCHORS)
904    }
905
906    pub fn is_infinite(&self, node_id: NodeId) -> bool {
907        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
908    }
909
910    /// returns (min_length, max_length). max = u32::MAX means unbounded.
911    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
912        if self.is_infinite(node_id) {
913            if self.get_kind(node_id) == Kind::Inter {
914                self.get_bounded_length(node_id)
915            } else {
916                (self.get_min_length_only(node_id), u32::MAX)
917            }
918        } else {
919            self.get_bounded_length(node_id)
920        }
921    }
922
923    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
924        if node_id == NodeId::EPS {
925            return (0, 0);
926        }
927        match self.get_kind(node_id) {
928            Kind::End | Kind::Begin => (0, 0),
929            Kind::Pred => (1, 1),
930            Kind::Concat => {
931                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
932                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
933                (lmin + rmin, lmax.saturating_add(rmax))
934            }
935            Kind::Union => {
936                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
937                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
938                (lmin.min(rmin), lmax.max(rmax))
939            }
940            Kind::Inter => {
941                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
942                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
943                (lmin.max(rmin), lmax.min(rmax))
944            }
945            Kind::Lookahead => {
946                let body = node_id.left(self);
947                if self.is_infinite(body) {
948                    return (0, u32::MAX);
949                }
950                let right = node_id.right(self);
951                if right.is_missing() {
952                    (0, 0)
953                } else {
954                    self.get_min_max_length(right)
955                }
956            }
957            Kind::Counted => (0, 0),
958            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
959        }
960    }
961
962    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
963        match self.get_kind(node_id) {
964            Kind::End | Kind::Begin => Some(0),
965            Kind::Pred => Some(1),
966            Kind::Concat => {
967                let l = self.get_fixed_length(node_id.left(self))?;
968                let r = self.get_fixed_length(node_id.right(self))?;
969                Some(l + r)
970            }
971            Kind::Union => {
972                let l = self.get_fixed_length(node_id.left(self))?;
973                let r = self.get_fixed_length(node_id.right(self))?;
974                if l == r {
975                    Some(l)
976                } else {
977                    None
978                }
979            }
980            Kind::Inter => {
981                let l = self.get_fixed_length(node_id.left(self))?;
982                let r = self.get_fixed_length(node_id.right(self))?;
983                if l == r {
984                    Some(l)
985                } else {
986                    None
987                }
988            }
989            Kind::Lookahead => {
990                let right = node_id.right(self);
991                if right.is_missing() {
992                    Some(0)
993                } else {
994                    self.get_fixed_length(right)
995                }
996            }
997            Kind::Counted => Some(0),
998            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
999        }
1000    }
1001
1002    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1003        match self.get_kind(node_id) {
1004            Kind::End | Kind::Begin => 0,
1005            Kind::Pred => 1,
1006            Kind::Concat => {
1007                self.get_min_length_only(node_id.left(self))
1008                    + self.get_min_length_only(node_id.right(self))
1009            }
1010            Kind::Union => self
1011                .get_min_length_only(node_id.left(self))
1012                .min(self.get_min_length_only(node_id.right(self))),
1013            Kind::Inter => self
1014                .get_min_length_only(node_id.left(self))
1015                .max(self.get_min_length_only(node_id.right(self))),
1016            Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1017            Kind::Compl => {
1018                if self.nullability(node_id.left(self)) == Nullability::NEVER {
1019                    0
1020                } else {
1021                    1
1022                }
1023            }
1024        }
1025    }
1026
1027    fn starts_with_ts(&self, node_id: NodeId) -> bool {
1028        if node_id == NodeId::TS {
1029            return true;
1030        }
1031        match self.get_kind(node_id) {
1032            Kind::Inter => {
1033                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1034            }
1035            Kind::Union => {
1036                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1037            }
1038            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1039            _ => false,
1040        }
1041    }
1042
1043    #[inline]
1044    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1045        if self.get_kind(node_id) == Kind::Concat {
1046            self.ends_with_ts(node_id.right(self))
1047        } else {
1048            node_id == NodeId::TS
1049        }
1050    }
1051
1052    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1053        debug_assert!(node_id != NodeId::MISSING);
1054        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1055    }
1056
1057    pub(crate) fn cache_der(
1058        &mut self,
1059        node_id: NodeId,
1060        result: TRegexId,
1061        mask: Nullability,
1062    ) -> TRegexId {
1063        if mask == Nullability::CENTER {
1064            self.tr_der_center[node_id.0 as usize] = result
1065        } else {
1066            self.tr_der_begin[node_id.0 as usize] = result
1067        };
1068        result
1069    }
1070
1071    pub(crate) fn try_cached_der(
1072        &mut self,
1073        node_id: NodeId,
1074        mask: Nullability,
1075    ) -> Option<TRegexId> {
1076        let cache = if mask == Nullability::CENTER {
1077            &mut self.tr_der_center
1078        } else {
1079            &mut self.tr_der_begin
1080        };
1081        match cache.get(node_id.0 as usize) {
1082            Some(&TRegexId::MISSING) => {}
1083            Some(&result) => {
1084                return Some(result);
1085            }
1086            None => {
1087                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1088            }
1089        }
1090        None
1091    }
1092
1093    pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1094        let mut term = self.get_tregex(der);
1095        loop {
1096            match *term {
1097                TRegex::Leaf(node_id) => return node_id,
1098                TRegex::ITE(cond, _then, _else) => {
1099                    if self.solver().is_sat_id(set, cond) {
1100                        term = self.get_tregex(_then);
1101                    } else {
1102                        term = self.get_tregex(_else);
1103                    }
1104                }
1105            }
1106        }
1107    }
1108
1109    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1110        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1111        debug_assert!(
1112            node_id != NodeId::MISSING,
1113            "attempting to derive missing node"
1114        );
1115        if let Some(result) = self.try_cached_der(node_id, mask) {
1116            return Ok(result);
1117        }
1118
1119        let result = match node_id.kind(self) {
1120            Kind::Compl => {
1121                let leftd = node_id.left(self).der(self, mask)?;
1122                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1123            }
1124            Kind::Inter => {
1125                let leftd = node_id.left(self).der(self, mask)?;
1126                let rightd = node_id.right(self).der(self, mask)?;
1127                {
1128                    let this = &mut *self;
1129                    this.mk_binary(
1130                        leftd,
1131                        rightd,
1132                        &mut (|b, left, right| b.mk_inter(left, right)),
1133                    )
1134                }
1135            }
1136            Kind::Union => {
1137                let leftd = self.der(node_id.left(self), mask)?;
1138                let rightd = self.der(node_id.right(self), mask)?;
1139                {
1140                    let this = &mut *self;
1141                    this.mk_binary(
1142                        leftd,
1143                        rightd,
1144                        &mut (|b, left, right| b.mk_union(left, right)),
1145                    )
1146                }
1147            }
1148            Kind::Concat => {
1149                let head = node_id.left(self);
1150                let tail = node_id.right(self);
1151                let tail_leaf = self.mk_leaf(tail);
1152                let head_der = self.der(head, mask)?;
1153
1154                if self.is_nullable(head, mask) {
1155                    let rightd = self.der(tail, mask)?;
1156                    let ldr = {
1157                        let this = &mut *self;
1158                        this.mk_binary(
1159                            head_der,
1160                            tail_leaf,
1161                            &mut (|b, left, right| b.mk_concat(left, right)),
1162                        )
1163                    };
1164                    {
1165                        let this = &mut *self;
1166                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1167                    }
1168                } else {
1169                    let this = &mut *self;
1170                    this.mk_binary(
1171                        head_der,
1172                        tail_leaf,
1173                        &mut (|b, left, right| b.mk_concat(left, right)),
1174                    )
1175                }
1176            }
1177            Kind::Star => {
1178                if node_id == NodeId::EPS {
1179                    TRegexId::BOT
1180                } else {
1181                    let left = node_id.left(self);
1182                    let r_decr_leaf = self.mk_leaf(node_id);
1183                    let r_der = self.der(left, mask)?;
1184                    let this = &mut *self;
1185                    this.mk_binary(
1186                        r_der,
1187                        r_decr_leaf,
1188                        &mut (|b, left, right| b.mk_concat(left, right)),
1189                    )
1190                }
1191            }
1192            Kind::Lookbehind => {
1193                let lb_prev_der = {
1194                    let lb_prev = self.get_lookbehind_prev(node_id);
1195                    if lb_prev == NodeId::MISSING {
1196                        TRegexId::MISSING
1197                    } else {
1198                        self.der(lb_prev, mask)?
1199                    }
1200                };
1201                let lb_inner = self.get_lookbehind_inner(node_id);
1202                let lb_inner_der = self.der(lb_inner, mask)?;
1203                {
1204                    let this = &mut *self;
1205                    this.mk_binary_result(
1206                        lb_inner_der,
1207                        lb_prev_der,
1208                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1209                    )?
1210                }
1211            }
1212            Kind::Lookahead => {
1213                let la_tail = self.get_lookahead_tail(node_id);
1214                let la_body = node_id.left(self);
1215                let rel = self.get_lookahead_rel(node_id);
1216
1217                if self.is_nullable(la_body, mask) {
1218                    // nullabilty is taken once, just keep the body
1219                    let right = node_id.right(self).missing_to_eps();
1220                    let rder = self.der(right, mask).clone();
1221                    return rder;
1222                }
1223
1224                if rel == u32::MAX {
1225                    let la_body_der = self.der(la_body, mask)?;
1226                    if la_tail.is_kind(self, Kind::Pred) {
1227                        let transitioned =
1228                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1229                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1230                        let concated = self.mk_concat(la_tail, new_la);
1231                        return self.der(concated, mask);
1232                    }
1233                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1234                        let left = la_tail.left(self);
1235                        let tset = left.pred_tset(self);
1236                        let transitioned = self.transition_term(la_body_der, tset);
1237                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1238                        let tail_right = la_tail.right(self);
1239                        let concated = self.mk_concat(new_la, tail_right);
1240                        let concated = self.mk_concat(left, concated);
1241                        return self.der(concated, mask);
1242                    }
1243                }
1244
1245                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1246                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1247                    let concated = self.mk_concat(la_body, nulls_mask);
1248                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1249                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1250                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1251                    let new_union = self.mk_union(concated_look, new_look);
1252                    return self.der(new_union, mask);
1253                }
1254
1255                let la_tail_der = if la_tail == NodeId::MISSING {
1256                    TRegexId::MISSING
1257                } else {
1258                    if self.is_nullable(la_tail, mask) {
1259                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1260                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1261                        let la_union = self.mk_union(la_tail, nulls_la);
1262                        self.der(la_union, mask)?
1263                    } else {
1264                        self.der(la_tail, mask)?
1265                    }
1266                };
1267
1268                let la_body_der = self.der(la_body, mask)?;
1269
1270                if rel != u32::MAX && rel > self.lookahead_context_max {
1271                    return Err(AlgebraError::AnchorLimit);
1272                }
1273
1274                let la = {
1275                    let this = &mut *self;
1276                    let rel = helpers::incr_rel(rel);
1277                    this.mk_binary(
1278                        la_body_der,
1279                        la_tail_der,
1280                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1281                    )
1282                };
1283
1284                if rel != u32::MAX
1285                    && la_tail_der != TRegexId::MISSING
1286                    && self.is_nullable(la_tail, mask)
1287                {
1288                    let look_only = {
1289                        let this = &mut *self;
1290                        let right = TRegexId::MISSING;
1291                        let rel = helpers::incr_rel(rel);
1292                        this.mk_binary(
1293                            la_body_der,
1294                            right,
1295                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1296                        )
1297                    };
1298                    {
1299                        let this = &mut *self;
1300                        this.mk_binary(
1301                            look_only,
1302                            la,
1303                            &mut (|b, left, right| b.mk_union(left, right)),
1304                        )
1305                    }
1306                } else {
1307                    la
1308                }
1309            }
1310            Kind::Counted => {
1311                let body = node_id.left(self);
1312                let chain = node_id.right(self);
1313                let packed = self.get_extra(node_id);
1314                let step = (packed & 0xFFFF) as u16;
1315                let best = (packed >> 16) as u16;
1316
1317                let mid_best = if self.is_nullable(body, mask) && step >= best {
1318                    step
1319                } else {
1320                    best
1321                };
1322
1323                let body_der = self.der(body, mask)?;
1324                let new_step = step.saturating_add(1);
1325                self.mk_unary(body_der, &mut |b, new_body| {
1326                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1327                        new_step
1328                    } else {
1329                        mid_best
1330                    };
1331                    let packed = (final_best as u32) << 16 | new_step as u32;
1332                    b.mk_counted(new_body, chain, packed)
1333                })
1334            }
1335            Kind::Begin | Kind::End => TRegexId::BOT,
1336            Kind::Pred => {
1337                let psi = node_id.pred_tset(self);
1338                if psi == TSetId::EMPTY {
1339                    TRegexId::BOT
1340                } else {
1341                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1342                }
1343            }
1344        };
1345
1346        // println!("{} {}", node_id.0, self.pp(node_id));
1347        // println!("node: {} (total: {})", node_id.0, self.num_created);
1348
1349        self.cache_der(node_id, result, mask);
1350        Ok(result)
1351    }
1352
1353    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1354        debug_assert!(meta_id != MetadataId::MISSING);
1355        match self.metadata.get_mut(node_id.0 as usize) {
1356            Some(v) => *v = meta_id,
1357            None => {
1358                self.metadata
1359                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1360                self.metadata[node_id.0 as usize] = meta_id;
1361            }
1362        }
1363    }
1364
1365    fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1366        debug_assert!(reversed_id != NodeId::MISSING);
1367        match self.reversed.get_mut(node_id.0 as usize) {
1368            Some(v) => *v = reversed_id,
1369            None => {
1370                self.reversed
1371                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1372                self.reversed[node_id.0 as usize] = reversed_id;
1373            }
1374        }
1375    }
1376
1377    fn init(&mut self, inst: NodeKey) -> NodeId {
1378        self.num_created += 1;
1379        let node_id = NodeId(self.num_created);
1380        self.index.insert(inst.clone(), node_id);
1381        match inst.kind {
1382            Kind::Pred => {
1383                let meta_id = self.mb.get_meta_id(Metadata {
1384                    flags: MetaFlags::ZERO,
1385                    nulls: NullsId::EMPTY,
1386                });
1387                self.init_metadata(node_id, meta_id);
1388            }
1389            Kind::Begin => {
1390                let meta_id = self.mb.get_meta_id(Metadata {
1391                    flags: MetaFlags::with_nullability(
1392                        Nullability::BEGIN,
1393                        MetaFlags::CONTAINS_ANCHORS,
1394                    ),
1395                    nulls: NullsId::BEGIN0,
1396                });
1397                self.init_metadata(node_id, meta_id);
1398            }
1399            Kind::End => {
1400                let meta_id = self.mb.get_meta_id(Metadata {
1401                    flags: MetaFlags::with_nullability(
1402                        Nullability::END,
1403                        MetaFlags::CONTAINS_ANCHORS,
1404                    ),
1405                    nulls: NullsId::END0,
1406                });
1407                self.init_metadata(node_id, meta_id);
1408            }
1409            Kind::Inter => {
1410                let m1 = self.get_node_meta_id(inst.left);
1411                let m2 = self.get_node_meta_id(inst.right);
1412                let meta_id = {
1413                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1414                    let mask_l = inst.left.nullability(self);
1415                    let mask_r = inst.right.nullability(self);
1416                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1417                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1418                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1419                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1420                    let new_meta = Metadata {
1421                        flags: self.mb.flags_inter(m1, m2),
1422                        nulls,
1423                    };
1424                    self.mb.get_meta_id(new_meta)
1425                };
1426                self.init_metadata(node_id, meta_id);
1427            }
1428            Kind::Union => {
1429                let m1 = self.get_node_meta_id(inst.left);
1430                let m2 = self.get_node_meta_id(inst.right);
1431                let meta_id = {
1432                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1433                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1434                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1435                    let new_meta = Metadata {
1436                        flags: self.mb.flags_union(m1, m2),
1437                        nulls,
1438                    };
1439                    self.mb.get_meta_id(new_meta)
1440                };
1441                self.init_metadata(node_id, meta_id);
1442            }
1443            Kind::Concat => {
1444                let flags = self.mb.flags_concat(
1445                    self.get_node_meta_id(inst.left),
1446                    self.get_node_meta_id(inst.right),
1447                );
1448
1449                let right_nullability = inst.right.nullability(self);
1450                let left_nullability = inst.left.nullability(self);
1451                let nulls_left = self.get_nulls_id(inst.left);
1452                let nulls_right = self.get_nulls_id(inst.right);
1453                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1454                let mask = right_nullability.and(left_nullability);
1455                nulls = self.mb.nb.and_mask(nulls, mask);
1456
1457                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1458                self.init_metadata(node_id, new_id);
1459            }
1460            Kind::Star => {
1461                let left_nulls = self.get_nulls_id(inst.left);
1462                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1463                let meta_id = self.mb.get_meta_id(Metadata {
1464                    flags: self
1465                        .mb
1466                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1467                    nulls,
1468                });
1469                self.init_metadata(node_id, meta_id);
1470            }
1471            Kind::Compl => {
1472                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1473                let meta_id = self.mb.get_meta_id(Metadata {
1474                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1475                    nulls,
1476                });
1477                self.init_metadata(node_id, meta_id);
1478            }
1479            Kind::Lookbehind => {
1480                let mut null = self.get_meta_flags(inst.left).nullability();
1481                let mut contains_flags = self.get_flags_contains(inst.left);
1482                if !inst.right.is_missing() {
1483                    null = null.and(self.get_meta_flags(inst.right).nullability());
1484                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1485                }
1486
1487                let left_nulls = self.get_nulls_id(inst.left);
1488                let right_nulls = self.get_nulls_id(inst.right);
1489                let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1490                let meta_id = self.mb.get_meta_id(Metadata {
1491                    flags: MetaFlags::with_nullability(
1492                        null,
1493                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1494                    ),
1495                    nulls,
1496                });
1497                self.init_metadata(node_id, meta_id);
1498            }
1499            Kind::Lookahead => {
1500                let mut nulls = self.get_nulls_id(inst.left);
1501                let left_nullability = inst.left.nullability(self);
1502                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1503                nulls = self.mb.nb.or_id(nulls, nulls_right);
1504                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1505
1506                let la_inner = inst.left;
1507                let la_tail = inst.right;
1508                let null = self
1509                    .get_meta_flags(la_inner)
1510                    .nullability()
1511                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1512                let contains_flags = self
1513                    .get_flags_contains(la_inner)
1514                    .or(self.get_flags_contains(la_tail));
1515
1516                let meta_id = self.mb.get_meta_id(Metadata {
1517                    flags: MetaFlags::with_nullability(
1518                        null,
1519                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1520                    ),
1521                    nulls,
1522                });
1523                self.init_metadata(node_id, meta_id);
1524            }
1525            Kind::Counted => {
1526                let best = inst.extra >> 16;
1527                let (null, nulls) = if best > 0 {
1528                    let mut ns = BTreeSet::new();
1529                    ns.insert(NullState::new(Nullability::CENTER, best));
1530                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1531                } else {
1532                    (Nullability::NEVER, NullsId::EMPTY)
1533                };
1534                let meta_id = self.mb.get_meta_id(Metadata {
1535                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1536                    nulls,
1537                });
1538                self.init_metadata(node_id, meta_id);
1539            }
1540        }
1541
1542        self.array.push(inst);
1543
1544        if let Some(rw) = self.post_init_simplify(node_id) {
1545            self.override_as(node_id, rw)
1546        } else {
1547            node_id
1548        }
1549    }
1550
1551    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1552        match self.get_kind(node_id) {
1553            Kind::Concat => {
1554                let lhs = node_id.left(self);
1555                let rhs = node_id.right(self);
1556                if lhs.is_pred_star(self).is_some() {
1557                    if let Some(opttail) = rhs.is_opt_v(self) {
1558                        if let Some(true) = self.subsumes(lhs, opttail) {
1559                            // return self.override_as(node_id, lhs);
1560                            return Some(lhs);
1561                        }
1562                    }
1563                }
1564            }
1565            Kind::Union => {
1566                let lhs = node_id.left(self);
1567                let rhs = node_id.right(self);
1568                let mut subsumed = false;
1569                rhs.iter_union_while(self, &mut |b, branch| {
1570                    if b.nullable_subsumes(branch, lhs) {
1571                        subsumed = true;
1572                    }
1573                    !subsumed
1574                });
1575                if subsumed {
1576                    return Some(rhs);
1577                }
1578                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1579                    return Some(rhs);
1580                }
1581            }
1582            _ => {}
1583        }
1584
1585        None
1586    }
1587
1588    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1589    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1590        if self.get_kind(lhs) != Kind::Union {
1591            return false; // single branch already checked by nullable_subsumes
1592        }
1593        let mut rhs_branches = Vec::new();
1594        let mut curr = rhs;
1595        while self.get_kind(curr) == Kind::Union {
1596            rhs_branches.push(self.get_left(curr));
1597            curr = self.get_right(curr);
1598        }
1599        rhs_branches.push(curr);
1600
1601        curr = lhs;
1602        while self.get_kind(curr) == Kind::Union {
1603            if !rhs_branches.contains(&self.get_left(curr)) {
1604                return false;
1605            }
1606            curr = self.get_right(curr);
1607        }
1608        rhs_branches.contains(&curr)
1609    }
1610
1611    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1612    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1613        if node == target {
1614            return true;
1615        }
1616        match self.get_kind(node) {
1617            Kind::Union => {
1618                self.nullable_subsumes(self.get_left(node), target)
1619                    || self.nullable_subsumes(self.get_right(node), target)
1620            }
1621            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1622                self.nullable_subsumes(self.get_right(node), target)
1623            }
1624            _ => false,
1625        }
1626    }
1627
1628    pub fn num_nodes(&self) -> u32 {
1629        self.num_created
1630    }
1631    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1632        match self.index.get(&inst) {
1633            Some(&id) => id,
1634            None => self.init(inst),
1635        }
1636    }
1637    #[inline]
1638    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1639        self.index.get(inst)
1640    }
1641
1642    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1643        self.index.insert(key, subsumed);
1644        subsumed
1645    }
1646
1647    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1648        let key = &self.array[key.0 as usize];
1649        let entry = self.index.get_mut(key).unwrap();
1650        *entry = subsumed;
1651        subsumed
1652    }
1653
1654    #[inline]
1655    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1656        self.array[node_id.0 as usize].left
1657    }
1658
1659    #[inline]
1660    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1661        self.array[node_id.0 as usize].right
1662    }
1663
1664    #[inline]
1665    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1666        self.array[node_id.0 as usize].extra
1667    }
1668
1669    #[inline]
1670    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1671        debug_assert!(self.get_kind(node_id) == Kind::Concat);
1672        let mut curr = node_id;
1673        while self.get_kind(curr) == Kind::Concat {
1674            curr = curr.right(self);
1675        }
1676        curr
1677    }
1678
1679    fn has_trailing_la(&self, node: NodeId) -> bool {
1680        let end = match self.get_kind(node) {
1681            Kind::Concat => self.get_concat_end(node),
1682            Kind::Lookahead => node,
1683            _ => return false,
1684        };
1685        self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1686    }
1687
1688    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1689        if self.get_kind(node) == Kind::Lookahead {
1690            return (NodeId::EPS, node);
1691        }
1692        debug_assert!(self.get_kind(node) == Kind::Concat);
1693        let right = node.right(self);
1694        if self.get_kind(right) != Kind::Concat {
1695            return (node.left(self), right);
1696        }
1697        let (stripped, la) = self.strip_trailing_la(right);
1698        (self.mk_concat(node.left(self), stripped), la)
1699    }
1700    #[inline]
1701    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1702        debug_assert!(matches!(
1703            self.get_kind(lookahead_node_id),
1704            Kind::Lookahead | Kind::Counted
1705        ));
1706        lookahead_node_id.left(self)
1707    }
1708    #[inline]
1709    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1710        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1711        lookahead_node_id.right(self)
1712    }
1713    #[inline]
1714    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1715        debug_assert!(
1716            matches!(
1717                self.get_kind(lookahead_node_id),
1718                Kind::Lookahead | Kind::Counted
1719            ),
1720            "not lookahead/counted: {:?}",
1721            self.pp(lookahead_node_id)
1722        );
1723        self.get_extra(lookahead_node_id)
1724    }
1725    #[inline]
1726    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1727        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1728        lookbehind_node_id.left(self)
1729    }
1730    #[inline]
1731    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1732        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1733        lookbehind_node_id.right(self)
1734    }
1735
1736    #[inline]
1737    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1738        debug_assert!(
1739            self.array.len() > node_id.0 as usize,
1740            "array len: {:?}",
1741            node_id
1742        );
1743        self.array[node_id.0 as usize].kind
1744    }
1745
1746    #[inline]
1747    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1748        &self.array[node_id.0 as usize]
1749    }
1750
1751    #[inline]
1752    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1753        self.metadata[node_id.0 as usize]
1754    }
1755
1756    #[inline]
1757    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1758        debug_assert!(node_id.0 != u32::MAX);
1759        let meta_id = self.get_node_meta_id(node_id);
1760        debug_assert!(meta_id != MetadataId::MISSING);
1761        &self.mb.array[meta_id.0 as usize]
1762    }
1763
1764    #[inline]
1765    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1766        if node_id == NodeId::MISSING {
1767            NullsId::EMPTY
1768        } else {
1769            self.get_meta(node_id).nulls
1770        }
1771    }
1772
1773    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1774        self.mb
1775            .nb
1776            .array
1777            .iter()
1778            .map(|set| set.iter().cloned().collect())
1779            .collect()
1780    }
1781
1782    pub fn nulls_count(&self) -> usize {
1783        self.mb.nb.array.len()
1784    }
1785
1786    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1787        self.mb.nb.array[id as usize].iter().cloned().collect()
1788    }
1789
1790    #[inline]
1791    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1792        if node_id == NodeId::MISSING {
1793            NullsId::EMPTY
1794        } else {
1795            let nulls = self.get_meta(node_id).nulls;
1796            self.mb.nb.and_mask(nulls, mask)
1797        }
1798    }
1799
1800    #[inline]
1801    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1802        let meta_id = self.get_node_meta_id(node_id);
1803        let meta = &self.mb.array[meta_id.0 as usize];
1804        meta.flags
1805    }
1806
1807    #[inline]
1808    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1809        self.get_meta(node_id).flags.nullability()
1810    }
1811
1812    #[inline]
1813    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1814        let meta_id = self.get_node_meta_id(node_id);
1815        let meta = &self.mb.array[meta_id.0 as usize];
1816        meta.flags.all_contains_flags()
1817    }
1818
1819    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1820        if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1821            return self.strip_lb(node_id.right(self));
1822        }
1823        self.strip_lb_inner(node_id)
1824    }
1825
1826    fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1827        if !self.contains_look(node_id) {
1828            return Ok(node_id);
1829        }
1830        if self.get_kind(node_id) == Kind::Concat
1831            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1832        {
1833            return self.strip_lb_inner(node_id.right(self));
1834        }
1835        if self.get_kind(node_id) == Kind::Inter {
1836            let left = self.strip_lb_inner(node_id.left(self))?;
1837            let right = self.strip_lb_inner(node_id.right(self))?;
1838            return Ok(self.mk_inter(left, right));
1839        }
1840        if self.get_kind(node_id) == Kind::Union {
1841            let left = self.strip_lb_inner(node_id.left(self))?;
1842            let right = self.strip_lb_inner(node_id.right(self))?;
1843            return Ok(self.mk_union(left, right));
1844        }
1845        match self.get_kind(node_id) {
1846            Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1847            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1848                Err(AlgebraError::UnsupportedPattern)
1849            }
1850            _ => Ok(node_id),
1851        }
1852    }
1853
1854    // for prefix purposes we prune any \A leading paths
1855    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1856        if !self.contains_anchors(node_id) {
1857            return node_id;
1858        }
1859        match self.get_kind(node_id) {
1860            Kind::Begin => NodeId::BOT,
1861            Kind::Concat => {
1862                let left = self.nonbegins(node_id.left(self));
1863                if left == NodeId::BOT {
1864                    return NodeId::BOT;
1865                }
1866                self.mk_concat(left, node_id.right(self))
1867            }
1868            Kind::Union => {
1869                let left = self.nonbegins(node_id.left(self));
1870                let right = self.nonbegins(node_id.right(self));
1871                self.mk_union(left, right)
1872            }
1873            _ => node_id,
1874        }
1875    }
1876
1877    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1878        match self.get_kind(node_id) {
1879            Kind::Concat => {
1880                let head = node_id.left(self);
1881                match self.get_kind(head) {
1882                    _ if self.any_nonbegin_nullable(head) => {
1883                        self.strip_prefix_safe(node_id.right(self))
1884                    }
1885                    _ => node_id,
1886                }
1887            }
1888            _ => node_id,
1889        }
1890    }
1891    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1892        match self.get_kind(node_id) {
1893            Kind::Begin => NodeId::BOT,
1894            Kind::Concat => {
1895                let head = self.prune_begin(node_id.left(self));
1896                let tail = self.prune_begin(node_id.right(self));
1897                self.mk_concat(head, tail)
1898            }
1899            Kind::Lookbehind => {
1900                if !node_id.right(self).is_missing() {
1901                    return node_id;
1902                }
1903                let head = self.prune_begin(node_id.left(self));
1904                head
1905            }
1906            Kind::Union => {
1907                let left = self.prune_begin(node_id.left(self));
1908                let right = self.prune_begin(node_id.right(self));
1909                self.mk_union(left, right)
1910            }
1911            _ => node_id,
1912        }
1913    }
1914
1915    pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1916        if !self.contains_look(node_id) {
1917            return Ok(node_id);
1918        }
1919        if self.get_kind(node_id) == Kind::Concat
1920            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1921        {
1922            let left = node_id.left(self);
1923            let ll = left.left(self).missing_to_eps();
1924            let lr = left.right(self).missing_to_eps();
1925            let new_l = self.mk_concat(ll, lr);
1926            let new = self.mk_concat(new_l, node_id.right(self));
1927            return Ok(new);
1928        }
1929        if self.get_kind(node_id) == Kind::Inter {
1930            let left = self.normalize_rev(node_id.left(self))?;
1931            let right = self.normalize_rev(node_id.right(self))?;
1932            return Ok(self.mk_inter(left, right));
1933        }
1934        if self.get_kind(node_id) == Kind::Union {
1935            let left = self.normalize_rev(node_id.left(self))?;
1936            let right = self.normalize_rev(node_id.right(self))?;
1937            return Ok(self.mk_union(left, right));
1938        }
1939        match self.get_kind(node_id) {
1940            Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1941            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1942                Err(AlgebraError::UnsupportedPattern)
1943            }
1944            _ => Ok(node_id),
1945        }
1946    }
1947
1948    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1949        debug_assert!(node_id != NodeId::MISSING);
1950        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1951            if *rev != NodeId::MISSING {
1952                return Ok(*rev);
1953            }
1954        }
1955        let rw = match self.get_kind(node_id) {
1956            Kind::End => NodeId::BEGIN,
1957            Kind::Begin => NodeId::END,
1958            Kind::Pred => node_id,
1959            Kind::Concat => {
1960                let left = self.reverse(node_id.left(self))?;
1961                let right = self.reverse(node_id.right(self))?;
1962                self.mk_concat(right, left)
1963            }
1964            Kind::Union => {
1965                let left = self.reverse(node_id.left(self))?;
1966                let right = self.reverse(node_id.right(self))?;
1967                self.mk_union(left, right)
1968            }
1969            Kind::Inter => {
1970                let left = self.reverse(node_id.left(self))?;
1971                let right = self.reverse(node_id.right(self))?;
1972                self.mk_inter(left, right)
1973            }
1974            Kind::Star => {
1975                let body = self.reverse(node_id.left(self))?;
1976                self.mk_star(body)
1977            }
1978            Kind::Compl => {
1979                if self.contains_look(node_id.left(self)) {
1980                    return Err(AlgebraError::UnsupportedPattern);
1981                }
1982                let body = self.reverse(node_id.left(self))?;
1983                self.mk_compl(body)
1984            }
1985            Kind::Lookbehind => {
1986                let prev = self.get_lookbehind_prev(node_id);
1987                let inner_id = self.get_lookbehind_inner(node_id);
1988                let rev_inner = self.reverse(inner_id)?;
1989                let rev_prev = if prev != NodeId::MISSING {
1990                    self.reverse(prev)?
1991                } else {
1992                    NodeId::MISSING
1993                };
1994                self.mk_lookahead(rev_inner, rev_prev, 0)
1995            }
1996            Kind::Lookahead => {
1997                let rel = self.get_lookahead_rel(node_id);
1998                if rel == u32::MAX {
1999                    // rel MAX holds no nullability - rewrite to intersection
2000                    let lbody = self.get_lookahead_inner(node_id);
2001                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2002                    let lbody_ts = self.mk_concat(lbody, NodeId::TS);
2003                    let ltail_ts = self.mk_concat(ltail, NodeId::TS);
2004                    let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2005                    let rev = self.reverse(as_inter)?;
2006                    self.init_reversed(node_id, rev);
2007                    return Ok(rev);
2008                }
2009                if rel != 0 {
2010                    return Err(AlgebraError::UnsupportedPattern);
2011                }
2012                let tail_node = self.get_lookahead_tail(node_id);
2013                let rev_tail = if tail_node != NodeId::MISSING {
2014                    self.reverse(tail_node)?
2015                } else {
2016                    NodeId::MISSING
2017                };
2018                let inner_id = self.get_lookahead_inner(node_id);
2019                let rev_inner = self.reverse(inner_id)?;
2020                self.mk_lookbehind(rev_inner, rev_tail)
2021            }
2022            Kind::Counted => {
2023                return Err(AlgebraError::UnsupportedPattern);
2024            }
2025        };
2026        self.init_reversed(node_id, rw);
2027        Ok(rw)
2028    }
2029
2030    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2031        let node = NodeKey {
2032            kind: Kind::Pred,
2033            left: NodeId::MISSING,
2034            right: NodeId::MISSING,
2035            extra: pred.0,
2036        };
2037        self.get_node_id(node)
2038    }
2039
2040    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2041        let key = NodeKey {
2042            kind: Kind::Compl,
2043            left: body,
2044            right: NodeId::MISSING,
2045            extra: u32::MAX,
2046        };
2047        if let Some(id) = self.key_is_created(&key) {
2048            return *id;
2049        }
2050        if body == NodeId::BOT {
2051            return NodeId::TS;
2052        }
2053        if body == NodeId::TS {
2054            return NodeId::BOT;
2055        }
2056
2057        if let Some(contains_body) = body.is_contains(self) {
2058            if contains_body.is_pred(self) {
2059                let pred = contains_body.pred_tset(self);
2060                let notpred = self.mk_pred_not(pred);
2061                let node = self.mk_star(notpred);
2062                return self.init_as(key, node);
2063            } else if contains_body == NodeId::END {
2064                return self.init_as(key, NodeId::BOT);
2065            }
2066        };
2067
2068        if self.get_kind(body) == Kind::Compl {
2069            return self.get_node(body).left;
2070        }
2071
2072        self.get_node_id(key)
2073    }
2074
2075    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2076        let nid = self.get_nulls_id(body);
2077        let nref = self.mb.nb.get_set_ref(nid).clone();
2078        let mut futures = NodeId::BOT;
2079        for n in nref.iter() {
2080            if !n.is_mask_nullable(mask) {
2081                continue;
2082            }
2083
2084            let eff = if n.rel == 0 {
2085                NodeId::EPS
2086            } else {
2087                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2088            };
2089            futures = self.mk_union(futures, eff)
2090        }
2091        futures
2092    }
2093
2094    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2095        if cfg!(feature = "norewrite") {
2096            return None;
2097        }
2098
2099        if self.get_kind(tail) == Kind::Lookbehind {
2100            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2101            return self
2102                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2103                .ok();
2104        }
2105        if self.get_kind(head) == Kind::Lookahead {
2106            let la_tail = self.get_lookahead_tail(head);
2107            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2108            if new_la_tail.is_center_nullable(self) {
2109                let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2110                if non_null_tail == NodeId::BOT {
2111                    return None;
2112                }
2113                let la_body = self.get_lookahead_inner(head);
2114                return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2115            }
2116            let la_body = self.get_lookahead_inner(head);
2117            let la_rel = self.get_lookahead_rel(head);
2118            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2119                let tail_rel = self.get_lookahead_rel(new_la_tail);
2120                tail_rel + la_rel
2121            } else {
2122                u32::MAX
2123            };
2124
2125            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2126        }
2127
2128        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2129            return Some(head);
2130        }
2131        if head == NodeId::TS && tail == NodeId::END {
2132            return Some(head);
2133        }
2134
2135        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2136            return Some(NodeId::TS);
2137        }
2138
2139        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2140            return Some(NodeId::TS);
2141        }
2142
2143        if self.get_kind(tail) == Kind::Union && head == NodeId::TS {
2144            let mut should_distribute_top = false;
2145            self.iter_unions(tail, |v| {
2146                if v == NodeId::BEGIN || self.starts_with_ts(v) {
2147                    should_distribute_top = true;
2148                }
2149            });
2150            if should_distribute_top {
2151                let mut new_union = NodeId::BOT;
2152                let mut curr = tail;
2153                while self.get_kind(curr) == Kind::Union {
2154                    let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2155                    new_union = self.mk_union(new_node, new_union);
2156                    curr = curr.right(self);
2157                }
2158                let new_node = self.mk_concat(NodeId::TS, curr);
2159                new_union = self.mk_union(new_union, new_node);
2160                return Some(new_union);
2161            }
2162        }
2163
2164        if self.get_kind(head) == Kind::Union && head == NodeId::TS {
2165            let mut should_distribute_top = false;
2166            self.iter_unions(head, |v| {
2167                if v == NodeId::END || self.starts_with_ts(v) {
2168                    should_distribute_top = true;
2169                }
2170            });
2171            if should_distribute_top {
2172                let mut new_union = NodeId::BOT;
2173                let mut curr = head;
2174                while self.get_kind(curr) == Kind::Union {
2175                    let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2176                    new_union = self.mk_union(new_node, new_union);
2177                    curr = curr.right(self);
2178                }
2179                let new_node = self.mk_concat(curr, NodeId::TS);
2180                new_union = self.mk_union(new_union, new_node);
2181                return Some(new_union);
2182            }
2183        }
2184
2185        if self.get_kind(head) == Kind::Inter && tail == NodeId::TS {
2186            let mut alltopstar = true;
2187            iter_inter!(self, head, |v| {
2188                alltopstar = self.ends_with_ts(v);
2189            });
2190            if alltopstar {
2191                return Some(head);
2192            }
2193        }
2194
2195        if head.is_star(self) && head == tail {
2196            return Some(head);
2197        }
2198
2199        None
2200    }
2201
2202    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2203        use Kind::*;
2204
2205        if cfg!(feature = "norewrite") {
2206            return None;
2207        }
2208        if left == right {
2209            return Some(left);
2210        }
2211
2212        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2213            return Some(right);
2214        }
2215
2216        if self.get_kind(left) == Kind::Pred && self.get_kind(right) == Kind::Pred {
2217            let l = left.pred_tset(self);
2218            let r = right.pred_tset(self);
2219            let solver = self.solver();
2220            let psi = solver.or_id(l, r);
2221            let rewrite = self.mk_pred(psi);
2222            return Some(rewrite);
2223        }
2224
2225        if left == NodeId::EPS
2226            && self.get_extra(left) == 0
2227            && self.nullability(right) == Nullability::ALWAYS
2228        {
2229            return Some(right);
2230        }
2231
2232        if self.get_kind(left) == Kind::Lookahead && self.get_kind(right) == Kind::Lookahead {
2233            let lb = left.left(self);
2234            let lt = left.right(self);
2235            let lrel = left.extra(self);
2236
2237            let rb = right.left(self);
2238            let rt = right.right(self);
2239            let rrel = right.extra(self);
2240
2241            if lrel == rrel && lt.is_missing() && rt.is_missing() {
2242                let unioned = self.mk_union(lb, rb);
2243                let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2244                return Some(node);
2245            }
2246        }
2247
2248        if right.is_kind(self, Concat) {
2249            if left == NodeId::END
2250                && right.left(self) == NodeId::END
2251                && self.nullability(right).has(Nullability::END)
2252            {
2253                return Some(right);
2254            }
2255            // .*|.*a.* => .*(a.*|)
2256            if left == right.left(self) {
2257                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2258                let rw = self.mk_concat(left, rhs);
2259                return Some(rw);
2260            }
2261            if left.is_kind(self, Concat) {
2262                let head1 = left.left(self);
2263                let head2 = right.left(self);
2264
2265                if head1 == head2 {
2266                    let t1 = left.right(self);
2267                    let t2 = right.right(self);
2268                    // opportunistic rewrites
2269                    if head1 == NodeId::TS {
2270                        if t2.has_concat_tail(self, t1) {
2271                            return Some(left);
2272                        }
2273                        if t1.has_concat_tail(self, t2) {
2274                            return Some(right);
2275                        }
2276                    }
2277                    let un = self.mk_union(t1, t2);
2278                    return Some(self.mk_concat(left.left(self), un));
2279                }
2280
2281                // xa|ya => (x|y)a - suffix factoring via reverse
2282                // looks prettier but not good for builder perf - leaving out for now
2283                if false {
2284                    let end1 = self.get_concat_end(left);
2285                    let end2 = self.get_concat_end(right);
2286                    if end1 == end2 {
2287                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2288                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2289                            let rev1 = self.reverse(left).unwrap();
2290                            let rev2 = self.reverse(right).unwrap();
2291
2292                            let union_rev = self.mk_union(rev1, rev2);
2293                            return Some(self.reverse(union_rev).unwrap());
2294                        }
2295                    }
2296                }
2297            }
2298            if left.is_pred(self) && left == right.left(self) {
2299                let un = self.mk_opt(right.right(self));
2300                let conc = self.mk_concat(left, un);
2301                return Some(conc);
2302            }
2303        }
2304
2305        if left == NodeId::EPS && right.is_plus(self) {
2306            let result = self.mk_star(right.left(self));
2307            return Some(result);
2308        }
2309
2310        // (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2311        // TODO: an extra check that both sides are Kind::Inter
2312        if self.flags == BuilderFlags::SUBSUME {
2313            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2314                return Some(rw);
2315            }
2316        }
2317
2318        None
2319    }
2320
2321    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2322        let mut curr = node;
2323        while self.get_kind(curr) == Kind::Inter {
2324            out.push(self.get_left(curr));
2325            curr = self.get_right(curr);
2326        }
2327        out.push(curr);
2328    }
2329
2330    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2331        let mut curr = node;
2332        let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2333        if has_prefix {
2334            curr = self.get_right(curr);
2335        }
2336        let mut count = 0u32;
2337        let mut pred_set = None;
2338        while self.get_kind(curr) == Kind::Concat {
2339            let head = self.get_left(curr);
2340            if self.get_kind(head) != Kind::Pred {
2341                return None;
2342            }
2343            let ps = head.pred_tset(self);
2344            match pred_set {
2345                None => pred_set = Some(ps),
2346                Some(existing) => {
2347                    if existing != ps {
2348                        return None;
2349                    }
2350                }
2351            }
2352            curr = self.get_right(curr);
2353            count += 1;
2354        }
2355        if count == 0 || self.get_kind(curr) != Kind::Star {
2356            return None;
2357        }
2358        Some((has_prefix, pred_set.unwrap(), curr, count))
2359    }
2360
2361    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2362        let mut j = 0;
2363        for &s in sub {
2364            while j < sup.len() && sup[j] < s {
2365                j += 1;
2366            }
2367            if j >= sup.len() || sup[j] != s {
2368                return false;
2369            }
2370            j += 1;
2371        }
2372        true
2373    }
2374
2375    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2376        if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2377            return None;
2378        }
2379
2380        let mut lc: Vec<NodeId> = Vec::new();
2381        let mut rc: Vec<NodeId> = Vec::new();
2382        self.collect_inter_components(left, &mut lc);
2383        self.collect_inter_components(right, &mut rc);
2384
2385        // component subset check: fewer constraints = larger language
2386        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2387            return Some(left);
2388        }
2389        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2390        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2391            return Some(right);
2392        }
2393
2394        if lc.len() == rc.len() {
2395            let mut diff_idx = None;
2396            for i in 0..lc.len() {
2397                if lc[i] != rc[i] {
2398                    if diff_idx.is_some() {
2399                        return None;
2400                    }
2401                    diff_idx = Some(i);
2402                }
2403            }
2404            if let Some(idx) = diff_idx {
2405                let a = lc[idx];
2406                let b = rc[idx];
2407                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2408                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2409                {
2410                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2411                        return if ca < cb { Some(left) } else { Some(right) };
2412                    }
2413                }
2414            }
2415        }
2416
2417        None
2418    }
2419
2420    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2421        if cfg!(feature = "norewrite") {
2422            return None;
2423        }
2424        if left == right {
2425            return Some(left);
2426        }
2427
2428        if self.get_kind(right) == Kind::Union {
2429            let mut result = NodeId::BOT;
2430            self.iter_unions_b(
2431                right,
2432                &mut (|b, v| {
2433                    let new_inter = b.mk_inter(v, left);
2434                    result = b.mk_union(result, new_inter);
2435                }),
2436            );
2437            return Some(result);
2438        }
2439        if self.get_kind(left) == Kind::Union {
2440            let mut result = NodeId::BOT;
2441            self.iter_unions_b(
2442                left,
2443                &mut (|b, v| {
2444                    let new_inter = b.mk_inter(v, right);
2445                    result = b.mk_union(result, new_inter);
2446                }),
2447            );
2448            return Some(result);
2449        }
2450
2451        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2452            return Some(NodeId::BOT);
2453        }
2454
2455        if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2456            let bodies = self.mk_union(left.left(self), right.left(self));
2457            return Some(self.mk_compl(bodies));
2458        }
2459
2460        if left == NodeId::TOPPLUS {
2461            if right.is_pred_star(self).is_some() {
2462                let newloop = self.mk_plus(right.left(self));
2463                return Some(newloop);
2464            }
2465            if right.is_never_nullable(self) {
2466                return Some(right);
2467            }
2468            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2469                return Some(NodeId::BOT);
2470            }
2471            if right.is_kind(self, Kind::Concat) {}
2472        }
2473
2474        {
2475            let l_is_la = left.is_lookahead(self);
2476            let r_is_la = right.is_lookahead(self);
2477            let l_is_cla = !l_is_la
2478                && self.get_kind(left) == Kind::Concat
2479                && self.get_kind(left.left(self)) == Kind::Lookahead;
2480            let r_is_cla = !r_is_la
2481                && self.get_kind(right) == Kind::Concat
2482                && self.get_kind(right.left(self)) == Kind::Lookahead;
2483            if l_is_la || r_is_la || l_is_cla || r_is_cla {
2484                let (la_node, other, concat_body) = if r_is_la {
2485                    (right, left, NodeId::MISSING)
2486                } else if l_is_la {
2487                    (left, right, NodeId::MISSING)
2488                } else if r_is_cla {
2489                    (right.left(self), left, right.right(self))
2490                } else {
2491                    (left.left(self), right, left.right(self))
2492                };
2493                let la_body = la_node.left(self);
2494                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2495                let inter_right = if concat_body.is_missing() {
2496                    la_tail
2497                } else {
2498                    self.mk_concat(la_tail, concat_body)
2499                };
2500                let new_body = self.mk_inter(other, inter_right);
2501                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2502                return Some(self.mk_concat(la, new_body));
2503            }
2504        }
2505
2506        if self.get_kind(right) == Kind::Compl {
2507            let compl_body = right.left(self);
2508            if left == compl_body {
2509                return Some(NodeId::BOT);
2510            }
2511            if self.get_kind(compl_body) == Kind::Concat {
2512                let compl_head = compl_body.left(self);
2513                if compl_body.right(self) == NodeId::TS && compl_head == left {
2514                    return Some(NodeId::BOT);
2515                }
2516            }
2517        }
2518
2519        if let Some(pleft) = left.is_pred_star(self) {
2520            if let Some(pright) = right.is_pred_star(self) {
2521                let merged = self.mk_inter(pleft, pright);
2522                return Some(self.mk_star(merged));
2523            }
2524        }
2525
2526        {
2527            let l_is_clb = self.get_kind(left) == Kind::Concat
2528                && self.get_kind(left.left(self)) == Kind::Lookbehind;
2529            let r_is_clb = self.get_kind(right) == Kind::Concat
2530                && self.get_kind(right.left(self)) == Kind::Lookbehind;
2531            if l_is_clb || r_is_clb {
2532                let (lb, body) = if l_is_clb && r_is_clb {
2533                    let lb1 = left.left(self);
2534                    let lb2 = right.left(self);
2535                    let inner = self.mk_inter(
2536                        self.get_lookbehind_inner(lb1),
2537                        self.get_lookbehind_inner(lb2),
2538                    );
2539                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2540                    let body = self.mk_inter(left.right(self), right.right(self));
2541                    (lb, body)
2542                } else if l_is_clb {
2543                    let lb = left.left(self);
2544                    let body = self.mk_inter(left.right(self), right);
2545                    (lb, body)
2546                } else {
2547                    let lb = right.left(self);
2548                    let body = self.mk_inter(left, right.right(self));
2549                    (lb, body)
2550                };
2551                return Some(self.mk_concat(lb, body));
2552            }
2553        }
2554
2555        {
2556            let l_has_la = self.has_trailing_la(left);
2557            let r_has_la = self.has_trailing_la(right);
2558            if l_has_la || r_has_la {
2559                let (body, la) = if l_has_la && r_has_la {
2560                    let (lbody, l_la) = self.strip_trailing_la(left);
2561                    let (rbody, r_la) = self.strip_trailing_la(right);
2562                    let inner = self.mk_inter(
2563                        self.get_lookahead_inner(l_la),
2564                        self.get_lookahead_inner(r_la),
2565                    );
2566                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2567                    let body = self.mk_inter(lbody, rbody);
2568                    (body, la)
2569                } else if l_has_la {
2570                    let (lbody, la) = self.strip_trailing_la(left);
2571                    let body = self.mk_inter(lbody, right);
2572                    (body, la)
2573                } else {
2574                    let (rbody, la) = self.strip_trailing_la(right);
2575                    let body = self.mk_inter(left, rbody);
2576                    (body, la)
2577                };
2578                return Some(self.mk_concat(body, la));
2579            }
2580        }
2581
2582        None
2583    }
2584
2585    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2586        if cfg!(feature = "norewrite") {
2587            return None;
2588        }
2589        debug_assert!(self.get_kind(right_union) == Kind::Union);
2590
2591        let mut rewritten = None;
2592        right_union.iter_union_while(
2593            self,
2594            &mut (|b, v| {
2595                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2596                    rewritten = Some((v, rw));
2597                    false
2598                } else {
2599                    true
2600                }
2601            }),
2602        );
2603
2604        if let Some(rw) = rewritten {
2605            let mut new_union = NodeId::BOT;
2606            right_union.iter_union(
2607                self,
2608                &mut (|b, v| {
2609                    if v == rw.0 {
2610                        new_union = b.mk_union(rw.1, new_union)
2611                    } else {
2612                        new_union = b.mk_union(v, new_union)
2613                    }
2614                }),
2615            );
2616            return Some(new_union);
2617        };
2618
2619        None
2620    }
2621
2622    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2623        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2624        debug_assert!(tail != NodeId::MISSING);
2625        let key = NodeKey {
2626            kind: Kind::Concat,
2627            left: head,
2628            right: tail,
2629            extra: u32::MAX,
2630        };
2631        if let Some(id) = self.key_is_created(&key) {
2632            return *id;
2633        }
2634
2635        if head == NodeId::BOT || tail == NodeId::BOT {
2636            return NodeId::BOT;
2637        }
2638        if head == NodeId::EPS {
2639            return tail;
2640        }
2641        if tail == NodeId::EPS {
2642            return head;
2643        }
2644
2645        match tail {
2646            // this is only valid if direction known;
2647            NodeId::BEGIN => {
2648                if !self.is_nullable(head, Nullability::BEGIN) {
2649                    return NodeId::BOT;
2650                } else {
2651                    return NodeId::BEGIN;
2652                }
2653            }
2654            _ => {}
2655        }
2656
2657        // normalize concats to right
2658        if head.is_kind(self, Kind::Concat) {
2659            let left = head.left(self);
2660            let newright = self.mk_concat(head.right(self), tail);
2661            let updated = self.mk_concat(left, newright);
2662            return self.init_as(key, updated);
2663        }
2664
2665        if head == NodeId::TS && tail == NodeId::END {
2666            return NodeId::TS;
2667        }
2668
2669        if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2670            return NodeId::BOT;
2671        }
2672
2673        if self.get_kind(tail) == Kind::Concat {
2674            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2675                let upd = self.mk_concat(rw, tail.right(self));
2676                return self.init_as(key, upd);
2677            }
2678        }
2679
2680        if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2681            return self.init_as(key, new);
2682        }
2683
2684        match (self.get_kind(head), self.get_kind(tail)) {
2685            // merge stars
2686            (Kind::Star, Kind::Concat) if head.is_star(self) => {
2687                let rl = tail.left(self);
2688                if head == rl {
2689                    return self.init_as(key, tail);
2690                }
2691            }
2692            // attempt longer concat rw
2693            (_, Kind::Concat) => {
2694                let curr = head;
2695                let h2 = self.mk_concat(curr, tail.left(self));
2696                let tr = tail.right(self);
2697                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2698                    return self.init_as(key, new);
2699                }
2700            }
2701            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2702                return self.init_as(key, tail);
2703            }
2704            _ => {}
2705        }
2706
2707        self.init(key)
2708    }
2709
2710    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2711        // LNF: lookbehind must start with ts
2712        let lb_body = {
2713            match self.starts_with_ts(lb_body) {
2714                true => lb_body,
2715                false => self.mk_concat(NodeId::TS, lb_body),
2716            }
2717        };
2718        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2719        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2720    }
2721
2722    fn mk_lookbehind_internal(
2723        &mut self,
2724        lb_body: NodeId,
2725        lb_prev: NodeId,
2726    ) -> Result<NodeId, AlgebraError> {
2727        debug_assert!(lb_body != NodeId::MISSING);
2728        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2729        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2730            return Ok(NodeId::BOT);
2731        }
2732        if lb_body == NodeId::TS {
2733            return Ok(lb_prev);
2734        }
2735        if lb_body == NodeId::EPS {
2736            match lb_prev {
2737                NodeId::MISSING => return Ok(NodeId::EPS),
2738                NodeId::EPS => return Ok(NodeId::EPS),
2739                _ => return Ok(lb_prev),
2740            }
2741        }
2742
2743        let key = NodeKey {
2744            kind: Kind::Lookbehind,
2745            left: lb_body,
2746            right: lb_prev,
2747            extra: u32::MAX,
2748        };
2749        match self.key_is_created(&key) {
2750            Some(id) => Ok(*id),
2751            None => {
2752                if lb_prev == NodeId::TS {
2753                    return Ok(self.mk_concat(lb_prev, lb_body));
2754                }
2755
2756                Ok(self.init(key))
2757            }
2758        }
2759    }
2760
2761    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2762        // LNF: lookahead must end with ts
2763        let la_body = {
2764            match self.ends_with_ts(la_body) {
2765                true => la_body,
2766                false => self.mk_concat(la_body, NodeId::TS),
2767            }
2768        };
2769        let rel = if NodeId::MISSING == la_tail {
2770            rel
2771        } else {
2772            match la_tail.is_center_nullable(self) {
2773                false => u32::MAX,
2774                true => rel,
2775            }
2776        };
2777
2778        self.mk_lookahead_internal(la_body, la_tail, rel)
2779    }
2780
2781    // rel max = carries no nullability, can potentially rw to intersection
2782    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2783        let key = NodeKey {
2784            kind: Kind::Lookahead,
2785            left: la_body,
2786            right: la_tail,
2787            extra: rel,
2788        };
2789        if let Some(id) = self.key_is_created(&key) {
2790            return *id;
2791        }
2792        if la_body == NodeId::TS {
2793            if rel == 0 {
2794                return la_tail.missing_to_eps();
2795            } else {
2796                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2797            }
2798        }
2799        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2800            return NodeId::BOT;
2801        }
2802        if la_tail.is_missing() && rel == u32::MAX {
2803            return NodeId::BOT;
2804        }
2805
2806        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2807            return la_body;
2808        }
2809
2810        if la_tail == NodeId::TS {
2811            if rel == 0 || rel == u32::MAX {
2812                return self.mk_concat(la_body, NodeId::TS);
2813            } else if rel == u32::MAX {
2814                return self.mk_begins_with(la_body);
2815            }
2816        }
2817
2818        if rel == u32::MAX {
2819            if la_tail.is_missing() {
2820                return NodeId::BOT;
2821            }
2822
2823            if self.is_always_nullable(la_body) {
2824                return la_tail.missing_to_eps();
2825            }
2826
2827            if la_tail != NodeId::MISSING {
2828                match self.get_kind(la_tail) {
2829                    _ => {
2830                        if la_body.is_compl_plus_end(self) {
2831                            let minlen = self.get_min_length_only(la_tail);
2832                            if minlen >= 1 {
2833                                return NodeId::BOT;
2834                            }
2835                        }
2836                    }
2837                }
2838            }
2839        }
2840
2841        if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2842            let la_body2 = self.get_lookahead_inner(la_tail);
2843            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2844            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2845            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2846            let new_la_rel = self.get_lookahead_rel(la_tail);
2847            let new_la_tail = self.get_lookahead_tail(la_tail);
2848            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2849        }
2850
2851        if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2852            let la_body_right = la_body.right(self);
2853            if self.is_always_nullable(la_body_right) {
2854                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2855            }
2856            if la_body.right(self) == NodeId::END {
2857                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2858            }
2859            let bodyright = la_body.right(self);
2860            if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2861                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2862                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2863            }
2864        }
2865
2866        if la_tail != NodeId::MISSING {
2867            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2868                let lpred = la_body.left(self);
2869                if self.get_kind(lpred) == Kind::Pred {
2870                    let l = lpred.pred_tset(self);
2871                    let r = la_tail.pred_tset(self);
2872                    let psi_and = self.solver().and_id(l, r);
2873                    let rewrite = self.mk_pred(psi_and);
2874                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2875                    let new_right =
2876                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2877                    return self.mk_concat(rewrite, new_right);
2878                }
2879            }
2880        }
2881
2882        self.get_node_id(key)
2883    }
2884
2885    pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2886        let has_match = (packed >> 16) > 0;
2887        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2888            return NodeId::BOT;
2889        }
2890        debug_assert!(
2891            body == NodeId::BOT || !self.is_infinite(body),
2892            "Counted body must have finite max length"
2893        );
2894        let chain = self.prune_counted_chain(body, chain);
2895        let key = NodeKey {
2896            kind: Kind::Counted,
2897            left: body,
2898            right: chain,
2899            extra: packed,
2900        };
2901        if let Some(id) = self.key_is_created(&key) {
2902            return *id;
2903        }
2904        self.get_node_id(key)
2905    }
2906
2907    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2908        if chain == NodeId::MISSING || body == NodeId::BOT {
2909            return chain;
2910        }
2911        if self.nullability(body) != Nullability::NEVER {
2912            return NodeId::MISSING;
2913        }
2914        let chain_body = chain.left(self);
2915        if chain_body == NodeId::BOT {
2916            return chain;
2917        }
2918        let not_begins = self.mk_not_begins_with(body);
2919        let inter = self.mk_inter(chain_body, not_begins);
2920        let is_empty = inter == NodeId::BOT;
2921        if is_empty {
2922            self.prune_counted_chain(body, chain.right(self))
2923        } else {
2924            chain
2925        }
2926    }
2927
2928    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2929        let neg_inner = self.mk_concat(body, NodeId::TS);
2930        let neg_part = self.mk_compl(neg_inner);
2931        let conc = self.mk_concat(neg_part, NodeId::END);
2932        self.mk_lookahead(conc, NodeId::MISSING, rel)
2933    }
2934
2935    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2936        match self.get_node(body).kind {
2937            Kind::Pred => {
2938                let psi = body.pred_tset(self);
2939                let negated = self.mk_pred_not(psi);
2940                let union = self.mk_union(NodeId::BEGIN, negated);
2941                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2942                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2943            }
2944            _ => {
2945                // ~body ∩ utf8_char: non-nullable (utf8_char requires ≥1 byte),
2946                // so \A | negated won't be stripped by strip_prefix_safe
2947                let uc = crate::unicode_classes::utf8_char(self);
2948                let neg = self.mk_compl(body);
2949                let negated = self.mk_inter(neg, uc);
2950                let union = self.mk_union(NodeId::BEGIN, negated);
2951                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2952                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2953            }
2954        }
2955    }
2956
2957    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2958        debug_assert!(left != NodeId::MISSING);
2959        debug_assert!(right != NodeId::MISSING);
2960        if left > right {
2961            return self.mk_union(right, left);
2962        }
2963        let key = NodeKey {
2964            kind: Kind::Union,
2965            left,
2966            right,
2967            extra: u32::MAX,
2968        };
2969        if let Some(id) = self.key_is_created(&key) {
2970            return *id;
2971        }
2972
2973        if left == right {
2974            return left;
2975        }
2976        if left == NodeId::BOT {
2977            return right;
2978        }
2979        if right == NodeId::BOT {
2980            return left;
2981        }
2982        if right == NodeId::TS {
2983            return right;
2984        }
2985        if left == NodeId::TS {
2986            return left;
2987        }
2988
2989        match (self.get_kind(left), self.get_kind(right)) {
2990            (Kind::Union, _) => {
2991                self.iter_unions_b(left, &mut |b, v| {
2992                    b.temp_vec.push(v);
2993                });
2994                self.iter_unions_b(right, &mut |b, v| {
2995                    b.temp_vec.push(v);
2996                });
2997                self.temp_vec.sort();
2998                let tree = self.temp_vec.clone();
2999                self.temp_vec.clear();
3000                let newnode = tree
3001                    .iter()
3002                    .rev()
3003                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3004                return self.init_as(key, newnode);
3005            }
3006            (_, Kind::Union) => {
3007                let rleft = right.left(self);
3008                // if left_node id is smaller than rleft, just create a new union
3009                if left > rleft {
3010                    self.iter_unions_b(left, &mut |b, v| {
3011                        b.temp_vec.push(v);
3012                    });
3013                    self.iter_unions_b(right, &mut |b, v| {
3014                        b.temp_vec.push(v);
3015                    });
3016                    self.temp_vec.sort();
3017                    let tree = self.temp_vec.clone();
3018                    self.temp_vec.clear();
3019                    let newnode = tree
3020                        .iter()
3021                        .rev()
3022                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3023                    return self.init_as(key, newnode);
3024                } else {
3025                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3026                        return self.init_as(key, rw);
3027                    }
3028                }
3029            }
3030            _ => {}
3031        }
3032
3033        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3034            return self.init_as(key, rw);
3035        }
3036        self.init(key)
3037    }
3038
3039    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3040        debug_assert!(left_id != NodeId::MISSING);
3041        debug_assert!(right_id != NodeId::MISSING);
3042        if left_id == right_id {
3043            return left_id;
3044        }
3045        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3046            return NodeId::BOT;
3047        }
3048        if left_id == NodeId::TS {
3049            return right_id;
3050        }
3051        if right_id == NodeId::TS {
3052            return left_id;
3053        }
3054        if left_id > right_id {
3055            return self.mk_inter(right_id, left_id);
3056        }
3057        let key = NodeKey {
3058            kind: Kind::Inter,
3059            left: left_id,
3060            right: right_id,
3061            extra: u32::MAX,
3062        };
3063        if let Some(id) = self.key_is_created(&key) {
3064            return *id;
3065        }
3066
3067        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3068            return self.init_as(key, rw);
3069        }
3070
3071        self.init(key)
3072    }
3073
3074    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3075        let node = NodeKey {
3076            kind,
3077            left: NodeId::MISSING,
3078            right: NodeId::MISSING,
3079            extra: u32::MAX,
3080        };
3081        self.init(node)
3082    }
3083
3084    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3085        let star = self.mk_star(body_id);
3086        self.mk_concat(body_id, star)
3087    }
3088    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3089        let opt = self.mk_opt(body_id);
3090        let mut nodes1 = vec![];
3091        for _ in lower..upper {
3092            nodes1.push(opt);
3093        }
3094        for _ in 0..lower {
3095            nodes1.push(body_id);
3096        }
3097        self.mk_concats(nodes1.into_iter())
3098    }
3099    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3100        self.mk_union(NodeId::EPS, body_id)
3101    }
3102
3103    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3104        let key = NodeKey {
3105            kind: Kind::Star,
3106            left: body_id,
3107            right: NodeId::MISSING,
3108            extra: 0,
3109        };
3110        if let Some(id) = self.key_is_created(&key) {
3111            return *id;
3112        }
3113        // _*{500} is still _*
3114        if body_id.is_kind(self, Kind::Star) {
3115            return body_id;
3116        }
3117        self.get_node_id(key)
3118    }
3119
3120    /// it's cheaper to check this once as an edge-case
3121    /// than to compute a 4th nullability bit for every node
3122    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3123        match self.get_kind(node_id) {
3124            Kind::End => Nullability::EMPTYSTRING,
3125            Kind::Begin => Nullability::EMPTYSTRING,
3126            Kind::Pred => Nullability::NEVER,
3127            Kind::Star => Nullability::ALWAYS,
3128            Kind::Inter | Kind::Concat => {
3129                let lnull = self.nullability_emptystring(node_id.left(self));
3130                let rnull = self.nullability_emptystring(node_id.right(self));
3131                lnull.and(rnull) // left = 010, right = 001, left & right = 000
3132            }
3133            Kind::Union => {
3134                let lnull = self.nullability_emptystring(node_id.left(self));
3135                let rnull = self.nullability_emptystring(node_id.right(self));
3136                lnull.or(rnull)
3137            }
3138            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3139            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3140            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3141            Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3142        }
3143    }
3144
3145    #[inline(always)]
3146    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3147        self.get_meta(node_id)
3148            .flags
3149            .nullability()
3150            .has(Nullability::CENTER.or(Nullability::END))
3151    }
3152
3153    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3154        self.get_only_nullability(node_id)
3155    }
3156
3157    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3158        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3159    }
3160
3161    pub fn pp(&self, node_id: NodeId) -> String {
3162        let mut s = String::new();
3163        self.ppw(&mut s, node_id).unwrap();
3164        s
3165    }
3166
3167    #[allow(dead_code)]
3168    pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3169        let nu = self.get_nulls_id(node_id);
3170        let nr = self.mb.nb.get_set_ref(nu);
3171        let s1 = format!("{:?}", nr);
3172        s1
3173    }
3174
3175    #[allow(dead_code)]
3176    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3177        match self.get_tregex(term_id) {
3178            TRegex::Leaf(node_id) => {
3179                let mut s = String::new();
3180                self.ppw(&mut s, *node_id).unwrap();
3181                s
3182            }
3183            TRegex::ITE(cond, then_id, else_id) => {
3184                format!(
3185                    "ITE({},{},{})",
3186                    self.solver_ref().pp(*cond),
3187                    self.ppt(*then_id),
3188                    self.ppt(*else_id)
3189                )
3190            }
3191        }
3192    }
3193
3194    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3195        if cfg!(feature = "graphviz") {
3196            match node_id {
3197                NodeId::MISSING => return write!(s, "MISSING"),
3198                NodeId::BOT => return write!(s, "⊥"),
3199                NodeId::TS => return write!(s, "_*"),
3200                NodeId::TOP => return write!(s, "_"),
3201                NodeId::EPS => return write!(s, "ε"),
3202                _ => {}
3203            }
3204        }
3205
3206        match node_id {
3207            NodeId::MISSING => return write!(s, "MISSING"),
3208            NodeId::BOT => return write!(s, "⊥"),
3209            NodeId::TS => return write!(s, "_*"),
3210            NodeId::TOP => return write!(s, "_"),
3211            NodeId::EPS => return write!(s, ""),
3212            _ => {}
3213        }
3214
3215        match self.get_kind(node_id) {
3216            Kind::End => write!(s, r"\z"),
3217            Kind::Begin => write!(s, r"\A"),
3218            Kind::Pred => {
3219                let psi = node_id.pred_tset(self);
3220                if psi == TSetId::EMPTY {
3221                    write!(s, r"⊥")
3222                } else if psi == TSetId::FULL {
3223                    write!(s, r"_")
3224                } else {
3225                    write!(s, "{}", self.solver_ref().pp(psi))
3226                }
3227            }
3228            Kind::Inter => {
3229                write!(s, "(")?;
3230                self.ppw(s, node_id.left(self))?;
3231                write!(s, "&")?;
3232                let mut curr = node_id.right(self);
3233                while self.get_kind(curr) == Kind::Inter {
3234                    let n = curr.left(self);
3235                    self.ppw(s, n)?;
3236                    write!(s, "&")?;
3237                    curr = curr.right(self);
3238                }
3239                self.ppw(s, curr)?;
3240                write!(s, ")")
3241            }
3242            Kind::Union => {
3243                let left = node_id.left(self);
3244                let right = node_id.right(self);
3245                write!(s, "(")?;
3246                self.ppw(s, left)?;
3247                write!(s, "|")?;
3248                let mut curr = right;
3249                while self.get_kind(curr) == Kind::Union {
3250                    let n = curr.left(self);
3251                    self.ppw(s, n)?;
3252                    write!(s, "|")?;
3253                    curr = curr.right(self);
3254                }
3255                self.ppw(s, curr)?;
3256                write!(s, ")")
3257            }
3258            Kind::Concat => {
3259                let left = node_id.left(self);
3260                let right = node_id.right(self);
3261                if right.is_star(self) && right.left(self) == left {
3262                    self.ppw(s, left)?;
3263                    write!(s, "+")?;
3264                    return Ok(());
3265                }
3266                if right.is_concat(self) {
3267                    let rl = right.left(self);
3268                    if rl.is_star(self) && rl.left(self) == left {
3269                        self.ppw(s, left)?;
3270                        write!(s, "+")?;
3271                        return self.ppw(s, right.right(self));
3272                    }
3273                }
3274                if right.is_concat(self) && right.left(self) == left {
3275                    let mut num = 1;
3276                    let mut right = right;
3277                    while right.is_concat(self) && right.left(self) == left {
3278                        num += 1;
3279                        right = right.right(self);
3280                    }
3281                    // (|X){n} followed by X{m} -> X{m,m+n}
3282                    if let Some(inner) = left.is_opt_v(self) {
3283                        let mut inner_count = 0;
3284                        let mut right2 = right;
3285                        while right2.is_concat(self) && right2.left(self) == inner {
3286                            inner_count += 1;
3287                            right2 = right2.right(self);
3288                        }
3289                        if right2 == inner {
3290                            inner_count += 1;
3291                            self.ppw(s, inner)?;
3292                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3293                        }
3294                        if inner_count > 0 {
3295                            self.ppw(s, inner)?;
3296                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3297                            return self.ppw(s, right2);
3298                        }
3299                    }
3300                    self.ppw(s, left)?;
3301                    if right == left {
3302                        num += 1;
3303                        return write!(s, "{{{}}}", num);
3304                    }
3305                    if num <= 3 && left.is_pred(self) {
3306                        for _ in 1..num {
3307                            self.ppw(s, left)?;
3308                        }
3309                        return self.ppw(s, right);
3310                    } else {
3311                        write!(s, "{{{}}}", num)?;
3312                        return self.ppw(s, right);
3313                    }
3314                }
3315                self.ppw(s, left)?;
3316                self.ppw(s, right)
3317            }
3318            Kind::Star => {
3319                let left = node_id.left(self);
3320                let leftkind = self.get_kind(left);
3321                match leftkind {
3322                    Kind::Concat | Kind::Star | Kind::Compl => {
3323                        write!(s, "(")?;
3324                        self.ppw(s, left)?;
3325                        write!(s, ")")?;
3326                    }
3327                    _ => {
3328                        self.ppw(s, left)?;
3329                    }
3330                };
3331                write!(s, "*")
3332            }
3333            Kind::Compl => {
3334                write!(s, "~(")?;
3335                self.ppw(s, node_id.left(self))?;
3336                write!(s, ")")
3337            }
3338            Kind::Lookbehind => {
3339                let lbleft = self.get_lookbehind_prev(node_id);
3340                let lbinner = self.get_lookbehind_inner(node_id);
3341                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3342                if lbleft != NodeId::MISSING {
3343                    write!(s, "「")?;
3344                    self.ppw(s, lbleft)?;
3345                    write!(s, "」")?;
3346                }
3347
3348                write!(s, "(?<=")?;
3349                self.ppw(s, lbinner)?;
3350                write!(s, ")")
3351            }
3352            Kind::Lookahead => {
3353                let inner = self.get_lookahead_inner(node_id);
3354                write!(s, "(?=")?;
3355                self.ppw(s, inner)?;
3356                write!(s, ")")?;
3357                if self.get_lookahead_rel(node_id) != 0 {
3358                    write!(s, "{{")?;
3359                    let rel = self.get_lookahead_rel(node_id);
3360                    if rel == u32::MAX {
3361                        write!(s, "∅")?;
3362                    } else {
3363                        write!(s, "{}", rel)?;
3364                    }
3365                    write!(s, "}}")?;
3366                }
3367                if node_id.right(self) == NodeId::MISSING {
3368                    Ok(())
3369                } else {
3370                    write!(s, "『")?;
3371                    self.ppw(s, node_id.right(self))?;
3372                    write!(s, "』")
3373                }
3374            }
3375            Kind::Counted => {
3376                let body = node_id.left(self);
3377                let packed = self.get_extra(node_id);
3378                let step = packed & 0xFFFF;
3379                let best = packed >> 16;
3380                write!(s, "#(")?;
3381                self.ppw(s, body)?;
3382                write!(s, ")s{}b{}", step, best)
3383            }
3384        }
3385    }
3386
3387    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3388        self.mk_concat(node, NodeId::TS)
3389    }
3390
3391    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3392        let node_ts = self.mk_concat(node, NodeId::TS);
3393        self.mk_compl(node_ts)
3394    }
3395
3396    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3397        let notset = self.solver().not_id(set);
3398        self.mk_pred(notset)
3399    }
3400
3401    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3402        let set_id = self.solver().u8_to_set_id(char);
3403        self.mk_pred(set_id)
3404    }
3405
3406    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3407        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3408        self.mk_pred(rangeset)
3409    }
3410
3411    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3412        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3413        for &(lo, hi) in &ranges[1..] {
3414            let r = self.mk_range_u8(lo, hi);
3415            node = self.mk_union(node, r);
3416        }
3417        node
3418    }
3419
3420    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3421        let mut prefix = Vec::new();
3422        let mut curr = node;
3423        loop {
3424            if curr == NodeId::EPS {
3425                let full = !prefix.is_empty();
3426                return (prefix, full);
3427            }
3428            if curr == NodeId::BOT {
3429                break;
3430            }
3431            if self.get_kind(curr) == Kind::Pred {
3432                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3433                    Some(byte) => {
3434                        prefix.push(byte);
3435                        return (prefix, true);
3436                    }
3437                    None => break, // multi-byte pred: pattern not fully consumed
3438                }
3439            }
3440            if self.get_kind(curr) != Kind::Concat {
3441                break;
3442            }
3443            let left = curr.left(self);
3444            if self.get_kind(left) != Kind::Pred {
3445                break;
3446            }
3447            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3448                Some(byte) => prefix.push(byte),
3449                None => break,
3450            }
3451            curr = curr.right(self);
3452        }
3453        (prefix, false)
3454    }
3455
3456    #[allow(dead_code)]
3457    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3458        let mut result = NodeId::EPS;
3459        for byte in raw_str.iter().rev() {
3460            let node = self.mk_u8(*byte);
3461            result = self.mk_concat(node, result);
3462        }
3463        result
3464    }
3465
3466    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3467        let mut result = NodeId::EPS;
3468        for byte in raw_str.bytes().rev() {
3469            let node = self.mk_u8(byte);
3470            result = self.mk_concat(node, result);
3471        }
3472        result
3473    }
3474
3475    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3476        let mut sorted: Vec<NodeId> = nodes.collect();
3477        if sorted.len() <= 1 {
3478            return sorted.pop().unwrap_or(NodeId::BOT);
3479        }
3480        sorted.sort();
3481        sorted.dedup();
3482        sorted.retain(|&x| x != NodeId::BOT);
3483        if sorted.is_empty() {
3484            return NodeId::BOT;
3485        }
3486        if sorted.len() > 16 {
3487            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3488            let mut non_concat: Vec<NodeId> = Vec::new();
3489            for &n in &sorted {
3490                if self.get_kind(n) == Kind::Concat {
3491                    by_head.entry(self.get_left(n)).or_default().push(n);
3492                } else {
3493                    non_concat.push(n);
3494                }
3495            }
3496            let mut absorbed: Vec<NodeId> = Vec::new();
3497            for &n in &non_concat {
3498                if by_head.contains_key(&n) {
3499                    absorbed.push(n);
3500                }
3501            }
3502            if !absorbed.is_empty() {
3503                non_concat.retain(|n| !absorbed.contains(n));
3504            }
3505            if by_head.len() < sorted.len() {
3506                let mut groups: Vec<NodeId> = non_concat;
3507                for (head, tails) in by_head {
3508                    let mut tail_nodes: Vec<NodeId> =
3509                        tails.iter().map(|&n| self.get_right(n)).collect();
3510                    if absorbed.contains(&head) {
3511                        tail_nodes.push(NodeId::EPS);
3512                    }
3513                    let tail_union = self.mk_unions(tail_nodes.into_iter());
3514                    let factored = self.mk_concat(head, tail_union);
3515                    groups.push(factored);
3516                }
3517                groups.sort();
3518                groups.dedup();
3519                return self.mk_unions_balanced(&groups);
3520            }
3521        }
3522        self.mk_unions_balanced(&sorted)
3523    }
3524
3525    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3526        match nodes.len() {
3527            0 => NodeId::BOT,
3528            1 => nodes[0],
3529            n => {
3530                let mid = n / 2;
3531                let left = self.mk_unions_balanced(&nodes[..mid]);
3532                let right = self.mk_unions_balanced(&nodes[mid..]);
3533                self.mk_union(left, right)
3534            }
3535        }
3536    }
3537
3538    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3539        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3540    }
3541
3542    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3543        nodes
3544            .rev()
3545            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3546    }
3547}
3548
3549impl RegexBuilder {
3550    #[allow(dead_code)]
3551    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3552        match self.get_tregex(term_id).clone() {
3553            TRegex::Leaf(node_id) => {
3554                if NodeId::BOT == node_id {
3555                    vec![]
3556                } else {
3557                    vec![node_id]
3558                }
3559            }
3560            TRegex::ITE(_, then_id, else_id) => {
3561                let mut then_nodes = self.extract_sat(then_id);
3562                let mut else_nodes = self.extract_sat(else_id);
3563                then_nodes.append(&mut else_nodes);
3564                then_nodes
3565            }
3566        }
3567    }
3568
3569    pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3570        debug_assert!(self.get_kind(start) == Kind::Union);
3571        let mut curr = start;
3572        while self.get_kind(curr) == Kind::Union {
3573            f(curr.left(self));
3574            curr = curr.right(self);
3575        }
3576        f(curr);
3577    }
3578
3579    pub(crate) fn iter_unions_b(
3580        &mut self,
3581        curr: NodeId,
3582        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3583    ) {
3584        let mut curr = curr;
3585        while self.get_kind(curr) == Kind::Union {
3586            f(self, curr.left(self));
3587            curr = curr.right(self);
3588        }
3589        f(self, curr);
3590    }
3591
3592    pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3593        if !self.contains_look(node_id) {
3594            return Some(node_id);
3595        }
3596        match self.get_kind(node_id) {
3597            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3598            Kind::Concat => {
3599                let left = node_id.left(self);
3600                let right = node_id.right(self);
3601                let elim_l = self.try_elim_lookarounds(left)?;
3602                let elim_r = self.try_elim_lookarounds(right)?;
3603                let rw = self.mk_concat(elim_l, elim_r);
3604                Some(rw)
3605            }
3606            Kind::Union => {
3607                let left = node_id.left(self);
3608                let right = node_id.right(self);
3609                let elim_l = self.try_elim_lookarounds(left)?;
3610                let elim_r = self.try_elim_lookarounds(right)?;
3611                let rw = self.mk_union(elim_l, elim_r);
3612                Some(rw)
3613            }
3614
3615            Kind::Star => {
3616                let body = node_id.left(self);
3617                let elim_l = self.try_elim_lookarounds(body)?;
3618                Some(self.mk_star(elim_l))
3619            }
3620            Kind::Compl => {
3621                let left = node_id.left(self);
3622                let elim_l = self.try_elim_lookarounds(left)?;
3623                Some(self.mk_compl(elim_l))
3624            }
3625            Kind::Lookahead => {
3626                let rel = self.get_lookahead_rel(node_id);
3627                if rel != 0 {
3628                    return None;
3629                }
3630                let lbody = self.get_lookahead_inner(node_id);
3631                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3632                let elim_l = self.try_elim_lookarounds(lbody)?;
3633                let elim_r = self.try_elim_lookarounds(ltail)?;
3634                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3635                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3636                let rw = self.mk_inter(lbody_ts, ltail_ts);
3637                Some(rw)
3638            }
3639            Kind::Lookbehind => {
3640                let linner = self.get_lookbehind_inner(node_id);
3641                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3642                let elim_l = self.try_elim_lookarounds(linner)?;
3643                let elim_r = self.try_elim_lookarounds(lprev)?;
3644                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3645                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3646                let rw = self.mk_inter(lbody_ts, ltail_ts);
3647                Some(rw)
3648            }
3649            Kind::Inter => {
3650                let left = node_id.left(self);
3651                let right = node_id.right(self);
3652                let elim_l = self.try_elim_lookarounds(left)?;
3653                let elim_r = self.try_elim_lookarounds(right)?;
3654                let rw = self.mk_inter(elim_l, elim_r);
3655                Some(rw)
3656            }
3657            Kind::Counted => None,
3658        }
3659    }
3660
3661    /// R & _+ is a safe overapproximation of R that's nonempty
3662    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3663        if self.nullability(node) == Nullability::NEVER {
3664            node
3665        } else {
3666            self.mk_inter(NodeId::TOPPLUS, node)
3667        }
3668    }
3669
3670    fn iter_find_stack(
3671        &self,
3672        stack: &mut Vec<TRegexId>,
3673        mut f: impl FnMut(NodeId) -> bool,
3674    ) -> bool {
3675        loop {
3676            match stack.pop() {
3677                None => return false,
3678                Some(curr) => match self.get_tregex(curr) {
3679                    TRegex::Leaf(n) => {
3680                        let mut curr = *n;
3681                        while curr != NodeId::BOT {
3682                            match self.get_kind(curr) {
3683                                Kind::Union => {
3684                                    if f(curr.left(self)) {
3685                                        return true;
3686                                    }
3687                                    curr = curr.right(self);
3688                                }
3689                                _ => {
3690                                    if f(*n) {
3691                                        return true;
3692                                    }
3693                                    curr = NodeId::BOT;
3694                                }
3695                            }
3696                        }
3697                    }
3698                    TRegex::ITE(_, then_id, else_id) => {
3699                        if *else_id != TRegexId::BOT {
3700                            stack.push(*else_id);
3701                        }
3702                        stack.push(*then_id);
3703                    }
3704                },
3705            }
3706        }
3707    }
3708
3709    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3710        if node == NodeId::BOT {
3711            return Some(true);
3712        }
3713        if self.nullability(node) != Nullability::NEVER {
3714            return Some(false);
3715        }
3716        if let Some(cached) = self.cache_empty.get(&node) {
3717            if cached.is_checked() {
3718                return Some(cached.is_empty());
3719            }
3720        }
3721        let node = if !self.contains_look(node) {
3722            node
3723        } else {
3724            self.try_elim_lookarounds(node)?
3725        };
3726        let isempty_flag = self.is_empty_lang_internal(node);
3727
3728        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3729    }
3730
3731    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3732        // without inter, no need to check
3733        if !self.get_meta_flags(initial_node).contains_inter() {
3734            return Ok(NodeFlags::ZERO);
3735        }
3736
3737        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3738        let mut worklist: VecDeque<NodeId> = VecDeque::new();
3739        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3740        let mut stack = Vec::new();
3741        stack.push(begin_der);
3742        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3743            visited.insert(node, initial_node);
3744            let nullability = self.nullability(node);
3745            if nullability != Nullability::NEVER {
3746                true
3747            } else {
3748                worklist.push_back(node);
3749                false
3750            }
3751        });
3752        if found_nullable_right_away {
3753            return Ok(NodeFlags::ZERO);
3754        }
3755
3756        worklist.push_back(initial_node);
3757        let isempty_flag: NodeFlags;
3758        let mut found_node = NodeId::BOT;
3759
3760        loop {
3761            match worklist.pop_front() {
3762                None => {
3763                    isempty_flag = NodeFlags::IS_EMPTY;
3764                    break;
3765                }
3766                Some(outer) => {
3767                    if let Some(cached) = self.cache_empty.get(&outer) {
3768                        if cached.is_checked() {
3769                            if cached.is_empty() {
3770                                continue;
3771                            } else {
3772                                return Ok(NodeFlags::ZERO);
3773                            }
3774                        }
3775                    }
3776
3777                    let derivative = self.der(outer, Nullability::CENTER)?;
3778
3779                    stack.push(derivative);
3780
3781                    let found_null = self.iter_find_stack(&mut stack, |node| {
3782                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3783                            found_node = node;
3784                            if !self.get_meta_flags(node).contains_inter() {
3785                                true
3786                            } else {
3787                                e.insert(outer);
3788                                worklist.push_front(node);
3789                                self.any_nonbegin_nullable(node)
3790                            }
3791                        } else {
3792                            false
3793                        }
3794                    });
3795                    if found_null {
3796                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3797                        isempty_flag = NodeFlags::ZERO;
3798                        break;
3799                    }
3800                }
3801            }
3802        }
3803
3804        self.cache_empty.insert(
3805            initial_node,
3806            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3807        );
3808        Ok(isempty_flag)
3809    }
3810
3811    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3812    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3813        if larger_lang == smaller_lang {
3814            return Some(true);
3815        }
3816
3817        // assess initial nullability
3818        if self
3819            .nullability(larger_lang)
3820            .not()
3821            .and(self.nullability(smaller_lang))
3822            != Nullability::NEVER
3823        {
3824            return Some(false);
3825        }
3826
3827        // check language nullability
3828        // if (B &~ A) ≡ ⊥ then B ⊆ A
3829        // this means  L(B) \ L(A) = {}
3830        // eg. (a &~ .*) = ⊥ means .* subsumes a
3831        let (smaller_lang, larger_lang) =
3832            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3833                let wrap = |b: &mut Self, n: NodeId| {
3834                    let tmp = b.mk_concat(n, NodeId::TS);
3835                    b.mk_concat(NodeId::TS, tmp)
3836                };
3837                (wrap(self, smaller_lang), wrap(self, larger_lang))
3838            } else {
3839                (smaller_lang, larger_lang)
3840            };
3841
3842        let nota = self.mk_compl(larger_lang);
3843        let diff = self.mk_inter(smaller_lang, nota);
3844        self.is_empty_lang(diff)
3845    }
3846}