Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10#[cfg(feature = "reentrant-assert")]
11use rustc_hash::FxHashSet;
12use solver::{Solver, TSetId};
13use std::collections::{BTreeSet, VecDeque};
14use std::fmt::Debug;
15use std::fmt::Write;
16use std::hash::Hash;
17pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
18
19use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
20pub mod nulls;
21pub mod solver;
22
23const IS_EMPTY_LANG_NODE_BUDGET: usize = 10_000;
24
25#[derive(Debug, Clone, PartialEq, Eq)]
26pub enum ResharpError {
27    AnchorLimit,
28    StateSpaceExplosion,
29    UnsupportedPattern,
30}
31
32impl std::fmt::Display for ResharpError {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            ResharpError::AnchorLimit => write!(f, "anchor limit exceeded"),
36            ResharpError::StateSpaceExplosion => {
37                write!(f, "too many states, likely infinite state space")
38            }
39            ResharpError::UnsupportedPattern => write!(
40                f,
41                "unsupported pattern: eg. lookaround, `\\b`/`^`/`$` inside a complement `~(...)` or a star `*`"
42            ),
43        }
44    }
45}
46
47impl std::error::Error for ResharpError {}
48
49mod id {
50    pub const MISSING: u32 = 0;
51    pub const BOT: u32 = 1;
52    pub const EPS: u32 = 2;
53    pub const TOP: u32 = 3;
54    pub const TOPSTAR: u32 = 4;
55    pub const TOPPLUS: u32 = 5;
56    pub const BEGIN: u32 = 6;
57    pub const END: u32 = 7;
58}
59
60#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
61pub(crate) struct MetadataId(u32);
62impl MetadataId {
63    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
64}
65
66#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
67#[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))]
68pub struct NodeId(pub u32);
69impl NodeId {
70    pub const MISSING: NodeId = NodeId(id::MISSING);
71    pub const BOT: NodeId = NodeId(id::BOT);
72    pub const EPS: NodeId = NodeId(id::EPS);
73    pub const TOP: NodeId = NodeId(id::TOP);
74    pub const TS: NodeId = NodeId(id::TOPSTAR);
75    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
76    pub const BEGIN: NodeId = NodeId(id::BEGIN);
77    pub const END: NodeId = NodeId(id::END);
78
79    #[inline]
80    pub fn as_u32(self) -> u32 {
81        self.0
82    }
83
84    #[inline]
85    pub fn from_u32(v: u32) -> NodeId {
86        NodeId(v)
87    }
88}
89impl Debug for NodeId {
90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91        let num = &self.0;
92        f.write_str(format!("{num}").as_str())
93    }
94}
95
96#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
97pub struct TRegexId(u32);
98impl TRegexId {
99    pub const MISSING: TRegexId = TRegexId(id::MISSING);
100    pub const EPS: TRegexId = TRegexId(id::EPS);
101    pub const BOT: TRegexId = TRegexId(id::BOT);
102    pub const TOP: TRegexId = TRegexId(id::TOP);
103    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
104}
105
106#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
107pub(crate) struct MetaFlags(u8);
108impl MetaFlags {
109    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
110
111    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
112    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 3);
113    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 4);
114    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 5);
115    pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 6);
116    pub(crate) const CONTAINS_LOOKAHEAD: MetaFlags = MetaFlags(1 << 7);
117
118    #[inline]
119    pub(crate) fn nullability(self) -> Nullability {
120        Nullability(self.0 & Self::NULL_MASK)
121    }
122
123    #[inline]
124    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
125        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
126    }
127
128    #[inline]
129    pub(crate) fn has(self, flag: MetaFlags) -> bool {
130        self.0 & flag.0 != 0
131    }
132    #[inline]
133    const fn and(self, other: MetaFlags) -> MetaFlags {
134        MetaFlags(self.0 & other.0)
135    }
136    #[inline]
137    const fn or(self, other: MetaFlags) -> MetaFlags {
138        MetaFlags(self.0 | other.0)
139    }
140
141    pub(crate) fn contains_inter(self) -> bool {
142        self.has(MetaFlags::CONTAINS_INTER)
143    }
144
145    pub(crate) fn all_contains_flags(self) -> MetaFlags {
146        self.and(
147            MetaFlags::CONTAINS_ANCHORS
148                .or(MetaFlags::CONTAINS_INTER)
149                .or(MetaFlags::CONTAINS_LOOKBEHIND)
150                .or(MetaFlags::CONTAINS_LOOKAHEAD),
151        )
152    }
153}
154
155#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
156#[repr(u8)]
157pub enum Kind {
158    #[default]
159    Pred,
160    Star,
161    Begin,
162    End,
163    Concat,
164    Union,
165    Compl,
166    Lookbehind,
167    Lookahead,
168    Inter,
169    Ordered,
170}
171
172#[derive(Clone, Copy)]
173struct NodeLen {
174    mml_min: u32,
175    mml_max: u32,
176    es_null: Nullability,
177}
178
179impl Default for NodeLen {
180    fn default() -> Self {
181        NodeLen {
182            mml_min: 0,
183            mml_max: 0,
184            es_null: Nullability::NEVER,
185        }
186    }
187}
188
189#[derive(Eq, Hash, PartialEq, Clone)]
190struct Metadata {
191    flags: MetaFlags,
192    nulls: NullsId,
193}
194
195struct MetadataBuilder {
196    num_created: u32,
197    solver: Solver,
198    nb: NullsBuilder,
199    index: FxHashMap<Metadata, MetadataId>,
200    pub array: Vec<Metadata>,
201}
202
203impl MetadataBuilder {
204    fn new() -> MetadataBuilder {
205        Self {
206            index: FxHashMap::default(),
207            array: vec![Metadata {
208                flags: MetaFlags::ZERO,
209                nulls: NullsId::EMPTY,
210            }],
211            solver: Solver::new(),
212            num_created: 0,
213            nb: NullsBuilder::new(),
214        }
215    }
216
217    fn init(&mut self, inst: Metadata) -> MetadataId {
218        self.num_created += 1;
219        let new_id = MetadataId(self.num_created);
220        self.index.insert(inst.clone(), new_id);
221        self.array.push(inst);
222        new_id
223    }
224
225    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
226        match self.index.get(&inst) {
227            Some(&id) => id,
228            None => self.init(inst),
229        }
230    }
231
232    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
233        &self.array[inst.0 as usize]
234    }
235
236    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
237        setflags.all_contains_flags()
238    }
239
240    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
241        let left = &self.array[body.0 as usize].flags;
242        let contains = self.get_contains(*left);
243        // BOT* = EPS (empty string only), not infinite
244        let inf = if body_id == NodeId::BOT {
245            MetaFlags::ZERO
246        } else {
247            MetaFlags::INFINITE_LENGTH
248        };
249        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
250    }
251
252    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
253        let left = &self.array[left_id.0 as usize].flags;
254        let null = left.nullability().not().and(Nullability::ALWAYS);
255        let contains = self.get_contains(*left);
256        MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
257    }
258
259    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260        let left = &self.array[left_id.0 as usize].flags;
261        let right = &self.array[right_id.0 as usize].flags;
262        let null = left.nullability().and(right.nullability());
263        let contains = self.get_contains(left.or(*right));
264        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
265        MetaFlags::with_nullability(null, contains.or(len))
266    }
267
268    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
269        let left = &self.array[left_id.0 as usize].flags;
270        let right = &self.array[right_id.0 as usize].flags;
271        let null = left.nullability().and(right.nullability());
272        let contains = self
273            .get_contains(left.or(*right))
274            .or(MetaFlags::CONTAINS_INTER);
275        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
276        MetaFlags::with_nullability(null, contains.or(len))
277    }
278
279    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
280        let left = &self.array[left_id.0 as usize].flags;
281        let right = &self.array[right_id.0 as usize].flags;
282        let null = left.nullability().or(right.nullability());
283        let contains = self.get_contains(left.or(*right));
284        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
285        MetaFlags::with_nullability(null, contains.or(len))
286    }
287}
288
289#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
290pub struct NodeKey {
291    pub(crate) kind: Kind,
292    pub(crate) left: NodeId,
293    pub(crate) right: NodeId,
294    pub(crate) extra: u32,
295}
296
297#[derive(Eq, Hash, PartialEq, Clone, Debug)]
298pub enum TRegex<TSet> {
299    Leaf(NodeId),
300    ITE(TSet, TRegexId, TRegexId),
301}
302
303#[derive(Eq, Hash, PartialEq, Clone, Copy)]
304pub(crate) struct NodeFlags(u8);
305impl NodeFlags {
306    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
307    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
308    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
309
310    fn is_checked(self) -> bool {
311        self.0 >= NodeFlags::IS_CHECKED.0
312    }
313    fn is_empty(self) -> bool {
314        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
315    }
316}
317
318#[derive(Eq, Hash, PartialEq, Clone, Copy)]
319pub(crate) struct BuilderFlags(u8);
320impl BuilderFlags {
321    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
322    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
323}
324
325pub struct RegexBuilder {
326    mb: MetadataBuilder,
327    temp_vec: Vec<NodeId>,
328    num_created: u32,
329    index: FxHashMap<NodeKey, NodeId>,
330    array: Vec<NodeKey>,
331    metadata: Vec<MetadataId>,
332    reversed: Vec<NodeId>,
333    cache_empty: FxHashMap<NodeId, NodeFlags>,
334    lengths: Vec<NodeLen>,
335    tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
336    tr_array: Vec<TRegex<TSetId>>,
337    tr_der_center: Vec<TRegexId>,
338    tr_der_begin: Vec<TRegexId>,
339    flags: BuilderFlags,
340    /// maximum lookahead context distance before returning `AnchorLimit`.
341    pub lookahead_context_max: u32,
342    mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
343    clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
344    #[cfg(feature = "reentrant-assert")]
345    rw_active: FxHashSet<(Kind, NodeId, NodeId)>,
346}
347
348impl NodeId {
349    fn is_missing(&self) -> bool {
350        *self == NodeId::MISSING
351    }
352    fn is_eps(&self) -> bool {
353        *self == NodeId::EPS
354    }
355    #[inline]
356    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
357        b.get_flags_contains(self)
358    }
359    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
360        if self == tail {
361            true
362        } else if self.is_kind(b, Kind::Concat) {
363            self.right(b).has_concat_tail(b, tail)
364        } else {
365            false
366        }
367    }
368    fn missing_to_eps(&self) -> NodeId {
369        if *self == NodeId::MISSING {
370            NodeId::EPS
371        } else {
372            *self
373        }
374    }
375
376    #[inline]
377    fn kind(self, b: &RegexBuilder) -> Kind {
378        b.get_kind(self)
379    }
380    #[inline]
381    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
382        b.get_kind(self) == k
383    }
384    #[inline]
385    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
386        b.nullability(self) == Nullability::NEVER
387    }
388    #[inline]
389    pub fn nullability(self, b: &RegexBuilder) -> Nullability {
390        b.nullability(self)
391    }
392
393    #[inline]
394    pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
395        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
396    }
397    #[inline]
398    pub fn is_begin_nullable(self, b: &RegexBuilder) -> bool {
399        b.nullability(self).has(Nullability::BEGIN)
400    }
401    #[inline]
402    pub fn left(self, b: &RegexBuilder) -> NodeId {
403        b.get_left(self)
404    }
405
406    #[inline]
407    pub fn right(self, b: &RegexBuilder) -> NodeId {
408        b.get_right(self)
409    }
410
411    #[inline]
412    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, ResharpError> {
413        b.der(self, mask)
414    }
415
416    #[inline]
417    fn extra(self, b: &RegexBuilder) -> u32 {
418        b.get_extra(self)
419    }
420
421    #[inline]
422    fn is_pred(self, b: &RegexBuilder) -> bool {
423        b.get_kind(self) == Kind::Pred
424    }
425
426    #[inline]
427    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
428        debug_assert!(self.is_pred(b));
429        TSetId(b.get_extra(self))
430    }
431    #[inline]
432    pub fn is_star(self, b: &RegexBuilder) -> bool {
433        if NodeId::EPS == self {
434            return false;
435        }
436        b.get_kind(self) == Kind::Star
437    }
438
439    pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
440        b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
441    }
442
443    pub fn contains_lookahead(self, b: &RegexBuilder) -> bool {
444        b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAHEAD)
445    }
446
447    pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
448        b.get_meta_flags(self)
449            .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
450    }
451
452    #[inline]
453    pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
454        b.get_kind(self) == Kind::Inter
455    }
456
457    #[inline]
458    pub(crate) fn is_compl(self, b: &RegexBuilder) -> bool {
459        b.get_kind(self) == Kind::Compl
460    }
461
462    #[inline]
463    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
464        if self.is_concat(b) {
465            let r = self.right(b);
466            return r.is_star(b) && r.left(b) == self.left(b);
467        }
468        false
469    }
470
471    #[inline]
472    fn is_begin(self) -> bool {
473        self == NodeId::BEGIN
474    }
475
476    #[inline]
477    fn is_end(self) -> bool {
478        self == NodeId::END
479    }
480
481    #[inline]
482    fn is_union(self, b: &RegexBuilder) -> bool {
483        b.get_kind(self) == Kind::Union
484    }
485
486    #[inline]
487    pub fn is_concat(self, b: &RegexBuilder) -> bool {
488        b.get_kind(self) == Kind::Concat
489    }
490
491    #[inline]
492    fn is_lookahead(self, b: &RegexBuilder) -> bool {
493        b.get_kind(self) == Kind::Lookahead
494    }
495
496    #[inline]
497    fn is_lookbehind(self, b: &RegexBuilder) -> bool {
498        b.get_kind(self) == Kind::Lookbehind
499    }
500
501    #[inline]
502    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
503        if b.get_kind(self) == Kind::Union && b.get_left(self).is_eps() {
504            Some(b.get_right(self))
505        } else {
506            None
507        }
508    }
509
510    #[inline]
511    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
512        if self.is_concat(b) {
513            let left = self.left(b);
514            let right = self.right(b);
515            if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
516                return right == NodeId::END;
517            }
518        }
519        false
520    }
521
522    #[inline]
523    #[allow(dead_code)]
524    fn is_ts(self) -> bool {
525        NodeId::TS == self
526    }
527
528    #[inline]
529    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
530        if NodeId::EPS == self {
531            return None;
532        }
533        if self.is_star(b) && self.left(b).is_pred(b) {
534            Some(self.left(b))
535        } else {
536            None
537        }
538    }
539
540    #[inline]
541    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
542        if self.is_concat(b) && self.left(b) == NodeId::TS {
543            let middle = self.right(b);
544            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
545                return Some(middle.left(b));
546            }
547        }
548        None
549    }
550
551    #[inline]
552    pub(crate) fn iter_union(
553        self,
554        b: &mut RegexBuilder,
555        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
556    ) {
557        let mut curr: NodeId = self;
558        while curr.kind(b) == Kind::Union {
559            f(b, curr.left(b));
560            curr = curr.right(b);
561        }
562        f(b, curr);
563    }
564
565    #[inline]
566    pub(crate) fn iter_inter(
567        self,
568        b: &mut RegexBuilder,
569        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
570    ) {
571        let mut curr: NodeId = self;
572        while curr.kind(b) == Kind::Inter {
573            f(b, curr.left(b));
574            curr = curr.right(b);
575        }
576        f(b, curr);
577    }
578
579    #[inline]
580    pub fn iter_union_while(
581        self,
582        b: &mut RegexBuilder,
583        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
584    ) {
585        let mut curr: NodeId = self;
586        let mut continue_loop = true;
587        while continue_loop && curr.kind(b) == Kind::Union {
588            continue_loop = f(b, curr.left(b));
589            curr = curr.right(b);
590        }
591        if continue_loop {
592            f(b, curr);
593        }
594    }
595
596    #[inline]
597    pub(crate) fn any_inter_component(
598        self,
599        b: &RegexBuilder,
600        mut f: impl FnMut(NodeId) -> bool,
601    ) -> bool {
602        debug_assert!(self.kind(b) == Kind::Inter);
603        let mut cur = self;
604        while cur.kind(b) == Kind::Inter {
605            if f(cur.left(b)) {
606                return true;
607            }
608            cur = cur.right(b);
609        }
610        f(cur)
611    }
612
613    #[inline]
614    pub(crate) fn any_union_component(
615        self,
616        b: &RegexBuilder,
617        mut f: impl FnMut(NodeId) -> bool,
618    ) -> bool {
619        let mut cur = self;
620        while cur.kind(b) == Kind::Union {
621            if f(cur.left(b)) {
622                return true;
623            }
624            cur = cur.right(b);
625        }
626        f(cur)
627    }
628}
629
630impl Default for RegexBuilder {
631    fn default() -> Self {
632        Self::new()
633    }
634}
635
636impl RegexBuilder {
637    pub fn new() -> RegexBuilder {
638        let mut inst = Self {
639            mb: MetadataBuilder::new(),
640            array: Vec::new(),
641            index: FxHashMap::default(),
642            cache_empty: FxHashMap::default(),
643            lengths: vec![NodeLen::default()],
644            tr_array: Vec::new(),
645            tr_cache: FxHashMap::default(),
646            flags: BuilderFlags::ZERO,
647            lookahead_context_max: 800,
648            num_created: 0,
649            metadata: Vec::new(),
650            reversed: Vec::new(),
651            tr_der_center: Vec::new(),
652            tr_der_begin: Vec::new(),
653            temp_vec: Vec::new(),
654            mk_binary_memo: FxHashMap::default(),
655            clean_cache: FxHashMap::default(),
656            #[cfg(feature = "reentrant-assert")]
657            rw_active: FxHashSet::default(),
658        };
659        inst.array.push(NodeKey::default());
660        inst.mk_pred(TSetId::EMPTY);
661        inst.mk_star(NodeId::BOT);
662        inst.mk_pred(TSetId::FULL);
663        inst.mk_star(NodeId::TOP);
664        let top_plus_id = inst.mk_plus(NodeId::TOP);
665        inst.mk_unset(Kind::Begin);
666        inst.mk_unset(Kind::End);
667        debug_assert!(top_plus_id == NodeId::TOPPLUS);
668
669        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
670        inst.mk_leaf(NodeId::BOT);
671        inst.mk_leaf(NodeId::EPS);
672        inst.mk_leaf(NodeId::TOP);
673        inst.mk_leaf(NodeId::TS);
674
675        inst.flags = BuilderFlags::SUBSUME;
676        inst
677    }
678
679    #[inline]
680    pub fn solver_ref(&self) -> &Solver {
681        &self.mb.solver
682    }
683
684    #[inline]
685    pub fn solver(&mut self) -> &mut Solver {
686        &mut self.mb.solver
687    }
688
689    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
690        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
691        self.tr_cache.insert(inst.clone(), new_id);
692        self.tr_array.push(inst);
693        new_id
694    }
695
696    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
697        match self.tr_cache.get(&inst) {
698            Some(&id) => id,
699            None => self.tr_init(inst),
700        }
701    }
702
703    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
704        &self.tr_array[inst.0 as usize]
705    }
706
707    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
708        let solv = self.solver();
709        !solv.is_sat_id(cond1, cond2)
710    }
711
712    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
713        let term = TRegex::<TSetId>::Leaf(node_id);
714        self.get_tregex_id(term)
715    }
716
717    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
718        let tmp_inst = TRegex::ITE(cond, _then, _else);
719        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
720            return *cached;
721        }
722        if _then == _else {
723            return _then;
724        }
725        if self.solver().is_full_id(cond) {
726            return _then;
727        }
728        if self.solver().is_empty_id(cond) {
729            return _else;
730        }
731        let _clean_then = match self.tr_array[_then.0 as usize] {
732            TRegex::Leaf(_) => _then,
733            _ => self.clean(cond, _then),
734        };
735        let notcond = self.solver().not_id(cond);
736        let _clean_else = match self.tr_array[_else.0 as usize] {
737            TRegex::Leaf(_) => _else,
738            _ => self.clean(notcond, _else),
739        };
740
741        if _clean_then == _clean_else {
742            return _clean_then;
743        }
744        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
745        match self.get_tregex(_clean_then) {
746            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
747                let _cond_then = *leftcond;
748                let new_then = *_inner_then;
749                let sand = self.solver().and_id(cond, _cond_then);
750                return self.mk_ite(sand, new_then, _clean_else);
751            }
752            _ => {}
753        }
754        // attempt right side flattening:
755        match self.get_tregex(_clean_else) {
756            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
757            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
758                let e2clone = *_e2;
759                let new_cond = self.mb.solver.or_id(cond, *_c2);
760                return self.mk_ite(new_cond, _clean_then, e2clone);
761            }
762            _ => {}
763        }
764
765        if _clean_then == TRegexId::BOT {
766            let flip_cond = self.solver().not_id(cond);
767            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
768            return cleaned_id;
769        }
770
771        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
772    }
773
774    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
775        if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
776            return cached;
777        }
778        let result = match *self.get_tregex(tterm) {
779            TRegex::Leaf(_) => tterm,
780            TRegex::ITE(alpha, _then_id, _else_id) => {
781                let notalpha = self.mb.solver.not_id(alpha);
782                if self.mb.solver.unsat_id(beta, alpha) {
783                    // beta ⊆ ¬alpha, so beta ∧ ¬alpha = beta
784                    self.clean(beta, _else_id)
785                } else if self.unsat(beta, notalpha) {
786                    // beta ⊆ alpha, so beta ∧ alpha = beta
787                    self.clean(beta, _then_id)
788                } else {
789                    let tc = self.mb.solver.and_id(beta, alpha);
790                    let ec = self.mb.solver.and_id(beta, notalpha);
791                    let new_then = self.clean(tc, _then_id);
792                    let new_else = self.clean(ec, _else_id);
793                    self.mk_ite(alpha, new_then, new_else)
794                }
795            }
796        };
797        self.clean_cache.insert((beta, tterm), result);
798        result
799    }
800
801    fn mk_unary(
802        &mut self,
803        term: TRegexId,
804        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
805    ) -> TRegexId {
806        match self.tr_array[term.0 as usize] {
807            TRegex::Leaf(node_id) => {
808                let applied = apply(self, node_id);
809                self.mk_leaf(applied)
810            }
811            TRegex::ITE(c1, _then, _else) => {
812                let _then1 = self.mk_unary(_then, apply);
813                let _else1 = self.mk_unary(_else, apply);
814                self.mk_ite(c1, _then1, _else1)
815            }
816        }
817    }
818
819    fn mk_binary_result(
820        &mut self,
821        left: TRegexId,
822        right: TRegexId,
823        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, ResharpError>,
824    ) -> Result<TRegexId, ResharpError> {
825        match self.tr_array[left.0 as usize] {
826            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
827                TRegex::Leaf(right_node_id) => {
828                    let applied = apply(self, left_node_id, right_node_id)?;
829                    Ok(self.mk_leaf(applied))
830                }
831                TRegex::ITE(c2, _then, _else) => {
832                    let then2 = self.mk_binary_result(left, _then, apply)?;
833                    let else2 = self.mk_binary_result(left, _else, apply)?;
834                    Ok(self.mk_ite(c2, then2, else2))
835                }
836            },
837            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
838                TRegex::Leaf(_) => {
839                    let then2 = self.mk_binary_result(_then1, right, apply)?;
840                    let else2 = self.mk_binary_result(_else1, right, apply)?;
841                    Ok(self.mk_ite(c1, then2, else2))
842                }
843                TRegex::ITE(c2, _then2, _else2) => {
844                    if c1 == c2 {
845                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
846                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
847                        return Ok(self.mk_ite(c1, _then, _else));
848                    }
849                    if c1.0 > c2.0 {
850                        let _then = self.mk_binary_result(_then1, right, apply)?;
851                        let _else = self.mk_binary_result(_else1, right, apply)?;
852                        Ok(self.mk_ite(c1, _then, _else))
853                    } else {
854                        let _then = self.mk_binary_result(left, _then2, apply)?;
855                        let _else = self.mk_binary_result(left, _else2, apply)?;
856                        Ok(self.mk_ite(c2, _then, _else))
857                    }
858                }
859            },
860        }
861    }
862
863    fn mk_binary(
864        &mut self,
865        left: TRegexId,
866        right: TRegexId,
867        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
868    ) -> TRegexId {
869        self.mk_binary_memo.clear();
870        self.mk_binary_inner(left, right, apply)
871    }
872
873    fn mk_binary_inner(
874        &mut self,
875        left: TRegexId,
876        right: TRegexId,
877        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
878    ) -> TRegexId {
879        if left == right {
880            return self.mk_unary(left, &mut |b, n| apply(b, n, n));
881        }
882        if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
883            return cached;
884        }
885        let result = match self.tr_array[left.0 as usize] {
886            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
887                TRegex::Leaf(right_node_id) => {
888                    let applied = apply(self, left_node_id, right_node_id);
889                    self.mk_leaf(applied)
890                }
891                TRegex::ITE(c2, _then, _else) => {
892                    let then2 = self.mk_binary_inner(left, _then, apply);
893                    let else2 = self.mk_binary_inner(left, _else, apply);
894                    self.mk_ite(c2, then2, else2)
895                }
896            },
897            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
898                TRegex::Leaf(_) => {
899                    let then2 = self.mk_binary_inner(_then1, right, apply);
900                    let else2 = self.mk_binary_inner(_else1, right, apply);
901                    self.mk_ite(c1, then2, else2)
902                }
903                TRegex::ITE(c2, _then2, _else2) => {
904                    if c1 == c2 {
905                        let _then = self.mk_binary_inner(_then1, _then2, apply);
906                        let _else = self.mk_binary_inner(_else1, _else2, apply);
907                        self.mk_ite(c1, _then, _else)
908                    } else if c1.0 > c2.0 {
909                        let _then = self.mk_binary_inner(_then1, right, apply);
910                        let _else = self.mk_binary_inner(_else1, right, apply);
911                        self.mk_ite(c1, _then, _else)
912                    } else {
913                        let _then = self.mk_binary_inner(left, _then2, apply);
914                        let _else = self.mk_binary_inner(left, _else2, apply);
915                        self.mk_ite(c2, _then, _else)
916                    }
917                }
918            },
919        };
920        self.mk_binary_memo.insert((left, right), result);
921        result
922    }
923
924    pub fn get_nulls(
925        &mut self,
926        pending_rel: u32,
927        mask: Nullability,
928        acc: &mut BTreeSet<NullState>,
929        node_id: NodeId,
930    ) {
931        debug_assert!(node_id != NodeId::MISSING);
932        if !self.is_nullable(node_id, mask) {
933            return;
934        }
935        match self.get_kind(node_id) {
936            Kind::Pred => {}
937            Kind::End => {
938                if mask.has(Nullability::END) {
939                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
940                }
941            }
942            Kind::Begin => {
943                if mask.has(Nullability::BEGIN) {
944                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
945                }
946            }
947            Kind::Concat => {
948                let new_mask = self.nullability(node_id).and(mask);
949                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
950                if self.is_nullable(node_id.left(self), mask) {
951                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
952                }
953            }
954            Kind::Union => {
955                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
956                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
957            }
958            Kind::Inter => {
959                let new_mask = self.nullability(node_id).and(mask);
960                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
961                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
962            }
963            Kind::Star => {
964                acc.insert(NullState::new(mask, pending_rel));
965                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
966            }
967            Kind::Compl => {
968                if !self.is_nullable(node_id.left(self), mask) {
969                    acc.insert(NullState::new(mask, 0));
970                }
971            }
972            Kind::Lookbehind => {
973                let new_mask = self.nullability(node_id).and(mask);
974                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
975                if node_id.right(self) != NodeId::MISSING {
976                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
977                }
978            }
979            Kind::Lookahead => {
980                let la_inner = self.get_lookahead_inner(node_id);
981                if self.is_nullable(la_inner, mask) {
982                    let lo = self.get_lookahead_rel_lo(node_id);
983                    let hi = self.get_lookahead_rel(node_id);
984                    if hi != u32::MAX {
985                        for rel in lo..=hi {
986                            self.get_nulls(pending_rel + rel, mask, acc, la_inner);
987                        }
988                    }
989                    // tail only contributes when body is sat
990                    let la_tail = self.get_lookahead_tail(node_id);
991                    if la_tail != NodeId::MISSING {
992                        self.get_nulls(pending_rel, mask, acc, la_tail);
993                    }
994                }
995            }
996            Kind::Ordered => {
997                let packed = self.get_extra(node_id);
998                let best = packed >> 16;
999                if best > 0 {
1000                    acc.insert(NullState::new(mask, pending_rel + best));
1001                }
1002            }
1003        }
1004    }
1005
1006    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
1007        self.get_meta_flags(node_id)
1008            .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
1009    }
1010
1011    pub fn contains_lookbehind(&mut self, node_id: NodeId) -> bool {
1012        self.get_meta_flags(node_id)
1013            .has(MetaFlags::CONTAINS_LOOKBEHIND)
1014    }
1015
1016    pub fn contains_anchors(&self, node_id: NodeId) -> bool {
1017        self.get_meta_flags(node_id)
1018            .has(MetaFlags::CONTAINS_ANCHORS)
1019    }
1020
1021    pub fn is_infinite(&self, node_id: NodeId) -> bool {
1022        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
1023    }
1024
1025    /// returns (min_length, max_length). max = u32::MAX means unbounded.
1026    /// max body length across all lookaheads reachable from `node_id`. returns 0 if
1027    /// no lookaheads. `u32::MAX` if any lookahead body has unbounded length. used to
1028    /// bound the rel value an action can produce: rel \u2264 lookahead body's max
1029    /// consumption.
1030    pub fn max_lookahead_body_len(&self, node_id: NodeId) -> u32 {
1031        let mut visited: FxHashMap<NodeId, ()> = FxHashMap::default();
1032        let mut stack = vec![node_id];
1033        let mut best: u32 = 0;
1034        while let Some(n) = stack.pop() {
1035            if n == NodeId::MISSING || visited.insert(n, ()).is_some() {
1036                continue;
1037            }
1038            let kind = self.get_kind(n);
1039            if kind == Kind::Lookahead {
1040                let mut body = self.get_lookahead_inner(n);
1041                // mk_lookahead appends TS to body; strip it so we measure actual
1042                // consumption length, not the unbounded TS tail.
1043                if body.is_concat(self) && body.right(self) == NodeId::TS {
1044                    body = body.left(self);
1045                }
1046                let (_, mx) = self.get_min_max_length(body);
1047                if mx > best {
1048                    best = mx;
1049                }
1050            }
1051            match kind {
1052                Kind::Pred | Kind::Begin | Kind::End => {}
1053                Kind::Star | Kind::Compl => stack.push(n.left(self)),
1054                _ => {
1055                    stack.push(n.left(self));
1056                    stack.push(n.right(self));
1057                }
1058            }
1059        }
1060        best
1061    }
1062
1063    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
1064        let nl = &self.lengths[node_id.0 as usize];
1065        (nl.mml_min, nl.mml_max)
1066    }
1067
1068    #[inline]
1069    fn mml_of(&self, node_id: NodeId) -> (u32, u32) {
1070        let nl = &self.lengths[node_id.0 as usize];
1071        (nl.mml_min, nl.mml_max)
1072    }
1073
1074    fn compute_node_len(&self, node_id: NodeId) -> NodeLen {
1075        let es_null = self.compute_es_null(node_id);
1076        if node_id == NodeId::EPS {
1077            return NodeLen {
1078                mml_min: 0,
1079                mml_max: 0,
1080                es_null,
1081            };
1082        }
1083        let kind = self.get_kind(node_id);
1084        let l = node_id.left(self);
1085        let r = node_id.right(self);
1086        let min_of = |n: NodeId| self.lengths[n.0 as usize].mml_min;
1087
1088        let mml_min = match kind {
1089            Kind::End | Kind::Begin => 0,
1090            Kind::Pred => 1,
1091            Kind::Concat => min_of(l) + min_of(r),
1092            Kind::Union => min_of(l).min(min_of(r)),
1093            Kind::Inter => min_of(l).max(min_of(r)),
1094            Kind::Star | Kind::Lookbehind | Kind::Lookahead => 0,
1095            Kind::Ordered => min_of(l),
1096            Kind::Compl => {
1097                if self.nullability(l) == Nullability::ALWAYS {
1098                    1
1099                } else {
1100                    0
1101                }
1102            }
1103        };
1104
1105        let bounded_max = match kind {
1106            Kind::End | Kind::Begin => 0,
1107            Kind::Pred => 1,
1108            Kind::Concat => self.mml_of(l).1.saturating_add(self.mml_of(r).1),
1109            Kind::Union => self.mml_of(l).1.max(self.mml_of(r).1),
1110            Kind::Inter => self.mml_of(l).1.min(self.mml_of(r).1),
1111            Kind::Lookahead => self.mml_of(r.missing_to_eps()).1,
1112            Kind::Ordered => self.mml_of(l).1,
1113            Kind::Star | Kind::Compl => u32::MAX,
1114            Kind::Lookbehind => self.mml_of(r.missing_to_eps()).1,
1115        };
1116
1117        let mml_max = if self.is_infinite(node_id) && kind != Kind::Inter {
1118            u32::MAX
1119        } else {
1120            bounded_max
1121        };
1122
1123        NodeLen {
1124            mml_min,
1125            mml_max,
1126            es_null,
1127        }
1128    }
1129
1130    fn compute_es_null(&self, node_id: NodeId) -> Nullability {
1131        let es = |n: NodeId| self.lengths[n.0 as usize].es_null;
1132        match self.get_kind(node_id) {
1133            Kind::End | Kind::Begin => Nullability::EMPTYSTRING,
1134            Kind::Pred => Nullability::NEVER,
1135            Kind::Star => Nullability::ALWAYS,
1136            Kind::Inter | Kind::Concat => es(node_id.left(self)).and(es(node_id.right(self))),
1137            Kind::Union => es(node_id.left(self)).or(es(node_id.right(self))),
1138            Kind::Compl => es(node_id.left(self)).not(),
1139            Kind::Lookbehind => es(node_id.left(self)),
1140            Kind::Lookahead => {
1141                let body_null = es(node_id.left(self));
1142                let la_tail = self.get_lookahead_tail(node_id);
1143                if la_tail == NodeId::MISSING {
1144                    body_null
1145                } else {
1146                    body_null.and(es(la_tail))
1147                }
1148            }
1149            Kind::Ordered => es(node_id.left(self)),
1150        }
1151    }
1152
1153    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1154        if node_id == NodeId::EPS {
1155            return Some(0);
1156        }
1157        match self.get_kind(node_id) {
1158            Kind::End | Kind::Begin => Some(0),
1159            Kind::Pred => Some(1),
1160            Kind::Concat => {
1161                let l = self.get_fixed_length(node_id.left(self))?;
1162                let r = self.get_fixed_length(node_id.right(self))?;
1163                Some(l + r)
1164            }
1165            Kind::Union => {
1166                let l = self.get_fixed_length(node_id.left(self))?;
1167                let r = self.get_fixed_length(node_id.right(self))?;
1168                if l == r {
1169                    Some(l)
1170                } else {
1171                    None
1172                }
1173            }
1174            Kind::Inter => {
1175                let l = self.get_fixed_length(node_id.left(self));
1176                let r = self.get_fixed_length(node_id.right(self));
1177                match (l, r) {
1178                    (Some(a), Some(b)) if a == b => Some(a),
1179                    (Some(_), Some(_)) => None,
1180                    (Some(a), None) | (None, Some(a)) => Some(a),
1181                    (None, None) => None,
1182                }
1183            }
1184            Kind::Lookahead | Kind::Lookbehind => {
1185                let prev = node_id.right(self).missing_to_eps();
1186                self.get_fixed_length(prev)
1187            }
1188            Kind::Ordered => self.get_fixed_length(node_id.left(self)),
1189            Kind::Star | Kind::Compl => None,
1190        }
1191    }
1192
1193    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1194        self.lengths[node_id.0 as usize].mml_min
1195    }
1196
1197    pub fn starts_with_ts(&self, node_id: NodeId) -> bool {
1198        if node_id == NodeId::TS {
1199            return true;
1200        }
1201        match self.get_kind(node_id) {
1202            Kind::Inter => {
1203                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1204            }
1205            Kind::Union => {
1206                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1207            }
1208            Kind::Concat => self.starts_with_ts(node_id.left(self)),
1209            _ => false,
1210        }
1211    }
1212
1213    #[inline]
1214    pub fn ends_with_ts(&self, node_id: NodeId) -> bool {
1215        if node_id.is_concat(self) {
1216            return self.ends_with_ts(node_id.right(self));
1217        }
1218        if node_id.is_lookahead(self) {
1219            let tail = self.get_lookahead_tail(node_id);
1220            if !tail.is_missing() {
1221                return self.ends_with_ts(tail);
1222            }
1223        }
1224        node_id == NodeId::TS
1225    }
1226
1227    pub fn ends_with_end(&self, node_id: NodeId) -> bool {
1228        if node_id.is_concat(self) {
1229            return self.ends_with_end(node_id.right(self));
1230        }
1231        node_id.is_end()
1232    }
1233
1234    pub fn is_begin_anchored(&self, node_id: NodeId) -> bool {
1235        if node_id == NodeId::BEGIN {
1236            return true;
1237        }
1238        match self.get_kind(node_id) {
1239            Kind::Concat => self.is_begin_anchored(node_id.left(self)),
1240            Kind::Union => {
1241                self.is_begin_anchored(node_id.left(self))
1242                    && self.is_begin_anchored(node_id.right(self))
1243            }
1244            Kind::Lookahead => {
1245                if self.is_begin_anchored(self.get_lookahead_inner(node_id)) {
1246                    return true;
1247                }
1248                let tail = self.get_lookahead_tail(node_id);
1249                !tail.is_missing() && self.is_begin_anchored(tail)
1250            }
1251            _ => false,
1252        }
1253    }
1254
1255
1256    pub fn ends_with_ts_any_branch(&self, node_id: NodeId) -> bool {
1257        if node_id == NodeId::TS {
1258            return true;
1259        }
1260        match self.get_kind(node_id) {
1261            Kind::Concat => self.ends_with_ts_any_branch(node_id.right(self)),
1262            Kind::Union => {
1263                self.ends_with_ts_any_branch(node_id.left(self))
1264                    || self.ends_with_ts_any_branch(node_id.right(self))
1265            }
1266            Kind::Lookahead => {
1267                let tail = self.get_lookahead_tail(node_id);
1268                !tail.is_missing() && self.ends_with_ts_any_branch(tail)
1269            }
1270            _ => false,
1271        }
1272    }
1273
1274    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1275        debug_assert!(node_id != NodeId::MISSING);
1276        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1277    }
1278
1279    pub(crate) fn cache_der(
1280        &mut self,
1281        node_id: NodeId,
1282        result: TRegexId,
1283        mask: Nullability,
1284    ) -> TRegexId {
1285        if mask == Nullability::CENTER {
1286            self.tr_der_center[node_id.0 as usize] = result
1287        } else {
1288            self.tr_der_begin[node_id.0 as usize] = result
1289        };
1290        result
1291    }
1292
1293    pub(crate) fn try_cached_der(
1294        &mut self,
1295        node_id: NodeId,
1296        mask: Nullability,
1297    ) -> Option<TRegexId> {
1298        let cache = if mask == Nullability::CENTER {
1299            &mut self.tr_der_center
1300        } else {
1301            &mut self.tr_der_begin
1302        };
1303        match cache.get(node_id.0 as usize) {
1304            Some(&TRegexId::MISSING) => {}
1305            Some(&result) => {
1306                return Some(result);
1307            }
1308            None => {
1309                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1310            }
1311        }
1312        None
1313    }
1314
1315    pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1316        let mut term = self.get_tregex(der);
1317        loop {
1318            match *term {
1319                TRegex::Leaf(node_id) => return node_id,
1320                TRegex::ITE(cond, _then, _else) => {
1321                    if self.solver().is_sat_id(set, cond) {
1322                        term = self.get_tregex(_then);
1323                    } else {
1324                        term = self.get_tregex(_else);
1325                    }
1326                }
1327            }
1328        }
1329    }
1330
1331    pub fn transition_term_uniform(&mut self, der: TRegexId, set: TSetId) -> Option<NodeId> {
1332        let mut term = self.get_tregex(der);
1333        loop {
1334            match *term {
1335                TRegex::Leaf(node_id) => return Some(node_id),
1336                TRegex::ITE(cond, _then, _else) => {
1337                    let solver = self.solver();
1338                    let hits_then = solver.is_sat_id(set, cond);
1339                    let fully_in_then = solver.contains_id(cond, set);
1340                    if fully_in_then {
1341                        term = self.get_tregex(_then);
1342                    } else if !hits_then {
1343                        term = self.get_tregex(_else);
1344                    } else {
1345                        return None;
1346                    }
1347                }
1348            }
1349        }
1350    }
1351
1352    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, ResharpError> {
1353        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1354        debug_assert!(
1355            node_id != NodeId::MISSING,
1356            "attempting to derive missing node"
1357        );
1358        if let Some(result) = self.try_cached_der(node_id, mask) {
1359            return Ok(result);
1360        }
1361
1362        let result = match node_id.kind(self) {
1363            Kind::Compl => {
1364                let leftd = node_id.left(self).der(self, mask)?;
1365                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1366            }
1367            Kind::Inter => {
1368                let leftd = node_id.left(self).der(self, mask)?;
1369                let rightd = node_id.right(self).der(self, mask)?;
1370                {
1371                    let this = &mut *self;
1372                    this.mk_binary(
1373                        leftd,
1374                        rightd,
1375                        &mut (|b, left, right| b.mk_inter(left, right)),
1376                    )
1377                }
1378            }
1379            Kind::Union => {
1380                let leftd = self.der(node_id.left(self), mask)?;
1381                let rightd = self.der(node_id.right(self), mask)?;
1382                {
1383                    let this = &mut *self;
1384                    this.mk_binary(
1385                        leftd,
1386                        rightd,
1387                        &mut (|b, left, right| b.mk_union(left, right)),
1388                    )
1389                }
1390            }
1391            Kind::Concat => {
1392                let head = node_id.left(self);
1393                let tail = node_id.right(self);
1394                let tail_leaf = self.mk_leaf(tail);
1395                let head_der = self.der(head, mask)?;
1396
1397                if self.is_nullable(head, mask) {
1398                    let rightd = self.der(tail, mask)?;
1399                    let ldr = {
1400                        let this = &mut *self;
1401                        this.mk_binary(
1402                            head_der,
1403                            tail_leaf,
1404                            &mut (|b, left, right| b.mk_concat(left, right)),
1405                        )
1406                    };
1407                    {
1408                        let this = &mut *self;
1409                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1410                    }
1411                } else {
1412                    let this = &mut *self;
1413                    this.mk_binary(
1414                        head_der,
1415                        tail_leaf,
1416                        &mut (|b, left, right| b.mk_concat(left, right)),
1417                    )
1418                }
1419            }
1420            Kind::Star => {
1421                if node_id == NodeId::EPS {
1422                    TRegexId::BOT
1423                } else {
1424                    let left = node_id.left(self);
1425                    let r_decr_leaf = self.mk_leaf(node_id);
1426                    let r_der = self.der(left, mask)?;
1427                    let this = &mut *self;
1428                    this.mk_binary(
1429                        r_der,
1430                        r_decr_leaf,
1431                        &mut (|b, left, right| b.mk_concat(left, right)),
1432                    )
1433                }
1434            }
1435            Kind::Lookbehind => {
1436                let lb_prev_der = {
1437                    let lb_prev = self.get_lookbehind_prev(node_id);
1438                    if lb_prev == NodeId::MISSING {
1439                        TRegexId::MISSING
1440                    } else {
1441                        self.der(lb_prev, mask)?
1442                    }
1443                };
1444                let lb_inner = self.get_lookbehind_inner(node_id);
1445                let lb_inner_der = self.der(lb_inner, mask)?;
1446                {
1447                    let this = &mut *self;
1448                    this.mk_binary_result(
1449                        lb_inner_der,
1450                        lb_prev_der,
1451                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1452                    )?
1453                }
1454            }
1455            Kind::Lookahead => {
1456                let la_tail = self.get_lookahead_tail(node_id);
1457                let la_body = node_id.left(self);
1458                let rel = self.get_lookahead_rel(node_id);
1459                let cur_rels = if rel == u32::MAX {
1460                    NullsId(u32::MAX)
1461                } else {
1462                    NullsId(self.get_extra(node_id))
1463                };
1464
1465                if self.is_nullable(la_body, mask) {
1466                    // nullabilty is taken once, just keep the body
1467                    let right = node_id.right(self).missing_to_eps();
1468                    let rder = self.der(right, mask).clone();
1469                    return rder;
1470                }
1471
1472                if rel == u32::MAX {
1473                    let la_body_der = self.der(la_body, mask)?;
1474                    if la_tail.is_kind(self, Kind::Pred) {
1475                        if let Some(transitioned) =
1476                            self.transition_term_uniform(la_body_der, la_tail.pred_tset(self))
1477                        {
1478                            let new_la =
1479                                self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1480                            let concated = self.mk_concat(la_tail, new_la);
1481                            return self.der(concated, mask);
1482                        }
1483                    }
1484                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1485                        let left = la_tail.left(self);
1486                        let tset = left.pred_tset(self);
1487                        if let Some(transitioned) = self.transition_term_uniform(la_body_der, tset)
1488                        {
1489                            let new_la =
1490                                self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1491                            let tail_right = la_tail.right(self);
1492                            let concated = self.mk_concat(new_la, tail_right);
1493                            let concated = self.mk_concat(left, concated);
1494                            return self.der(concated, mask);
1495                        }
1496                    }
1497                }
1498
1499                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1500                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1501                    let concated = self.mk_concat(la_body, nulls_mask);
1502                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1503                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1504                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1505                    let new_union = self.mk_union(concated_look, new_look);
1506                    return self.der(new_union, mask);
1507                }
1508
1509                let la_tail_der = if la_tail == NodeId::MISSING {
1510                    TRegexId::MISSING
1511                } else {
1512                    if self.is_nullable(la_tail, mask) {
1513                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1514                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1515                        let la_union = self.mk_union(la_tail, nulls_la);
1516                        self.der(la_union, mask)?
1517                    } else {
1518                        self.der(la_tail, mask)?
1519                    }
1520                };
1521
1522                let la_body_der = self.der(la_body, mask)?;
1523
1524                if rel != u32::MAX && rel > self.lookahead_context_max {
1525                    return Err(ResharpError::AnchorLimit);
1526                }
1527
1528                let new_rels = if cur_rels.0 == u32::MAX {
1529                    NullsId(u32::MAX)
1530                } else {
1531                    self.mb.nb.add_rel(cur_rels, 1)
1532                };
1533                let single_rel = if new_rels.0 == u32::MAX {
1534                    Some(u32::MAX)
1535                } else {
1536                    let lo = self.mb.nb.min_rel(new_rels);
1537                    let hi = self.mb.nb.max_rel(new_rels);
1538                    if lo == hi { Some(hi) } else { None }
1539                };
1540
1541                let la = {
1542                    let this = &mut *self;
1543                    this.mk_binary(
1544                        la_body_der,
1545                        la_tail_der,
1546                        &mut (|b, left, right| match single_rel {
1547                            Some(r) => b.mk_lookahead_internal(left, right, r),
1548                            None => b.mk_lookahead_nid(left, right, new_rels),
1549                        }),
1550                    )
1551                };
1552
1553                if rel != u32::MAX
1554                    && la_tail_der != TRegexId::MISSING
1555                    && self.is_nullable(la_tail, mask)
1556                {
1557                    let look_only = {
1558                        let this = &mut *self;
1559                        let right = TRegexId::MISSING;
1560                        this.mk_binary(
1561                            la_body_der,
1562                            right,
1563                            &mut (|b, left, right| match single_rel {
1564                                Some(r) => b.mk_lookahead_internal(left, right, r),
1565                                None => b.mk_lookahead_nid(left, right, new_rels),
1566                            }),
1567                        )
1568                    };
1569                    {
1570                        let this = &mut *self;
1571                        this.mk_binary(
1572                            look_only,
1573                            la,
1574                            &mut (|b, left, right| b.mk_union(left, right)),
1575                        )
1576                    }
1577                } else {
1578                    la
1579                }
1580            }
1581            Kind::Ordered => {
1582                let body = node_id.left(self);
1583                let chain = node_id.right(self);
1584                let packed = self.get_extra(node_id);
1585                let step = (packed & 0xFFFF) as u16;
1586                let best = (packed >> 16) as u16;
1587
1588                let mid_best = if self.is_nullable(body, mask) && step >= best {
1589                    step
1590                } else {
1591                    best
1592                };
1593
1594                let body_der = self.der(body, mask)?;
1595                let new_step = step.saturating_add(1);
1596                self.mk_unary(body_der, &mut |b, new_body| {
1597                    let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1598                        new_step
1599                    } else {
1600                        mid_best
1601                    };
1602                    let packed = (final_best as u32) << 16 | new_step as u32;
1603                    b.mk_ordered(new_body, chain, packed)
1604                })
1605            }
1606            Kind::Begin | Kind::End => TRegexId::BOT,
1607            Kind::Pred => {
1608                let psi = node_id.pred_tset(self);
1609                if psi == TSetId::EMPTY {
1610                    TRegexId::BOT
1611                } else {
1612                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1613                }
1614            }
1615        };
1616
1617        // println!("{} {}", node_id.0, self.pp(node_id));
1618        // println!("node: {} (total: {})", node_id.0, self.num_created);
1619
1620        self.cache_der(node_id, result, mask);
1621        Ok(result)
1622    }
1623
1624    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1625        debug_assert!(meta_id != MetadataId::MISSING);
1626        match self.metadata.get_mut(node_id.0 as usize) {
1627            Some(v) => *v = meta_id,
1628            None => {
1629                self.metadata
1630                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1631                self.metadata[node_id.0 as usize] = meta_id;
1632            }
1633        }
1634    }
1635
1636    fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1637        debug_assert!(reversed_id != NodeId::MISSING);
1638        match self.reversed.get_mut(node_id.0 as usize) {
1639            Some(v) => *v = reversed_id,
1640            None => {
1641                self.reversed
1642                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1643                self.reversed[node_id.0 as usize] = reversed_id;
1644            }
1645        }
1646        return reversed_id;
1647    }
1648
1649    fn init(&mut self, inst: NodeKey) -> NodeId {
1650        self.num_created += 1;
1651        let node_id = NodeId(self.num_created);
1652        self.index.insert(inst.clone(), node_id);
1653        match inst.kind {
1654            Kind::Pred => {
1655                let meta_id = self.mb.get_meta_id(Metadata {
1656                    flags: MetaFlags::ZERO,
1657                    nulls: NullsId::EMPTY,
1658                });
1659                self.init_metadata(node_id, meta_id);
1660            }
1661            Kind::Begin => {
1662                let meta_id = self.mb.get_meta_id(Metadata {
1663                    flags: MetaFlags::with_nullability(
1664                        Nullability::BEGIN,
1665                        MetaFlags::CONTAINS_ANCHORS,
1666                    ),
1667                    nulls: NullsId::BEGIN0,
1668                });
1669                self.init_metadata(node_id, meta_id);
1670            }
1671            Kind::End => {
1672                let meta_id = self.mb.get_meta_id(Metadata {
1673                    flags: MetaFlags::with_nullability(
1674                        Nullability::END,
1675                        MetaFlags::CONTAINS_ANCHORS,
1676                    ),
1677                    nulls: NullsId::END0,
1678                });
1679                self.init_metadata(node_id, meta_id);
1680            }
1681            Kind::Inter => {
1682                let m1 = self.get_node_meta_id(inst.left);
1683                let m2 = self.get_node_meta_id(inst.right);
1684                let meta_id = {
1685                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1686                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1687                    let nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1688                    let new_meta = Metadata {
1689                        flags: self.mb.flags_inter(m1, m2),
1690                        nulls,
1691                    };
1692                    self.mb.get_meta_id(new_meta)
1693                };
1694                self.init_metadata(node_id, meta_id);
1695            }
1696            Kind::Union => {
1697                let m1 = self.get_node_meta_id(inst.left);
1698                let m2 = self.get_node_meta_id(inst.right);
1699                let meta_id = {
1700                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1701                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1702                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1703                    let new_meta = Metadata {
1704                        flags: self.mb.flags_union(m1, m2),
1705                        nulls,
1706                    };
1707                    self.mb.get_meta_id(new_meta)
1708                };
1709                self.init_metadata(node_id, meta_id);
1710            }
1711            Kind::Concat => {
1712                let flags = self.mb.flags_concat(
1713                    self.get_node_meta_id(inst.left),
1714                    self.get_node_meta_id(inst.right),
1715                );
1716
1717                let right_nullability = inst.right.nullability(self);
1718                let left_nullability = inst.left.nullability(self);
1719                let nulls_left = self.get_nulls_id(inst.left);
1720                let nulls_right = self.get_nulls_id(inst.right);
1721                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1722                let mask = right_nullability.and(left_nullability);
1723                nulls = self.mb.nb.and_mask(nulls, mask);
1724
1725                let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1726                self.init_metadata(node_id, new_id);
1727            }
1728            Kind::Star => {
1729                let left_nulls = self.get_nulls_id(inst.left);
1730                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1731                let meta_id = self.mb.get_meta_id(Metadata {
1732                    flags: self
1733                        .mb
1734                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1735                    nulls,
1736                });
1737                self.init_metadata(node_id, meta_id);
1738            }
1739            Kind::Compl => {
1740                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1741                let meta_id = self.mb.get_meta_id(Metadata {
1742                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1743                    nulls,
1744                });
1745                self.init_metadata(node_id, meta_id);
1746            }
1747            Kind::Lookbehind => {
1748                let mut left_nullability = self.nullability(inst.left);
1749                let mut contains_flags = self.get_flags_contains(inst.left);
1750                let nulls = if inst.right.is_missing() {
1751                    self.get_nulls_id(inst.left)
1752                } else {
1753                    let right_nullability = self.nullability(inst.right);
1754                    let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1755                    let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1756                    left_nullability = left_nullability.and(right_nullability);
1757                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1758                    self.mb.nb.and_id(nulls_left, nulls_right)
1759                };
1760
1761                let meta_id = self.mb.get_meta_id(Metadata {
1762                    flags: MetaFlags::with_nullability(
1763                        left_nullability,
1764                        contains_flags.or(MetaFlags::CONTAINS_LOOKBEHIND),
1765                    ),
1766                    nulls,
1767                });
1768                self.init_metadata(node_id, meta_id);
1769            }
1770            Kind::Lookahead => {
1771                let mut nulls = if inst.extra == u32::MAX {
1772                    NullsId::EMPTY
1773                } else {
1774                    let rels = NullsId(inst.extra);
1775                    let body_nulls = self.get_nulls_id(inst.left);
1776                    let rel_vals: Vec<u32> =
1777                        self.mb.nb.get_set_ref(rels).iter().rev().map(|ns| ns.rel).collect();
1778                    let mut combined = NullsId::EMPTY;
1779                    for &rel in &rel_vals {
1780                        let part = self.mb.nb.add_rel(body_nulls, rel);
1781                        combined = self.mb.nb.or_id(combined, part);
1782                    }
1783                    combined
1784                };
1785                let left_nullability = inst.left.nullability(self);
1786                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1787
1788                nulls = self.mb.nb.or_id(nulls, nulls_right);
1789
1790                let la_inner = inst.left;
1791                let la_tail = inst.right;
1792                let null = self
1793                    .get_meta_flags(la_inner)
1794                    .nullability()
1795                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1796                let contains_flags = self
1797                    .get_flags_contains(la_inner)
1798                    .or(self.get_flags_contains(la_tail));
1799
1800                let meta_id = self.mb.get_meta_id(Metadata {
1801                    flags: MetaFlags::with_nullability(
1802                        null,
1803                        contains_flags.or(MetaFlags::CONTAINS_LOOKAHEAD),
1804                    ),
1805                    nulls,
1806                });
1807                self.init_metadata(node_id, meta_id);
1808            }
1809            Kind::Ordered => {
1810                let best = inst.extra >> 16;
1811                let (null, nulls) = if best > 0 {
1812                    let mut ns = BTreeSet::new();
1813                    ns.insert(NullState::new(Nullability::CENTER, best));
1814                    (Nullability::CENTER, self.mb.nb.get_id(ns))
1815                } else {
1816                    (Nullability::NEVER, NullsId::EMPTY)
1817                };
1818                let meta_id = self.mb.get_meta_id(Metadata {
1819                    flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1820                    nulls,
1821                });
1822                self.init_metadata(node_id, meta_id);
1823            }
1824        }
1825
1826        self.array.push(inst);
1827
1828        let nl = self.compute_node_len(node_id);
1829        debug_assert!(self.lengths.len() == node_id.0 as usize);
1830        self.lengths.push(nl);
1831
1832        if let Some(rw) = self.post_init_simplify(node_id) {
1833            self.override_as(node_id, rw)
1834        } else {
1835            node_id
1836        }
1837    }
1838
1839    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1840        match self.get_kind(node_id) {
1841            Kind::Concat => {
1842                let lhs = node_id.left(self);
1843                let rhs = node_id.right(self);
1844                if let Some(_) = lhs.is_pred_star(self) {
1845                    if let Some(opttail) = rhs.is_opt_v(self) {
1846                        let (_, max) = self.get_min_max_length(node_id);
1847                        if max < u32::MAX {
1848                            if let Some(true) = self.subsumes(lhs, opttail) {
1849                                return Some(lhs);
1850                            }
1851                        }
1852                    }
1853                }
1854            }
1855            Kind::Union => {
1856                let lhs = node_id.left(self);
1857                let rhs = node_id.right(self);
1858                let mut subsumed = false;
1859                rhs.iter_union_while(self, &mut |b, branch| {
1860                    if b.nullable_subsumes(branch, lhs) {
1861                        subsumed = true;
1862                    }
1863                    !subsumed
1864                });
1865                if subsumed {
1866                    return Some(rhs);
1867                }
1868                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1869                    return Some(rhs);
1870                }
1871            }
1872            _ => {}
1873        }
1874
1875        None
1876    }
1877
1878    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1879    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1880        if self.get_kind(lhs) != Kind::Union {
1881            return false; // single branch already checked by nullable_subsumes
1882        }
1883        let mut rhs_branches = Vec::new();
1884        let mut curr = rhs;
1885        while self.get_kind(curr) == Kind::Union {
1886            rhs_branches.push(self.get_left(curr));
1887            curr = self.get_right(curr);
1888        }
1889        rhs_branches.push(curr);
1890
1891        curr = lhs;
1892        while self.get_kind(curr) == Kind::Union {
1893            if !rhs_branches.contains(&self.get_left(curr)) {
1894                return false;
1895            }
1896            curr = self.get_right(curr);
1897        }
1898        rhs_branches.contains(&curr)
1899    }
1900
1901    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1902    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1903        if node == target {
1904            return true;
1905        }
1906        match self.get_kind(node) {
1907            Kind::Union => {
1908                self.nullable_subsumes(self.get_left(node), target)
1909                    || self.nullable_subsumes(self.get_right(node), target)
1910            }
1911            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1912                self.nullable_subsumes(self.get_right(node), target)
1913            }
1914            _ => false,
1915        }
1916    }
1917
1918    pub fn num_nodes(&self) -> u32 {
1919        self.num_created
1920    }
1921
1922    pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1923        let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1924        let mut stack: Vec<NodeId> = vec![root];
1925        while let Some(n) = stack.pop() {
1926            if n == NodeId::MISSING
1927                || n == NodeId::BOT
1928                || n == NodeId::EPS
1929                || n == NodeId::TS
1930                || n == NodeId::BEGIN
1931                || n == NodeId::END
1932            {
1933                continue;
1934            }
1935            if seen.insert(n, ()).is_some() {
1936                continue;
1937            }
1938            if seen.len() >= limit {
1939                return limit;
1940            }
1941            stack.push(self.get_left(n));
1942            stack.push(self.get_right(n));
1943        }
1944        seen.len()
1945    }
1946    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1947        match self.index.get(&inst) {
1948            Some(&id) => id,
1949            None => self.init(inst),
1950        }
1951    }
1952    #[inline]
1953    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1954        self.index.get(inst)
1955    }
1956
1957    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1958        self.index.insert(key, subsumed);
1959        subsumed
1960    }
1961
1962    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1963        let key = &self.array[key.0 as usize];
1964        let entry = self.index.get_mut(key).unwrap();
1965        *entry = subsumed;
1966        subsumed
1967    }
1968
1969    #[inline]
1970    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1971        self.array[node_id.0 as usize].left
1972    }
1973
1974    #[inline]
1975    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1976        self.array[node_id.0 as usize].right
1977    }
1978
1979    #[inline]
1980    pub fn get_extra(&self, node_id: NodeId) -> u32 {
1981        self.array[node_id.0 as usize].extra
1982    }
1983
1984    #[inline]
1985    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1986        debug_assert!(node_id.is_concat(self));
1987        let mut curr = node_id;
1988        while curr.is_concat(self) {
1989            curr = curr.right(self);
1990        }
1991        curr
1992    }
1993
1994    fn has_trailing_la(&self, node: NodeId) -> bool {
1995        let end = match self.get_kind(node) {
1996            Kind::Concat => self.get_concat_end(node),
1997            Kind::Lookahead => node,
1998            _ => return false,
1999        };
2000        end.is_lookahead(self) && end.right(self).is_missing()
2001    }
2002
2003    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
2004        if node.is_lookahead(self) {
2005            return (NodeId::EPS, node);
2006        }
2007        debug_assert!(node.is_concat(self));
2008        let right = node.right(self);
2009        if !right.is_concat(self) {
2010            return (node.left(self), right);
2011        }
2012        let (stripped, la) = self.strip_trailing_la(right);
2013        (self.mk_concat(node.left(self), stripped), la)
2014    }
2015    #[inline]
2016    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
2017        debug_assert!(matches!(
2018            self.get_kind(lookahead_node_id),
2019            Kind::Lookahead | Kind::Ordered
2020        ));
2021        lookahead_node_id.left(self)
2022    }
2023    #[inline]
2024    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
2025        debug_assert!(lookahead_node_id.is_lookahead(self));
2026        lookahead_node_id.right(self)
2027    }
2028    #[inline]
2029    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
2030        debug_assert!(
2031            matches!(
2032                self.get_kind(lookahead_node_id),
2033                Kind::Lookahead | Kind::Ordered
2034            ),
2035            "not lookahead/counted: {:?}",
2036            self.pp(lookahead_node_id)
2037        );
2038        let extra = self.get_extra(lookahead_node_id);
2039        if extra == u32::MAX {
2040            u32::MAX
2041        } else {
2042            self.mb.nb.max_rel(NullsId(extra))
2043        }
2044    }
2045    #[inline]
2046    pub(crate) fn get_lookahead_rel_lo(&self, lookahead_node_id: NodeId) -> u32 {
2047        debug_assert!(lookahead_node_id.is_lookahead(self));
2048        let extra = self.get_extra(lookahead_node_id);
2049        if extra == u32::MAX {
2050            u32::MAX
2051        } else {
2052            self.mb.nb.min_rel(NullsId(extra))
2053        }
2054    }
2055    #[inline]
2056    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
2057        debug_assert!(lookbehind_node_id.is_lookbehind(self));
2058        lookbehind_node_id.left(self)
2059    }
2060    #[inline]
2061    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
2062        debug_assert!(lookbehind_node_id.is_lookbehind(self));
2063        lookbehind_node_id.right(self)
2064    }
2065
2066    #[inline]
2067    pub fn get_kind(&self, node_id: NodeId) -> Kind {
2068        debug_assert!(
2069            self.array.len() > node_id.0 as usize,
2070            "array len: {:?}",
2071            node_id
2072        );
2073        self.array[node_id.0 as usize].kind
2074    }
2075
2076    #[inline]
2077    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
2078        &self.array[node_id.0 as usize]
2079    }
2080
2081    #[inline]
2082    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
2083        self.metadata[node_id.0 as usize]
2084    }
2085
2086    #[inline]
2087    fn get_meta(&self, node_id: NodeId) -> &Metadata {
2088        debug_assert!(node_id.0 != u32::MAX);
2089        let meta_id = self.get_node_meta_id(node_id);
2090        debug_assert!(meta_id != MetadataId::MISSING);
2091        &self.mb.array[meta_id.0 as usize]
2092    }
2093
2094    #[inline]
2095    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
2096        if node_id == NodeId::MISSING {
2097            NullsId::EMPTY
2098        } else {
2099            self.get_meta(node_id).nulls
2100        }
2101    }
2102
2103    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
2104        self.mb
2105            .nb
2106            .array
2107            .iter()
2108            .map(|set| set.iter().cloned().collect())
2109            .collect()
2110    }
2111
2112    pub fn nulls_count(&self) -> usize {
2113        self.mb.nb.array.len()
2114    }
2115
2116    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
2117        self.mb.nb.array[id as usize].iter().cloned().collect()
2118    }
2119
2120    pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
2121        self.mb.nb.and_mask(nid, Nullability::CENTER)
2122    }
2123
2124    #[inline]
2125    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
2126        if node_id == NodeId::MISSING {
2127            NullsId::EMPTY
2128        } else {
2129            let nulls = self.get_meta(node_id).nulls;
2130            self.mb.nb.and_mask(nulls, mask)
2131        }
2132    }
2133
2134    #[inline]
2135    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
2136        let meta_id = self.get_node_meta_id(node_id);
2137        let meta = &self.mb.array[meta_id.0 as usize];
2138        meta.flags
2139    }
2140
2141    #[inline]
2142    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
2143        self.get_meta(node_id).flags.nullability()
2144    }
2145
2146    #[inline]
2147    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
2148        let meta_id = self.get_node_meta_id(node_id);
2149        let meta = &self.mb.array[meta_id.0 as usize];
2150        meta.flags.all_contains_flags()
2151    }
2152
2153    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2154        if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2155            return self.strip_lb(node_id.right(self));
2156        }
2157        let result = self.strip_lb_inner(true, node_id)?;
2158        if self.contains_lookbehind(result) {
2159            return Err(ResharpError::UnsupportedPattern);
2160        }
2161        Ok(result)
2162    }
2163
2164    fn strip_lb_inner(&mut self, allowed: bool, node_id: NodeId) -> Result<NodeId, ResharpError> {
2165        if !self.contains_lookbehind(node_id) {
2166            return Ok(node_id);
2167        }
2168        if !allowed {
2169            return Result::Err(ResharpError::UnsupportedPattern);
2170        }
2171        if node_id.is_inter(self) {
2172            let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2173            let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2174            return Ok(self.mk_inter(left, right));
2175        }
2176        if node_id.is_union(self) {
2177            let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2178            let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2179            return Ok(self.mk_union(left, right));
2180        }
2181        match self.get_kind(node_id) {
2182            Kind::Compl => Result::Err(ResharpError::UnsupportedPattern),
2183            Kind::Concat => {
2184                let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2185                let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2186                Ok(self.mk_concat(left, right))
2187            }
2188            Kind::Lookbehind => {
2189                let prev = self.get_lookbehind_prev(node_id);
2190                if prev != NodeId::MISSING {
2191                    self.strip_lb_inner(allowed, prev)
2192                } else {
2193                    Ok(NodeId::EPS)
2194                }
2195            }
2196            Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => Ok(node_id),
2197            _ => Ok(node_id),
2198        }
2199    }
2200
2201    // for prefix purposes we prune any \A leading paths
2202    pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
2203        if !self.contains_anchors(node_id) {
2204            return node_id;
2205        }
2206        match self.get_kind(node_id) {
2207            Kind::Begin => NodeId::BOT,
2208            Kind::Concat => {
2209                let left = self.nonbegins(node_id.left(self));
2210                if left == NodeId::BOT {
2211                    return NodeId::BOT;
2212                }
2213                self.mk_concat(left, node_id.right(self))
2214            }
2215            Kind::Union => {
2216                let left = self.nonbegins(node_id.left(self));
2217                let right = self.nonbegins(node_id.right(self));
2218                self.mk_union(left, right)
2219            }
2220            _ => node_id,
2221        }
2222    }
2223
2224    pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
2225        match self.get_kind(node_id) {
2226            Kind::Concat => {
2227                let head = node_id.left(self);
2228                match self.get_kind(head) {
2229                    _ if self.any_nonbegin_nullable(head) => {
2230                        self.strip_prefix_safe(node_id.right(self))
2231                    }
2232                    _ => node_id,
2233                }
2234            }
2235            _ => node_id,
2236        }
2237    }
2238    pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2239        match self.get_kind(node_id) {
2240            Kind::Begin => NodeId::BOT,
2241            Kind::Concat => {
2242                let head = self.prune_begin(node_id.left(self));
2243                let tail = self.prune_begin(node_id.right(self));
2244                self.mk_concat(head, tail)
2245            }
2246            Kind::Lookbehind => {
2247                if !node_id.right(self).is_missing() {
2248                    return node_id;
2249                }
2250                let head = self.prune_begin(node_id.left(self));
2251                head
2252            }
2253            Kind::Union => {
2254                let left = self.prune_begin(node_id.left(self));
2255                let right = self.prune_begin(node_id.right(self));
2256                self.mk_union(left, right)
2257            }
2258            _ => node_id,
2259        }
2260    }
2261    pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2262        match self.get_kind(node_id) {
2263            Kind::Begin => NodeId::EPS,
2264            Kind::Concat => {
2265                let head = self.prune_begin_eps(node_id.left(self));
2266                let tail = self.prune_begin_eps(node_id.right(self));
2267                self.mk_concat(head, tail)
2268            }
2269            Kind::Lookbehind => {
2270                if !node_id.right(self).is_missing() {
2271                    return node_id;
2272                }
2273                let head = self.prune_begin_eps(node_id.left(self));
2274                head
2275            }
2276            Kind::Union => {
2277                let left = self.prune_begin_eps(node_id.left(self));
2278                let right = self.prune_begin_eps(node_id.right(self));
2279                self.mk_union(left, right)
2280            }
2281            _ => node_id,
2282        }
2283    }
2284
2285    pub fn normalize_rev(&mut self, node_id: NodeId, depth: usize) -> Result<NodeId, ResharpError> {
2286        if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2287            return Ok(node_id);
2288        }
2289        let next_depth = depth + 1;
2290        let result = match self.get_kind(node_id) {
2291            Kind::Concat => {
2292                let left = self.normalize_rev(node_id.left(self), depth)?;
2293                let right = self.normalize_rev(node_id.right(self), next_depth)?;
2294                self.mk_concat(left, right)
2295            }
2296            Kind::Inter => {
2297                let left = self.normalize_rev(node_id.left(self), depth)?;
2298                let right = self.normalize_rev(node_id.right(self), depth)?;
2299                self.mk_inter(left, right)
2300            }
2301            Kind::Union => {
2302                let left = self.normalize_rev(node_id.left(self), depth)?;
2303                let right = self.normalize_rev(node_id.right(self), depth)?;
2304                self.mk_union(left, right)
2305            }
2306            Kind::Lookbehind if depth == 0 => {
2307                let left = self.normalize_rev(node_id.left(self), depth)?;
2308                let right = self.normalize_rev(node_id.right(self).missing_to_eps(), depth)?;
2309                let lbody_ts = self.mk_concat(NodeId::TS, left);
2310                let ltail_ts = self.mk_concat(NodeId::TS, right);
2311                self.mk_inter(lbody_ts, ltail_ts)
2312            }
2313            Kind::Lookbehind => {
2314                let left = self.normalize_rev(node_id.left(self), depth)?;
2315                let right = self.normalize_rev(node_id.right(self), depth)?;
2316                self.mk_lookbehind(left, right)
2317            }
2318            Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2319                return Err(ResharpError::UnsupportedPattern)
2320            }
2321            _ => node_id,
2322        };
2323        Ok(result)
2324    }
2325
2326    pub fn collect_der_targets(
2327        &mut self,
2328        der: TRegexId,
2329        path_set: TSetId,
2330        out: &mut Vec<(NodeId, TSetId)>,
2331    ) {
2332        match *self.get_tregex(der) {
2333            TRegex::Leaf(target) => {
2334                if let Some(entry) = out.iter_mut().find(|(t, _)| *t == target) {
2335                    entry.1 = self.solver().or_id(entry.1, path_set);
2336                } else {
2337                    out.push((target, path_set));
2338                }
2339            }
2340            TRegex::ITE(cond, then_b, else_b) => {
2341                let then_path = self.solver().and_id(path_set, cond);
2342                self.collect_der_targets(then_b, then_path, out);
2343                let not_cond = self.solver().not_id(cond);
2344                let else_path = self.solver().and_id(path_set, not_cond);
2345                self.collect_der_targets(else_b, else_path, out);
2346            }
2347        }
2348    }
2349
2350    pub fn prefix_ts(&mut self, node_id: NodeId) -> NodeId {
2351        let with_ts = if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2352            node_id
2353        } else {
2354            self.mk_concat(NodeId::TS, node_id)
2355        };
2356        with_ts
2357    }
2358
2359    pub fn ts_rev_start(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2360        let rev = self.reverse(node_id)?;
2361        let rev = self.normalize_rev(rev, 0)?;
2362        let with_ts = self.prefix_ts(rev);
2363        Ok(self.simplify_rev_initial(with_ts))
2364    }
2365
2366    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2367        debug_assert!(node_id != NodeId::MISSING);
2368        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2369            if *rev != NodeId::MISSING {
2370                return Ok(*rev);
2371            }
2372        }
2373        let rw = match self.get_kind(node_id) {
2374            Kind::End => NodeId::BEGIN,
2375            Kind::Begin => NodeId::END,
2376            Kind::Pred => node_id,
2377            Kind::Concat => {
2378                let new_right = self.reverse(node_id.left(self))?;
2379                let new_left = self.reverse(node_id.right(self))?;
2380                if new_right.is_union(self) {
2381                    let union = new_right;
2382                    if union.left(self).contains_lookbehind(self)
2383                        || union.right(self).contains_lookbehind(self)
2384                    {
2385                        if new_left.is_ts() {
2386                            self.mk_concat(new_left, new_right)
2387                        } else {
2388                            #[cfg(feature = "debug")]
2389                            {
2390                                eprintln!(
2391                                    "left: {:?}, right: {:?}",
2392                                    self.pp(new_left),
2393                                    self.pp(union)
2394                                );
2395                            }
2396                            return Err(ResharpError::UnsupportedPattern);
2397                        }
2398                    } else {
2399                        self.mk_concat(new_left, new_right)
2400                    }
2401                } else if node_id.right(self).is_union(self) {
2402                    // R . ((?<=x)|y) :  a headache for correctness
2403                    // TODO: inspect in the future how and when we can support this
2404                    let union = node_id.right(self);
2405                    if union.left(self).contains_lookbehind(self)
2406                        || union.right(self).contains_lookbehind(self)
2407                    {
2408                        if new_left.is_ts() {
2409                            self.mk_concat(new_left, new_right)
2410                        } else {
2411                            return Err(ResharpError::UnsupportedPattern);
2412                        }
2413                    } else {
2414                        self.mk_concat(new_left, new_right)
2415                    }
2416                } else {
2417                    self.mk_concat(new_left, new_right)
2418                }
2419            }
2420            Kind::Union => {
2421                let left = self.reverse(node_id.left(self))?;
2422                let right = self.reverse(node_id.right(self))?;
2423                self.mk_union(left, right)
2424            }
2425            Kind::Inter => {
2426                let left = self.reverse(node_id.left(self))?;
2427                let right = self.reverse(node_id.right(self))?;
2428                self.mk_inter(left, right)
2429            }
2430            Kind::Star => {
2431                let body = self.reverse(node_id.left(self))?;
2432                self.mk_star(body)
2433            }
2434            Kind::Compl => {
2435                if self.contains_look(node_id.left(self)) {
2436                    return Err(ResharpError::UnsupportedPattern);
2437                }
2438                let body = self.reverse(node_id.left(self))?;
2439                self.mk_compl(body)
2440            }
2441            Kind::Lookbehind => {
2442                let prev = self.get_lookbehind_prev(node_id);
2443                let inner_id = self.get_lookbehind_inner(node_id);
2444                let rev_inner = self.reverse(inner_id)?;
2445                let rev_prev = if prev != NodeId::MISSING {
2446                    self.reverse(prev)?
2447                } else {
2448                    NodeId::MISSING
2449                };
2450                self.mk_lookahead(rev_inner, rev_prev, 0)
2451            }
2452            Kind::Lookahead => {
2453                let rel = self.get_lookahead_rel(node_id);
2454                // no nullability, we can write as intersection
2455                if rel == u32::MAX {
2456                    let lbody = self.get_lookahead_inner(node_id);
2457                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2458                    let rbody = self.reverse(lbody);
2459                    let rtail = self.reverse(ltail);
2460                    let rev = self.mk_lookbehind(rbody?, rtail?);
2461                    return Ok(self.cache_reversed(node_id, rev));
2462                }
2463                if rel != 0 {
2464                    return Err(ResharpError::UnsupportedPattern);
2465                }
2466                let tail_node = self.get_lookahead_tail(node_id);
2467                let rev_tail = if tail_node != NodeId::MISSING {
2468                    self.reverse(tail_node)?
2469                } else {
2470                    NodeId::MISSING
2471                };
2472                let inner_id = self.get_lookahead_inner(node_id);
2473                let rev_inner = self.reverse(inner_id)?;
2474                self.mk_lookbehind(rev_inner, rev_tail)
2475            }
2476            Kind::Ordered => {
2477                return Err(ResharpError::UnsupportedPattern);
2478            }
2479        };
2480        // eprintln!("rev: {:?} --> {:?}", self.pp(node_id), self.pp(rw));
2481        self.cache_reversed(node_id, rw);
2482        Ok(rw)
2483    }
2484
2485    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2486        let node = NodeKey {
2487            kind: Kind::Pred,
2488            left: NodeId::MISSING,
2489            right: NodeId::MISSING,
2490            extra: pred.0,
2491        };
2492        self.get_node_id(node)
2493    }
2494
2495    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2496        let key = NodeKey {
2497            kind: Kind::Compl,
2498            left: body,
2499            right: NodeId::MISSING,
2500            extra: u32::MAX,
2501        };
2502        if let Some(id) = self.key_is_created(&key) {
2503            return *id;
2504        }
2505        if body == NodeId::BOT {
2506            return NodeId::TS;
2507        }
2508        if body == NodeId::TS {
2509            return NodeId::BOT;
2510        }
2511
2512        if let Some(contains_body) = body.is_contains(self) {
2513            if contains_body.is_pred(self) {
2514                let pred = contains_body.pred_tset(self);
2515                let notpred = self.mk_pred_not(pred);
2516                let node = self.mk_star(notpred);
2517                return self.init_as(key, node);
2518            } else if contains_body == NodeId::END {
2519                return self.init_as(key, NodeId::BOT);
2520            }
2521        };
2522
2523        if self.get_kind(body) == Kind::Compl {
2524            return self.get_node(body).left;
2525        }
2526
2527        self.get_node_id(key)
2528    }
2529
2530    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2531        let nid = self.get_nulls_id(body);
2532        let nref = self.mb.nb.get_set_ref(nid).clone();
2533        let mut futures = NodeId::BOT;
2534        for n in nref.iter() {
2535            if !n.is_mask_nullable(mask) {
2536                continue;
2537            }
2538
2539            let eff = if n.rel == 0 {
2540                NodeId::EPS
2541            } else {
2542                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2543            };
2544            futures = self.mk_union(futures, eff)
2545        }
2546        futures
2547    }
2548
2549    /// used to determine len of lookahead
2550    fn strip_ts_max_len(&self, node: NodeId) -> Option<u32> {
2551        let mut cur = node;
2552        let mut total: u32 = 0;
2553        loop {
2554            if !cur.is_concat(self) {
2555                return None;
2556            }
2557            let r = cur.right(self);
2558            let (_, lmax) = self.get_min_max_length(cur.left(self));
2559            if lmax == u32::MAX {
2560                return None;
2561            }
2562            total = total.saturating_add(lmax);
2563            if r == NodeId::TS {
2564                return Some(total);
2565            }
2566            cur = r;
2567        }
2568    }
2569
2570    fn peel_head_pred(&self, node: NodeId) -> Option<(TSetId, NodeId)> {
2571        if node.is_pred(self) {
2572            Some((node.pred_tset(self), NodeId::EPS))
2573        } else if node.is_concat(self) && node.left(self).is_pred(self) {
2574            Some((node.left(self).pred_tset(self), node.right(self)))
2575        } else {
2576            None
2577        }
2578    }
2579
2580    /// Drop the `\z` alternative from `Union(\z, X)` (possibly with tail). `\z` is always on the left.
2581    fn strip_end_from_la_head(&mut self, node: NodeId) -> NodeId {
2582        let (head, rest) = if node.is_concat(self) {
2583            (node.left(self), node.right(self))
2584        } else {
2585            (node, NodeId::EPS)
2586        };
2587        if !head.is_kind(self, Kind::Union) {
2588            return node;
2589        }
2590        let l = head.left(self);
2591        if !l.is_end() {
2592            return node;
2593        }
2594        let r = head.right(self);
2595        self.mk_concat(r, rest)
2596    }
2597
2598    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2599        if cfg!(feature = "norewrite") {
2600            return None;
2601        }
2602        if tail.is_lookbehind(self) {
2603            let tail_prev = self.get_lookbehind_prev(tail);
2604            // \z(?<!,) into (?<!,)\z, not much use of it
2605            if head.is_end() && (tail_prev.is_missing()) {
2606                return Some(self.mk_concat(tail, head));
2607            }
2608            if head.is_lookahead(self)
2609                && self.get_lookahead_rel(head) == 0
2610                && self.get_lookahead_tail(head).is_missing()
2611                && (tail_prev == NodeId::MISSING || tail_prev == NodeId::EPS)
2612            {
2613                return Some(self.mk_concat(tail, head));
2614            }
2615            // (?<=a)(?<=b)x into (?<=(_*a&_*b))x
2616            if head.is_lookbehind(self) {
2617                let head_prev = self.get_lookbehind_prev(head);
2618                if (head_prev.is_missing()) && (tail_prev.is_missing()) {
2619                    let head_inner = self.get_lookbehind_inner(head).missing_to_eps();
2620                    let tail_inner = self.get_lookbehind_inner(tail).missing_to_eps();
2621                    let merged = self.mk_inter(head_inner, tail_inner);
2622                    return self.mk_lookbehind_internal(merged, NodeId::MISSING).ok();
2623                }
2624            }
2625            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2626            return self
2627                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2628                .ok();
2629        }
2630        if head.is_lookahead(self) {
2631            let la_tail = self.get_lookahead_tail(head);
2632            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2633            let la_body = self.get_lookahead_inner(head);
2634            // `(?=pL·L_rest)·pB·B_rest` to `(pL∩pB)·(?=L_rest)·B_rest`.
2635            let la_body = if new_la_tail.is_never_nullable(self) {
2636                self.strip_end_from_la_head(la_body)
2637            } else {
2638                la_body
2639            };
2640            if la_body == NodeId::BOT {
2641                return Some(NodeId::BOT);
2642            }
2643            if let (Some((p_l, body_rest)), Some((p_b, tail_rest))) = (
2644                self.peel_head_pred(la_body),
2645                self.peel_head_pred(new_la_tail),
2646            ) {
2647                let p = self.solver().and_id(p_l, p_b);
2648                let merged = self.mk_pred(p);
2649                if merged == NodeId::BOT {
2650                    return Some(NodeId::BOT);
2651                }
2652                let new_la = if body_rest == NodeId::EPS {
2653                    NodeId::EPS
2654                } else {
2655                    self.mk_lookahead(body_rest, NodeId::MISSING, 0)
2656                };
2657                let after = self.mk_concat(new_la, tail_rest);
2658                return Some(self.mk_concat(merged, after));
2659            }
2660
2661            if la_body.is_concat(self)
2662                && la_body.right(self).is_end()
2663                && la_body.left(self).is_compl(self)
2664            {
2665                let not_p_ts = la_body.left(self); // ~(P·_*)
2666                if let Some(p_max) = self.strip_ts_max_len(not_p_ts.left(self)) {
2667                    let (tail_min, _) = self.get_min_max_length(new_la_tail);
2668                    if p_max <= tail_min {
2669                        return Some(self.mk_inter(not_p_ts, new_la_tail));
2670                    }
2671                }
2672            }
2673
2674            if new_la_tail.is_center_nullable(self) {
2675                let la_new = self.mk_lookahead_internal(la_body, new_la_tail, u32::MAX);
2676                return Some(la_new);
2677            }
2678            let la_rel = self.get_lookahead_rel(head);
2679            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2680                let tail_rel = self.get_lookahead_rel(new_la_tail);
2681                tail_rel.saturating_add(la_rel)
2682            } else {
2683                u32::MAX
2684            };
2685
2686            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2687        }
2688
2689        if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2690            return Some(head);
2691        }
2692
2693        if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2694            return Some(NodeId::TS);
2695        }
2696
2697        if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2698            return Some(NodeId::TS);
2699        }
2700
2701        if head.is_inter(self) && tail == NodeId::TS {
2702            let mut alltopstar = true;
2703            head.iter_inter(
2704                self,
2705                &mut (|b, v| {
2706                    alltopstar = b.ends_with_ts(v);
2707                }),
2708            );
2709
2710            if alltopstar {
2711                return Some(head);
2712            }
2713        }
2714
2715        if head.is_star(self) && head == tail {
2716            return Some(head);
2717        }
2718
2719        None
2720    }
2721
2722    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2723        #[cfg(feature = "reentrant-assert")]
2724        if !self.rw_active.insert((Kind::Union, left, right)) {
2725            #[cfg(debug_assertions)]
2726            panic!(
2727                "reentrant union rewrite {:?} | {:?}, this is a bug, please file an issue with the pattern",
2728                self.pp(left),
2729                self.pp(right)
2730            );
2731            #[cfg(not(debug_assertions))]
2732            return None;
2733        }
2734        let r = self.attempt_rw_union_2_inner(left, right);
2735        #[cfg(feature = "reentrant-assert")]
2736        self.rw_active.remove(&(Kind::Union, left, right));
2737        r
2738    }
2739
2740    fn attempt_rw_union_2_inner(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2741        use Kind::*;
2742
2743        if cfg!(feature = "norewrite") {
2744            return None;
2745        }
2746        if left == right {
2747            return Some(left);
2748        }
2749
2750        if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2751            return Some(left);
2752        }
2753        if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2754            return Some(right);
2755        }
2756
2757        if left.is_pred(self) && right.is_pred(self) {
2758            let l = left.pred_tset(self);
2759            let r = right.pred_tset(self);
2760            let solver = self.solver();
2761            let psi = solver.or_id(l, r);
2762            let rewrite = self.mk_pred(psi);
2763            return Some(rewrite);
2764        }
2765
2766        if left == NodeId::EPS && self.nullability(right) == Nullability::ALWAYS {
2767            let right_nulls_id = self.get_nulls_id(right);
2768            if self.mb.nb.array[right_nulls_id.0 as usize]
2769                .iter()
2770                .any(|n| n.rel == 0 && n.mask.has(Nullability::ALWAYS))
2771            {
2772                return Some(right);
2773            }
2774        }
2775
2776        if left.is_lookahead(self) && right.is_lookahead(self) {
2777            let lb = left.left(self);
2778            let lt = left.right(self);
2779            let lrel_extra = left.extra(self);
2780
2781            let rb = right.left(self);
2782            let rt = right.right(self);
2783            let rrel_extra = right.extra(self);
2784
2785            if lrel_extra == rrel_extra && lt.is_missing() && rt.is_missing() {
2786                let unioned = self.mk_union(lb, rb);
2787                let node = if lrel_extra == u32::MAX {
2788                    self.mk_lookahead_internal(unioned, NodeId::MISSING, u32::MAX)
2789                } else {
2790                    let rels = NullsId(lrel_extra);
2791                    let lo = self.mb.nb.min_rel(rels);
2792                    let hi = self.mb.nb.max_rel(rels);
2793                    if lo == hi {
2794                        self.mk_lookahead_internal(unioned, NodeId::MISSING, hi)
2795                    } else {
2796                        self.mk_lookahead_nid(unioned, NodeId::MISSING, rels)
2797                    }
2798                };
2799                return Some(node);
2800            }
2801        }
2802
2803        if right.is_kind(self, Concat) {
2804            if left == NodeId::END
2805                && right.left(self) == NodeId::END
2806                && self.nullability(right).has(Nullability::END)
2807            {
2808                return Some(right);
2809            }
2810            // .*|.*a.* => .*(a.*|)
2811            if left == right.left(self) {
2812                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2813                let rw = self.mk_concat(left, rhs);
2814                return Some(rw);
2815            }
2816            if left.is_kind(self, Concat) {
2817                let head1 = left.left(self);
2818                let head2 = right.left(self);
2819
2820                if head1 == head2 {
2821                    let t1 = left.right(self);
2822                    let t2 = right.right(self);
2823                    // opportunistic rewrites
2824                    if head1 == NodeId::TS {
2825                        if t2.has_concat_tail(self, t1) {
2826                            return Some(left);
2827                        }
2828                        if t1.has_concat_tail(self, t2) {
2829                            return Some(right);
2830                        }
2831                    }
2832                    let un = self.mk_union(t1, t2);
2833                    return Some(self.mk_concat(left.left(self), un));
2834                }
2835
2836                // (\A)X | (?<=R)X => (?<=R|\A)X
2837                {
2838                    let t1 = left.right(self);
2839                    let t2 = right.right(self);
2840                    if t1 == t2 {
2841                        let begin_lb = (head1 == NodeId::BEGIN
2842                            && head2.is_kind(self, Kind::Lookbehind))
2843                            || (head2 == NodeId::BEGIN && head1.is_kind(self, Kind::Lookbehind));
2844                        if begin_lb {
2845                            let un = self.mk_union(head1, head2);
2846                            return Some(self.mk_concat(un, t1));
2847                        }
2848                    }
2849                }
2850
2851                // xa|ya => (x|y)a - suffix factoring via reverse
2852                // TODO: valid and looks prettier but .reverse is not good for builder perf,
2853                // leaving out unless i find a case where it helps significantly
2854                if false {
2855                    let end1 = self.get_concat_end(left);
2856                    let end2 = self.get_concat_end(right);
2857                    if end1 == end2 {
2858                        let contains_lookarounds =
2859                            left.contains_lookaround(self) || right.contains_lookaround(self);
2860                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2861                        if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2862                            let rev1 = self.reverse(left).unwrap();
2863                            let rev2 = self.reverse(right).unwrap();
2864
2865                            let union_rev = self.mk_union(rev1, rev2);
2866                            return Some(self.reverse(union_rev).unwrap());
2867                        }
2868                    }
2869                }
2870            }
2871            if left.is_pred(self) && left == right.left(self) {
2872                let un = self.mk_opt(right.right(self));
2873                let conc = self.mk_concat(left, un);
2874                return Some(conc);
2875            }
2876        }
2877
2878        if left == NodeId::EPS && right.is_plus(self) {
2879            let result = self.mk_star(right.left(self));
2880            return Some(result);
2881        }
2882
2883        // (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2884        if left.is_inter(self) && right.is_inter(self) {
2885            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2886                return Some(rw);
2887            }
2888        }
2889
2890        None
2891    }
2892
2893    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2894        let mut curr = node;
2895        while curr.is_inter(self) {
2896            out.push(self.get_left(curr));
2897            curr = self.get_right(curr);
2898        }
2899        out.push(curr);
2900    }
2901
2902    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2903        let mut curr = node;
2904        let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2905        if has_prefix {
2906            curr = self.get_right(curr);
2907        }
2908        let mut count = 0u32;
2909        let mut pred_set = None;
2910        while curr.is_concat(self) {
2911            let head = self.get_left(curr);
2912            if !head.is_pred(self) {
2913                return None;
2914            }
2915            let ps = head.pred_tset(self);
2916            match pred_set {
2917                None => pred_set = Some(ps),
2918                Some(existing) => {
2919                    if existing != ps {
2920                        return None;
2921                    }
2922                }
2923            }
2924            curr = self.get_right(curr);
2925            count += 1;
2926        }
2927        if count == 0 || !curr.is_star(self) {
2928            return None;
2929        }
2930        Some((has_prefix, pred_set.unwrap(), curr, count))
2931    }
2932
2933    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2934        let mut j = 0;
2935        for &s in sub {
2936            while j < sup.len() && sup[j] < s {
2937                j += 1;
2938            }
2939            if j >= sup.len() || sup[j] != s {
2940                return false;
2941            }
2942            j += 1;
2943        }
2944        true
2945    }
2946
2947    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2948        if !left.is_inter(self) || !right.is_inter(self) {
2949            return None;
2950        }
2951
2952        let mut lc: Vec<NodeId> = Vec::new();
2953        let mut rc: Vec<NodeId> = Vec::new();
2954        self.collect_inter_components(left, &mut lc);
2955        self.collect_inter_components(right, &mut rc);
2956
2957        // component subset check: fewer constraints = larger language
2958        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2959            return Some(left);
2960        }
2961        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2962        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2963            return Some(right);
2964        }
2965
2966        if lc.len() == rc.len() {
2967            let mut diff_idx = None;
2968            for i in 0..lc.len() {
2969                if lc[i] != rc[i] {
2970                    if diff_idx.is_some() {
2971                        return None;
2972                    }
2973                    diff_idx = Some(i);
2974                }
2975            }
2976            if let Some(idx) = diff_idx {
2977                let a = lc[idx];
2978                let b = rc[idx];
2979                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2980                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2981                {
2982                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2983                        return if ca < cb { Some(left) } else { Some(right) };
2984                    }
2985                }
2986            }
2987        }
2988
2989        None
2990    }
2991
2992    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2993        #[cfg(feature = "reentrant-assert")]
2994        if !self.rw_active.insert((Kind::Inter, left, right)) {
2995            #[cfg(debug_assertions)]
2996            panic!(
2997                "reentrant inter rewrite {:?} & {:?}, this is a bug, please file an issue with the pattern",
2998                self.pp(left),
2999                self.pp(right)
3000            );
3001            #[cfg(not(debug_assertions))]
3002            return None;
3003        }
3004        let r = self.attempt_rw_inter_2_inner(left, right);
3005        #[cfg(feature = "reentrant-assert")]
3006        self.rw_active.remove(&(Kind::Inter, left, right));
3007        r
3008    }
3009
3010    fn attempt_rw_inter_2_inner(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
3011        if cfg!(feature = "norewrite") {
3012            return None;
3013        }
3014        if left == right {
3015            return Some(left);
3016        }
3017        if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
3018            return Some(right);
3019        }
3020        if right.is_union(self) && right.any_union_component(self, |v| v == left) {
3021            return Some(left);
3022        }
3023
3024        {
3025            let (l_min, l_max) = self.get_min_max_length(left);
3026            let (r_min, r_max) = self.get_min_max_length(right);
3027            if l_max < r_min || r_max < l_min {
3028                return Some(NodeId::BOT);
3029            }
3030        }
3031
3032        if left.is_pred(self) && right.is_pred(self) {
3033            let l = left.pred_tset(self);
3034            let r = right.pred_tset(self);
3035            let solver = self.solver();
3036            let psi = solver.and_id(l, r);
3037            let rewrite = self.mk_pred(psi);
3038            return Some(rewrite);
3039        }
3040
3041        for (a, b) in [(left, right), (right, left)] {
3042            if a.is_pred(self) && b.is_compl(self) {
3043                let cbody = b.left(self);
3044                if cbody.is_concat(self)
3045                    && cbody.right(self) == NodeId::TS
3046                    && cbody.left(self).is_pred(self)
3047                {
3048                    let q = a.pred_tset(self);
3049                    let p = cbody.left(self).pred_tset(self);
3050                    let solver = self.solver();
3051                    let notp = solver.not_id(p);
3052                    let anded = solver.and_id(q, notp);
3053                    return Some(self.mk_pred(anded));
3054                }
3055            }
3056        }
3057
3058        if left.is_concat(self) && right.is_concat(self) {
3059            if left.left(self) == right.left(self) {
3060                if left.left(self).is_pred(self) {
3061                    let new_right = self.mk_inter(left.right(self), right.right(self));
3062                    return Some(self.mk_concat(left.left(self), new_right));
3063                }
3064            }
3065        }
3066
3067        if right.is_compl(self) && right.left(self) == left {
3068            return Some(NodeId::BOT);
3069        }
3070
3071        if left.is_compl(self) && right.is_compl(self) {
3072            let bodies = self.mk_union(left.left(self), right.left(self));
3073            return Some(self.mk_compl(bodies));
3074        }
3075
3076        if left == NodeId::TOPPLUS {
3077            if right.is_pred_star(self).is_some() {
3078                let newloop = self.mk_plus(right.left(self));
3079                return Some(newloop);
3080            }
3081            if right.is_never_nullable(self) {
3082                return Some(right);
3083            }
3084            if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
3085                return Some(NodeId::BOT);
3086            }
3087            if right.is_kind(self, Kind::Concat) {}
3088        }
3089
3090        {
3091            let l_is_la = left.is_lookahead(self);
3092            let r_is_la = right.is_lookahead(self);
3093            let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
3094            let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
3095            if l_is_la || r_is_la || l_is_cla || r_is_cla {
3096                let (la_node, other, concat_body) = if r_is_la {
3097                    (right, left, NodeId::MISSING)
3098                } else if l_is_la {
3099                    (left, right, NodeId::MISSING)
3100                } else if r_is_cla {
3101                    (right.left(self), left, right.right(self))
3102                } else {
3103                    (left.left(self), right, left.right(self))
3104                };
3105                let la_body = la_node.left(self);
3106                let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
3107                let inter_right = if concat_body.is_missing() {
3108                    la_tail
3109                } else {
3110                    self.mk_concat(la_tail, concat_body)
3111                };
3112                let new_body = self.mk_inter(other, inter_right);
3113                let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
3114                return Some(self.mk_concat(la, new_body));
3115            }
3116        }
3117
3118        if self.get_kind(right) == Kind::Compl {
3119            let compl_body = right.left(self);
3120            if left == compl_body {
3121                return Some(NodeId::BOT);
3122            }
3123            if compl_body.is_concat(self) {
3124                let compl_head = compl_body.left(self);
3125                if compl_body.right(self) == NodeId::TS && compl_head == left {
3126                    return Some(NodeId::BOT);
3127                }
3128            }
3129        }
3130
3131        if let Some(pleft) = left.is_pred_star(self) {
3132            if let Some(pright) = right.is_pred_star(self) {
3133                let merged = self.mk_inter(pleft, pright);
3134                return Some(self.mk_star(merged));
3135            }
3136        }
3137
3138        for (star, lb) in [(left, right), (right, left)] {
3139            if star.is_star(self) && lb.is_lookbehind(self) {
3140                return Some(lb);
3141            }
3142        }
3143
3144        for (star, pred) in [(left, right), (right, left)] {
3145            if let Some(star_pred) = star.is_pred_star(self) {
3146                if pred.is_pred(self) {
3147                    let sp = star_pred.pred_tset(self);
3148                    let op = pred.pred_tset(self);
3149                    if self.solver().and_id(sp, op) == TSetId::EMPTY {
3150                        return Some(NodeId::BOT);
3151                    }
3152                }
3153            }
3154        }
3155
3156        {
3157            let l_union_lb = self.union_has_leading_lb(left);
3158            let r_union_lb = self.union_has_leading_lb(right);
3159            if l_union_lb || r_union_lb {
3160                let (u, other) = if l_union_lb {
3161                    (left, right)
3162                } else {
3163                    (right, left)
3164                };
3165                let mut acc = NodeId::BOT;
3166                u.iter_union(self, &mut |b, v| {
3167                    let inter = b.mk_inter(v, other);
3168                    acc = b.mk_union(inter, acc);
3169                });
3170                return Some(acc);
3171            }
3172        }
3173
3174        {
3175            let l_is_clb = left.is_concat(self)
3176                && left.left(self).is_lookbehind(self)
3177                && self.lookbehind_prev_missing(left.left(self));
3178            let r_is_clb = right.is_concat(self)
3179                && right.left(self).is_lookbehind(self)
3180                && self.lookbehind_prev_missing(right.left(self));
3181            if l_is_clb || r_is_clb {
3182                let (lb, body) = if l_is_clb && r_is_clb {
3183                    let lb1 = left.left(self);
3184                    let lb2 = right.left(self);
3185                    let inner = self.mk_inter(
3186                        self.get_lookbehind_inner(lb1),
3187                        self.get_lookbehind_inner(lb2),
3188                    );
3189                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
3190                    let body = self.mk_inter(left.right(self), right.right(self));
3191                    (lb, body)
3192                } else if l_is_clb {
3193                    let lb = left.left(self);
3194                    let body = self.mk_inter(left.right(self), right);
3195                    (lb, body)
3196                } else {
3197                    let lb = right.left(self);
3198                    let body = self.mk_inter(left, right.right(self));
3199                    (lb, body)
3200                };
3201                return Some(self.mk_concat(lb, body));
3202            }
3203        }
3204
3205        {
3206            let l_has_la = self.has_trailing_la(left);
3207            let r_has_la = self.has_trailing_la(right);
3208            if l_has_la || r_has_la {
3209                let (body, la) = if l_has_la && r_has_la {
3210                    let (lbody, l_la) = self.strip_trailing_la(left);
3211                    let (rbody, r_la) = self.strip_trailing_la(right);
3212                    let inner = self.mk_inter(
3213                        self.get_lookahead_inner(l_la),
3214                        self.get_lookahead_inner(r_la),
3215                    );
3216                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
3217                    let body = self.mk_inter(lbody, rbody);
3218                    (body, la)
3219                } else if l_has_la {
3220                    let (lbody, la) = self.strip_trailing_la(left);
3221                    let body = self.mk_inter(lbody, right);
3222                    (body, la)
3223                } else {
3224                    let (rbody, la) = self.strip_trailing_la(right);
3225                    let body = self.mk_inter(left, rbody);
3226                    (body, la)
3227                };
3228                return Some(self.mk_concat(body, la));
3229            }
3230        }
3231
3232        None
3233    }
3234
3235    fn lookbehind_prev_missing(&self, lb: NodeId) -> bool {
3236        matches!(self.get_lookbehind_prev(lb), NodeId::MISSING)
3237    }
3238
3239    fn union_has_leading_lb(&self, node: NodeId) -> bool {
3240        if !node.is_union(self) {
3241            return false;
3242        }
3243        node.any_union_component(self, |v| {
3244            v.is_lookbehind(self) || (v.is_concat(self) && v.left(self).is_lookbehind(self))
3245        })
3246    }
3247
3248    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
3249        if cfg!(feature = "norewrite") {
3250            return None;
3251        }
3252        debug_assert!(self.get_kind(right_union) == Kind::Union);
3253
3254        let mut rewritten = None;
3255        right_union.iter_union_while(
3256            self,
3257            &mut (|b, v| {
3258                if let Some(rw) = b.attempt_rw_union_2(left, v) {
3259                    rewritten = Some((v, rw));
3260                    false
3261                } else {
3262                    true
3263                }
3264            }),
3265        );
3266
3267        if let Some(rw) = rewritten {
3268            let mut new_union = NodeId::BOT;
3269            right_union.iter_union(
3270                self,
3271                &mut (|b, v| {
3272                    if v == rw.0 {
3273                        new_union = b.mk_union(rw.1, new_union)
3274                    } else {
3275                        new_union = b.mk_union(v, new_union)
3276                    }
3277                }),
3278            );
3279            return Some(new_union);
3280        };
3281
3282        None
3283    }
3284
3285    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
3286        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
3287        debug_assert!(tail != NodeId::MISSING);
3288        let key = NodeKey {
3289            kind: Kind::Concat,
3290            left: head,
3291            right: tail,
3292            extra: u32::MAX,
3293        };
3294        if let Some(id) = self.key_is_created(&key) {
3295            return *id;
3296        }
3297
3298        if head == NodeId::BOT || tail == NodeId::BOT {
3299            return NodeId::BOT;
3300        }
3301        if head == NodeId::EPS {
3302            return tail;
3303        }
3304        if tail == NodeId::EPS {
3305            return head;
3306        }
3307
3308        // normalize concats to right
3309        if head.is_kind(self, Kind::Concat) {
3310            let left = head.left(self);
3311            let newright = self.mk_concat(head.right(self), tail);
3312            let updated = self.mk_concat(left, newright);
3313            return self.init_as(key, updated);
3314        }
3315
3316        // if tail.is_union(self)
3317        //     && self.contains_lookbehind(tail)
3318        //     && self.union_has_absorbing_lb(tail)
3319        // {
3320        //     let l = self.mk_concat(head, tail.left(self));
3321        //     let r = self.mk_concat(head, tail.right(self));
3322        //     let u = self.mk_union(l, r);
3323        //     return self.init_as(key, u);
3324        // }
3325
3326        if head.is_union(self)
3327            && head.contains_lookahead(self)
3328            && self.nullability(tail) == Nullability::NEVER
3329            && !self.contains_look(tail)
3330        {
3331            let (hl, hr) = (head.left(self), head.right(self));
3332            
3333            if (hl.is_eps()
3334                && hr.is_lookahead(self)
3335                && self.get_lookahead_tail(hr).is_missing())
3336                || (hr.is_eps()
3337                    && hl.is_lookahead(self)
3338                    && self.get_lookahead_tail(hl).is_missing())
3339            {
3340                return self.init_as(key, tail);
3341            }
3342            let l = self.mk_concat(hl, tail);
3343            let r = self.mk_concat(hr, tail);
3344            let u = self.mk_union(l, r);
3345            return self.init_as(key, u);
3346        }
3347
3348        if self.get_kind(head) == Kind::End
3349            && !tail.is_lookbehind(self)
3350            && !self.is_nullable(tail, Nullability::EMPTYSTRING)
3351        {
3352            return NodeId::BOT;
3353        }
3354
3355        if tail.is_concat(self) && !head.is_lookahead(self) {
3356            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
3357                let upd = self.mk_concat(rw, tail.right(self));
3358                return self.init_as(key, upd);
3359            }
3360        }
3361
3362        if let Some(rw) = self.attempt_rw_concat_2(head, tail) {
3363            return self.init_as(key, rw);
3364        }
3365
3366        match (self.get_kind(head), self.get_kind(tail)) {
3367            // merge stars
3368            (Kind::Star, Kind::Concat) if head.is_star(self) => {
3369                let rl = tail.left(self);
3370                if head == rl {
3371                    return self.init_as(key, tail);
3372                }
3373            }
3374            // attempt longer concat rw
3375            (_, Kind::Concat) => {
3376                let curr = head;
3377                let h2 = self.mk_concat(curr, tail.left(self));
3378                let tr = tail.right(self);
3379                if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
3380                    return self.init_as(key, new);
3381                }
3382            }
3383            _ if head == NodeId::TS && self.starts_with_ts(tail) => {
3384                return self.init_as(key, tail);
3385            }
3386            _ => {}
3387        }
3388
3389        self.init(key)
3390    }
3391
3392    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
3393        // LNF: lookbehind must start with ts
3394        let lb_body = {
3395            match self.starts_with_ts(lb_body) {
3396                true => lb_body,
3397                false => self.mk_concat(NodeId::TS, lb_body),
3398            }
3399        };
3400        // lb_body starts with TS after normalization above, so EPS case cannot trigger
3401        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
3402    }
3403
3404    fn mk_lookbehind_internal(
3405        &mut self,
3406        lb_body: NodeId,
3407        lb_prev: NodeId,
3408    ) -> Result<NodeId, ResharpError> {
3409        debug_assert!(lb_body != NodeId::MISSING);
3410        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
3411        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
3412            return Ok(NodeId::BOT);
3413        }
3414        if lb_body == NodeId::TS {
3415            return Ok(if lb_prev == NodeId::MISSING {
3416                NodeId::EPS
3417            } else {
3418                lb_prev
3419            });
3420        }
3421        if lb_body == NodeId::EPS {
3422            match lb_prev {
3423                NodeId::MISSING => return Ok(NodeId::EPS),
3424                NodeId::EPS => return Ok(NodeId::EPS),
3425                _ => return Ok(lb_prev),
3426            }
3427        }
3428
3429        let key = NodeKey {
3430            kind: Kind::Lookbehind,
3431            left: lb_body,
3432            right: lb_prev,
3433            extra: u32::MAX,
3434        };
3435        match self.key_is_created(&key) {
3436            Some(id) => Ok(*id),
3437            None => {
3438                if lb_prev == NodeId::TS {
3439                    return Ok(self.mk_concat(lb_prev, lb_body));
3440                }
3441
3442                Ok(self.init(key))
3443            }
3444        }
3445    }
3446
3447    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3448        let la_body = if la_tail.is_missing() && rel == 0 {
3449            self.flatten_la_body(la_body)
3450        } else {
3451            la_body
3452        };
3453        // LNF: lookahead must end with ts
3454        let la_body = {
3455            match self.ends_with_ts(la_body) {
3456                true => la_body,
3457                false => self.mk_concat(la_body, NodeId::TS),
3458            }
3459        };
3460        let rel = if NodeId::MISSING == la_tail {
3461            rel
3462        } else {
3463            match la_tail.is_center_nullable(self) {
3464                false => u32::MAX,
3465                true => rel,
3466            }
3467        };
3468
3469        self.mk_lookahead_internal(la_body, la_tail, rel)
3470    }
3471
3472    fn flatten_la_body(&mut self, node: NodeId) -> NodeId {
3473        if !self
3474            .get_meta_flags(node)
3475            .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
3476        {
3477            return node;
3478        }
3479        match self.get_kind(node) {
3480            Kind::Lookahead
3481                if self.get_lookahead_tail(node).is_missing()
3482                    && self.get_lookahead_rel(node) == 0 =>
3483            {
3484                let inner = self.get_lookahead_inner(node);
3485                let inner = self.strip_trailing_ts(inner);
3486                self.flatten_la_body(inner)
3487            }
3488            Kind::Union => {
3489                let l = self.flatten_la_body(node.left(self));
3490                let r = self.flatten_la_body(node.right(self));
3491                self.mk_union(l, r)
3492            }
3493            _ => node,
3494        }
3495    }
3496
3497    fn strip_trailing_ts(&self, node: NodeId) -> NodeId {
3498        if node.is_concat(self) && node.right(self) == NodeId::TS {
3499            node.left(self)
3500        } else {
3501            node
3502        }
3503    }
3504
3505    // rel max = carries no nullability, can potentially rw to intersection
3506    pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3507        let extra = if rel == u32::MAX {
3508            u32::MAX
3509        } else {
3510            self.mb.nb.add_rel(NullsId::ALWAYS0, rel).0
3511        };
3512        let key = NodeKey {
3513            kind: Kind::Lookahead,
3514            left: la_body,
3515            right: la_tail,
3516            extra,
3517        };
3518        if let Some(id) = self.key_is_created(&key) {
3519            return *id;
3520        }
3521        if la_body == NodeId::TS {
3522            if rel == 0 {
3523                return la_tail.missing_to_eps();
3524            } else {
3525                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3526            }
3527        }
3528        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3529            return NodeId::BOT;
3530        }
3531        if la_tail.is_missing() && rel == u32::MAX {
3532            return NodeId::BOT;
3533        }
3534
3535        if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
3536            return la_body;
3537        }
3538
3539        if la_tail == NodeId::TS && !self.ends_with_end(la_body) {
3540            if rel == 0 || rel == u32::MAX {
3541                return self.mk_concat(la_body, NodeId::TS);
3542            } else if rel == u32::MAX {
3543                return self.mk_begins_with(la_body);
3544            }
3545        }
3546
3547        if rel == u32::MAX {
3548            if la_tail.is_missing() {
3549                return NodeId::BOT;
3550            }
3551
3552            if self.is_always_nullable(la_body) {
3553                return la_tail.missing_to_eps();
3554            }
3555
3556            if la_tail != NodeId::MISSING {
3557                match self.get_kind(la_tail) {
3558                    _ => {
3559                        if la_body.is_compl_plus_end(self) {
3560                            let minlen = self.get_min_length_only(la_tail);
3561                            if minlen >= 1 {
3562                                return NodeId::BOT;
3563                            }
3564                        }
3565                    }
3566                }
3567            }
3568        }
3569
3570        if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3571            let la_body2 = self.get_lookahead_inner(la_tail);
3572            let body1_ts = self.mk_concat(la_body, NodeId::TS);
3573            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3574            let new_la_body = self.mk_inter(body1_ts, body2_ts);
3575            let new_la_rel = self.get_lookahead_rel(la_tail);
3576            let new_la_tail = self.get_lookahead_tail(la_tail);
3577            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3578        }
3579
3580        if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3581            let la_body_right = la_body.right(self);
3582            if self.is_always_nullable(la_body_right) {
3583                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3584            }
3585            let bodyright = la_body.right(self);
3586            if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3587                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3588                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3589            }
3590        }
3591
3592        if la_tail != NodeId::MISSING {
3593            if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3594                let lpred = la_body.left(self);
3595                if lpred.is_pred(self) {
3596                    let l = lpred.pred_tset(self);
3597                    let r = la_tail.pred_tset(self);
3598                    let psi_and = self.solver().and_id(l, r);
3599                    let rewrite = self.mk_pred(psi_and);
3600                    let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3601                    let new_right =
3602                        self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3603                    return self.mk_concat(rewrite, new_right);
3604                }
3605            }
3606        }
3607
3608        self.get_node_id(key)
3609    }
3610
3611    pub fn mk_lookahead_range(&mut self, la_body: NodeId, la_tail: NodeId, lo: u32, hi: u32) -> NodeId {
3612        if hi == u32::MAX {
3613            return self.mk_lookahead_internal(la_body, la_tail, u32::MAX);
3614        }
3615        debug_assert!(lo <= hi);
3616        let mut rels = self.mb.nb.add_rel(NullsId::ALWAYS0, lo);
3617        for k in lo + 1..=hi {
3618            let part = self.mb.nb.add_rel(NullsId::ALWAYS0, k);
3619            rels = self.mb.nb.or_id(rels, part);
3620        }
3621        self.mk_lookahead_nid(la_body, la_tail, rels)
3622    }
3623
3624    pub fn mk_lookahead_nid(&mut self, la_body: NodeId, la_tail: NodeId, rels: NullsId) -> NodeId {
3625        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3626            return NodeId::BOT;
3627        }
3628        let key = NodeKey {
3629            kind: Kind::Lookahead,
3630            left: la_body,
3631            right: la_tail,
3632            extra: rels.0,
3633        };
3634        if let Some(id) = self.key_is_created(&key) {
3635            return *id;
3636        }
3637        self.get_node_id(key)
3638    }
3639
3640    pub fn mk_ordered(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3641        let has_match = (packed >> 16) > 0;
3642        if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3643            return NodeId::BOT;
3644        }
3645        debug_assert!(
3646            body == NodeId::BOT || !self.is_infinite(body),
3647            "Counted body must have finite max length"
3648        );
3649        let chain = self.prune_counted_chain(body, chain);
3650        let key = NodeKey {
3651            kind: Kind::Ordered,
3652            left: body,
3653            right: chain,
3654            extra: packed,
3655        };
3656        if let Some(id) = self.key_is_created(&key) {
3657            return *id;
3658        }
3659        self.get_node_id(key)
3660    }
3661
3662    fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3663        if chain == NodeId::MISSING || body == NodeId::BOT {
3664            return chain;
3665        }
3666        if self.nullability(body) != Nullability::NEVER {
3667            return NodeId::MISSING;
3668        }
3669        let chain_body = chain.left(self);
3670        if chain_body == NodeId::BOT {
3671            return chain;
3672        }
3673        let not_begins = self.mk_not_begins_with(body);
3674        let inter = self.mk_inter(chain_body, not_begins);
3675        let is_empty = inter == NodeId::BOT;
3676        if is_empty {
3677            self.prune_counted_chain(body, chain.right(self))
3678        } else {
3679            chain
3680        }
3681    }
3682
3683    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3684        let (_, p_max) = self.get_min_max_length(body);
3685        if p_max == 0 && !self.contains_anchors(body) {
3686            let not_body = self.mk_compl(body);
3687            return self.mk_inter(NodeId::EPS, not_body);
3688        }
3689        let neg_inner = self.mk_concat(body, NodeId::TS);
3690        let neg_part = self.mk_compl(neg_inner);
3691        let conc = self.mk_concat(neg_part, NodeId::END);
3692        self.mk_lookahead(conc, NodeId::MISSING, rel)
3693    }
3694
3695    pub fn neg_lookbehind_supported(&mut self, body: NodeId) -> bool {
3696        if body.is_center_nullable(self) {
3697            return true;
3698        }
3699        if self.get_node(body).kind == Kind::Pred {
3700            return true;
3701        }
3702        let uc = crate::unicode_classes::utf8_char(self);
3703        let allowed = self.mk_union(NodeId::BEGIN, uc);
3704        self.subsumes(allowed, body).unwrap_or(false)
3705    }
3706
3707    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3708        if body.is_center_nullable(self) {
3709            return NodeId::BOT;
3710        }
3711        match self.get_node(body).kind {
3712            // Kind::Pred => {
3713            //     let psi = body.pred_tset(self);
3714            //     let negated = self.mk_pred_not(psi);
3715            //     let union = self.mk_union(NodeId::BEGIN, negated);
3716            //     self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3717            // }
3718            _ => {
3719                let body_ts = self.mk_concat(NodeId::TS, body);
3720                let compl = self.mk_compl(body_ts);
3721                let not_begins = self.mk_concat(NodeId::BEGIN, compl);
3722                self.mk_lookbehind_internal(not_begins, NodeId::MISSING)
3723                    .unwrap()
3724            }
3725        }
3726    }
3727
3728    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3729        debug_assert!(left != NodeId::MISSING);
3730        debug_assert!(right != NodeId::MISSING);
3731        if left > right {
3732            return self.mk_union(right, left);
3733        }
3734        let key = NodeKey {
3735            kind: Kind::Union,
3736            left,
3737            right,
3738            extra: u32::MAX,
3739        };
3740        if let Some(id) = self.key_is_created(&key) {
3741            return *id;
3742        }
3743        // println!("{:?}",(self.pp(left),self.pp(right)));
3744        if left == right {
3745            return self.init_as(key, left);
3746        }
3747        if left == NodeId::BOT {
3748            return self.init_as(key, right);
3749        }
3750        if right == NodeId::BOT {
3751            return self.init_as(key, left);
3752        }
3753        if right == NodeId::TS {
3754            return self.init_as(key, right);
3755        }
3756        if left == NodeId::TS {
3757            return self.init_as(key, left);
3758        }
3759        if left == NodeId::EPS && right == NodeId::END {
3760            return self.init_as(key, NodeId::EPS);
3761        }
3762        // \A | (?<=R) ==>  (?<=\A|R)
3763        if left == NodeId::BEGIN && self.get_kind(right) == Kind::Lookbehind {
3764            let lb_body = self.get_lookbehind_inner(right);
3765            let lb_prev = self.get_lookbehind_prev(right);
3766            let new_body = self.mk_union(NodeId::BEGIN, lb_body);
3767            let rewritten = self.mk_lookbehind(new_body, lb_prev);
3768            return self.init_as(key, rewritten);
3769        }
3770        match (self.get_kind(left), self.get_kind(right)) {
3771            (Kind::Union, _) => {
3772                self.iter_unions_b(left, &mut |b, v| {
3773                    b.temp_vec.push(v);
3774                });
3775                self.iter_unions_b(right, &mut |b, v| {
3776                    b.temp_vec.push(v);
3777                });
3778                self.temp_vec.sort();
3779                let tree = self.temp_vec.clone();
3780                self.temp_vec.clear();
3781                let newnode = tree
3782                    .iter()
3783                    .rev()
3784                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3785                return self.init_as(key, newnode);
3786            }
3787            (_, Kind::Union) => {
3788                let rleft = right.left(self);
3789                // if left_node id is smaller than rleft, just create a new union
3790                if left > rleft {
3791                    self.iter_unions_b(left, &mut |b, v| {
3792                        b.temp_vec.push(v);
3793                    });
3794                    self.iter_unions_b(right, &mut |b, v| {
3795                        b.temp_vec.push(v);
3796                    });
3797                    self.temp_vec.sort();
3798                    let tree = self.temp_vec.clone();
3799                    self.temp_vec.clear();
3800                    let newnode = tree
3801                        .iter()
3802                        .rev()
3803                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3804                    return self.init_as(key, newnode);
3805                } else {
3806                    if let Some(rw) = self.attempt_rw_unions(left, right) {
3807                        return self.init_as(key, rw);
3808                    }
3809                }
3810            }
3811            _ => {}
3812        }
3813        if let Some(rw) = self.attempt_rw_union_2(left, right) {
3814            return self.init_as(key, rw);
3815        }
3816        self.init(key)
3817    }
3818
3819    pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3820        debug_assert!(left_id != NodeId::MISSING);
3821        debug_assert!(right_id != NodeId::MISSING);
3822        if left_id == right_id {
3823            return left_id;
3824        }
3825        if left_id == NodeId::BOT || right_id == NodeId::BOT {
3826            return NodeId::BOT;
3827        }
3828        if left_id == NodeId::TS {
3829            return right_id;
3830        }
3831        if right_id == NodeId::TS {
3832            return left_id;
3833        }
3834        if left_id > right_id {
3835            return self.mk_inter(right_id, left_id);
3836        }
3837        let key = NodeKey {
3838            kind: Kind::Inter,
3839            left: left_id,
3840            right: right_id,
3841            extra: u32::MAX,
3842        };
3843        if let Some(id) = self.key_is_created(&key) {
3844            return *id;
3845        }
3846
3847        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3848            return self.init_as(key, rw);
3849        }
3850
3851        self.init(key)
3852    }
3853
3854    fn eps_inter_look(&self, left_id: NodeId, right_id: NodeId) -> Option<NodeId> {
3855        let other = if left_id.is_eps() {
3856            right_id
3857        } else if right_id.is_eps() {
3858            left_id
3859        } else {
3860            return None;
3861        };
3862        if other.is_lookbehind(self) {
3863            return Some(other);
3864        }
3865        if other.is_lookahead(self) && self.get_lookahead_rel(other) == 0 {
3866            return Some(other);
3867        }
3868        None
3869    }
3870
3871    fn mk_unset(&mut self, kind: Kind) -> NodeId {
3872        let node = NodeKey {
3873            kind,
3874            left: NodeId::MISSING,
3875            right: NodeId::MISSING,
3876            extra: u32::MAX,
3877        };
3878        self.init(node)
3879    }
3880
3881    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3882        let star = self.mk_star(body_id);
3883        self.mk_concat(body_id, star)
3884    }
3885    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3886        let opt = self.mk_opt(body_id);
3887        let mut nodes1 = vec![];
3888        for _ in lower..upper {
3889            nodes1.push(opt);
3890        }
3891        for _ in 0..lower {
3892            nodes1.push(body_id);
3893        }
3894        self.mk_concats(nodes1.into_iter())
3895    }
3896    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3897        self.mk_union(NodeId::EPS, body_id)
3898    }
3899
3900    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3901        let key = NodeKey {
3902            kind: Kind::Star,
3903            left: body_id,
3904            right: NodeId::MISSING,
3905            extra: 0,
3906        };
3907        if let Some(id) = self.key_is_created(&key) {
3908            return *id;
3909        }
3910        if body_id.is_begin() || body_id.is_end() {
3911            return NodeId::EPS;
3912        }
3913        // _*{500} is still _*
3914        if body_id.is_star(self) {
3915            return body_id;
3916        }
3917        // (P+)* = P*
3918        if body_id.is_plus(self) {
3919            return body_id.right(self);
3920        }
3921        self.get_node_id(key)
3922    }
3923
3924    /// it's cheaper to check this once as an edge-case
3925    /// than to compute a 4th nullability bit for every node
3926    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3927        self.lengths[node_id.0 as usize].es_null
3928    }
3929
3930    #[inline(always)]
3931    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3932        self.get_meta(node_id)
3933            .flags
3934            .nullability()
3935            .has(Nullability::CENTER.or(Nullability::END))
3936    }
3937
3938    pub fn nullability(&self, node_id: NodeId) -> Nullability {
3939        self.get_only_nullability(node_id)
3940    }
3941
3942    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3943        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3944    }
3945
3946    pub fn pp(&self, node_id: NodeId) -> String {
3947        let mut s = String::new();
3948        self.ppw(&mut s, node_id).unwrap();
3949        s
3950    }
3951
3952    /// xa|ya => (x|y)a suffix factoring
3953    /// used only for pretty-printing in to_pattern
3954    pub fn factor_suffixes(&mut self, node: NodeId) -> Result<NodeId, ResharpError> {
3955        let mut memo = FxHashMap::default();
3956        self.factor_suffixes_rec(node, &mut memo)
3957    }
3958
3959    fn factor_suffixes_rec(
3960        &mut self,
3961        node: NodeId,
3962        memo: &mut FxHashMap<NodeId, NodeId>,
3963    ) -> Result<NodeId, ResharpError> {
3964        if let Some(&v) = memo.get(&node) {
3965            return Ok(v);
3966        }
3967        let result = match self.get_kind(node) {
3968            Kind::Pred | Kind::Begin | Kind::End => node,
3969            Kind::Star => {
3970                let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3971                self.mk_star(inner)
3972            }
3973            Kind::Compl => {
3974                let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3975                self.mk_compl(inner)
3976            }
3977            Kind::Concat => {
3978                let l = self.factor_suffixes_rec(node.left(self), memo)?;
3979                let r = self.factor_suffixes_rec(node.right(self), memo)?;
3980                self.mk_concat(l, r)
3981            }
3982            Kind::Inter => {
3983                let l = self.factor_suffixes_rec(node.left(self), memo)?;
3984                let r = self.factor_suffixes_rec(node.right(self), memo)?;
3985                self.mk_inter(l, r)
3986            }
3987            Kind::Union => {
3988                let mut comps: Vec<NodeId> = Vec::new();
3989                node.iter_union(self, &mut |_, c| comps.push(c));
3990                let mut factored: Vec<NodeId> = Vec::with_capacity(comps.len());
3991                let mut reversible = true;
3992                for c in &comps {
3993                    let f = self.factor_suffixes_rec(*c, memo)?;
3994                    if c.contains_lookaround(self)
3995                        || c.flags_contains(self).has(MetaFlags::CONTAINS_ANCHORS)
3996                    {
3997                        reversible = false;
3998                    }
3999                    factored.push(f);
4000                }
4001                if !reversible || factored.len() < 2 {
4002                    let mut acc = factored[0];
4003                    for c in factored.into_iter().skip(1) {
4004                        acc = self.mk_union(acc, c);
4005                    }
4006                    acc
4007                } else {
4008                    let mut rev_comps: Vec<NodeId> = Vec::with_capacity(factored.len());
4009                    for c in factored {
4010                        rev_comps.push(self.reverse(c)?);
4011                    }
4012                    let mut acc = rev_comps[0];
4013                    for c in rev_comps.into_iter().skip(1) {
4014                        acc = self.mk_union(acc, c);
4015                    }
4016                    self.reverse(acc)?
4017                }
4018            }
4019            Kind::Lookahead => {
4020                let inner = self.get_lookahead_inner(node);
4021                let tail = self.get_lookahead_tail(node);
4022                let rel = self.get_lookahead_rel(node);
4023                let ni = self.factor_suffixes_rec(inner, memo)?;
4024                let nt = self.factor_suffixes_rec(tail, memo)?;
4025                self.mk_lookahead(ni, nt, rel)
4026            }
4027            Kind::Lookbehind => {
4028                let inner = self.get_lookbehind_inner(node);
4029                let prev = self.get_lookbehind_prev(node);
4030                let ni = self.factor_suffixes_rec(inner, memo)?;
4031                let np = if prev != NodeId::MISSING {
4032                    self.factor_suffixes_rec(prev, memo)?
4033                } else {
4034                    prev
4035                };
4036                self.mk_lookbehind(ni, np)
4037            }
4038            Kind::Ordered => node,
4039        };
4040        memo.insert(node, result);
4041        Ok(result)
4042    }
4043
4044    #[allow(dead_code)]
4045    pub fn pp_nulls(&self, node_id: NodeId) -> String {
4046        let nu = self.get_nulls_id(node_id);
4047        let nr = self.mb.nb.get_set_ref(nu);
4048        let s1 = format!("{:?}", nr);
4049        s1
4050    }
4051
4052    #[allow(dead_code)]
4053    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
4054        match self.get_tregex(term_id) {
4055            TRegex::Leaf(node_id) => {
4056                let mut s = String::new();
4057                self.ppw(&mut s, *node_id).unwrap();
4058                s
4059            }
4060            TRegex::ITE(cond, then_id, else_id) => {
4061                format!(
4062                    "ITE({},{},{})",
4063                    self.solver_ref().pp(*cond),
4064                    self.ppt(*then_id),
4065                    self.ppt(*else_id)
4066                )
4067            }
4068        }
4069    }
4070
4071    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
4072        if cfg!(feature = "graphviz") {
4073            match node_id {
4074                NodeId::MISSING => return write!(s, "MISSING"),
4075                NodeId::BOT => return write!(s, "⊥"),
4076                NodeId::TS => return write!(s, "_*"),
4077                NodeId::TOP => return write!(s, "_"),
4078                NodeId::EPS => return write!(s, "ε"),
4079                _ => {}
4080            }
4081        }
4082
4083        match node_id {
4084            NodeId::MISSING => return write!(s, "MISSING"),
4085            NodeId::BOT => return write!(s, "⊥"),
4086            NodeId::TS => return write!(s, "_*"),
4087            NodeId::TOP => return write!(s, "_"),
4088            NodeId::EPS => return write!(s, ""),
4089            _ => {}
4090        }
4091
4092        match self.get_kind(node_id) {
4093            Kind::End => write!(s, r"\z"),
4094            Kind::Begin => write!(s, r"\A"),
4095            Kind::Pred => {
4096                let psi = node_id.pred_tset(self);
4097                if psi == TSetId::EMPTY {
4098                    write!(s, r"⊥")
4099                } else if psi == TSetId::FULL {
4100                    write!(s, r"_")
4101                } else {
4102                    write!(s, "{}", self.solver_ref().pp(psi))
4103                }
4104            }
4105            Kind::Inter => {
4106                write!(s, "(")?;
4107                self.ppw(s, node_id.left(self))?;
4108                write!(s, "&")?;
4109                let mut curr = node_id.right(self);
4110                while curr.is_inter(self) {
4111                    let n = curr.left(self);
4112                    self.ppw(s, n)?;
4113                    write!(s, "&")?;
4114                    curr = curr.right(self);
4115                }
4116                self.ppw(s, curr)?;
4117                write!(s, ")")
4118            }
4119            Kind::Union => {
4120                let left = node_id.left(self);
4121                let right = node_id.right(self);
4122                write!(s, "(")?;
4123                self.ppw(s, left)?;
4124                write!(s, "|")?;
4125                let mut curr = right;
4126                while self.get_kind(curr) == Kind::Union {
4127                    let n = curr.left(self);
4128                    self.ppw(s, n)?;
4129                    write!(s, "|")?;
4130                    curr = curr.right(self);
4131                }
4132                self.ppw(s, curr)?;
4133                write!(s, ")")
4134            }
4135            Kind::Concat => {
4136                let left = node_id.left(self);
4137                let right = node_id.right(self);
4138                if right.is_star(self) && right.left(self) == left {
4139                    self.ppw(s, left)?;
4140                    write!(s, "+")?;
4141                    return Ok(());
4142                }
4143                if right.is_concat(self) {
4144                    let rl = right.left(self);
4145                    if rl.is_star(self) && rl.left(self) == left {
4146                        self.ppw(s, left)?;
4147                        write!(s, "+")?;
4148                        return self.ppw(s, right.right(self));
4149                    }
4150                }
4151                if right.is_concat(self) && right.left(self) == left {
4152                    let mut num = 1;
4153                    let mut right = right;
4154                    while right.is_concat(self) && right.left(self) == left {
4155                        num += 1;
4156                        right = right.right(self);
4157                    }
4158                    // (|X){n} followed by X{m} -> X{m,m+n}
4159                    if let Some(inner) = left.is_opt_v(self) {
4160                        let mut inner_count = 0;
4161                        let mut right2 = right;
4162                        while right2.is_concat(self) && right2.left(self) == inner {
4163                            inner_count += 1;
4164                            right2 = right2.right(self);
4165                        }
4166                        if right2 == inner {
4167                            inner_count += 1;
4168                            self.ppw(s, inner)?;
4169                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
4170                        }
4171                        if inner_count > 0 {
4172                            self.ppw(s, inner)?;
4173                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
4174                            return self.ppw(s, right2);
4175                        }
4176                    }
4177                    self.ppw(s, left)?;
4178                    if right == left {
4179                        num += 1;
4180                        return write!(s, "{{{}}}", num);
4181                    }
4182                    if num <= 3 && left.is_pred(self) {
4183                        for _ in 1..num {
4184                            self.ppw(s, left)?;
4185                        }
4186                        return self.ppw(s, right);
4187                    } else {
4188                        write!(s, "{{{}}}", num)?;
4189                        return self.ppw(s, right);
4190                    }
4191                }
4192                self.ppw(s, left)?;
4193                self.ppw(s, right)
4194            }
4195            Kind::Star => {
4196                let left = node_id.left(self);
4197                let leftkind = self.get_kind(left);
4198                match leftkind {
4199                    Kind::Concat | Kind::Star | Kind::Compl => {
4200                        write!(s, "(")?;
4201                        self.ppw(s, left)?;
4202                        write!(s, ")")?;
4203                    }
4204                    _ => {
4205                        self.ppw(s, left)?;
4206                    }
4207                };
4208                write!(s, "*")
4209            }
4210            Kind::Compl => {
4211                write!(s, "~(")?;
4212                self.ppw(s, node_id.left(self))?;
4213                write!(s, ")")
4214            }
4215            Kind::Lookbehind => {
4216                let lbleft = self.get_lookbehind_prev(node_id);
4217                let lbinner = self.get_lookbehind_inner(node_id);
4218                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
4219                if lbleft != NodeId::MISSING {
4220                    write!(s, "❮")?;
4221                    self.ppw(s, lbleft)?;
4222                    write!(s, "❯")?;
4223                }
4224
4225                write!(s, "(?<=")?;
4226                self.ppw(s, lbinner)?;
4227                write!(s, ")")
4228            }
4229            Kind::Lookahead => {
4230                let inner = self.get_lookahead_inner(node_id);
4231                write!(s, "(?=")?;
4232                self.ppw(s, inner)?;
4233                write!(s, ")")?;
4234                let extra = self.get_extra(node_id);
4235                if extra == u32::MAX {
4236                    write!(s, "{{∅}}")?;
4237                } else {
4238                    let rels = NullsId(extra);
4239                    let hi = self.mb.nb.max_rel(rels);
4240                    let lo = self.mb.nb.min_rel(rels);
4241                    if hi != 0 {
4242                        write!(s, "{{")?;
4243                        if lo == hi {
4244                            write!(s, "{hi}")?;
4245                        } else {
4246                            write!(s, "{lo}..{hi}")?;
4247                        }
4248                        write!(s, "}}")?;
4249                    }
4250                }
4251                if node_id.right(self) == NodeId::MISSING {
4252                    Ok(())
4253                } else {
4254                    write!(s, "❮")?;
4255                    self.ppw(s, node_id.right(self))?;
4256                    write!(s, "❯")
4257                }
4258            }
4259            Kind::Ordered => {
4260                let body = node_id.left(self);
4261                let packed = self.get_extra(node_id);
4262                let step = packed & 0xFFFF;
4263                let best = packed >> 16;
4264                write!(s, "#(")?;
4265                self.ppw(s, body)?;
4266                write!(s, ")s{}b{}", step, best)
4267            }
4268        }
4269    }
4270
4271    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
4272        self.mk_concat(node, NodeId::TS)
4273    }
4274
4275    pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
4276        let node_ts = self.mk_concat(node, NodeId::TS);
4277        self.mk_compl(node_ts)
4278    }
4279
4280    pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
4281        let notset = self.solver().not_id(set);
4282        self.mk_pred(notset)
4283    }
4284
4285    pub fn mk_u8(&mut self, char: u8) -> NodeId {
4286        let set_id = self.solver().u8_to_set_id(char);
4287        self.mk_pred(set_id)
4288    }
4289
4290    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
4291        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
4292        self.mk_pred(rangeset)
4293    }
4294
4295    pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
4296        let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
4297        for &(lo, hi) in &ranges[1..] {
4298            let r = self.mk_range_u8(lo, hi);
4299            node = self.mk_union(node, r);
4300        }
4301        node
4302    }
4303
4304    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
4305        let mut prefix = Vec::new();
4306        let mut curr = node;
4307        loop {
4308            if curr == NodeId::EPS {
4309                let full = !prefix.is_empty();
4310                return (prefix, full);
4311            }
4312            if curr == NodeId::BOT {
4313                break;
4314            }
4315            if curr.is_pred(self) {
4316                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
4317                    Some(byte) => {
4318                        prefix.push(byte);
4319                        return (prefix, true);
4320                    }
4321                    None => break, // multi-byte pred: pattern not fully consumed
4322                }
4323            }
4324            if !curr.is_concat(self) {
4325                break;
4326            }
4327            let left = curr.left(self);
4328            if !left.is_pred(self) {
4329                break;
4330            }
4331            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
4332                Some(byte) => prefix.push(byte),
4333                None => break,
4334            }
4335            curr = curr.right(self);
4336        }
4337        (prefix, false)
4338    }
4339
4340    #[allow(dead_code)]
4341    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
4342        let mut result = NodeId::EPS;
4343        for byte in raw_str.iter().rev() {
4344            let node = self.mk_u8(*byte);
4345            result = self.mk_concat(node, result);
4346        }
4347        result
4348    }
4349
4350    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
4351        let mut result = NodeId::EPS;
4352        for byte in raw_str.bytes().rev() {
4353            let node = self.mk_u8(byte);
4354            result = self.mk_concat(node, result);
4355        }
4356        result
4357    }
4358
4359    pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
4360        self.prune_rec::<true>(node_id, memo)
4361    }
4362
4363    pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
4364        self.prune_rec::<false>(node_id, memo)
4365    }
4366
4367    pub fn simplify_fwd_initial(&mut self, node_id: NodeId) -> NodeId {
4368        let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4369        match self.get_kind(node_id) {
4370            Kind::Concat => {
4371                let l = self.simplify_fwd_initial_rec(node_id.left(self), &mut memo);
4372                let r = self.simplify_fwd_initial_rec(node_id.right(self), &mut memo);
4373                if r.is_concat(self) {
4374                    if r.left(self).is_ts() && !r.right(self).is_lookahead(self) {
4375                        if l.is_begin_nullable(self) {
4376                            return r;
4377                        }
4378                    }
4379                }
4380            }
4381            _ => {}
4382        }
4383
4384        self.simplify_fwd_initial_rec(node_id, &mut memo)
4385    }
4386
4387    fn simplify_fwd_initial_rec(
4388        &mut self,
4389        node_id: NodeId,
4390        memo: &mut FxHashMap<NodeId, NodeId>,
4391    ) -> NodeId {
4392        if let Some(&v) = memo.get(&node_id) {
4393            return v;
4394        }
4395
4396        let out = match self.get_kind(node_id) {
4397            Kind::Union => {
4398                let mut parts: Vec<NodeId> = Vec::new();
4399                self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4400                for p in &mut parts {
4401                    *p = self.simplify_fwd_initial_rec(*p, memo);
4402                }
4403                parts
4404                    .iter()
4405                    .rev()
4406                    .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4407            }
4408            Kind::Inter => {
4409                let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
4410                let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
4411                let mut distributed = None;
4412                for (u, other) in [(l, r), (r, l)] {
4413                    if self.get_kind(u) == Kind::Union && u.contains_lookahead(self) {
4414                        let mut branches: Vec<NodeId> = Vec::new();
4415                        self.iter_unions_b(u, &mut |_, v| branches.push(v));
4416                        let mut acc = NodeId::BOT;
4417                        for &br in branches.iter().rev() {
4418                            let arm = self.mk_inter(br, other);
4419                            let arm = self.simplify_fwd_initial_rec(arm, memo);
4420                            acc = self.mk_union(arm, acc);
4421                        }
4422                        distributed = Some(acc);
4423                        break;
4424                    }
4425                }
4426                if let Some(acc) = distributed {
4427                    acc
4428                } else if let Some(look) = self.eps_inter_look(l, r) {
4429                    look
4430                } else {
4431                    self.mk_inter(l, r)
4432                }
4433            }
4434            Kind::Concat => {
4435                let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
4436                let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
4437                if l == NodeId::TS
4438                    && r.is_lookahead(self)
4439                    && self.get_lookahead_tail(r).is_missing()
4440                    && self.get_lookahead_rel(r) == 0
4441                {
4442                    let body = self.get_lookahead_inner(r);
4443                    if self.is_nullable(body, Nullability::END) {
4444                        return NodeId::TS;
4445                    }
4446                }
4447
4448                if l != NodeId::TS {
4449                    return self.mk_concat(l, r);
4450                }
4451                if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4452                    return rewritten;
4453                }
4454                let (head, tail) = if r.is_concat(self) {
4455                    (r.left(self), r.right(self))
4456                } else {
4457                    (r, NodeId::EPS)
4458                };
4459                if self.get_kind(head) != Kind::Union {
4460                    return self.mk_concat(l, r);
4461                }
4462                if !head.left(self).is_begin() {
4463                    return self.mk_concat(l, r);
4464                }
4465                let y = head.right(self);
4466                if !y.is_pred(self) {
4467                    return self.mk_concat(l, r);
4468                }
4469                if tail.is_concat(self) {
4470                    let tl = tail.left(self);
4471                    let tr = tail.right(self);
4472                    let y_tset = y.pred_tset(self);
4473                    let covers_all = if tl == NodeId::TS {
4474                        true
4475                    } else if let Some(x_pred) = tl.is_pred_star(self) {
4476                        let x_ts = x_pred.pred_tset(self);
4477                        let combined = self.solver().or_id(x_ts, y_tset);
4478                        self.solver().is_full_id(combined)
4479                    } else {
4480                        false
4481                    };
4482                    if covers_all {
4483                        return self.mk_concat(NodeId::TS, tr);
4484                    }
4485                }
4486                self.mk_concat(l, r)
4487            }
4488            _ => node_id,
4489        };
4490        memo.insert(node_id, out);
4491        out
4492    }
4493
4494    pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
4495        let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4496        self.simplify_rev_initial_rec(node_id, &mut memo)
4497    }
4498
4499    fn simplify_rev_initial_rec(
4500        &mut self,
4501        node_id: NodeId,
4502        memo: &mut FxHashMap<NodeId, NodeId>,
4503    ) -> NodeId {
4504        if let Some(&v) = memo.get(&node_id) {
4505            return v;
4506        }
4507        let out = match self.get_kind(node_id) {
4508            Kind::Union => {
4509                let mut parts: Vec<NodeId> = Vec::new();
4510                self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4511                for p in &mut parts {
4512                    *p = self.simplify_rev_initial_rec(*p, memo);
4513                }
4514                parts
4515                    .iter()
4516                    .rev()
4517                    .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4518            }
4519            Kind::Concat => {
4520                let l = self.simplify_rev_initial_rec(node_id.left(self), memo);
4521                let r = self.simplify_rev_initial_rec(node_id.right(self), memo);
4522
4523                if l != NodeId::TS {
4524                    return self.mk_concat(l, r);
4525                }
4526                if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4527                    return rewritten;
4528                }
4529                let (tail1, tail2) = if r.is_concat(self) {
4530                    (r.left(self), r.right(self))
4531                } else {
4532                    (r, NodeId::EPS)
4533                };
4534
4535                if tail1.is_begin() {
4536                    return r;
4537                }
4538
4539                if self.get_kind(tail1) != Kind::Union {
4540                    return self.mk_concat(l, r);
4541                }
4542                if !tail1.left(self).is_begin() {
4543                    return self.mk_concat(l, r);
4544                }
4545                let y = tail1.right(self);
4546                if !y.is_pred(self) {
4547                    return self.mk_concat(l, r);
4548                }
4549                if tail2.is_concat(self) {
4550                    let tl = tail2.left(self);
4551                    let tr = tail2.right(self);
4552
4553                    let y_tset = y.pred_tset(self);
4554                    let covers_all = if tl == NodeId::TS {
4555                        true
4556                    } else if let Some(x_pred) = tl.is_pred_star(self) {
4557                        let x_ts = x_pred.pred_tset(self);
4558                        let combined = self.solver().or_id(x_ts, y_tset);
4559                        self.solver().is_full_id(combined)
4560                    } else {
4561                        false
4562                    };
4563                    if covers_all {
4564                        return self.mk_concat(NodeId::TS, tr);
4565                    }
4566                }
4567                self.mk_concat(l, r)
4568            }
4569            _ => node_id,
4570        };
4571        memo.insert(node_id, out);
4572        out
4573    }
4574
4575    fn try_begin_neg_pred_rewrite(&mut self, r: NodeId) -> Option<NodeId> {
4576        if !r.is_concat(self) {
4577            return None;
4578        }
4579        let begin = r.left(self);
4580        let mid_tail = r.right(self);
4581        if !begin.is_begin() {
4582            return None;
4583        }
4584        if !mid_tail.is_concat(self) {
4585            return None;
4586        }
4587        let neg = mid_tail.left(self);
4588        let tail = mid_tail.right(self);
4589        if self.get_kind(neg) != Kind::Compl {
4590            return None;
4591        }
4592        let inside = neg.left(self);
4593        if !inside.is_concat(self) {
4594            return None;
4595        }
4596        let ts_part = inside.left(self);
4597        let c_pred = inside.right(self);
4598        if ts_part != NodeId::TS || !c_pred.is_pred(self) {
4599            return None;
4600        }
4601        let c_tset = c_pred.pred_tset(self);
4602        let not_c = self.mk_pred_not(c_tset);
4603        let left_branch = self.mk_concat(NodeId::BEGIN, tail);
4604        let not_c_tail = self.mk_concat(not_c, tail);
4605        let right_branch = self.mk_concat(NodeId::TS, not_c_tail);
4606        Some(self.mk_union(left_branch, right_branch))
4607    }
4608
4609    fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
4610        if !n.is_concat(self) {
4611            return n;
4612        }
4613        let l = n.left(self);
4614        let r = n.right(self);
4615        if l == NodeId::TS && r == NodeId::END {
4616            return NodeId::TS;
4617        }
4618        let new_r = self.strip_la_body_end(r);
4619        if new_r == r {
4620            n
4621        } else {
4622            self.mk_concat(l, new_r)
4623        }
4624    }
4625
4626    fn prune_rec<const FWD: bool>(
4627        &mut self,
4628        node_id: NodeId,
4629        memo: &mut FxHashMap<NodeId, NodeId>,
4630    ) -> NodeId {
4631        debug_assert!(node_id != NodeId::MISSING);
4632        if node_id == NodeId::MISSING {
4633            return node_id;
4634        }
4635        if let Some(&v) = memo.get(&node_id) {
4636            return v;
4637        }
4638        let (l, r) = (node_id.left(self), node_id.right(self));
4639        let out = match node_id.kind(self) {
4640            Kind::Union => {
4641                let l = self.prune_rec::<FWD>(l, memo);
4642                let r = self.prune_rec::<FWD>(r, memo);
4643                let mut parts: Vec<NodeId> = Vec::new();
4644                parts.push(l);
4645                self.iter_unions_b(r, &mut |_, v| parts.push(v));
4646                for p in &mut parts {
4647                    *p = self.prune_rec::<FWD>(*p, memo);
4648                }
4649
4650                if FWD {
4651                    // min lo per body for pure lookaheads (la_tail MISSING)
4652                    let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
4653                    for &p in &parts {
4654                        if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4655                            let body = self.get_lookahead_inner(p);
4656                            let lo = self.get_lookahead_rel_lo(p);
4657                            best.entry(body)
4658                                .and_modify(|r| *r = (*r).min(lo))
4659                                .or_insert(lo);
4660                        }
4661                    }
4662                    parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
4663                        if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4664                            let body = self.get_lookahead_inner(p);
4665                            if self.get_lookahead_rel_lo(p) != best[&body] {
4666                                return acc;
4667                            }
4668                        }
4669                        self.mk_union(p, acc)
4670                    })
4671                } else {
4672                    let mut rels_map: FxHashMap<NodeId, NullsId> = FxHashMap::default();
4673                    for &p in &parts {
4674                        if p.is_lookahead(self)
4675                            && self.get_lookahead_tail(p) == NodeId::MISSING
4676                            && self.get_extra(p) != u32::MAX
4677                        {
4678                            let body = self.get_lookahead_inner(p);
4679                            let rels = NullsId(self.get_extra(p));
4680                            rels_map
4681                                .entry(body)
4682                                .and_modify(|r| *r = self.mb.nb.or_id(*r, rels))
4683                                .or_insert(rels);
4684                        }
4685                    }
4686                    let mut la_acc = NodeId::BOT;
4687                    for (body, rels) in rels_map {
4688                        let lo = self.mb.nb.min_rel(rels);
4689                        let hi = self.mb.nb.max_rel(rels);
4690                        let node = if lo == hi {
4691                            self.mk_lookahead_internal(body, NodeId::MISSING, hi)
4692                        } else {
4693                            self.mk_lookahead_nid(body, NodeId::MISSING, rels)
4694                        };
4695                        la_acc = self.mk_union(node, la_acc);
4696                    }
4697                    parts.iter().rev().fold(la_acc, |acc, &p| {
4698                        if p.is_lookahead(self)
4699                            && self.get_lookahead_tail(p) == NodeId::MISSING
4700                            && self.get_extra(p) != u32::MAX
4701                        {
4702                            acc
4703                        } else {
4704                            self.mk_union(p, acc)
4705                        }
4706                    })
4707                }
4708            }
4709            Kind::Concat => {
4710                if FWD
4711                    && l.is_ts()
4712                    && r.is_lookahead(self)
4713                    && self.get_lookahead_tail(r).is_missing()
4714                    && self.get_lookahead_rel(r) == 0
4715                {
4716                    let body = self.get_lookahead_inner(r);
4717                    if self.is_nullable(body, Nullability::END) {
4718                        return NodeId::TS;
4719                    }
4720                }
4721                let l = self.prune_rec::<FWD>(l, memo);
4722                let r = self.prune_rec::<FWD>(r, memo);
4723                self.mk_concat(l, r)
4724            }
4725            Kind::Inter => {
4726                let l = self.prune_rec::<FWD>(l, memo);
4727                let r = self.prune_rec::<FWD>(r, memo);
4728                self.mk_inter(l, r)
4729            }
4730            Kind::Compl => {
4731                let l = self.prune_rec::<FWD>(l, memo);
4732                self.mk_compl(l)
4733            }
4734            Kind::Lookahead => {
4735                let extra = self.get_extra(node_id);
4736                let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
4737                let body = self.prune_rec::<FWD>(body, memo);
4738
4739                let tail = if self.get_lookahead_tail(node_id).is_missing() {
4740                    NodeId::MISSING
4741                } else {
4742                    self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo)
4743                };
4744                if extra == u32::MAX {
4745                    self.mk_lookahead_internal(body, tail, u32::MAX)
4746                } else {
4747                    let rels = NullsId(extra);
4748                    let lo = self.mb.nb.min_rel(rels);
4749                    let hi = self.mb.nb.max_rel(rels);
4750                    if lo == hi {
4751                        self.mk_lookahead_internal(body, tail, hi)
4752                    } else {
4753                        self.mk_lookahead_nid(body, tail, rels)
4754                    }
4755                }
4756            }
4757            Kind::Begin => NodeId::BOT,
4758            Kind::Ordered => {
4759                let body = self.prune_rec::<FWD>(l, memo);
4760                let chain = self.prune_rec::<FWD>(r, memo);
4761                self.mk_ordered(body, chain, self.get_extra(node_id))
4762            }
4763            Kind::End | Kind::Pred => node_id,
4764            Kind::Star => {
4765                let l = self.prune_rec::<FWD>(l, memo);
4766                self.mk_star(l)
4767            }
4768            Kind::Lookbehind => {
4769                let l = self.prune_rec::<FWD>(l, memo);
4770                if r.is_missing() {
4771                    l
4772                } else {
4773                    node_id
4774                }
4775            }
4776        };
4777        memo.insert(node_id, out);
4778        out
4779    }
4780
4781    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4782        let mut sorted: Vec<NodeId> = nodes.collect();
4783        if sorted.len() <= 1 {
4784            return sorted.pop().unwrap_or(NodeId::BOT);
4785        }
4786        sorted.sort();
4787        sorted.dedup();
4788        sorted.retain(|&x| x != NodeId::BOT);
4789        if sorted.is_empty() {
4790            return NodeId::BOT;
4791        }
4792        if sorted.len() > 16 {
4793            let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
4794            let mut non_concat: Vec<NodeId> = Vec::new();
4795            for &n in &sorted {
4796                if n.is_concat(self) {
4797                    by_head.entry(self.get_left(n)).or_default().push(n);
4798                } else {
4799                    non_concat.push(n);
4800                }
4801            }
4802            let mut absorbed: Vec<NodeId> = Vec::new();
4803            for &n in &non_concat {
4804                if by_head.contains_key(&n) {
4805                    absorbed.push(n);
4806                }
4807            }
4808            if !absorbed.is_empty() {
4809                non_concat.retain(|n| !absorbed.contains(n));
4810            }
4811            if by_head.len() < sorted.len() {
4812                let mut groups: Vec<NodeId> = non_concat;
4813                for (head, tails) in by_head {
4814                    let mut tail_nodes: Vec<NodeId> =
4815                        tails.iter().map(|&n| self.get_right(n)).collect();
4816                    if absorbed.contains(&head) {
4817                        tail_nodes.push(NodeId::EPS);
4818                    }
4819                    let tail_union = self.mk_unions(tail_nodes.into_iter());
4820                    let factored = self.mk_concat(head, tail_union);
4821                    groups.push(factored);
4822                }
4823                groups.sort();
4824                groups.dedup();
4825                return self.mk_unions_balanced(&groups);
4826            }
4827        }
4828        self.mk_unions_balanced(&sorted)
4829    }
4830
4831    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
4832        match nodes.len() {
4833            0 => NodeId::BOT,
4834            1 => nodes[0],
4835            n => {
4836                let mid = n / 2;
4837                let left = self.mk_unions_balanced(&nodes[..mid]);
4838                let right = self.mk_unions_balanced(&nodes[mid..]);
4839                self.mk_union(left, right)
4840            }
4841        }
4842    }
4843
4844    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4845        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
4846    }
4847
4848    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4849        nodes
4850            .rev()
4851            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
4852    }
4853}
4854
4855impl RegexBuilder {
4856    pub fn iter_sat(
4857        &mut self,
4858        stack: &mut Vec<(TRegexId, TSetId)>,
4859        f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
4860    ) {
4861        debug_assert!(!stack.is_empty());
4862        loop {
4863            match stack.pop() {
4864                None => return,
4865                Some((curr, curr_set)) => match *self.get_tregex(curr) {
4866                    TRegex::Leaf(n) => {
4867                        let mut curr = n;
4868                        while curr != NodeId::BOT {
4869                            match self.get_kind(curr) {
4870                                _ => {
4871                                    f(self, n, curr_set);
4872                                    curr = NodeId::BOT;
4873                                }
4874                            }
4875                        }
4876                    }
4877                    TRegex::ITE(cnd, then_id, else_id) => {
4878                        if else_id != TRegexId::BOT {
4879                            let notcnd = self.solver().not_id(cnd);
4880                            let interset1 = self.solver().and_id(curr_set, notcnd);
4881                            stack.push((else_id, interset1));
4882                        }
4883                        let interset2 = self.solver().and_id(curr_set, cnd);
4884                        stack.push((then_id, interset2));
4885                    }
4886                },
4887            }
4888        }
4889    }
4890
4891    #[allow(dead_code)]
4892    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
4893        match self.get_tregex(term_id).clone() {
4894            TRegex::Leaf(node_id) => {
4895                if NodeId::BOT == node_id {
4896                    vec![]
4897                } else {
4898                    vec![node_id]
4899                }
4900            }
4901            TRegex::ITE(_, then_id, else_id) => {
4902                let mut then_nodes = self.extract_sat(then_id);
4903                let mut else_nodes = self.extract_sat(else_id);
4904                then_nodes.append(&mut else_nodes);
4905                then_nodes
4906            }
4907        }
4908    }
4909
4910    pub(crate) fn iter_unions_b(
4911        &mut self,
4912        curr: NodeId,
4913        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4914    ) {
4915        let mut curr = curr;
4916        while self.get_kind(curr) == Kind::Union {
4917            f(self, curr.left(self));
4918            curr = curr.right(self);
4919        }
4920        f(self, curr);
4921    }
4922
4923    pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4924        if !self.contains_look(node_id) {
4925            return Some(node_id);
4926        }
4927        match self.get_kind(node_id) {
4928            Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4929            Kind::Concat => {
4930                let left = node_id.left(self);
4931                let right = node_id.right(self);
4932                let elim_l = self.try_elim_lookarounds(left)?;
4933                let elim_r = self.try_elim_lookarounds(right)?;
4934                let rw = self.mk_concat(elim_l, elim_r);
4935                Some(rw)
4936            }
4937            Kind::Union => {
4938                let left = node_id.left(self);
4939                let right = node_id.right(self);
4940                let elim_l = self.try_elim_lookarounds(left)?;
4941                let elim_r = self.try_elim_lookarounds(right)?;
4942                let rw = self.mk_union(elim_l, elim_r);
4943                Some(rw)
4944            }
4945
4946            Kind::Star => {
4947                let body = node_id.left(self);
4948                let elim_l = self.try_elim_lookarounds(body)?;
4949                Some(self.mk_star(elim_l))
4950            }
4951            Kind::Compl => {
4952                let left = node_id.left(self);
4953                let elim_l = self.try_elim_lookarounds(left)?;
4954                Some(self.mk_compl(elim_l))
4955            }
4956            Kind::Lookahead => {
4957                let rel = self.get_lookahead_rel(node_id);
4958                if rel != 0 {
4959                    return None;
4960                }
4961                let lbody = self.get_lookahead_inner(node_id);
4962                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4963                let elim_l = self.try_elim_lookarounds(lbody)?;
4964                let elim_r = self.try_elim_lookarounds(ltail)?;
4965                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4966                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4967                let rw = self.mk_inter(lbody_ts, ltail_ts);
4968                Some(rw)
4969            }
4970            Kind::Lookbehind => {
4971                let linner = self.get_lookbehind_inner(node_id);
4972                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4973                let elim_l = self.try_elim_lookarounds(linner)?;
4974                let elim_r = self.try_elim_lookarounds(lprev)?;
4975                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4976                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4977                let rw = self.mk_inter(lbody_ts, ltail_ts);
4978                Some(rw)
4979            }
4980            Kind::Inter => {
4981                let left = node_id.left(self);
4982                let right = node_id.right(self);
4983                let elim_l = self.try_elim_lookarounds(left)?;
4984                let elim_r = self.try_elim_lookarounds(right)?;
4985                let rw = self.mk_inter(elim_l, elim_r);
4986                Some(rw)
4987            }
4988            Kind::Ordered => None,
4989        }
4990    }
4991
4992    /// R & _+ is a safe overapproximation of R that's nonempty
4993    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4994        if self.nullability(node) == Nullability::NEVER {
4995            node
4996        } else {
4997            self.mk_inter(NodeId::TOPPLUS, node)
4998        }
4999    }
5000
5001    pub fn iter_find_stack(
5002        &self,
5003        stack: &mut Vec<TRegexId>,
5004        mut f: impl FnMut(NodeId) -> bool,
5005    ) -> bool {
5006        loop {
5007            match stack.pop() {
5008                None => return false,
5009                Some(curr) => match self.get_tregex(curr) {
5010                    TRegex::Leaf(n) => {
5011                        let mut curr = *n;
5012                        while curr != NodeId::BOT {
5013                            match self.get_kind(curr) {
5014                                Kind::Union => {
5015                                    if f(curr.left(self)) {
5016                                        return true;
5017                                    }
5018                                    curr = curr.right(self);
5019                                }
5020                                _ => {
5021                                    if f(*n) {
5022                                        return true;
5023                                    }
5024                                    curr = NodeId::BOT;
5025                                }
5026                            }
5027                        }
5028                    }
5029                    TRegex::ITE(_, then_id, else_id) => {
5030                        if *else_id != TRegexId::BOT {
5031                            stack.push(*else_id);
5032                        }
5033                        stack.push(*then_id);
5034                    }
5035                },
5036            }
5037        }
5038    }
5039
5040    // pub fn is_equiv(&mut self, nodea: NodeId, nodeb: NodeId) -> Option<bool> {
5041    //     if nodea == nodeb {
5042    //         return Some(true);
5043    //     }
5044    //     let (nodea, nodeb) = if self.contains_look(nodea) || self.contains_look(nodeb) {
5045    //         let wrap = |b: &mut Self, n: NodeId| {
5046    //             let tmp = b.mk_concat(n, NodeId::TS);
5047    //             b.mk_concat(NodeId::TS, tmp)
5048    //         };
5049    //         (wrap(self, nodea), wrap(self, nodeb))
5050    //     } else {
5051    //         (nodea, nodeb)
5052    //     };
5053    //     if self.nullability(nodea) != self.nullability(nodeb) {
5054    //         return Some(false);
5055    //     }
5056    //     // A⊕B = (A\B)∪(B\A) = ⊥
5057    //     let nota = self.mk_compl(nodea);
5058    //     let notb = self.mk_compl(nodeb);
5059    //     let anotb = self.mk_inter(nodea, notb);
5060    //     let bnota = self.mk_inter(nodeb, nota);
5061    //     let diff = self.mk_union(anotb, bnota);
5062    //     self.is_empty_lang(diff)
5063    // }
5064
5065    pub fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
5066        if node == NodeId::BOT {
5067            return Some(true);
5068        }
5069        if self.nullability(node) != Nullability::NEVER {
5070            return Some(false);
5071        }
5072        if let Some(cached) = self.cache_empty.get(&node) {
5073            if cached.is_checked() {
5074                return Some(cached.is_empty());
5075            }
5076        }
5077        let node = if !self.contains_look(node) {
5078            node
5079        } else {
5080            self.try_elim_lookarounds(node)?
5081        };
5082        match self.is_empty_lang_internal(node) {
5083            Ok(flag) => Some(flag == NodeFlags::IS_EMPTY),
5084            Err(_) => None,
5085        }
5086    }
5087
5088    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, ResharpError> {
5089        // without inter, no need to check
5090        if !self.get_meta_flags(initial_node).contains_inter() {
5091            return Ok(NodeFlags::ZERO);
5092        }
5093
5094        let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
5095        let mut worklist: VecDeque<NodeId> = VecDeque::new();
5096        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
5097        let mut stack = Vec::new();
5098        stack.push(begin_der);
5099        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
5100            visited.insert(node, initial_node);
5101            let nullability = self.nullability(node);
5102            if nullability != Nullability::NEVER {
5103                true
5104            } else {
5105                worklist.push_back(node);
5106                false
5107            }
5108        });
5109        if found_nullable_right_away {
5110            return Ok(NodeFlags::ZERO);
5111        }
5112
5113        worklist.push_back(initial_node);
5114        let isempty_flag: NodeFlags;
5115        let mut found_node = NodeId::BOT;
5116
5117        loop {
5118            if visited.len() > IS_EMPTY_LANG_NODE_BUDGET {
5119                return Err(ResharpError::StateSpaceExplosion);
5120            }
5121            match worklist.pop_front() {
5122                None => {
5123                    isempty_flag = NodeFlags::IS_EMPTY;
5124                    break;
5125                }
5126                Some(outer) => {
5127                    if let Some(cached) = self.cache_empty.get(&outer) {
5128                        if cached.is_checked() {
5129                            if cached.is_empty() {
5130                                continue;
5131                            } else {
5132                                return Ok(NodeFlags::ZERO);
5133                            }
5134                        }
5135                    }
5136
5137                    let derivative = self.der(outer, Nullability::CENTER)?;
5138
5139                    stack.push(derivative);
5140
5141                    let found_null = self.iter_find_stack(&mut stack, |node| {
5142                        if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
5143                            found_node = node;
5144                            if !self.get_meta_flags(node).contains_inter() {
5145                                true
5146                            } else {
5147                                e.insert(outer);
5148                                worklist.push_front(node);
5149                                self.any_nonbegin_nullable(node)
5150                            }
5151                        } else {
5152                            false
5153                        }
5154                    });
5155                    if found_null {
5156                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
5157                        isempty_flag = NodeFlags::ZERO;
5158                        break;
5159                    }
5160                }
5161            }
5162        }
5163
5164        self.cache_empty.insert(
5165            initial_node,
5166            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
5167        );
5168        Ok(isempty_flag)
5169    }
5170
5171    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
5172    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
5173        if larger_lang == smaller_lang {
5174            return Some(true);
5175        }
5176
5177        // assess initial nullability
5178        if self
5179            .nullability(larger_lang)
5180            .not()
5181            .and(self.nullability(smaller_lang))
5182            != Nullability::NEVER
5183        {
5184            return Some(false);
5185        }
5186
5187        // check language nullability
5188        // if (B &~ A) ≡ ⊥ then B ⊆ A
5189        // this means  L(B) \ L(A) = {}
5190        // eg. (a &~ .*) = ⊥ means .* subsumes a
5191        let wrap = |b: &mut Self, n: NodeId| {
5192            if b.contains_look(n) {
5193                let tmp = b.mk_concat(n, NodeId::TS);
5194                b.mk_concat(NodeId::TS, tmp)
5195            } else {
5196                n
5197            }
5198        };
5199        let smaller_lang = wrap(self, smaller_lang);
5200        let larger_lang = wrap(self, larger_lang);
5201
5202        let nota = self.mk_compl(larger_lang);
5203        let diff = self.mk_inter(smaller_lang, nota);
5204        self.is_empty_lang(diff)
5205    }
5206}