Skip to main content

resharp_algebra/
lib.rs

1//! Boolean algebra and symbolic rewriting engine for resharp regex.
2//!
3//! Provides regex node construction, symbolic derivatives, nullability analysis,
4//! and algebraic simplification via rewrite rules.
5
6#![warn(dead_code)]
7
8use solver::{Solver, TSetId};
9use std::collections::{BTreeSet, HashMap, VecDeque};
10use std::fmt::Debug;
11use std::fmt::Write;
12use std::hash::Hash;
13
14use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
15pub mod nulls;
16pub mod solver;
17
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum AlgebraError {
20    AnchorLimit,
21    StateSpaceExplosion,
22    UnsupportedPattern,
23}
24
25impl std::fmt::Display for AlgebraError {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        match self {
28            AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
29            AlgebraError::StateSpaceExplosion => {
30                write!(f, "too many states, likely infinite state space")
31            }
32            AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
33        }
34    }
35}
36
37impl std::error::Error for AlgebraError {}
38
39mod id {
40    pub const MISSING: u32 = 0;
41    pub const BOT: u32 = 1;
42    pub const EPS: u32 = 2;
43    pub const TOP: u32 = 3;
44    pub const TOPSTAR: u32 = 4;
45    pub const TOPPLUS: u32 = 5;
46    pub const BEGIN: u32 = 6;
47    pub const END: u32 = 7;
48}
49
50#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
51pub(crate) struct MetadataId(u32);
52impl MetadataId {
53    pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
54}
55
56#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
57pub struct NodeId(pub u32);
58impl NodeId {
59    pub const MISSING: NodeId = NodeId(id::MISSING);
60    pub const BOT: NodeId = NodeId(id::BOT);
61    pub const EPS: NodeId = NodeId(id::EPS);
62    pub const TOP: NodeId = NodeId(id::TOP);
63    pub const TS: NodeId = NodeId(id::TOPSTAR);
64    pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
65    pub const BEGIN: NodeId = NodeId(id::BEGIN);
66    pub const END: NodeId = NodeId(id::END);
67
68    #[inline]
69    pub fn as_u32(self) -> u32 {
70        self.0
71    }
72
73    #[inline]
74    pub fn from_u32(v: u32) -> NodeId {
75        NodeId(v)
76    }
77}
78impl Debug for NodeId {
79    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
80        let num = &self.0;
81        f.write_str(format!("{num}").as_str())
82    }
83}
84
85#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
86pub struct TRegexId(u32);
87impl TRegexId {
88    pub const MISSING: TRegexId = TRegexId(id::MISSING);
89    pub const EPS: TRegexId = TRegexId(id::EPS);
90    pub const BOT: TRegexId = TRegexId(id::BOT);
91    pub const TOP: TRegexId = TRegexId(id::TOP);
92    pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
93}
94
95#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
96pub(crate) struct MetaFlags(u8);
97impl MetaFlags {
98    const NULL_MASK: u8 = 0b111; // first 3 bits for nullability
99
100    pub(crate) const ZERO: MetaFlags = MetaFlags(0);
101    pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
102    pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
103    pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
104    pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
105    pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
106
107    #[inline]
108    pub(crate) fn nullability(self) -> Nullability {
109        Nullability(self.0 as u8 & Self::NULL_MASK as u8)
110    }
111
112    #[inline]
113    pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
114        MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
115    }
116
117    #[inline]
118    pub(crate) fn has(self, flag: MetaFlags) -> bool {
119        self.0 & flag.0 != 0
120    }
121    #[inline]
122    const fn and(self, other: MetaFlags) -> MetaFlags {
123        MetaFlags(self.0 & other.0)
124    }
125    #[inline]
126    const fn or(self, other: MetaFlags) -> MetaFlags {
127        MetaFlags(self.0 | other.0)
128    }
129
130    pub(crate) fn contains_lookaround(self) -> bool {
131        self.has(MetaFlags::CONTAINS_LOOKAROUND)
132    }
133    pub(crate) fn contains_inter(self) -> bool {
134        self.has(MetaFlags::CONTAINS_INTER)
135    }
136
137    pub(crate) fn all_contains_flags(self) -> MetaFlags {
138        self.and(
139            MetaFlags::CONTAINS_LOOKAROUND
140                .or(MetaFlags::CONTAINS_ANCHORS)
141                .or(MetaFlags::CONTAINS_INTER),
142        )
143    }
144}
145
146#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
147#[repr(u8)]
148pub enum Kind {
149    #[default]
150    Pred,
151    Star,
152    Begin,
153    End,
154    Concat,
155    Union,
156    Compl,
157    Lookbehind,
158    Lookahead,
159    Inter,
160}
161
162#[derive(Eq, Hash, PartialEq, Clone)]
163struct Metadata {
164    flags: MetaFlags,
165    nulls: NullsId,
166}
167
168struct MetadataBuilder {
169    num_created: u32,
170    solver: Solver,
171    nb: NullsBuilder,
172    index: HashMap<Metadata, MetadataId>,
173    pub array: Vec<Metadata>,
174}
175
176mod helpers {
177    pub(crate) fn incr_rel(n1: u32) -> u32 {
178        match n1.overflowing_add(1) {
179            (_, true) => u32::MAX,
180            (res, false) => res,
181        }
182    }
183}
184
185impl MetadataBuilder {
186    fn new() -> MetadataBuilder {
187        let mut arr = Vec::new();
188        arr.push(Metadata {
189            flags: MetaFlags::ZERO,
190            nulls: NullsId::EMPTY,
191        });
192        Self {
193            index: HashMap::new(),
194            array: arr,
195            solver: Solver::new(),
196            num_created: 0,
197            nb: NullsBuilder::new(),
198        }
199    }
200
201    fn init(&mut self, inst: Metadata) -> MetadataId {
202        self.num_created += 1;
203        let new_id = MetadataId(self.num_created);
204        self.index.insert(inst.clone(), new_id);
205        self.array.push(inst);
206        new_id
207    }
208
209    fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210        match self.index.get(&inst) {
211            Some(&id) => id,
212            None => self.init(inst),
213        }
214    }
215
216    fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217        &self.array[inst.0 as usize]
218    }
219
220    fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221        setflags.all_contains_flags()
222    }
223
224    fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
225        let left = &self.array[body.0 as usize].flags;
226        let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
227        // BOT* = EPS (empty string only), not infinite
228        let inf = if body_id == NodeId::BOT {
229            MetaFlags::ZERO
230        } else {
231            MetaFlags::INFINITE_LENGTH
232        };
233        MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
234    }
235
236    fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
237        let left = &self.array[left_id.0 as usize].flags;
238        let null = left.nullability().not().and(Nullability::ALWAYS);
239        let contains = self.get_contains(*left);
240        MetaFlags::with_nullability(
241            null,
242            contains
243                .or(MetaFlags::INFINITE_LENGTH)
244                .or(MetaFlags::CONTAINS_COMPL),
245        )
246    }
247
248    fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
249        let left = &self.array[left_id.0 as usize].flags;
250        let right = &self.array[right_id.0 as usize].flags;
251        let null = left.nullability().and(right.nullability());
252        let contains = self.get_contains(left.or(*right));
253        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
254        MetaFlags::with_nullability(null, contains.or(len))
255    }
256
257    fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
258        let left = &self.array[left_id.0 as usize].flags;
259        let right = &self.array[right_id.0 as usize].flags;
260        let null = left.nullability().and(right.nullability());
261        let contains = self
262            .get_contains(left.or(*right))
263            .or(MetaFlags::CONTAINS_INTER);
264        let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
265        MetaFlags::with_nullability(null, contains.or(len))
266    }
267
268    fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
269        let left = &self.array[left_id.0 as usize].flags;
270        let right = &self.array[right_id.0 as usize].flags;
271        let null = left.nullability().or(right.nullability());
272        let contains = self.get_contains(left.or(*right));
273        let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
274        MetaFlags::with_nullability(null, contains.or(len))
275    }
276}
277
278#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
279pub struct NodeKey {
280    pub(crate) kind: Kind,
281    pub(crate) left: NodeId,
282    pub(crate) right: NodeId,
283    pub(crate) extra: u32,
284}
285
286#[derive(Eq, Hash, PartialEq, Clone, Debug)]
287pub enum TRegex<TSet> {
288    Leaf(NodeId),
289    ITE(TSet, TRegexId, TRegexId),
290}
291
292#[derive(Eq, Hash, PartialEq, Clone, Copy)]
293pub(crate) struct NodeFlags(u8);
294impl NodeFlags {
295    pub(crate) const ZERO: NodeFlags = NodeFlags(0);
296    pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
297    pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
298
299    fn is_checked(self) -> bool {
300        self.0 >= NodeFlags::IS_CHECKED.0
301    }
302    fn is_empty(self) -> bool {
303        self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
304    }
305}
306
307#[derive(Eq, Hash, PartialEq, Clone, Copy)]
308pub(crate) struct BuilderFlags(u8);
309impl BuilderFlags {
310    pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
311    pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
312}
313
314pub struct RegexBuilder {
315    mb: MetadataBuilder,
316    temp_vec: Vec<NodeId>,
317    num_created: u32,
318    index: HashMap<NodeKey, NodeId>,
319    array: Vec<NodeKey>,
320    metadata: Vec<MetadataId>,
321    reversed: Vec<NodeId>,
322    cache_empty: HashMap<NodeId, NodeFlags>,
323    tr_cache: HashMap<TRegex<TSetId>, TRegexId>,
324    tr_array: Vec<TRegex<TSetId>>,
325    tr_der_center: Vec<TRegexId>,
326    tr_der_begin: Vec<TRegexId>,
327    flags: BuilderFlags,
328    /// maximum lookahead context distance before returning `AnchorLimit`.
329    pub lookahead_context_max: u32,
330}
331
332macro_rules! iter_inter {
333    ($compiler:ident, $start:ident, $expression:expr) => {
334        let mut curr = $start;
335        while $compiler.get_kind(curr) == Kind::Inter {
336            let left = $compiler.get_left(curr);
337            $expression(left);
338            curr = $compiler.get_right(curr);
339        }
340        $expression(curr);
341    };
342}
343
344impl NodeId {
345    fn is_missing(&self) -> bool {
346        *self == NodeId::MISSING
347    }
348    #[inline]
349    fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
350        b.get_flags_contains(self)
351    }
352    pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
353        if self == tail {
354            true
355        } else if self.is_kind(b, Kind::Concat) {
356            self.right(b).has_concat_tail(b, tail)
357        } else {
358            false
359        }
360    }
361    fn missing_to_eps(&self) -> NodeId {
362        if *self == NodeId::MISSING {
363            NodeId::EPS
364        } else {
365            *self
366        }
367    }
368
369    #[inline]
370    fn kind(self, b: &RegexBuilder) -> Kind {
371        b.get_kind(self)
372    }
373    #[inline]
374    fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
375        b.get_kind(self) == k
376    }
377    #[inline]
378    fn is_never_nullable(self, b: &RegexBuilder) -> bool {
379        b.nullability(self) == Nullability::NEVER
380    }
381    #[inline]
382    fn nullability(self, b: &RegexBuilder) -> Nullability {
383        b.nullability(self)
384    }
385
386    #[inline]
387    fn is_center_nullable(self, b: &RegexBuilder) -> bool {
388        b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
389    }
390    #[inline]
391    pub fn left(self, b: &RegexBuilder) -> NodeId {
392        b.get_left(self)
393    }
394
395    #[inline]
396    pub fn right(self, b: &RegexBuilder) -> NodeId {
397        b.get_right(self)
398    }
399
400    #[inline]
401    fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
402        b.der(self, mask)
403    }
404
405    #[inline]
406    fn extra(self, b: &RegexBuilder) -> u32 {
407        b.get_extra(self)
408    }
409
410    #[inline]
411    fn is_pred(self, b: &RegexBuilder) -> bool {
412        b.get_kind(self) == Kind::Pred
413    }
414    #[inline]
415    fn is_lookahead(self, b: &RegexBuilder) -> bool {
416        b.get_kind(self) == Kind::Lookahead
417    }
418    #[inline]
419    pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
420        debug_assert!(self.is_pred(b));
421        TSetId(b.get_extra(self))
422    }
423    #[inline]
424    fn is_star(self, b: &RegexBuilder) -> bool {
425        if NodeId::EPS == self {
426            return false;
427        }
428        b.get_kind(self) == Kind::Star
429    }
430
431    #[inline]
432    pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
433        if self.is_concat(b) {
434            let r = self.right(b);
435            return r.is_star(b) && r.left(b) == self.left(b);
436        }
437        false
438    }
439
440    #[inline]
441    fn is_concat(self, b: &RegexBuilder) -> bool {
442        b.get_kind(self) == Kind::Concat
443    }
444
445    #[inline]
446    fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
447        if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
448            Some(b.get_right(self))
449        } else {
450            None
451        }
452    }
453
454    #[inline]
455    fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
456        if b.get_kind(self) == Kind::Concat {
457            let left = self.left(b);
458            let right = self.right(b);
459            if left.is_kind(b, Kind::Compl) {
460                if left.left(b) == NodeId::TOPPLUS {
461                    return right == NodeId::END;
462                }
463            }
464        }
465        false
466    }
467
468    #[inline]
469    fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
470        if NodeId::EPS == self {
471            return None;
472        }
473        if self.is_star(b) && self.left(b).is_pred(b) {
474            Some(self.left(b))
475        } else {
476            None
477        }
478    }
479
480    #[inline]
481    fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
482        if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
483            let middle = self.right(b);
484            if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
485                return Some(middle.left(b));
486            }
487        }
488        None
489    }
490
491    #[inline]
492    pub(crate) fn iter_union(
493        self,
494        b: &mut RegexBuilder,
495        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
496    ) {
497        let mut curr: NodeId = self;
498        while curr.kind(b) == Kind::Union {
499            f(b, curr.left(b));
500            curr = curr.right(b);
501        }
502        f(b, curr);
503    }
504
505    #[inline]
506    pub(crate) fn iter_union_while(
507        self,
508        b: &mut RegexBuilder,
509        f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
510    ) {
511        let mut curr: NodeId = self;
512        let mut continue_loop = true;
513        while continue_loop && curr.kind(b) == Kind::Union {
514            continue_loop = f(b, curr.left(b));
515            curr = curr.right(b);
516        }
517        if continue_loop {
518            f(b, curr);
519        }
520    }
521}
522
523impl RegexBuilder {
524    pub fn new() -> RegexBuilder {
525        let mut inst = Self {
526            mb: MetadataBuilder::new(),
527            array: Vec::new(),
528            index: HashMap::new(),
529            cache_empty: HashMap::new(),
530            tr_array: Vec::new(),
531            tr_cache: HashMap::new(),
532            flags: BuilderFlags::ZERO,
533            lookahead_context_max: 800,
534            num_created: 0,
535            metadata: Vec::new(),
536            reversed: Vec::new(),
537            tr_der_center: Vec::new(),
538            tr_der_begin: Vec::new(),
539            temp_vec: Vec::new(),
540        };
541        inst.array.push(NodeKey::default());
542        inst.mk_pred(TSetId::EMPTY);
543        inst.mk_star(NodeId::BOT);
544        inst.mk_pred(TSetId::FULL);
545        inst.mk_star(NodeId::TOP);
546        let top_plus_id = inst.mk_plus(NodeId::TOP);
547        inst.mk_unset(Kind::Begin);
548        inst.mk_unset(Kind::End);
549        debug_assert!(top_plus_id == NodeId::TOPPLUS);
550
551        inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
552        inst.mk_leaf(NodeId::BOT);
553        inst.mk_leaf(NodeId::EPS);
554        inst.mk_leaf(NodeId::TOP);
555        inst.mk_leaf(NodeId::TS);
556
557        inst.flags = BuilderFlags::SUBSUME;
558        inst
559    }
560
561    #[inline]
562    pub fn solver_ref(&self) -> &Solver {
563        &self.mb.solver
564    }
565
566    #[inline]
567    pub fn solver(&mut self) -> &mut Solver {
568        &mut self.mb.solver
569    }
570
571    fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
572        let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
573        self.tr_cache.insert(inst.clone(), new_id);
574        self.tr_array.push(inst);
575        new_id
576    }
577
578    fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
579        match self.tr_cache.get(&inst) {
580            Some(&id) => id,
581            None => self.tr_init(inst),
582        }
583    }
584
585    pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
586        &self.tr_array[inst.0 as usize]
587    }
588
589    fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
590        let solv = self.solver();
591        !solv.is_sat_id(cond1, cond2)
592    }
593
594    pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
595        let term = TRegex::<TSetId>::Leaf(node_id);
596        self.get_tregex_id(term)
597    }
598
599    fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
600        let tmp_inst = TRegex::ITE(cond, _then, _else);
601        if let Some(cached) = self.tr_cache.get(&tmp_inst) {
602            return *cached;
603        }
604        if _then == _else {
605            return _then;
606        }
607        if self.solver().is_full_id(cond) {
608            return _then;
609        }
610        if self.solver().is_empty_id(cond) {
611            return _else;
612        }
613        let _clean_then = self.clean(cond, _then);
614        let notcond = self.solver().not_id(cond);
615        let _clean_else = self.clean(notcond, _else);
616
617        if _clean_then == _clean_else {
618            return _clean_then;
619        }
620        // attempt left side flattening: ITE(.,ITE(a,ε,⊥),⊥) -> ITE(a,ε,⊥)
621        match self.get_tregex(_clean_then) {
622            TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
623                let _cond_then = leftcond.clone();
624                let new_then = _inner_then.clone();
625                let sand = self.solver().and_id(cond, _cond_then);
626                return self.mk_ite(sand, new_then, _clean_else);
627            }
628            _ => {}
629        }
630        // attempt right side flattening:
631        match self.get_tregex(_clean_else) {
632            // ITE(a,ε,ITE(b,ε,⊥)) -> ITE([ab],ε,⊥)
633            TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
634                let e2clone = _e2.clone();
635                let new_cond = self.mb.solver.or_id(cond, _c2.clone());
636                return self.mk_ite(new_cond, _clean_then, e2clone);
637            }
638            _ => {}
639        }
640
641        if _clean_then == TRegexId::BOT {
642            let flip_cond = self.solver().not_id(cond);
643            let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
644            return cleaned_id;
645        }
646
647        self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
648    }
649
650    fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
651        match *self.get_tregex(tterm) {
652            TRegex::Leaf(_) => return tterm,
653            TRegex::ITE(alpha, _then_id, _else_id) => {
654                let notalpha = self.mb.solver.not_id(alpha);
655                if self.mb.solver.unsat_id(beta, alpha) {
656                    let ec = self.mb.solver.and_id(beta, notalpha);
657                    let new_else = self.clean(ec, _else_id);
658                    return new_else;
659                }
660                if self.unsat(beta, notalpha) {
661                    let tc = self.mb.solver.and_id(beta, alpha);
662                    let new_then = self.clean(tc, _then_id);
663                    return new_then;
664                }
665
666                let ei = _else_id;
667                let tc = self.mb.solver.and_id(beta, alpha);
668                let ec = self.mb.solver.and_id(beta, notalpha);
669                let new_then = self.clean(tc, _then_id);
670                let new_else = self.clean(ec, ei);
671                return self.mk_ite(tc, new_then, new_else);
672            }
673        }
674    }
675
676    fn mk_unary(
677        &mut self,
678        term: TRegexId,
679        apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
680    ) -> TRegexId {
681        match self.tr_array[term.0 as usize] {
682            TRegex::Leaf(node_id) => {
683                let applied = apply(self, node_id);
684                self.mk_leaf(applied)
685            }
686            TRegex::ITE(c1, _then, _else) => {
687                let _then1 = self.mk_unary(_then, apply);
688                let _else1 = self.mk_unary(_else, apply);
689                self.mk_ite(c1, _then1, _else1)
690            }
691        }
692    }
693
694    fn mk_binary_result(
695        &mut self,
696        left: TRegexId,
697        right: TRegexId,
698        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
699    ) -> Result<TRegexId, AlgebraError> {
700        match self.tr_array[left.0 as usize] {
701            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
702                TRegex::Leaf(right_node_id) => {
703                    let applied = apply(self, left_node_id, right_node_id)?;
704                    Ok(self.mk_leaf(applied))
705                }
706                TRegex::ITE(c2, _then, _else) => {
707                    let then2 = self.mk_binary_result(left, _then, apply)?;
708                    let else2 = self.mk_binary_result(left, _else, apply)?;
709                    Ok(self.mk_ite(c2, then2, else2))
710                }
711            },
712            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
713                TRegex::Leaf(_) => {
714                    let then2 = self.mk_binary_result(_then1, right, apply)?;
715                    let else2 = self.mk_binary_result(_else1, right, apply)?;
716                    Ok(self.mk_ite(c1, then2, else2))
717                }
718                TRegex::ITE(c2, _then2, _else2) => {
719                    if c1 == c2 {
720                        let _then = self.mk_binary_result(_then1, _then2, apply)?;
721                        let _else = self.mk_binary_result(_else1, _else2, apply)?;
722                        return Ok(self.mk_ite(c1, _then, _else));
723                    }
724                    if c1.0 > c2.0 {
725                        let _then = self.mk_binary_result(_then1, right, apply)?;
726                        let _else = self.mk_binary_result(_else1, right, apply)?;
727                        return Ok(self.mk_ite(c1, _then, _else));
728                    } else {
729                        let _then = self.mk_binary_result(left, _then2, apply)?;
730                        let _else = self.mk_binary_result(left, _else2, apply)?;
731                        return Ok(self.mk_ite(c2, _then, _else));
732                    }
733                }
734            },
735        }
736    }
737
738    fn mk_binary(
739        &mut self,
740        left: TRegexId,
741        right: TRegexId,
742        apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
743    ) -> TRegexId {
744        match self.tr_array[left.0 as usize] {
745            TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
746                TRegex::Leaf(right_node_id) => {
747                    let applied = apply(self, left_node_id, right_node_id);
748                    self.mk_leaf(applied)
749                }
750                TRegex::ITE(c2, _then, _else) => {
751                    let then2 = self.mk_binary(left, _then, apply);
752                    let else2 = self.mk_binary(left, _else, apply);
753                    self.mk_ite(c2, then2, else2)
754                }
755            },
756            TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
757                TRegex::Leaf(_) => {
758                    let then2 = self.mk_binary(_then1, right, apply);
759                    let else2 = self.mk_binary(_else1, right, apply);
760                    self.mk_ite(c1, then2, else2)
761                }
762                TRegex::ITE(c2, _then2, _else2) => {
763                    if c1 == c2 {
764                        let _then = self.mk_binary(_then1, _then2, apply);
765                        let _else = self.mk_binary(_else1, _else2, apply);
766                        return self.mk_ite(c1, _then, _else);
767                    }
768                    if c1.0 > c2.0 {
769                        let _then = self.mk_binary(_then1, right, apply);
770                        let _else = self.mk_binary(_else1, right, apply);
771                        return self.mk_ite(c1, _then, _else);
772                    } else {
773                        let _then = self.mk_binary(left, _then2, apply);
774                        let _else = self.mk_binary(left, _else2, apply);
775                        return self.mk_ite(c2, _then, _else);
776                    }
777                }
778            },
779        }
780    }
781
782    pub fn get_nulls(
783        &mut self,
784        pending_rel: u32,
785        mask: Nullability,
786        acc: &mut BTreeSet<NullState>,
787        node_id: NodeId,
788    ) {
789        debug_assert!(node_id != NodeId::MISSING);
790        if !self.is_nullable(node_id, mask) {
791            return;
792        }
793        match self.get_kind(node_id) {
794            Kind::Pred => {}
795            Kind::End => {
796                if mask.has(Nullability::END) {
797                    acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
798                }
799            }
800            Kind::Begin => {
801                if mask.has(Nullability::BEGIN) {
802                    acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
803                }
804            }
805            Kind::Concat => {
806                let new_mask = self.nullability(node_id).and(mask);
807                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
808                if self.is_nullable(node_id.left(self), mask) {
809                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
810                }
811            }
812            Kind::Union => {
813                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
814                self.get_nulls(pending_rel, mask, acc, node_id.right(self));
815            }
816            Kind::Inter => {
817                let new_mask = self.nullability(node_id).and(mask);
818                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
819                self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
820            }
821            Kind::Star => {
822                acc.insert(NullState::new(mask, pending_rel));
823                self.get_nulls(pending_rel, mask, acc, node_id.left(self));
824            }
825            Kind::Compl => {
826                if !self.is_nullable(node_id.left(self), mask) {
827                    acc.insert(NullState::new(mask, 0));
828                }
829            }
830            Kind::Lookbehind => {
831                let new_mask = self.nullability(node_id).and(mask);
832                self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
833                if node_id.right(self) != NodeId::MISSING {
834                    self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
835                }
836            }
837            Kind::Lookahead => {
838                let la_inner = self.get_lookahead_inner(node_id);
839                if self.is_nullable(la_inner, mask) {
840                    let rel = self.get_lookahead_rel(node_id);
841                    if rel != u32::MAX {
842                        self.get_nulls(pending_rel + rel, mask, acc, la_inner);
843                    }
844                }
845                let la_tail = self.get_lookahead_tail(node_id);
846                if la_tail != NodeId::MISSING {
847                    self.get_nulls(pending_rel, mask, acc, la_tail);
848                }
849            }
850        }
851    }
852
853    pub fn contains_look(&mut self, node_id: NodeId) -> bool {
854        self.get_meta_flags(node_id)
855            .has(MetaFlags::CONTAINS_LOOKAROUND)
856    }
857
858    pub fn is_infinite(&self, node_id: NodeId) -> bool {
859        self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
860    }
861
862    /// returns (min_length, max_length). max = u32::MAX means unbounded.
863    pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
864        if self.is_infinite(node_id) {
865            (self.get_min_length_only(node_id), u32::MAX)
866        } else {
867            self.get_bounded_length(node_id)
868        }
869    }
870
871    fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
872        match self.get_kind(node_id) {
873            Kind::End | Kind::Begin => (0, 0),
874            Kind::Pred => (1, 1),
875            Kind::Concat => {
876                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
877                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
878                (lmin + rmin, lmax.saturating_add(rmax))
879            }
880            Kind::Union => {
881                let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
882                let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
883                (lmin.min(rmin), lmax.max(rmax))
884            }
885            Kind::Inter => {
886                let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
887                let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
888                (lmin.max(rmin), lmax.min(rmax))
889            }
890            Kind::Lookahead => {
891                let right = node_id.right(self);
892                if right.is_missing() {
893                    (0, 0)
894                } else {
895                    self.get_min_max_length(right)
896                }
897            }
898            Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
899        }
900    }
901
902    pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
903        match self.get_kind(node_id) {
904            Kind::End | Kind::Begin => Some(0),
905            Kind::Pred => Some(1),
906            Kind::Concat => {
907                let l = self.get_fixed_length(node_id.left(self))?;
908                let r = self.get_fixed_length(node_id.right(self))?;
909                Some(l + r)
910            }
911            Kind::Union => {
912                let l = self.get_fixed_length(node_id.left(self))?;
913                let r = self.get_fixed_length(node_id.right(self))?;
914                if l == r {
915                    Some(l)
916                } else {
917                    None
918                }
919            }
920            Kind::Inter => {
921                let l = self.get_fixed_length(node_id.left(self))?;
922                let r = self.get_fixed_length(node_id.right(self))?;
923                if l == r {
924                    Some(l)
925                } else {
926                    None
927                }
928            }
929            Kind::Lookahead => {
930                let right = node_id.right(self);
931                if right.is_missing() {
932                    Some(0)
933                } else {
934                    self.get_fixed_length(right)
935                }
936            }
937            Kind::Star | Kind::Lookbehind | Kind::Compl => None,
938        }
939    }
940
941    fn get_min_length_only(&self, node_id: NodeId) -> u32 {
942        match self.get_kind(node_id) {
943            Kind::End | Kind::Begin => 0,
944            Kind::Pred => 1,
945            Kind::Concat => {
946                self.get_min_length_only(node_id.left(self))
947                    + self.get_min_length_only(node_id.right(self))
948            }
949            Kind::Union => self
950                .get_min_length_only(node_id.left(self))
951                .min(self.get_min_length_only(node_id.right(self))),
952            Kind::Inter => self
953                .get_min_length_only(node_id.left(self))
954                .max(self.get_min_length_only(node_id.right(self))),
955            Kind::Star | Kind::Lookbehind | Kind::Lookahead => 0,
956            Kind::Compl => {
957                if self.nullability(node_id.left(self)) == Nullability::NEVER {
958                    0
959                } else {
960                    1
961                }
962            }
963        }
964    }
965
966    fn starts_with_ts(&self, node_id: NodeId) -> bool {
967        if node_id == NodeId::TS {
968            return true;
969        }
970        match self.get_kind(node_id) {
971            Kind::Inter => {
972                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
973            }
974            Kind::Union => {
975                self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
976            }
977            Kind::Concat => self.starts_with_ts(node_id.left(self)),
978            _ => false,
979        }
980    }
981
982    #[inline]
983    pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
984        if self.get_kind(node_id) == Kind::Concat {
985            self.ends_with_ts(node_id.right(self))
986        } else {
987            node_id == NodeId::TS
988        }
989    }
990
991    pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
992        debug_assert!(node_id != NodeId::MISSING);
993        self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
994    }
995
996    pub(crate) fn cache_der(
997        &mut self,
998        node_id: NodeId,
999        result: TRegexId,
1000        mask: Nullability,
1001    ) -> TRegexId {
1002        if mask == Nullability::CENTER {
1003            self.tr_der_center[node_id.0 as usize] = result
1004        } else {
1005            self.tr_der_begin[node_id.0 as usize] = result
1006        };
1007        result
1008    }
1009
1010    pub(crate) fn try_cached_der(
1011        &mut self,
1012        node_id: NodeId,
1013        mask: Nullability,
1014    ) -> Option<TRegexId> {
1015        let cache = if mask == Nullability::CENTER {
1016            &mut self.tr_der_center
1017        } else {
1018            &mut self.tr_der_begin
1019        };
1020        match cache.get(node_id.0 as usize) {
1021            Some(&TRegexId::MISSING) => {}
1022            Some(&result) => {
1023                return Some(result);
1024            }
1025            None => {
1026                cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1027            }
1028        }
1029        return None;
1030    }
1031
1032    pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1033        let mut term = self.get_tregex(der);
1034        loop {
1035            match *term {
1036                TRegex::Leaf(node_id) => return node_id,
1037                TRegex::ITE(cond, _then, _else) => {
1038                    if self.solver().is_sat_id(set, cond) {
1039                        term = self.get_tregex(_then);
1040                    } else {
1041                        term = self.get_tregex(_else);
1042                    }
1043                }
1044            }
1045        }
1046    }
1047
1048    pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1049        debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1050        debug_assert!(
1051            node_id != NodeId::MISSING,
1052            "attempting to derive missing node"
1053        );
1054        if let Some(result) = self.try_cached_der(node_id, mask) {
1055            return Ok(result);
1056        }
1057
1058        let result = match node_id.kind(self) {
1059            Kind::Compl => {
1060                let leftd = node_id.left(self).der(self, mask)?;
1061                self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1062            }
1063            Kind::Inter => {
1064                let leftd = node_id.left(self).der(self, mask)?;
1065                let rightd = node_id.right(self).der(self, mask)?;
1066                {
1067                    let this = &mut *self;
1068                    this.mk_binary(
1069                        leftd,
1070                        rightd,
1071                        &mut (|b, left, right| b.mk_inter(left, right)),
1072                    )
1073                }
1074            }
1075            Kind::Union => {
1076                let leftd = self.der(node_id.left(self), mask)?;
1077                let rightd = self.der(node_id.right(self), mask)?;
1078                {
1079                    let this = &mut *self;
1080                    this.mk_binary(
1081                        leftd,
1082                        rightd,
1083                        &mut (|b, left, right| b.mk_union(left, right)),
1084                    )
1085                }
1086            }
1087            Kind::Concat => {
1088                let head = node_id.left(self);
1089                let tail = node_id.right(self);
1090                let tail_leaf = self.mk_leaf(tail);
1091                let head_der = self.der(head, mask)?;
1092
1093                if self.is_nullable(head, mask) {
1094                    let rightd = self.der(tail, mask)?;
1095                    let ldr = {
1096                        let this = &mut *self;
1097                        this.mk_binary(
1098                            head_der,
1099                            tail_leaf,
1100                            &mut (|b, left, right| b.mk_concat(left, right)),
1101                        )
1102                    };
1103                    {
1104                        let this = &mut *self;
1105                        this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1106                    }
1107                } else {
1108                    let ldr = {
1109                        let this = &mut *self;
1110                        this.mk_binary(
1111                            head_der,
1112                            tail_leaf,
1113                            &mut (|b, left, right| b.mk_concat(left, right)),
1114                        )
1115                    };
1116                    ldr
1117                }
1118            }
1119            Kind::Star => {
1120                if node_id == NodeId::EPS {
1121                    TRegexId::BOT
1122                } else {
1123                    let left = node_id.left(self);
1124                    let r_decr_leaf = self.mk_leaf(node_id);
1125                    let r_der = self.der(left, mask)?;
1126                    let result = {
1127                        let this = &mut *self;
1128                        this.mk_binary(
1129                            r_der,
1130                            r_decr_leaf,
1131                            &mut (|b, left, right| b.mk_concat(left, right)),
1132                        )
1133                    };
1134                    result
1135                }
1136            }
1137            Kind::Lookbehind => {
1138                let lb_prev_der = {
1139                    let lb_prev = self.get_lookbehind_prev(node_id);
1140                    if lb_prev == NodeId::MISSING {
1141                        TRegexId::MISSING
1142                    } else {
1143                        self.der(lb_prev, mask)?
1144                    }
1145                };
1146                let lb_inner = self.get_lookbehind_inner(node_id);
1147                let lb_inner_der = self.der(lb_inner, mask)?;
1148                {
1149                    let this = &mut *self;
1150                    this.mk_binary_result(
1151                        lb_inner_der,
1152                        lb_prev_der,
1153                        &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1154                    )?
1155                }
1156            }
1157            Kind::Lookahead => {
1158                let la_tail = self.get_lookahead_tail(node_id);
1159                let la_body = node_id.left(self);
1160                let rel = self.get_lookahead_rel(node_id);
1161
1162                if self.is_nullable(la_body, mask) {
1163                    // nullabilty is taken once, just keep the body
1164                    let right = node_id.right(self).missing_to_eps();
1165                    let rder = self.der(right, mask).clone();
1166                    return rder;
1167                }
1168
1169                if rel == u32::MAX {
1170                    let la_body_der = self.der(la_body, mask)?;
1171                    if la_tail.is_kind(self, Kind::Pred) {
1172                        let transitioned =
1173                            self.transition_term(la_body_der, la_tail.pred_tset(self));
1174                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1175                        let concated = self.mk_concat(la_tail, new_la);
1176                        return self.der(concated, mask);
1177                    }
1178                    if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1179                        let left = la_tail.left(self);
1180                        let tset = left.pred_tset(self);
1181                        let transitioned = self.transition_term(la_body_der, tset);
1182                        let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1183                        let tail_right = la_tail.right(self);
1184                        let concated = self.mk_concat(new_la, tail_right);
1185                        let concated = self.mk_concat(left, concated);
1186                        return self.der(concated, mask);
1187                    }
1188                }
1189
1190                if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1191                    let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1192                    let concated = self.mk_concat(la_body, nulls_mask);
1193                    let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1194                    let non_nulled = self.mk_non_nullable_safe(la_tail);
1195                    let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1196                    let new_union = self.mk_union(concated_look, new_look);
1197                    return self.der(new_union, mask);
1198                }
1199
1200                let la_tail_der = if la_tail == NodeId::MISSING {
1201                    TRegexId::MISSING
1202                } else {
1203                    if self.is_nullable(la_tail, mask) {
1204                        let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1205                        let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1206                        let la_union = self.mk_union(la_tail, nulls_la);
1207                        self.der(la_union, mask)?
1208                    } else {
1209                        self.der(la_tail, mask)?
1210                    }
1211                };
1212
1213                let la_body_der = self.der(la_body, mask)?;
1214
1215                if rel != u32::MAX && rel > self.lookahead_context_max {
1216                    return Err(AlgebraError::AnchorLimit);
1217                }
1218
1219                let la = {
1220                    let this = &mut *self;
1221                    let rel = helpers::incr_rel(rel);
1222                    this.mk_binary(
1223                        la_body_der,
1224                        la_tail_der,
1225                        &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1226                    )
1227                };
1228
1229                if rel != u32::MAX
1230                    && la_tail_der != TRegexId::MISSING
1231                    && self.is_nullable(la_tail, mask)
1232                {
1233                    let look_only = {
1234                        let this = &mut *self;
1235                        let right = TRegexId::MISSING;
1236                        let rel = helpers::incr_rel(rel);
1237                        this.mk_binary(
1238                            la_body_der,
1239                            right,
1240                            &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1241                        )
1242                    };
1243                    {
1244                        let this = &mut *self;
1245                        this.mk_binary(
1246                            look_only,
1247                            la,
1248                            &mut (|b, left, right| b.mk_union(left, right)),
1249                        )
1250                    }
1251                } else {
1252                    la
1253                }
1254            }
1255            Kind::Begin | Kind::End => TRegexId::BOT,
1256            Kind::Pred => {
1257                let psi = node_id.pred_tset(&self);
1258                if psi == TSetId::EMPTY {
1259                    TRegexId::BOT
1260                } else {
1261                    self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1262                }
1263            }
1264        };
1265
1266        // println!("{} {}", node_id.0, self.pp(node_id));
1267        // println!("node: {} (total: {})", node_id.0, self.num_created);
1268
1269        self.cache_der(node_id, result, mask);
1270        Ok(result)
1271    }
1272
1273    fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1274        debug_assert!(meta_id != MetadataId::MISSING);
1275        match self.metadata.get_mut(node_id.0 as usize) {
1276            Some(v) => *v = meta_id,
1277            None => {
1278                self.metadata
1279                    .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1280                self.metadata[node_id.0 as usize] = meta_id;
1281            }
1282        }
1283    }
1284
1285    fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1286        debug_assert!(reversed_id != NodeId::MISSING);
1287        match self.reversed.get_mut(node_id.0 as usize) {
1288            Some(v) => *v = reversed_id,
1289            None => {
1290                self.reversed
1291                    .resize(node_id.0 as usize + 1, NodeId::MISSING);
1292                self.reversed[node_id.0 as usize] = reversed_id;
1293            }
1294        }
1295    }
1296
1297    fn init(&mut self, inst: NodeKey) -> NodeId {
1298        self.num_created += 1;
1299        let node_id = NodeId(self.num_created);
1300        self.index.insert(inst.clone(), node_id);
1301        match inst.kind {
1302            Kind::Pred => {
1303                let meta_id = self.mb.get_meta_id(Metadata {
1304                    flags: MetaFlags::ZERO,
1305                    nulls: NullsId::EMPTY,
1306                });
1307                self.init_metadata(node_id, meta_id);
1308            }
1309            Kind::Begin => {
1310                let meta_id = self.mb.get_meta_id(Metadata {
1311                    flags: MetaFlags::with_nullability(Nullability::BEGIN, MetaFlags::ZERO),
1312                    nulls: NullsId::BEGIN0,
1313                });
1314                self.init_metadata(node_id, meta_id);
1315            }
1316            Kind::End => {
1317                let meta_id = self.mb.get_meta_id(Metadata {
1318                    flags: MetaFlags::with_nullability(Nullability::END, MetaFlags::ZERO),
1319                    nulls: NullsId::END0,
1320                });
1321                self.init_metadata(node_id, meta_id);
1322            }
1323            Kind::Inter => {
1324                let m1 = self.get_node_meta_id(inst.left);
1325                let m2 = self.get_node_meta_id(inst.right);
1326                let meta_id = {
1327                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1328                    let mask_l = inst.left.nullability(self);
1329                    let mask_r = inst.right.nullability(self);
1330                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1331                    let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1332                    nulls = self.mb.nb.and_mask(nulls, mask_l);
1333                    nulls = self.mb.nb.and_mask(nulls, mask_r);
1334                    let new_meta = Metadata {
1335                        flags: self.mb.flags_inter(m1, m2),
1336                        nulls,
1337                    };
1338                    let new_id = self.mb.get_meta_id(new_meta);
1339
1340                    new_id
1341                };
1342                self.init_metadata(node_id, meta_id);
1343            }
1344            Kind::Union => {
1345                let m1 = self.get_node_meta_id(inst.left);
1346                let m2 = self.get_node_meta_id(inst.right);
1347                let meta_id = {
1348                    let left_nulls = self.mb.get_meta_ref(m1).nulls;
1349                    let right_nulls = self.mb.get_meta_ref(m2).nulls;
1350                    let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1351                    let new_meta = Metadata {
1352                        flags: self.mb.flags_union(m1, m2),
1353                        nulls,
1354                    };
1355                    let new_id = self.mb.get_meta_id(new_meta);
1356                    new_id
1357                };
1358                self.init_metadata(node_id, meta_id);
1359            }
1360            Kind::Concat => {
1361                let flags = self.mb.flags_concat(
1362                    self.get_node_meta_id(inst.left),
1363                    self.get_node_meta_id(inst.right),
1364                );
1365
1366                let right_nullability = inst.right.nullability(self);
1367                let left_nullability = inst.left.nullability(self);
1368                let nulls_left = self.get_nulls_id(inst.left);
1369                let nulls_right = self.get_nulls_id(inst.right);
1370                let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1371                let mask = right_nullability.and(left_nullability);
1372                nulls = self.mb.nb.and_mask(nulls, mask);
1373
1374                let new_id = self.mb.get_meta_id(Metadata {
1375                    flags,
1376                    nulls: nulls,
1377                });
1378                self.init_metadata(node_id, new_id);
1379            }
1380            Kind::Star => {
1381                let left_nulls = self.get_nulls_id(inst.left);
1382                let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1383                let meta_id = self.mb.get_meta_id(Metadata {
1384                    flags: self
1385                        .mb
1386                        .flags_star(self.get_node_meta_id(inst.left), inst.left),
1387                    nulls: nulls,
1388                });
1389                self.init_metadata(node_id, meta_id);
1390            }
1391            Kind::Compl => {
1392                let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1393                let meta_id = self.mb.get_meta_id(Metadata {
1394                    flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1395                    nulls: nulls,
1396                });
1397                self.init_metadata(node_id, meta_id);
1398            }
1399            Kind::Lookbehind => {
1400                let mut null = self.get_meta_flags(inst.left).nullability();
1401                let mut contains_flags = self.get_flags_contains(inst.left);
1402                if !inst.right.is_missing() {
1403                    null = null.and(self.get_meta_flags(inst.right).nullability());
1404                    contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1405                }
1406
1407                let left_nulls = self.get_nulls_id(inst.left);
1408                let right_nulls = self.get_nulls_id(inst.right);
1409                let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1410                let meta_id = self.mb.get_meta_id(Metadata {
1411                    flags: MetaFlags::with_nullability(
1412                        null,
1413                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1414                    ),
1415                    nulls: nulls,
1416                });
1417                self.init_metadata(node_id, meta_id);
1418            }
1419            Kind::Lookahead => {
1420                let mut nulls = self.get_nulls_id(inst.left);
1421                let left_nullability = inst.left.nullability(self);
1422                let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1423                nulls = self.mb.nb.or_id(nulls, nulls_right);
1424                nulls = self.mb.nb.add_rel(nulls, inst.extra);
1425
1426                let la_inner = inst.left;
1427                let la_tail = inst.right;
1428                let null = self
1429                    .get_meta_flags(la_inner)
1430                    .nullability()
1431                    .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1432                let contains_flags = self
1433                    .get_flags_contains(la_inner)
1434                    .or(self.get_flags_contains(la_tail));
1435
1436                let meta_id = self.mb.get_meta_id(Metadata {
1437                    flags: MetaFlags::with_nullability(
1438                        null,
1439                        contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1440                    ),
1441                    nulls: nulls,
1442                });
1443                self.init_metadata(node_id, meta_id);
1444            }
1445        }
1446
1447        self.array.push(inst);
1448
1449        if let Some(rw) = self.post_init_simplify(node_id) {
1450            self.override_as(node_id, rw)
1451        } else {
1452            node_id
1453        }
1454    }
1455
1456    fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1457        match self.get_kind(node_id) {
1458            Kind::Concat => {
1459                let lhs = node_id.left(self);
1460                let rhs = node_id.right(self);
1461                if let Some(_) = lhs.is_pred_star(self) {
1462                    // subsume the tail into the star if possible
1463                    if let Some(opttail) = rhs.is_opt_v(self) {
1464                        if let Some(true) = self.subsumes(lhs, opttail) {
1465                            // return self.override_as(node_id, lhs);
1466                            return Some(lhs);
1467                        }
1468                    }
1469                }
1470            }
1471            Kind::Union => {
1472                // if any branch of rhs structurally subsumes lhs, drop lhs
1473                let lhs = node_id.left(self);
1474                let rhs = node_id.right(self);
1475                let mut subsumed = false;
1476                rhs.iter_union_while(self, &mut |b, branch| {
1477                    if b.nullable_subsumes(branch, lhs) {
1478                        subsumed = true;
1479                    }
1480                    !subsumed
1481                });
1482                if subsumed {
1483                    return Some(rhs);
1484                }
1485                // if all branches of lhs are already branches of rhs, drop lhs
1486                if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1487                    return Some(rhs);
1488                }
1489            }
1490            _ => {}
1491        }
1492
1493        None
1494    }
1495
1496    /// checks if every branch of `lhs` (as a union tree) appears in `rhs` (as a union tree).
1497    fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1498        // only worth checking if lhs is itself a union
1499        if self.get_kind(lhs) != Kind::Union {
1500            return false; // single branch already checked by nullable_subsumes
1501        }
1502        // collect rhs branches
1503        let mut rhs_branches = Vec::new();
1504        let mut curr = rhs;
1505        while self.get_kind(curr) == Kind::Union {
1506            rhs_branches.push(self.get_left(curr));
1507            curr = self.get_right(curr);
1508        }
1509        rhs_branches.push(curr);
1510
1511        // check all lhs branches are in rhs
1512        curr = lhs;
1513        while self.get_kind(curr) == Kind::Union {
1514            if !rhs_branches.contains(&self.get_left(curr)) {
1515                return false;
1516            }
1517            curr = self.get_right(curr);
1518        }
1519        rhs_branches.contains(&curr)
1520    }
1521
1522    /// checks if `node` structurally subsumes `target` via nullable concat chains and union branches
1523    fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1524        if node == target {
1525            return true;
1526        }
1527        match self.get_kind(node) {
1528            Kind::Union => {
1529                self.nullable_subsumes(self.get_left(node), target)
1530                    || self.nullable_subsumes(self.get_right(node), target)
1531            }
1532            Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1533                self.nullable_subsumes(self.get_right(node), target)
1534            }
1535            _ => false,
1536        }
1537    }
1538
1539    pub fn num_nodes(&self) -> u32 {
1540        self.num_created
1541    }
1542    fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1543        match self.index.get(&inst) {
1544            Some(&id) => id,
1545            None => self.init(inst),
1546        }
1547    }
1548    #[inline]
1549    fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1550        self.index.get(inst)
1551    }
1552
1553    fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1554        self.index.insert(key, subsumed);
1555        subsumed
1556    }
1557
1558    pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1559        let key = &self.array[key.0 as usize];
1560        let entry = self.index.get_mut(key).unwrap();
1561        *entry = subsumed;
1562        subsumed
1563    }
1564
1565    #[inline]
1566    pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1567        self.array[node_id.0 as usize].left
1568    }
1569
1570    #[inline]
1571    pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1572        self.array[node_id.0 as usize].right
1573    }
1574
1575    #[inline]
1576    pub(crate) fn get_extra(&self, node_id: NodeId) -> u32 {
1577        self.array[node_id.0 as usize].extra
1578    }
1579
1580    #[inline]
1581    pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1582        debug_assert!(self.get_kind(node_id) == Kind::Concat);
1583        let mut curr = node_id;
1584        while self.get_kind(curr) == Kind::Concat {
1585            curr = curr.right(self);
1586        }
1587        curr
1588    }
1589
1590    /// true if node ends with a trailing Lookahead(_, MISSING)
1591    fn has_trailing_la(&self, node: NodeId) -> bool {
1592        let end = match self.get_kind(node) {
1593            Kind::Concat => self.get_concat_end(node),
1594            Kind::Lookahead => node,
1595            _ => return false,
1596        };
1597        self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1598    }
1599
1600    /// strip the trailing Lookahead from a node, returning (body, la).
1601    fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1602        if self.get_kind(node) == Kind::Lookahead {
1603            return (NodeId::EPS, node);
1604        }
1605        debug_assert!(self.get_kind(node) == Kind::Concat);
1606        let right = node.right(self);
1607        if self.get_kind(right) != Kind::Concat {
1608            // Concat(body, LA) → (body, LA)
1609            return (node.left(self), right);
1610        }
1611        let (stripped, la) = self.strip_trailing_la(right);
1612        (self.mk_concat(node.left(self), stripped), la)
1613    }
1614    #[inline]
1615    pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1616        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1617        lookahead_node_id.left(self)
1618    }
1619    #[inline]
1620    pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1621        debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1622        lookahead_node_id.right(self)
1623    }
1624    #[inline]
1625    pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1626        debug_assert!(
1627            self.get_kind(lookahead_node_id) == Kind::Lookahead,
1628            "not lookahead: {:?}",
1629            self.pp(lookahead_node_id)
1630        );
1631        self.get_extra(lookahead_node_id)
1632    }
1633    #[inline]
1634    pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1635        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1636        lookbehind_node_id.left(self)
1637    }
1638    #[inline]
1639    pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1640        debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1641        lookbehind_node_id.right(self)
1642    }
1643
1644    #[inline]
1645    pub fn get_kind(&self, node_id: NodeId) -> Kind {
1646        debug_assert!(
1647            self.array.len() > node_id.0 as usize,
1648            "array len: {:?}",
1649            node_id
1650        );
1651        self.array[node_id.0 as usize].kind
1652    }
1653
1654    #[inline]
1655    pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1656        &self.array[node_id.0 as usize]
1657    }
1658
1659    #[inline]
1660    fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1661        self.metadata[node_id.0 as usize]
1662    }
1663
1664    #[inline]
1665    fn get_meta(&self, node_id: NodeId) -> &Metadata {
1666        debug_assert!(node_id.0 != u32::MAX);
1667        let meta_id = self.get_node_meta_id(node_id);
1668        debug_assert!(meta_id != MetadataId::MISSING);
1669        &self.mb.array[meta_id.0 as usize]
1670    }
1671
1672    #[inline]
1673    pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1674        if node_id == NodeId::MISSING {
1675            NullsId::EMPTY
1676        } else {
1677            self.get_meta(node_id).nulls
1678        }
1679    }
1680
1681    pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1682        self.mb
1683            .nb
1684            .array
1685            .iter()
1686            .map(|set| set.iter().cloned().collect())
1687            .collect()
1688    }
1689
1690    pub fn nulls_count(&self) -> usize {
1691        self.mb.nb.array.len()
1692    }
1693
1694    pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1695        self.mb.nb.array[id as usize].iter().cloned().collect()
1696    }
1697
1698    #[inline]
1699    pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1700        if node_id == NodeId::MISSING {
1701            NullsId::EMPTY
1702        } else {
1703            let nulls = self.get_meta(node_id).nulls;
1704            self.mb.nb.and_mask(nulls, mask)
1705        }
1706    }
1707
1708    #[inline]
1709    pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1710        let meta_id = self.get_node_meta_id(node_id);
1711        let meta = &self.mb.array[meta_id.0 as usize];
1712        meta.flags
1713    }
1714
1715    #[inline]
1716    pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1717        self.get_meta(node_id).flags.nullability()
1718    }
1719
1720    #[inline]
1721    pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1722        let meta_id = self.get_node_meta_id(node_id);
1723        let meta = &self.mb.array[meta_id.0 as usize];
1724        meta.flags.all_contains_flags()
1725    }
1726
1727    pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1728        if !self.contains_look(node_id) {
1729            return Ok(node_id);
1730        }
1731        if self.get_kind(node_id) == Kind::Concat
1732            && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1733        {
1734            return self.strip_lb(node_id.right(self));
1735        }
1736        match self.get_kind(node_id) {
1737            Kind::Lookbehind | Kind::Lookahead => Err(AlgebraError::UnsupportedPattern),
1738            _ => Ok(node_id),
1739        }
1740    }
1741
1742    pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1743        debug_assert!(node_id != NodeId::MISSING);
1744        if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1745            if *rev != NodeId::MISSING {
1746                return Ok(*rev);
1747            }
1748        }
1749        let rw = match self.get_kind(node_id) {
1750            Kind::End => NodeId::BEGIN,
1751            Kind::Begin => NodeId::END,
1752            Kind::Pred => node_id,
1753            Kind::Concat => {
1754                let left = self.reverse(node_id.left(self))?;
1755                let right = self.reverse(node_id.right(self))?;
1756                self.mk_concat(right, left)
1757            }
1758            Kind::Union => {
1759                let left = self.reverse(node_id.left(self))?;
1760                let right = self.reverse(node_id.right(self))?;
1761                self.mk_union(left, right)
1762            }
1763            Kind::Inter => {
1764                let left = self.reverse(node_id.left(self))?;
1765                let right = self.reverse(node_id.right(self))?;
1766                self.mk_inter(left, right)
1767            }
1768            Kind::Star => {
1769                let body = self.reverse(node_id.left(self))?;
1770                self.mk_star(body)
1771            }
1772            Kind::Compl => {
1773                let body = self.reverse(node_id.left(self))?;
1774                self.mk_compl(body)
1775            }
1776            Kind::Lookbehind => {
1777                // lookbehind → lookahead when reversed (separate fwd/rev DFAs)
1778                let prev = self.get_lookbehind_prev(node_id);
1779                let inner_id = self.get_lookbehind_inner(node_id);
1780                let rev_inner = self.reverse(inner_id)?;
1781                let rev_prev = if prev != NodeId::MISSING {
1782                    self.reverse(prev)?
1783                } else {
1784                    NodeId::MISSING
1785                };
1786                self.mk_lookahead(rev_inner, rev_prev, 0)
1787            }
1788            Kind::Lookahead => {
1789                let rel = self.get_lookahead_rel(node_id);
1790                if rel == u32::MAX {
1791                    // rel MAX holds no nullability - rewrite to intersection
1792                    let lbody = self.get_lookahead_inner(node_id);
1793                    let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
1794                    let lbody_ts = self.mk_concat(lbody, NodeId::TS);
1795                    let ltail_ts = self.mk_concat(ltail, NodeId::TS);
1796                    let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1797                    let rev = self.reverse(as_inter)?;
1798                    self.init_reversed(node_id, rev);
1799                    return Ok(rev);
1800                }
1801                if rel != 0 {
1802                    return Err(AlgebraError::UnsupportedPattern);
1803                }
1804                // lookahead → lookbehind when reversed (separate fwd/rev DFAs)
1805                let tail_node = self.get_lookahead_tail(node_id);
1806                let rev_tail = if tail_node != NodeId::MISSING {
1807                    self.reverse(tail_node)?
1808                } else {
1809                    NodeId::MISSING
1810                };
1811                let inner_id = self.get_lookahead_inner(node_id);
1812                let rev_inner = self.reverse(inner_id)?;
1813                self.mk_lookbehind(rev_inner, rev_tail)
1814            }
1815        };
1816        self.init_reversed(node_id, rw);
1817        Ok(rw)
1818    }
1819
1820    pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
1821        let node = NodeKey {
1822            kind: Kind::Pred,
1823            left: NodeId::MISSING,
1824            right: NodeId::MISSING,
1825            extra: pred.0,
1826        };
1827        self.get_node_id(node)
1828    }
1829
1830    pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
1831        let key = NodeKey {
1832            kind: Kind::Compl,
1833            left: body,
1834            right: NodeId::MISSING,
1835            extra: u32::MAX,
1836        };
1837        if let Some(id) = self.key_is_created(&key) {
1838            return *id;
1839        }
1840        if body == NodeId::BOT {
1841            return NodeId::TS;
1842        }
1843        if body == NodeId::TS {
1844            return NodeId::BOT;
1845        }
1846
1847        if let Some(contains_body) = body.is_contains(&self) {
1848            if contains_body.is_pred(&self) {
1849                let pred = contains_body.pred_tset(&self);
1850                let notpred = self.mk_pred_not(pred);
1851                let node = self.mk_star(notpred);
1852                return self.init_as(key, node);
1853            } else if contains_body == NodeId::END {
1854                return self.init_as(key, NodeId::BOT);
1855            } else {
1856            }
1857        };
1858
1859        if self.get_kind(body) == Kind::Compl {
1860            return self.get_node(body).left;
1861        }
1862
1863        let node_id = self.get_node_id(key);
1864        node_id
1865    }
1866
1867    pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
1868        let nid = self.get_nulls_id(body);
1869        let nref = self.mb.nb.get_set_ref(nid).clone();
1870        let mut futures = NodeId::BOT;
1871        for n in nref.iter() {
1872            if !n.is_mask_nullable(mask) {
1873                continue;
1874            }
1875
1876            let eff = if n.rel == 0 {
1877                NodeId::EPS
1878            } else {
1879                self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
1880            };
1881            futures = self.mk_union(futures, eff)
1882        }
1883        futures
1884    }
1885
1886    // rewrites for 2 concats
1887    fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
1888        if cfg!(feature = "norewrite") {
1889            return None;
1890        }
1891
1892        // important lookaround rewrites for normal form pt 1
1893        if self.get_kind(tail) == Kind::Lookbehind {
1894            let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
1895            return match self
1896                .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
1897            {
1898                Ok(node) => Some(node),
1899                Err(_) => None,
1900            };
1901        }
1902        // important lookaround rewrites for normal form pt 2
1903        if self.get_kind(head) == Kind::Lookahead {
1904            let la_body = self.get_lookahead_inner(head);
1905            let la_tail = self.get_lookahead_tail(head);
1906            let la_rel = self.get_lookahead_rel(head);
1907            let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
1908            let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
1909                let tail_rel = self.get_lookahead_rel(new_la_tail);
1910                tail_rel + la_rel
1911            } else if new_la_tail.is_center_nullable(self) {
1912                la_rel
1913            } else {
1914                u32::MAX
1915            };
1916
1917            return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
1918        }
1919
1920        if head.is_kind(self, Kind::End) {
1921            if tail == NodeId::TS {
1922                return Some(head);
1923            }
1924        }
1925        if head == NodeId::TS {
1926            if tail == NodeId::END {
1927                return Some(head);
1928            }
1929        }
1930
1931        if head == NodeId::TS {
1932            if self.nullability(tail) == Nullability::ALWAYS {
1933                return Some(NodeId::TS);
1934            }
1935        }
1936
1937        if tail == NodeId::TS {
1938            if self.nullability(head) == Nullability::ALWAYS {
1939                return Some(NodeId::TS);
1940            }
1941        }
1942
1943        if self.get_kind(tail) == Kind::Union {
1944            // gather topstar to head
1945            if head == NodeId::TS {
1946                let mut should_distribute_top = false;
1947                self.iter_unions(tail, |v| {
1948                    if v == NodeId::BEGIN || self.starts_with_ts(v) {
1949                        should_distribute_top = true;
1950                    }
1951                });
1952                if should_distribute_top {
1953                    let mut new_union = NodeId::BOT;
1954                    let mut curr = tail;
1955                    while self.get_kind(curr) == Kind::Union {
1956                        let new_node = self.mk_concat(NodeId::TS, curr.left(self));
1957                        new_union = self.mk_union(new_node, new_union);
1958                        curr = curr.right(self);
1959                    }
1960                    let new_node = self.mk_concat(NodeId::TS, curr);
1961                    new_union = self.mk_union(new_union, new_node);
1962                    return Some(new_union);
1963                }
1964            }
1965        }
1966
1967        if self.get_kind(head) == Kind::Union {
1968            // gather topstar to head
1969            if head == NodeId::TS {
1970                let mut should_distribute_top = false;
1971                self.iter_unions(head, |v| {
1972                    if v == NodeId::END || self.starts_with_ts(v) {
1973                        should_distribute_top = true;
1974                    }
1975                });
1976                if should_distribute_top {
1977                    let mut new_union = NodeId::BOT;
1978                    let mut curr = head;
1979                    while self.get_kind(curr) == Kind::Union {
1980                        let new_node = self.mk_concat(curr.left(self), NodeId::TS);
1981                        new_union = self.mk_union(new_node, new_union);
1982                        curr = curr.right(self);
1983                    }
1984                    let new_node = self.mk_concat(curr, NodeId::TS);
1985                    new_union = self.mk_union(new_union, new_node);
1986                    return Some(new_union);
1987                }
1988            }
1989        }
1990
1991        if self.get_kind(head) == Kind::Inter {
1992            if tail == NodeId::TS {
1993                let mut alltopstar = true;
1994                iter_inter!(self, head, |v| {
1995                    alltopstar = self.ends_with_ts(v);
1996                });
1997                if alltopstar {
1998                    return Some(head);
1999                }
2000            }
2001        }
2002
2003        if head.is_star(self) && head == tail {
2004            return Some(head);
2005        }
2006
2007        None
2008    }
2009
2010    // useful rewrites for 2 unions
2011    fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2012        use Kind::*;
2013
2014        if cfg!(feature = "norewrite") {
2015            return None;
2016        }
2017        if left == right {
2018            return Some(left);
2019        }
2020
2021        if right.is_kind(self, Kind::Union) && left == right.left(self) {
2022            return Some(right);
2023        }
2024
2025        if self.get_kind(left) == Kind::Pred {
2026            if self.get_kind(right) == Kind::Pred {
2027                let l = left.pred_tset(&self);
2028                let r = right.pred_tset(&self);
2029                let solver = self.solver();
2030                let psi = solver.or_id(l, r);
2031                let rewrite = self.mk_pred(psi);
2032                return Some(rewrite);
2033            }
2034        }
2035
2036        if left == NodeId::EPS {
2037            if self.get_extra(left) == 0 {
2038                if self.nullability(right) == Nullability::ALWAYS {
2039                    return Some(right);
2040                }
2041            }
2042        }
2043
2044        if self.get_kind(left) == Kind::Lookahead {
2045            if self.get_kind(right) == Kind::Lookahead {
2046                let lb = left.left(self);
2047                let lt = left.right(self);
2048                let lrel = left.extra(self);
2049
2050                let rb = right.left(self);
2051                let rt = right.right(self);
2052                let rrel = right.extra(self);
2053
2054                if lrel == rrel {
2055                    if lb == rb {}
2056                    if lt.is_missing() {
2057                        if rt.is_missing() {
2058                            let unioned = self.mk_union(lb, rb);
2059                            let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2060                            return Some(node);
2061                        }
2062                    }
2063                }
2064            }
2065        }
2066
2067        if right.is_kind(self, Concat) {
2068            if left == NodeId::END {
2069                if right.left(self) == NodeId::END {
2070                    if self.nullability(right).has(Nullability::END) {
2071                        return Some(right);
2072                    }
2073                }
2074            }
2075            // .*|.*a.* => .*(a.*|)
2076            if left == right.left(self) {
2077                let rhs = self.mk_union(NodeId::EPS, right.right(self));
2078                let rw = self.mk_concat(left, rhs);
2079                return Some(rw);
2080            }
2081            if left.is_kind(self, Concat) {
2082                let head1 = left.left(self);
2083                let head2 = right.left(self);
2084
2085                if head1 == head2 {
2086                    let t1 = left.right(self);
2087                    let t2 = right.right(self);
2088                    // opportunistic rewrites
2089                    if head1 == NodeId::TS {
2090                        if t2.has_concat_tail(self, t1) {
2091                            return Some(left);
2092                        }
2093                        if t1.has_concat_tail(self, t2) {
2094                            return Some(right);
2095                        }
2096                    }
2097                    let un = self.mk_union(t1, t2);
2098                    return Some(self.mk_concat(left.left(self), un));
2099                }
2100
2101                // xa|ya => (x|y)a - suffix factoring via reverse
2102                // disabled: introduces reversed intermediate patterns that bloat derivative DFA
2103                if false {
2104                    let end1 = self.get_concat_end(left);
2105                    let end2 = self.get_concat_end(right);
2106                    if end1 == end2 {
2107                        let flags = left.flags_contains(self).or(right.flags_contains(self));
2108                        if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2109                            let rev1 = self.reverse(left).unwrap();
2110                            let rev2 = self.reverse(right).unwrap();
2111
2112                            let union_rev = self.mk_union(rev1, rev2);
2113                            return Some(self.reverse(union_rev).unwrap());
2114                        }
2115                    }
2116                }
2117            }
2118            if self.get_kind(left) == Kind::Pred {
2119                if left == right.left(self) {
2120                    let un = self.mk_opt(right.right(self));
2121                    let conc = self.mk_concat(left, un);
2122                    return Some(conc);
2123                }
2124            }
2125        }
2126
2127        if left == NodeId::EPS {
2128            if right.is_plus(self) {
2129                let result = self.mk_star(right.left(self));
2130                return Some(result);
2131            }
2132        }
2133
2134        // e.g. (.*&X{19}_*&C) | (.*&X{20}_*&C) => (.*&X{19}_*&C)
2135        if self.flags == BuilderFlags::SUBSUME {
2136            if let Some(rw) = self.try_subsume_inter_union(left, right) {
2137                return Some(rw);
2138            }
2139        }
2140
2141        None
2142    }
2143
2144    fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2145        let mut curr = node;
2146        while self.get_kind(curr) == Kind::Inter {
2147            out.push(self.get_left(curr));
2148            curr = self.get_right(curr);
2149        }
2150        out.push(curr);
2151    }
2152
2153    /// check if node is [_*] Pred(X)^count . Star(Y)
2154    /// returns (has_prefix_star, pred_tset, star_node, count)
2155    fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2156        let mut curr = node;
2157        let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2158        if has_prefix {
2159            curr = self.get_right(curr);
2160        }
2161        let mut count = 0u32;
2162        let mut pred_set = None;
2163        while self.get_kind(curr) == Kind::Concat {
2164            let head = self.get_left(curr);
2165            if self.get_kind(head) != Kind::Pred {
2166                return None;
2167            }
2168            let ps = head.pred_tset(self);
2169            match pred_set {
2170                None => pred_set = Some(ps),
2171                Some(existing) => {
2172                    if existing != ps {
2173                        return None;
2174                    }
2175                }
2176            }
2177            curr = self.get_right(curr);
2178            count += 1;
2179        }
2180        if count == 0 || self.get_kind(curr) != Kind::Star {
2181            return None;
2182        }
2183        Some((has_prefix, pred_set.unwrap(), curr, count))
2184    }
2185
2186    fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2187        let mut j = 0;
2188        for &s in sub {
2189            while j < sup.len() && sup[j] < s {
2190                j += 1;
2191            }
2192            if j >= sup.len() || sup[j] != s {
2193                return false;
2194            }
2195            j += 1;
2196        }
2197        true
2198    }
2199
2200    fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2201        if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2202            return None;
2203        }
2204
2205        let mut lc: Vec<NodeId> = Vec::new();
2206        let mut rc: Vec<NodeId> = Vec::new();
2207        self.collect_inter_components(left, &mut lc);
2208        self.collect_inter_components(right, &mut rc);
2209
2210        // component subset check: fewer constraints = larger language
2211        // if lc ⊆ rc then L(left) ⊇ L(right), keep left
2212        if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2213            return Some(left);
2214        }
2215        // if rc ⊆ lc then L(right) ⊇ L(left), keep right
2216        if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2217            return Some(right);
2218        }
2219
2220        if lc.len() == rc.len() {
2221            let mut diff_idx = None;
2222            for i in 0..lc.len() {
2223                if lc[i] != rc[i] {
2224                    if diff_idx.is_some() {
2225                        return None;
2226                    }
2227                    diff_idx = Some(i);
2228                }
2229            }
2230            if let Some(idx) = diff_idx {
2231                let a = lc[idx];
2232                let b = rc[idx];
2233                if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2234                    (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2235                {
2236                    if pfa == pfb && pa == pb && sa == sb && ca != cb {
2237                        return if ca < cb { Some(left) } else { Some(right) };
2238                    }
2239                }
2240            }
2241        }
2242
2243        None
2244    }
2245
2246    fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2247        if cfg!(feature = "norewrite") {
2248            return None;
2249        }
2250        if left == right {
2251            return Some(left);
2252        }
2253
2254        if self.get_kind(right) == Kind::Union {
2255            let mut result = NodeId::BOT;
2256            self.iter_unions_b(
2257                right,
2258                &mut (|b, v| {
2259                    let new_inter = b.mk_inter(v, left);
2260                    result = b.mk_union(result, new_inter);
2261                }),
2262            );
2263            return Some(result);
2264        }
2265        if self.get_kind(left) == Kind::Union {
2266            let mut result = NodeId::BOT;
2267            self.iter_unions_b(
2268                left,
2269                &mut (|b, v| {
2270                    let new_inter = b.mk_inter(v, right);
2271                    result = b.mk_union(result, new_inter);
2272                }),
2273            );
2274            return Some(result);
2275        }
2276
2277        if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2278            return Some(NodeId::BOT);
2279        }
2280
2281        if left.kind(&self) == Kind::Compl {
2282            if right.kind(&self) == Kind::Compl {
2283                let bodies = self.mk_union(left.left(self), right.left(self));
2284                return Some(self.mk_compl(bodies));
2285            }
2286        }
2287
2288        if left == NodeId::TOPPLUS {
2289            if let Some(_) = right.is_pred_star(self) {
2290                let newloop = self.mk_plus(right.left(self));
2291                return Some(newloop);
2292            }
2293            if right.is_never_nullable(&self) {
2294                return Some(right);
2295            }
2296            if right.is_kind(self, Kind::Lookahead) {
2297                if self.get_lookahead_tail(right).is_missing() {
2298                    return Some(NodeId::BOT);
2299                }
2300            }
2301            if right.is_kind(self, Kind::Concat) {}
2302        }
2303
2304        if right.is_lookahead(&self) {
2305            if left.is_never_nullable(&self) {
2306                return Some(NodeId::BOT);
2307            }
2308        }
2309
2310        if self.get_kind(right) == Kind::Compl {
2311            let compl_body = right.left(self);
2312            if left == compl_body {
2313                return Some(NodeId::BOT);
2314            }
2315            if self.get_kind(compl_body) == Kind::Concat {
2316                let compl_head = compl_body.left(self);
2317                // not starts with
2318                if compl_body.right(self) == NodeId::TS {
2319                    if compl_head == left {
2320                        return Some(NodeId::BOT);
2321                    }
2322                }
2323            }
2324        }
2325
2326        if let Some(pleft) = left.is_pred_star(&self) {
2327            if let Some(pright) = right.is_pred_star(&self) {
2328                let merged = self.mk_inter(pleft, pright);
2329                return Some(self.mk_star(merged));
2330            }
2331        }
2332
2333        // factor lookbehinds out of intersections
2334        // Concat(LB1, body1) & Concat(LB2, body2) → Concat(LB(inner1&inner2), body1&body2)
2335        // Concat(LB, body1) & body2 → Concat(LB, body1&body2)
2336        {
2337            let l_is_clb = self.get_kind(left) == Kind::Concat
2338                && self.get_kind(left.left(self)) == Kind::Lookbehind;
2339            let r_is_clb = self.get_kind(right) == Kind::Concat
2340                && self.get_kind(right.left(self)) == Kind::Lookbehind;
2341            if l_is_clb || r_is_clb {
2342                let (lb, body) = if l_is_clb && r_is_clb {
2343                    let lb1 = left.left(self);
2344                    let lb2 = right.left(self);
2345                    let inner = self.mk_inter(
2346                        self.get_lookbehind_inner(lb1),
2347                        self.get_lookbehind_inner(lb2),
2348                    );
2349                    // lb_prev is MISSING - cannot trigger UnsupportedPattern
2350                    let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2351                    let body = self.mk_inter(left.right(self), right.right(self));
2352                    (lb, body)
2353                } else if l_is_clb {
2354                    let lb = left.left(self);
2355                    let body = self.mk_inter(left.right(self), right);
2356                    (lb, body)
2357                } else {
2358                    let lb = right.left(self);
2359                    let body = self.mk_inter(left, right.right(self));
2360                    (lb, body)
2361                };
2362                return Some(self.mk_concat(lb, body));
2363            }
2364        }
2365
2366        // factor trailing lookaheads out of intersections
2367        // body1·LA & body2 → (body1&body2)·LA
2368        // body1·LA1 & body2·LA2 → (body1&body2)·LA(inner1&inner2)
2369        {
2370            let l_has_la = self.has_trailing_la(left);
2371            let r_has_la = self.has_trailing_la(right);
2372            if l_has_la || r_has_la {
2373                let (body, la) = if l_has_la && r_has_la {
2374                    let (lbody, l_la) = self.strip_trailing_la(left);
2375                    let (rbody, r_la) = self.strip_trailing_la(right);
2376                    let inner = self.mk_inter(
2377                        self.get_lookahead_inner(l_la),
2378                        self.get_lookahead_inner(r_la),
2379                    );
2380                    let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2381                    let body = self.mk_inter(lbody, rbody);
2382                    (body, la)
2383                } else if l_has_la {
2384                    let (lbody, la) = self.strip_trailing_la(left);
2385                    let body = self.mk_inter(lbody, right);
2386                    (body, la)
2387                } else {
2388                    let (rbody, la) = self.strip_trailing_la(right);
2389                    let body = self.mk_inter(left, rbody);
2390                    (body, la)
2391                };
2392                return Some(self.mk_concat(body, la));
2393            }
2394        }
2395
2396        return None;
2397    }
2398
2399    fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2400        if cfg!(feature = "norewrite") {
2401            return None;
2402        }
2403        debug_assert!(self.get_kind(right_union) == Kind::Union);
2404
2405        let mut rewritten = None;
2406        right_union.iter_union_while(
2407            self,
2408            &mut (|b, v| {
2409                if let Some(rw) = b.attempt_rw_union_2(left, v) {
2410                    rewritten = Some((v, rw));
2411                    false
2412                } else {
2413                    true
2414                }
2415            }),
2416        );
2417
2418        if let Some(rw) = rewritten {
2419            let mut new_union = NodeId::BOT;
2420            right_union.iter_union(
2421                self,
2422                &mut (|b, v| {
2423                    if v == rw.0 {
2424                        new_union = b.mk_union(rw.1, new_union)
2425                    } else {
2426                        new_union = b.mk_union(v, new_union)
2427                    }
2428                }),
2429            );
2430            return Some(new_union);
2431        };
2432
2433        return None;
2434    }
2435
2436    pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2437        debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2438        debug_assert!(tail != NodeId::MISSING);
2439        let key = NodeKey {
2440            kind: Kind::Concat,
2441            left: head,
2442            right: tail,
2443            extra: u32::MAX,
2444        };
2445        if let Some(id) = self.key_is_created(&key) {
2446            return *id;
2447        }
2448
2449        if head == NodeId::BOT || tail == NodeId::BOT {
2450            return NodeId::BOT;
2451        }
2452        if head == NodeId::EPS {
2453            return tail;
2454        }
2455        if tail == NodeId::EPS {
2456            return head;
2457        }
2458
2459        match tail {
2460            // this is only valid if direction known;
2461            NodeId::BEGIN => {
2462                if !self.is_nullable(head, Nullability::BEGIN) {
2463                    return NodeId::BOT;
2464                } else {
2465                    return NodeId::BEGIN;
2466                }
2467            }
2468            _ => {}
2469        }
2470
2471        // normalize concats to right
2472        if head.is_kind(self, Kind::Concat) {
2473            let left = head.left(self);
2474            let newright = self.mk_concat(head.right(self), tail);
2475            let updated = self.mk_concat(left, newright);
2476            return self.init_as(key, updated);
2477        }
2478
2479        if head == NodeId::TS && tail == NodeId::END {
2480            return NodeId::TS;
2481        }
2482
2483        if self.get_kind(head) == Kind::End {
2484            if !self.is_nullable(tail, Nullability::END) {
2485                return NodeId::BOT;
2486            }
2487        }
2488
2489        if self.get_kind(tail) == Kind::Concat {
2490            if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2491                let upd = self.mk_concat(rw, tail.right(self));
2492                return self.init_as(key, upd);
2493            }
2494        }
2495
2496        match self.attempt_rw_concat_2(head, tail) {
2497            Some(new) => return self.init_as(key, new),
2498            None => {}
2499        }
2500
2501        match (self.get_kind(head), self.get_kind(tail)) {
2502            // merge stars
2503            (Kind::Star, Kind::Concat) if head.is_star(&self) => {
2504                let rl = tail.left(self);
2505                if head == rl {
2506                    return self.init_as(key, tail);
2507                }
2508            }
2509            // attempt longer concat rw
2510            (_, Kind::Concat) => {
2511                let curr = head;
2512                let h2 = self.mk_concat(curr, tail.left(self));
2513                let tr = tail.right(self);
2514                match self.attempt_rw_concat_2(h2, tr) {
2515                    Some(new) => {
2516                        return self.init_as(key, new);
2517                    }
2518                    None => {}
2519                }
2520            }
2521            _ if head == NodeId::TS => {
2522                if self.starts_with_ts(tail) {
2523                    return self.init_as(key, tail);
2524                }
2525            }
2526            _ => {}
2527        }
2528
2529        self.init(key)
2530    }
2531
2532    pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2533        // LNF: lookbehind must start with ts
2534        let lb_body = {
2535            match self.starts_with_ts(lb_body) {
2536                true => lb_body,
2537                false => self.mk_concat(NodeId::TS, lb_body),
2538            }
2539        };
2540        // lb_body starts with TS after normalization above, so EPS case cannot trigger
2541        self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2542    }
2543
2544    fn mk_lookbehind_internal(
2545        &mut self,
2546        lb_body: NodeId,
2547        lb_prev: NodeId,
2548    ) -> Result<NodeId, AlgebraError> {
2549        debug_assert!(lb_body != NodeId::MISSING);
2550        debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2551        if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2552            return Ok(NodeId::BOT);
2553        }
2554        if lb_body == NodeId::TS {
2555            return Ok(lb_prev);
2556        }
2557        if lb_body == NodeId::EPS {
2558            match lb_prev {
2559                NodeId::MISSING => return Ok(NodeId::EPS),
2560                NodeId::EPS => return Ok(NodeId::EPS),
2561                _ => return Err(AlgebraError::UnsupportedPattern),
2562            }
2563        }
2564
2565        let key = NodeKey {
2566            kind: Kind::Lookbehind,
2567            left: lb_body,
2568            right: lb_prev,
2569            extra: u32::MAX,
2570        };
2571        match self.key_is_created(&key) {
2572            Some(id) => return Ok(*id),
2573            None => {
2574                if lb_prev == NodeId::TS {
2575                    return Ok(self.mk_concat(lb_prev, lb_body));
2576                }
2577
2578                Ok(self.init(key))
2579            }
2580        }
2581    }
2582
2583    pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2584        // LNF: lookahead must end with ts
2585        let la_body = {
2586            match self.ends_with_ts(la_body) {
2587                true => la_body,
2588                false => self.mk_concat(la_body, NodeId::TS),
2589            }
2590        };
2591        let rel = if NodeId::MISSING == la_tail {
2592            rel
2593        } else {
2594            match la_tail.is_center_nullable(self) {
2595                false => u32::MAX,
2596                true => rel,
2597            }
2598        };
2599
2600        self.mk_lookahead_internal(la_body, la_tail, rel)
2601    }
2602
2603    // rel max = carries no nullability, can potentially rw to intersection
2604    pub(crate) fn mk_lookahead_internal(
2605        &mut self,
2606        la_body: NodeId,
2607        la_tail: NodeId,
2608        rel: u32,
2609    ) -> NodeId {
2610        let key = NodeKey {
2611            kind: Kind::Lookahead,
2612            left: la_body,
2613            right: la_tail,
2614            extra: rel,
2615        };
2616        if let Some(id) = self.key_is_created(&key) {
2617            return *id;
2618        }
2619        if la_body == NodeId::TS {
2620            if rel == 0 {
2621                return la_tail.missing_to_eps();
2622            } else {
2623                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2624            }
2625        }
2626        if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2627            return NodeId::BOT;
2628        }
2629        if la_tail.is_missing() && rel == u32::MAX {
2630            return NodeId::BOT;
2631        }
2632
2633        if la_body == NodeId::EPS && la_tail.is_missing() {
2634            if rel == 0 {
2635                return la_body;
2636            }
2637        }
2638
2639        if la_tail == NodeId::TS {
2640            if rel == 0 || rel == u32::MAX {
2641                return self.mk_concat(la_body, NodeId::TS);
2642            } else if rel == u32::MAX {
2643                return self.mk_begins_with(la_body);
2644            }
2645        }
2646
2647        if rel == u32::MAX {
2648            if la_tail.is_missing() {
2649                return NodeId::BOT;
2650            }
2651
2652            if self.is_always_nullable(la_body) {
2653                return la_tail.missing_to_eps();
2654            }
2655
2656            if la_tail != NodeId::MISSING {
2657                match self.get_kind(la_tail) {
2658                    _ => {
2659                        if la_body.is_compl_plus_end(&self) {
2660                            let minlen = self.get_min_length_only(la_tail);
2661                            if minlen >= 1 {
2662                                return NodeId::BOT;
2663                            }
2664                        }
2665                    }
2666                }
2667            }
2668        }
2669
2670        // flatten consecutive lookaheads: (?=A, tail=(?=B, tail=C)) → (?=A._* & B._*, tail=C)
2671        if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2672            let la_body2 = self.get_lookahead_inner(la_tail);
2673            let body1_ts = self.mk_concat(la_body, NodeId::TS);
2674            let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2675            let new_la_body = self.mk_inter(body1_ts, body2_ts);
2676            let new_la_rel = self.get_lookahead_rel(la_tail);
2677            let new_la_tail = self.get_lookahead_tail(la_tail);
2678            return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2679        }
2680
2681        // _*\z -> too expensive to keep; null it
2682        if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2683            let la_body_right = la_body.right(self);
2684            if self.is_always_nullable(la_body_right) {
2685                return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2686            }
2687            if la_body.right(self) == NodeId::END {
2688                return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2689            }
2690            let bodyright = la_body.right(self);
2691            if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2692                let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2693                return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2694            }
2695        }
2696
2697        if la_tail != NodeId::MISSING {
2698            match (self.get_kind(la_body), self.get_kind(la_tail)) {
2699                (Kind::Concat, Kind::Pred) => {
2700                    let lpred = la_body.left(self);
2701                    if self.get_kind(lpred) == Kind::Pred {
2702                        let l = lpred.pred_tset(&self);
2703                        let r = la_tail.pred_tset(&self);
2704                        let psi_and = self.solver().and_id(l, r);
2705                        let rewrite = self.mk_pred(psi_and);
2706                        let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2707                        let new_right = self.mk_lookahead_internal(
2708                            la_body.right(self),
2709                            NodeId::MISSING,
2710                            new_rel,
2711                        );
2712                        return self.mk_concat(rewrite, new_right);
2713                    }
2714                }
2715                _ => {}
2716            }
2717        }
2718
2719        self.get_node_id(key)
2720    }
2721
2722    pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2723        match self.get_node(body).kind {
2724            Kind::Pred => {
2725                let psi = body.pred_tset(&self);
2726                let negated = self.mk_pred_not(psi);
2727                let union = self.mk_union(NodeId::END, negated);
2728                let la = self.mk_lookahead(union, NodeId::MISSING, rel);
2729                la
2730            }
2731            _ => {
2732                let neg_inner = self.mk_concat(body, NodeId::TS);
2733                let neg_part = self.mk_compl(neg_inner);
2734                let conc = self.mk_concat(neg_part, NodeId::END);
2735                let look = self.mk_lookahead(conc, NodeId::MISSING, rel);
2736                look
2737            }
2738        }
2739    }
2740
2741    pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2742        match self.get_node(body).kind {
2743            Kind::Pred => {
2744                let psi = body.pred_tset(&self);
2745                let negated = self.mk_pred_not(psi);
2746                let union = self.mk_union(NodeId::BEGIN, negated);
2747                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2748                self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2749            }
2750            _ => {
2751                let neg_inner = self.mk_concat(NodeId::TS, body);
2752                let neg_part = self.mk_compl(neg_inner);
2753                let conc = self.mk_concat(NodeId::BEGIN, neg_part);
2754                // lb_prev is MISSING - cannot trigger UnsupportedPattern
2755                self.mk_lookbehind_internal(conc, NodeId::MISSING).unwrap()
2756            }
2757        }
2758    }
2759
2760    pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2761        debug_assert!(left != NodeId::MISSING);
2762        debug_assert!(right != NodeId::MISSING);
2763        if left > right {
2764            return self.mk_union(right, left);
2765        }
2766        let key = NodeKey {
2767            kind: Kind::Union,
2768            left,
2769            right,
2770            extra: u32::MAX,
2771        };
2772        if let Some(id) = self.key_is_created(&key) {
2773            return *id;
2774        }
2775
2776        if left == right {
2777            return left;
2778        }
2779        if left == NodeId::BOT {
2780            return right;
2781        }
2782        if right == NodeId::BOT {
2783            return left;
2784        }
2785        if right == NodeId::TS {
2786            return right;
2787        }
2788        if left == NodeId::TS {
2789            return left;
2790        }
2791
2792        match (self.get_kind(left), self.get_kind(right)) {
2793            // always move union to the right
2794            (Kind::Union, _) => {
2795                self.iter_unions_b(left, &mut |b, v| {
2796                    b.temp_vec.push(v);
2797                });
2798                self.iter_unions_b(right, &mut |b, v| {
2799                    b.temp_vec.push(v);
2800                });
2801                self.temp_vec.sort();
2802                let tree = self.temp_vec.clone();
2803                self.temp_vec.clear();
2804                let newnode = tree
2805                    .iter()
2806                    .rev()
2807                    .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2808                return self.init_as(key, newnode);
2809            }
2810            (_, Kind::Union) => {
2811                let rleft = right.left(&self);
2812                // if left_node id is smaller than rleft, just create a new union
2813                if left > rleft {
2814                    self.iter_unions_b(left, &mut |b, v| {
2815                        b.temp_vec.push(v);
2816                    });
2817                    self.iter_unions_b(right, &mut |b, v| {
2818                        b.temp_vec.push(v);
2819                    });
2820                    self.temp_vec.sort();
2821                    let tree = self.temp_vec.clone();
2822                    self.temp_vec.clear();
2823                    let newnode = tree
2824                        .iter()
2825                        .rev()
2826                        .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2827                    return self.init_as(key, newnode);
2828                } else {
2829                    if let Some(rw) = self.attempt_rw_unions(left, right) {
2830                        return self.init_as(key, rw);
2831                    }
2832                }
2833            }
2834            _ => {}
2835        }
2836
2837        if let Some(rw) = self.attempt_rw_union_2(left, right) {
2838            return self.init_as(key, rw);
2839        }
2840        self.init(key)
2841    }
2842
2843    pub(crate) fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
2844        debug_assert!(left_id != NodeId::MISSING);
2845        debug_assert!(right_id != NodeId::MISSING);
2846        if left_id == right_id {
2847            return left_id;
2848        }
2849        if left_id == NodeId::BOT || right_id == NodeId::BOT {
2850            return NodeId::BOT;
2851        }
2852        if left_id == NodeId::TS {
2853            return right_id;
2854        }
2855        if right_id == NodeId::TS {
2856            return left_id;
2857        }
2858        if left_id > right_id {
2859            return self.mk_inter(right_id, left_id);
2860        }
2861        let key = NodeKey {
2862            kind: Kind::Inter,
2863            left: left_id,
2864            right: right_id,
2865            extra: u32::MAX,
2866        };
2867        if let Some(id) = self.key_is_created(&key) {
2868            return *id;
2869        }
2870
2871        if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
2872            return self.init_as(key, rw);
2873        }
2874
2875        self.init(key)
2876    }
2877
2878    fn mk_unset(&mut self, kind: Kind) -> NodeId {
2879        let node = NodeKey {
2880            kind,
2881            left: NodeId::MISSING,
2882            right: NodeId::MISSING,
2883            extra: u32::MAX,
2884        };
2885        self.init(node)
2886    }
2887
2888    pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
2889        let star = self.mk_star(body_id);
2890        self.mk_concat(body_id, star)
2891    }
2892    pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
2893        let opt = self.mk_opt(body_id);
2894        let mut nodes1 = vec![];
2895        for _ in lower..upper {
2896            nodes1.push(opt);
2897        }
2898        for _ in 0..lower {
2899            nodes1.push(body_id);
2900        }
2901        self.mk_concats(nodes1.into_iter())
2902    }
2903    pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
2904        self.mk_union(NodeId::EPS, body_id)
2905    }
2906
2907    pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
2908        let key = NodeKey {
2909            kind: Kind::Star,
2910            left: body_id,
2911            right: NodeId::MISSING,
2912            extra: 0,
2913        };
2914        if let Some(id) = self.key_is_created(&key) {
2915            return *id;
2916        }
2917        // _*{500} is still _*
2918        if body_id.is_kind(self, Kind::Star) {
2919            return body_id;
2920        }
2921        self.get_node_id(key)
2922    }
2923
2924    /// it's cheaper to check this once as an edge-case
2925    /// than to compute a 4th nullability bit for every node
2926    pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
2927        match self.get_kind(node_id) {
2928            Kind::End => Nullability::EMPTYSTRING,
2929            Kind::Begin => Nullability::EMPTYSTRING,
2930            Kind::Pred => Nullability::NEVER,
2931            Kind::Star => Nullability::ALWAYS,
2932            Kind::Inter | Kind::Concat => {
2933                let lnull = self.nullability_emptystring(node_id.left(self));
2934                let rnull = self.nullability_emptystring(node_id.right(self));
2935                lnull.and(rnull) // left = 010, right = 001, left & right = 000
2936            }
2937            Kind::Union => {
2938                let lnull = self.nullability_emptystring(node_id.left(self));
2939                let rnull = self.nullability_emptystring(node_id.right(self));
2940                lnull.or(rnull)
2941            }
2942            Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
2943            Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
2944            Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
2945        }
2946    }
2947
2948    #[inline(always)]
2949    pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
2950        self.get_meta(node_id)
2951            .flags
2952            .nullability()
2953            .has(Nullability::CENTER.or(Nullability::END))
2954    }
2955
2956    pub fn nullability(&self, node_id: NodeId) -> Nullability {
2957        self.get_only_nullability(node_id)
2958    }
2959
2960    pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
2961        self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
2962    }
2963
2964    pub fn pp(&self, node_id: NodeId) -> String {
2965        let mut s = String::new();
2966        self.ppw(&mut s, node_id).unwrap();
2967        s
2968    }
2969
2970    #[allow(dead_code)]
2971    pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
2972        let nu = self.get_nulls_id(node_id);
2973        let nr = self.mb.nb.get_set_ref(nu);
2974        let s1 = format!("{:?}", nr);
2975        s1
2976    }
2977
2978    #[allow(dead_code)]
2979    pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
2980        match self.get_tregex(term_id) {
2981            TRegex::Leaf(node_id) => {
2982                let mut s = String::new();
2983                self.ppw(&mut s, *node_id).unwrap();
2984                s
2985            }
2986            TRegex::ITE(cond, then_id, else_id) => {
2987                format!(
2988                    "ITE({},{},{})",
2989                    self.solver_ref().pp(*cond),
2990                    self.ppt(*then_id),
2991                    self.ppt(*else_id)
2992                )
2993            }
2994        }
2995    }
2996
2997    fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
2998        if cfg!(feature = "graphviz") {
2999            match node_id {
3000                NodeId::MISSING => return write!(s, "MISSING"),
3001                NodeId::BOT => return write!(s, "⊥"),
3002                NodeId::TS => return write!(s, "_*"),
3003                NodeId::TOP => return write!(s, "_"),
3004                NodeId::EPS => return write!(s, "ε"),
3005                _ => {}
3006            }
3007        }
3008
3009        match node_id {
3010            NodeId::MISSING => return write!(s, "MISSING"),
3011            NodeId::BOT => return write!(s, "⊥"),
3012            NodeId::TS => return write!(s, "_*"),
3013            NodeId::TOP => return write!(s, "_"),
3014            NodeId::EPS => return write!(s, ""),
3015            _ => {}
3016        }
3017
3018        match self.get_kind(node_id) {
3019            Kind::End => return write!(s, r"\z"),
3020            Kind::Begin => return write!(s, r"\A"),
3021            Kind::Pred => {
3022                let psi = node_id.pred_tset(self);
3023                if psi == TSetId::EMPTY {
3024                    return write!(s, r"⊥");
3025                } else if psi == TSetId::FULL {
3026                    return write!(s, r"_");
3027                } else {
3028                    return write!(s, "{}", self.solver_ref().pp(psi));
3029                }
3030            }
3031            Kind::Inter => {
3032                write!(s, "(")?;
3033                self.ppw(s, node_id.left(self))?;
3034                write!(s, "&")?;
3035                let mut curr = node_id.right(self);
3036                while self.get_kind(curr) == Kind::Inter {
3037                    let n = curr.left(self);
3038                    self.ppw(s, n)?;
3039                    write!(s, "&")?;
3040                    curr = curr.right(self);
3041                }
3042                self.ppw(s, curr)?;
3043                write!(s, ")")
3044            }
3045            Kind::Union => {
3046                let left = node_id.left(self);
3047                let right = node_id.right(self);
3048                write!(s, "(")?;
3049                self.ppw(s, left)?;
3050                write!(s, "|")?;
3051                let mut curr = right;
3052                while self.get_kind(curr) == Kind::Union {
3053                    let n = curr.left(self);
3054                    self.ppw(s, n)?;
3055                    write!(s, "|")?;
3056                    curr = curr.right(self);
3057                }
3058                self.ppw(s, curr)?;
3059                write!(s, ")")
3060            }
3061            Kind::Concat => {
3062                let left = node_id.left(self);
3063                let right = node_id.right(self);
3064                if right.is_star(self) && right.left(self) == left {
3065                    self.ppw(s, left)?;
3066                    write!(s, "+")?;
3067                    return Ok(());
3068                }
3069                if right.is_concat(self) {
3070                    let rl = right.left(self);
3071                    if rl.is_star(self) && rl.left(self) == left {
3072                        self.ppw(s, left)?;
3073                        write!(s, "+")?;
3074                        return self.ppw(s, right.right(self));
3075                    }
3076                }
3077                if right.is_concat(self) && right.left(self) == left {
3078                    let mut num = 1;
3079                    let mut right = right;
3080                    while right.is_concat(self) && right.left(self) == left {
3081                        num = num + 1;
3082                        right = right.right(self);
3083                    }
3084                    // (|X){n} followed by X{m} -> X{m,m+n}
3085                    if let Some(inner) = left.is_opt_v(self) {
3086                        let mut inner_count = 0;
3087                        let mut right2 = right;
3088                        while right2.is_concat(self) && right2.left(self) == inner {
3089                            inner_count += 1;
3090                            right2 = right2.right(self);
3091                        }
3092                        if right2 == inner {
3093                            inner_count += 1;
3094                            self.ppw(s, inner)?;
3095                            return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3096                        }
3097                        if inner_count > 0 {
3098                            self.ppw(s, inner)?;
3099                            write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3100                            return self.ppw(s, right2);
3101                        }
3102                    }
3103                    self.ppw(s, left)?;
3104                    if right == left {
3105                        num = num + 1;
3106                        return write!(s, "{{{}}}", num);
3107                    }
3108                    if num <= 3 && left.is_pred(self) {
3109                        for _ in 1..num {
3110                            self.ppw(s, left)?;
3111                        }
3112                        return self.ppw(s, right);
3113                    } else {
3114                        write!(s, "{{{}}}", num)?;
3115                        return self.ppw(s, right);
3116                    }
3117                }
3118                self.ppw(s, left)?;
3119                self.ppw(s, right)
3120            }
3121            Kind::Star => {
3122                let left = node_id.left(self);
3123                let leftkind = self.get_kind(left);
3124                match leftkind {
3125                    Kind::Concat | Kind::Star | Kind::Compl => {
3126                        write!(s, "(")?;
3127                        self.ppw(s, left)?;
3128                        write!(s, ")")?;
3129                    }
3130                    _ => {
3131                        self.ppw(s, left)?;
3132                    }
3133                };
3134                write!(s, "*")
3135            }
3136            Kind::Compl => {
3137                write!(s, "~(")?;
3138                self.ppw(s, node_id.left(self))?;
3139                write!(s, ")")
3140            }
3141            Kind::Lookbehind => {
3142                let lbleft = self.get_lookbehind_prev(node_id);
3143                let lbinner = self.get_lookbehind_inner(node_id);
3144                debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3145                if lbleft != NodeId::MISSING {
3146                    write!(s, "「")?;
3147                    self.ppw(s, lbleft)?;
3148                    write!(s, "」")?;
3149                }
3150
3151                write!(s, "(?<=")?;
3152                self.ppw(s, lbinner)?;
3153                write!(s, ")")
3154            }
3155            Kind::Lookahead => {
3156                let inner = self.get_lookahead_inner(node_id);
3157                write!(s, "(?=")?;
3158                self.ppw(s, inner)?;
3159                write!(s, ")")?;
3160                if self.get_lookahead_rel(node_id) != 0 {
3161                    write!(s, "{{")?;
3162                    let rel = self.get_lookahead_rel(node_id);
3163                    if rel == u32::MAX {
3164                        write!(s, "∅")?;
3165                    } else {
3166                        write!(s, "{}", rel)?;
3167                    }
3168                    write!(s, "}}")?;
3169                }
3170                if node_id.right(self) == NodeId::MISSING {
3171                    Ok(())
3172                } else {
3173                    write!(s, "『")?;
3174                    self.ppw(s, node_id.right(self))?;
3175                    write!(s, "』")
3176                }
3177            }
3178        }
3179    }
3180
3181    pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3182        self.mk_concat(node, NodeId::TS)
3183    }
3184
3185    #[allow(dead_code)]
3186    pub(crate) fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3187        let node_ts = self.mk_concat(node, NodeId::TS);
3188        self.mk_compl(node_ts)
3189    }
3190
3191    pub(crate) fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3192        let notset = self.solver().not_id(set);
3193        self.mk_pred(notset)
3194    }
3195
3196    pub fn mk_u8(&mut self, char: u8) -> NodeId {
3197        let set_id = self.solver().u8_to_set_id(char as u8);
3198        self.mk_pred(set_id)
3199    }
3200
3201    pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3202        let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3203        self.mk_pred(rangeset)
3204    }
3205
3206    pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3207        let mut prefix = Vec::new();
3208        let mut curr = node;
3209        loop {
3210            if curr == NodeId::EPS {
3211                let full = !prefix.is_empty();
3212                return (prefix, full);
3213            }
3214            if curr == NodeId::BOT {
3215                break;
3216            }
3217            if self.get_kind(curr) == Kind::Pred {
3218                match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3219                    Some(byte) => {
3220                        prefix.push(byte);
3221                        return (prefix, true);
3222                    }
3223                    None => break, // multi-byte pred: pattern not fully consumed
3224                }
3225            }
3226            if self.get_kind(curr) != Kind::Concat {
3227                break;
3228            }
3229            let left = curr.left(self);
3230            if self.get_kind(left) != Kind::Pred {
3231                break;
3232            }
3233            match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3234                Some(byte) => prefix.push(byte),
3235                None => break,
3236            }
3237            curr = curr.right(self);
3238        }
3239        (prefix, false)
3240    }
3241
3242    #[allow(dead_code)]
3243    pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3244        let mut result = NodeId::EPS;
3245        for byte in raw_str.into_iter().rev() {
3246            let node = self.mk_u8(*byte);
3247            result = self.mk_concat(node, result);
3248        }
3249        result
3250    }
3251
3252    pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3253        let mut result = NodeId::EPS;
3254        for byte in raw_str.bytes().rev() {
3255            let node = self.mk_u8(byte);
3256            result = self.mk_concat(node, result);
3257        }
3258        result
3259    }
3260
3261    pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3262        let mut sorted: Vec<NodeId> = nodes.collect();
3263        if sorted.len() <= 1 {
3264            return sorted.pop().unwrap_or(NodeId::BOT);
3265        }
3266        sorted.sort();
3267        sorted.dedup();
3268        sorted.retain(|&x| x != NodeId::BOT);
3269        if sorted.is_empty() {
3270            return NodeId::BOT;
3271        }
3272        if sorted.len() > 16 {
3273            let mut by_head: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
3274            let mut non_concat: Vec<NodeId> = Vec::new();
3275            for &n in &sorted {
3276                if self.get_kind(n) == Kind::Concat {
3277                    by_head.entry(self.get_left(n)).or_default().push(n);
3278                } else {
3279                    non_concat.push(n);
3280                }
3281            }
3282            if by_head.len() < sorted.len() {
3283                let mut groups: Vec<NodeId> = non_concat;
3284                for (head, tails) in by_head {
3285                    if tails.len() == 1 {
3286                        groups.push(tails[0]);
3287                    } else {
3288                        let tail_nodes: Vec<NodeId> =
3289                            tails.iter().map(|&n| self.get_right(n)).collect();
3290                        let tail_union = self.mk_unions(tail_nodes.into_iter());
3291                        let factored = self.mk_concat(head, tail_union);
3292                        groups.push(factored);
3293                    }
3294                }
3295                groups.sort();
3296                groups.dedup();
3297                return self.mk_unions(groups.into_iter());
3298            }
3299        }
3300        self.mk_unions_balanced(&sorted)
3301    }
3302
3303    fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3304        match nodes.len() {
3305            0 => NodeId::BOT,
3306            1 => nodes[0],
3307            n => {
3308                let mid = n / 2;
3309                let left = self.mk_unions_balanced(&nodes[..mid]);
3310                let right = self.mk_unions_balanced(&nodes[mid..]);
3311                self.mk_union(left, right)
3312            }
3313        }
3314    }
3315
3316    pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3317        nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3318    }
3319
3320    pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3321        nodes
3322            .rev()
3323            .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3324    }
3325}
3326
3327impl RegexBuilder {
3328    #[allow(dead_code)]
3329    pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3330        match self.get_tregex(term_id).clone() {
3331            TRegex::Leaf(node_id) => {
3332                if NodeId::BOT == node_id {
3333                    vec![]
3334                } else {
3335                    vec![node_id]
3336                }
3337            }
3338            TRegex::ITE(_, then_id, else_id) => {
3339                let mut then_nodes = self.extract_sat(then_id);
3340                let mut else_nodes = self.extract_sat(else_id);
3341                then_nodes.append(&mut else_nodes);
3342                then_nodes
3343            }
3344        }
3345    }
3346
3347    pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3348        debug_assert!(self.get_kind(start) == Kind::Union);
3349        let mut curr = start;
3350        while self.get_kind(curr) == Kind::Union {
3351            f(curr.left(self));
3352            curr = curr.right(self);
3353        }
3354        f(curr);
3355    }
3356
3357    pub(crate) fn iter_unions_b(
3358        &mut self,
3359        curr: NodeId,
3360        f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3361    ) {
3362        let mut curr = curr;
3363        while self.get_kind(curr) == Kind::Union {
3364            f(self, curr.left(self));
3365            curr = curr.right(self);
3366        }
3367        f(self, curr);
3368    }
3369
3370    pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3371        if !self.contains_look(node_id) {
3372            return Some(node_id);
3373        }
3374        match self.get_kind(node_id) {
3375            Kind::Pred | Kind::Begin | Kind::End => return Some(node_id),
3376            Kind::Concat => {
3377                let left = node_id.left(self);
3378                let right = node_id.right(self);
3379                let elim_l = self.try_elim_lookarounds(left)?;
3380                let elim_r = self.try_elim_lookarounds(right)?;
3381                let rw = self.mk_concat(elim_l, elim_r);
3382                Some(rw)
3383            }
3384            Kind::Union => {
3385                let left = node_id.left(self);
3386                let right = node_id.right(self);
3387                let elim_l = self.try_elim_lookarounds(left)?;
3388                let elim_r = self.try_elim_lookarounds(right)?;
3389                let rw = self.mk_union(elim_l, elim_r);
3390                Some(rw)
3391            }
3392
3393            Kind::Star => {
3394                let body = node_id.left(self);
3395                let elim_l = self.try_elim_lookarounds(body)?;
3396                Some(self.mk_star(elim_l))
3397            }
3398            Kind::Compl => {
3399                let left = node_id.left(self);
3400                let elim_l = self.try_elim_lookarounds(left)?;
3401                Some(self.mk_compl(elim_l))
3402            }
3403            Kind::Lookahead => {
3404                let rel = self.get_lookahead_rel(node_id);
3405                if rel != 0 {
3406                    return None;
3407                }
3408                let lbody = self.get_lookahead_inner(node_id);
3409                let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3410                let elim_l = self.try_elim_lookarounds(lbody)?;
3411                let elim_r = self.try_elim_lookarounds(ltail)?;
3412                let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3413                let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3414                let rw = self.mk_inter(lbody_ts, ltail_ts);
3415                Some(rw)
3416            }
3417            Kind::Lookbehind => {
3418                let linner = self.get_lookbehind_inner(node_id);
3419                let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3420                let elim_l = self.try_elim_lookarounds(linner)?;
3421                let elim_r = self.try_elim_lookarounds(lprev)?;
3422                let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3423                let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3424                let rw = self.mk_inter(lbody_ts, ltail_ts);
3425                Some(rw)
3426            }
3427            Kind::Inter => {
3428                let left = node_id.left(self);
3429                let right = node_id.right(self);
3430                let elim_l = self.try_elim_lookarounds(left)?;
3431                let elim_r = self.try_elim_lookarounds(right)?;
3432                let rw = self.mk_inter(elim_l, elim_r);
3433                Some(rw)
3434            }
3435        }
3436    }
3437
3438    /// R & _+ is a safe overapproximation of R that's nonempty
3439    pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3440        if self.nullability(node) == Nullability::NEVER {
3441            node
3442        } else {
3443            self.mk_inter(NodeId::TOPPLUS, node)
3444        }
3445    }
3446
3447    fn iter_find_stack(
3448        &self,
3449        stack: &mut Vec<TRegexId>,
3450        mut f: impl FnMut(NodeId) -> bool,
3451    ) -> bool {
3452        loop {
3453            match stack.pop() {
3454                None => return false,
3455                Some(curr) => match self.get_tregex(curr) {
3456                    TRegex::Leaf(n) => {
3457                        let mut curr = *n;
3458                        while curr != NodeId::BOT {
3459                            match self.get_kind(curr) {
3460                                Kind::Union => {
3461                                    if f(curr.left(self)) {
3462                                        return true;
3463                                    }
3464                                    curr = curr.right(self);
3465                                }
3466                                _ => {
3467                                    if f(*n) {
3468                                        return true;
3469                                    }
3470                                    curr = NodeId::BOT;
3471                                }
3472                            }
3473                        }
3474                    }
3475                    TRegex::ITE(_, then_id, else_id) => {
3476                        if *else_id != TRegexId::BOT {
3477                            stack.push(*else_id);
3478                        }
3479                        stack.push(*then_id);
3480                    }
3481                },
3482            }
3483        }
3484    }
3485
3486    pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3487        if node == NodeId::BOT {
3488            return Some(true);
3489        }
3490        if self.nullability(node) != Nullability::NEVER {
3491            return Some(false);
3492        }
3493        if let Some(cached) = self.cache_empty.get(&node) {
3494            if cached.is_checked() {
3495                return Some(cached.is_empty());
3496            }
3497        }
3498        let node = if !self.contains_look(node) {
3499            node
3500        } else {
3501            let elim = self.try_elim_lookarounds(node)?;
3502            elim
3503        };
3504        let isempty_flag = self.is_empty_lang_internal(node);
3505
3506        Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3507    }
3508
3509    fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3510        // without inter, no need to check
3511        if !self.get_meta_flags(initial_node).contains_inter() {
3512            return Ok(NodeFlags::ZERO);
3513        }
3514
3515        let mut visited: HashMap<NodeId, NodeId> = HashMap::new();
3516        let mut todo: VecDeque<NodeId> = VecDeque::new();
3517        let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3518        let mut stack = Vec::new();
3519        stack.push(begin_der);
3520        let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3521            visited.insert(node, initial_node);
3522            let nullability = self.nullability(node);
3523            if nullability != Nullability::NEVER {
3524                true
3525            } else {
3526                todo.push_back(node);
3527                false
3528            }
3529        });
3530        if found_nullable_right_away {
3531            return Ok(NodeFlags::ZERO);
3532        }
3533
3534        todo.push_back(initial_node);
3535        let isempty_flag: NodeFlags;
3536        let mut found_node = NodeId::BOT;
3537
3538        loop {
3539            match todo.pop_front() {
3540                None => {
3541                    isempty_flag = NodeFlags::IS_EMPTY;
3542                    break;
3543                }
3544                Some(outer) => {
3545                    if let Some(cached) = self.cache_empty.get(&outer) {
3546                        if cached.is_checked() {
3547                            if cached.is_empty() {
3548                                continue;
3549                            } else {
3550                                return Ok(NodeFlags::ZERO);
3551                            }
3552                        }
3553                    }
3554
3555                    let derivative = self.der(outer, Nullability::CENTER)?;
3556
3557                    stack.push(derivative);
3558
3559                    let found_null = self.iter_find_stack(&mut stack, |node| {
3560                        if visited.contains_key(&node) {
3561                            false
3562                        } else {
3563                            found_node = node;
3564                            if !self.get_meta_flags(node).contains_inter() {
3565                                return true;
3566                            } else {
3567                                visited.insert(node, outer);
3568                                todo.push_front(node);
3569                                self.any_nonbegin_nullable(node)
3570                            }
3571                        }
3572                    });
3573                    if found_null {
3574                        self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3575                        isempty_flag = NodeFlags::ZERO;
3576                        break;
3577                    }
3578                }
3579            }
3580        }
3581
3582        self.cache_empty.insert(
3583            initial_node,
3584            NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3585        );
3586        Ok(isempty_flag)
3587    }
3588
3589    /// check if `larger_lang` subsumes `smaller_lang` (i.e. L(smaller) ⊆ L(larger)).
3590    pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3591        if larger_lang == smaller_lang {
3592            return Some(true);
3593        }
3594
3595        // assess initial nullability
3596        if self
3597            .nullability(larger_lang)
3598            .not()
3599            .and(self.nullability(smaller_lang))
3600            != Nullability::NEVER
3601        {
3602            return Some(false);
3603        }
3604
3605        // check language nullability
3606        // if (B &~ A) ≡ ⊥ then B ⊆ A
3607        // this means  L(B) \ L(A) = {}
3608        // eg. (a &~ .*) = ⊥ means .* subsumes a
3609        let (smaller_lang, larger_lang) =
3610            if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3611                let wrap = |b: &mut Self, n: NodeId| {
3612                    let tmp = b.mk_concat(n, NodeId::TS);
3613                    b.mk_concat(NodeId::TS, tmp)
3614                };
3615                (wrap(self, smaller_lang), wrap(self, larger_lang))
3616            } else {
3617                (smaller_lang, larger_lang)
3618            };
3619
3620        let nota = self.mk_compl(larger_lang);
3621        let diff = self.mk_inter(smaller_lang, nota);
3622        self.is_empty_lang(diff)
3623    }
3624}