1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_inter(self) -> bool {
134 self.has(MetaFlags::CONTAINS_INTER)
135 }
136
137 pub(crate) fn all_contains_flags(self) -> MetaFlags {
138 self.and(
139 MetaFlags::CONTAINS_LOOKAROUND
140 .or(MetaFlags::CONTAINS_ANCHORS)
141 .or(MetaFlags::CONTAINS_INTER)
142 .or(MetaFlags::CONTAINS_LOOKBEHIND),
143 )
144 }
145}
146
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
148#[repr(u8)]
149pub enum Kind {
150 #[default]
151 Pred,
152 Star,
153 Begin,
154 End,
155 Concat,
156 Union,
157 Compl,
158 Lookbehind,
159 Lookahead,
160 Inter,
161 Counted,
162}
163
164#[derive(Eq, Hash, PartialEq, Clone)]
165struct Metadata {
166 flags: MetaFlags,
167 nulls: NullsId,
168}
169
170struct MetadataBuilder {
171 num_created: u32,
172 solver: Solver,
173 nb: NullsBuilder,
174 index: FxHashMap<Metadata, MetadataId>,
175 pub array: Vec<Metadata>,
176}
177
178mod helpers {
179 pub(crate) fn incr_rel(n1: u32) -> u32 {
180 match n1.overflowing_add(1) {
181 (_, true) => u32::MAX,
182 (res, false) => res,
183 }
184 }
185}
186
187impl MetadataBuilder {
188 fn new() -> MetadataBuilder {
189 Self {
190 index: FxHashMap::default(),
191 array: vec![Metadata {
192 flags: MetaFlags::ZERO,
193 nulls: NullsId::EMPTY,
194 }],
195 solver: Solver::new(),
196 num_created: 0,
197 nb: NullsBuilder::new(),
198 }
199 }
200
201 fn init(&mut self, inst: Metadata) -> MetadataId {
202 self.num_created += 1;
203 let new_id = MetadataId(self.num_created);
204 self.index.insert(inst.clone(), new_id);
205 self.array.push(inst);
206 new_id
207 }
208
209 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210 match self.index.get(&inst) {
211 Some(&id) => id,
212 None => self.init(inst),
213 }
214 }
215
216 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217 &self.array[inst.0 as usize]
218 }
219
220 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221 setflags.all_contains_flags()
222 }
223
224 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
225 let left = &self.array[body.0 as usize].flags;
226 let contains = self.get_contains(*left);
227 let inf = if body_id == NodeId::BOT {
229 MetaFlags::ZERO
230 } else {
231 MetaFlags::INFINITE_LENGTH
232 };
233 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
234 }
235
236 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
237 let left = &self.array[left_id.0 as usize].flags;
238 let null = left.nullability().not().and(Nullability::ALWAYS);
239 let contains = self.get_contains(*left);
240 MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
241 }
242
243 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
244 let left = &self.array[left_id.0 as usize].flags;
245 let right = &self.array[right_id.0 as usize].flags;
246 let null = left.nullability().and(right.nullability());
247 let contains = self.get_contains(left.or(*right));
248 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
249 MetaFlags::with_nullability(null, contains.or(len))
250 }
251
252 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253 let left = &self.array[left_id.0 as usize].flags;
254 let right = &self.array[right_id.0 as usize].flags;
255 let null = left.nullability().and(right.nullability());
256 let contains = self
257 .get_contains(left.or(*right))
258 .or(MetaFlags::CONTAINS_INTER);
259 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
260 MetaFlags::with_nullability(null, contains.or(len))
261 }
262
263 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
264 let left = &self.array[left_id.0 as usize].flags;
265 let right = &self.array[right_id.0 as usize].flags;
266 let null = left.nullability().or(right.nullability());
267 let contains = self.get_contains(left.or(*right));
268 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
269 MetaFlags::with_nullability(null, contains.or(len))
270 }
271}
272
273#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
274pub struct NodeKey {
275 pub(crate) kind: Kind,
276 pub(crate) left: NodeId,
277 pub(crate) right: NodeId,
278 pub(crate) extra: u32,
279}
280
281#[derive(Eq, Hash, PartialEq, Clone, Debug)]
282pub enum TRegex<TSet> {
283 Leaf(NodeId),
284 ITE(TSet, TRegexId, TRegexId),
285}
286
287#[derive(Eq, Hash, PartialEq, Clone, Copy)]
288pub(crate) struct NodeFlags(u8);
289impl NodeFlags {
290 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
291 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
292 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
293
294 fn is_checked(self) -> bool {
295 self.0 >= NodeFlags::IS_CHECKED.0
296 }
297 fn is_empty(self) -> bool {
298 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
299 }
300}
301
302#[derive(Eq, Hash, PartialEq, Clone, Copy)]
303pub(crate) struct BuilderFlags(u8);
304impl BuilderFlags {
305 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
306 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
307}
308
309pub struct RegexBuilder {
310 mb: MetadataBuilder,
311 temp_vec: Vec<NodeId>,
312 num_created: u32,
313 index: FxHashMap<NodeKey, NodeId>,
314 array: Vec<NodeKey>,
315 metadata: Vec<MetadataId>,
316 reversed: Vec<NodeId>,
317 cache_empty: FxHashMap<NodeId, NodeFlags>,
318 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
319 tr_array: Vec<TRegex<TSetId>>,
320 tr_der_center: Vec<TRegexId>,
321 tr_der_begin: Vec<TRegexId>,
322 flags: BuilderFlags,
323 pub lookahead_context_max: u32,
325 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
326 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
327}
328
329impl NodeId {
330 fn is_missing(&self) -> bool {
331 *self == NodeId::MISSING
332 }
333 #[inline]
334 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
335 b.get_flags_contains(self)
336 }
337 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
338 if self == tail {
339 true
340 } else if self.is_kind(b, Kind::Concat) {
341 self.right(b).has_concat_tail(b, tail)
342 } else {
343 false
344 }
345 }
346 fn missing_to_eps(&self) -> NodeId {
347 if *self == NodeId::MISSING {
348 NodeId::EPS
349 } else {
350 *self
351 }
352 }
353
354 #[inline]
355 fn kind(self, b: &RegexBuilder) -> Kind {
356 b.get_kind(self)
357 }
358 #[inline]
359 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
360 b.get_kind(self) == k
361 }
362 #[inline]
363 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
364 b.nullability(self) == Nullability::NEVER
365 }
366 #[inline]
367 pub fn nullability(self, b: &RegexBuilder) -> Nullability {
368 b.nullability(self)
369 }
370
371 #[inline]
372 pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
373 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
374 }
375 #[inline]
376 pub fn left(self, b: &RegexBuilder) -> NodeId {
377 b.get_left(self)
378 }
379
380 #[inline]
381 pub fn right(self, b: &RegexBuilder) -> NodeId {
382 b.get_right(self)
383 }
384
385 #[inline]
386 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
387 b.der(self, mask)
388 }
389
390 #[inline]
391 fn extra(self, b: &RegexBuilder) -> u32 {
392 b.get_extra(self)
393 }
394
395 #[inline]
396 fn is_pred(self, b: &RegexBuilder) -> bool {
397 b.get_kind(self) == Kind::Pred
398 }
399
400 #[inline]
401 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
402 debug_assert!(self.is_pred(b));
403 TSetId(b.get_extra(self))
404 }
405 #[inline]
406 fn is_star(self, b: &RegexBuilder) -> bool {
407 if NodeId::EPS == self {
408 return false;
409 }
410 b.get_kind(self) == Kind::Star
411 }
412
413 pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
414 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
415 }
416
417 pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
418 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAROUND)
419 }
420
421 #[inline]
422 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
423 b.get_kind(self) == Kind::Inter
424 }
425
426 #[inline]
427 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
428 if self.is_concat(b) {
429 let r = self.right(b);
430 return r.is_star(b) && r.left(b) == self.left(b);
431 }
432 false
433 }
434
435 #[inline]
436 fn is_begin(self) -> bool {
437 self == NodeId::BEGIN
438 }
439
440 #[inline]
441 fn is_union(self, b: &RegexBuilder) -> bool {
442 b.get_kind(self) == Kind::Union
443 }
444
445 #[inline]
446 fn is_concat(self, b: &RegexBuilder) -> bool {
447 b.get_kind(self) == Kind::Concat
448 }
449
450 #[inline]
451 fn is_lookahead(self, b: &RegexBuilder) -> bool {
452 b.get_kind(self) == Kind::Lookahead
453 }
454
455 #[inline]
456 fn is_lookbehind(self, b: &RegexBuilder) -> bool {
457 b.get_kind(self) == Kind::Lookbehind
458 }
459
460 #[inline]
461 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
462 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
463 Some(b.get_right(self))
464 } else {
465 None
466 }
467 }
468
469 #[inline]
470 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
471 if b.get_kind(self) == Kind::Concat {
472 let left = self.left(b);
473 let right = self.right(b);
474 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
475 return right == NodeId::END;
476 }
477 }
478 false
479 }
480
481 #[inline]
482 fn is_ts(self) -> bool {
483 NodeId::TS == self
484 }
485
486 #[inline]
487 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
488 if NodeId::EPS == self {
489 return None;
490 }
491 if self.is_star(b) && self.left(b).is_pred(b) {
492 Some(self.left(b))
493 } else {
494 None
495 }
496 }
497
498 #[inline]
499 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
500 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
501 let middle = self.right(b);
502 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
503 return Some(middle.left(b));
504 }
505 }
506 None
507 }
508
509 #[inline]
510 pub(crate) fn iter_union(
511 self,
512 b: &mut RegexBuilder,
513 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
514 ) {
515 let mut curr: NodeId = self;
516 while curr.kind(b) == Kind::Union {
517 f(b, curr.left(b));
518 curr = curr.right(b);
519 }
520 f(b, curr);
521 }
522
523 #[inline]
524 pub(crate) fn iter_inter(
525 self,
526 b: &mut RegexBuilder,
527 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
528 ) {
529 let mut curr: NodeId = self;
530 while curr.kind(b) == Kind::Inter {
531 f(b, curr.left(b));
532 curr = curr.right(b);
533 }
534 f(b, curr);
535 }
536
537 #[inline]
538 pub(crate) fn iter_union_while(
539 self,
540 b: &mut RegexBuilder,
541 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
542 ) {
543 let mut curr: NodeId = self;
544 let mut continue_loop = true;
545 while continue_loop && curr.kind(b) == Kind::Union {
546 continue_loop = f(b, curr.left(b));
547 curr = curr.right(b);
548 }
549 if continue_loop {
550 f(b, curr);
551 }
552 }
553
554 #[inline]
555 pub(crate) fn any_inter_component(
556 self,
557 b: &RegexBuilder,
558 mut f: impl FnMut(NodeId) -> bool,
559 ) -> bool {
560 debug_assert!(self.kind(b) == Kind::Inter);
561 let mut cur = self;
562 while cur.kind(b) == Kind::Inter {
563 if f(cur.left(b)) {
564 return true;
565 }
566 cur = cur.right(b);
567 }
568 f(cur)
569 }
570
571 #[inline]
572 pub(crate) fn any_union_component(
573 self,
574 b: &RegexBuilder,
575 mut f: impl FnMut(NodeId) -> bool,
576 ) -> bool {
577 let mut cur = self;
578 while cur.kind(b) == Kind::Union {
579 if f(cur.left(b)) {
580 return true;
581 }
582 cur = cur.right(b);
583 }
584 f(cur)
585 }
586}
587
588impl Default for RegexBuilder {
589 fn default() -> Self {
590 Self::new()
591 }
592}
593
594impl RegexBuilder {
595 pub fn new() -> RegexBuilder {
596 let mut inst = Self {
597 mb: MetadataBuilder::new(),
598 array: Vec::new(),
599 index: FxHashMap::default(),
600 cache_empty: FxHashMap::default(),
601 tr_array: Vec::new(),
602 tr_cache: FxHashMap::default(),
603 flags: BuilderFlags::ZERO,
604 lookahead_context_max: 800,
605 num_created: 0,
606 metadata: Vec::new(),
607 reversed: Vec::new(),
608 tr_der_center: Vec::new(),
609 tr_der_begin: Vec::new(),
610 temp_vec: Vec::new(),
611 mk_binary_memo: FxHashMap::default(),
612 clean_cache: FxHashMap::default(),
613 };
614 inst.array.push(NodeKey::default());
615 inst.mk_pred(TSetId::EMPTY);
616 inst.mk_star(NodeId::BOT);
617 inst.mk_pred(TSetId::FULL);
618 inst.mk_star(NodeId::TOP);
619 let top_plus_id = inst.mk_plus(NodeId::TOP);
620 inst.mk_unset(Kind::Begin);
621 inst.mk_unset(Kind::End);
622 debug_assert!(top_plus_id == NodeId::TOPPLUS);
623
624 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
625 inst.mk_leaf(NodeId::BOT);
626 inst.mk_leaf(NodeId::EPS);
627 inst.mk_leaf(NodeId::TOP);
628 inst.mk_leaf(NodeId::TS);
629
630 inst.flags = BuilderFlags::SUBSUME;
631 inst
632 }
633
634 #[inline]
635 pub fn solver_ref(&self) -> &Solver {
636 &self.mb.solver
637 }
638
639 #[inline]
640 pub fn solver(&mut self) -> &mut Solver {
641 &mut self.mb.solver
642 }
643
644 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
645 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
646 self.tr_cache.insert(inst.clone(), new_id);
647 self.tr_array.push(inst);
648 new_id
649 }
650
651 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
652 match self.tr_cache.get(&inst) {
653 Some(&id) => id,
654 None => self.tr_init(inst),
655 }
656 }
657
658 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
659 &self.tr_array[inst.0 as usize]
660 }
661
662 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
663 let solv = self.solver();
664 !solv.is_sat_id(cond1, cond2)
665 }
666
667 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
668 let term = TRegex::<TSetId>::Leaf(node_id);
669 self.get_tregex_id(term)
670 }
671
672 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
673 let tmp_inst = TRegex::ITE(cond, _then, _else);
674 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
675 return *cached;
676 }
677 if _then == _else {
678 return _then;
679 }
680 if self.solver().is_full_id(cond) {
681 return _then;
682 }
683 if self.solver().is_empty_id(cond) {
684 return _else;
685 }
686 let _clean_then = match self.tr_array[_then.0 as usize] {
687 TRegex::Leaf(_) => _then,
688 _ => self.clean(cond, _then),
689 };
690 let notcond = self.solver().not_id(cond);
691 let _clean_else = match self.tr_array[_else.0 as usize] {
692 TRegex::Leaf(_) => _else,
693 _ => self.clean(notcond, _else),
694 };
695
696 if _clean_then == _clean_else {
697 return _clean_then;
698 }
699 match self.get_tregex(_clean_then) {
701 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
702 let _cond_then = *leftcond;
703 let new_then = *_inner_then;
704 let sand = self.solver().and_id(cond, _cond_then);
705 return self.mk_ite(sand, new_then, _clean_else);
706 }
707 _ => {}
708 }
709 match self.get_tregex(_clean_else) {
711 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
713 let e2clone = *_e2;
714 let new_cond = self.mb.solver.or_id(cond, *_c2);
715 return self.mk_ite(new_cond, _clean_then, e2clone);
716 }
717 _ => {}
718 }
719
720 if _clean_then == TRegexId::BOT {
721 let flip_cond = self.solver().not_id(cond);
722 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
723 return cleaned_id;
724 }
725
726 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
727 }
728
729 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
730 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
731 return cached;
732 }
733 let result = match *self.get_tregex(tterm) {
734 TRegex::Leaf(_) => tterm,
735 TRegex::ITE(alpha, _then_id, _else_id) => {
736 let notalpha = self.mb.solver.not_id(alpha);
737 if self.mb.solver.unsat_id(beta, alpha) {
738 self.clean(beta, _else_id)
740 } else if self.unsat(beta, notalpha) {
741 self.clean(beta, _then_id)
743 } else {
744 let tc = self.mb.solver.and_id(beta, alpha);
745 let ec = self.mb.solver.and_id(beta, notalpha);
746 let new_then = self.clean(tc, _then_id);
747 let new_else = self.clean(ec, _else_id);
748 self.mk_ite(alpha, new_then, new_else)
749 }
750 }
751 };
752 self.clean_cache.insert((beta, tterm), result);
753 result
754 }
755
756 fn mk_unary(
757 &mut self,
758 term: TRegexId,
759 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
760 ) -> TRegexId {
761 match self.tr_array[term.0 as usize] {
762 TRegex::Leaf(node_id) => {
763 let applied = apply(self, node_id);
764 self.mk_leaf(applied)
765 }
766 TRegex::ITE(c1, _then, _else) => {
767 let _then1 = self.mk_unary(_then, apply);
768 let _else1 = self.mk_unary(_else, apply);
769 self.mk_ite(c1, _then1, _else1)
770 }
771 }
772 }
773
774 fn mk_binary_result(
775 &mut self,
776 left: TRegexId,
777 right: TRegexId,
778 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
779 ) -> Result<TRegexId, AlgebraError> {
780 match self.tr_array[left.0 as usize] {
781 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
782 TRegex::Leaf(right_node_id) => {
783 let applied = apply(self, left_node_id, right_node_id)?;
784 Ok(self.mk_leaf(applied))
785 }
786 TRegex::ITE(c2, _then, _else) => {
787 let then2 = self.mk_binary_result(left, _then, apply)?;
788 let else2 = self.mk_binary_result(left, _else, apply)?;
789 Ok(self.mk_ite(c2, then2, else2))
790 }
791 },
792 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
793 TRegex::Leaf(_) => {
794 let then2 = self.mk_binary_result(_then1, right, apply)?;
795 let else2 = self.mk_binary_result(_else1, right, apply)?;
796 Ok(self.mk_ite(c1, then2, else2))
797 }
798 TRegex::ITE(c2, _then2, _else2) => {
799 if c1 == c2 {
800 let _then = self.mk_binary_result(_then1, _then2, apply)?;
801 let _else = self.mk_binary_result(_else1, _else2, apply)?;
802 return Ok(self.mk_ite(c1, _then, _else));
803 }
804 if c1.0 > c2.0 {
805 let _then = self.mk_binary_result(_then1, right, apply)?;
806 let _else = self.mk_binary_result(_else1, right, apply)?;
807 Ok(self.mk_ite(c1, _then, _else))
808 } else {
809 let _then = self.mk_binary_result(left, _then2, apply)?;
810 let _else = self.mk_binary_result(left, _else2, apply)?;
811 Ok(self.mk_ite(c2, _then, _else))
812 }
813 }
814 },
815 }
816 }
817
818 fn mk_binary(
819 &mut self,
820 left: TRegexId,
821 right: TRegexId,
822 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
823 ) -> TRegexId {
824 self.mk_binary_memo.clear();
825 self.mk_binary_inner(left, right, apply)
826 }
827
828 fn mk_binary_inner(
829 &mut self,
830 left: TRegexId,
831 right: TRegexId,
832 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
833 ) -> TRegexId {
834 if left == right {
835 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
836 }
837 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
838 return cached;
839 }
840 let result = match self.tr_array[left.0 as usize] {
841 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
842 TRegex::Leaf(right_node_id) => {
843 let applied = apply(self, left_node_id, right_node_id);
844 self.mk_leaf(applied)
845 }
846 TRegex::ITE(c2, _then, _else) => {
847 let then2 = self.mk_binary_inner(left, _then, apply);
848 let else2 = self.mk_binary_inner(left, _else, apply);
849 self.mk_ite(c2, then2, else2)
850 }
851 },
852 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
853 TRegex::Leaf(_) => {
854 let then2 = self.mk_binary_inner(_then1, right, apply);
855 let else2 = self.mk_binary_inner(_else1, right, apply);
856 self.mk_ite(c1, then2, else2)
857 }
858 TRegex::ITE(c2, _then2, _else2) => {
859 if c1 == c2 {
860 let _then = self.mk_binary_inner(_then1, _then2, apply);
861 let _else = self.mk_binary_inner(_else1, _else2, apply);
862 self.mk_ite(c1, _then, _else)
863 } else if c1.0 > c2.0 {
864 let _then = self.mk_binary_inner(_then1, right, apply);
865 let _else = self.mk_binary_inner(_else1, right, apply);
866 self.mk_ite(c1, _then, _else)
867 } else {
868 let _then = self.mk_binary_inner(left, _then2, apply);
869 let _else = self.mk_binary_inner(left, _else2, apply);
870 self.mk_ite(c2, _then, _else)
871 }
872 }
873 },
874 };
875 self.mk_binary_memo.insert((left, right), result);
876 result
877 }
878
879 pub fn get_nulls(
880 &mut self,
881 pending_rel: u32,
882 mask: Nullability,
883 acc: &mut BTreeSet<NullState>,
884 node_id: NodeId,
885 ) {
886 debug_assert!(node_id != NodeId::MISSING);
887 if !self.is_nullable(node_id, mask) {
888 return;
889 }
890 match self.get_kind(node_id) {
891 Kind::Pred => {}
892 Kind::End => {
893 if mask.has(Nullability::END) {
894 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
895 }
896 }
897 Kind::Begin => {
898 if mask.has(Nullability::BEGIN) {
899 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
900 }
901 }
902 Kind::Concat => {
903 let new_mask = self.nullability(node_id).and(mask);
904 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
905 if self.is_nullable(node_id.left(self), mask) {
906 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
907 }
908 }
909 Kind::Union => {
910 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
911 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
912 }
913 Kind::Inter => {
914 let new_mask = self.nullability(node_id).and(mask);
915 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
916 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
917 }
918 Kind::Star => {
919 acc.insert(NullState::new(mask, pending_rel));
920 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
921 }
922 Kind::Compl => {
923 if !self.is_nullable(node_id.left(self), mask) {
924 acc.insert(NullState::new(mask, 0));
925 }
926 }
927 Kind::Lookbehind => {
928 let new_mask = self.nullability(node_id).and(mask);
929 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
930 if node_id.right(self) != NodeId::MISSING {
931 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
932 }
933 }
934 Kind::Lookahead => {
935 let la_inner = self.get_lookahead_inner(node_id);
936 if self.is_nullable(la_inner, mask) {
937 let rel = self.get_lookahead_rel(node_id);
938 if rel != u32::MAX {
939 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
940 }
941 let la_tail = self.get_lookahead_tail(node_id);
943 if la_tail != NodeId::MISSING {
944 self.get_nulls(pending_rel, mask, acc, la_tail);
945 }
946 }
947 }
948 Kind::Counted => {
949 let packed = self.get_extra(node_id);
950 let best = packed >> 16;
951 if best > 0 {
952 acc.insert(NullState::new(mask, pending_rel + best));
953 }
954 }
955 }
956 }
957
958 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
959 self.get_meta_flags(node_id)
960 .has(MetaFlags::CONTAINS_LOOKAROUND)
961 }
962
963 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
964 self.get_meta_flags(node_id)
965 .has(MetaFlags::CONTAINS_ANCHORS)
966 }
967
968 pub fn is_infinite(&self, node_id: NodeId) -> bool {
969 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
970 }
971
972 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
974 if self.is_infinite(node_id) {
975 if node_id.is_inter(self) {
976 self.get_bounded_length(node_id)
977 } else {
978 (self.get_min_length_only(node_id), u32::MAX)
979 }
980 } else {
981 self.get_bounded_length(node_id)
982 }
983 }
984
985 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
986 if node_id == NodeId::EPS {
987 return (0, 0);
988 }
989 match self.get_kind(node_id) {
990 Kind::End | Kind::Begin => (0, 0),
991 Kind::Pred => (1, 1),
992 Kind::Concat => {
993 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
994 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
995 (lmin + rmin, lmax.saturating_add(rmax))
996 }
997 Kind::Union => {
998 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
999 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1000 (lmin.min(rmin), lmax.max(rmax))
1001 }
1002 Kind::Inter => {
1003 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
1004 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
1005 (lmin.max(rmin), lmax.min(rmax))
1006 }
1007 Kind::Lookahead => {
1008 let body = node_id.left(self);
1009 if self.is_infinite(body) {
1010 return (0, u32::MAX);
1011 }
1012 let right = node_id.right(self);
1013 if right.is_missing() {
1014 (0, 0)
1015 } else {
1016 self.get_min_max_length(right)
1017 }
1018 }
1019 Kind::Counted => (0, 0),
1020 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
1021 }
1022 }
1023
1024 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1025 match self.get_kind(node_id) {
1026 Kind::End | Kind::Begin => Some(0),
1027 Kind::Pred => Some(1),
1028 Kind::Concat => {
1029 let l = self.get_fixed_length(node_id.left(self))?;
1030 let r = self.get_fixed_length(node_id.right(self))?;
1031 Some(l + r)
1032 }
1033 Kind::Union => {
1034 let l = self.get_fixed_length(node_id.left(self))?;
1035 let r = self.get_fixed_length(node_id.right(self))?;
1036 if l == r {
1037 Some(l)
1038 } else {
1039 None
1040 }
1041 }
1042 Kind::Inter => {
1043 let l = self.get_fixed_length(node_id.left(self))?;
1044 let r = self.get_fixed_length(node_id.right(self))?;
1045 if l == r {
1046 Some(l)
1047 } else {
1048 None
1049 }
1050 }
1051 Kind::Lookahead => {
1052 let right = node_id.right(self);
1053 if right.is_missing() {
1054 Some(0)
1055 } else {
1056 self.get_fixed_length(right)
1057 }
1058 }
1059 Kind::Counted => Some(0),
1060 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1061 }
1062 }
1063
1064 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1065 match self.get_kind(node_id) {
1066 Kind::End | Kind::Begin => 0,
1067 Kind::Pred => 1,
1068 Kind::Concat => {
1069 self.get_min_length_only(node_id.left(self))
1070 + self.get_min_length_only(node_id.right(self))
1071 }
1072 Kind::Union => self
1073 .get_min_length_only(node_id.left(self))
1074 .min(self.get_min_length_only(node_id.right(self))),
1075 Kind::Inter => self
1076 .get_min_length_only(node_id.left(self))
1077 .max(self.get_min_length_only(node_id.right(self))),
1078 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1079 Kind::Compl => {
1080 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1081 0
1082 } else {
1083 1
1084 }
1085 }
1086 }
1087 }
1088
1089 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1090 if node_id == NodeId::TS {
1091 return true;
1092 }
1093 match self.get_kind(node_id) {
1094 Kind::Inter => {
1095 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1096 }
1097 Kind::Union => {
1098 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1099 }
1100 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1101 _ => false,
1102 }
1103 }
1104
1105 #[inline]
1106 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1107 if node_id.is_concat(self) {
1108 self.ends_with_ts(node_id.right(self))
1109 } else {
1110 node_id == NodeId::TS
1111 }
1112 }
1113
1114 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1115 debug_assert!(node_id != NodeId::MISSING);
1116 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1117 }
1118
1119 pub(crate) fn cache_der(
1120 &mut self,
1121 node_id: NodeId,
1122 result: TRegexId,
1123 mask: Nullability,
1124 ) -> TRegexId {
1125 if mask == Nullability::CENTER {
1126 self.tr_der_center[node_id.0 as usize] = result
1127 } else {
1128 self.tr_der_begin[node_id.0 as usize] = result
1129 };
1130 result
1131 }
1132
1133 pub(crate) fn try_cached_der(
1134 &mut self,
1135 node_id: NodeId,
1136 mask: Nullability,
1137 ) -> Option<TRegexId> {
1138 let cache = if mask == Nullability::CENTER {
1139 &mut self.tr_der_center
1140 } else {
1141 &mut self.tr_der_begin
1142 };
1143 match cache.get(node_id.0 as usize) {
1144 Some(&TRegexId::MISSING) => {}
1145 Some(&result) => {
1146 return Some(result);
1147 }
1148 None => {
1149 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1150 }
1151 }
1152 None
1153 }
1154
1155 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1156 let mut term = self.get_tregex(der);
1157 loop {
1158 match *term {
1159 TRegex::Leaf(node_id) => return node_id,
1160 TRegex::ITE(cond, _then, _else) => {
1161 if self.solver().is_sat_id(set, cond) {
1162 term = self.get_tregex(_then);
1163 } else {
1164 term = self.get_tregex(_else);
1165 }
1166 }
1167 }
1168 }
1169 }
1170
1171 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1172 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1173 debug_assert!(
1174 node_id != NodeId::MISSING,
1175 "attempting to derive missing node"
1176 );
1177 if let Some(result) = self.try_cached_der(node_id, mask) {
1178 return Ok(result);
1179 }
1180
1181 let result = match node_id.kind(self) {
1182 Kind::Compl => {
1183 let leftd = node_id.left(self).der(self, mask)?;
1184 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1185 }
1186 Kind::Inter => {
1187 let leftd = node_id.left(self).der(self, mask)?;
1188 let rightd = node_id.right(self).der(self, mask)?;
1189 {
1190 let this = &mut *self;
1191 this.mk_binary(
1192 leftd,
1193 rightd,
1194 &mut (|b, left, right| b.mk_inter(left, right)),
1195 )
1196 }
1197 }
1198 Kind::Union => {
1199 let leftd = self.der(node_id.left(self), mask)?;
1200 let rightd = self.der(node_id.right(self), mask)?;
1201 {
1202 let this = &mut *self;
1203 this.mk_binary(
1204 leftd,
1205 rightd,
1206 &mut (|b, left, right| b.mk_union(left, right)),
1207 )
1208 }
1209 }
1210 Kind::Concat => {
1211 let head = node_id.left(self);
1212 let tail = node_id.right(self);
1213 let tail_leaf = self.mk_leaf(tail);
1214 let head_der = self.der(head, mask)?;
1215
1216 if self.is_nullable(head, mask) {
1217 let rightd = self.der(tail, mask)?;
1218 let ldr = {
1219 let this = &mut *self;
1220 this.mk_binary(
1221 head_der,
1222 tail_leaf,
1223 &mut (|b, left, right| b.mk_concat(left, right)),
1224 )
1225 };
1226 {
1227 let this = &mut *self;
1228 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1229 }
1230 } else {
1231 let this = &mut *self;
1232 this.mk_binary(
1233 head_der,
1234 tail_leaf,
1235 &mut (|b, left, right| b.mk_concat(left, right)),
1236 )
1237 }
1238 }
1239 Kind::Star => {
1240 if node_id == NodeId::EPS {
1241 TRegexId::BOT
1242 } else {
1243 let left = node_id.left(self);
1244 let r_decr_leaf = self.mk_leaf(node_id);
1245 let r_der = self.der(left, mask)?;
1246 let this = &mut *self;
1247 this.mk_binary(
1248 r_der,
1249 r_decr_leaf,
1250 &mut (|b, left, right| b.mk_concat(left, right)),
1251 )
1252 }
1253 }
1254 Kind::Lookbehind => {
1255 let lb_prev_der = {
1256 let lb_prev = self.get_lookbehind_prev(node_id);
1257 if lb_prev == NodeId::MISSING {
1258 TRegexId::MISSING
1259 } else {
1260 self.der(lb_prev, mask)?
1261 }
1262 };
1263 let lb_inner = self.get_lookbehind_inner(node_id);
1264 let lb_inner_der = self.der(lb_inner, mask)?;
1265 {
1266 let this = &mut *self;
1267 this.mk_binary_result(
1268 lb_inner_der,
1269 lb_prev_der,
1270 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1271 )?
1272 }
1273 }
1274 Kind::Lookahead => {
1275 let la_tail = self.get_lookahead_tail(node_id);
1276 let la_body = node_id.left(self);
1277 let rel = self.get_lookahead_rel(node_id);
1278
1279 if self.is_nullable(la_body, mask) {
1280 let right = node_id.right(self).missing_to_eps();
1282 let rder = self.der(right, mask).clone();
1283 return rder;
1284 }
1285
1286 if rel == u32::MAX {
1287 let la_body_der = self.der(la_body, mask)?;
1288 if la_tail.is_kind(self, Kind::Pred) {
1289 let transitioned =
1290 self.transition_term(la_body_der, la_tail.pred_tset(self));
1291 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1292 let concated = self.mk_concat(la_tail, new_la);
1293 return self.der(concated, mask);
1294 }
1295 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1296 let left = la_tail.left(self);
1297 let tset = left.pred_tset(self);
1298 let transitioned = self.transition_term(la_body_der, tset);
1299 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1300 let tail_right = la_tail.right(self);
1301 let concated = self.mk_concat(new_la, tail_right);
1302 let concated = self.mk_concat(left, concated);
1303 return self.der(concated, mask);
1304 }
1305 }
1306
1307 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1308 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1309 let concated = self.mk_concat(la_body, nulls_mask);
1310 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1311 let non_nulled = self.mk_non_nullable_safe(la_tail);
1312 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1313 let new_union = self.mk_union(concated_look, new_look);
1314 return self.der(new_union, mask);
1315 }
1316
1317 let la_tail_der = if la_tail == NodeId::MISSING {
1318 TRegexId::MISSING
1319 } else {
1320 if self.is_nullable(la_tail, mask) {
1321 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1322 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1323 let la_union = self.mk_union(la_tail, nulls_la);
1324 self.der(la_union, mask)?
1325 } else {
1326 self.der(la_tail, mask)?
1327 }
1328 };
1329
1330 let la_body_der = self.der(la_body, mask)?;
1331
1332 if rel != u32::MAX && rel > self.lookahead_context_max {
1333 return Err(AlgebraError::AnchorLimit);
1334 }
1335
1336 let la = {
1337 let this = &mut *self;
1338 let rel = helpers::incr_rel(rel);
1339 this.mk_binary(
1340 la_body_der,
1341 la_tail_der,
1342 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1343 )
1344 };
1345
1346 if rel != u32::MAX
1347 && la_tail_der != TRegexId::MISSING
1348 && self.is_nullable(la_tail, mask)
1349 {
1350 let look_only = {
1351 let this = &mut *self;
1352 let right = TRegexId::MISSING;
1353 let rel = helpers::incr_rel(rel);
1354 this.mk_binary(
1355 la_body_der,
1356 right,
1357 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1358 )
1359 };
1360 {
1361 let this = &mut *self;
1362 this.mk_binary(
1363 look_only,
1364 la,
1365 &mut (|b, left, right| b.mk_union(left, right)),
1366 )
1367 }
1368 } else {
1369 la
1370 }
1371 }
1372 Kind::Counted => {
1373 let body = node_id.left(self);
1374 let chain = node_id.right(self);
1375 let packed = self.get_extra(node_id);
1376 let step = (packed & 0xFFFF) as u16;
1377 let best = (packed >> 16) as u16;
1378
1379 let mid_best = if self.is_nullable(body, mask) && step >= best {
1380 step
1381 } else {
1382 best
1383 };
1384
1385 let body_der = self.der(body, mask)?;
1386 let new_step = step.saturating_add(1);
1387 self.mk_unary(body_der, &mut |b, new_body| {
1388 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1389 new_step
1390 } else {
1391 mid_best
1392 };
1393 let packed = (final_best as u32) << 16 | new_step as u32;
1394 b.mk_counted(new_body, chain, packed)
1395 })
1396 }
1397 Kind::Begin | Kind::End => TRegexId::BOT,
1398 Kind::Pred => {
1399 let psi = node_id.pred_tset(self);
1400 if psi == TSetId::EMPTY {
1401 TRegexId::BOT
1402 } else {
1403 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1404 }
1405 }
1406 };
1407
1408 self.cache_der(node_id, result, mask);
1412 Ok(result)
1413 }
1414
1415 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1416 debug_assert!(meta_id != MetadataId::MISSING);
1417 match self.metadata.get_mut(node_id.0 as usize) {
1418 Some(v) => *v = meta_id,
1419 None => {
1420 self.metadata
1421 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1422 self.metadata[node_id.0 as usize] = meta_id;
1423 }
1424 }
1425 }
1426
1427 fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1428 debug_assert!(reversed_id != NodeId::MISSING);
1429 match self.reversed.get_mut(node_id.0 as usize) {
1430 Some(v) => *v = reversed_id,
1431 None => {
1432 self.reversed
1433 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1434 self.reversed[node_id.0 as usize] = reversed_id;
1435 }
1436 }
1437 return reversed_id;
1438 }
1439
1440 fn init(&mut self, inst: NodeKey) -> NodeId {
1441 self.num_created += 1;
1442 let node_id = NodeId(self.num_created);
1443 self.index.insert(inst.clone(), node_id);
1444 match inst.kind {
1445 Kind::Pred => {
1446 let meta_id = self.mb.get_meta_id(Metadata {
1447 flags: MetaFlags::ZERO,
1448 nulls: NullsId::EMPTY,
1449 });
1450 self.init_metadata(node_id, meta_id);
1451 }
1452 Kind::Begin => {
1453 let meta_id = self.mb.get_meta_id(Metadata {
1454 flags: MetaFlags::with_nullability(
1455 Nullability::BEGIN,
1456 MetaFlags::CONTAINS_ANCHORS,
1457 ),
1458 nulls: NullsId::BEGIN0,
1459 });
1460 self.init_metadata(node_id, meta_id);
1461 }
1462 Kind::End => {
1463 let meta_id = self.mb.get_meta_id(Metadata {
1464 flags: MetaFlags::with_nullability(
1465 Nullability::END,
1466 MetaFlags::CONTAINS_ANCHORS,
1467 ),
1468 nulls: NullsId::END0,
1469 });
1470 self.init_metadata(node_id, meta_id);
1471 }
1472 Kind::Inter => {
1473 let m1 = self.get_node_meta_id(inst.left);
1474 let m2 = self.get_node_meta_id(inst.right);
1475 let meta_id = {
1476 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1477 let mask_l = inst.left.nullability(self);
1478 let mask_r = inst.right.nullability(self);
1479 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1480 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1481 nulls = self.mb.nb.and_mask(nulls, mask_l);
1482 nulls = self.mb.nb.and_mask(nulls, mask_r);
1483 let new_meta = Metadata {
1484 flags: self.mb.flags_inter(m1, m2),
1485 nulls,
1486 };
1487 self.mb.get_meta_id(new_meta)
1488 };
1489 self.init_metadata(node_id, meta_id);
1490 }
1491 Kind::Union => {
1492 let m1 = self.get_node_meta_id(inst.left);
1493 let m2 = self.get_node_meta_id(inst.right);
1494 let meta_id = {
1495 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1496 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1497 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1498 let new_meta = Metadata {
1499 flags: self.mb.flags_union(m1, m2),
1500 nulls,
1501 };
1502 self.mb.get_meta_id(new_meta)
1503 };
1504 self.init_metadata(node_id, meta_id);
1505 }
1506 Kind::Concat => {
1507 let flags = self.mb.flags_concat(
1508 self.get_node_meta_id(inst.left),
1509 self.get_node_meta_id(inst.right),
1510 );
1511
1512 let right_nullability = inst.right.nullability(self);
1513 let left_nullability = inst.left.nullability(self);
1514 let nulls_left = self.get_nulls_id(inst.left);
1515 let nulls_right = self.get_nulls_id(inst.right);
1516 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1517 let mask = right_nullability.and(left_nullability);
1518 nulls = self.mb.nb.and_mask(nulls, mask);
1519
1520 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1521 self.init_metadata(node_id, new_id);
1522 }
1523 Kind::Star => {
1524 let left_nulls = self.get_nulls_id(inst.left);
1525 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1526 let meta_id = self.mb.get_meta_id(Metadata {
1527 flags: self
1528 .mb
1529 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1530 nulls,
1531 });
1532 self.init_metadata(node_id, meta_id);
1533 }
1534 Kind::Compl => {
1535 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1536 let meta_id = self.mb.get_meta_id(Metadata {
1537 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1538 nulls,
1539 });
1540 self.init_metadata(node_id, meta_id);
1541 }
1542 Kind::Lookbehind => {
1543 let mut left_nullability = self.nullability(inst.left);
1544 let mut contains_flags = self.get_flags_contains(inst.left);
1545 let nulls = if inst.right.is_missing() {
1546 self.get_nulls_id(inst.left)
1547 } else {
1548 let right_nullability = self.nullability(inst.right);
1549 let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1550 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1551 left_nullability = left_nullability.and(right_nullability);
1552 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1553 self.mb.nb.and_id(nulls_left, nulls_right)
1554 };
1555
1556 let meta_id = self.mb.get_meta_id(Metadata {
1557 flags: MetaFlags::with_nullability(
1558 left_nullability,
1559 contains_flags
1560 .or(MetaFlags::CONTAINS_LOOKAROUND)
1561 .or(MetaFlags::CONTAINS_LOOKBEHIND),
1562 ),
1563 nulls,
1564 });
1565 self.init_metadata(node_id, meta_id);
1566 }
1567 Kind::Lookahead => {
1568 let mut nulls = self.get_nulls_id(inst.left);
1569 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1571 let left_nullability = inst.left.nullability(self);
1572 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1573
1574 nulls = self.mb.nb.or_id(nulls, nulls_right);
1575
1576 let la_inner = inst.left;
1577 let la_tail = inst.right;
1578 let null = self
1579 .get_meta_flags(la_inner)
1580 .nullability()
1581 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1582 let contains_flags = self
1583 .get_flags_contains(la_inner)
1584 .or(self.get_flags_contains(la_tail));
1585
1586 let meta_id = self.mb.get_meta_id(Metadata {
1587 flags: MetaFlags::with_nullability(
1588 null,
1589 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1590 ),
1591 nulls,
1592 });
1593 self.init_metadata(node_id, meta_id);
1594 }
1595 Kind::Counted => {
1596 let best = inst.extra >> 16;
1597 let (null, nulls) = if best > 0 {
1598 let mut ns = BTreeSet::new();
1599 ns.insert(NullState::new(Nullability::CENTER, best));
1600 (Nullability::CENTER, self.mb.nb.get_id(ns))
1601 } else {
1602 (Nullability::NEVER, NullsId::EMPTY)
1603 };
1604 let meta_id = self.mb.get_meta_id(Metadata {
1605 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1606 nulls,
1607 });
1608 self.init_metadata(node_id, meta_id);
1609 }
1610 }
1611
1612 self.array.push(inst);
1613
1614 if let Some(rw) = self.post_init_simplify(node_id) {
1615 self.override_as(node_id, rw)
1616 } else {
1617 node_id
1618 }
1619 }
1620
1621 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1622 match self.get_kind(node_id) {
1623 Kind::Concat => {
1624 let lhs = node_id.left(self);
1625 let rhs = node_id.right(self);
1626 if lhs.is_pred_star(self).is_some() {
1627 if let Some(opttail) = rhs.is_opt_v(self) {
1628 if let Some(true) = self.subsumes(lhs, opttail) {
1629 return Some(lhs);
1630 }
1631 }
1632 }
1633 }
1634 Kind::Union => {
1635 let lhs = node_id.left(self);
1636 let rhs = node_id.right(self);
1637 let mut subsumed = false;
1638 rhs.iter_union_while(self, &mut |b, branch| {
1639 if b.nullable_subsumes(branch, lhs) {
1640 subsumed = true;
1641 }
1642 !subsumed
1643 });
1644 if subsumed {
1645 return Some(rhs);
1646 }
1647 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1648 return Some(rhs);
1649 }
1650 }
1651 _ => {}
1652 }
1653
1654 None
1655 }
1656
1657 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1659 if self.get_kind(lhs) != Kind::Union {
1660 return false; }
1662 let mut rhs_branches = Vec::new();
1663 let mut curr = rhs;
1664 while self.get_kind(curr) == Kind::Union {
1665 rhs_branches.push(self.get_left(curr));
1666 curr = self.get_right(curr);
1667 }
1668 rhs_branches.push(curr);
1669
1670 curr = lhs;
1671 while self.get_kind(curr) == Kind::Union {
1672 if !rhs_branches.contains(&self.get_left(curr)) {
1673 return false;
1674 }
1675 curr = self.get_right(curr);
1676 }
1677 rhs_branches.contains(&curr)
1678 }
1679
1680 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1682 if node == target {
1683 return true;
1684 }
1685 match self.get_kind(node) {
1686 Kind::Union => {
1687 self.nullable_subsumes(self.get_left(node), target)
1688 || self.nullable_subsumes(self.get_right(node), target)
1689 }
1690 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1691 self.nullable_subsumes(self.get_right(node), target)
1692 }
1693 _ => false,
1694 }
1695 }
1696
1697 pub fn num_nodes(&self) -> u32 {
1698 self.num_created
1699 }
1700
1701 pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1702 let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1703 let mut stack: Vec<NodeId> = vec![root];
1704 while let Some(n) = stack.pop() {
1705 if n == NodeId::MISSING
1706 || n == NodeId::BOT
1707 || n == NodeId::EPS
1708 || n == NodeId::TS
1709 || n == NodeId::BEGIN
1710 || n == NodeId::END
1711 {
1712 continue;
1713 }
1714 if seen.insert(n, ()).is_some() {
1715 continue;
1716 }
1717 if seen.len() >= limit {
1718 return limit;
1719 }
1720 stack.push(self.get_left(n));
1721 stack.push(self.get_right(n));
1722 }
1723 seen.len()
1724 }
1725 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1726 match self.index.get(&inst) {
1727 Some(&id) => id,
1728 None => self.init(inst),
1729 }
1730 }
1731 #[inline]
1732 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1733 self.index.get(inst)
1734 }
1735
1736 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1737 self.index.insert(key, subsumed);
1738 subsumed
1739 }
1740
1741 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1742 let key = &self.array[key.0 as usize];
1743 let entry = self.index.get_mut(key).unwrap();
1744 *entry = subsumed;
1745 subsumed
1746 }
1747
1748 #[inline]
1749 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1750 self.array[node_id.0 as usize].left
1751 }
1752
1753 #[inline]
1754 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1755 self.array[node_id.0 as usize].right
1756 }
1757
1758 #[inline]
1759 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1760 self.array[node_id.0 as usize].extra
1761 }
1762
1763 #[inline]
1764 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1765 debug_assert!(node_id.is_concat(self));
1766 let mut curr = node_id;
1767 while curr.is_concat(self) {
1768 curr = curr.right(self);
1769 }
1770 curr
1771 }
1772
1773 fn has_trailing_la(&self, node: NodeId) -> bool {
1774 let end = match self.get_kind(node) {
1775 Kind::Concat => self.get_concat_end(node),
1776 Kind::Lookahead => node,
1777 _ => return false,
1778 };
1779 end.is_lookahead(self) && end.right(self).is_missing()
1780 }
1781
1782 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1783 if node.is_lookahead(self) {
1784 return (NodeId::EPS, node);
1785 }
1786 debug_assert!(node.is_concat(self));
1787 let right = node.right(self);
1788 if !right.is_concat(self) {
1789 return (node.left(self), right);
1790 }
1791 let (stripped, la) = self.strip_trailing_la(right);
1792 (self.mk_concat(node.left(self), stripped), la)
1793 }
1794 #[inline]
1795 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1796 debug_assert!(matches!(
1797 self.get_kind(lookahead_node_id),
1798 Kind::Lookahead | Kind::Counted
1799 ));
1800 lookahead_node_id.left(self)
1801 }
1802 #[inline]
1803 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1804 debug_assert!(lookahead_node_id.is_lookahead(self));
1805 lookahead_node_id.right(self)
1806 }
1807 #[inline]
1808 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1809 debug_assert!(
1810 matches!(
1811 self.get_kind(lookahead_node_id),
1812 Kind::Lookahead | Kind::Counted
1813 ),
1814 "not lookahead/counted: {:?}",
1815 self.pp(lookahead_node_id)
1816 );
1817 self.get_extra(lookahead_node_id)
1818 }
1819 #[inline]
1820 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1821 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1822 lookbehind_node_id.left(self)
1823 }
1824 #[inline]
1825 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1826 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1827 lookbehind_node_id.right(self)
1828 }
1829
1830 #[inline]
1831 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1832 debug_assert!(
1833 self.array.len() > node_id.0 as usize,
1834 "array len: {:?}",
1835 node_id
1836 );
1837 self.array[node_id.0 as usize].kind
1838 }
1839
1840 #[inline]
1841 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1842 &self.array[node_id.0 as usize]
1843 }
1844
1845 #[inline]
1846 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1847 self.metadata[node_id.0 as usize]
1848 }
1849
1850 #[inline]
1851 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1852 debug_assert!(node_id.0 != u32::MAX);
1853 let meta_id = self.get_node_meta_id(node_id);
1854 debug_assert!(meta_id != MetadataId::MISSING);
1855 &self.mb.array[meta_id.0 as usize]
1856 }
1857
1858 #[inline]
1859 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1860 if node_id == NodeId::MISSING {
1861 NullsId::EMPTY
1862 } else {
1863 self.get_meta(node_id).nulls
1864 }
1865 }
1866
1867 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1868 self.mb
1869 .nb
1870 .array
1871 .iter()
1872 .map(|set| set.iter().cloned().collect())
1873 .collect()
1874 }
1875
1876 pub fn nulls_count(&self) -> usize {
1877 self.mb.nb.array.len()
1878 }
1879
1880 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1881 self.mb.nb.array[id as usize].iter().cloned().collect()
1882 }
1883
1884 pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
1887 self.mb.nb.and_mask(nid, Nullability::CENTER)
1888 }
1889
1890 #[inline]
1891 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1892 if node_id == NodeId::MISSING {
1893 NullsId::EMPTY
1894 } else {
1895 let nulls = self.get_meta(node_id).nulls;
1896 self.mb.nb.and_mask(nulls, mask)
1897 }
1898 }
1899
1900 #[inline]
1901 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1902 let meta_id = self.get_node_meta_id(node_id);
1903 let meta = &self.mb.array[meta_id.0 as usize];
1904 meta.flags
1905 }
1906
1907 #[inline]
1908 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1909 self.get_meta(node_id).flags.nullability()
1910 }
1911
1912 #[inline]
1913 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1914 let meta_id = self.get_node_meta_id(node_id);
1915 let meta = &self.mb.array[meta_id.0 as usize];
1916 meta.flags.all_contains_flags()
1917 }
1918
1919 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1920 if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
1921 return self.strip_lb(node_id.right(self));
1922 }
1923 self.strip_lb_inner(node_id)
1924 }
1925
1926 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1927 if !self.contains_look(node_id) {
1928 return Ok(node_id);
1929 }
1930 if node_id.is_concat(self) && node_id.left(self).is_lookbehind(self) {
1931 let lb = node_id.left(self);
1932 let prev = self.get_lookbehind_prev(lb);
1933 let tail = self.strip_lb_inner(node_id.right(self))?;
1934 if prev != NodeId::MISSING {
1935 let stripped_prev = self.strip_lb_inner(prev)?;
1936 return Ok(self.mk_concat(stripped_prev, tail));
1937 }
1938 return Ok(tail);
1939 }
1940 if node_id.is_inter(self) {
1941 let left = self.strip_lb_inner(node_id.left(self))?;
1942 let right = self.strip_lb_inner(node_id.right(self))?;
1943 return Ok(self.mk_inter(left, right));
1944 }
1945 if self.get_kind(node_id) == Kind::Union {
1946 let left = self.strip_lb_inner(node_id.left(self))?;
1947 let right = self.strip_lb_inner(node_id.right(self))?;
1948 return Ok(self.mk_union(left, right));
1949 }
1950 match self.get_kind(node_id) {
1951 Kind::Lookbehind => {
1952 let prev = self.get_lookbehind_prev(node_id);
1953 if prev != NodeId::MISSING {
1954 self.strip_lb_inner(prev)
1955 } else {
1956 Ok(NodeId::EPS)
1957 }
1958 }
1959 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1960 Err(AlgebraError::UnsupportedPattern)
1961 }
1962 _ => Ok(node_id),
1963 }
1964 }
1965
1966 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1968 if !self.contains_anchors(node_id) {
1969 return node_id;
1970 }
1971 match self.get_kind(node_id) {
1972 Kind::Begin => NodeId::BOT,
1973 Kind::Concat => {
1974 let left = self.nonbegins(node_id.left(self));
1975 if left == NodeId::BOT {
1976 return NodeId::BOT;
1977 }
1978 self.mk_concat(left, node_id.right(self))
1979 }
1980 Kind::Union => {
1981 let left = self.nonbegins(node_id.left(self));
1982 let right = self.nonbegins(node_id.right(self));
1983 self.mk_union(left, right)
1984 }
1985 _ => node_id,
1986 }
1987 }
1988
1989 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1990 match self.get_kind(node_id) {
1991 Kind::Concat => {
1992 let head = node_id.left(self);
1993 match self.get_kind(head) {
1994 _ if self.any_nonbegin_nullable(head) => {
1995 self.strip_prefix_safe(node_id.right(self))
1996 }
1997 _ => node_id,
1998 }
1999 }
2000 _ => node_id,
2001 }
2002 }
2003 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2004 match self.get_kind(node_id) {
2005 Kind::Begin => NodeId::BOT,
2006 Kind::Concat => {
2007 let head = self.prune_begin(node_id.left(self));
2008 let tail = self.prune_begin(node_id.right(self));
2009 self.mk_concat(head, tail)
2010 }
2011 Kind::Lookbehind => {
2012 if !node_id.right(self).is_missing() {
2013 return node_id;
2014 }
2015 let head = self.prune_begin(node_id.left(self));
2016 head
2017 }
2018 Kind::Union => {
2019 let left = self.prune_begin(node_id.left(self));
2020 let right = self.prune_begin(node_id.right(self));
2021 self.mk_union(left, right)
2022 }
2023 _ => node_id,
2024 }
2025 }
2026 pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2027 match self.get_kind(node_id) {
2028 Kind::Begin => NodeId::BOT,
2029 Kind::Concat => {
2030 let l = node_id.left(self);
2031 let r = node_id.right(self);
2032 if r.is_concat(self) {
2033 let r_left = r.left(self);
2034 if l.is_ts() && r_left.is_begin() {
2035 return self.prune_begin_eps(r.right(self));
2036 }
2037 }
2038 let head = self.prune_begin_eps(l);
2039 let tail = self.prune_begin_eps(r);
2040 self.mk_concat(head, tail)
2041 }
2042 Kind::Lookbehind => {
2043 if !node_id.right(self).is_missing() {
2044 return node_id;
2045 }
2046 let head = self.prune_begin_eps(node_id.left(self));
2047 head
2048 }
2049 Kind::Union => {
2050 let left = self.prune_begin_eps(node_id.left(self));
2051 let right = self.prune_begin_eps(node_id.right(self));
2052 self.mk_union(left, right)
2053 }
2054 _ => node_id,
2055 }
2056 }
2057
2058 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2059 if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2060 return Ok(node_id);
2061 }
2062 let result = match self.get_kind(node_id) {
2063 Kind::Concat => {
2064 let left = self.normalize_rev(node_id.left(self))?;
2065 let right = self.normalize_rev(node_id.right(self))?;
2066 self.mk_concat(left, right)
2067 }
2068 Kind::Inter => {
2069 let left = self.normalize_rev(node_id.left(self))?;
2070 let right = self.normalize_rev(node_id.right(self))?;
2071 self.mk_inter(left, right)
2072 }
2073 Kind::Union => {
2074 let left = self.normalize_rev(node_id.left(self))?;
2075 let right = self.normalize_rev(node_id.right(self))?;
2076 self.mk_union(left, right)
2077 }
2078 Kind::Lookbehind => {
2079 let left = self.normalize_rev(node_id.left(self))?;
2080 let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2081 let lbody_ts = self.mk_concat(NodeId::TS, left);
2082 let ltail_ts = self.mk_concat(NodeId::TS, right);
2083 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2084 as_inter
2085 }
2086 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2087 return Err(AlgebraError::UnsupportedPattern)
2088 }
2089 _ => node_id,
2090 };
2091 Ok(result)
2092 }
2093
2094 pub fn normalize_rev_prune(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2095 if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2096 return Ok(node_id);
2097 }
2098 let result = match self.get_kind(node_id) {
2099 Kind::Concat => {
2100 let left = self.normalize_rev(node_id.left(self))?;
2101 let right = self.normalize_rev(node_id.right(self))?;
2102 self.mk_concat(left, right)
2103 }
2104 Kind::Inter => {
2105 let left = self.normalize_rev(node_id.left(self))?;
2106 let right = self.normalize_rev(node_id.right(self))?;
2107 self.mk_inter(left, right)
2108 }
2109 Kind::Union => {
2110 let left = self.normalize_rev(node_id.left(self))?;
2111 let right = self.normalize_rev(node_id.right(self))?;
2112 self.mk_union(left, right)
2113 }
2114 Kind::Lookbehind => {
2115 let left = self.normalize_rev(node_id.left(self))?;
2116 let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2117 let lbody_ts = self.mk_concat(NodeId::TS, left);
2118 let ltail_ts = self.mk_concat(NodeId::TS, right);
2119 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2120 as_inter
2121 }
2122 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2123 return Err(AlgebraError::UnsupportedPattern)
2124 }
2125 _ => node_id,
2126 };
2127 Ok(result)
2128 }
2129
2130 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
2131 debug_assert!(node_id != NodeId::MISSING);
2132 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2133 if *rev != NodeId::MISSING {
2134 return Ok(*rev);
2135 }
2136 }
2137 let rw = match self.get_kind(node_id) {
2138 Kind::End => NodeId::BEGIN,
2139 Kind::Begin => NodeId::END,
2140 Kind::Pred => node_id,
2141 Kind::Concat => {
2142 let left = self.reverse(node_id.left(self))?;
2143 let right = self.reverse(node_id.right(self))?;
2144 self.mk_concat(right, left)
2145 }
2146 Kind::Union => {
2147 let left = self.reverse(node_id.left(self))?;
2148 let right = self.reverse(node_id.right(self))?;
2149 self.mk_union(left, right)
2150 }
2151 Kind::Inter => {
2152 let left = self.reverse(node_id.left(self))?;
2153 let right = self.reverse(node_id.right(self))?;
2154 self.mk_inter(left, right)
2155 }
2156 Kind::Star => {
2157 let body = self.reverse(node_id.left(self))?;
2158 self.mk_star(body)
2159 }
2160 Kind::Compl => {
2161 if self.contains_look(node_id.left(self)) {
2162 return Err(AlgebraError::UnsupportedPattern);
2163 }
2164 let body = self.reverse(node_id.left(self))?;
2165 self.mk_compl(body)
2166 }
2167 Kind::Lookbehind => {
2168 let prev = self.get_lookbehind_prev(node_id);
2169 let inner_id = self.get_lookbehind_inner(node_id);
2170 let rev_inner = self.reverse(inner_id)?;
2171 let rev_prev = if prev != NodeId::MISSING {
2172 self.reverse(prev)?
2173 } else {
2174 NodeId::MISSING
2175 };
2176 self.mk_lookahead(rev_inner, rev_prev, 0)
2177 }
2178 Kind::Lookahead => {
2179 let rel = self.get_lookahead_rel(node_id);
2180 if rel == u32::MAX {
2182 let lbody = self.get_lookahead_inner(node_id);
2183 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2184 let rbody = self.reverse(lbody);
2185 let rtail = self.reverse(ltail);
2186 let rev = self.mk_lookbehind(rbody?, rtail?);
2187 return Ok(self.cache_reversed(node_id, rev));
2188 }
2189 if rel != 0 {
2190 return Err(AlgebraError::UnsupportedPattern);
2191 }
2192 let tail_node = self.get_lookahead_tail(node_id);
2193 let rev_tail = if tail_node != NodeId::MISSING {
2194 self.reverse(tail_node)?
2195 } else {
2196 NodeId::MISSING
2197 };
2198 let inner_id = self.get_lookahead_inner(node_id);
2199 let rev_inner = self.reverse(inner_id)?;
2200 self.mk_lookbehind(rev_inner, rev_tail)
2201 }
2202 Kind::Counted => {
2203 return Err(AlgebraError::UnsupportedPattern);
2204 }
2205 };
2206 self.cache_reversed(node_id, rw);
2207 Ok(rw)
2208 }
2209
2210 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2211 let node = NodeKey {
2212 kind: Kind::Pred,
2213 left: NodeId::MISSING,
2214 right: NodeId::MISSING,
2215 extra: pred.0,
2216 };
2217 self.get_node_id(node)
2218 }
2219
2220 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2221 let key = NodeKey {
2222 kind: Kind::Compl,
2223 left: body,
2224 right: NodeId::MISSING,
2225 extra: u32::MAX,
2226 };
2227 if let Some(id) = self.key_is_created(&key) {
2228 return *id;
2229 }
2230 if body == NodeId::BOT {
2231 return NodeId::TS;
2232 }
2233 if body == NodeId::TS {
2234 return NodeId::BOT;
2235 }
2236
2237 if let Some(contains_body) = body.is_contains(self) {
2238 if contains_body.is_pred(self) {
2239 let pred = contains_body.pred_tset(self);
2240 let notpred = self.mk_pred_not(pred);
2241 let node = self.mk_star(notpred);
2242 return self.init_as(key, node);
2243 } else if contains_body == NodeId::END {
2244 return self.init_as(key, NodeId::BOT);
2245 }
2246 };
2247
2248 if self.get_kind(body) == Kind::Compl {
2249 return self.get_node(body).left;
2250 }
2251
2252 self.get_node_id(key)
2253 }
2254
2255 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2256 let nid = self.get_nulls_id(body);
2257 let nref = self.mb.nb.get_set_ref(nid).clone();
2258 let mut futures = NodeId::BOT;
2259 for n in nref.iter() {
2260 if !n.is_mask_nullable(mask) {
2261 continue;
2262 }
2263
2264 let eff = if n.rel == 0 {
2265 NodeId::EPS
2266 } else {
2267 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2268 };
2269 futures = self.mk_union(futures, eff)
2270 }
2271 futures
2272 }
2273
2274 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2275 if cfg!(feature = "norewrite") {
2276 return None;
2277 }
2278 #[cfg(debug_assertions)]
2279 if std::env::var("TRACE_CONCAT").is_ok() {
2280 eprintln!("rw2 head={} tail={}", self.pp(head), self.pp(tail));
2281 }
2282
2283 if tail.is_lookbehind(self) {
2284 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2285 return self
2286 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2287 .ok();
2288 }
2289 if head.is_lookahead(self) {
2290 let la_tail = self.get_lookahead_tail(head);
2291 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2292 if new_la_tail.is_center_nullable(self) {
2293 let la_new = self.mk_lookahead_internal(
2294 self.get_lookahead_inner(head),
2295 new_la_tail,
2296 u32::MAX,
2297 );
2298 return Some(la_new);
2299 }
2300 let la_body = self.get_lookahead_inner(head);
2301 let la_rel = self.get_lookahead_rel(head);
2302 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2303 let tail_rel = self.get_lookahead_rel(new_la_tail);
2304 tail_rel + la_rel
2305 } else {
2306 u32::MAX
2307 };
2308
2309 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2310 }
2311
2312 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2313 return Some(head);
2314 }
2315
2316 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2322 return Some(NodeId::TS);
2323 }
2324
2325 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2326 return Some(NodeId::TS);
2327 }
2328
2329 if head.is_inter(self) && tail == NodeId::TS {
2330 let mut alltopstar = true;
2331 head.iter_inter(
2332 self,
2333 &mut (|b, v| {
2334 alltopstar = b.ends_with_ts(v);
2335 }),
2336 );
2337
2338 if alltopstar {
2339 return Some(head);
2340 }
2341 }
2342
2343 if head.is_star(self) && head == tail {
2344 return Some(head);
2345 }
2346
2347 None
2348 }
2349
2350 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2351 use Kind::*;
2352
2353 if cfg!(feature = "norewrite") {
2354 return None;
2355 }
2356 if left == right {
2357 return Some(left);
2358 }
2359
2360 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2361 return Some(left);
2362 }
2363 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2364 return Some(right);
2365 }
2366
2367 if left.is_inter(self) && right.is_inter(self) {
2368 let mut lconj: Vec<NodeId> = Vec::new();
2369 left.any_inter_component(self, |v| {
2370 lconj.push(v);
2371 false
2372 });
2373 let lconj_initial = lconj.len();
2374 let mut common = NodeId::TS;
2375 let mut r_rest = NodeId::TS;
2376 let mut cur = right;
2377 loop {
2378 let (v, next) = if cur.kind(self) == Inter {
2379 (cur.left(self), Some(cur.right(self)))
2380 } else {
2381 (cur, None)
2382 };
2383 if let Some(pos) = lconj.iter().position(|&x| x == v) {
2384 lconj.swap_remove(pos);
2385 common = self.mk_inter(v, common);
2386 } else {
2387 r_rest = self.mk_inter(v, r_rest);
2388 }
2389 match next {
2390 Some(n) => cur = n,
2391 None => break,
2392 }
2393 }
2394 if lconj.len() < lconj_initial {
2395 let l_rest = lconj
2396 .iter()
2397 .fold(NodeId::TS, |acc, &v| self.mk_inter(v, acc));
2398 let inner_union = self.mk_union(l_rest, r_rest);
2399 return Some(self.mk_inter(common, inner_union));
2400 }
2401 }
2402
2403 if left.is_pred(self) && right.is_pred(self) {
2404 let l = left.pred_tset(self);
2405 let r = right.pred_tset(self);
2406 let solver = self.solver();
2407 let psi = solver.or_id(l, r);
2408 let rewrite = self.mk_pred(psi);
2409 return Some(rewrite);
2410 }
2411
2412 if left == NodeId::EPS
2413 && self.get_extra(left) == 0
2414 && self.nullability(right) == Nullability::ALWAYS
2415 {
2416 return Some(right);
2417 }
2418
2419 if left.is_lookahead(self) && right.is_lookahead(self) {
2420 let lb = left.left(self);
2421 let lt = left.right(self);
2422 let lrel = left.extra(self);
2423
2424 let rb = right.left(self);
2425 let rt = right.right(self);
2426 let rrel = right.extra(self);
2427
2428 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2429 let unioned = self.mk_union(lb, rb);
2430 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2431 return Some(node);
2432 }
2433 }
2434
2435 if right.is_kind(self, Concat) {
2436 if left == NodeId::END
2437 && right.left(self) == NodeId::END
2438 && self.nullability(right).has(Nullability::END)
2439 {
2440 return Some(right);
2441 }
2442 if left == right.left(self) {
2444 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2445 let rw = self.mk_concat(left, rhs);
2446 return Some(rw);
2447 }
2448 if left.is_kind(self, Concat) {
2449 let head1 = left.left(self);
2450 let head2 = right.left(self);
2451
2452 if head1 == head2 {
2453 let t1 = left.right(self);
2454 let t2 = right.right(self);
2455 if head1 == NodeId::TS {
2457 if t2.has_concat_tail(self, t1) {
2458 return Some(left);
2459 }
2460 if t1.has_concat_tail(self, t2) {
2461 return Some(right);
2462 }
2463 }
2464 let un = self.mk_union(t1, t2);
2465 return Some(self.mk_concat(left.left(self), un));
2466 }
2467
2468 if false {
2472 let end1 = self.get_concat_end(left);
2473 let end2 = self.get_concat_end(right);
2474 if end1 == end2 {
2475 let contains_lookarounds =
2476 left.contains_lookaround(self) || right.contains_lookaround(self);
2477 let flags = left.flags_contains(self).or(right.flags_contains(self));
2478 if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2479 let rev1 = self.reverse(left).unwrap();
2480 let rev2 = self.reverse(right).unwrap();
2481
2482 let union_rev = self.mk_union(rev1, rev2);
2483 return Some(self.reverse(union_rev).unwrap());
2484 }
2485 }
2486 }
2487 }
2488 if left.is_pred(self) && left == right.left(self) {
2489 let un = self.mk_opt(right.right(self));
2490 let conc = self.mk_concat(left, un);
2491 return Some(conc);
2492 }
2493 }
2494
2495 if left == NodeId::EPS && right.is_plus(self) {
2496 let result = self.mk_star(right.left(self));
2497 return Some(result);
2498 }
2499
2500 if left.is_inter(self) && right.is_inter(self) {
2502 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2503 return Some(rw);
2504 }
2505 }
2506
2507 None
2508 }
2509
2510 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2511 let mut curr = node;
2512 while curr.is_inter(self) {
2513 out.push(self.get_left(curr));
2514 curr = self.get_right(curr);
2515 }
2516 out.push(curr);
2517 }
2518
2519 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2520 let mut curr = node;
2521 let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2522 if has_prefix {
2523 curr = self.get_right(curr);
2524 }
2525 let mut count = 0u32;
2526 let mut pred_set = None;
2527 while curr.is_concat(self) {
2528 let head = self.get_left(curr);
2529 if !head.is_pred(self) {
2530 return None;
2531 }
2532 let ps = head.pred_tset(self);
2533 match pred_set {
2534 None => pred_set = Some(ps),
2535 Some(existing) => {
2536 if existing != ps {
2537 return None;
2538 }
2539 }
2540 }
2541 curr = self.get_right(curr);
2542 count += 1;
2543 }
2544 if count == 0 || !curr.is_star(self) {
2545 return None;
2546 }
2547 Some((has_prefix, pred_set.unwrap(), curr, count))
2548 }
2549
2550 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2551 let mut j = 0;
2552 for &s in sub {
2553 while j < sup.len() && sup[j] < s {
2554 j += 1;
2555 }
2556 if j >= sup.len() || sup[j] != s {
2557 return false;
2558 }
2559 j += 1;
2560 }
2561 true
2562 }
2563
2564 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2565 if !left.is_inter(self) || !right.is_inter(self) {
2566 return None;
2567 }
2568
2569 let mut lc: Vec<NodeId> = Vec::new();
2570 let mut rc: Vec<NodeId> = Vec::new();
2571 self.collect_inter_components(left, &mut lc);
2572 self.collect_inter_components(right, &mut rc);
2573
2574 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2576 return Some(left);
2577 }
2578 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2580 return Some(right);
2581 }
2582
2583 if lc.len() == rc.len() {
2584 let mut diff_idx = None;
2585 for i in 0..lc.len() {
2586 if lc[i] != rc[i] {
2587 if diff_idx.is_some() {
2588 return None;
2589 }
2590 diff_idx = Some(i);
2591 }
2592 }
2593 if let Some(idx) = diff_idx {
2594 let a = lc[idx];
2595 let b = rc[idx];
2596 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2597 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2598 {
2599 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2600 return if ca < cb { Some(left) } else { Some(right) };
2601 }
2602 }
2603 }
2604 }
2605
2606 None
2607 }
2608
2609 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2610 if cfg!(feature = "norewrite") {
2611 return None;
2612 }
2613 if left == right {
2614 return Some(left);
2615 }
2616 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2617 return Some(right);
2618 }
2619 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2620 return Some(left);
2621 }
2622
2623 if left.is_pred(self) && right.is_pred(self) {
2624 let l = left.pred_tset(self);
2625 let r = right.pred_tset(self);
2626 let solver = self.solver();
2627 let psi = solver.and_id(l, r);
2628 let rewrite = self.mk_pred(psi);
2629 return Some(rewrite);
2630 }
2631
2632 if left.is_concat(self) && right.is_concat(self) {
2633 if left.left(self) == right.left(self) {
2634 if left.left(self).is_pred(self) {
2635 let new_right = self.mk_inter(left.right(self), right.right(self));
2636 return Some(self.mk_concat(left.left(self), new_right));
2637 }
2638 }
2639 }
2640
2641 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2642 return Some(NodeId::BOT);
2643 }
2644
2645 if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2646 let bodies = self.mk_union(left.left(self), right.left(self));
2647 return Some(self.mk_compl(bodies));
2648 }
2649
2650 if left == NodeId::TOPPLUS {
2651 if right.is_pred_star(self).is_some() {
2652 let newloop = self.mk_plus(right.left(self));
2653 return Some(newloop);
2654 }
2655 if right.is_never_nullable(self) {
2656 return Some(right);
2657 }
2658 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2659 return Some(NodeId::BOT);
2660 }
2661 if right.is_kind(self, Kind::Concat) {}
2662 }
2663
2664 {
2665 let l_is_la = left.is_lookahead(self);
2666 let r_is_la = right.is_lookahead(self);
2667 let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2668 let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2669 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2670 let (la_node, other, concat_body) = if r_is_la {
2671 (right, left, NodeId::MISSING)
2672 } else if l_is_la {
2673 (left, right, NodeId::MISSING)
2674 } else if r_is_cla {
2675 (right.left(self), left, right.right(self))
2676 } else {
2677 (left.left(self), right, left.right(self))
2678 };
2679 let la_body = la_node.left(self);
2680 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2681 let inter_right = if concat_body.is_missing() {
2682 la_tail
2683 } else {
2684 self.mk_concat(la_tail, concat_body)
2685 };
2686 let new_body = self.mk_inter(other, inter_right);
2687 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2688 return Some(self.mk_concat(la, new_body));
2689 }
2690 }
2691
2692 if self.get_kind(right) == Kind::Compl {
2693 let compl_body = right.left(self);
2694 if left == compl_body {
2695 return Some(NodeId::BOT);
2696 }
2697 if compl_body.is_concat(self) {
2698 let compl_head = compl_body.left(self);
2699 if compl_body.right(self) == NodeId::TS && compl_head == left {
2700 return Some(NodeId::BOT);
2701 }
2702 }
2703 }
2704
2705 if let Some(pleft) = left.is_pred_star(self) {
2706 if let Some(pright) = right.is_pred_star(self) {
2707 let merged = self.mk_inter(pleft, pright);
2708 return Some(self.mk_star(merged));
2709 }
2710 }
2711
2712 {
2713 let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2714 let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2715 if l_is_clb || r_is_clb {
2716 let (lb, body) = if l_is_clb && r_is_clb {
2717 let lb1 = left.left(self);
2718 let lb2 = right.left(self);
2719 let inner = self.mk_inter(
2720 self.get_lookbehind_inner(lb1),
2721 self.get_lookbehind_inner(lb2),
2722 );
2723 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2724 let body = self.mk_inter(left.right(self), right.right(self));
2725 (lb, body)
2726 } else if l_is_clb {
2727 let lb = left.left(self);
2728 let body = self.mk_inter(left.right(self), right);
2729 (lb, body)
2730 } else {
2731 let lb = right.left(self);
2732 let body = self.mk_inter(left, right.right(self));
2733 (lb, body)
2734 };
2735 return Some(self.mk_concat(lb, body));
2736 }
2737 }
2738
2739 {
2740 let l_has_la = self.has_trailing_la(left);
2741 let r_has_la = self.has_trailing_la(right);
2742 if l_has_la || r_has_la {
2743 let (body, la) = if l_has_la && r_has_la {
2744 let (lbody, l_la) = self.strip_trailing_la(left);
2745 let (rbody, r_la) = self.strip_trailing_la(right);
2746 let inner = self.mk_inter(
2747 self.get_lookahead_inner(l_la),
2748 self.get_lookahead_inner(r_la),
2749 );
2750 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2751 let body = self.mk_inter(lbody, rbody);
2752 (body, la)
2753 } else if l_has_la {
2754 let (lbody, la) = self.strip_trailing_la(left);
2755 let body = self.mk_inter(lbody, right);
2756 (body, la)
2757 } else {
2758 let (rbody, la) = self.strip_trailing_la(right);
2759 let body = self.mk_inter(left, rbody);
2760 (body, la)
2761 };
2762 return Some(self.mk_concat(body, la));
2763 }
2764 }
2765
2766 None
2767 }
2768
2769 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2770 if cfg!(feature = "norewrite") {
2771 return None;
2772 }
2773 debug_assert!(self.get_kind(right_union) == Kind::Union);
2774
2775 let mut rewritten = None;
2776 right_union.iter_union_while(
2777 self,
2778 &mut (|b, v| {
2779 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2780 rewritten = Some((v, rw));
2781 false
2782 } else {
2783 true
2784 }
2785 }),
2786 );
2787
2788 if let Some(rw) = rewritten {
2789 let mut new_union = NodeId::BOT;
2790 right_union.iter_union(
2791 self,
2792 &mut (|b, v| {
2793 if v == rw.0 {
2794 new_union = b.mk_union(rw.1, new_union)
2795 } else {
2796 new_union = b.mk_union(v, new_union)
2797 }
2798 }),
2799 );
2800 return Some(new_union);
2801 };
2802
2803 None
2804 }
2805
2806 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2807 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2808 debug_assert!(tail != NodeId::MISSING);
2809 let key = NodeKey {
2810 kind: Kind::Concat,
2811 left: head,
2812 right: tail,
2813 extra: u32::MAX,
2814 };
2815 if let Some(id) = self.key_is_created(&key) {
2816 return *id;
2817 }
2818
2819 if head == NodeId::BOT || tail == NodeId::BOT {
2820 return NodeId::BOT;
2821 }
2822 if head == NodeId::EPS {
2823 return tail;
2824 }
2825 if tail == NodeId::EPS {
2826 return head;
2827 }
2828
2829 match tail {
2830 NodeId::BEGIN => {
2832 if !self.is_nullable(head, Nullability::BEGIN) {
2833 return NodeId::BOT;
2834 } else {
2835 return NodeId::BEGIN;
2836 }
2837 }
2838 _ => {}
2839 }
2840
2841 if head.is_kind(self, Kind::Concat) {
2843 let left = head.left(self);
2844 let newright = self.mk_concat(head.right(self), tail);
2845 let updated = self.mk_concat(left, newright);
2846 return self.init_as(key, updated);
2847 }
2848
2849 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2850 return NodeId::BOT;
2851 }
2852
2853 if tail.is_concat(self) {
2854 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2855 let upd = self.mk_concat(rw, tail.right(self));
2856 return self.init_as(key, upd);
2857 }
2858 }
2859
2860 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2861 return self.init_as(key, new);
2862 }
2863
2864 match (self.get_kind(head), self.get_kind(tail)) {
2865 (Kind::Star, Kind::Concat) if head.is_star(self) => {
2867 let rl = tail.left(self);
2868 if head == rl {
2869 return self.init_as(key, tail);
2870 }
2871 }
2872 (_, Kind::Concat) => {
2874 let curr = head;
2875 let h2 = self.mk_concat(curr, tail.left(self));
2876 let tr = tail.right(self);
2877 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2878 return self.init_as(key, new);
2879 }
2880 }
2881 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2882 return self.init_as(key, tail);
2883 }
2884 _ => {}
2885 }
2886
2887 self.init(key)
2888 }
2889
2890 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2891 let lb_body = {
2893 match self.starts_with_ts(lb_body) {
2894 true => lb_body,
2895 false => self.mk_concat(NodeId::TS, lb_body),
2896 }
2897 };
2898 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2900 }
2901
2902 fn mk_lookbehind_internal(
2903 &mut self,
2904 lb_body: NodeId,
2905 lb_prev: NodeId,
2906 ) -> Result<NodeId, AlgebraError> {
2907 debug_assert!(lb_body != NodeId::MISSING);
2908 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2909 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2910 return Ok(NodeId::BOT);
2911 }
2912 if lb_body == NodeId::TS {
2913 return Ok(lb_prev);
2914 }
2915 if lb_body == NodeId::EPS {
2916 match lb_prev {
2917 NodeId::MISSING => return Ok(NodeId::EPS),
2918 NodeId::EPS => return Ok(NodeId::EPS),
2919 _ => return Ok(lb_prev),
2920 }
2921 }
2922
2923 let key = NodeKey {
2924 kind: Kind::Lookbehind,
2925 left: lb_body,
2926 right: lb_prev,
2927 extra: u32::MAX,
2928 };
2929 match self.key_is_created(&key) {
2930 Some(id) => Ok(*id),
2931 None => {
2932 if lb_prev == NodeId::TS {
2933 return Ok(self.mk_concat(lb_prev, lb_body));
2934 }
2935
2936 Ok(self.init(key))
2937 }
2938 }
2939 }
2940
2941 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2942 let la_body = {
2944 match self.ends_with_ts(la_body) {
2945 true => la_body,
2946 false => self.mk_concat(la_body, NodeId::TS),
2947 }
2948 };
2949 let rel = if NodeId::MISSING == la_tail {
2950 rel
2951 } else {
2952 match la_tail.is_center_nullable(self) {
2953 false => u32::MAX,
2954 true => rel,
2955 }
2956 };
2957
2958 self.mk_lookahead_internal(la_body, la_tail, rel)
2959 }
2960
2961 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2963 let key = NodeKey {
2964 kind: Kind::Lookahead,
2965 left: la_body,
2966 right: la_tail,
2967 extra: rel,
2968 };
2969 if let Some(id) = self.key_is_created(&key) {
2970 return *id;
2971 }
2972 if la_body == NodeId::TS {
2973 if rel == 0 {
2974 return la_tail.missing_to_eps();
2975 } else {
2976 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2977 }
2978 }
2979 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2980 return NodeId::BOT;
2981 }
2982 if la_tail.is_missing() && rel == u32::MAX {
2983 return NodeId::BOT;
2984 }
2985
2986 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2987 return la_body;
2988 }
2989
2990 if la_tail == NodeId::TS {
2991 if rel == 0 || rel == u32::MAX {
2992 return self.mk_concat(la_body, NodeId::TS);
2993 } else if rel == u32::MAX {
2994 return self.mk_begins_with(la_body);
2995 }
2996 }
2997
2998 if rel == u32::MAX {
2999 if la_tail.is_missing() {
3000 return NodeId::BOT;
3001 }
3002
3003 if self.is_always_nullable(la_body) {
3004 return la_tail.missing_to_eps();
3005 }
3006
3007 if la_tail != NodeId::MISSING {
3008 match self.get_kind(la_tail) {
3009 _ => {
3010 if la_body.is_compl_plus_end(self) {
3011 let minlen = self.get_min_length_only(la_tail);
3012 if minlen >= 1 {
3013 return NodeId::BOT;
3014 }
3015 }
3016 }
3017 }
3018 }
3019 }
3020
3021 if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3022 let la_body2 = self.get_lookahead_inner(la_tail);
3023 let body1_ts = self.mk_concat(la_body, NodeId::TS);
3024 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3025 let new_la_body = self.mk_inter(body1_ts, body2_ts);
3026 let new_la_rel = self.get_lookahead_rel(la_tail);
3027 let new_la_tail = self.get_lookahead_tail(la_tail);
3028 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3029 }
3030
3031 if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3032 let la_body_right = la_body.right(self);
3033 if self.is_always_nullable(la_body_right) {
3034 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3035 }
3036 if la_body.right(self) == NodeId::END {
3037 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3038 }
3039 let bodyright = la_body.right(self);
3040 if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3041 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3042 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3043 }
3044 }
3045
3046 if la_tail != NodeId::MISSING {
3047 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3048 let lpred = la_body.left(self);
3049 if lpred.is_pred(self) {
3050 let l = lpred.pred_tset(self);
3051 let r = la_tail.pred_tset(self);
3052 let psi_and = self.solver().and_id(l, r);
3053 let rewrite = self.mk_pred(psi_and);
3054 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3055 let new_right =
3056 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3057 return self.mk_concat(rewrite, new_right);
3058 }
3059 }
3060 }
3061
3062 self.get_node_id(key)
3063 }
3064
3065 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3066 let has_match = (packed >> 16) > 0;
3067 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3068 return NodeId::BOT;
3069 }
3070 debug_assert!(
3071 body == NodeId::BOT || !self.is_infinite(body),
3072 "Counted body must have finite max length"
3073 );
3074 let chain = self.prune_counted_chain(body, chain);
3075 let key = NodeKey {
3076 kind: Kind::Counted,
3077 left: body,
3078 right: chain,
3079 extra: packed,
3080 };
3081 if let Some(id) = self.key_is_created(&key) {
3082 return *id;
3083 }
3084 self.get_node_id(key)
3085 }
3086
3087 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3088 if chain == NodeId::MISSING || body == NodeId::BOT {
3089 return chain;
3090 }
3091 if self.nullability(body) != Nullability::NEVER {
3092 return NodeId::MISSING;
3093 }
3094 let chain_body = chain.left(self);
3095 if chain_body == NodeId::BOT {
3096 return chain;
3097 }
3098 let not_begins = self.mk_not_begins_with(body);
3099 let inter = self.mk_inter(chain_body, not_begins);
3100 let is_empty = inter == NodeId::BOT;
3101 if is_empty {
3102 self.prune_counted_chain(body, chain.right(self))
3103 } else {
3104 chain
3105 }
3106 }
3107
3108 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3109 let neg_inner = self.mk_concat(body, NodeId::TS);
3110 let neg_part = self.mk_compl(neg_inner);
3111 let conc = self.mk_concat(neg_part, NodeId::END);
3112 self.mk_lookahead(conc, NodeId::MISSING, rel)
3113 }
3114
3115 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3116 match self.get_node(body).kind {
3117 Kind::Pred => {
3118 let psi = body.pred_tset(self);
3119 let negated = self.mk_pred_not(psi);
3120 let union = self.mk_union(NodeId::BEGIN, negated);
3121 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3123 }
3124 _ => {
3125 let uc = crate::unicode_classes::utf8_char(self);
3128 let neg = self.mk_compl(body);
3129 let negated = self.mk_inter(neg, uc);
3130 let union = self.mk_union(NodeId::BEGIN, negated);
3131 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3133 }
3134 }
3135 }
3136
3137 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3138 debug_assert!(left != NodeId::MISSING);
3139 debug_assert!(right != NodeId::MISSING);
3140 if left > right {
3141 return self.mk_union(right, left);
3142 }
3143 let key = NodeKey {
3144 kind: Kind::Union,
3145 left,
3146 right,
3147 extra: u32::MAX,
3148 };
3149 if let Some(id) = self.key_is_created(&key) {
3150 return *id;
3151 }
3152 if left == right {
3153 return self.init_as(key, left);
3154 }
3155 if left == NodeId::BOT {
3156 return self.init_as(key, right);
3157 }
3158 if right == NodeId::BOT {
3159 return self.init_as(key, left);
3160 }
3161 if right == NodeId::TS {
3162 return self.init_as(key, right);
3163 }
3164 if left == NodeId::TS {
3165 return self.init_as(key, left);
3166 }
3167 match (self.get_kind(left), self.get_kind(right)) {
3168 (Kind::Union, _) => {
3169 self.iter_unions_b(left, &mut |b, v| {
3170 b.temp_vec.push(v);
3171 });
3172 self.iter_unions_b(right, &mut |b, v| {
3173 b.temp_vec.push(v);
3174 });
3175 self.temp_vec.sort();
3176 let tree = self.temp_vec.clone();
3177 self.temp_vec.clear();
3178 let newnode = tree
3179 .iter()
3180 .rev()
3181 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3182 return self.init_as(key, newnode);
3183 }
3184 (_, Kind::Union) => {
3185 let rleft = right.left(self);
3186 if left > rleft {
3188 self.iter_unions_b(left, &mut |b, v| {
3189 b.temp_vec.push(v);
3190 });
3191 self.iter_unions_b(right, &mut |b, v| {
3192 b.temp_vec.push(v);
3193 });
3194 self.temp_vec.sort();
3195 let tree = self.temp_vec.clone();
3196 self.temp_vec.clear();
3197 let newnode = tree
3198 .iter()
3199 .rev()
3200 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3201 return self.init_as(key, newnode);
3202 } else {
3203 if let Some(rw) = self.attempt_rw_unions(left, right) {
3204 return self.init_as(key, rw);
3205 }
3206 }
3207 }
3208 _ => {}
3209 }
3210
3211 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3212 return self.init_as(key, rw);
3213 }
3214 self.init(key)
3215 }
3216
3217 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3218 debug_assert!(left_id != NodeId::MISSING);
3219 debug_assert!(right_id != NodeId::MISSING);
3220 if left_id == right_id {
3221 return left_id;
3222 }
3223 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3224 return NodeId::BOT;
3225 }
3226 if left_id == NodeId::TS {
3227 return right_id;
3228 }
3229 if right_id == NodeId::TS {
3230 return left_id;
3231 }
3232 if left_id > right_id {
3233 return self.mk_inter(right_id, left_id);
3234 }
3235 let key = NodeKey {
3236 kind: Kind::Inter,
3237 left: left_id,
3238 right: right_id,
3239 extra: u32::MAX,
3240 };
3241 if let Some(id) = self.key_is_created(&key) {
3242 return *id;
3243 }
3244
3245 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3246 return self.init_as(key, rw);
3247 }
3248
3249 self.init(key)
3250 }
3251
3252 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3253 let node = NodeKey {
3254 kind,
3255 left: NodeId::MISSING,
3256 right: NodeId::MISSING,
3257 extra: u32::MAX,
3258 };
3259 self.init(node)
3260 }
3261
3262 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3263 let star = self.mk_star(body_id);
3264 self.mk_concat(body_id, star)
3265 }
3266 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3267 let opt = self.mk_opt(body_id);
3268 let mut nodes1 = vec![];
3269 for _ in lower..upper {
3270 nodes1.push(opt);
3271 }
3272 for _ in 0..lower {
3273 nodes1.push(body_id);
3274 }
3275 self.mk_concats(nodes1.into_iter())
3276 }
3277 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3278 self.mk_union(NodeId::EPS, body_id)
3279 }
3280
3281 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3282 let key = NodeKey {
3283 kind: Kind::Star,
3284 left: body_id,
3285 right: NodeId::MISSING,
3286 extra: 0,
3287 };
3288 if let Some(id) = self.key_is_created(&key) {
3289 return *id;
3290 }
3291 if body_id.is_kind(self, Kind::Star) {
3293 return body_id;
3294 }
3295 self.get_node_id(key)
3296 }
3297
3298 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3301 match self.get_kind(node_id) {
3302 Kind::End => Nullability::EMPTYSTRING,
3303 Kind::Begin => Nullability::EMPTYSTRING,
3304 Kind::Pred => Nullability::NEVER,
3305 Kind::Star => Nullability::ALWAYS,
3306 Kind::Inter | Kind::Concat => {
3307 let lnull = self.nullability_emptystring(node_id.left(self));
3308 let rnull = self.nullability_emptystring(node_id.right(self));
3309 lnull.and(rnull) }
3311 Kind::Union => {
3312 let lnull = self.nullability_emptystring(node_id.left(self));
3313 let rnull = self.nullability_emptystring(node_id.right(self));
3314 lnull.or(rnull)
3315 }
3316 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3317 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3318 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3319 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3320 }
3321 }
3322
3323 #[inline(always)]
3324 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3325 self.get_meta(node_id)
3326 .flags
3327 .nullability()
3328 .has(Nullability::CENTER.or(Nullability::END))
3329 }
3330
3331 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3332 self.get_only_nullability(node_id)
3333 }
3334
3335 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3336 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3337 }
3338
3339 pub fn pp(&self, node_id: NodeId) -> String {
3340 let mut s = String::new();
3341 self.ppw(&mut s, node_id).unwrap();
3342 s
3343 }
3344
3345 #[allow(dead_code)]
3346 pub fn pp_nulls(&self, node_id: NodeId) -> String {
3347 let nu = self.get_nulls_id(node_id);
3348 let nr = self.mb.nb.get_set_ref(nu);
3349 let s1 = format!("{:?}", nr);
3350 s1
3351 }
3352
3353 #[allow(dead_code)]
3354 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3355 match self.get_tregex(term_id) {
3356 TRegex::Leaf(node_id) => {
3357 let mut s = String::new();
3358 self.ppw(&mut s, *node_id).unwrap();
3359 s
3360 }
3361 TRegex::ITE(cond, then_id, else_id) => {
3362 format!(
3363 "ITE({},{},{})",
3364 self.solver_ref().pp(*cond),
3365 self.ppt(*then_id),
3366 self.ppt(*else_id)
3367 )
3368 }
3369 }
3370 }
3371
3372 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3373 if cfg!(feature = "graphviz") {
3374 match node_id {
3375 NodeId::MISSING => return write!(s, "MISSING"),
3376 NodeId::BOT => return write!(s, "⊥"),
3377 NodeId::TS => return write!(s, "_*"),
3378 NodeId::TOP => return write!(s, "_"),
3379 NodeId::EPS => return write!(s, "ε"),
3380 _ => {}
3381 }
3382 }
3383
3384 match node_id {
3385 NodeId::MISSING => return write!(s, "MISSING"),
3386 NodeId::BOT => return write!(s, "⊥"),
3387 NodeId::TS => return write!(s, "_*"),
3388 NodeId::TOP => return write!(s, "_"),
3389 NodeId::EPS => return write!(s, ""),
3390 _ => {}
3391 }
3392
3393 match self.get_kind(node_id) {
3394 Kind::End => write!(s, r"\z"),
3395 Kind::Begin => write!(s, r"\A"),
3396 Kind::Pred => {
3397 let psi = node_id.pred_tset(self);
3398 if psi == TSetId::EMPTY {
3399 write!(s, r"⊥")
3400 } else if psi == TSetId::FULL {
3401 write!(s, r"_")
3402 } else {
3403 write!(s, "{}", self.solver_ref().pp(psi))
3404 }
3405 }
3406 Kind::Inter => {
3407 write!(s, "(")?;
3408 self.ppw(s, node_id.left(self))?;
3409 write!(s, "&")?;
3410 let mut curr = node_id.right(self);
3411 while curr.is_inter(self) {
3412 let n = curr.left(self);
3413 self.ppw(s, n)?;
3414 write!(s, "&")?;
3415 curr = curr.right(self);
3416 }
3417 self.ppw(s, curr)?;
3418 write!(s, ")")
3419 }
3420 Kind::Union => {
3421 let left = node_id.left(self);
3422 let right = node_id.right(self);
3423 write!(s, "(")?;
3424 self.ppw(s, left)?;
3425 write!(s, "|")?;
3426 let mut curr = right;
3427 while self.get_kind(curr) == Kind::Union {
3428 let n = curr.left(self);
3429 self.ppw(s, n)?;
3430 write!(s, "|")?;
3431 curr = curr.right(self);
3432 }
3433 self.ppw(s, curr)?;
3434 write!(s, ")")
3435 }
3436 Kind::Concat => {
3437 let left = node_id.left(self);
3438 let right = node_id.right(self);
3439 if right.is_star(self) && right.left(self) == left {
3440 self.ppw(s, left)?;
3441 write!(s, "+")?;
3442 return Ok(());
3443 }
3444 if right.is_concat(self) {
3445 let rl = right.left(self);
3446 if rl.is_star(self) && rl.left(self) == left {
3447 self.ppw(s, left)?;
3448 write!(s, "+")?;
3449 return self.ppw(s, right.right(self));
3450 }
3451 }
3452 if right.is_concat(self) && right.left(self) == left {
3453 let mut num = 1;
3454 let mut right = right;
3455 while right.is_concat(self) && right.left(self) == left {
3456 num += 1;
3457 right = right.right(self);
3458 }
3459 if let Some(inner) = left.is_opt_v(self) {
3461 let mut inner_count = 0;
3462 let mut right2 = right;
3463 while right2.is_concat(self) && right2.left(self) == inner {
3464 inner_count += 1;
3465 right2 = right2.right(self);
3466 }
3467 if right2 == inner {
3468 inner_count += 1;
3469 self.ppw(s, inner)?;
3470 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3471 }
3472 if inner_count > 0 {
3473 self.ppw(s, inner)?;
3474 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3475 return self.ppw(s, right2);
3476 }
3477 }
3478 self.ppw(s, left)?;
3479 if right == left {
3480 num += 1;
3481 return write!(s, "{{{}}}", num);
3482 }
3483 if num <= 3 && left.is_pred(self) {
3484 for _ in 1..num {
3485 self.ppw(s, left)?;
3486 }
3487 return self.ppw(s, right);
3488 } else {
3489 write!(s, "{{{}}}", num)?;
3490 return self.ppw(s, right);
3491 }
3492 }
3493 self.ppw(s, left)?;
3494 self.ppw(s, right)
3495 }
3496 Kind::Star => {
3497 let left = node_id.left(self);
3498 let leftkind = self.get_kind(left);
3499 match leftkind {
3500 Kind::Concat | Kind::Star | Kind::Compl => {
3501 write!(s, "(")?;
3502 self.ppw(s, left)?;
3503 write!(s, ")")?;
3504 }
3505 _ => {
3506 self.ppw(s, left)?;
3507 }
3508 };
3509 write!(s, "*")
3510 }
3511 Kind::Compl => {
3512 write!(s, "~(")?;
3513 self.ppw(s, node_id.left(self))?;
3514 write!(s, ")")
3515 }
3516 Kind::Lookbehind => {
3517 let lbleft = self.get_lookbehind_prev(node_id);
3518 let lbinner = self.get_lookbehind_inner(node_id);
3519 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3520 if lbleft != NodeId::MISSING {
3521 write!(s, "❮")?;
3522 self.ppw(s, lbleft)?;
3523 write!(s, "❯")?;
3524 }
3525
3526 write!(s, "(?<=")?;
3527 self.ppw(s, lbinner)?;
3528 write!(s, ")")
3529 }
3530 Kind::Lookahead => {
3531 let inner = self.get_lookahead_inner(node_id);
3532 write!(s, "(?=")?;
3533 self.ppw(s, inner)?;
3534 write!(s, ")")?;
3535 if self.get_lookahead_rel(node_id) != 0 {
3536 write!(s, "{{")?;
3537 let rel = self.get_lookahead_rel(node_id);
3538 if rel == u32::MAX {
3539 write!(s, "∅")?;
3540 } else {
3541 write!(s, "{}", rel)?;
3542 }
3543 write!(s, "}}")?;
3544 }
3545 if node_id.right(self) == NodeId::MISSING {
3546 Ok(())
3547 } else {
3548 write!(s, "❮")?;
3549 self.ppw(s, node_id.right(self))?;
3550 write!(s, "❯")
3551 }
3552 }
3553 Kind::Counted => {
3554 let body = node_id.left(self);
3555 let packed = self.get_extra(node_id);
3556 let step = packed & 0xFFFF;
3557 let best = packed >> 16;
3558 write!(s, "#(")?;
3559 self.ppw(s, body)?;
3560 write!(s, ")s{}b{}", step, best)
3561 }
3562 }
3563 }
3564
3565 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3566 self.mk_concat(node, NodeId::TS)
3567 }
3568
3569 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3570 let node_ts = self.mk_concat(node, NodeId::TS);
3571 self.mk_compl(node_ts)
3572 }
3573
3574 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3575 let notset = self.solver().not_id(set);
3576 self.mk_pred(notset)
3577 }
3578
3579 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3580 let set_id = self.solver().u8_to_set_id(char);
3581 self.mk_pred(set_id)
3582 }
3583
3584 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3585 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3586 self.mk_pred(rangeset)
3587 }
3588
3589 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3590 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3591 for &(lo, hi) in &ranges[1..] {
3592 let r = self.mk_range_u8(lo, hi);
3593 node = self.mk_union(node, r);
3594 }
3595 node
3596 }
3597
3598 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3599 let mut prefix = Vec::new();
3600 let mut curr = node;
3601 loop {
3602 if curr == NodeId::EPS {
3603 let full = !prefix.is_empty();
3604 return (prefix, full);
3605 }
3606 if curr == NodeId::BOT {
3607 break;
3608 }
3609 if curr.is_pred(self) {
3610 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3611 Some(byte) => {
3612 prefix.push(byte);
3613 return (prefix, true);
3614 }
3615 None => break, }
3617 }
3618 if !curr.is_concat(self) {
3619 break;
3620 }
3621 let left = curr.left(self);
3622 if !left.is_pred(self) {
3623 break;
3624 }
3625 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3626 Some(byte) => prefix.push(byte),
3627 None => break,
3628 }
3629 curr = curr.right(self);
3630 }
3631 (prefix, false)
3632 }
3633
3634 #[allow(dead_code)]
3635 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3636 let mut result = NodeId::EPS;
3637 for byte in raw_str.iter().rev() {
3638 let node = self.mk_u8(*byte);
3639 result = self.mk_concat(node, result);
3640 }
3641 result
3642 }
3643
3644 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3645 let mut result = NodeId::EPS;
3646 for byte in raw_str.bytes().rev() {
3647 let node = self.mk_u8(byte);
3648 result = self.mk_concat(node, result);
3649 }
3650 result
3651 }
3652
3653 pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3654 self.prune_rec::<true>(node_id, memo)
3655 }
3656
3657 pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3658 self.prune_rec::<false>(node_id, memo)
3659 }
3660
3661 pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
3662 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3663 self.simplify_rev_initial_rec(node_id, &mut memo)
3664 }
3665
3666 fn simplify_rev_initial_rec(
3667 &mut self,
3668 node_id: NodeId,
3669 memo: &mut FxHashMap<NodeId, NodeId>,
3670 ) -> NodeId {
3671 if let Some(&v) = memo.get(&node_id) {
3672 return v;
3673 }
3674 let out = match self.get_kind(node_id) {
3675 Kind::Union => {
3676 let mut parts: Vec<NodeId> = Vec::new();
3677 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
3678 for p in &mut parts {
3679 *p = self.simplify_rev_initial_rec(*p, memo);
3680 }
3681 parts
3682 .iter()
3683 .rev()
3684 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3685 }
3686 Kind::Concat => {
3687 let l = node_id.left(self);
3688 let r = node_id.right(self);
3689 let l_new = self.simplify_rev_initial_rec(l, memo);
3690 let r_new = self.simplify_rev_initial_rec(r, memo);
3691 let rebuilt = if l_new == l && r_new == r {
3692 node_id
3693 } else {
3694 self.mk_concat(l_new, r_new)
3695 };
3696 if self.get_kind(rebuilt) != Kind::Concat {
3697 return rebuilt;
3698 }
3699 let cl = rebuilt.left(self);
3700 let cr = rebuilt.right(self);
3701 if cl != NodeId::TS {
3702 return rebuilt;
3703 }
3704 let (head, tail) = if self.get_kind(cr) == Kind::Concat {
3705 (cr.left(self), cr.right(self))
3706 } else {
3707 (cr, NodeId::EPS)
3708 };
3709 if self.nullability(tail) != Nullability::ALWAYS {
3710 return rebuilt;
3711 }
3712 if self.get_kind(head) != Kind::Union {
3713 return rebuilt;
3714 }
3715 let mut has_begin = false;
3716 self.iter_unions_b(head, &mut |_, v| {
3717 if v == NodeId::BEGIN {
3718 has_begin = true;
3719 }
3720 });
3721 if has_begin {
3722 NodeId::TS
3723 } else {
3724 rebuilt
3725 }
3726 }
3727 _ => node_id,
3728 };
3729 memo.insert(node_id, out);
3730 out
3731 }
3732
3733 fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
3734 if self.get_kind(n) != Kind::Concat {
3735 return n;
3736 }
3737 let l = n.left(self);
3738 let r = n.right(self);
3739 if l == NodeId::TS && r == NodeId::END {
3740 return NodeId::TS;
3741 }
3742 let new_r = self.strip_la_body_end(r);
3743 if new_r == r {
3744 n
3745 } else {
3746 self.mk_concat(l, new_r)
3747 }
3748 }
3749
3750 fn prune_rec<const FWD: bool>(
3751 &mut self,
3752 node_id: NodeId,
3753 memo: &mut FxHashMap<NodeId, NodeId>,
3754 ) -> NodeId {
3755 if node_id == NodeId::MISSING {
3756 return node_id;
3757 }
3758 if let Some(&v) = memo.get(&node_id) {
3759 return v;
3760 }
3761 let (l, r) = (node_id.left(self), node_id.right(self));
3762 let out = match node_id.kind(self) {
3763 Kind::Union => {
3764 let l = self.prune_rec::<FWD>(l, memo);
3765 let r = self.prune_rec::<FWD>(r, memo);
3766 let mut parts: Vec<NodeId> = Vec::new();
3767 parts.push(l);
3768 self.iter_unions_b(r, &mut |_, v| parts.push(v));
3769 for p in &mut parts {
3770 *p = self.prune_rec::<FWD>(*p, memo);
3771 }
3772
3773 if FWD {
3774 let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
3776 for &p in &parts {
3777 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
3778 let body = self.get_lookahead_inner(p);
3779 let rel = self.get_lookahead_rel(p);
3780 best.entry(body)
3781 .and_modify(|r| *r = (*r).min(rel))
3782 .or_insert(rel);
3783 }
3784 }
3785 parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
3786 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
3787 let body = self.get_lookahead_inner(p);
3788 if self.get_lookahead_rel(p) != best[&body] {
3789 return acc;
3790 }
3791 }
3792 self.mk_union(p, acc)
3793 })
3794 } else {
3795 parts
3796 .iter()
3797 .rev()
3798 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3799 }
3800 }
3801 Kind::Concat => {
3802 if !FWD && l == NodeId::TS {
3811 let (head, tail) = if self.get_kind(r) == Kind::Concat {
3813 (r.left(self), r.right(self))
3814 } else {
3815 (r, NodeId::EPS)
3816 };
3817
3818 if self.nullability(tail) == Nullability::ALWAYS
3819 && self.get_kind(head) == Kind::Union
3820 {
3821 let mut has_begin = false;
3822 self.iter_unions_b(head, &mut |_, v| {
3823 if v == NodeId::BEGIN {
3824 has_begin = true;
3825 }
3826 });
3827 if has_begin {
3828 let out = NodeId::TS;
3829 memo.insert(node_id, out);
3830 return out;
3831 }
3832 }
3833 }
3834 let l = self.prune_rec::<FWD>(l, memo);
3835 let r = self.prune_rec::<FWD>(r, memo);
3836 self.mk_concat(l, r)
3837 }
3838 Kind::Inter => {
3839 let l = self.prune_rec::<FWD>(l, memo);
3840 let r = self.prune_rec::<FWD>(r, memo);
3841 self.mk_inter(l, r)
3842 }
3843 Kind::Compl => {
3844 let l = self.prune_rec::<FWD>(l, memo);
3845 self.mk_compl(l)
3846 }
3847 Kind::Lookahead => {
3848 let lrel = self.get_lookahead_rel(node_id);
3849 let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
3850 let body = self.prune_rec::<FWD>(body, memo);
3851 let tail = self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo);
3852 self.mk_lookahead_internal(body, tail, lrel)
3853 }
3854 Kind::Begin => NodeId::BOT,
3855 Kind::Counted => {
3856 let body = self.prune_rec::<FWD>(l, memo);
3857 let chain = self.prune_rec::<FWD>(r, memo);
3858 self.mk_counted(body, chain, self.get_extra(node_id))
3859 }
3860 Kind::End | Kind::Pred => node_id,
3861 Kind::Star => {
3862 let l = self.prune_rec::<FWD>(l, memo);
3863 self.mk_star(l)
3864 }
3865 Kind::Lookbehind => {
3866 let l = self.prune_rec::<FWD>(l, memo);
3867 if r.is_missing() {
3868 l
3869 } else {
3870 node_id
3871 }
3872 }
3873 };
3874 memo.insert(node_id, out);
3875 out
3876 }
3877
3878 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3879 let mut sorted: Vec<NodeId> = nodes.collect();
3880 if sorted.len() <= 1 {
3881 return sorted.pop().unwrap_or(NodeId::BOT);
3882 }
3883 sorted.sort();
3884 sorted.dedup();
3885 sorted.retain(|&x| x != NodeId::BOT);
3886 if sorted.is_empty() {
3887 return NodeId::BOT;
3888 }
3889 if sorted.len() > 16 {
3890 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3891 let mut non_concat: Vec<NodeId> = Vec::new();
3892 for &n in &sorted {
3893 if n.is_concat(self) {
3894 by_head.entry(self.get_left(n)).or_default().push(n);
3895 } else {
3896 non_concat.push(n);
3897 }
3898 }
3899 let mut absorbed: Vec<NodeId> = Vec::new();
3900 for &n in &non_concat {
3901 if by_head.contains_key(&n) {
3902 absorbed.push(n);
3903 }
3904 }
3905 if !absorbed.is_empty() {
3906 non_concat.retain(|n| !absorbed.contains(n));
3907 }
3908 if by_head.len() < sorted.len() {
3909 let mut groups: Vec<NodeId> = non_concat;
3910 for (head, tails) in by_head {
3911 let mut tail_nodes: Vec<NodeId> =
3912 tails.iter().map(|&n| self.get_right(n)).collect();
3913 if absorbed.contains(&head) {
3914 tail_nodes.push(NodeId::EPS);
3915 }
3916 let tail_union = self.mk_unions(tail_nodes.into_iter());
3917 let factored = self.mk_concat(head, tail_union);
3918 groups.push(factored);
3919 }
3920 groups.sort();
3921 groups.dedup();
3922 return self.mk_unions_balanced(&groups);
3923 }
3924 }
3925 self.mk_unions_balanced(&sorted)
3926 }
3927
3928 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3929 match nodes.len() {
3930 0 => NodeId::BOT,
3931 1 => nodes[0],
3932 n => {
3933 let mid = n / 2;
3934 let left = self.mk_unions_balanced(&nodes[..mid]);
3935 let right = self.mk_unions_balanced(&nodes[mid..]);
3936 self.mk_union(left, right)
3937 }
3938 }
3939 }
3940
3941 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3942 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3943 }
3944
3945 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3946 nodes
3947 .rev()
3948 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3949 }
3950}
3951
3952impl RegexBuilder {
3953 pub fn iter_sat(
3954 &mut self,
3955 stack: &mut Vec<(TRegexId, TSetId)>,
3956 f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
3957 ) {
3958 debug_assert!(!stack.is_empty());
3959 loop {
3960 match stack.pop() {
3961 None => return,
3962 Some((curr, curr_set)) => match *self.get_tregex(curr) {
3963 TRegex::Leaf(n) => {
3964 let mut curr = n;
3965 while curr != NodeId::BOT {
3966 match self.get_kind(curr) {
3967 _ => {
3968 f(self, n, curr_set);
3969 curr = NodeId::BOT;
3970 }
3971 }
3972 }
3973 }
3974 TRegex::ITE(cnd, then_id, else_id) => {
3975 if else_id != TRegexId::BOT {
3976 let notcnd = self.solver().not_id(cnd);
3977 let interset1 = self.solver().and_id(curr_set, notcnd);
3978 stack.push((else_id, interset1));
3979 }
3980 let interset2 = self.solver().and_id(curr_set, cnd);
3981 stack.push((then_id, interset2));
3982 }
3983 },
3984 }
3985 }
3986 }
3987
3988 #[allow(dead_code)]
3989 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3990 match self.get_tregex(term_id).clone() {
3991 TRegex::Leaf(node_id) => {
3992 if NodeId::BOT == node_id {
3993 vec![]
3994 } else {
3995 vec![node_id]
3996 }
3997 }
3998 TRegex::ITE(_, then_id, else_id) => {
3999 let mut then_nodes = self.extract_sat(then_id);
4000 let mut else_nodes = self.extract_sat(else_id);
4001 then_nodes.append(&mut else_nodes);
4002 then_nodes
4003 }
4004 }
4005 }
4006
4007 pub(crate) fn iter_unions_b(
4008 &mut self,
4009 curr: NodeId,
4010 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4011 ) {
4012 let mut curr = curr;
4013 while self.get_kind(curr) == Kind::Union {
4014 f(self, curr.left(self));
4015 curr = curr.right(self);
4016 }
4017 f(self, curr);
4018 }
4019
4020 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4021 if !self.contains_look(node_id) {
4022 return Some(node_id);
4023 }
4024 match self.get_kind(node_id) {
4025 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4026 Kind::Concat => {
4027 let left = node_id.left(self);
4028 let right = node_id.right(self);
4029 let elim_l = self.try_elim_lookarounds(left)?;
4030 let elim_r = self.try_elim_lookarounds(right)?;
4031 let rw = self.mk_concat(elim_l, elim_r);
4032 Some(rw)
4033 }
4034 Kind::Union => {
4035 let left = node_id.left(self);
4036 let right = node_id.right(self);
4037 let elim_l = self.try_elim_lookarounds(left)?;
4038 let elim_r = self.try_elim_lookarounds(right)?;
4039 let rw = self.mk_union(elim_l, elim_r);
4040 Some(rw)
4041 }
4042
4043 Kind::Star => {
4044 let body = node_id.left(self);
4045 let elim_l = self.try_elim_lookarounds(body)?;
4046 Some(self.mk_star(elim_l))
4047 }
4048 Kind::Compl => {
4049 let left = node_id.left(self);
4050 let elim_l = self.try_elim_lookarounds(left)?;
4051 Some(self.mk_compl(elim_l))
4052 }
4053 Kind::Lookahead => {
4054 let rel = self.get_lookahead_rel(node_id);
4055 if rel != 0 {
4056 return None;
4057 }
4058 let lbody = self.get_lookahead_inner(node_id);
4059 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4060 let elim_l = self.try_elim_lookarounds(lbody)?;
4061 let elim_r = self.try_elim_lookarounds(ltail)?;
4062 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4063 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4064 let rw = self.mk_inter(lbody_ts, ltail_ts);
4065 Some(rw)
4066 }
4067 Kind::Lookbehind => {
4068 let linner = self.get_lookbehind_inner(node_id);
4069 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4070 let elim_l = self.try_elim_lookarounds(linner)?;
4071 let elim_r = self.try_elim_lookarounds(lprev)?;
4072 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4073 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4074 let rw = self.mk_inter(lbody_ts, ltail_ts);
4075 Some(rw)
4076 }
4077 Kind::Inter => {
4078 let left = node_id.left(self);
4079 let right = node_id.right(self);
4080 let elim_l = self.try_elim_lookarounds(left)?;
4081 let elim_r = self.try_elim_lookarounds(right)?;
4082 let rw = self.mk_inter(elim_l, elim_r);
4083 Some(rw)
4084 }
4085 Kind::Counted => None,
4086 }
4087 }
4088
4089 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4091 if self.nullability(node) == Nullability::NEVER {
4092 node
4093 } else {
4094 self.mk_inter(NodeId::TOPPLUS, node)
4095 }
4096 }
4097
4098 pub fn iter_find_stack(
4099 &self,
4100 stack: &mut Vec<TRegexId>,
4101 mut f: impl FnMut(NodeId) -> bool,
4102 ) -> bool {
4103 loop {
4104 match stack.pop() {
4105 None => return false,
4106 Some(curr) => match self.get_tregex(curr) {
4107 TRegex::Leaf(n) => {
4108 let mut curr = *n;
4109 while curr != NodeId::BOT {
4110 match self.get_kind(curr) {
4111 Kind::Union => {
4112 if f(curr.left(self)) {
4113 return true;
4114 }
4115 curr = curr.right(self);
4116 }
4117 _ => {
4118 if f(*n) {
4119 return true;
4120 }
4121 curr = NodeId::BOT;
4122 }
4123 }
4124 }
4125 }
4126 TRegex::ITE(_, then_id, else_id) => {
4127 if *else_id != TRegexId::BOT {
4128 stack.push(*else_id);
4129 }
4130 stack.push(*then_id);
4131 }
4132 },
4133 }
4134 }
4135 }
4136
4137 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
4138 if node == NodeId::BOT {
4139 return Some(true);
4140 }
4141 if self.nullability(node) != Nullability::NEVER {
4142 return Some(false);
4143 }
4144 if let Some(cached) = self.cache_empty.get(&node) {
4145 if cached.is_checked() {
4146 return Some(cached.is_empty());
4147 }
4148 }
4149 let node = if !self.contains_look(node) {
4150 node
4151 } else {
4152 self.try_elim_lookarounds(node)?
4153 };
4154 let isempty_flag = self.is_empty_lang_internal(node);
4155
4156 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
4157 }
4158
4159 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
4160 if !self.get_meta_flags(initial_node).contains_inter() {
4162 return Ok(NodeFlags::ZERO);
4163 }
4164
4165 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4166 let mut worklist: VecDeque<NodeId> = VecDeque::new();
4167 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
4168 let mut stack = Vec::new();
4169 stack.push(begin_der);
4170 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
4171 visited.insert(node, initial_node);
4172 let nullability = self.nullability(node);
4173 if nullability != Nullability::NEVER {
4174 true
4175 } else {
4176 worklist.push_back(node);
4177 false
4178 }
4179 });
4180 if found_nullable_right_away {
4181 return Ok(NodeFlags::ZERO);
4182 }
4183
4184 worklist.push_back(initial_node);
4185 let isempty_flag: NodeFlags;
4186 let mut found_node = NodeId::BOT;
4187
4188 loop {
4189 match worklist.pop_front() {
4190 None => {
4191 isempty_flag = NodeFlags::IS_EMPTY;
4192 break;
4193 }
4194 Some(outer) => {
4195 if let Some(cached) = self.cache_empty.get(&outer) {
4196 if cached.is_checked() {
4197 if cached.is_empty() {
4198 continue;
4199 } else {
4200 return Ok(NodeFlags::ZERO);
4201 }
4202 }
4203 }
4204
4205 let derivative = self.der(outer, Nullability::CENTER)?;
4206
4207 stack.push(derivative);
4208
4209 let found_null = self.iter_find_stack(&mut stack, |node| {
4210 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
4211 found_node = node;
4212 if !self.get_meta_flags(node).contains_inter() {
4213 true
4214 } else {
4215 e.insert(outer);
4216 worklist.push_front(node);
4217 self.any_nonbegin_nullable(node)
4218 }
4219 } else {
4220 false
4221 }
4222 });
4223 if found_null {
4224 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
4225 isempty_flag = NodeFlags::ZERO;
4226 break;
4227 }
4228 }
4229 }
4230 }
4231
4232 self.cache_empty.insert(
4233 initial_node,
4234 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
4235 );
4236 Ok(isempty_flag)
4237 }
4238
4239 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
4241 if larger_lang == smaller_lang {
4242 return Some(true);
4243 }
4244
4245 if self
4247 .nullability(larger_lang)
4248 .not()
4249 .and(self.nullability(smaller_lang))
4250 != Nullability::NEVER
4251 {
4252 return Some(false);
4253 }
4254
4255 let (smaller_lang, larger_lang) =
4260 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
4261 let wrap = |b: &mut Self, n: NodeId| {
4262 let tmp = b.mk_concat(n, NodeId::TS);
4263 b.mk_concat(NodeId::TS, tmp)
4264 };
4265 (wrap(self, smaller_lang), wrap(self, larger_lang))
4266 } else {
4267 (smaller_lang, larger_lang)
4268 };
4269
4270 let nota = self.mk_compl(larger_lang);
4271 let diff = self.mk_inter(smaller_lang, nota);
4272 self.is_empty_lang(diff)
4273 }
4274}