1#![warn(dead_code)]
7
8pub mod unicode_classes;
9pub use unicode_classes::UnicodeClassCache;
10use solver::{Solver, TSetId};
11use rustc_hash::FxHashMap;
12use std::collections::{BTreeSet, VecDeque};
13use std::fmt::Debug;
14use std::fmt::Write;
15use std::hash::Hash;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 as u8 & Self::NULL_MASK as u8)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_lookaround(self) -> bool {
134 self.has(MetaFlags::CONTAINS_LOOKAROUND)
135 }
136 pub(crate) fn contains_inter(self) -> bool {
137 self.has(MetaFlags::CONTAINS_INTER)
138 }
139
140 pub(crate) fn all_contains_flags(self) -> MetaFlags {
141 self.and(
142 MetaFlags::CONTAINS_LOOKAROUND
143 .or(MetaFlags::CONTAINS_ANCHORS)
144 .or(MetaFlags::CONTAINS_INTER),
145 )
146 }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152 #[default]
153 Pred,
154 Star,
155 Begin,
156 End,
157 Concat,
158 Union,
159 Compl,
160 Lookbehind,
161 Lookahead,
162 Inter,
163 Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168 flags: MetaFlags,
169 nulls: NullsId,
170}
171
172struct MetadataBuilder {
173 num_created: u32,
174 solver: Solver,
175 nb: NullsBuilder,
176 index: FxHashMap<Metadata, MetadataId>,
177 pub array: Vec<Metadata>,
178}
179
180mod helpers {
181 pub(crate) fn incr_rel(n1: u32) -> u32 {
182 match n1.overflowing_add(1) {
183 (_, true) => u32::MAX,
184 (res, false) => res,
185 }
186 }
187}
188
189impl MetadataBuilder {
190 fn new() -> MetadataBuilder {
191 let mut arr = Vec::new();
192 arr.push(Metadata {
193 flags: MetaFlags::ZERO,
194 nulls: NullsId::EMPTY,
195 });
196 Self {
197 index: FxHashMap::default(),
198 array: arr,
199 solver: Solver::new(),
200 num_created: 0,
201 nb: NullsBuilder::new(),
202 }
203 }
204
205 fn init(&mut self, inst: Metadata) -> MetadataId {
206 self.num_created += 1;
207 let new_id = MetadataId(self.num_created);
208 self.index.insert(inst.clone(), new_id);
209 self.array.push(inst);
210 new_id
211 }
212
213 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
214 match self.index.get(&inst) {
215 Some(&id) => id,
216 None => self.init(inst),
217 }
218 }
219
220 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
221 &self.array[inst.0 as usize]
222 }
223
224 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
225 setflags.all_contains_flags()
226 }
227
228 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
229 let left = &self.array[body.0 as usize].flags;
230 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
231 let inf = if body_id == NodeId::BOT {
233 MetaFlags::ZERO
234 } else {
235 MetaFlags::INFINITE_LENGTH
236 };
237 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
238 }
239
240 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
241 let left = &self.array[left_id.0 as usize].flags;
242 let null = left.nullability().not().and(Nullability::ALWAYS);
243 let contains = self.get_contains(*left);
244 MetaFlags::with_nullability(
245 null,
246 contains
247 .or(MetaFlags::INFINITE_LENGTH)
248 .or(MetaFlags::CONTAINS_COMPL),
249 )
250 }
251
252 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253 let left = &self.array[left_id.0 as usize].flags;
254 let right = &self.array[right_id.0 as usize].flags;
255 let null = left.nullability().and(right.nullability());
256 let contains = self.get_contains(left.or(*right));
257 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
258 MetaFlags::with_nullability(null, contains.or(len))
259 }
260
261 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
262 let left = &self.array[left_id.0 as usize].flags;
263 let right = &self.array[right_id.0 as usize].flags;
264 let null = left.nullability().and(right.nullability());
265 let contains = self
266 .get_contains(left.or(*right))
267 .or(MetaFlags::CONTAINS_INTER);
268 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
269 MetaFlags::with_nullability(null, contains.or(len))
270 }
271
272 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
273 let left = &self.array[left_id.0 as usize].flags;
274 let right = &self.array[right_id.0 as usize].flags;
275 let null = left.nullability().or(right.nullability());
276 let contains = self.get_contains(left.or(*right));
277 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
278 MetaFlags::with_nullability(null, contains.or(len))
279 }
280}
281
282#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
283pub struct NodeKey {
284 pub(crate) kind: Kind,
285 pub(crate) left: NodeId,
286 pub(crate) right: NodeId,
287 pub(crate) extra: u32,
288}
289
290#[derive(Eq, Hash, PartialEq, Clone, Debug)]
291pub enum TRegex<TSet> {
292 Leaf(NodeId),
293 ITE(TSet, TRegexId, TRegexId),
294}
295
296#[derive(Eq, Hash, PartialEq, Clone, Copy)]
297pub(crate) struct NodeFlags(u8);
298impl NodeFlags {
299 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
300 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
301 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
302
303 fn is_checked(self) -> bool {
304 self.0 >= NodeFlags::IS_CHECKED.0
305 }
306 fn is_empty(self) -> bool {
307 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
308 }
309}
310
311#[derive(Eq, Hash, PartialEq, Clone, Copy)]
312pub(crate) struct BuilderFlags(u8);
313impl BuilderFlags {
314 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
315 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
316}
317
318pub struct RegexBuilder {
319 mb: MetadataBuilder,
320 temp_vec: Vec<NodeId>,
321 num_created: u32,
322 index: FxHashMap<NodeKey, NodeId>,
323 array: Vec<NodeKey>,
324 metadata: Vec<MetadataId>,
325 reversed: Vec<NodeId>,
326 cache_empty: FxHashMap<NodeId, NodeFlags>,
327 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
328 tr_array: Vec<TRegex<TSetId>>,
329 tr_der_center: Vec<TRegexId>,
330 tr_der_begin: Vec<TRegexId>,
331 flags: BuilderFlags,
332 pub lookahead_context_max: u32,
334 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
335 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
336}
337
338
339macro_rules! iter_inter {
340 ($compiler:ident, $start:ident, $expression:expr) => {
341 let mut curr = $start;
342 while $compiler.get_kind(curr) == Kind::Inter {
343 let left = $compiler.get_left(curr);
344 $expression(left);
345 curr = $compiler.get_right(curr);
346 }
347 $expression(curr);
348 };
349}
350
351impl NodeId {
352 fn is_missing(&self) -> bool {
353 *self == NodeId::MISSING
354 }
355 #[inline]
356 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
357 b.get_flags_contains(self)
358 }
359 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
360 if self == tail {
361 true
362 } else if self.is_kind(b, Kind::Concat) {
363 self.right(b).has_concat_tail(b, tail)
364 } else {
365 false
366 }
367 }
368 fn missing_to_eps(&self) -> NodeId {
369 if *self == NodeId::MISSING {
370 NodeId::EPS
371 } else {
372 *self
373 }
374 }
375
376 #[inline]
377 fn kind(self, b: &RegexBuilder) -> Kind {
378 b.get_kind(self)
379 }
380 #[inline]
381 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
382 b.get_kind(self) == k
383 }
384 #[inline]
385 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
386 b.nullability(self) == Nullability::NEVER
387 }
388 #[inline]
389 fn nullability(self, b: &RegexBuilder) -> Nullability {
390 b.nullability(self)
391 }
392
393 #[inline]
394 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
395 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
396 }
397 #[inline]
398 pub fn left(self, b: &RegexBuilder) -> NodeId {
399 b.get_left(self)
400 }
401
402 #[inline]
403 pub fn right(self, b: &RegexBuilder) -> NodeId {
404 b.get_right(self)
405 }
406
407 #[inline]
408 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
409 b.der(self, mask)
410 }
411
412 #[inline]
413 fn extra(self, b: &RegexBuilder) -> u32 {
414 b.get_extra(self)
415 }
416
417 #[inline]
418 fn is_pred(self, b: &RegexBuilder) -> bool {
419 b.get_kind(self) == Kind::Pred
420 }
421 #[inline]
422 fn is_lookahead(self, b: &RegexBuilder) -> bool {
423 b.get_kind(self) == Kind::Lookahead
424 }
425 #[inline]
426 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
427 debug_assert!(self.is_pred(b));
428 TSetId(b.get_extra(self))
429 }
430 #[inline]
431 fn is_star(self, b: &RegexBuilder) -> bool {
432 if NodeId::EPS == self {
433 return false;
434 }
435 b.get_kind(self) == Kind::Star
436 }
437
438 #[inline]
439 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
440 if self.is_concat(b) {
441 let r = self.right(b);
442 return r.is_star(b) && r.left(b) == self.left(b);
443 }
444 false
445 }
446
447 #[inline]
448 fn is_concat(self, b: &RegexBuilder) -> bool {
449 b.get_kind(self) == Kind::Concat
450 }
451
452 #[inline]
453 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
454 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
455 Some(b.get_right(self))
456 } else {
457 None
458 }
459 }
460
461 #[inline]
462 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
463 if b.get_kind(self) == Kind::Concat {
464 let left = self.left(b);
465 let right = self.right(b);
466 if left.is_kind(b, Kind::Compl) {
467 if left.left(b) == NodeId::TOPPLUS {
468 return right == NodeId::END;
469 }
470 }
471 }
472 false
473 }
474
475 #[inline]
476 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
477 if NodeId::EPS == self {
478 return None;
479 }
480 if self.is_star(b) && self.left(b).is_pred(b) {
481 Some(self.left(b))
482 } else {
483 None
484 }
485 }
486
487 #[inline]
488 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
489 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
490 let middle = self.right(b);
491 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
492 return Some(middle.left(b));
493 }
494 }
495 None
496 }
497
498 #[inline]
499 pub(crate) fn iter_union(
500 self,
501 b: &mut RegexBuilder,
502 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
503 ) {
504 let mut curr: NodeId = self;
505 while curr.kind(b) == Kind::Union {
506 f(b, curr.left(b));
507 curr = curr.right(b);
508 }
509 f(b, curr);
510 }
511
512 #[inline]
513 pub(crate) fn iter_union_while(
514 self,
515 b: &mut RegexBuilder,
516 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
517 ) {
518 let mut curr: NodeId = self;
519 let mut continue_loop = true;
520 while continue_loop && curr.kind(b) == Kind::Union {
521 continue_loop = f(b, curr.left(b));
522 curr = curr.right(b);
523 }
524 if continue_loop {
525 f(b, curr);
526 }
527 }
528}
529
530impl RegexBuilder {
531 pub fn new() -> RegexBuilder {
532 let mut inst = Self {
533 mb: MetadataBuilder::new(),
534 array: Vec::new(),
535 index: FxHashMap::default(),
536 cache_empty: FxHashMap::default(),
537 tr_array: Vec::new(),
538 tr_cache: FxHashMap::default(),
539 flags: BuilderFlags::ZERO,
540 lookahead_context_max: 800,
541 num_created: 0,
542 metadata: Vec::new(),
543 reversed: Vec::new(),
544 tr_der_center: Vec::new(),
545 tr_der_begin: Vec::new(),
546 temp_vec: Vec::new(),
547 mk_binary_memo: FxHashMap::default(),
548 clean_cache: FxHashMap::default(),
549 };
550 inst.array.push(NodeKey::default());
551 inst.mk_pred(TSetId::EMPTY);
552 inst.mk_star(NodeId::BOT);
553 inst.mk_pred(TSetId::FULL);
554 inst.mk_star(NodeId::TOP);
555 let top_plus_id = inst.mk_plus(NodeId::TOP);
556 inst.mk_unset(Kind::Begin);
557 inst.mk_unset(Kind::End);
558 debug_assert!(top_plus_id == NodeId::TOPPLUS);
559
560 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
561 inst.mk_leaf(NodeId::BOT);
562 inst.mk_leaf(NodeId::EPS);
563 inst.mk_leaf(NodeId::TOP);
564 inst.mk_leaf(NodeId::TS);
565
566 inst.flags = BuilderFlags::SUBSUME;
567 inst
568 }
569
570 #[inline]
571 pub fn solver_ref(&self) -> &Solver {
572 &self.mb.solver
573 }
574
575 #[inline]
576 pub fn solver(&mut self) -> &mut Solver {
577 &mut self.mb.solver
578 }
579
580 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
581 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
582 self.tr_cache.insert(inst.clone(), new_id);
583 self.tr_array.push(inst);
584 new_id
585 }
586
587 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
588 match self.tr_cache.get(&inst) {
589 Some(&id) => id,
590 None => self.tr_init(inst),
591 }
592 }
593
594 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
595 &self.tr_array[inst.0 as usize]
596 }
597
598 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
599 let solv = self.solver();
600 !solv.is_sat_id(cond1, cond2)
601 }
602
603 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
604 let term = TRegex::<TSetId>::Leaf(node_id);
605 self.get_tregex_id(term)
606 }
607
608 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
609 let tmp_inst = TRegex::ITE(cond, _then, _else);
610 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
611 return *cached;
612 }
613 if _then == _else {
614 return _then;
615 }
616 if self.solver().is_full_id(cond) {
617 return _then;
618 }
619 if self.solver().is_empty_id(cond) {
620 return _else;
621 }
622 let _clean_then = match self.tr_array[_then.0 as usize] {
623 TRegex::Leaf(_) => _then,
624 _ => self.clean(cond, _then),
625 };
626 let notcond = self.solver().not_id(cond);
627 let _clean_else = match self.tr_array[_else.0 as usize] {
628 TRegex::Leaf(_) => _else,
629 _ => self.clean(notcond, _else),
630 };
631
632 if _clean_then == _clean_else {
633 return _clean_then;
634 }
635 match self.get_tregex(_clean_then) {
637 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
638 let _cond_then = leftcond.clone();
639 let new_then = _inner_then.clone();
640 let sand = self.solver().and_id(cond, _cond_then);
641 return self.mk_ite(sand, new_then, _clean_else);
642 }
643 _ => {}
644 }
645 match self.get_tregex(_clean_else) {
647 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
649 let e2clone = _e2.clone();
650 let new_cond = self.mb.solver.or_id(cond, _c2.clone());
651 return self.mk_ite(new_cond, _clean_then, e2clone);
652 }
653 _ => {}
654 }
655
656 if _clean_then == TRegexId::BOT {
657 let flip_cond = self.solver().not_id(cond);
658 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
659 return cleaned_id;
660 }
661
662 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
663 }
664
665 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
666 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
667 return cached;
668 }
669 let result = match *self.get_tregex(tterm) {
670 TRegex::Leaf(_) => tterm,
671 TRegex::ITE(alpha, _then_id, _else_id) => {
672 let notalpha = self.mb.solver.not_id(alpha);
673 if self.mb.solver.unsat_id(beta, alpha) {
674 self.clean(beta, _else_id)
676 } else if self.unsat(beta, notalpha) {
677 self.clean(beta, _then_id)
679 } else {
680 let tc = self.mb.solver.and_id(beta, alpha);
681 let ec = self.mb.solver.and_id(beta, notalpha);
682 let new_then = self.clean(tc, _then_id);
683 let new_else = self.clean(ec, _else_id);
684 self.mk_ite(alpha, new_then, new_else)
685 }
686 }
687 };
688 self.clean_cache.insert((beta, tterm), result);
689 result
690 }
691
692 fn mk_unary(
693 &mut self,
694 term: TRegexId,
695 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
696 ) -> TRegexId {
697 match self.tr_array[term.0 as usize] {
698 TRegex::Leaf(node_id) => {
699 let applied = apply(self, node_id);
700 self.mk_leaf(applied)
701 }
702 TRegex::ITE(c1, _then, _else) => {
703 let _then1 = self.mk_unary(_then, apply);
704 let _else1 = self.mk_unary(_else, apply);
705 self.mk_ite(c1, _then1, _else1)
706 }
707 }
708 }
709
710 fn mk_binary_result(
711 &mut self,
712 left: TRegexId,
713 right: TRegexId,
714 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
715 ) -> Result<TRegexId, AlgebraError> {
716 match self.tr_array[left.0 as usize] {
717 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
718 TRegex::Leaf(right_node_id) => {
719 let applied = apply(self, left_node_id, right_node_id)?;
720 Ok(self.mk_leaf(applied))
721 }
722 TRegex::ITE(c2, _then, _else) => {
723 let then2 = self.mk_binary_result(left, _then, apply)?;
724 let else2 = self.mk_binary_result(left, _else, apply)?;
725 Ok(self.mk_ite(c2, then2, else2))
726 }
727 },
728 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
729 TRegex::Leaf(_) => {
730 let then2 = self.mk_binary_result(_then1, right, apply)?;
731 let else2 = self.mk_binary_result(_else1, right, apply)?;
732 Ok(self.mk_ite(c1, then2, else2))
733 }
734 TRegex::ITE(c2, _then2, _else2) => {
735 if c1 == c2 {
736 let _then = self.mk_binary_result(_then1, _then2, apply)?;
737 let _else = self.mk_binary_result(_else1, _else2, apply)?;
738 return Ok(self.mk_ite(c1, _then, _else));
739 }
740 if c1.0 > c2.0 {
741 let _then = self.mk_binary_result(_then1, right, apply)?;
742 let _else = self.mk_binary_result(_else1, right, apply)?;
743 return Ok(self.mk_ite(c1, _then, _else));
744 } else {
745 let _then = self.mk_binary_result(left, _then2, apply)?;
746 let _else = self.mk_binary_result(left, _else2, apply)?;
747 return Ok(self.mk_ite(c2, _then, _else));
748 }
749 }
750 },
751 }
752 }
753
754 fn mk_binary(
755 &mut self,
756 left: TRegexId,
757 right: TRegexId,
758 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
759 ) -> TRegexId {
760 self.mk_binary_memo.clear();
761 self.mk_binary_inner(left, right, apply)
762 }
763
764 fn mk_binary_inner(
765 &mut self,
766 left: TRegexId,
767 right: TRegexId,
768 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
769 ) -> TRegexId {
770 if left == right {
771 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
772 }
773 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
774 return cached;
775 }
776 let result = match self.tr_array[left.0 as usize] {
777 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
778 TRegex::Leaf(right_node_id) => {
779 let applied = apply(self, left_node_id, right_node_id);
780 self.mk_leaf(applied)
781 }
782 TRegex::ITE(c2, _then, _else) => {
783 let then2 = self.mk_binary_inner(left, _then, apply);
784 let else2 = self.mk_binary_inner(left, _else, apply);
785 self.mk_ite(c2, then2, else2)
786 }
787 },
788 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
789 TRegex::Leaf(_) => {
790 let then2 = self.mk_binary_inner(_then1, right, apply);
791 let else2 = self.mk_binary_inner(_else1, right, apply);
792 self.mk_ite(c1, then2, else2)
793 }
794 TRegex::ITE(c2, _then2, _else2) => {
795 if c1 == c2 {
796 let _then = self.mk_binary_inner(_then1, _then2, apply);
797 let _else = self.mk_binary_inner(_else1, _else2, apply);
798 self.mk_ite(c1, _then, _else)
799 } else if c1.0 > c2.0 {
800 let _then = self.mk_binary_inner(_then1, right, apply);
801 let _else = self.mk_binary_inner(_else1, right, apply);
802 self.mk_ite(c1, _then, _else)
803 } else {
804 let _then = self.mk_binary_inner(left, _then2, apply);
805 let _else = self.mk_binary_inner(left, _else2, apply);
806 self.mk_ite(c2, _then, _else)
807 }
808 }
809 },
810 };
811 self.mk_binary_memo.insert((left, right), result);
812 result
813 }
814
815 pub fn get_nulls(
816 &mut self,
817 pending_rel: u32,
818 mask: Nullability,
819 acc: &mut BTreeSet<NullState>,
820 node_id: NodeId,
821 ) {
822 debug_assert!(node_id != NodeId::MISSING);
823 if !self.is_nullable(node_id, mask) {
824 return;
825 }
826 match self.get_kind(node_id) {
827 Kind::Pred => {}
828 Kind::End => {
829 if mask.has(Nullability::END) {
830 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
831 }
832 }
833 Kind::Begin => {
834 if mask.has(Nullability::BEGIN) {
835 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
836 }
837 }
838 Kind::Concat => {
839 let new_mask = self.nullability(node_id).and(mask);
840 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
841 if self.is_nullable(node_id.left(self), mask) {
842 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
843 }
844 }
845 Kind::Union => {
846 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
847 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
848 }
849 Kind::Inter => {
850 let new_mask = self.nullability(node_id).and(mask);
851 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
852 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
853 }
854 Kind::Star => {
855 acc.insert(NullState::new(mask, pending_rel));
856 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
857 }
858 Kind::Compl => {
859 if !self.is_nullable(node_id.left(self), mask) {
860 acc.insert(NullState::new(mask, 0));
861 }
862 }
863 Kind::Lookbehind => {
864 let new_mask = self.nullability(node_id).and(mask);
865 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
866 if node_id.right(self) != NodeId::MISSING {
867 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
868 }
869 }
870 Kind::Lookahead => {
871 let la_inner = self.get_lookahead_inner(node_id);
872 if self.is_nullable(la_inner, mask) {
873 let rel = self.get_lookahead_rel(node_id);
874 if rel != u32::MAX {
875 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
876 }
877 }
878 let la_tail = self.get_lookahead_tail(node_id);
879 if la_tail != NodeId::MISSING {
880 self.get_nulls(pending_rel, mask, acc, la_tail);
881 }
882 }
883 Kind::Counted => {
884 let packed = self.get_extra(node_id);
885 let best = (packed >> 16) as u32;
886 if best > 0 {
887 acc.insert(NullState::new(mask, pending_rel + best));
888 }
889 }
890 }
891 }
892
893 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
894 self.get_meta_flags(node_id)
895 .has(MetaFlags::CONTAINS_LOOKAROUND)
896 }
897
898 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
900 self.get_meta_flags(node_id)
901 .has(MetaFlags::CONTAINS_ANCHORS)
902 }
903
904
905 pub fn is_infinite(&self, node_id: NodeId) -> bool {
906 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
907 }
908
909 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
911 if self.is_infinite(node_id) {
912 if self.get_kind(node_id) == Kind::Inter {
913 self.get_bounded_length(node_id)
914 } else {
915 (self.get_min_length_only(node_id), u32::MAX)
916 }
917 } else {
918 self.get_bounded_length(node_id)
919 }
920 }
921
922 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
923 if node_id == NodeId::EPS {
924 return (0, 0);
925 }
926 match self.get_kind(node_id) {
927 Kind::End | Kind::Begin => (0, 0),
928 Kind::Pred => (1, 1),
929 Kind::Concat => {
930 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
931 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
932 (lmin + rmin, lmax.saturating_add(rmax))
933 }
934 Kind::Union => {
935 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
936 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
937 (lmin.min(rmin), lmax.max(rmax))
938 }
939 Kind::Inter => {
940 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
941 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
942 (lmin.max(rmin), lmax.min(rmax))
943 }
944 Kind::Lookahead => {
945 let body = node_id.left(self);
946 if self.is_infinite(body) {
947 return (0, u32::MAX);
948 }
949 let right = node_id.right(self);
950 if right.is_missing() {
951 (0, 0)
952 } else {
953 self.get_min_max_length(right)
954 }
955 }
956 Kind::Counted => (0, 0),
957 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
958 }
959 }
960
961 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
962 match self.get_kind(node_id) {
963 Kind::End | Kind::Begin => Some(0),
964 Kind::Pred => Some(1),
965 Kind::Concat => {
966 let l = self.get_fixed_length(node_id.left(self))?;
967 let r = self.get_fixed_length(node_id.right(self))?;
968 Some(l + r)
969 }
970 Kind::Union => {
971 let l = self.get_fixed_length(node_id.left(self))?;
972 let r = self.get_fixed_length(node_id.right(self))?;
973 if l == r {
974 Some(l)
975 } else {
976 None
977 }
978 }
979 Kind::Inter => {
980 let l = self.get_fixed_length(node_id.left(self))?;
981 let r = self.get_fixed_length(node_id.right(self))?;
982 if l == r {
983 Some(l)
984 } else {
985 None
986 }
987 }
988 Kind::Lookahead => {
989 let right = node_id.right(self);
990 if right.is_missing() {
991 Some(0)
992 } else {
993 self.get_fixed_length(right)
994 }
995 }
996 Kind::Counted => Some(0),
997 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
998 }
999 }
1000
1001 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1002 match self.get_kind(node_id) {
1003 Kind::End | Kind::Begin => 0,
1004 Kind::Pred => 1,
1005 Kind::Concat => {
1006 self.get_min_length_only(node_id.left(self))
1007 + self.get_min_length_only(node_id.right(self))
1008 }
1009 Kind::Union => self
1010 .get_min_length_only(node_id.left(self))
1011 .min(self.get_min_length_only(node_id.right(self))),
1012 Kind::Inter => self
1013 .get_min_length_only(node_id.left(self))
1014 .max(self.get_min_length_only(node_id.right(self))),
1015 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1016 Kind::Compl => {
1017 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1018 0
1019 } else {
1020 1
1021 }
1022 }
1023 }
1024 }
1025
1026 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1027 if node_id == NodeId::TS {
1028 return true;
1029 }
1030 match self.get_kind(node_id) {
1031 Kind::Inter => {
1032 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1033 }
1034 Kind::Union => {
1035 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1036 }
1037 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1038 _ => false,
1039 }
1040 }
1041
1042 #[inline]
1043 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1044 if self.get_kind(node_id) == Kind::Concat {
1045 self.ends_with_ts(node_id.right(self))
1046 } else {
1047 node_id == NodeId::TS
1048 }
1049 }
1050
1051 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1052 debug_assert!(node_id != NodeId::MISSING);
1053 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1054 }
1055
1056 pub(crate) fn cache_der(
1057 &mut self,
1058 node_id: NodeId,
1059 result: TRegexId,
1060 mask: Nullability,
1061 ) -> TRegexId {
1062 if mask == Nullability::CENTER {
1063 self.tr_der_center[node_id.0 as usize] = result
1064 } else {
1065 self.tr_der_begin[node_id.0 as usize] = result
1066 };
1067 result
1068 }
1069
1070 pub(crate) fn try_cached_der(
1071 &mut self,
1072 node_id: NodeId,
1073 mask: Nullability,
1074 ) -> Option<TRegexId> {
1075 let cache = if mask == Nullability::CENTER {
1076 &mut self.tr_der_center
1077 } else {
1078 &mut self.tr_der_begin
1079 };
1080 match cache.get(node_id.0 as usize) {
1081 Some(&TRegexId::MISSING) => {}
1082 Some(&result) => {
1083 return Some(result);
1084 }
1085 None => {
1086 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1087 }
1088 }
1089 return None;
1090 }
1091
1092 pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1093 let mut term = self.get_tregex(der);
1094 loop {
1095 match *term {
1096 TRegex::Leaf(node_id) => return node_id,
1097 TRegex::ITE(cond, _then, _else) => {
1098 if self.solver().is_sat_id(set, cond) {
1099 term = self.get_tregex(_then);
1100 } else {
1101 term = self.get_tregex(_else);
1102 }
1103 }
1104 }
1105 }
1106 }
1107
1108 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1109 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1110 debug_assert!(
1111 node_id != NodeId::MISSING,
1112 "attempting to derive missing node"
1113 );
1114 if let Some(result) = self.try_cached_der(node_id, mask) {
1115 return Ok(result);
1116 }
1117
1118 let result = match node_id.kind(self) {
1119 Kind::Compl => {
1120 let leftd = node_id.left(self).der(self, mask)?;
1121 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1122 }
1123 Kind::Inter => {
1124 let leftd = node_id.left(self).der(self, mask)?;
1125 let rightd = node_id.right(self).der(self, mask)?;
1126 {
1127 let this = &mut *self;
1128 this.mk_binary(
1129 leftd,
1130 rightd,
1131 &mut (|b, left, right| b.mk_inter(left, right)),
1132 )
1133 }
1134 }
1135 Kind::Union => {
1136 let leftd = self.der(node_id.left(self), mask)?;
1137 let rightd = self.der(node_id.right(self), mask)?;
1138 {
1139 let this = &mut *self;
1140 this.mk_binary(
1141 leftd,
1142 rightd,
1143 &mut (|b, left, right| b.mk_union(left, right)),
1144 )
1145 }
1146 }
1147 Kind::Concat => {
1148 let head = node_id.left(self);
1149 let tail = node_id.right(self);
1150 let tail_leaf = self.mk_leaf(tail);
1151 let head_der = self.der(head, mask)?;
1152
1153 if self.is_nullable(head, mask) {
1154 let rightd = self.der(tail, mask)?;
1155 let ldr = {
1156 let this = &mut *self;
1157 this.mk_binary(
1158 head_der,
1159 tail_leaf,
1160 &mut (|b, left, right| b.mk_concat(left, right)),
1161 )
1162 };
1163 {
1164 let this = &mut *self;
1165 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1166 }
1167 } else {
1168 let ldr = {
1169 let this = &mut *self;
1170 this.mk_binary(
1171 head_der,
1172 tail_leaf,
1173 &mut (|b, left, right| b.mk_concat(left, right)),
1174 )
1175 };
1176 ldr
1177 }
1178 }
1179 Kind::Star => {
1180 if node_id == NodeId::EPS {
1181 TRegexId::BOT
1182 } else {
1183 let left = node_id.left(self);
1184 let r_decr_leaf = self.mk_leaf(node_id);
1185 let r_der = self.der(left, mask)?;
1186 let result = {
1187 let this = &mut *self;
1188 this.mk_binary(
1189 r_der,
1190 r_decr_leaf,
1191 &mut (|b, left, right| b.mk_concat(left, right)),
1192 )
1193 };
1194 result
1195 }
1196 }
1197 Kind::Lookbehind => {
1198 let lb_prev_der = {
1199 let lb_prev = self.get_lookbehind_prev(node_id);
1200 if lb_prev == NodeId::MISSING {
1201 TRegexId::MISSING
1202 } else {
1203 self.der(lb_prev, mask)?
1204 }
1205 };
1206 let lb_inner = self.get_lookbehind_inner(node_id);
1207 let lb_inner_der = self.der(lb_inner, mask)?;
1208 {
1209 let this = &mut *self;
1210 this.mk_binary_result(
1211 lb_inner_der,
1212 lb_prev_der,
1213 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1214 )?
1215 }
1216 }
1217 Kind::Lookahead => {
1218 let la_tail = self.get_lookahead_tail(node_id);
1219 let la_body = node_id.left(self);
1220 let rel = self.get_lookahead_rel(node_id);
1221
1222 if self.is_nullable(la_body, mask) {
1223 let right = node_id.right(self).missing_to_eps();
1225 let rder = self.der(right, mask).clone();
1226 return rder;
1227 }
1228
1229 if rel == u32::MAX {
1230 let la_body_der = self.der(la_body, mask)?;
1231 if la_tail.is_kind(self, Kind::Pred) {
1232 let transitioned =
1233 self.transition_term(la_body_der, la_tail.pred_tset(self));
1234 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1235 let concated = self.mk_concat(la_tail, new_la);
1236 return self.der(concated, mask);
1237 }
1238 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1239 let left = la_tail.left(self);
1240 let tset = left.pred_tset(self);
1241 let transitioned = self.transition_term(la_body_der, tset);
1242 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1243 let tail_right = la_tail.right(self);
1244 let concated = self.mk_concat(new_la, tail_right);
1245 let concated = self.mk_concat(left, concated);
1246 return self.der(concated, mask);
1247 }
1248 }
1249
1250 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1251 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1252 let concated = self.mk_concat(la_body, nulls_mask);
1253 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1254 let non_nulled = self.mk_non_nullable_safe(la_tail);
1255 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1256 let new_union = self.mk_union(concated_look, new_look);
1257 return self.der(new_union, mask);
1258 }
1259
1260 let la_tail_der = if la_tail == NodeId::MISSING {
1261 TRegexId::MISSING
1262 } else {
1263 if self.is_nullable(la_tail, mask) {
1264 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1265 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1266 let la_union = self.mk_union(la_tail, nulls_la);
1267 self.der(la_union, mask)?
1268 } else {
1269 self.der(la_tail, mask)?
1270 }
1271 };
1272
1273 let la_body_der = self.der(la_body, mask)?;
1274
1275 if rel != u32::MAX && rel > self.lookahead_context_max {
1276 return Err(AlgebraError::AnchorLimit);
1277 }
1278
1279 let la = {
1280 let this = &mut *self;
1281 let rel = helpers::incr_rel(rel);
1282 this.mk_binary(
1283 la_body_der,
1284 la_tail_der,
1285 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1286 )
1287 };
1288
1289 if rel != u32::MAX
1290 && la_tail_der != TRegexId::MISSING
1291 && self.is_nullable(la_tail, mask)
1292 {
1293 let look_only = {
1294 let this = &mut *self;
1295 let right = TRegexId::MISSING;
1296 let rel = helpers::incr_rel(rel);
1297 this.mk_binary(
1298 la_body_der,
1299 right,
1300 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1301 )
1302 };
1303 {
1304 let this = &mut *self;
1305 this.mk_binary(
1306 look_only,
1307 la,
1308 &mut (|b, left, right| b.mk_union(left, right)),
1309 )
1310 }
1311 } else {
1312 la
1313 }
1314 }
1315 Kind::Counted => {
1316 let body = node_id.left(self);
1317 let chain = node_id.right(self);
1318 let packed = self.get_extra(node_id);
1319 let step = (packed & 0xFFFF) as u16;
1320 let best = (packed >> 16) as u16;
1321
1322 let mid_best = if self.is_nullable(body, mask) && step >= best {
1323 step
1324 } else {
1325 best
1326 };
1327
1328 let body_der = self.der(body, mask)?;
1329 let new_step = step.saturating_add(1);
1330 self.mk_unary(body_der, &mut |b, new_body| {
1331 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1332 new_step
1333 } else {
1334 mid_best
1335 };
1336 let packed = (final_best as u32) << 16 | new_step as u32;
1337 b.mk_counted(new_body, chain, packed)
1338 })
1339 }
1340 Kind::Begin | Kind::End => TRegexId::BOT,
1341 Kind::Pred => {
1342 let psi = node_id.pred_tset(&self);
1343 if psi == TSetId::EMPTY {
1344 TRegexId::BOT
1345 } else {
1346 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1347 }
1348 }
1349 };
1350
1351 self.cache_der(node_id, result, mask);
1355 Ok(result)
1356 }
1357
1358 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1359 debug_assert!(meta_id != MetadataId::MISSING);
1360 match self.metadata.get_mut(node_id.0 as usize) {
1361 Some(v) => *v = meta_id,
1362 None => {
1363 self.metadata
1364 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1365 self.metadata[node_id.0 as usize] = meta_id;
1366 }
1367 }
1368 }
1369
1370 fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1371 debug_assert!(reversed_id != NodeId::MISSING);
1372 match self.reversed.get_mut(node_id.0 as usize) {
1373 Some(v) => *v = reversed_id,
1374 None => {
1375 self.reversed
1376 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1377 self.reversed[node_id.0 as usize] = reversed_id;
1378 }
1379 }
1380 }
1381
1382 fn init(&mut self, inst: NodeKey) -> NodeId {
1383 self.num_created += 1;
1384 let node_id = NodeId(self.num_created);
1385 self.index.insert(inst.clone(), node_id);
1386 match inst.kind {
1387 Kind::Pred => {
1388 let meta_id = self.mb.get_meta_id(Metadata {
1389 flags: MetaFlags::ZERO,
1390 nulls: NullsId::EMPTY,
1391 });
1392 self.init_metadata(node_id, meta_id);
1393 }
1394 Kind::Begin => {
1395 let meta_id = self.mb.get_meta_id(Metadata {
1396 flags: MetaFlags::with_nullability(
1397 Nullability::BEGIN,
1398 MetaFlags::CONTAINS_ANCHORS,
1399 ),
1400 nulls: NullsId::BEGIN0,
1401 });
1402 self.init_metadata(node_id, meta_id);
1403 }
1404 Kind::End => {
1405 let meta_id = self.mb.get_meta_id(Metadata {
1406 flags: MetaFlags::with_nullability(
1407 Nullability::END,
1408 MetaFlags::CONTAINS_ANCHORS,
1409 ),
1410 nulls: NullsId::END0,
1411 });
1412 self.init_metadata(node_id, meta_id);
1413 }
1414 Kind::Inter => {
1415 let m1 = self.get_node_meta_id(inst.left);
1416 let m2 = self.get_node_meta_id(inst.right);
1417 let meta_id = {
1418 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1419 let mask_l = inst.left.nullability(self);
1420 let mask_r = inst.right.nullability(self);
1421 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1422 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1423 nulls = self.mb.nb.and_mask(nulls, mask_l);
1424 nulls = self.mb.nb.and_mask(nulls, mask_r);
1425 let new_meta = Metadata {
1426 flags: self.mb.flags_inter(m1, m2),
1427 nulls,
1428 };
1429 let new_id = self.mb.get_meta_id(new_meta);
1430
1431 new_id
1432 };
1433 self.init_metadata(node_id, meta_id);
1434 }
1435 Kind::Union => {
1436 let m1 = self.get_node_meta_id(inst.left);
1437 let m2 = self.get_node_meta_id(inst.right);
1438 let meta_id = {
1439 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1440 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1441 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1442 let new_meta = Metadata {
1443 flags: self.mb.flags_union(m1, m2),
1444 nulls,
1445 };
1446 let new_id = self.mb.get_meta_id(new_meta);
1447 new_id
1448 };
1449 self.init_metadata(node_id, meta_id);
1450 }
1451 Kind::Concat => {
1452 let flags = self.mb.flags_concat(
1453 self.get_node_meta_id(inst.left),
1454 self.get_node_meta_id(inst.right),
1455 );
1456
1457 let right_nullability = inst.right.nullability(self);
1458 let left_nullability = inst.left.nullability(self);
1459 let nulls_left = self.get_nulls_id(inst.left);
1460 let nulls_right = self.get_nulls_id(inst.right);
1461 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1462 let mask = right_nullability.and(left_nullability);
1463 nulls = self.mb.nb.and_mask(nulls, mask);
1464
1465 let new_id = self.mb.get_meta_id(Metadata {
1466 flags,
1467 nulls: nulls,
1468 });
1469 self.init_metadata(node_id, new_id);
1470 }
1471 Kind::Star => {
1472 let left_nulls = self.get_nulls_id(inst.left);
1473 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1474 let meta_id = self.mb.get_meta_id(Metadata {
1475 flags: self
1476 .mb
1477 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1478 nulls: nulls,
1479 });
1480 self.init_metadata(node_id, meta_id);
1481 }
1482 Kind::Compl => {
1483 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1484 let meta_id = self.mb.get_meta_id(Metadata {
1485 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1486 nulls: nulls,
1487 });
1488 self.init_metadata(node_id, meta_id);
1489 }
1490 Kind::Lookbehind => {
1491 let mut null = self.get_meta_flags(inst.left).nullability();
1492 let mut contains_flags = self.get_flags_contains(inst.left);
1493 if !inst.right.is_missing() {
1494 null = null.and(self.get_meta_flags(inst.right).nullability());
1495 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1496 }
1497
1498 let left_nulls = self.get_nulls_id(inst.left);
1499 let right_nulls = self.get_nulls_id(inst.right);
1500 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1501 let meta_id = self.mb.get_meta_id(Metadata {
1502 flags: MetaFlags::with_nullability(
1503 null,
1504 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1505 ),
1506 nulls: nulls,
1507 });
1508 self.init_metadata(node_id, meta_id);
1509 }
1510 Kind::Lookahead => {
1511 let mut nulls = self.get_nulls_id(inst.left);
1512 let left_nullability = inst.left.nullability(self);
1513 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1514 nulls = self.mb.nb.or_id(nulls, nulls_right);
1515 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1516
1517 let la_inner = inst.left;
1518 let la_tail = inst.right;
1519 let null = self
1520 .get_meta_flags(la_inner)
1521 .nullability()
1522 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1523 let contains_flags = self
1524 .get_flags_contains(la_inner)
1525 .or(self.get_flags_contains(la_tail));
1526
1527 let meta_id = self.mb.get_meta_id(Metadata {
1528 flags: MetaFlags::with_nullability(
1529 null,
1530 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1531 ),
1532 nulls: nulls,
1533 });
1534 self.init_metadata(node_id, meta_id);
1535 }
1536 Kind::Counted => {
1537 let best = (inst.extra >> 16) as u32;
1538 let (null, nulls) = if best > 0 {
1539 let mut ns = BTreeSet::new();
1540 ns.insert(NullState::new(Nullability::CENTER, best));
1541 (Nullability::CENTER, self.mb.nb.get_id(ns))
1542 } else {
1543 (Nullability::NEVER, NullsId::EMPTY)
1544 };
1545 let meta_id = self.mb.get_meta_id(Metadata {
1546 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1547 nulls,
1548 });
1549 self.init_metadata(node_id, meta_id);
1550 }
1551 }
1552
1553 self.array.push(inst);
1554
1555 if let Some(rw) = self.post_init_simplify(node_id) {
1556 self.override_as(node_id, rw)
1557 } else {
1558 node_id
1559 }
1560 }
1561
1562 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1563 match self.get_kind(node_id) {
1564 Kind::Concat => {
1565 let lhs = node_id.left(self);
1566 let rhs = node_id.right(self);
1567 if let Some(_) = lhs.is_pred_star(self) {
1568 if let Some(opttail) = rhs.is_opt_v(self) {
1569 if let Some(true) = self.subsumes(lhs, opttail) {
1570 return Some(lhs);
1572 }
1573 }
1574 }
1575 }
1576 Kind::Union => {
1577 let lhs = node_id.left(self);
1578 let rhs = node_id.right(self);
1579 let mut subsumed = false;
1580 rhs.iter_union_while(self, &mut |b, branch| {
1581 if b.nullable_subsumes(branch, lhs) {
1582 subsumed = true;
1583 }
1584 !subsumed
1585 });
1586 if subsumed {
1587 return Some(rhs);
1588 }
1589 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1590 return Some(rhs);
1591 }
1592 }
1593 _ => {}
1594 }
1595
1596 None
1597 }
1598
1599 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1601 if self.get_kind(lhs) != Kind::Union {
1602 return false; }
1604 let mut rhs_branches = Vec::new();
1605 let mut curr = rhs;
1606 while self.get_kind(curr) == Kind::Union {
1607 rhs_branches.push(self.get_left(curr));
1608 curr = self.get_right(curr);
1609 }
1610 rhs_branches.push(curr);
1611
1612 curr = lhs;
1613 while self.get_kind(curr) == Kind::Union {
1614 if !rhs_branches.contains(&self.get_left(curr)) {
1615 return false;
1616 }
1617 curr = self.get_right(curr);
1618 }
1619 rhs_branches.contains(&curr)
1620 }
1621
1622 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1624 if node == target {
1625 return true;
1626 }
1627 match self.get_kind(node) {
1628 Kind::Union => {
1629 self.nullable_subsumes(self.get_left(node), target)
1630 || self.nullable_subsumes(self.get_right(node), target)
1631 }
1632 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1633 self.nullable_subsumes(self.get_right(node), target)
1634 }
1635 _ => false,
1636 }
1637 }
1638
1639 pub fn num_nodes(&self) -> u32 {
1640 self.num_created
1641 }
1642 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1643 match self.index.get(&inst) {
1644 Some(&id) => id,
1645 None => self.init(inst),
1646 }
1647 }
1648 #[inline]
1649 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1650 self.index.get(inst)
1651 }
1652
1653 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1654 self.index.insert(key, subsumed);
1655 subsumed
1656 }
1657
1658 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1659 let key = &self.array[key.0 as usize];
1660 let entry = self.index.get_mut(key).unwrap();
1661 *entry = subsumed;
1662 subsumed
1663 }
1664
1665 #[inline]
1666 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1667 self.array[node_id.0 as usize].left
1668 }
1669
1670 #[inline]
1671 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1672 self.array[node_id.0 as usize].right
1673 }
1674
1675 #[inline]
1676 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1677 self.array[node_id.0 as usize].extra
1678 }
1679
1680 #[inline]
1681 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1682 debug_assert!(self.get_kind(node_id) == Kind::Concat);
1683 let mut curr = node_id;
1684 while self.get_kind(curr) == Kind::Concat {
1685 curr = curr.right(self);
1686 }
1687 curr
1688 }
1689
1690 fn has_trailing_la(&self, node: NodeId) -> bool {
1692 let end = match self.get_kind(node) {
1693 Kind::Concat => self.get_concat_end(node),
1694 Kind::Lookahead => node,
1695 _ => return false,
1696 };
1697 self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1698 }
1699
1700 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1702 if self.get_kind(node) == Kind::Lookahead {
1703 return (NodeId::EPS, node);
1704 }
1705 debug_assert!(self.get_kind(node) == Kind::Concat);
1706 let right = node.right(self);
1707 if self.get_kind(right) != Kind::Concat {
1708 return (node.left(self), right);
1710 }
1711 let (stripped, la) = self.strip_trailing_la(right);
1712 (self.mk_concat(node.left(self), stripped), la)
1713 }
1714 #[inline]
1715 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1716 debug_assert!(matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted));
1717 lookahead_node_id.left(self)
1718 }
1719 #[inline]
1720 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1721 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1722 lookahead_node_id.right(self)
1723 }
1724 #[inline]
1725 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1726 debug_assert!(
1727 matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted),
1728 "not lookahead/counted: {:?}",
1729 self.pp(lookahead_node_id)
1730 );
1731 self.get_extra(lookahead_node_id)
1732 }
1733 #[inline]
1734 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1735 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1736 lookbehind_node_id.left(self)
1737 }
1738 #[inline]
1739 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1740 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1741 lookbehind_node_id.right(self)
1742 }
1743
1744 #[inline]
1745 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1746 debug_assert!(
1747 self.array.len() > node_id.0 as usize,
1748 "array len: {:?}",
1749 node_id
1750 );
1751 self.array[node_id.0 as usize].kind
1752 }
1753
1754 #[inline]
1755 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1756 &self.array[node_id.0 as usize]
1757 }
1758
1759 #[inline]
1760 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1761 self.metadata[node_id.0 as usize]
1762 }
1763
1764 #[inline]
1765 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1766 debug_assert!(node_id.0 != u32::MAX);
1767 let meta_id = self.get_node_meta_id(node_id);
1768 debug_assert!(meta_id != MetadataId::MISSING);
1769 &self.mb.array[meta_id.0 as usize]
1770 }
1771
1772 #[inline]
1773 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1774 if node_id == NodeId::MISSING {
1775 NullsId::EMPTY
1776 } else {
1777 self.get_meta(node_id).nulls
1778 }
1779 }
1780
1781 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1782 self.mb
1783 .nb
1784 .array
1785 .iter()
1786 .map(|set| set.iter().cloned().collect())
1787 .collect()
1788 }
1789
1790 pub fn nulls_count(&self) -> usize {
1791 self.mb.nb.array.len()
1792 }
1793
1794 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1795 self.mb.nb.array[id as usize].iter().cloned().collect()
1796 }
1797
1798 #[inline]
1799 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1800 if node_id == NodeId::MISSING {
1801 NullsId::EMPTY
1802 } else {
1803 let nulls = self.get_meta(node_id).nulls;
1804 self.mb.nb.and_mask(nulls, mask)
1805 }
1806 }
1807
1808 #[inline]
1809 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1810 let meta_id = self.get_node_meta_id(node_id);
1811 let meta = &self.mb.array[meta_id.0 as usize];
1812 meta.flags
1813 }
1814
1815 #[inline]
1816 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1817 self.get_meta(node_id).flags.nullability()
1818 }
1819
1820 #[inline]
1821 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1822 let meta_id = self.get_node_meta_id(node_id);
1823 let meta = &self.mb.array[meta_id.0 as usize];
1824 meta.flags.all_contains_flags()
1825 }
1826
1827 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1828 if !self.contains_look(node_id) {
1829 return Ok(node_id);
1830 }
1831 if self.get_kind(node_id) == Kind::Concat
1832 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1833 {
1834 return self.strip_lb(node_id.right(self));
1835 }
1836 match self.get_kind(node_id) {
1837 Kind::Lookbehind | Kind::Lookahead => Err(AlgebraError::UnsupportedPattern),
1838 _ => Ok(node_id),
1839 }
1840 }
1841
1842 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1843 debug_assert!(node_id != NodeId::MISSING);
1844 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1845 if *rev != NodeId::MISSING {
1846 return Ok(*rev);
1847 }
1848 }
1849 let rw = match self.get_kind(node_id) {
1850 Kind::End => NodeId::BEGIN,
1851 Kind::Begin => NodeId::END,
1852 Kind::Pred => node_id,
1853 Kind::Concat => {
1854 let left = self.reverse(node_id.left(self))?;
1855 let right = self.reverse(node_id.right(self))?;
1856 self.mk_concat(right, left)
1857 }
1858 Kind::Union => {
1859 let left = self.reverse(node_id.left(self))?;
1860 let right = self.reverse(node_id.right(self))?;
1861 self.mk_union(left, right)
1862 }
1863 Kind::Inter => {
1864 let left = self.reverse(node_id.left(self))?;
1865 let right = self.reverse(node_id.right(self))?;
1866 self.mk_inter(left, right)
1867 }
1868 Kind::Star => {
1869 let body = self.reverse(node_id.left(self))?;
1870 self.mk_star(body)
1871 }
1872 Kind::Compl => {
1873 let body = self.reverse(node_id.left(self))?;
1874 self.mk_compl(body)
1875 }
1876 Kind::Lookbehind => {
1877 let prev = self.get_lookbehind_prev(node_id);
1879 let inner_id = self.get_lookbehind_inner(node_id);
1880 let rev_inner = self.reverse(inner_id)?;
1881 let rev_prev = if prev != NodeId::MISSING {
1882 self.reverse(prev)?
1883 } else {
1884 NodeId::MISSING
1885 };
1886 self.mk_lookahead(rev_inner, rev_prev, 0)
1887 }
1888 Kind::Lookahead => {
1889 let rel = self.get_lookahead_rel(node_id);
1890 if rel == u32::MAX {
1891 let lbody = self.get_lookahead_inner(node_id);
1893 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
1894 let lbody_ts = self.mk_concat(lbody, NodeId::TS);
1895 let ltail_ts = self.mk_concat(ltail, NodeId::TS);
1896 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1897 let rev = self.reverse(as_inter)?;
1898 self.init_reversed(node_id, rev);
1899 return Ok(rev);
1900 }
1901 if rel != 0 {
1902 return Err(AlgebraError::UnsupportedPattern);
1903 }
1904 let tail_node = self.get_lookahead_tail(node_id);
1906 let rev_tail = if tail_node != NodeId::MISSING {
1907 self.reverse(tail_node)?
1908 } else {
1909 NodeId::MISSING
1910 };
1911 let inner_id = self.get_lookahead_inner(node_id);
1912 let rev_inner = self.reverse(inner_id)?;
1913 self.mk_lookbehind(rev_inner, rev_tail)
1914 }
1915 Kind::Counted => {
1916 return Err(AlgebraError::UnsupportedPattern);
1917 }
1918 };
1919 self.init_reversed(node_id, rw);
1920 Ok(rw)
1921 }
1922
1923 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
1924 let node = NodeKey {
1925 kind: Kind::Pred,
1926 left: NodeId::MISSING,
1927 right: NodeId::MISSING,
1928 extra: pred.0,
1929 };
1930 self.get_node_id(node)
1931 }
1932
1933 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
1934 let key = NodeKey {
1935 kind: Kind::Compl,
1936 left: body,
1937 right: NodeId::MISSING,
1938 extra: u32::MAX,
1939 };
1940 if let Some(id) = self.key_is_created(&key) {
1941 return *id;
1942 }
1943 if body == NodeId::BOT {
1944 return NodeId::TS;
1945 }
1946 if body == NodeId::TS {
1947 return NodeId::BOT;
1948 }
1949
1950 if let Some(contains_body) = body.is_contains(&self) {
1951 if contains_body.is_pred(&self) {
1952 let pred = contains_body.pred_tset(&self);
1953 let notpred = self.mk_pred_not(pred);
1954 let node = self.mk_star(notpred);
1955 return self.init_as(key, node);
1956 } else if contains_body == NodeId::END {
1957 return self.init_as(key, NodeId::BOT);
1958 } else {
1959 }
1960 };
1961
1962 if self.get_kind(body) == Kind::Compl {
1963 return self.get_node(body).left;
1964 }
1965
1966 let node_id = self.get_node_id(key);
1967 node_id
1968 }
1969
1970 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
1971 let nid = self.get_nulls_id(body);
1972 let nref = self.mb.nb.get_set_ref(nid).clone();
1973 let mut futures = NodeId::BOT;
1974 for n in nref.iter() {
1975 if !n.is_mask_nullable(mask) {
1976 continue;
1977 }
1978
1979 let eff = if n.rel == 0 {
1980 NodeId::EPS
1981 } else {
1982 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
1983 };
1984 futures = self.mk_union(futures, eff)
1985 }
1986 futures
1987 }
1988
1989 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
1990 if cfg!(feature = "norewrite") {
1991 return None;
1992 }
1993
1994 if self.get_kind(tail) == Kind::Lookbehind {
1995 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
1996 return match self
1997 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
1998 {
1999 Ok(node) => Some(node),
2000 Err(_) => None,
2001 };
2002 }
2003 if self.get_kind(head) == Kind::Lookahead {
2004 let la_body = self.get_lookahead_inner(head);
2005 let la_tail = self.get_lookahead_tail(head);
2006 let la_rel = self.get_lookahead_rel(head);
2007 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2008 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2009 let tail_rel = self.get_lookahead_rel(new_la_tail);
2010 tail_rel + la_rel
2011 } else if new_la_tail.is_center_nullable(self) {
2012 la_rel
2013 } else {
2014 u32::MAX
2015 };
2016
2017 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2018 }
2019
2020 if head.is_kind(self, Kind::End) {
2021 if tail == NodeId::TS {
2022 return Some(head);
2023 }
2024 }
2025 if head == NodeId::TS {
2026 if tail == NodeId::END {
2027 return Some(head);
2028 }
2029 }
2030
2031 if head == NodeId::TS {
2032 if self.nullability(tail) == Nullability::ALWAYS {
2033 return Some(NodeId::TS);
2034 }
2035 }
2036
2037 if tail == NodeId::TS {
2038 if self.nullability(head) == Nullability::ALWAYS {
2039 return Some(NodeId::TS);
2040 }
2041 }
2042
2043 if self.get_kind(tail) == Kind::Union {
2044 if head == NodeId::TS {
2045 let mut should_distribute_top = false;
2046 self.iter_unions(tail, |v| {
2047 if v == NodeId::BEGIN || self.starts_with_ts(v) {
2048 should_distribute_top = true;
2049 }
2050 });
2051 if should_distribute_top {
2052 let mut new_union = NodeId::BOT;
2053 let mut curr = tail;
2054 while self.get_kind(curr) == Kind::Union {
2055 let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2056 new_union = self.mk_union(new_node, new_union);
2057 curr = curr.right(self);
2058 }
2059 let new_node = self.mk_concat(NodeId::TS, curr);
2060 new_union = self.mk_union(new_union, new_node);
2061 return Some(new_union);
2062 }
2063 }
2064 }
2065
2066 if self.get_kind(head) == Kind::Union {
2067 if head == NodeId::TS {
2068 let mut should_distribute_top = false;
2069 self.iter_unions(head, |v| {
2070 if v == NodeId::END || self.starts_with_ts(v) {
2071 should_distribute_top = true;
2072 }
2073 });
2074 if should_distribute_top {
2075 let mut new_union = NodeId::BOT;
2076 let mut curr = head;
2077 while self.get_kind(curr) == Kind::Union {
2078 let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2079 new_union = self.mk_union(new_node, new_union);
2080 curr = curr.right(self);
2081 }
2082 let new_node = self.mk_concat(curr, NodeId::TS);
2083 new_union = self.mk_union(new_union, new_node);
2084 return Some(new_union);
2085 }
2086 }
2087 }
2088
2089 if self.get_kind(head) == Kind::Inter {
2090 if tail == NodeId::TS {
2091 let mut alltopstar = true;
2092 iter_inter!(self, head, |v| {
2093 alltopstar = self.ends_with_ts(v);
2094 });
2095 if alltopstar {
2096 return Some(head);
2097 }
2098 }
2099 }
2100
2101 if head.is_star(self) && head == tail {
2102 return Some(head);
2103 }
2104
2105 None
2106 }
2107
2108 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2109 use Kind::*;
2110
2111 if cfg!(feature = "norewrite") {
2112 return None;
2113 }
2114 if left == right {
2115 return Some(left);
2116 }
2117
2118 if right.is_kind(self, Kind::Union) && left == right.left(self) {
2119 return Some(right);
2120 }
2121
2122 if self.get_kind(left) == Kind::Pred {
2123 if self.get_kind(right) == Kind::Pred {
2124 let l = left.pred_tset(&self);
2125 let r = right.pred_tset(&self);
2126 let solver = self.solver();
2127 let psi = solver.or_id(l, r);
2128 let rewrite = self.mk_pred(psi);
2129 return Some(rewrite);
2130 }
2131 }
2132
2133 if left == NodeId::EPS {
2134 if self.get_extra(left) == 0 {
2135 if self.nullability(right) == Nullability::ALWAYS {
2136 return Some(right);
2137 }
2138 }
2139 }
2140
2141 if self.get_kind(left) == Kind::Lookahead {
2142 if self.get_kind(right) == Kind::Lookahead {
2143 let lb = left.left(self);
2144 let lt = left.right(self);
2145 let lrel = left.extra(self);
2146
2147 let rb = right.left(self);
2148 let rt = right.right(self);
2149 let rrel = right.extra(self);
2150
2151 if lrel == rrel {
2152 if lb == rb {}
2153 if lt.is_missing() {
2154 if rt.is_missing() {
2155 let unioned = self.mk_union(lb, rb);
2156 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2157 return Some(node);
2158 }
2159 }
2160 }
2161 }
2162 }
2163
2164 if right.is_kind(self, Concat) {
2165 if left == NodeId::END {
2166 if right.left(self) == NodeId::END {
2167 if self.nullability(right).has(Nullability::END) {
2168 return Some(right);
2169 }
2170 }
2171 }
2172 if left == right.left(self) {
2174 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2175 let rw = self.mk_concat(left, rhs);
2176 return Some(rw);
2177 }
2178 if left.is_kind(self, Concat) {
2179 let head1 = left.left(self);
2180 let head2 = right.left(self);
2181
2182 if head1 == head2 {
2183 let t1 = left.right(self);
2184 let t2 = right.right(self);
2185 if head1 == NodeId::TS {
2187 if t2.has_concat_tail(self, t1) {
2188 return Some(left);
2189 }
2190 if t1.has_concat_tail(self, t2) {
2191 return Some(right);
2192 }
2193 }
2194 let un = self.mk_union(t1, t2);
2195 return Some(self.mk_concat(left.left(self), un));
2196 }
2197
2198 if false {
2201 let end1 = self.get_concat_end(left);
2202 let end2 = self.get_concat_end(right);
2203 if end1 == end2 {
2204 let flags = left.flags_contains(self).or(right.flags_contains(self));
2205 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2206 let rev1 = self.reverse(left).unwrap();
2207 let rev2 = self.reverse(right).unwrap();
2208
2209 let union_rev = self.mk_union(rev1, rev2);
2210 return Some(self.reverse(union_rev).unwrap());
2211 }
2212 }
2213 }
2214 }
2215 if self.get_kind(left) == Kind::Pred {
2216 if left == right.left(self) {
2217 let un = self.mk_opt(right.right(self));
2218 let conc = self.mk_concat(left, un);
2219 return Some(conc);
2220 }
2221 }
2222 }
2223
2224 if left == NodeId::EPS {
2225 if right.is_plus(self) {
2226 let result = self.mk_star(right.left(self));
2227 return Some(result);
2228 }
2229 }
2230
2231 if self.flags == BuilderFlags::SUBSUME {
2233 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2234 return Some(rw);
2235 }
2236 }
2237
2238 None
2239 }
2240
2241 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2242 let mut curr = node;
2243 while self.get_kind(curr) == Kind::Inter {
2244 out.push(self.get_left(curr));
2245 curr = self.get_right(curr);
2246 }
2247 out.push(curr);
2248 }
2249
2250 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2253 let mut curr = node;
2254 let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2255 if has_prefix {
2256 curr = self.get_right(curr);
2257 }
2258 let mut count = 0u32;
2259 let mut pred_set = None;
2260 while self.get_kind(curr) == Kind::Concat {
2261 let head = self.get_left(curr);
2262 if self.get_kind(head) != Kind::Pred {
2263 return None;
2264 }
2265 let ps = head.pred_tset(self);
2266 match pred_set {
2267 None => pred_set = Some(ps),
2268 Some(existing) => {
2269 if existing != ps {
2270 return None;
2271 }
2272 }
2273 }
2274 curr = self.get_right(curr);
2275 count += 1;
2276 }
2277 if count == 0 || self.get_kind(curr) != Kind::Star {
2278 return None;
2279 }
2280 Some((has_prefix, pred_set.unwrap(), curr, count))
2281 }
2282
2283 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2284 let mut j = 0;
2285 for &s in sub {
2286 while j < sup.len() && sup[j] < s {
2287 j += 1;
2288 }
2289 if j >= sup.len() || sup[j] != s {
2290 return false;
2291 }
2292 j += 1;
2293 }
2294 true
2295 }
2296
2297 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2298 if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2299 return None;
2300 }
2301
2302 let mut lc: Vec<NodeId> = Vec::new();
2303 let mut rc: Vec<NodeId> = Vec::new();
2304 self.collect_inter_components(left, &mut lc);
2305 self.collect_inter_components(right, &mut rc);
2306
2307 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2310 return Some(left);
2311 }
2312 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2314 return Some(right);
2315 }
2316
2317 if lc.len() == rc.len() {
2318 let mut diff_idx = None;
2319 for i in 0..lc.len() {
2320 if lc[i] != rc[i] {
2321 if diff_idx.is_some() {
2322 return None;
2323 }
2324 diff_idx = Some(i);
2325 }
2326 }
2327 if let Some(idx) = diff_idx {
2328 let a = lc[idx];
2329 let b = rc[idx];
2330 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2331 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2332 {
2333 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2334 return if ca < cb { Some(left) } else { Some(right) };
2335 }
2336 }
2337 }
2338 }
2339
2340 None
2341 }
2342
2343 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2344 if cfg!(feature = "norewrite") {
2345 return None;
2346 }
2347 if left == right {
2348 return Some(left);
2349 }
2350
2351 if self.get_kind(right) == Kind::Union {
2352 let mut result = NodeId::BOT;
2353 self.iter_unions_b(
2354 right,
2355 &mut (|b, v| {
2356 let new_inter = b.mk_inter(v, left);
2357 result = b.mk_union(result, new_inter);
2358 }),
2359 );
2360 return Some(result);
2361 }
2362 if self.get_kind(left) == Kind::Union {
2363 let mut result = NodeId::BOT;
2364 self.iter_unions_b(
2365 left,
2366 &mut (|b, v| {
2367 let new_inter = b.mk_inter(v, right);
2368 result = b.mk_union(result, new_inter);
2369 }),
2370 );
2371 return Some(result);
2372 }
2373
2374 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2375 return Some(NodeId::BOT);
2376 }
2377
2378 if left.kind(&self) == Kind::Compl {
2379 if right.kind(&self) == Kind::Compl {
2380 let bodies = self.mk_union(left.left(self), right.left(self));
2381 return Some(self.mk_compl(bodies));
2382 }
2383 }
2384
2385 if left == NodeId::TOPPLUS {
2386 if let Some(_) = right.is_pred_star(self) {
2387 let newloop = self.mk_plus(right.left(self));
2388 return Some(newloop);
2389 }
2390 if right.is_never_nullable(&self) {
2391 return Some(right);
2392 }
2393 if right.is_kind(self, Kind::Lookahead) {
2394 if self.get_lookahead_tail(right).is_missing() {
2395 return Some(NodeId::BOT);
2396 }
2397 }
2398 if right.is_kind(self, Kind::Concat) {}
2399 }
2400
2401 if right.is_lookahead(&self) {
2402 if left.is_never_nullable(&self) {
2403 return Some(NodeId::BOT);
2404 }
2405 }
2406
2407 if self.get_kind(right) == Kind::Compl {
2408 let compl_body = right.left(self);
2409 if left == compl_body {
2410 return Some(NodeId::BOT);
2411 }
2412 if self.get_kind(compl_body) == Kind::Concat {
2413 let compl_head = compl_body.left(self);
2414 if compl_body.right(self) == NodeId::TS {
2416 if compl_head == left {
2417 return Some(NodeId::BOT);
2418 }
2419 }
2420 }
2421 }
2422
2423 if let Some(pleft) = left.is_pred_star(&self) {
2424 if let Some(pright) = right.is_pred_star(&self) {
2425 let merged = self.mk_inter(pleft, pright);
2426 return Some(self.mk_star(merged));
2427 }
2428 }
2429
2430 {
2434 let l_is_clb = self.get_kind(left) == Kind::Concat
2435 && self.get_kind(left.left(self)) == Kind::Lookbehind;
2436 let r_is_clb = self.get_kind(right) == Kind::Concat
2437 && self.get_kind(right.left(self)) == Kind::Lookbehind;
2438 if l_is_clb || r_is_clb {
2439 let (lb, body) = if l_is_clb && r_is_clb {
2440 let lb1 = left.left(self);
2441 let lb2 = right.left(self);
2442 let inner = self.mk_inter(
2443 self.get_lookbehind_inner(lb1),
2444 self.get_lookbehind_inner(lb2),
2445 );
2446 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2448 let body = self.mk_inter(left.right(self), right.right(self));
2449 (lb, body)
2450 } else if l_is_clb {
2451 let lb = left.left(self);
2452 let body = self.mk_inter(left.right(self), right);
2453 (lb, body)
2454 } else {
2455 let lb = right.left(self);
2456 let body = self.mk_inter(left, right.right(self));
2457 (lb, body)
2458 };
2459 return Some(self.mk_concat(lb, body));
2460 }
2461 }
2462
2463 {
2467 let l_has_la = self.has_trailing_la(left);
2468 let r_has_la = self.has_trailing_la(right);
2469 if l_has_la || r_has_la {
2470 let (body, la) = if l_has_la && r_has_la {
2471 let (lbody, l_la) = self.strip_trailing_la(left);
2472 let (rbody, r_la) = self.strip_trailing_la(right);
2473 let inner = self.mk_inter(
2474 self.get_lookahead_inner(l_la),
2475 self.get_lookahead_inner(r_la),
2476 );
2477 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2478 let body = self.mk_inter(lbody, rbody);
2479 (body, la)
2480 } else if l_has_la {
2481 let (lbody, la) = self.strip_trailing_la(left);
2482 let body = self.mk_inter(lbody, right);
2483 (body, la)
2484 } else {
2485 let (rbody, la) = self.strip_trailing_la(right);
2486 let body = self.mk_inter(left, rbody);
2487 (body, la)
2488 };
2489 return Some(self.mk_concat(body, la));
2490 }
2491 }
2492
2493 return None;
2494 }
2495
2496 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2497 if cfg!(feature = "norewrite") {
2498 return None;
2499 }
2500 debug_assert!(self.get_kind(right_union) == Kind::Union);
2501
2502 let mut rewritten = None;
2503 right_union.iter_union_while(
2504 self,
2505 &mut (|b, v| {
2506 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2507 rewritten = Some((v, rw));
2508 false
2509 } else {
2510 true
2511 }
2512 }),
2513 );
2514
2515 if let Some(rw) = rewritten {
2516 let mut new_union = NodeId::BOT;
2517 right_union.iter_union(
2518 self,
2519 &mut (|b, v| {
2520 if v == rw.0 {
2521 new_union = b.mk_union(rw.1, new_union)
2522 } else {
2523 new_union = b.mk_union(v, new_union)
2524 }
2525 }),
2526 );
2527 return Some(new_union);
2528 };
2529
2530 return None;
2531 }
2532
2533 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2534 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2535 debug_assert!(tail != NodeId::MISSING);
2536 let key = NodeKey {
2537 kind: Kind::Concat,
2538 left: head,
2539 right: tail,
2540 extra: u32::MAX,
2541 };
2542 if let Some(id) = self.key_is_created(&key) {
2543 return *id;
2544 }
2545
2546 if head == NodeId::BOT || tail == NodeId::BOT {
2547 return NodeId::BOT;
2548 }
2549 if head == NodeId::EPS {
2550 return tail;
2551 }
2552 if tail == NodeId::EPS {
2553 return head;
2554 }
2555
2556 match tail {
2557 NodeId::BEGIN => {
2559 if !self.is_nullable(head, Nullability::BEGIN) {
2560 return NodeId::BOT;
2561 } else {
2562 return NodeId::BEGIN;
2563 }
2564 }
2565 _ => {}
2566 }
2567
2568 if head.is_kind(self, Kind::Concat) {
2570 let left = head.left(self);
2571 let newright = self.mk_concat(head.right(self), tail);
2572 let updated = self.mk_concat(left, newright);
2573 return self.init_as(key, updated);
2574 }
2575
2576 if head == NodeId::TS && tail == NodeId::END {
2577 return NodeId::TS;
2578 }
2579
2580 if self.get_kind(head) == Kind::End {
2581 if !self.is_nullable(tail, Nullability::END) {
2582 return NodeId::BOT;
2583 }
2584 }
2585
2586 if self.get_kind(tail) == Kind::Concat {
2587 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2588 let upd = self.mk_concat(rw, tail.right(self));
2589 return self.init_as(key, upd);
2590 }
2591 }
2592
2593 match self.attempt_rw_concat_2(head, tail) {
2594 Some(new) => return self.init_as(key, new),
2595 None => {}
2596 }
2597
2598 match (self.get_kind(head), self.get_kind(tail)) {
2599 (Kind::Star, Kind::Concat) if head.is_star(&self) => {
2601 let rl = tail.left(self);
2602 if head == rl {
2603 return self.init_as(key, tail);
2604 }
2605 }
2606 (_, Kind::Concat) => {
2608 let curr = head;
2609 let h2 = self.mk_concat(curr, tail.left(self));
2610 let tr = tail.right(self);
2611 match self.attempt_rw_concat_2(h2, tr) {
2612 Some(new) => {
2613 return self.init_as(key, new);
2614 }
2615 None => {}
2616 }
2617 }
2618 _ if head == NodeId::TS => {
2619 if self.starts_with_ts(tail) {
2620 return self.init_as(key, tail);
2621 }
2622 }
2623 _ => {}
2624 }
2625
2626 self.init(key)
2627 }
2628
2629 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2630 let lb_body = {
2632 match self.starts_with_ts(lb_body) {
2633 true => lb_body,
2634 false => self.mk_concat(NodeId::TS, lb_body),
2635 }
2636 };
2637 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2639 }
2640
2641 fn mk_lookbehind_internal(
2642 &mut self,
2643 lb_body: NodeId,
2644 lb_prev: NodeId,
2645 ) -> Result<NodeId, AlgebraError> {
2646 debug_assert!(lb_body != NodeId::MISSING);
2647 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2648 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2649 return Ok(NodeId::BOT);
2650 }
2651 if lb_body == NodeId::TS {
2652 return Ok(lb_prev);
2653 }
2654 if lb_body == NodeId::EPS {
2655 match lb_prev {
2656 NodeId::MISSING => return Ok(NodeId::EPS),
2657 NodeId::EPS => return Ok(NodeId::EPS),
2658 _ => return Err(AlgebraError::UnsupportedPattern),
2659 }
2660 }
2661
2662 let key = NodeKey {
2663 kind: Kind::Lookbehind,
2664 left: lb_body,
2665 right: lb_prev,
2666 extra: u32::MAX,
2667 };
2668 match self.key_is_created(&key) {
2669 Some(id) => return Ok(*id),
2670 None => {
2671 if lb_prev == NodeId::TS {
2672 return Ok(self.mk_concat(lb_prev, lb_body));
2673 }
2674
2675 Ok(self.init(key))
2676 }
2677 }
2678 }
2679
2680 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2681 let la_body = {
2683 match self.ends_with_ts(la_body) {
2684 true => la_body,
2685 false => self.mk_concat(la_body, NodeId::TS),
2686 }
2687 };
2688 let rel = if NodeId::MISSING == la_tail {
2689 rel
2690 } else {
2691 match la_tail.is_center_nullable(self) {
2692 false => u32::MAX,
2693 true => rel,
2694 }
2695 };
2696
2697 self.mk_lookahead_internal(la_body, la_tail, rel)
2698 }
2699
2700 pub fn mk_lookahead_internal(
2702 &mut self,
2703 la_body: NodeId,
2704 la_tail: NodeId,
2705 rel: u32,
2706 ) -> NodeId {
2707 let key = NodeKey {
2708 kind: Kind::Lookahead,
2709 left: la_body,
2710 right: la_tail,
2711 extra: rel,
2712 };
2713 if let Some(id) = self.key_is_created(&key) {
2714 return *id;
2715 }
2716 if la_body == NodeId::TS {
2717 if rel == 0 {
2718 return la_tail.missing_to_eps();
2719 } else {
2720 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2721 }
2722 }
2723 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2724 return NodeId::BOT;
2725 }
2726 if la_tail.is_missing() && rel == u32::MAX {
2727 return NodeId::BOT;
2728 }
2729
2730 if la_body == NodeId::EPS && la_tail.is_missing() {
2731 if rel == 0 {
2732 return la_body;
2733 }
2734 }
2735
2736 if la_tail == NodeId::TS {
2737 if rel == 0 || rel == u32::MAX {
2738 return self.mk_concat(la_body, NodeId::TS);
2739 } else if rel == u32::MAX {
2740 return self.mk_begins_with(la_body);
2741 }
2742 }
2743
2744 if rel == u32::MAX {
2745 if la_tail.is_missing() {
2746 return NodeId::BOT;
2747 }
2748
2749 if self.is_always_nullable(la_body) {
2750 return la_tail.missing_to_eps();
2751 }
2752
2753 if la_tail != NodeId::MISSING {
2754 match self.get_kind(la_tail) {
2755 _ => {
2756 if la_body.is_compl_plus_end(&self) {
2757 let minlen = self.get_min_length_only(la_tail);
2758 if minlen >= 1 {
2759 return NodeId::BOT;
2760 }
2761 }
2762 }
2763 }
2764 }
2765 }
2766
2767 if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2769 let la_body2 = self.get_lookahead_inner(la_tail);
2770 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2771 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2772 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2773 let new_la_rel = self.get_lookahead_rel(la_tail);
2774 let new_la_tail = self.get_lookahead_tail(la_tail);
2775 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2776 }
2777
2778 if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2780 let la_body_right = la_body.right(self);
2781 if self.is_always_nullable(la_body_right) {
2782 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2783 }
2784 if la_body.right(self) == NodeId::END {
2785 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2786 }
2787 let bodyright = la_body.right(self);
2788 if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2789 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2790 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2791 }
2792 }
2793
2794 if la_tail != NodeId::MISSING {
2795 match (self.get_kind(la_body), self.get_kind(la_tail)) {
2796 (Kind::Concat, Kind::Pred) => {
2797 let lpred = la_body.left(self);
2798 if self.get_kind(lpred) == Kind::Pred {
2799 let l = lpred.pred_tset(&self);
2800 let r = la_tail.pred_tset(&self);
2801 let psi_and = self.solver().and_id(l, r);
2802 let rewrite = self.mk_pred(psi_and);
2803 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2804 let new_right = self.mk_lookahead_internal(
2805 la_body.right(self),
2806 NodeId::MISSING,
2807 new_rel,
2808 );
2809 return self.mk_concat(rewrite, new_right);
2810 }
2811 }
2812 _ => {}
2813 }
2814 }
2815
2816 self.get_node_id(key)
2817 }
2818
2819 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2820 let has_match = (packed >> 16) > 0;
2821 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2822 return NodeId::BOT;
2823 }
2824 let chain = self.prune_counted_chain(body, chain);
2825 let key = NodeKey {
2826 kind: Kind::Counted,
2827 left: body,
2828 right: chain,
2829 extra: packed,
2830 };
2831 if let Some(id) = self.key_is_created(&key) {
2832 return *id;
2833 }
2834 self.get_node_id(key)
2835 }
2836
2837 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2838 if chain == NodeId::MISSING || body == NodeId::BOT {
2839 return chain;
2840 }
2841 if self.nullability(body) != Nullability::NEVER {
2843 return NodeId::MISSING;
2844 }
2845 let chain_body = chain.left(self);
2846 if chain_body == NodeId::BOT {
2847 return chain; }
2849 let (body_min, _) = self.get_min_max_length(body);
2851 let (chain_min, _) = self.get_min_max_length(chain_body);
2852 if body_min > 0 && body_min <= chain_min {
2853 return self.prune_counted_chain(body, chain.right(self));
2854 }
2855 let not_begins = self.mk_not_begins_with(body);
2856 let inter = self.mk_inter(chain_body, not_begins);
2857 let is_empty = inter == NodeId::BOT;
2858 if is_empty {
2859 self.prune_counted_chain(body, chain.right(self))
2860 } else {
2861 chain
2862 }
2863 }
2864
2865 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2866 match self.get_node(body).kind {
2867 Kind::Pred => {
2868 let psi = body.pred_tset(&self);
2869 let negated = self.mk_pred_not(psi);
2870 let union = self.mk_union(NodeId::END, negated);
2871 let la = self.mk_lookahead(union, NodeId::MISSING, rel);
2872 la
2873 }
2874 _ => {
2875 let neg_inner = self.mk_concat(body, NodeId::TS);
2876 let neg_part = self.mk_compl(neg_inner);
2877 let conc = self.mk_concat(neg_part, NodeId::END);
2878 let look = self.mk_lookahead(conc, NodeId::MISSING, rel);
2879 look
2880 }
2881 }
2882 }
2883
2884 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2885 match self.get_node(body).kind {
2886 Kind::Pred => {
2887 let psi = body.pred_tset(&self);
2888 let negated = self.mk_pred_not(psi);
2889 let union = self.mk_union(NodeId::BEGIN, negated);
2890 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2892 }
2893 _ => {
2894 let neg_inner = self.mk_concat(NodeId::TS, body);
2895 let neg_part = self.mk_compl(neg_inner);
2896 let conc = self.mk_concat(NodeId::BEGIN, neg_part);
2897 self.mk_lookbehind_internal(conc, NodeId::MISSING).unwrap()
2899 }
2900 }
2901 }
2902
2903 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2904 debug_assert!(left != NodeId::MISSING);
2905 debug_assert!(right != NodeId::MISSING);
2906 if left > right {
2907 return self.mk_union(right, left);
2908 }
2909 let key = NodeKey {
2910 kind: Kind::Union,
2911 left,
2912 right,
2913 extra: u32::MAX,
2914 };
2915 if let Some(id) = self.key_is_created(&key) {
2916 return *id;
2917 }
2918
2919 if left == right {
2920 return left;
2921 }
2922 if left == NodeId::BOT {
2923 return right;
2924 }
2925 if right == NodeId::BOT {
2926 return left;
2927 }
2928 if right == NodeId::TS {
2929 return right;
2930 }
2931 if left == NodeId::TS {
2932 return left;
2933 }
2934
2935 match (self.get_kind(left), self.get_kind(right)) {
2936 (Kind::Union, _) => {
2937 self.iter_unions_b(left, &mut |b, v| {
2938 b.temp_vec.push(v);
2939 });
2940 self.iter_unions_b(right, &mut |b, v| {
2941 b.temp_vec.push(v);
2942 });
2943 self.temp_vec.sort();
2944 let tree = self.temp_vec.clone();
2945 self.temp_vec.clear();
2946 let newnode = tree
2947 .iter()
2948 .rev()
2949 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2950 return self.init_as(key, newnode);
2951 }
2952 (_, Kind::Union) => {
2953 let rleft = right.left(&self);
2954 if left > rleft {
2956 self.iter_unions_b(left, &mut |b, v| {
2957 b.temp_vec.push(v);
2958 });
2959 self.iter_unions_b(right, &mut |b, v| {
2960 b.temp_vec.push(v);
2961 });
2962 self.temp_vec.sort();
2963 let tree = self.temp_vec.clone();
2964 self.temp_vec.clear();
2965 let newnode = tree
2966 .iter()
2967 .rev()
2968 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2969 return self.init_as(key, newnode);
2970 } else {
2971 if let Some(rw) = self.attempt_rw_unions(left, right) {
2972 return self.init_as(key, rw);
2973 }
2974 }
2975 }
2976 _ => {}
2977 }
2978
2979 if let Some(rw) = self.attempt_rw_union_2(left, right) {
2980 return self.init_as(key, rw);
2981 }
2982 self.init(key)
2983 }
2984
2985 pub(crate) fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
2986 debug_assert!(left_id != NodeId::MISSING);
2987 debug_assert!(right_id != NodeId::MISSING);
2988 if left_id == right_id {
2989 return left_id;
2990 }
2991 if left_id == NodeId::BOT || right_id == NodeId::BOT {
2992 return NodeId::BOT;
2993 }
2994 if left_id == NodeId::TS {
2995 return right_id;
2996 }
2997 if right_id == NodeId::TS {
2998 return left_id;
2999 }
3000 if left_id > right_id {
3001 return self.mk_inter(right_id, left_id);
3002 }
3003 let key = NodeKey {
3004 kind: Kind::Inter,
3005 left: left_id,
3006 right: right_id,
3007 extra: u32::MAX,
3008 };
3009 if let Some(id) = self.key_is_created(&key) {
3010 return *id;
3011 }
3012
3013 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3014 return self.init_as(key, rw);
3015 }
3016
3017 self.init(key)
3018 }
3019
3020 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3021 let node = NodeKey {
3022 kind,
3023 left: NodeId::MISSING,
3024 right: NodeId::MISSING,
3025 extra: u32::MAX,
3026 };
3027 self.init(node)
3028 }
3029
3030 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3031 let star = self.mk_star(body_id);
3032 self.mk_concat(body_id, star)
3033 }
3034 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3035 let opt = self.mk_opt(body_id);
3036 let mut nodes1 = vec![];
3037 for _ in lower..upper {
3038 nodes1.push(opt);
3039 }
3040 for _ in 0..lower {
3041 nodes1.push(body_id);
3042 }
3043 self.mk_concats(nodes1.into_iter())
3044 }
3045 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3046 self.mk_union(NodeId::EPS, body_id)
3047 }
3048
3049 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3050 let key = NodeKey {
3051 kind: Kind::Star,
3052 left: body_id,
3053 right: NodeId::MISSING,
3054 extra: 0,
3055 };
3056 if let Some(id) = self.key_is_created(&key) {
3057 return *id;
3058 }
3059 if body_id.is_kind(self, Kind::Star) {
3061 return body_id;
3062 }
3063 self.get_node_id(key)
3064 }
3065
3066 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3069 match self.get_kind(node_id) {
3070 Kind::End => Nullability::EMPTYSTRING,
3071 Kind::Begin => Nullability::EMPTYSTRING,
3072 Kind::Pred => Nullability::NEVER,
3073 Kind::Star => Nullability::ALWAYS,
3074 Kind::Inter | Kind::Concat => {
3075 let lnull = self.nullability_emptystring(node_id.left(self));
3076 let rnull = self.nullability_emptystring(node_id.right(self));
3077 lnull.and(rnull) }
3079 Kind::Union => {
3080 let lnull = self.nullability_emptystring(node_id.left(self));
3081 let rnull = self.nullability_emptystring(node_id.right(self));
3082 lnull.or(rnull)
3083 }
3084 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3085 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3086 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3087 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3088 }
3089 }
3090
3091 #[inline(always)]
3092 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3093 self.get_meta(node_id)
3094 .flags
3095 .nullability()
3096 .has(Nullability::CENTER.or(Nullability::END))
3097 }
3098
3099 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3100 self.get_only_nullability(node_id)
3101 }
3102
3103 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3104 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3105 }
3106
3107 pub fn pp(&self, node_id: NodeId) -> String {
3108 let mut s = String::new();
3109 self.ppw(&mut s, node_id).unwrap();
3110 s
3111 }
3112
3113 #[allow(dead_code)]
3114 pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3115 let nu = self.get_nulls_id(node_id);
3116 let nr = self.mb.nb.get_set_ref(nu);
3117 let s1 = format!("{:?}", nr);
3118 s1
3119 }
3120
3121 #[allow(dead_code)]
3122 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3123 match self.get_tregex(term_id) {
3124 TRegex::Leaf(node_id) => {
3125 let mut s = String::new();
3126 self.ppw(&mut s, *node_id).unwrap();
3127 s
3128 }
3129 TRegex::ITE(cond, then_id, else_id) => {
3130 format!(
3131 "ITE({},{},{})",
3132 self.solver_ref().pp(*cond),
3133 self.ppt(*then_id),
3134 self.ppt(*else_id)
3135 )
3136 }
3137 }
3138 }
3139
3140 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3141 if cfg!(feature = "graphviz") {
3142 match node_id {
3143 NodeId::MISSING => return write!(s, "MISSING"),
3144 NodeId::BOT => return write!(s, "⊥"),
3145 NodeId::TS => return write!(s, "_*"),
3146 NodeId::TOP => return write!(s, "_"),
3147 NodeId::EPS => return write!(s, "ε"),
3148 _ => {}
3149 }
3150 }
3151
3152 match node_id {
3153 NodeId::MISSING => return write!(s, "MISSING"),
3154 NodeId::BOT => return write!(s, "⊥"),
3155 NodeId::TS => return write!(s, "_*"),
3156 NodeId::TOP => return write!(s, "_"),
3157 NodeId::EPS => return write!(s, ""),
3158 _ => {}
3159 }
3160
3161 match self.get_kind(node_id) {
3162 Kind::End => return write!(s, r"\z"),
3163 Kind::Begin => return write!(s, r"\A"),
3164 Kind::Pred => {
3165 let psi = node_id.pred_tset(self);
3166 if psi == TSetId::EMPTY {
3167 return write!(s, r"⊥");
3168 } else if psi == TSetId::FULL {
3169 return write!(s, r"_");
3170 } else {
3171 return write!(s, "{}", self.solver_ref().pp(psi));
3172 }
3173 }
3174 Kind::Inter => {
3175 write!(s, "(")?;
3176 self.ppw(s, node_id.left(self))?;
3177 write!(s, "&")?;
3178 let mut curr = node_id.right(self);
3179 while self.get_kind(curr) == Kind::Inter {
3180 let n = curr.left(self);
3181 self.ppw(s, n)?;
3182 write!(s, "&")?;
3183 curr = curr.right(self);
3184 }
3185 self.ppw(s, curr)?;
3186 write!(s, ")")
3187 }
3188 Kind::Union => {
3189 let left = node_id.left(self);
3190 let right = node_id.right(self);
3191 write!(s, "(")?;
3192 self.ppw(s, left)?;
3193 write!(s, "|")?;
3194 let mut curr = right;
3195 while self.get_kind(curr) == Kind::Union {
3196 let n = curr.left(self);
3197 self.ppw(s, n)?;
3198 write!(s, "|")?;
3199 curr = curr.right(self);
3200 }
3201 self.ppw(s, curr)?;
3202 write!(s, ")")
3203 }
3204 Kind::Concat => {
3205 let left = node_id.left(self);
3206 let right = node_id.right(self);
3207 if right.is_star(self) && right.left(self) == left {
3208 self.ppw(s, left)?;
3209 write!(s, "+")?;
3210 return Ok(());
3211 }
3212 if right.is_concat(self) {
3213 let rl = right.left(self);
3214 if rl.is_star(self) && rl.left(self) == left {
3215 self.ppw(s, left)?;
3216 write!(s, "+")?;
3217 return self.ppw(s, right.right(self));
3218 }
3219 }
3220 if right.is_concat(self) && right.left(self) == left {
3221 let mut num = 1;
3222 let mut right = right;
3223 while right.is_concat(self) && right.left(self) == left {
3224 num = num + 1;
3225 right = right.right(self);
3226 }
3227 if let Some(inner) = left.is_opt_v(self) {
3229 let mut inner_count = 0;
3230 let mut right2 = right;
3231 while right2.is_concat(self) && right2.left(self) == inner {
3232 inner_count += 1;
3233 right2 = right2.right(self);
3234 }
3235 if right2 == inner {
3236 inner_count += 1;
3237 self.ppw(s, inner)?;
3238 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3239 }
3240 if inner_count > 0 {
3241 self.ppw(s, inner)?;
3242 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3243 return self.ppw(s, right2);
3244 }
3245 }
3246 self.ppw(s, left)?;
3247 if right == left {
3248 num = num + 1;
3249 return write!(s, "{{{}}}", num);
3250 }
3251 if num <= 3 && left.is_pred(self) {
3252 for _ in 1..num {
3253 self.ppw(s, left)?;
3254 }
3255 return self.ppw(s, right);
3256 } else {
3257 write!(s, "{{{}}}", num)?;
3258 return self.ppw(s, right);
3259 }
3260 }
3261 self.ppw(s, left)?;
3262 self.ppw(s, right)
3263 }
3264 Kind::Star => {
3265 let left = node_id.left(self);
3266 let leftkind = self.get_kind(left);
3267 match leftkind {
3268 Kind::Concat | Kind::Star | Kind::Compl => {
3269 write!(s, "(")?;
3270 self.ppw(s, left)?;
3271 write!(s, ")")?;
3272 }
3273 _ => {
3274 self.ppw(s, left)?;
3275 }
3276 };
3277 write!(s, "*")
3278 }
3279 Kind::Compl => {
3280 write!(s, "~(")?;
3281 self.ppw(s, node_id.left(self))?;
3282 write!(s, ")")
3283 }
3284 Kind::Lookbehind => {
3285 let lbleft = self.get_lookbehind_prev(node_id);
3286 let lbinner = self.get_lookbehind_inner(node_id);
3287 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3288 if lbleft != NodeId::MISSING {
3289 write!(s, "「")?;
3290 self.ppw(s, lbleft)?;
3291 write!(s, "」")?;
3292 }
3293
3294 write!(s, "(?<=")?;
3295 self.ppw(s, lbinner)?;
3296 write!(s, ")")
3297 }
3298 Kind::Lookahead => {
3299 let inner = self.get_lookahead_inner(node_id);
3300 write!(s, "(?=")?;
3301 self.ppw(s, inner)?;
3302 write!(s, ")")?;
3303 if self.get_lookahead_rel(node_id) != 0 {
3304 write!(s, "{{")?;
3305 let rel = self.get_lookahead_rel(node_id);
3306 if rel == u32::MAX {
3307 write!(s, "∅")?;
3308 } else {
3309 write!(s, "{}", rel)?;
3310 }
3311 write!(s, "}}")?;
3312 }
3313 if node_id.right(self) == NodeId::MISSING {
3314 Ok(())
3315 } else {
3316 write!(s, "『")?;
3317 self.ppw(s, node_id.right(self))?;
3318 write!(s, "』")
3319 }
3320 }
3321 Kind::Counted => {
3322 let body = node_id.left(self);
3323 let packed = self.get_extra(node_id);
3324 let step = packed & 0xFFFF;
3325 let best = packed >> 16;
3326 write!(s, "#(")?;
3327 self.ppw(s, body)?;
3328 write!(s, ")s{}b{}", step, best)
3329 }
3330 }
3331 }
3332
3333 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3334 self.mk_concat(node, NodeId::TS)
3335 }
3336
3337 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3338 let node_ts = self.mk_concat(node, NodeId::TS);
3339 self.mk_compl(node_ts)
3340 }
3341
3342
3343 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3344 let notset = self.solver().not_id(set);
3345 self.mk_pred(notset)
3346 }
3347
3348 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3349 let set_id = self.solver().u8_to_set_id(char as u8);
3350 self.mk_pred(set_id)
3351 }
3352
3353 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3354 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3355 self.mk_pred(rangeset)
3356 }
3357
3358 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3359 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3360 for &(lo, hi) in &ranges[1..] {
3361 let r = self.mk_range_u8(lo, hi);
3362 node = self.mk_union(node, r);
3363 }
3364 node
3365 }
3366
3367 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3368 let mut prefix = Vec::new();
3369 let mut curr = node;
3370 loop {
3371 if curr == NodeId::EPS {
3372 let full = !prefix.is_empty();
3373 return (prefix, full);
3374 }
3375 if curr == NodeId::BOT {
3376 break;
3377 }
3378 if self.get_kind(curr) == Kind::Pred {
3379 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3380 Some(byte) => {
3381 prefix.push(byte);
3382 return (prefix, true);
3383 }
3384 None => break, }
3386 }
3387 if self.get_kind(curr) != Kind::Concat {
3388 break;
3389 }
3390 let left = curr.left(self);
3391 if self.get_kind(left) != Kind::Pred {
3392 break;
3393 }
3394 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3395 Some(byte) => prefix.push(byte),
3396 None => break,
3397 }
3398 curr = curr.right(self);
3399 }
3400 (prefix, false)
3401 }
3402
3403 #[allow(dead_code)]
3404 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3405 let mut result = NodeId::EPS;
3406 for byte in raw_str.into_iter().rev() {
3407 let node = self.mk_u8(*byte);
3408 result = self.mk_concat(node, result);
3409 }
3410 result
3411 }
3412
3413 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3414 let mut result = NodeId::EPS;
3415 for byte in raw_str.bytes().rev() {
3416 let node = self.mk_u8(byte);
3417 result = self.mk_concat(node, result);
3418 }
3419 result
3420 }
3421
3422 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3423 let mut sorted: Vec<NodeId> = nodes.collect();
3424 if sorted.len() <= 1 {
3425 return sorted.pop().unwrap_or(NodeId::BOT);
3426 }
3427 sorted.sort();
3428 sorted.dedup();
3429 sorted.retain(|&x| x != NodeId::BOT);
3430 if sorted.is_empty() {
3431 return NodeId::BOT;
3432 }
3433 if sorted.len() > 16 {
3434 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3435 let mut non_concat: Vec<NodeId> = Vec::new();
3436 for &n in &sorted {
3437 if self.get_kind(n) == Kind::Concat {
3438 by_head.entry(self.get_left(n)).or_default().push(n);
3439 } else {
3440 non_concat.push(n);
3441 }
3442 }
3443 let mut absorbed: Vec<NodeId> = Vec::new();
3448 for &n in &non_concat {
3449 if by_head.contains_key(&n) {
3450 absorbed.push(n);
3451 }
3452 }
3453 if !absorbed.is_empty() {
3454 non_concat.retain(|n| !absorbed.contains(n));
3455 }
3456 if by_head.len() < sorted.len() {
3457 let mut groups: Vec<NodeId> = non_concat;
3458 for (head, tails) in by_head {
3459 let mut tail_nodes: Vec<NodeId> =
3460 tails.iter().map(|&n| self.get_right(n)).collect();
3461 if absorbed.contains(&head) {
3462 tail_nodes.push(NodeId::EPS);
3463 }
3464 let tail_union = self.mk_unions(tail_nodes.into_iter());
3465 let factored = self.mk_concat(head, tail_union);
3466 groups.push(factored);
3467 }
3468 groups.sort();
3469 groups.dedup();
3470 return self.mk_unions_balanced(&groups);
3471 }
3472 }
3473 self.mk_unions_balanced(&sorted)
3474 }
3475
3476 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3477 match nodes.len() {
3478 0 => NodeId::BOT,
3479 1 => nodes[0],
3480 n => {
3481 let mid = n / 2;
3482 let left = self.mk_unions_balanced(&nodes[..mid]);
3483 let right = self.mk_unions_balanced(&nodes[mid..]);
3484 self.mk_union(left, right)
3485 }
3486 }
3487 }
3488
3489 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3490 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3491 }
3492
3493 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3494 nodes
3495 .rev()
3496 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3497 }
3498}
3499
3500impl RegexBuilder {
3501 #[allow(dead_code)]
3502 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3503 match self.get_tregex(term_id).clone() {
3504 TRegex::Leaf(node_id) => {
3505 if NodeId::BOT == node_id {
3506 vec![]
3507 } else {
3508 vec![node_id]
3509 }
3510 }
3511 TRegex::ITE(_, then_id, else_id) => {
3512 let mut then_nodes = self.extract_sat(then_id);
3513 let mut else_nodes = self.extract_sat(else_id);
3514 then_nodes.append(&mut else_nodes);
3515 then_nodes
3516 }
3517 }
3518 }
3519
3520 pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3521 debug_assert!(self.get_kind(start) == Kind::Union);
3522 let mut curr = start;
3523 while self.get_kind(curr) == Kind::Union {
3524 f(curr.left(self));
3525 curr = curr.right(self);
3526 }
3527 f(curr);
3528 }
3529
3530 pub(crate) fn iter_unions_b(
3531 &mut self,
3532 curr: NodeId,
3533 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3534 ) {
3535 let mut curr = curr;
3536 while self.get_kind(curr) == Kind::Union {
3537 f(self, curr.left(self));
3538 curr = curr.right(self);
3539 }
3540 f(self, curr);
3541 }
3542
3543 pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3544 if !self.contains_look(node_id) {
3545 return Some(node_id);
3546 }
3547 match self.get_kind(node_id) {
3548 Kind::Pred | Kind::Begin | Kind::End => return Some(node_id),
3549 Kind::Concat => {
3550 let left = node_id.left(self);
3551 let right = node_id.right(self);
3552 let elim_l = self.try_elim_lookarounds(left)?;
3553 let elim_r = self.try_elim_lookarounds(right)?;
3554 let rw = self.mk_concat(elim_l, elim_r);
3555 Some(rw)
3556 }
3557 Kind::Union => {
3558 let left = node_id.left(self);
3559 let right = node_id.right(self);
3560 let elim_l = self.try_elim_lookarounds(left)?;
3561 let elim_r = self.try_elim_lookarounds(right)?;
3562 let rw = self.mk_union(elim_l, elim_r);
3563 Some(rw)
3564 }
3565
3566 Kind::Star => {
3567 let body = node_id.left(self);
3568 let elim_l = self.try_elim_lookarounds(body)?;
3569 Some(self.mk_star(elim_l))
3570 }
3571 Kind::Compl => {
3572 let left = node_id.left(self);
3573 let elim_l = self.try_elim_lookarounds(left)?;
3574 Some(self.mk_compl(elim_l))
3575 }
3576 Kind::Lookahead => {
3577 let rel = self.get_lookahead_rel(node_id);
3578 if rel != 0 {
3579 return None;
3580 }
3581 let lbody = self.get_lookahead_inner(node_id);
3582 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3583 let elim_l = self.try_elim_lookarounds(lbody)?;
3584 let elim_r = self.try_elim_lookarounds(ltail)?;
3585 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3586 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3587 let rw = self.mk_inter(lbody_ts, ltail_ts);
3588 Some(rw)
3589 }
3590 Kind::Lookbehind => {
3591 let linner = self.get_lookbehind_inner(node_id);
3592 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3593 let elim_l = self.try_elim_lookarounds(linner)?;
3594 let elim_r = self.try_elim_lookarounds(lprev)?;
3595 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3596 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3597 let rw = self.mk_inter(lbody_ts, ltail_ts);
3598 Some(rw)
3599 }
3600 Kind::Inter => {
3601 let left = node_id.left(self);
3602 let right = node_id.right(self);
3603 let elim_l = self.try_elim_lookarounds(left)?;
3604 let elim_r = self.try_elim_lookarounds(right)?;
3605 let rw = self.mk_inter(elim_l, elim_r);
3606 Some(rw)
3607 }
3608 Kind::Counted => None,
3609 }
3610 }
3611
3612 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3614 if self.nullability(node) == Nullability::NEVER {
3615 node
3616 } else {
3617 self.mk_inter(NodeId::TOPPLUS, node)
3618 }
3619 }
3620
3621 fn iter_find_stack(
3622 &self,
3623 stack: &mut Vec<TRegexId>,
3624 mut f: impl FnMut(NodeId) -> bool,
3625 ) -> bool {
3626 loop {
3627 match stack.pop() {
3628 None => return false,
3629 Some(curr) => match self.get_tregex(curr) {
3630 TRegex::Leaf(n) => {
3631 let mut curr = *n;
3632 while curr != NodeId::BOT {
3633 match self.get_kind(curr) {
3634 Kind::Union => {
3635 if f(curr.left(self)) {
3636 return true;
3637 }
3638 curr = curr.right(self);
3639 }
3640 _ => {
3641 if f(*n) {
3642 return true;
3643 }
3644 curr = NodeId::BOT;
3645 }
3646 }
3647 }
3648 }
3649 TRegex::ITE(_, then_id, else_id) => {
3650 if *else_id != TRegexId::BOT {
3651 stack.push(*else_id);
3652 }
3653 stack.push(*then_id);
3654 }
3655 },
3656 }
3657 }
3658 }
3659
3660 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3661 if node == NodeId::BOT {
3662 return Some(true);
3663 }
3664 if self.nullability(node) != Nullability::NEVER {
3665 return Some(false);
3666 }
3667 if let Some(cached) = self.cache_empty.get(&node) {
3668 if cached.is_checked() {
3669 return Some(cached.is_empty());
3670 }
3671 }
3672 let node = if !self.contains_look(node) {
3673 node
3674 } else {
3675 let elim = self.try_elim_lookarounds(node)?;
3676 elim
3677 };
3678 let isempty_flag = self.is_empty_lang_internal(node);
3679
3680 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3681 }
3682
3683 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3684 if !self.get_meta_flags(initial_node).contains_inter() {
3686 return Ok(NodeFlags::ZERO);
3687 }
3688
3689 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3690 let mut todo: VecDeque<NodeId> = VecDeque::new();
3691 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3692 let mut stack = Vec::new();
3693 stack.push(begin_der);
3694 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3695 visited.insert(node, initial_node);
3696 let nullability = self.nullability(node);
3697 if nullability != Nullability::NEVER {
3698 true
3699 } else {
3700 todo.push_back(node);
3701 false
3702 }
3703 });
3704 if found_nullable_right_away {
3705 return Ok(NodeFlags::ZERO);
3706 }
3707
3708 todo.push_back(initial_node);
3709 let isempty_flag: NodeFlags;
3710 let mut found_node = NodeId::BOT;
3711
3712 loop {
3713 match todo.pop_front() {
3714 None => {
3715 isempty_flag = NodeFlags::IS_EMPTY;
3716 break;
3717 }
3718 Some(outer) => {
3719 if let Some(cached) = self.cache_empty.get(&outer) {
3720 if cached.is_checked() {
3721 if cached.is_empty() {
3722 continue;
3723 } else {
3724 return Ok(NodeFlags::ZERO);
3725 }
3726 }
3727 }
3728
3729 let derivative = self.der(outer, Nullability::CENTER)?;
3730
3731 stack.push(derivative);
3732
3733 let found_null = self.iter_find_stack(&mut stack, |node| {
3734 if visited.contains_key(&node) {
3735 false
3736 } else {
3737 found_node = node;
3738 if !self.get_meta_flags(node).contains_inter() {
3739 return true;
3740 } else {
3741 visited.insert(node, outer);
3742 todo.push_front(node);
3743 self.any_nonbegin_nullable(node)
3744 }
3745 }
3746 });
3747 if found_null {
3748 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3749 isempty_flag = NodeFlags::ZERO;
3750 break;
3751 }
3752 }
3753 }
3754 }
3755
3756 self.cache_empty.insert(
3757 initial_node,
3758 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3759 );
3760 Ok(isempty_flag)
3761 }
3762
3763 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3765 if larger_lang == smaller_lang {
3766 return Some(true);
3767 }
3768
3769 if self
3771 .nullability(larger_lang)
3772 .not()
3773 .and(self.nullability(smaller_lang))
3774 != Nullability::NEVER
3775 {
3776 return Some(false);
3777 }
3778
3779 let (smaller_lang, larger_lang) =
3784 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3785 let wrap = |b: &mut Self, n: NodeId| {
3786 let tmp = b.mk_concat(n, NodeId::TS);
3787 b.mk_concat(NodeId::TS, tmp)
3788 };
3789 (wrap(self, smaller_lang), wrap(self, larger_lang))
3790 } else {
3791 (smaller_lang, larger_lang)
3792 };
3793
3794 let nota = self.mk_compl(larger_lang);
3795 let diff = self.mk_inter(smaller_lang, nota);
3796 self.is_empty_lang(diff)
3797 }
3798}