1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_lookaround(self) -> bool {
134 self.has(MetaFlags::CONTAINS_LOOKAROUND)
135 }
136 pub(crate) fn contains_inter(self) -> bool {
137 self.has(MetaFlags::CONTAINS_INTER)
138 }
139
140 pub(crate) fn all_contains_flags(self) -> MetaFlags {
141 self.and(
142 MetaFlags::CONTAINS_LOOKAROUND
143 .or(MetaFlags::CONTAINS_ANCHORS)
144 .or(MetaFlags::CONTAINS_INTER),
145 )
146 }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152 #[default]
153 Pred,
154 Star,
155 Begin,
156 End,
157 Concat,
158 Union,
159 Compl,
160 Lookbehind,
161 Lookahead,
162 Inter,
163 Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168 flags: MetaFlags,
169 nulls: NullsId,
170}
171
172struct MetadataBuilder {
173 num_created: u32,
174 solver: Solver,
175 nb: NullsBuilder,
176 index: FxHashMap<Metadata, MetadataId>,
177 pub array: Vec<Metadata>,
178}
179
180mod helpers {
181 pub(crate) fn incr_rel(n1: u32) -> u32 {
182 match n1.overflowing_add(1) {
183 (_, true) => u32::MAX,
184 (res, false) => res,
185 }
186 }
187}
188
189impl MetadataBuilder {
190 fn new() -> MetadataBuilder {
191 Self {
192 index: FxHashMap::default(),
193 array: vec![Metadata {
194 flags: MetaFlags::ZERO,
195 nulls: NullsId::EMPTY,
196 }],
197 solver: Solver::new(),
198 num_created: 0,
199 nb: NullsBuilder::new(),
200 }
201 }
202
203 fn init(&mut self, inst: Metadata) -> MetadataId {
204 self.num_created += 1;
205 let new_id = MetadataId(self.num_created);
206 self.index.insert(inst.clone(), new_id);
207 self.array.push(inst);
208 new_id
209 }
210
211 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212 match self.index.get(&inst) {
213 Some(&id) => id,
214 None => self.init(inst),
215 }
216 }
217
218 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219 &self.array[inst.0 as usize]
220 }
221
222 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223 setflags.all_contains_flags()
224 }
225
226 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227 let left = &self.array[body.0 as usize].flags;
228 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229 let inf = if body_id == NodeId::BOT {
231 MetaFlags::ZERO
232 } else {
233 MetaFlags::INFINITE_LENGTH
234 };
235 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236 }
237
238 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239 let left = &self.array[left_id.0 as usize].flags;
240 let null = left.nullability().not().and(Nullability::ALWAYS);
241 let contains = self.get_contains(*left);
242 MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
243 }
244
245 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
246 let left = &self.array[left_id.0 as usize].flags;
247 let right = &self.array[right_id.0 as usize].flags;
248 let null = left.nullability().and(right.nullability());
249 let contains = self.get_contains(left.or(*right));
250 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
251 MetaFlags::with_nullability(null, contains.or(len))
252 }
253
254 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
255 let left = &self.array[left_id.0 as usize].flags;
256 let right = &self.array[right_id.0 as usize].flags;
257 let null = left.nullability().and(right.nullability());
258 let contains = self
259 .get_contains(left.or(*right))
260 .or(MetaFlags::CONTAINS_INTER);
261 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
262 MetaFlags::with_nullability(null, contains.or(len))
263 }
264
265 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
266 let left = &self.array[left_id.0 as usize].flags;
267 let right = &self.array[right_id.0 as usize].flags;
268 let null = left.nullability().or(right.nullability());
269 let contains = self.get_contains(left.or(*right));
270 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
271 MetaFlags::with_nullability(null, contains.or(len))
272 }
273}
274
275#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
276pub struct NodeKey {
277 pub(crate) kind: Kind,
278 pub(crate) left: NodeId,
279 pub(crate) right: NodeId,
280 pub(crate) extra: u32,
281}
282
283#[derive(Eq, Hash, PartialEq, Clone, Debug)]
284pub enum TRegex<TSet> {
285 Leaf(NodeId),
286 ITE(TSet, TRegexId, TRegexId),
287}
288
289#[derive(Eq, Hash, PartialEq, Clone, Copy)]
290pub(crate) struct NodeFlags(u8);
291impl NodeFlags {
292 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
293 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
294 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
295
296 fn is_checked(self) -> bool {
297 self.0 >= NodeFlags::IS_CHECKED.0
298 }
299 fn is_empty(self) -> bool {
300 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
301 }
302}
303
304#[derive(Eq, Hash, PartialEq, Clone, Copy)]
305pub(crate) struct BuilderFlags(u8);
306impl BuilderFlags {
307 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
308 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
309}
310
311pub struct RegexBuilder {
312 mb: MetadataBuilder,
313 temp_vec: Vec<NodeId>,
314 num_created: u32,
315 index: FxHashMap<NodeKey, NodeId>,
316 array: Vec<NodeKey>,
317 metadata: Vec<MetadataId>,
318 reversed: Vec<NodeId>,
319 cache_empty: FxHashMap<NodeId, NodeFlags>,
320 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
321 tr_array: Vec<TRegex<TSetId>>,
322 tr_der_center: Vec<TRegexId>,
323 tr_der_begin: Vec<TRegexId>,
324 flags: BuilderFlags,
325 pub lookahead_context_max: u32,
327 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
328 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
329}
330
331macro_rules! iter_inter {
332 ($compiler:ident, $start:ident, $expression:expr) => {
333 let mut curr = $start;
334 while $compiler.get_kind(curr) == Kind::Inter {
335 let left = $compiler.get_left(curr);
336 $expression(left);
337 curr = $compiler.get_right(curr);
338 }
339 $expression(curr);
340 };
341}
342
343impl NodeId {
344 fn is_missing(&self) -> bool {
345 *self == NodeId::MISSING
346 }
347 #[inline]
348 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
349 b.get_flags_contains(self)
350 }
351 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
352 if self == tail {
353 true
354 } else if self.is_kind(b, Kind::Concat) {
355 self.right(b).has_concat_tail(b, tail)
356 } else {
357 false
358 }
359 }
360 fn missing_to_eps(&self) -> NodeId {
361 if *self == NodeId::MISSING {
362 NodeId::EPS
363 } else {
364 *self
365 }
366 }
367
368 #[inline]
369 fn kind(self, b: &RegexBuilder) -> Kind {
370 b.get_kind(self)
371 }
372 #[inline]
373 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
374 b.get_kind(self) == k
375 }
376 #[inline]
377 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
378 b.nullability(self) == Nullability::NEVER
379 }
380 #[inline]
381 fn nullability(self, b: &RegexBuilder) -> Nullability {
382 b.nullability(self)
383 }
384
385 #[inline]
386 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
387 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
388 }
389 #[inline]
390 pub fn left(self, b: &RegexBuilder) -> NodeId {
391 b.get_left(self)
392 }
393
394 #[inline]
395 pub fn right(self, b: &RegexBuilder) -> NodeId {
396 b.get_right(self)
397 }
398
399 #[inline]
400 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
401 b.der(self, mask)
402 }
403
404 #[inline]
405 fn extra(self, b: &RegexBuilder) -> u32 {
406 b.get_extra(self)
407 }
408
409 #[inline]
410 fn is_pred(self, b: &RegexBuilder) -> bool {
411 b.get_kind(self) == Kind::Pred
412 }
413
414 #[inline]
415 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
416 debug_assert!(self.is_pred(b));
417 TSetId(b.get_extra(self))
418 }
419 #[inline]
420 fn is_star(self, b: &RegexBuilder) -> bool {
421 if NodeId::EPS == self {
422 return false;
423 }
424 b.get_kind(self) == Kind::Star
425 }
426
427 #[inline]
428 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
429 b.get_kind(self) == Kind::Inter
430 }
431
432 #[inline]
433 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
434 if self.is_concat(b) {
435 let r = self.right(b);
436 return r.is_star(b) && r.left(b) == self.left(b);
437 }
438 false
439 }
440
441 #[inline]
442 fn is_concat(self, b: &RegexBuilder) -> bool {
443 b.get_kind(self) == Kind::Concat
444 }
445
446 #[inline]
447 fn is_lookahead(self, b: &RegexBuilder) -> bool {
448 b.get_kind(self) == Kind::Lookahead
449 }
450
451 #[inline]
452 fn is_lookbehind(self, b: &RegexBuilder) -> bool {
453 b.get_kind(self) == Kind::Lookbehind
454 }
455
456 #[inline]
457 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
458 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
459 Some(b.get_right(self))
460 } else {
461 None
462 }
463 }
464
465 #[inline]
466 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
467 if b.get_kind(self) == Kind::Concat {
468 let left = self.left(b);
469 let right = self.right(b);
470 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
471 return right == NodeId::END;
472 }
473 }
474 false
475 }
476
477 #[inline]
478 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
479 if NodeId::EPS == self {
480 return None;
481 }
482 if self.is_star(b) && self.left(b).is_pred(b) {
483 Some(self.left(b))
484 } else {
485 None
486 }
487 }
488
489 #[inline]
490 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
491 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
492 let middle = self.right(b);
493 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
494 return Some(middle.left(b));
495 }
496 }
497 None
498 }
499
500 #[inline]
501 pub(crate) fn iter_union(
502 self,
503 b: &mut RegexBuilder,
504 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
505 ) {
506 let mut curr: NodeId = self;
507 while curr.kind(b) == Kind::Union {
508 f(b, curr.left(b));
509 curr = curr.right(b);
510 }
511 f(b, curr);
512 }
513
514 #[inline]
515 pub(crate) fn iter_union_while(
516 self,
517 b: &mut RegexBuilder,
518 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
519 ) {
520 let mut curr: NodeId = self;
521 let mut continue_loop = true;
522 while continue_loop && curr.kind(b) == Kind::Union {
523 continue_loop = f(b, curr.left(b));
524 curr = curr.right(b);
525 }
526 if continue_loop {
527 f(b, curr);
528 }
529 }
530}
531
532impl Default for RegexBuilder {
533 fn default() -> Self {
534 Self::new()
535 }
536}
537
538impl RegexBuilder {
539 pub fn new() -> RegexBuilder {
540 let mut inst = Self {
541 mb: MetadataBuilder::new(),
542 array: Vec::new(),
543 index: FxHashMap::default(),
544 cache_empty: FxHashMap::default(),
545 tr_array: Vec::new(),
546 tr_cache: FxHashMap::default(),
547 flags: BuilderFlags::ZERO,
548 lookahead_context_max: 800,
549 num_created: 0,
550 metadata: Vec::new(),
551 reversed: Vec::new(),
552 tr_der_center: Vec::new(),
553 tr_der_begin: Vec::new(),
554 temp_vec: Vec::new(),
555 mk_binary_memo: FxHashMap::default(),
556 clean_cache: FxHashMap::default(),
557 };
558 inst.array.push(NodeKey::default());
559 inst.mk_pred(TSetId::EMPTY);
560 inst.mk_star(NodeId::BOT);
561 inst.mk_pred(TSetId::FULL);
562 inst.mk_star(NodeId::TOP);
563 let top_plus_id = inst.mk_plus(NodeId::TOP);
564 inst.mk_unset(Kind::Begin);
565 inst.mk_unset(Kind::End);
566 debug_assert!(top_plus_id == NodeId::TOPPLUS);
567
568 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
569 inst.mk_leaf(NodeId::BOT);
570 inst.mk_leaf(NodeId::EPS);
571 inst.mk_leaf(NodeId::TOP);
572 inst.mk_leaf(NodeId::TS);
573
574 inst.flags = BuilderFlags::SUBSUME;
575 inst
576 }
577
578 #[inline]
579 pub fn solver_ref(&self) -> &Solver {
580 &self.mb.solver
581 }
582
583 #[inline]
584 pub fn solver(&mut self) -> &mut Solver {
585 &mut self.mb.solver
586 }
587
588 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
589 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
590 self.tr_cache.insert(inst.clone(), new_id);
591 self.tr_array.push(inst);
592 new_id
593 }
594
595 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
596 match self.tr_cache.get(&inst) {
597 Some(&id) => id,
598 None => self.tr_init(inst),
599 }
600 }
601
602 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
603 &self.tr_array[inst.0 as usize]
604 }
605
606 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
607 let solv = self.solver();
608 !solv.is_sat_id(cond1, cond2)
609 }
610
611 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
612 let term = TRegex::<TSetId>::Leaf(node_id);
613 self.get_tregex_id(term)
614 }
615
616 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
617 let tmp_inst = TRegex::ITE(cond, _then, _else);
618 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
619 return *cached;
620 }
621 if _then == _else {
622 return _then;
623 }
624 if self.solver().is_full_id(cond) {
625 return _then;
626 }
627 if self.solver().is_empty_id(cond) {
628 return _else;
629 }
630 let _clean_then = match self.tr_array[_then.0 as usize] {
631 TRegex::Leaf(_) => _then,
632 _ => self.clean(cond, _then),
633 };
634 let notcond = self.solver().not_id(cond);
635 let _clean_else = match self.tr_array[_else.0 as usize] {
636 TRegex::Leaf(_) => _else,
637 _ => self.clean(notcond, _else),
638 };
639
640 if _clean_then == _clean_else {
641 return _clean_then;
642 }
643 match self.get_tregex(_clean_then) {
645 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
646 let _cond_then = *leftcond;
647 let new_then = *_inner_then;
648 let sand = self.solver().and_id(cond, _cond_then);
649 return self.mk_ite(sand, new_then, _clean_else);
650 }
651 _ => {}
652 }
653 match self.get_tregex(_clean_else) {
655 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
657 let e2clone = *_e2;
658 let new_cond = self.mb.solver.or_id(cond, *_c2);
659 return self.mk_ite(new_cond, _clean_then, e2clone);
660 }
661 _ => {}
662 }
663
664 if _clean_then == TRegexId::BOT {
665 let flip_cond = self.solver().not_id(cond);
666 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
667 return cleaned_id;
668 }
669
670 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
671 }
672
673 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
674 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
675 return cached;
676 }
677 let result = match *self.get_tregex(tterm) {
678 TRegex::Leaf(_) => tterm,
679 TRegex::ITE(alpha, _then_id, _else_id) => {
680 let notalpha = self.mb.solver.not_id(alpha);
681 if self.mb.solver.unsat_id(beta, alpha) {
682 self.clean(beta, _else_id)
684 } else if self.unsat(beta, notalpha) {
685 self.clean(beta, _then_id)
687 } else {
688 let tc = self.mb.solver.and_id(beta, alpha);
689 let ec = self.mb.solver.and_id(beta, notalpha);
690 let new_then = self.clean(tc, _then_id);
691 let new_else = self.clean(ec, _else_id);
692 self.mk_ite(alpha, new_then, new_else)
693 }
694 }
695 };
696 self.clean_cache.insert((beta, tterm), result);
697 result
698 }
699
700 fn mk_unary(
701 &mut self,
702 term: TRegexId,
703 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
704 ) -> TRegexId {
705 match self.tr_array[term.0 as usize] {
706 TRegex::Leaf(node_id) => {
707 let applied = apply(self, node_id);
708 self.mk_leaf(applied)
709 }
710 TRegex::ITE(c1, _then, _else) => {
711 let _then1 = self.mk_unary(_then, apply);
712 let _else1 = self.mk_unary(_else, apply);
713 self.mk_ite(c1, _then1, _else1)
714 }
715 }
716 }
717
718 fn mk_binary_result(
719 &mut self,
720 left: TRegexId,
721 right: TRegexId,
722 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
723 ) -> Result<TRegexId, AlgebraError> {
724 match self.tr_array[left.0 as usize] {
725 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
726 TRegex::Leaf(right_node_id) => {
727 let applied = apply(self, left_node_id, right_node_id)?;
728 Ok(self.mk_leaf(applied))
729 }
730 TRegex::ITE(c2, _then, _else) => {
731 let then2 = self.mk_binary_result(left, _then, apply)?;
732 let else2 = self.mk_binary_result(left, _else, apply)?;
733 Ok(self.mk_ite(c2, then2, else2))
734 }
735 },
736 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
737 TRegex::Leaf(_) => {
738 let then2 = self.mk_binary_result(_then1, right, apply)?;
739 let else2 = self.mk_binary_result(_else1, right, apply)?;
740 Ok(self.mk_ite(c1, then2, else2))
741 }
742 TRegex::ITE(c2, _then2, _else2) => {
743 if c1 == c2 {
744 let _then = self.mk_binary_result(_then1, _then2, apply)?;
745 let _else = self.mk_binary_result(_else1, _else2, apply)?;
746 return Ok(self.mk_ite(c1, _then, _else));
747 }
748 if c1.0 > c2.0 {
749 let _then = self.mk_binary_result(_then1, right, apply)?;
750 let _else = self.mk_binary_result(_else1, right, apply)?;
751 Ok(self.mk_ite(c1, _then, _else))
752 } else {
753 let _then = self.mk_binary_result(left, _then2, apply)?;
754 let _else = self.mk_binary_result(left, _else2, apply)?;
755 Ok(self.mk_ite(c2, _then, _else))
756 }
757 }
758 },
759 }
760 }
761
762 fn mk_binary(
763 &mut self,
764 left: TRegexId,
765 right: TRegexId,
766 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
767 ) -> TRegexId {
768 self.mk_binary_memo.clear();
769 self.mk_binary_inner(left, right, apply)
770 }
771
772 fn mk_binary_inner(
773 &mut self,
774 left: TRegexId,
775 right: TRegexId,
776 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
777 ) -> TRegexId {
778 if left == right {
779 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
780 }
781 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
782 return cached;
783 }
784 let result = match self.tr_array[left.0 as usize] {
785 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
786 TRegex::Leaf(right_node_id) => {
787 let applied = apply(self, left_node_id, right_node_id);
788 self.mk_leaf(applied)
789 }
790 TRegex::ITE(c2, _then, _else) => {
791 let then2 = self.mk_binary_inner(left, _then, apply);
792 let else2 = self.mk_binary_inner(left, _else, apply);
793 self.mk_ite(c2, then2, else2)
794 }
795 },
796 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
797 TRegex::Leaf(_) => {
798 let then2 = self.mk_binary_inner(_then1, right, apply);
799 let else2 = self.mk_binary_inner(_else1, right, apply);
800 self.mk_ite(c1, then2, else2)
801 }
802 TRegex::ITE(c2, _then2, _else2) => {
803 if c1 == c2 {
804 let _then = self.mk_binary_inner(_then1, _then2, apply);
805 let _else = self.mk_binary_inner(_else1, _else2, apply);
806 self.mk_ite(c1, _then, _else)
807 } else if c1.0 > c2.0 {
808 let _then = self.mk_binary_inner(_then1, right, apply);
809 let _else = self.mk_binary_inner(_else1, right, apply);
810 self.mk_ite(c1, _then, _else)
811 } else {
812 let _then = self.mk_binary_inner(left, _then2, apply);
813 let _else = self.mk_binary_inner(left, _else2, apply);
814 self.mk_ite(c2, _then, _else)
815 }
816 }
817 },
818 };
819 self.mk_binary_memo.insert((left, right), result);
820 result
821 }
822
823 pub fn get_nulls(
824 &mut self,
825 pending_rel: u32,
826 mask: Nullability,
827 acc: &mut BTreeSet<NullState>,
828 node_id: NodeId,
829 ) {
830 debug_assert!(node_id != NodeId::MISSING);
831 if !self.is_nullable(node_id, mask) {
832 return;
833 }
834 match self.get_kind(node_id) {
835 Kind::Pred => {}
836 Kind::End => {
837 if mask.has(Nullability::END) {
838 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
839 }
840 }
841 Kind::Begin => {
842 if mask.has(Nullability::BEGIN) {
843 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
844 }
845 }
846 Kind::Concat => {
847 let new_mask = self.nullability(node_id).and(mask);
848 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
849 if self.is_nullable(node_id.left(self), mask) {
850 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
851 }
852 }
853 Kind::Union => {
854 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
855 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
856 }
857 Kind::Inter => {
858 let new_mask = self.nullability(node_id).and(mask);
859 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
860 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
861 }
862 Kind::Star => {
863 acc.insert(NullState::new(mask, pending_rel));
864 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
865 }
866 Kind::Compl => {
867 if !self.is_nullable(node_id.left(self), mask) {
868 acc.insert(NullState::new(mask, 0));
869 }
870 }
871 Kind::Lookbehind => {
872 let new_mask = self.nullability(node_id).and(mask);
873 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
874 if node_id.right(self) != NodeId::MISSING {
875 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
876 }
877 }
878 Kind::Lookahead => {
879 let la_inner = self.get_lookahead_inner(node_id);
880 if self.is_nullable(la_inner, mask) {
881 let rel = self.get_lookahead_rel(node_id);
882 if rel != u32::MAX {
883 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
884 }
885 let la_tail = self.get_lookahead_tail(node_id);
887 if la_tail != NodeId::MISSING {
888 self.get_nulls(pending_rel, mask, acc, la_tail);
889 }
890 }
891 }
892 Kind::Counted => {
893 let packed = self.get_extra(node_id);
894 let best = packed >> 16;
895 if best > 0 {
896 acc.insert(NullState::new(mask, pending_rel + best));
897 }
898 }
899 }
900 }
901
902 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
903 self.get_meta_flags(node_id)
904 .has(MetaFlags::CONTAINS_LOOKAROUND)
905 }
906
907 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
909 self.get_meta_flags(node_id)
910 .has(MetaFlags::CONTAINS_ANCHORS)
911 }
912
913 pub fn is_infinite(&self, node_id: NodeId) -> bool {
914 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
915 }
916
917 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
919 if self.is_infinite(node_id) {
920 if node_id.is_inter(self) {
921 self.get_bounded_length(node_id)
922 } else {
923 (self.get_min_length_only(node_id), u32::MAX)
924 }
925 } else {
926 self.get_bounded_length(node_id)
927 }
928 }
929
930 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
931 if node_id == NodeId::EPS {
932 return (0, 0);
933 }
934 match self.get_kind(node_id) {
935 Kind::End | Kind::Begin => (0, 0),
936 Kind::Pred => (1, 1),
937 Kind::Concat => {
938 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
939 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
940 (lmin + rmin, lmax.saturating_add(rmax))
941 }
942 Kind::Union => {
943 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
944 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
945 (lmin.min(rmin), lmax.max(rmax))
946 }
947 Kind::Inter => {
948 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
949 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
950 (lmin.max(rmin), lmax.min(rmax))
951 }
952 Kind::Lookahead => {
953 let body = node_id.left(self);
954 if self.is_infinite(body) {
955 return (0, u32::MAX);
956 }
957 let right = node_id.right(self);
958 if right.is_missing() {
959 (0, 0)
960 } else {
961 self.get_min_max_length(right)
962 }
963 }
964 Kind::Counted => (0, 0),
965 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
966 }
967 }
968
969 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
970 match self.get_kind(node_id) {
971 Kind::End | Kind::Begin => Some(0),
972 Kind::Pred => Some(1),
973 Kind::Concat => {
974 let l = self.get_fixed_length(node_id.left(self))?;
975 let r = self.get_fixed_length(node_id.right(self))?;
976 Some(l + r)
977 }
978 Kind::Union => {
979 let l = self.get_fixed_length(node_id.left(self))?;
980 let r = self.get_fixed_length(node_id.right(self))?;
981 if l == r {
982 Some(l)
983 } else {
984 None
985 }
986 }
987 Kind::Inter => {
988 let l = self.get_fixed_length(node_id.left(self))?;
989 let r = self.get_fixed_length(node_id.right(self))?;
990 if l == r {
991 Some(l)
992 } else {
993 None
994 }
995 }
996 Kind::Lookahead => {
997 let right = node_id.right(self);
998 if right.is_missing() {
999 Some(0)
1000 } else {
1001 self.get_fixed_length(right)
1002 }
1003 }
1004 Kind::Counted => Some(0),
1005 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1006 }
1007 }
1008
1009 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1010 match self.get_kind(node_id) {
1011 Kind::End | Kind::Begin => 0,
1012 Kind::Pred => 1,
1013 Kind::Concat => {
1014 self.get_min_length_only(node_id.left(self))
1015 + self.get_min_length_only(node_id.right(self))
1016 }
1017 Kind::Union => self
1018 .get_min_length_only(node_id.left(self))
1019 .min(self.get_min_length_only(node_id.right(self))),
1020 Kind::Inter => self
1021 .get_min_length_only(node_id.left(self))
1022 .max(self.get_min_length_only(node_id.right(self))),
1023 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1024 Kind::Compl => {
1025 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1026 0
1027 } else {
1028 1
1029 }
1030 }
1031 }
1032 }
1033
1034 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1035 if node_id == NodeId::TS {
1036 return true;
1037 }
1038 match self.get_kind(node_id) {
1039 Kind::Inter => {
1040 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1041 }
1042 Kind::Union => {
1043 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1044 }
1045 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1046 _ => false,
1047 }
1048 }
1049
1050 #[inline]
1051 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1052 if node_id.is_concat(self) {
1053 self.ends_with_ts(node_id.right(self))
1054 } else {
1055 node_id == NodeId::TS
1056 }
1057 }
1058
1059 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1060 debug_assert!(node_id != NodeId::MISSING);
1061 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1062 }
1063
1064 pub(crate) fn cache_der(
1065 &mut self,
1066 node_id: NodeId,
1067 result: TRegexId,
1068 mask: Nullability,
1069 ) -> TRegexId {
1070 if mask == Nullability::CENTER {
1071 self.tr_der_center[node_id.0 as usize] = result
1072 } else {
1073 self.tr_der_begin[node_id.0 as usize] = result
1074 };
1075 result
1076 }
1077
1078 pub(crate) fn try_cached_der(
1079 &mut self,
1080 node_id: NodeId,
1081 mask: Nullability,
1082 ) -> Option<TRegexId> {
1083 let cache = if mask == Nullability::CENTER {
1084 &mut self.tr_der_center
1085 } else {
1086 &mut self.tr_der_begin
1087 };
1088 match cache.get(node_id.0 as usize) {
1089 Some(&TRegexId::MISSING) => {}
1090 Some(&result) => {
1091 return Some(result);
1092 }
1093 None => {
1094 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1095 }
1096 }
1097 None
1098 }
1099
1100 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1101 let mut term = self.get_tregex(der);
1102 loop {
1103 match *term {
1104 TRegex::Leaf(node_id) => return node_id,
1105 TRegex::ITE(cond, _then, _else) => {
1106 if self.solver().is_sat_id(set, cond) {
1107 term = self.get_tregex(_then);
1108 } else {
1109 term = self.get_tregex(_else);
1110 }
1111 }
1112 }
1113 }
1114 }
1115
1116 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1117 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1118 debug_assert!(
1119 node_id != NodeId::MISSING,
1120 "attempting to derive missing node"
1121 );
1122 if let Some(result) = self.try_cached_der(node_id, mask) {
1123 return Ok(result);
1124 }
1125
1126 let result = match node_id.kind(self) {
1127 Kind::Compl => {
1128 let leftd = node_id.left(self).der(self, mask)?;
1129 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1130 }
1131 Kind::Inter => {
1132 let leftd = node_id.left(self).der(self, mask)?;
1133 let rightd = node_id.right(self).der(self, mask)?;
1134 {
1135 let this = &mut *self;
1136 this.mk_binary(
1137 leftd,
1138 rightd,
1139 &mut (|b, left, right| b.mk_inter(left, right)),
1140 )
1141 }
1142 }
1143 Kind::Union => {
1144 let leftd = self.der(node_id.left(self), mask)?;
1145 let rightd = self.der(node_id.right(self), mask)?;
1146 {
1147 let this = &mut *self;
1148 this.mk_binary(
1149 leftd,
1150 rightd,
1151 &mut (|b, left, right| b.mk_union(left, right)),
1152 )
1153 }
1154 }
1155 Kind::Concat => {
1156 let head = node_id.left(self);
1157 let tail = node_id.right(self);
1158 let tail_leaf = self.mk_leaf(tail);
1159 let head_der = self.der(head, mask)?;
1160
1161 if self.is_nullable(head, mask) {
1162 let rightd = self.der(tail, mask)?;
1163 let ldr = {
1164 let this = &mut *self;
1165 this.mk_binary(
1166 head_der,
1167 tail_leaf,
1168 &mut (|b, left, right| b.mk_concat(left, right)),
1169 )
1170 };
1171 {
1172 let this = &mut *self;
1173 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1174 }
1175 } else {
1176 let this = &mut *self;
1177 this.mk_binary(
1178 head_der,
1179 tail_leaf,
1180 &mut (|b, left, right| b.mk_concat(left, right)),
1181 )
1182 }
1183 }
1184 Kind::Star => {
1185 if node_id == NodeId::EPS {
1186 TRegexId::BOT
1187 } else {
1188 let left = node_id.left(self);
1189 let r_decr_leaf = self.mk_leaf(node_id);
1190 let r_der = self.der(left, mask)?;
1191 let this = &mut *self;
1192 this.mk_binary(
1193 r_der,
1194 r_decr_leaf,
1195 &mut (|b, left, right| b.mk_concat(left, right)),
1196 )
1197 }
1198 }
1199 Kind::Lookbehind => {
1200 let lb_prev_der = {
1201 let lb_prev = self.get_lookbehind_prev(node_id);
1202 if lb_prev == NodeId::MISSING {
1203 TRegexId::MISSING
1204 } else {
1205 self.der(lb_prev, mask)?
1206 }
1207 };
1208 let lb_inner = self.get_lookbehind_inner(node_id);
1209 let lb_inner_der = self.der(lb_inner, mask)?;
1210 {
1211 let this = &mut *self;
1212 this.mk_binary_result(
1213 lb_inner_der,
1214 lb_prev_der,
1215 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1216 )?
1217 }
1218 }
1219 Kind::Lookahead => {
1220 let la_tail = self.get_lookahead_tail(node_id);
1221 let la_body = node_id.left(self);
1222 let rel = self.get_lookahead_rel(node_id);
1223
1224 if self.is_nullable(la_body, mask) {
1225 let right = node_id.right(self).missing_to_eps();
1227 let rder = self.der(right, mask).clone();
1228 return rder;
1229 }
1230
1231 if rel == u32::MAX {
1232 let la_body_der = self.der(la_body, mask)?;
1233 if la_tail.is_kind(self, Kind::Pred) {
1234 let transitioned =
1235 self.transition_term(la_body_der, la_tail.pred_tset(self));
1236 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1237 let concated = self.mk_concat(la_tail, new_la);
1238 return self.der(concated, mask);
1239 }
1240 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1241 let left = la_tail.left(self);
1242 let tset = left.pred_tset(self);
1243 let transitioned = self.transition_term(la_body_der, tset);
1244 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1245 let tail_right = la_tail.right(self);
1246 let concated = self.mk_concat(new_la, tail_right);
1247 let concated = self.mk_concat(left, concated);
1248 return self.der(concated, mask);
1249 }
1250 }
1251
1252 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1253 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1254 let concated = self.mk_concat(la_body, nulls_mask);
1255 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1256 let non_nulled = self.mk_non_nullable_safe(la_tail);
1257 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1258 let new_union = self.mk_union(concated_look, new_look);
1259 return self.der(new_union, mask);
1260 }
1261
1262 let la_tail_der = if la_tail == NodeId::MISSING {
1263 TRegexId::MISSING
1264 } else {
1265 if self.is_nullable(la_tail, mask) {
1266 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1267 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1268 let la_union = self.mk_union(la_tail, nulls_la);
1269 self.der(la_union, mask)?
1270 } else {
1271 self.der(la_tail, mask)?
1272 }
1273 };
1274
1275 let la_body_der = self.der(la_body, mask)?;
1276
1277 if rel != u32::MAX && rel > self.lookahead_context_max {
1278 return Err(AlgebraError::AnchorLimit);
1279 }
1280
1281 let la = {
1282 let this = &mut *self;
1283 let rel = helpers::incr_rel(rel);
1284 this.mk_binary(
1285 la_body_der,
1286 la_tail_der,
1287 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1288 )
1289 };
1290
1291 if rel != u32::MAX
1292 && la_tail_der != TRegexId::MISSING
1293 && self.is_nullable(la_tail, mask)
1294 {
1295 let look_only = {
1296 let this = &mut *self;
1297 let right = TRegexId::MISSING;
1298 let rel = helpers::incr_rel(rel);
1299 this.mk_binary(
1300 la_body_der,
1301 right,
1302 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1303 )
1304 };
1305 {
1306 let this = &mut *self;
1307 this.mk_binary(
1308 look_only,
1309 la,
1310 &mut (|b, left, right| b.mk_union(left, right)),
1311 )
1312 }
1313 } else {
1314 la
1315 }
1316 }
1317 Kind::Counted => {
1318 let body = node_id.left(self);
1319 let chain = node_id.right(self);
1320 let packed = self.get_extra(node_id);
1321 let step = (packed & 0xFFFF) as u16;
1322 let best = (packed >> 16) as u16;
1323
1324 let mid_best = if self.is_nullable(body, mask) && step >= best {
1325 step
1326 } else {
1327 best
1328 };
1329
1330 let body_der = self.der(body, mask)?;
1331 let new_step = step.saturating_add(1);
1332 self.mk_unary(body_der, &mut |b, new_body| {
1333 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1334 new_step
1335 } else {
1336 mid_best
1337 };
1338 let packed = (final_best as u32) << 16 | new_step as u32;
1339 b.mk_counted(new_body, chain, packed)
1340 })
1341 }
1342 Kind::Begin | Kind::End => TRegexId::BOT,
1343 Kind::Pred => {
1344 let psi = node_id.pred_tset(self);
1345 if psi == TSetId::EMPTY {
1346 TRegexId::BOT
1347 } else {
1348 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1349 }
1350 }
1351 };
1352
1353 self.cache_der(node_id, result, mask);
1357 Ok(result)
1358 }
1359
1360 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1361 debug_assert!(meta_id != MetadataId::MISSING);
1362 match self.metadata.get_mut(node_id.0 as usize) {
1363 Some(v) => *v = meta_id,
1364 None => {
1365 self.metadata
1366 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1367 self.metadata[node_id.0 as usize] = meta_id;
1368 }
1369 }
1370 }
1371
1372 fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1373 debug_assert!(reversed_id != NodeId::MISSING);
1374 match self.reversed.get_mut(node_id.0 as usize) {
1375 Some(v) => *v = reversed_id,
1376 None => {
1377 self.reversed
1378 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1379 self.reversed[node_id.0 as usize] = reversed_id;
1380 }
1381 }
1382 return reversed_id;
1383 }
1384
1385 fn init(&mut self, inst: NodeKey) -> NodeId {
1386 self.num_created += 1;
1387 let node_id = NodeId(self.num_created);
1388 self.index.insert(inst.clone(), node_id);
1389 match inst.kind {
1390 Kind::Pred => {
1391 let meta_id = self.mb.get_meta_id(Metadata {
1392 flags: MetaFlags::ZERO,
1393 nulls: NullsId::EMPTY,
1394 });
1395 self.init_metadata(node_id, meta_id);
1396 }
1397 Kind::Begin => {
1398 let meta_id = self.mb.get_meta_id(Metadata {
1399 flags: MetaFlags::with_nullability(
1400 Nullability::BEGIN,
1401 MetaFlags::CONTAINS_ANCHORS,
1402 ),
1403 nulls: NullsId::BEGIN0,
1404 });
1405 self.init_metadata(node_id, meta_id);
1406 }
1407 Kind::End => {
1408 let meta_id = self.mb.get_meta_id(Metadata {
1409 flags: MetaFlags::with_nullability(
1410 Nullability::END,
1411 MetaFlags::CONTAINS_ANCHORS,
1412 ),
1413 nulls: NullsId::END0,
1414 });
1415 self.init_metadata(node_id, meta_id);
1416 }
1417 Kind::Inter => {
1418 let m1 = self.get_node_meta_id(inst.left);
1419 let m2 = self.get_node_meta_id(inst.right);
1420 let meta_id = {
1421 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1422 let mask_l = inst.left.nullability(self);
1423 let mask_r = inst.right.nullability(self);
1424 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1425 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1426 nulls = self.mb.nb.and_mask(nulls, mask_l);
1427 nulls = self.mb.nb.and_mask(nulls, mask_r);
1428 let new_meta = Metadata {
1429 flags: self.mb.flags_inter(m1, m2),
1430 nulls,
1431 };
1432 self.mb.get_meta_id(new_meta)
1433 };
1434 self.init_metadata(node_id, meta_id);
1435 }
1436 Kind::Union => {
1437 let m1 = self.get_node_meta_id(inst.left);
1438 let m2 = self.get_node_meta_id(inst.right);
1439 let meta_id = {
1440 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1441 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1442 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1443 let new_meta = Metadata {
1444 flags: self.mb.flags_union(m1, m2),
1445 nulls,
1446 };
1447 self.mb.get_meta_id(new_meta)
1448 };
1449 self.init_metadata(node_id, meta_id);
1450 }
1451 Kind::Concat => {
1452 let flags = self.mb.flags_concat(
1453 self.get_node_meta_id(inst.left),
1454 self.get_node_meta_id(inst.right),
1455 );
1456
1457 let right_nullability = inst.right.nullability(self);
1458 let left_nullability = inst.left.nullability(self);
1459 let nulls_left = self.get_nulls_id(inst.left);
1460 let nulls_right = self.get_nulls_id(inst.right);
1461 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1462 let mask = right_nullability.and(left_nullability);
1463 nulls = self.mb.nb.and_mask(nulls, mask);
1464
1465 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1466 self.init_metadata(node_id, new_id);
1467 }
1468 Kind::Star => {
1469 let left_nulls = self.get_nulls_id(inst.left);
1470 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1471 let meta_id = self.mb.get_meta_id(Metadata {
1472 flags: self
1473 .mb
1474 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1475 nulls,
1476 });
1477 self.init_metadata(node_id, meta_id);
1478 }
1479 Kind::Compl => {
1480 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1481 let meta_id = self.mb.get_meta_id(Metadata {
1482 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1483 nulls,
1484 });
1485 self.init_metadata(node_id, meta_id);
1486 }
1487 Kind::Lookbehind => {
1488 let mut left_nullability = self.nullability(inst.left);
1489 let mut contains_flags = self.get_flags_contains(inst.left);
1490 let nulls = if inst.right.is_missing() {
1491 self.get_nulls_id(inst.left)
1492 } else {
1493 let right_nullability = self.nullability(inst.right);
1494 let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1495 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1496 left_nullability = left_nullability.and(right_nullability);
1497 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1498 self.mb.nb.and_id(nulls_left, nulls_right)
1499 };
1500
1501 let meta_id = self.mb.get_meta_id(Metadata {
1502 flags: MetaFlags::with_nullability(
1503 left_nullability,
1504 contains_flags
1505 .or(MetaFlags::CONTAINS_LOOKAROUND)
1506 .or(MetaFlags::CONTAINS_LOOKBEHIND),
1507 ),
1508 nulls,
1509 });
1510 self.init_metadata(node_id, meta_id);
1511 }
1512 Kind::Lookahead => {
1513 let mut nulls = self.get_nulls_id(inst.left);
1514 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1516 let left_nullability = inst.left.nullability(self);
1517 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1518
1519 nulls = self.mb.nb.or_id(nulls, nulls_right);
1520
1521 let la_inner = inst.left;
1522 let la_tail = inst.right;
1523 let null = self
1524 .get_meta_flags(la_inner)
1525 .nullability()
1526 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1527 let contains_flags = self
1528 .get_flags_contains(la_inner)
1529 .or(self.get_flags_contains(la_tail));
1530
1531 let meta_id = self.mb.get_meta_id(Metadata {
1532 flags: MetaFlags::with_nullability(
1533 null,
1534 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1535 ),
1536 nulls,
1537 });
1538 self.init_metadata(node_id, meta_id);
1539 }
1540 Kind::Counted => {
1541 let best = inst.extra >> 16;
1542 let (null, nulls) = if best > 0 {
1543 let mut ns = BTreeSet::new();
1544 ns.insert(NullState::new(Nullability::CENTER, best));
1545 (Nullability::CENTER, self.mb.nb.get_id(ns))
1546 } else {
1547 (Nullability::NEVER, NullsId::EMPTY)
1548 };
1549 let meta_id = self.mb.get_meta_id(Metadata {
1550 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1551 nulls,
1552 });
1553 self.init_metadata(node_id, meta_id);
1554 }
1555 }
1556
1557 self.array.push(inst);
1558
1559 if let Some(rw) = self.post_init_simplify(node_id) {
1560 self.override_as(node_id, rw)
1561 } else {
1562 node_id
1563 }
1564 }
1565
1566 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1567 match self.get_kind(node_id) {
1568 Kind::Concat => {
1569 let lhs = node_id.left(self);
1570 let rhs = node_id.right(self);
1571 if lhs.is_pred_star(self).is_some() {
1572 if let Some(opttail) = rhs.is_opt_v(self) {
1573 if let Some(true) = self.subsumes(lhs, opttail) {
1574 return Some(lhs);
1575 }
1576 }
1577 }
1578 }
1579 Kind::Union => {
1580 let lhs = node_id.left(self);
1581 let rhs = node_id.right(self);
1582 let mut subsumed = false;
1583 rhs.iter_union_while(self, &mut |b, branch| {
1584 if b.nullable_subsumes(branch, lhs) {
1585 subsumed = true;
1586 }
1587 !subsumed
1588 });
1589 if subsumed {
1590 return Some(rhs);
1591 }
1592 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1593 return Some(rhs);
1594 }
1595 }
1596 _ => {}
1597 }
1598
1599 None
1600 }
1601
1602 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1604 if self.get_kind(lhs) != Kind::Union {
1605 return false; }
1607 let mut rhs_branches = Vec::new();
1608 let mut curr = rhs;
1609 while self.get_kind(curr) == Kind::Union {
1610 rhs_branches.push(self.get_left(curr));
1611 curr = self.get_right(curr);
1612 }
1613 rhs_branches.push(curr);
1614
1615 curr = lhs;
1616 while self.get_kind(curr) == Kind::Union {
1617 if !rhs_branches.contains(&self.get_left(curr)) {
1618 return false;
1619 }
1620 curr = self.get_right(curr);
1621 }
1622 rhs_branches.contains(&curr)
1623 }
1624
1625 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1627 if node == target {
1628 return true;
1629 }
1630 match self.get_kind(node) {
1631 Kind::Union => {
1632 self.nullable_subsumes(self.get_left(node), target)
1633 || self.nullable_subsumes(self.get_right(node), target)
1634 }
1635 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1636 self.nullable_subsumes(self.get_right(node), target)
1637 }
1638 _ => false,
1639 }
1640 }
1641
1642 pub fn num_nodes(&self) -> u32 {
1643 self.num_created
1644 }
1645 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1646 match self.index.get(&inst) {
1647 Some(&id) => id,
1648 None => self.init(inst),
1649 }
1650 }
1651 #[inline]
1652 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1653 self.index.get(inst)
1654 }
1655
1656 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1657 self.index.insert(key, subsumed);
1658 subsumed
1659 }
1660
1661 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1662 let key = &self.array[key.0 as usize];
1663 let entry = self.index.get_mut(key).unwrap();
1664 *entry = subsumed;
1665 subsumed
1666 }
1667
1668 #[inline]
1669 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1670 self.array[node_id.0 as usize].left
1671 }
1672
1673 #[inline]
1674 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1675 self.array[node_id.0 as usize].right
1676 }
1677
1678 #[inline]
1679 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1680 self.array[node_id.0 as usize].extra
1681 }
1682
1683 #[inline]
1684 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1685 debug_assert!(node_id.is_concat(self));
1686 let mut curr = node_id;
1687 while curr.is_concat(self) {
1688 curr = curr.right(self);
1689 }
1690 curr
1691 }
1692
1693 fn has_trailing_la(&self, node: NodeId) -> bool {
1694 let end = match self.get_kind(node) {
1695 Kind::Concat => self.get_concat_end(node),
1696 Kind::Lookahead => node,
1697 _ => return false,
1698 };
1699 end.is_lookahead(self) && end.right(self).is_missing()
1700 }
1701
1702 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1703 if node.is_lookahead(self) {
1704 return (NodeId::EPS, node);
1705 }
1706 debug_assert!(node.is_concat(self));
1707 let right = node.right(self);
1708 if !right.is_concat(self) {
1709 return (node.left(self), right);
1710 }
1711 let (stripped, la) = self.strip_trailing_la(right);
1712 (self.mk_concat(node.left(self), stripped), la)
1713 }
1714 #[inline]
1715 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1716 debug_assert!(matches!(
1717 self.get_kind(lookahead_node_id),
1718 Kind::Lookahead | Kind::Counted
1719 ));
1720 lookahead_node_id.left(self)
1721 }
1722 #[inline]
1723 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1724 debug_assert!(lookahead_node_id.is_lookahead(self));
1725 lookahead_node_id.right(self)
1726 }
1727 #[inline]
1728 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1729 debug_assert!(
1730 matches!(
1731 self.get_kind(lookahead_node_id),
1732 Kind::Lookahead | Kind::Counted
1733 ),
1734 "not lookahead/counted: {:?}",
1735 self.pp(lookahead_node_id)
1736 );
1737 self.get_extra(lookahead_node_id)
1738 }
1739 #[inline]
1740 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1741 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1742 lookbehind_node_id.left(self)
1743 }
1744 #[inline]
1745 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1746 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1747 lookbehind_node_id.right(self)
1748 }
1749
1750 #[inline]
1751 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1752 debug_assert!(
1753 self.array.len() > node_id.0 as usize,
1754 "array len: {:?}",
1755 node_id
1756 );
1757 self.array[node_id.0 as usize].kind
1758 }
1759
1760 #[inline]
1761 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1762 &self.array[node_id.0 as usize]
1763 }
1764
1765 #[inline]
1766 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1767 self.metadata[node_id.0 as usize]
1768 }
1769
1770 #[inline]
1771 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1772 debug_assert!(node_id.0 != u32::MAX);
1773 let meta_id = self.get_node_meta_id(node_id);
1774 debug_assert!(meta_id != MetadataId::MISSING);
1775 &self.mb.array[meta_id.0 as usize]
1776 }
1777
1778 #[inline]
1779 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1780 if node_id == NodeId::MISSING {
1781 NullsId::EMPTY
1782 } else {
1783 self.get_meta(node_id).nulls
1784 }
1785 }
1786
1787 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1788 self.mb
1789 .nb
1790 .array
1791 .iter()
1792 .map(|set| set.iter().cloned().collect())
1793 .collect()
1794 }
1795
1796 pub fn nulls_count(&self) -> usize {
1797 self.mb.nb.array.len()
1798 }
1799
1800 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1801 self.mb.nb.array[id as usize].iter().cloned().collect()
1802 }
1803
1804 #[inline]
1805 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1806 if node_id == NodeId::MISSING {
1807 NullsId::EMPTY
1808 } else {
1809 let nulls = self.get_meta(node_id).nulls;
1810 self.mb.nb.and_mask(nulls, mask)
1811 }
1812 }
1813
1814 #[inline]
1815 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1816 let meta_id = self.get_node_meta_id(node_id);
1817 let meta = &self.mb.array[meta_id.0 as usize];
1818 meta.flags
1819 }
1820
1821 #[inline]
1822 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1823 self.get_meta(node_id).flags.nullability()
1824 }
1825
1826 #[inline]
1827 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1828 let meta_id = self.get_node_meta_id(node_id);
1829 let meta = &self.mb.array[meta_id.0 as usize];
1830 meta.flags.all_contains_flags()
1831 }
1832
1833 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1834 if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
1835 return self.strip_lb(node_id.right(self));
1836 }
1837 self.strip_lb_inner(node_id)
1838 }
1839
1840 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1841 if !self.contains_look(node_id) {
1842 return Ok(node_id);
1843 }
1844 if node_id.is_concat(self) && node_id.left(self).is_lookbehind(self) {
1845 let lb = node_id.left(self);
1846 let prev = self.get_lookbehind_prev(lb);
1847 let tail = self.strip_lb_inner(node_id.right(self))?;
1848 if prev != NodeId::MISSING {
1849 let stripped_prev = self.strip_lb_inner(prev)?;
1850 return Ok(self.mk_concat(stripped_prev, tail));
1851 }
1852 return Ok(tail);
1853 }
1854 if node_id.is_inter(self) {
1855 let left = self.strip_lb_inner(node_id.left(self))?;
1856 let right = self.strip_lb_inner(node_id.right(self))?;
1857 return Ok(self.mk_inter(left, right));
1858 }
1859 if self.get_kind(node_id) == Kind::Union {
1860 let left = self.strip_lb_inner(node_id.left(self))?;
1861 let right = self.strip_lb_inner(node_id.right(self))?;
1862 return Ok(self.mk_union(left, right));
1863 }
1864 match self.get_kind(node_id) {
1865 Kind::Lookbehind => {
1866 let prev = self.get_lookbehind_prev(node_id);
1867 if prev != NodeId::MISSING {
1868 self.strip_lb_inner(prev)
1869 } else {
1870 Ok(NodeId::EPS)
1871 }
1872 }
1873 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1874 Err(AlgebraError::UnsupportedPattern)
1875 }
1876 _ => Ok(node_id),
1877 }
1878 }
1879
1880 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1882 if !self.contains_anchors(node_id) {
1883 return node_id;
1884 }
1885 match self.get_kind(node_id) {
1886 Kind::Begin => NodeId::BOT,
1887 Kind::Concat => {
1888 let left = self.nonbegins(node_id.left(self));
1889 if left == NodeId::BOT {
1890 return NodeId::BOT;
1891 }
1892 self.mk_concat(left, node_id.right(self))
1893 }
1894 Kind::Union => {
1895 let left = self.nonbegins(node_id.left(self));
1896 let right = self.nonbegins(node_id.right(self));
1897 self.mk_union(left, right)
1898 }
1899 _ => node_id,
1900 }
1901 }
1902
1903 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1904 match self.get_kind(node_id) {
1905 Kind::Concat => {
1906 let head = node_id.left(self);
1907 match self.get_kind(head) {
1908 _ if self.any_nonbegin_nullable(head) => {
1909 self.strip_prefix_safe(node_id.right(self))
1910 }
1911 _ => node_id,
1912 }
1913 }
1914 _ => node_id,
1915 }
1916 }
1917 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1918 match self.get_kind(node_id) {
1919 Kind::Begin => NodeId::BOT,
1920 Kind::Concat => {
1921 let head = self.prune_begin(node_id.left(self));
1922 let tail = self.prune_begin(node_id.right(self));
1923 self.mk_concat(head, tail)
1924 }
1925 Kind::Lookbehind => {
1926 if !node_id.right(self).is_missing() {
1927 return node_id;
1928 }
1929 let head = self.prune_begin(node_id.left(self));
1930 head
1931 }
1932 Kind::Union => {
1933 let left = self.prune_begin(node_id.left(self));
1934 let right = self.prune_begin(node_id.right(self));
1935 self.mk_union(left, right)
1936 }
1937 _ => node_id,
1938 }
1939 }
1940
1941 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1942 if !self.contains_look(node_id) {
1943 return Ok(node_id);
1944 }
1945 let result = match self.get_kind(node_id) {
1946 Kind::Concat => {
1947 let left = self.normalize_rev(node_id.left(self))?;
1948 let right = self.normalize_rev(node_id.right(self))?;
1949 self.mk_concat(left, right)
1950 }
1951 Kind::Inter => {
1952 let left = self.normalize_rev(node_id.left(self))?;
1953 let right = self.normalize_rev(node_id.right(self))?;
1954 self.mk_inter(left, right)
1955 }
1956 Kind::Union => {
1957 let left = self.normalize_rev(node_id.left(self))?;
1958 let right = self.normalize_rev(node_id.right(self))?;
1959 self.mk_union(left, right)
1960 }
1961 Kind::Lookbehind => {
1962 let left = self.normalize_rev(node_id.left(self))?;
1963 let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
1964 let lbody_ts = self.mk_concat(NodeId::TS, left);
1965 let ltail_ts = self.mk_concat(NodeId::TS, right);
1966 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1967 as_inter
1968 }
1969 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
1970 return Err(AlgebraError::UnsupportedPattern)
1971 }
1972 _ => node_id,
1973 };
1974 Ok(result)
1975 }
1976
1977 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1978 debug_assert!(node_id != NodeId::MISSING);
1979 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1980 if *rev != NodeId::MISSING {
1981 return Ok(*rev);
1982 }
1983 }
1984 let rw = match self.get_kind(node_id) {
1985 Kind::End => NodeId::BEGIN,
1986 Kind::Begin => NodeId::END,
1987 Kind::Pred => node_id,
1988 Kind::Concat => {
1989 let left = self.reverse(node_id.left(self))?;
1990 let right = self.reverse(node_id.right(self))?;
1991 self.mk_concat(right, left)
1992 }
1993 Kind::Union => {
1994 let left = self.reverse(node_id.left(self))?;
1995 let right = self.reverse(node_id.right(self))?;
1996 self.mk_union(left, right)
1997 }
1998 Kind::Inter => {
1999 let left = self.reverse(node_id.left(self))?;
2000 let right = self.reverse(node_id.right(self))?;
2001 self.mk_inter(left, right)
2002 }
2003 Kind::Star => {
2004 let body = self.reverse(node_id.left(self))?;
2005 self.mk_star(body)
2006 }
2007 Kind::Compl => {
2008 if self.contains_look(node_id.left(self)) {
2009 return Err(AlgebraError::UnsupportedPattern);
2010 }
2011 let body = self.reverse(node_id.left(self))?;
2012 self.mk_compl(body)
2013 }
2014 Kind::Lookbehind => {
2015 let prev = self.get_lookbehind_prev(node_id);
2016 let inner_id = self.get_lookbehind_inner(node_id);
2017 let rev_inner = self.reverse(inner_id)?;
2018 let rev_prev = if prev != NodeId::MISSING {
2019 self.reverse(prev)?
2020 } else {
2021 NodeId::MISSING
2022 };
2023 self.mk_lookahead(rev_inner, rev_prev, 0)
2024 }
2025 Kind::Lookahead => {
2026 let rel = self.get_lookahead_rel(node_id);
2027 if rel == u32::MAX {
2029 let lbody = self.get_lookahead_inner(node_id);
2030 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2031 let rbody = self.reverse(lbody);
2032 let rtail = self.reverse(ltail);
2033 let rev = self.mk_lookbehind(rbody?, rtail?);
2034 return Ok(self.cache_reversed(node_id, rev));
2035 }
2036 if rel != 0 {
2037 return Err(AlgebraError::UnsupportedPattern);
2038 }
2039 let tail_node = self.get_lookahead_tail(node_id);
2040 let rev_tail = if tail_node != NodeId::MISSING {
2041 self.reverse(tail_node)?
2042 } else {
2043 NodeId::MISSING
2044 };
2045 let inner_id = self.get_lookahead_inner(node_id);
2046 let rev_inner = self.reverse(inner_id)?;
2047 self.mk_lookbehind(rev_inner, rev_tail)
2048 }
2049 Kind::Counted => {
2050 return Err(AlgebraError::UnsupportedPattern);
2051 }
2052 };
2053 self.cache_reversed(node_id, rw);
2054 Ok(rw)
2055 }
2056
2057 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2058 let node = NodeKey {
2059 kind: Kind::Pred,
2060 left: NodeId::MISSING,
2061 right: NodeId::MISSING,
2062 extra: pred.0,
2063 };
2064 self.get_node_id(node)
2065 }
2066
2067 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2068 let key = NodeKey {
2069 kind: Kind::Compl,
2070 left: body,
2071 right: NodeId::MISSING,
2072 extra: u32::MAX,
2073 };
2074 if let Some(id) = self.key_is_created(&key) {
2075 return *id;
2076 }
2077 if body == NodeId::BOT {
2078 return NodeId::TS;
2079 }
2080 if body == NodeId::TS {
2081 return NodeId::BOT;
2082 }
2083
2084 if let Some(contains_body) = body.is_contains(self) {
2085 if contains_body.is_pred(self) {
2086 let pred = contains_body.pred_tset(self);
2087 let notpred = self.mk_pred_not(pred);
2088 let node = self.mk_star(notpred);
2089 return self.init_as(key, node);
2090 } else if contains_body == NodeId::END {
2091 return self.init_as(key, NodeId::BOT);
2092 }
2093 };
2094
2095 if self.get_kind(body) == Kind::Compl {
2096 return self.get_node(body).left;
2097 }
2098
2099 self.get_node_id(key)
2100 }
2101
2102 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2103 let nid = self.get_nulls_id(body);
2104 let nref = self.mb.nb.get_set_ref(nid).clone();
2105 let mut futures = NodeId::BOT;
2106 for n in nref.iter() {
2107 if !n.is_mask_nullable(mask) {
2108 continue;
2109 }
2110
2111 let eff = if n.rel == 0 {
2112 NodeId::EPS
2113 } else {
2114 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2115 };
2116 futures = self.mk_union(futures, eff)
2117 }
2118 futures
2119 }
2120
2121 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2122 if cfg!(feature = "norewrite") {
2123 return None;
2124 }
2125 #[cfg(debug_assertions)]
2126 if std::env::var("TRACE_CONCAT").is_ok() {
2127 eprintln!("rw2 head={} tail={}", self.pp(head), self.pp(tail));
2128 }
2129
2130 if tail.is_lookbehind(self) {
2131 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2132 return self
2133 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2134 .ok();
2135 }
2136 if head.is_lookahead(self) {
2137 let la_tail = self.get_lookahead_tail(head);
2138 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2139 if new_la_tail.is_center_nullable(self) {
2140 let la_new = self.mk_lookahead_internal(
2141 self.get_lookahead_inner(head),
2142 new_la_tail,
2143 u32::MAX,
2144 );
2145 return Some(la_new);
2146 }
2147 let la_body = self.get_lookahead_inner(head);
2148 let la_rel = self.get_lookahead_rel(head);
2149 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2150 let tail_rel = self.get_lookahead_rel(new_la_tail);
2151 tail_rel + la_rel
2152 } else {
2153 u32::MAX
2154 };
2155
2156 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2157 }
2158
2159 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2160 return Some(head);
2161 }
2162
2163 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2169 return Some(NodeId::TS);
2170 }
2171
2172 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2173 return Some(NodeId::TS);
2174 }
2175
2176 if head.is_inter(self) && tail == NodeId::TS {
2177 let mut alltopstar = true;
2178 iter_inter!(self, head, |v| {
2179 alltopstar = self.ends_with_ts(v);
2180 });
2181 if alltopstar {
2182 return Some(head);
2183 }
2184 }
2185
2186 if head.is_star(self) && head == tail {
2187 return Some(head);
2188 }
2189
2190 None
2191 }
2192
2193 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2194 use Kind::*;
2195
2196 if cfg!(feature = "norewrite") {
2197 return None;
2198 }
2199 if left == right {
2200 return Some(left);
2201 }
2202
2203 if right.is_kind(self, Kind::Union) && left == right.left(self) {
2204 return Some(right);
2205 }
2206
2207 if left.is_pred(self) && right.is_pred(self) {
2208 let l = left.pred_tset(self);
2209 let r = right.pred_tset(self);
2210 let solver = self.solver();
2211 let psi = solver.or_id(l, r);
2212 let rewrite = self.mk_pred(psi);
2213 return Some(rewrite);
2214 }
2215
2216 if left == NodeId::EPS
2217 && self.get_extra(left) == 0
2218 && self.nullability(right) == Nullability::ALWAYS
2219 {
2220 return Some(right);
2221 }
2222
2223 if left.is_lookahead(self) && right.is_lookahead(self) {
2224 let lb = left.left(self);
2225 let lt = left.right(self);
2226 let lrel = left.extra(self);
2227
2228 let rb = right.left(self);
2229 let rt = right.right(self);
2230 let rrel = right.extra(self);
2231
2232 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2233 let unioned = self.mk_union(lb, rb);
2234 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2235 return Some(node);
2236 }
2237 }
2238
2239 if right.is_kind(self, Concat) {
2240 if left == NodeId::END
2241 && right.left(self) == NodeId::END
2242 && self.nullability(right).has(Nullability::END)
2243 {
2244 return Some(right);
2245 }
2246 if left == right.left(self) {
2248 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2249 let rw = self.mk_concat(left, rhs);
2250 return Some(rw);
2251 }
2252 if left.is_kind(self, Concat) {
2253 let head1 = left.left(self);
2254 let head2 = right.left(self);
2255
2256 if head1 == head2 {
2257 let t1 = left.right(self);
2258 let t2 = right.right(self);
2259 if head1 == NodeId::TS {
2261 if t2.has_concat_tail(self, t1) {
2262 return Some(left);
2263 }
2264 if t1.has_concat_tail(self, t2) {
2265 return Some(right);
2266 }
2267 }
2268 let un = self.mk_union(t1, t2);
2269 return Some(self.mk_concat(left.left(self), un));
2270 }
2271
2272 if false {
2276 let end1 = self.get_concat_end(left);
2277 let end2 = self.get_concat_end(right);
2278 if end1 == end2 {
2279 let flags = left.flags_contains(self).or(right.flags_contains(self));
2280 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2281 let rev1 = self.reverse(left).unwrap();
2282 let rev2 = self.reverse(right).unwrap();
2283
2284 let union_rev = self.mk_union(rev1, rev2);
2285 return Some(self.reverse(union_rev).unwrap());
2286 }
2287 }
2288 }
2289 }
2290 if left.is_pred(self) && left == right.left(self) {
2291 let un = self.mk_opt(right.right(self));
2292 let conc = self.mk_concat(left, un);
2293 return Some(conc);
2294 }
2295 }
2296
2297 if left == NodeId::EPS && right.is_plus(self) {
2298 let result = self.mk_star(right.left(self));
2299 return Some(result);
2300 }
2301
2302 if left.is_inter(self) && right.is_inter(self) {
2304 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2305 return Some(rw);
2306 }
2307 }
2308
2309 None
2310 }
2311
2312 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2313 let mut curr = node;
2314 while curr.is_inter(self) {
2315 out.push(self.get_left(curr));
2316 curr = self.get_right(curr);
2317 }
2318 out.push(curr);
2319 }
2320
2321 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2322 let mut curr = node;
2323 let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2324 if has_prefix {
2325 curr = self.get_right(curr);
2326 }
2327 let mut count = 0u32;
2328 let mut pred_set = None;
2329 while curr.is_concat(self) {
2330 let head = self.get_left(curr);
2331 if !head.is_pred(self) {
2332 return None;
2333 }
2334 let ps = head.pred_tset(self);
2335 match pred_set {
2336 None => pred_set = Some(ps),
2337 Some(existing) => {
2338 if existing != ps {
2339 return None;
2340 }
2341 }
2342 }
2343 curr = self.get_right(curr);
2344 count += 1;
2345 }
2346 if count == 0 || !curr.is_star(self) {
2347 return None;
2348 }
2349 Some((has_prefix, pred_set.unwrap(), curr, count))
2350 }
2351
2352 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2353 let mut j = 0;
2354 for &s in sub {
2355 while j < sup.len() && sup[j] < s {
2356 j += 1;
2357 }
2358 if j >= sup.len() || sup[j] != s {
2359 return false;
2360 }
2361 j += 1;
2362 }
2363 true
2364 }
2365
2366 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2367 if !left.is_inter(self) || !right.is_inter(self) {
2368 return None;
2369 }
2370
2371 let mut lc: Vec<NodeId> = Vec::new();
2372 let mut rc: Vec<NodeId> = Vec::new();
2373 self.collect_inter_components(left, &mut lc);
2374 self.collect_inter_components(right, &mut rc);
2375
2376 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2378 return Some(left);
2379 }
2380 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2382 return Some(right);
2383 }
2384
2385 if lc.len() == rc.len() {
2386 let mut diff_idx = None;
2387 for i in 0..lc.len() {
2388 if lc[i] != rc[i] {
2389 if diff_idx.is_some() {
2390 return None;
2391 }
2392 diff_idx = Some(i);
2393 }
2394 }
2395 if let Some(idx) = diff_idx {
2396 let a = lc[idx];
2397 let b = rc[idx];
2398 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2399 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2400 {
2401 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2402 return if ca < cb { Some(left) } else { Some(right) };
2403 }
2404 }
2405 }
2406 }
2407
2408 None
2409 }
2410
2411 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2412 if cfg!(feature = "norewrite") {
2413 return None;
2414 }
2415 if left == right {
2416 return Some(left);
2417 }
2418
2419 if self.get_kind(right) == Kind::Union {
2420 let mut result = NodeId::BOT;
2421 self.iter_unions_b(
2422 right,
2423 &mut (|b, v| {
2424 let new_inter = b.mk_inter(v, left);
2425 result = b.mk_union(result, new_inter);
2426 }),
2427 );
2428 return Some(result);
2429 }
2430 if self.get_kind(left) == Kind::Union {
2431 let mut result = NodeId::BOT;
2432 self.iter_unions_b(
2433 left,
2434 &mut (|b, v| {
2435 let new_inter = b.mk_inter(v, right);
2436 result = b.mk_union(result, new_inter);
2437 }),
2438 );
2439 return Some(result);
2440 }
2441
2442 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2443 return Some(NodeId::BOT);
2444 }
2445
2446 if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2447 let bodies = self.mk_union(left.left(self), right.left(self));
2448 return Some(self.mk_compl(bodies));
2449 }
2450
2451 if left == NodeId::TOPPLUS {
2452 if right.is_pred_star(self).is_some() {
2453 let newloop = self.mk_plus(right.left(self));
2454 return Some(newloop);
2455 }
2456 if right.is_never_nullable(self) {
2457 return Some(right);
2458 }
2459 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2460 return Some(NodeId::BOT);
2461 }
2462 if right.is_kind(self, Kind::Concat) {}
2463 }
2464
2465 {
2466 let l_is_la = left.is_lookahead(self);
2467 let r_is_la = right.is_lookahead(self);
2468 let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2469 let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2470 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2471 let (la_node, other, concat_body) = if r_is_la {
2472 (right, left, NodeId::MISSING)
2473 } else if l_is_la {
2474 (left, right, NodeId::MISSING)
2475 } else if r_is_cla {
2476 (right.left(self), left, right.right(self))
2477 } else {
2478 (left.left(self), right, left.right(self))
2479 };
2480 let la_body = la_node.left(self);
2481 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2482 let inter_right = if concat_body.is_missing() {
2483 la_tail
2484 } else {
2485 self.mk_concat(la_tail, concat_body)
2486 };
2487 let new_body = self.mk_inter(other, inter_right);
2488 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2489 return Some(self.mk_concat(la, new_body));
2490 }
2491 }
2492
2493 if self.get_kind(right) == Kind::Compl {
2494 let compl_body = right.left(self);
2495 if left == compl_body {
2496 return Some(NodeId::BOT);
2497 }
2498 if compl_body.is_concat(self) {
2499 let compl_head = compl_body.left(self);
2500 if compl_body.right(self) == NodeId::TS && compl_head == left {
2501 return Some(NodeId::BOT);
2502 }
2503 }
2504 }
2505
2506 if let Some(pleft) = left.is_pred_star(self) {
2507 if let Some(pright) = right.is_pred_star(self) {
2508 let merged = self.mk_inter(pleft, pright);
2509 return Some(self.mk_star(merged));
2510 }
2511 }
2512
2513 {
2514 let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2515 let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2516 if l_is_clb || r_is_clb {
2517 let (lb, body) = if l_is_clb && r_is_clb {
2518 let lb1 = left.left(self);
2519 let lb2 = right.left(self);
2520 let inner = self.mk_inter(
2521 self.get_lookbehind_inner(lb1),
2522 self.get_lookbehind_inner(lb2),
2523 );
2524 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2525 let body = self.mk_inter(left.right(self), right.right(self));
2526 (lb, body)
2527 } else if l_is_clb {
2528 let lb = left.left(self);
2529 let body = self.mk_inter(left.right(self), right);
2530 (lb, body)
2531 } else {
2532 let lb = right.left(self);
2533 let body = self.mk_inter(left, right.right(self));
2534 (lb, body)
2535 };
2536 return Some(self.mk_concat(lb, body));
2537 }
2538 }
2539
2540 {
2541 let l_has_la = self.has_trailing_la(left);
2542 let r_has_la = self.has_trailing_la(right);
2543 if l_has_la || r_has_la {
2544 let (body, la) = if l_has_la && r_has_la {
2545 let (lbody, l_la) = self.strip_trailing_la(left);
2546 let (rbody, r_la) = self.strip_trailing_la(right);
2547 let inner = self.mk_inter(
2548 self.get_lookahead_inner(l_la),
2549 self.get_lookahead_inner(r_la),
2550 );
2551 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2552 let body = self.mk_inter(lbody, rbody);
2553 (body, la)
2554 } else if l_has_la {
2555 let (lbody, la) = self.strip_trailing_la(left);
2556 let body = self.mk_inter(lbody, right);
2557 (body, la)
2558 } else {
2559 let (rbody, la) = self.strip_trailing_la(right);
2560 let body = self.mk_inter(left, rbody);
2561 (body, la)
2562 };
2563 return Some(self.mk_concat(body, la));
2564 }
2565 }
2566
2567 None
2568 }
2569
2570 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2571 if cfg!(feature = "norewrite") {
2572 return None;
2573 }
2574 debug_assert!(self.get_kind(right_union) == Kind::Union);
2575
2576 let mut rewritten = None;
2577 right_union.iter_union_while(
2578 self,
2579 &mut (|b, v| {
2580 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2581 rewritten = Some((v, rw));
2582 false
2583 } else {
2584 true
2585 }
2586 }),
2587 );
2588
2589 if let Some(rw) = rewritten {
2590 let mut new_union = NodeId::BOT;
2591 right_union.iter_union(
2592 self,
2593 &mut (|b, v| {
2594 if v == rw.0 {
2595 new_union = b.mk_union(rw.1, new_union)
2596 } else {
2597 new_union = b.mk_union(v, new_union)
2598 }
2599 }),
2600 );
2601 return Some(new_union);
2602 };
2603
2604 None
2605 }
2606
2607 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2608 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2609 debug_assert!(tail != NodeId::MISSING);
2610 let key = NodeKey {
2611 kind: Kind::Concat,
2612 left: head,
2613 right: tail,
2614 extra: u32::MAX,
2615 };
2616 if let Some(id) = self.key_is_created(&key) {
2617 return *id;
2618 }
2619
2620 if head == NodeId::BOT || tail == NodeId::BOT {
2621 return NodeId::BOT;
2622 }
2623 if head == NodeId::EPS {
2624 return tail;
2625 }
2626 if tail == NodeId::EPS {
2627 return head;
2628 }
2629
2630 match tail {
2631 NodeId::BEGIN => {
2633 if !self.is_nullable(head, Nullability::BEGIN) {
2634 return NodeId::BOT;
2635 } else {
2636 return NodeId::BEGIN;
2637 }
2638 }
2639 _ => {}
2640 }
2641
2642 if head.is_kind(self, Kind::Concat) {
2644 let left = head.left(self);
2645 let newright = self.mk_concat(head.right(self), tail);
2646 let updated = self.mk_concat(left, newright);
2647 return self.init_as(key, updated);
2648 }
2649
2650 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2651 return NodeId::BOT;
2652 }
2653
2654 if tail.is_concat(self) {
2655 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2656 let upd = self.mk_concat(rw, tail.right(self));
2657 return self.init_as(key, upd);
2658 }
2659 }
2660
2661 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2662 return self.init_as(key, new);
2663 }
2664
2665 match (self.get_kind(head), self.get_kind(tail)) {
2666 (Kind::Star, Kind::Concat) if head.is_star(self) => {
2668 let rl = tail.left(self);
2669 if head == rl {
2670 return self.init_as(key, tail);
2671 }
2672 }
2673 (_, Kind::Concat) => {
2675 let curr = head;
2676 let h2 = self.mk_concat(curr, tail.left(self));
2677 let tr = tail.right(self);
2678 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2679 return self.init_as(key, new);
2680 }
2681 }
2682 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2683 return self.init_as(key, tail);
2684 }
2685 _ => {}
2686 }
2687
2688 self.init(key)
2689 }
2690
2691 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2692 let lb_body = {
2694 match self.starts_with_ts(lb_body) {
2695 true => lb_body,
2696 false => self.mk_concat(NodeId::TS, lb_body),
2697 }
2698 };
2699 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2701 }
2702
2703 fn mk_lookbehind_internal(
2704 &mut self,
2705 lb_body: NodeId,
2706 lb_prev: NodeId,
2707 ) -> Result<NodeId, AlgebraError> {
2708 debug_assert!(lb_body != NodeId::MISSING);
2709 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2710 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2711 return Ok(NodeId::BOT);
2712 }
2713 if lb_body == NodeId::TS {
2714 return Ok(lb_prev);
2715 }
2716 if lb_body == NodeId::EPS {
2717 match lb_prev {
2718 NodeId::MISSING => return Ok(NodeId::EPS),
2719 NodeId::EPS => return Ok(NodeId::EPS),
2720 _ => return Ok(lb_prev),
2721 }
2722 }
2723
2724 let key = NodeKey {
2725 kind: Kind::Lookbehind,
2726 left: lb_body,
2727 right: lb_prev,
2728 extra: u32::MAX,
2729 };
2730 match self.key_is_created(&key) {
2731 Some(id) => Ok(*id),
2732 None => {
2733 if lb_prev == NodeId::TS {
2734 return Ok(self.mk_concat(lb_prev, lb_body));
2735 }
2736
2737 Ok(self.init(key))
2738 }
2739 }
2740 }
2741
2742 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2743 let la_body = {
2745 match self.ends_with_ts(la_body) {
2746 true => la_body,
2747 false => self.mk_concat(la_body, NodeId::TS),
2748 }
2749 };
2750 let rel = if NodeId::MISSING == la_tail {
2751 rel
2752 } else {
2753 match la_tail.is_center_nullable(self) {
2754 false => u32::MAX,
2755 true => rel,
2756 }
2757 };
2758
2759 self.mk_lookahead_internal(la_body, la_tail, rel)
2760 }
2761
2762 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2764 let key = NodeKey {
2765 kind: Kind::Lookahead,
2766 left: la_body,
2767 right: la_tail,
2768 extra: rel,
2769 };
2770 if let Some(id) = self.key_is_created(&key) {
2771 return *id;
2772 }
2773 if la_body == NodeId::TS {
2774 if rel == 0 {
2775 return la_tail.missing_to_eps();
2776 } else {
2777 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2778 }
2779 }
2780 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2781 return NodeId::BOT;
2782 }
2783 if la_tail.is_missing() && rel == u32::MAX {
2784 return NodeId::BOT;
2785 }
2786
2787 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2788 return la_body;
2789 }
2790
2791 if la_tail == NodeId::TS {
2792 if rel == 0 || rel == u32::MAX {
2793 return self.mk_concat(la_body, NodeId::TS);
2794 } else if rel == u32::MAX {
2795 return self.mk_begins_with(la_body);
2796 }
2797 }
2798
2799 if rel == u32::MAX {
2800 if la_tail.is_missing() {
2801 return NodeId::BOT;
2802 }
2803
2804 if self.is_always_nullable(la_body) {
2805 return la_tail.missing_to_eps();
2806 }
2807
2808 if la_tail != NodeId::MISSING {
2809 match self.get_kind(la_tail) {
2810 _ => {
2811 if la_body.is_compl_plus_end(self) {
2812 let minlen = self.get_min_length_only(la_tail);
2813 if minlen >= 1 {
2814 return NodeId::BOT;
2815 }
2816 }
2817 }
2818 }
2819 }
2820 }
2821
2822 if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
2823 let la_body2 = self.get_lookahead_inner(la_tail);
2824 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2825 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2826 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2827 let new_la_rel = self.get_lookahead_rel(la_tail);
2828 let new_la_tail = self.get_lookahead_tail(la_tail);
2829 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2830 }
2831
2832 if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
2833 let la_body_right = la_body.right(self);
2834 if self.is_always_nullable(la_body_right) {
2835 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2836 }
2837 if la_body.right(self) == NodeId::END {
2838 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2839 }
2840 let bodyright = la_body.right(self);
2841 if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
2842 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2843 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2844 }
2845 }
2846
2847 if la_tail != NodeId::MISSING {
2848 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2849 let lpred = la_body.left(self);
2850 if lpred.is_pred(self) {
2851 let l = lpred.pred_tset(self);
2852 let r = la_tail.pred_tset(self);
2853 let psi_and = self.solver().and_id(l, r);
2854 let rewrite = self.mk_pred(psi_and);
2855 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2856 let new_right =
2857 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2858 return self.mk_concat(rewrite, new_right);
2859 }
2860 }
2861 }
2862
2863 self.get_node_id(key)
2864 }
2865
2866 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2867 let has_match = (packed >> 16) > 0;
2868 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2869 return NodeId::BOT;
2870 }
2871 debug_assert!(
2872 body == NodeId::BOT || !self.is_infinite(body),
2873 "Counted body must have finite max length"
2874 );
2875 let chain = self.prune_counted_chain(body, chain);
2876 let key = NodeKey {
2877 kind: Kind::Counted,
2878 left: body,
2879 right: chain,
2880 extra: packed,
2881 };
2882 if let Some(id) = self.key_is_created(&key) {
2883 return *id;
2884 }
2885 self.get_node_id(key)
2886 }
2887
2888 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2889 if chain == NodeId::MISSING || body == NodeId::BOT {
2890 return chain;
2891 }
2892 if self.nullability(body) != Nullability::NEVER {
2893 return NodeId::MISSING;
2894 }
2895 let chain_body = chain.left(self);
2896 if chain_body == NodeId::BOT {
2897 return chain;
2898 }
2899 let not_begins = self.mk_not_begins_with(body);
2900 let inter = self.mk_inter(chain_body, not_begins);
2901 let is_empty = inter == NodeId::BOT;
2902 if is_empty {
2903 self.prune_counted_chain(body, chain.right(self))
2904 } else {
2905 chain
2906 }
2907 }
2908
2909 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2910 let neg_inner = self.mk_concat(body, NodeId::TS);
2911 let neg_part = self.mk_compl(neg_inner);
2912 let conc = self.mk_concat(neg_part, NodeId::END);
2913 self.mk_lookahead(conc, NodeId::MISSING, rel)
2914 }
2915
2916 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2917 match self.get_node(body).kind {
2918 Kind::Pred => {
2919 let psi = body.pred_tset(self);
2920 let negated = self.mk_pred_not(psi);
2921 let union = self.mk_union(NodeId::BEGIN, negated);
2922 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2924 }
2925 _ => {
2926 let uc = crate::unicode_classes::utf8_char(self);
2929 let neg = self.mk_compl(body);
2930 let negated = self.mk_inter(neg, uc);
2931 let union = self.mk_union(NodeId::BEGIN, negated);
2932 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2934 }
2935 }
2936 }
2937
2938 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2939 debug_assert!(left != NodeId::MISSING);
2940 debug_assert!(right != NodeId::MISSING);
2941 if left > right {
2942 return self.mk_union(right, left);
2943 }
2944 let key = NodeKey {
2945 kind: Kind::Union,
2946 left,
2947 right,
2948 extra: u32::MAX,
2949 };
2950 if let Some(id) = self.key_is_created(&key) {
2951 return *id;
2952 }
2953
2954 if left == right {
2955 return left;
2956 }
2957 if left == NodeId::BOT {
2958 return right;
2959 }
2960 if right == NodeId::BOT {
2961 return left;
2962 }
2963 if right == NodeId::TS {
2964 return right;
2965 }
2966 if left == NodeId::TS {
2967 return left;
2968 }
2969
2970 match (self.get_kind(left), self.get_kind(right)) {
2971 (Kind::Union, _) => {
2972 self.iter_unions_b(left, &mut |b, v| {
2973 b.temp_vec.push(v);
2974 });
2975 self.iter_unions_b(right, &mut |b, v| {
2976 b.temp_vec.push(v);
2977 });
2978 self.temp_vec.sort();
2979 let tree = self.temp_vec.clone();
2980 self.temp_vec.clear();
2981 let newnode = tree
2982 .iter()
2983 .rev()
2984 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2985 return self.init_as(key, newnode);
2986 }
2987 (_, Kind::Union) => {
2988 let rleft = right.left(self);
2989 if left > rleft {
2991 self.iter_unions_b(left, &mut |b, v| {
2992 b.temp_vec.push(v);
2993 });
2994 self.iter_unions_b(right, &mut |b, v| {
2995 b.temp_vec.push(v);
2996 });
2997 self.temp_vec.sort();
2998 let tree = self.temp_vec.clone();
2999 self.temp_vec.clear();
3000 let newnode = tree
3001 .iter()
3002 .rev()
3003 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3004 return self.init_as(key, newnode);
3005 } else {
3006 if let Some(rw) = self.attempt_rw_unions(left, right) {
3007 return self.init_as(key, rw);
3008 }
3009 }
3010 }
3011 _ => {}
3012 }
3013
3014 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3015 return self.init_as(key, rw);
3016 }
3017 self.init(key)
3018 }
3019
3020 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3021 debug_assert!(left_id != NodeId::MISSING);
3022 debug_assert!(right_id != NodeId::MISSING);
3023 if left_id == right_id {
3024 return left_id;
3025 }
3026 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3027 return NodeId::BOT;
3028 }
3029 if left_id == NodeId::TS {
3030 return right_id;
3031 }
3032 if right_id == NodeId::TS {
3033 return left_id;
3034 }
3035 if left_id > right_id {
3036 return self.mk_inter(right_id, left_id);
3037 }
3038 let key = NodeKey {
3039 kind: Kind::Inter,
3040 left: left_id,
3041 right: right_id,
3042 extra: u32::MAX,
3043 };
3044 if let Some(id) = self.key_is_created(&key) {
3045 return *id;
3046 }
3047
3048 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3049 return self.init_as(key, rw);
3050 }
3051
3052 self.init(key)
3053 }
3054
3055 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3056 let node = NodeKey {
3057 kind,
3058 left: NodeId::MISSING,
3059 right: NodeId::MISSING,
3060 extra: u32::MAX,
3061 };
3062 self.init(node)
3063 }
3064
3065 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3066 let star = self.mk_star(body_id);
3067 self.mk_concat(body_id, star)
3068 }
3069 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3070 let opt = self.mk_opt(body_id);
3071 let mut nodes1 = vec![];
3072 for _ in lower..upper {
3073 nodes1.push(opt);
3074 }
3075 for _ in 0..lower {
3076 nodes1.push(body_id);
3077 }
3078 self.mk_concats(nodes1.into_iter())
3079 }
3080 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3081 self.mk_union(NodeId::EPS, body_id)
3082 }
3083
3084 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3085 let key = NodeKey {
3086 kind: Kind::Star,
3087 left: body_id,
3088 right: NodeId::MISSING,
3089 extra: 0,
3090 };
3091 if let Some(id) = self.key_is_created(&key) {
3092 return *id;
3093 }
3094 if body_id.is_kind(self, Kind::Star) {
3096 return body_id;
3097 }
3098 self.get_node_id(key)
3099 }
3100
3101 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3104 match self.get_kind(node_id) {
3105 Kind::End => Nullability::EMPTYSTRING,
3106 Kind::Begin => Nullability::EMPTYSTRING,
3107 Kind::Pred => Nullability::NEVER,
3108 Kind::Star => Nullability::ALWAYS,
3109 Kind::Inter | Kind::Concat => {
3110 let lnull = self.nullability_emptystring(node_id.left(self));
3111 let rnull = self.nullability_emptystring(node_id.right(self));
3112 lnull.and(rnull) }
3114 Kind::Union => {
3115 let lnull = self.nullability_emptystring(node_id.left(self));
3116 let rnull = self.nullability_emptystring(node_id.right(self));
3117 lnull.or(rnull)
3118 }
3119 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3120 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3121 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3122 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3123 }
3124 }
3125
3126 #[inline(always)]
3127 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3128 self.get_meta(node_id)
3129 .flags
3130 .nullability()
3131 .has(Nullability::CENTER.or(Nullability::END))
3132 }
3133
3134 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3135 self.get_only_nullability(node_id)
3136 }
3137
3138 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3139 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3140 }
3141
3142 pub fn pp(&self, node_id: NodeId) -> String {
3143 let mut s = String::new();
3144 self.ppw(&mut s, node_id).unwrap();
3145 s
3146 }
3147
3148 #[allow(dead_code)]
3149 pub fn pp_nulls(&self, node_id: NodeId) -> String {
3150 let nu = self.get_nulls_id(node_id);
3151 let nr = self.mb.nb.get_set_ref(nu);
3152 let s1 = format!("{:?}", nr);
3153 s1
3154 }
3155
3156 #[allow(dead_code)]
3157 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3158 match self.get_tregex(term_id) {
3159 TRegex::Leaf(node_id) => {
3160 let mut s = String::new();
3161 self.ppw(&mut s, *node_id).unwrap();
3162 s
3163 }
3164 TRegex::ITE(cond, then_id, else_id) => {
3165 format!(
3166 "ITE({},{},{})",
3167 self.solver_ref().pp(*cond),
3168 self.ppt(*then_id),
3169 self.ppt(*else_id)
3170 )
3171 }
3172 }
3173 }
3174
3175 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3176 if cfg!(feature = "graphviz") {
3177 match node_id {
3178 NodeId::MISSING => return write!(s, "MISSING"),
3179 NodeId::BOT => return write!(s, "⊥"),
3180 NodeId::TS => return write!(s, "_*"),
3181 NodeId::TOP => return write!(s, "_"),
3182 NodeId::EPS => return write!(s, "ε"),
3183 _ => {}
3184 }
3185 }
3186
3187 match node_id {
3188 NodeId::MISSING => return write!(s, "MISSING"),
3189 NodeId::BOT => return write!(s, "⊥"),
3190 NodeId::TS => return write!(s, "_*"),
3191 NodeId::TOP => return write!(s, "_"),
3192 NodeId::EPS => return write!(s, ""),
3193 _ => {}
3194 }
3195
3196 match self.get_kind(node_id) {
3197 Kind::End => write!(s, r"\z"),
3198 Kind::Begin => write!(s, r"\A"),
3199 Kind::Pred => {
3200 let psi = node_id.pred_tset(self);
3201 if psi == TSetId::EMPTY {
3202 write!(s, r"⊥")
3203 } else if psi == TSetId::FULL {
3204 write!(s, r"_")
3205 } else {
3206 write!(s, "{}", self.solver_ref().pp(psi))
3207 }
3208 }
3209 Kind::Inter => {
3210 write!(s, "(")?;
3211 self.ppw(s, node_id.left(self))?;
3212 write!(s, "&")?;
3213 let mut curr = node_id.right(self);
3214 while curr.is_inter(self) {
3215 let n = curr.left(self);
3216 self.ppw(s, n)?;
3217 write!(s, "&")?;
3218 curr = curr.right(self);
3219 }
3220 self.ppw(s, curr)?;
3221 write!(s, ")")
3222 }
3223 Kind::Union => {
3224 let left = node_id.left(self);
3225 let right = node_id.right(self);
3226 write!(s, "(")?;
3227 self.ppw(s, left)?;
3228 write!(s, "|")?;
3229 let mut curr = right;
3230 while self.get_kind(curr) == Kind::Union {
3231 let n = curr.left(self);
3232 self.ppw(s, n)?;
3233 write!(s, "|")?;
3234 curr = curr.right(self);
3235 }
3236 self.ppw(s, curr)?;
3237 write!(s, ")")
3238 }
3239 Kind::Concat => {
3240 let left = node_id.left(self);
3241 let right = node_id.right(self);
3242 if right.is_star(self) && right.left(self) == left {
3243 self.ppw(s, left)?;
3244 write!(s, "+")?;
3245 return Ok(());
3246 }
3247 if right.is_concat(self) {
3248 let rl = right.left(self);
3249 if rl.is_star(self) && rl.left(self) == left {
3250 self.ppw(s, left)?;
3251 write!(s, "+")?;
3252 return self.ppw(s, right.right(self));
3253 }
3254 }
3255 if right.is_concat(self) && right.left(self) == left {
3256 let mut num = 1;
3257 let mut right = right;
3258 while right.is_concat(self) && right.left(self) == left {
3259 num += 1;
3260 right = right.right(self);
3261 }
3262 if let Some(inner) = left.is_opt_v(self) {
3264 let mut inner_count = 0;
3265 let mut right2 = right;
3266 while right2.is_concat(self) && right2.left(self) == inner {
3267 inner_count += 1;
3268 right2 = right2.right(self);
3269 }
3270 if right2 == inner {
3271 inner_count += 1;
3272 self.ppw(s, inner)?;
3273 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3274 }
3275 if inner_count > 0 {
3276 self.ppw(s, inner)?;
3277 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3278 return self.ppw(s, right2);
3279 }
3280 }
3281 self.ppw(s, left)?;
3282 if right == left {
3283 num += 1;
3284 return write!(s, "{{{}}}", num);
3285 }
3286 if num <= 3 && left.is_pred(self) {
3287 for _ in 1..num {
3288 self.ppw(s, left)?;
3289 }
3290 return self.ppw(s, right);
3291 } else {
3292 write!(s, "{{{}}}", num)?;
3293 return self.ppw(s, right);
3294 }
3295 }
3296 self.ppw(s, left)?;
3297 self.ppw(s, right)
3298 }
3299 Kind::Star => {
3300 let left = node_id.left(self);
3301 let leftkind = self.get_kind(left);
3302 match leftkind {
3303 Kind::Concat | Kind::Star | Kind::Compl => {
3304 write!(s, "(")?;
3305 self.ppw(s, left)?;
3306 write!(s, ")")?;
3307 }
3308 _ => {
3309 self.ppw(s, left)?;
3310 }
3311 };
3312 write!(s, "*")
3313 }
3314 Kind::Compl => {
3315 write!(s, "~(")?;
3316 self.ppw(s, node_id.left(self))?;
3317 write!(s, ")")
3318 }
3319 Kind::Lookbehind => {
3320 let lbleft = self.get_lookbehind_prev(node_id);
3321 let lbinner = self.get_lookbehind_inner(node_id);
3322 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3323 if lbleft != NodeId::MISSING {
3324 write!(s, "❮")?;
3325 self.ppw(s, lbleft)?;
3326 write!(s, "❯")?;
3327 }
3328
3329 write!(s, "(?<=")?;
3330 self.ppw(s, lbinner)?;
3331 write!(s, ")")
3332 }
3333 Kind::Lookahead => {
3334 let inner = self.get_lookahead_inner(node_id);
3335 write!(s, "(?=")?;
3336 self.ppw(s, inner)?;
3337 write!(s, ")")?;
3338 if self.get_lookahead_rel(node_id) != 0 {
3339 write!(s, "{{")?;
3340 let rel = self.get_lookahead_rel(node_id);
3341 if rel == u32::MAX {
3342 write!(s, "∅")?;
3343 } else {
3344 write!(s, "{}", rel)?;
3345 }
3346 write!(s, "}}")?;
3347 }
3348 if node_id.right(self) == NodeId::MISSING {
3349 Ok(())
3350 } else {
3351 write!(s, "❮")?;
3352 self.ppw(s, node_id.right(self))?;
3353 write!(s, "❯")
3354 }
3355 }
3356 Kind::Counted => {
3357 let body = node_id.left(self);
3358 let packed = self.get_extra(node_id);
3359 let step = packed & 0xFFFF;
3360 let best = packed >> 16;
3361 write!(s, "#(")?;
3362 self.ppw(s, body)?;
3363 write!(s, ")s{}b{}", step, best)
3364 }
3365 }
3366 }
3367
3368 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3369 self.mk_concat(node, NodeId::TS)
3370 }
3371
3372 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3373 let node_ts = self.mk_concat(node, NodeId::TS);
3374 self.mk_compl(node_ts)
3375 }
3376
3377 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3378 let notset = self.solver().not_id(set);
3379 self.mk_pred(notset)
3380 }
3381
3382 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3383 let set_id = self.solver().u8_to_set_id(char);
3384 self.mk_pred(set_id)
3385 }
3386
3387 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3388 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3389 self.mk_pred(rangeset)
3390 }
3391
3392 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3393 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3394 for &(lo, hi) in &ranges[1..] {
3395 let r = self.mk_range_u8(lo, hi);
3396 node = self.mk_union(node, r);
3397 }
3398 node
3399 }
3400
3401 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3402 let mut prefix = Vec::new();
3403 let mut curr = node;
3404 loop {
3405 if curr == NodeId::EPS {
3406 let full = !prefix.is_empty();
3407 return (prefix, full);
3408 }
3409 if curr == NodeId::BOT {
3410 break;
3411 }
3412 if curr.is_pred(self) {
3413 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3414 Some(byte) => {
3415 prefix.push(byte);
3416 return (prefix, true);
3417 }
3418 None => break, }
3420 }
3421 if !curr.is_concat(self) {
3422 break;
3423 }
3424 let left = curr.left(self);
3425 if !left.is_pred(self) {
3426 break;
3427 }
3428 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3429 Some(byte) => prefix.push(byte),
3430 None => break,
3431 }
3432 curr = curr.right(self);
3433 }
3434 (prefix, false)
3435 }
3436
3437 #[allow(dead_code)]
3438 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3439 let mut result = NodeId::EPS;
3440 for byte in raw_str.iter().rev() {
3441 let node = self.mk_u8(*byte);
3442 result = self.mk_concat(node, result);
3443 }
3444 result
3445 }
3446
3447 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3448 let mut result = NodeId::EPS;
3449 for byte in raw_str.bytes().rev() {
3450 let node = self.mk_u8(byte);
3451 result = self.mk_concat(node, result);
3452 }
3453 result
3454 }
3455
3456 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3457 let mut sorted: Vec<NodeId> = nodes.collect();
3458 if sorted.len() <= 1 {
3459 return sorted.pop().unwrap_or(NodeId::BOT);
3460 }
3461 sorted.sort();
3462 sorted.dedup();
3463 sorted.retain(|&x| x != NodeId::BOT);
3464 if sorted.is_empty() {
3465 return NodeId::BOT;
3466 }
3467 if sorted.len() > 16 {
3468 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3469 let mut non_concat: Vec<NodeId> = Vec::new();
3470 for &n in &sorted {
3471 if n.is_concat(self) {
3472 by_head.entry(self.get_left(n)).or_default().push(n);
3473 } else {
3474 non_concat.push(n);
3475 }
3476 }
3477 let mut absorbed: Vec<NodeId> = Vec::new();
3478 for &n in &non_concat {
3479 if by_head.contains_key(&n) {
3480 absorbed.push(n);
3481 }
3482 }
3483 if !absorbed.is_empty() {
3484 non_concat.retain(|n| !absorbed.contains(n));
3485 }
3486 if by_head.len() < sorted.len() {
3487 let mut groups: Vec<NodeId> = non_concat;
3488 for (head, tails) in by_head {
3489 let mut tail_nodes: Vec<NodeId> =
3490 tails.iter().map(|&n| self.get_right(n)).collect();
3491 if absorbed.contains(&head) {
3492 tail_nodes.push(NodeId::EPS);
3493 }
3494 let tail_union = self.mk_unions(tail_nodes.into_iter());
3495 let factored = self.mk_concat(head, tail_union);
3496 groups.push(factored);
3497 }
3498 groups.sort();
3499 groups.dedup();
3500 return self.mk_unions_balanced(&groups);
3501 }
3502 }
3503 self.mk_unions_balanced(&sorted)
3504 }
3505
3506 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3507 match nodes.len() {
3508 0 => NodeId::BOT,
3509 1 => nodes[0],
3510 n => {
3511 let mid = n / 2;
3512 let left = self.mk_unions_balanced(&nodes[..mid]);
3513 let right = self.mk_unions_balanced(&nodes[mid..]);
3514 self.mk_union(left, right)
3515 }
3516 }
3517 }
3518
3519 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3520 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3521 }
3522
3523 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3524 nodes
3525 .rev()
3526 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3527 }
3528}
3529
3530impl RegexBuilder {
3531 #[allow(dead_code)]
3532 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3533 match self.get_tregex(term_id).clone() {
3534 TRegex::Leaf(node_id) => {
3535 if NodeId::BOT == node_id {
3536 vec![]
3537 } else {
3538 vec![node_id]
3539 }
3540 }
3541 TRegex::ITE(_, then_id, else_id) => {
3542 let mut then_nodes = self.extract_sat(then_id);
3543 let mut else_nodes = self.extract_sat(else_id);
3544 then_nodes.append(&mut else_nodes);
3545 then_nodes
3546 }
3547 }
3548 }
3549
3550 pub(crate) fn iter_unions_b(
3551 &mut self,
3552 curr: NodeId,
3553 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3554 ) {
3555 let mut curr = curr;
3556 while self.get_kind(curr) == Kind::Union {
3557 f(self, curr.left(self));
3558 curr = curr.right(self);
3559 }
3560 f(self, curr);
3561 }
3562
3563 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3564 if !self.contains_look(node_id) {
3565 return Some(node_id);
3566 }
3567 match self.get_kind(node_id) {
3568 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3569 Kind::Concat => {
3570 let left = node_id.left(self);
3571 let right = node_id.right(self);
3572 let elim_l = self.try_elim_lookarounds(left)?;
3573 let elim_r = self.try_elim_lookarounds(right)?;
3574 let rw = self.mk_concat(elim_l, elim_r);
3575 Some(rw)
3576 }
3577 Kind::Union => {
3578 let left = node_id.left(self);
3579 let right = node_id.right(self);
3580 let elim_l = self.try_elim_lookarounds(left)?;
3581 let elim_r = self.try_elim_lookarounds(right)?;
3582 let rw = self.mk_union(elim_l, elim_r);
3583 Some(rw)
3584 }
3585
3586 Kind::Star => {
3587 let body = node_id.left(self);
3588 let elim_l = self.try_elim_lookarounds(body)?;
3589 Some(self.mk_star(elim_l))
3590 }
3591 Kind::Compl => {
3592 let left = node_id.left(self);
3593 let elim_l = self.try_elim_lookarounds(left)?;
3594 Some(self.mk_compl(elim_l))
3595 }
3596 Kind::Lookahead => {
3597 let rel = self.get_lookahead_rel(node_id);
3598 if rel != 0 {
3599 return None;
3600 }
3601 let lbody = self.get_lookahead_inner(node_id);
3602 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3603 let elim_l = self.try_elim_lookarounds(lbody)?;
3604 let elim_r = self.try_elim_lookarounds(ltail)?;
3605 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3606 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3607 let rw = self.mk_inter(lbody_ts, ltail_ts);
3608 Some(rw)
3609 }
3610 Kind::Lookbehind => {
3611 let linner = self.get_lookbehind_inner(node_id);
3612 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3613 let elim_l = self.try_elim_lookarounds(linner)?;
3614 let elim_r = self.try_elim_lookarounds(lprev)?;
3615 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3616 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3617 let rw = self.mk_inter(lbody_ts, ltail_ts);
3618 Some(rw)
3619 }
3620 Kind::Inter => {
3621 let left = node_id.left(self);
3622 let right = node_id.right(self);
3623 let elim_l = self.try_elim_lookarounds(left)?;
3624 let elim_r = self.try_elim_lookarounds(right)?;
3625 let rw = self.mk_inter(elim_l, elim_r);
3626 Some(rw)
3627 }
3628 Kind::Counted => None,
3629 }
3630 }
3631
3632 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3634 if self.nullability(node) == Nullability::NEVER {
3635 node
3636 } else {
3637 self.mk_inter(NodeId::TOPPLUS, node)
3638 }
3639 }
3640
3641 fn iter_find_stack(
3642 &self,
3643 stack: &mut Vec<TRegexId>,
3644 mut f: impl FnMut(NodeId) -> bool,
3645 ) -> bool {
3646 loop {
3647 match stack.pop() {
3648 None => return false,
3649 Some(curr) => match self.get_tregex(curr) {
3650 TRegex::Leaf(n) => {
3651 let mut curr = *n;
3652 while curr != NodeId::BOT {
3653 match self.get_kind(curr) {
3654 Kind::Union => {
3655 if f(curr.left(self)) {
3656 return true;
3657 }
3658 curr = curr.right(self);
3659 }
3660 _ => {
3661 if f(*n) {
3662 return true;
3663 }
3664 curr = NodeId::BOT;
3665 }
3666 }
3667 }
3668 }
3669 TRegex::ITE(_, then_id, else_id) => {
3670 if *else_id != TRegexId::BOT {
3671 stack.push(*else_id);
3672 }
3673 stack.push(*then_id);
3674 }
3675 },
3676 }
3677 }
3678 }
3679
3680 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3681 if node == NodeId::BOT {
3682 return Some(true);
3683 }
3684 if self.nullability(node) != Nullability::NEVER {
3685 return Some(false);
3686 }
3687 if let Some(cached) = self.cache_empty.get(&node) {
3688 if cached.is_checked() {
3689 return Some(cached.is_empty());
3690 }
3691 }
3692 let node = if !self.contains_look(node) {
3693 node
3694 } else {
3695 self.try_elim_lookarounds(node)?
3696 };
3697 let isempty_flag = self.is_empty_lang_internal(node);
3698
3699 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3700 }
3701
3702 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3703 if !self.get_meta_flags(initial_node).contains_inter() {
3705 return Ok(NodeFlags::ZERO);
3706 }
3707
3708 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3709 let mut worklist: VecDeque<NodeId> = VecDeque::new();
3710 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3711 let mut stack = Vec::new();
3712 stack.push(begin_der);
3713 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3714 visited.insert(node, initial_node);
3715 let nullability = self.nullability(node);
3716 if nullability != Nullability::NEVER {
3717 true
3718 } else {
3719 worklist.push_back(node);
3720 false
3721 }
3722 });
3723 if found_nullable_right_away {
3724 return Ok(NodeFlags::ZERO);
3725 }
3726
3727 worklist.push_back(initial_node);
3728 let isempty_flag: NodeFlags;
3729 let mut found_node = NodeId::BOT;
3730
3731 loop {
3732 match worklist.pop_front() {
3733 None => {
3734 isempty_flag = NodeFlags::IS_EMPTY;
3735 break;
3736 }
3737 Some(outer) => {
3738 if let Some(cached) = self.cache_empty.get(&outer) {
3739 if cached.is_checked() {
3740 if cached.is_empty() {
3741 continue;
3742 } else {
3743 return Ok(NodeFlags::ZERO);
3744 }
3745 }
3746 }
3747
3748 let derivative = self.der(outer, Nullability::CENTER)?;
3749
3750 stack.push(derivative);
3751
3752 let found_null = self.iter_find_stack(&mut stack, |node| {
3753 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3754 found_node = node;
3755 if !self.get_meta_flags(node).contains_inter() {
3756 true
3757 } else {
3758 e.insert(outer);
3759 worklist.push_front(node);
3760 self.any_nonbegin_nullable(node)
3761 }
3762 } else {
3763 false
3764 }
3765 });
3766 if found_null {
3767 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3768 isempty_flag = NodeFlags::ZERO;
3769 break;
3770 }
3771 }
3772 }
3773 }
3774
3775 self.cache_empty.insert(
3776 initial_node,
3777 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3778 );
3779 Ok(isempty_flag)
3780 }
3781
3782 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3784 if larger_lang == smaller_lang {
3785 return Some(true);
3786 }
3787
3788 if self
3790 .nullability(larger_lang)
3791 .not()
3792 .and(self.nullability(smaller_lang))
3793 != Nullability::NEVER
3794 {
3795 return Some(false);
3796 }
3797
3798 let (smaller_lang, larger_lang) =
3803 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3804 let wrap = |b: &mut Self, n: NodeId| {
3805 let tmp = b.mk_concat(n, NodeId::TS);
3806 b.mk_concat(NodeId::TS, tmp)
3807 };
3808 (wrap(self, smaller_lang), wrap(self, larger_lang))
3809 } else {
3810 (smaller_lang, larger_lang)
3811 };
3812
3813 let nota = self.mk_compl(larger_lang);
3814 let diff = self.mk_inter(smaller_lang, nota);
3815 self.is_empty_lang(diff)
3816 }
3817}