1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_lookaround(self) -> bool {
134 self.has(MetaFlags::CONTAINS_LOOKAROUND)
135 }
136 pub(crate) fn contains_inter(self) -> bool {
137 self.has(MetaFlags::CONTAINS_INTER)
138 }
139
140 pub(crate) fn all_contains_flags(self) -> MetaFlags {
141 self.and(
142 MetaFlags::CONTAINS_LOOKAROUND
143 .or(MetaFlags::CONTAINS_ANCHORS)
144 .or(MetaFlags::CONTAINS_INTER),
145 )
146 }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152 #[default]
153 Pred,
154 Star,
155 Begin,
156 End,
157 Concat,
158 Union,
159 Compl,
160 Lookbehind,
161 Lookahead,
162 Inter,
163 Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168 flags: MetaFlags,
169 nulls: NullsId,
170}
171
172struct MetadataBuilder {
173 num_created: u32,
174 solver: Solver,
175 nb: NullsBuilder,
176 index: FxHashMap<Metadata, MetadataId>,
177 pub array: Vec<Metadata>,
178}
179
180mod helpers {
181 pub(crate) fn incr_rel(n1: u32) -> u32 {
182 match n1.overflowing_add(1) {
183 (_, true) => u32::MAX,
184 (res, false) => res,
185 }
186 }
187}
188
189impl MetadataBuilder {
190 fn new() -> MetadataBuilder {
191 Self {
192 index: FxHashMap::default(),
193 array: vec![Metadata {
194 flags: MetaFlags::ZERO,
195 nulls: NullsId::EMPTY,
196 }],
197 solver: Solver::new(),
198 num_created: 0,
199 nb: NullsBuilder::new(),
200 }
201 }
202
203 fn init(&mut self, inst: Metadata) -> MetadataId {
204 self.num_created += 1;
205 let new_id = MetadataId(self.num_created);
206 self.index.insert(inst.clone(), new_id);
207 self.array.push(inst);
208 new_id
209 }
210
211 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212 match self.index.get(&inst) {
213 Some(&id) => id,
214 None => self.init(inst),
215 }
216 }
217
218 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219 &self.array[inst.0 as usize]
220 }
221
222 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223 setflags.all_contains_flags()
224 }
225
226 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227 let left = &self.array[body.0 as usize].flags;
228 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229 let inf = if body_id == NodeId::BOT {
231 MetaFlags::ZERO
232 } else {
233 MetaFlags::INFINITE_LENGTH
234 };
235 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236 }
237
238 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239 let left = &self.array[left_id.0 as usize].flags;
240 let null = left.nullability().not().and(Nullability::ALWAYS);
241 let contains = self.get_contains(*left);
242 MetaFlags::with_nullability(
243 null,
244 contains
245 .or(MetaFlags::INFINITE_LENGTH)
246 .or(MetaFlags::CONTAINS_COMPL),
247 )
248 }
249
250 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251 let left = &self.array[left_id.0 as usize].flags;
252 let right = &self.array[right_id.0 as usize].flags;
253 let null = left.nullability().and(right.nullability());
254 let contains = self.get_contains(left.or(*right));
255 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256 MetaFlags::with_nullability(null, contains.or(len))
257 }
258
259 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260 let left = &self.array[left_id.0 as usize].flags;
261 let right = &self.array[right_id.0 as usize].flags;
262 let null = left.nullability().and(right.nullability());
263 let contains = self
264 .get_contains(left.or(*right))
265 .or(MetaFlags::CONTAINS_INTER);
266 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267 MetaFlags::with_nullability(null, contains.or(len))
268 }
269
270 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271 let left = &self.array[left_id.0 as usize].flags;
272 let right = &self.array[right_id.0 as usize].flags;
273 let null = left.nullability().or(right.nullability());
274 let contains = self.get_contains(left.or(*right));
275 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276 MetaFlags::with_nullability(null, contains.or(len))
277 }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282 pub(crate) kind: Kind,
283 pub(crate) left: NodeId,
284 pub(crate) right: NodeId,
285 pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290 Leaf(NodeId),
291 ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301 fn is_checked(self) -> bool {
302 self.0 >= NodeFlags::IS_CHECKED.0
303 }
304 fn is_empty(self) -> bool {
305 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306 }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317 mb: MetadataBuilder,
318 temp_vec: Vec<NodeId>,
319 num_created: u32,
320 index: FxHashMap<NodeKey, NodeId>,
321 array: Vec<NodeKey>,
322 metadata: Vec<MetadataId>,
323 reversed: Vec<NodeId>,
324 cache_empty: FxHashMap<NodeId, NodeFlags>,
325 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326 tr_array: Vec<TRegex<TSetId>>,
327 tr_der_center: Vec<TRegexId>,
328 tr_der_begin: Vec<TRegexId>,
329 flags: BuilderFlags,
330 pub lookahead_context_max: u32,
332 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336macro_rules! iter_inter {
337 ($compiler:ident, $start:ident, $expression:expr) => {
338 let mut curr = $start;
339 while $compiler.get_kind(curr) == Kind::Inter {
340 let left = $compiler.get_left(curr);
341 $expression(left);
342 curr = $compiler.get_right(curr);
343 }
344 $expression(curr);
345 };
346}
347
348impl NodeId {
349 fn is_missing(&self) -> bool {
350 *self == NodeId::MISSING
351 }
352 #[inline]
353 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
354 b.get_flags_contains(self)
355 }
356 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
357 if self == tail {
358 true
359 } else if self.is_kind(b, Kind::Concat) {
360 self.right(b).has_concat_tail(b, tail)
361 } else {
362 false
363 }
364 }
365 fn missing_to_eps(&self) -> NodeId {
366 if *self == NodeId::MISSING {
367 NodeId::EPS
368 } else {
369 *self
370 }
371 }
372
373 #[inline]
374 fn kind(self, b: &RegexBuilder) -> Kind {
375 b.get_kind(self)
376 }
377 #[inline]
378 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
379 b.get_kind(self) == k
380 }
381 #[inline]
382 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
383 b.nullability(self) == Nullability::NEVER
384 }
385 #[inline]
386 fn nullability(self, b: &RegexBuilder) -> Nullability {
387 b.nullability(self)
388 }
389
390 #[inline]
391 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
392 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
393 }
394 #[inline]
395 pub fn left(self, b: &RegexBuilder) -> NodeId {
396 b.get_left(self)
397 }
398
399 #[inline]
400 pub fn right(self, b: &RegexBuilder) -> NodeId {
401 b.get_right(self)
402 }
403
404 #[inline]
405 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
406 b.der(self, mask)
407 }
408
409 #[inline]
410 fn extra(self, b: &RegexBuilder) -> u32 {
411 b.get_extra(self)
412 }
413
414 #[inline]
415 fn is_pred(self, b: &RegexBuilder) -> bool {
416 b.get_kind(self) == Kind::Pred
417 }
418 #[inline]
419 fn is_lookahead(self, b: &RegexBuilder) -> bool {
420 b.get_kind(self) == Kind::Lookahead
421 }
422 #[inline]
423 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
424 debug_assert!(self.is_pred(b));
425 TSetId(b.get_extra(self))
426 }
427 #[inline]
428 fn is_star(self, b: &RegexBuilder) -> bool {
429 if NodeId::EPS == self {
430 return false;
431 }
432 b.get_kind(self) == Kind::Star
433 }
434
435 #[inline]
436 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
437 if self.is_concat(b) {
438 let r = self.right(b);
439 return r.is_star(b) && r.left(b) == self.left(b);
440 }
441 false
442 }
443
444 #[inline]
445 fn is_concat(self, b: &RegexBuilder) -> bool {
446 b.get_kind(self) == Kind::Concat
447 }
448
449 #[inline]
450 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
451 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
452 Some(b.get_right(self))
453 } else {
454 None
455 }
456 }
457
458 #[inline]
459 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
460 if b.get_kind(self) == Kind::Concat {
461 let left = self.left(b);
462 let right = self.right(b);
463 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
464 return right == NodeId::END;
465 }
466 }
467 false
468 }
469
470 #[inline]
471 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
472 if NodeId::EPS == self {
473 return None;
474 }
475 if self.is_star(b) && self.left(b).is_pred(b) {
476 Some(self.left(b))
477 } else {
478 None
479 }
480 }
481
482 #[inline]
483 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
484 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
485 let middle = self.right(b);
486 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
487 return Some(middle.left(b));
488 }
489 }
490 None
491 }
492
493 #[inline]
494 pub(crate) fn iter_union(
495 self,
496 b: &mut RegexBuilder,
497 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
498 ) {
499 let mut curr: NodeId = self;
500 while curr.kind(b) == Kind::Union {
501 f(b, curr.left(b));
502 curr = curr.right(b);
503 }
504 f(b, curr);
505 }
506
507 #[inline]
508 pub(crate) fn iter_union_while(
509 self,
510 b: &mut RegexBuilder,
511 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
512 ) {
513 let mut curr: NodeId = self;
514 let mut continue_loop = true;
515 while continue_loop && curr.kind(b) == Kind::Union {
516 continue_loop = f(b, curr.left(b));
517 curr = curr.right(b);
518 }
519 if continue_loop {
520 f(b, curr);
521 }
522 }
523}
524
525impl Default for RegexBuilder {
526 fn default() -> Self {
527 Self::new()
528 }
529}
530
531impl RegexBuilder {
532 pub fn new() -> RegexBuilder {
533 let mut inst = Self {
534 mb: MetadataBuilder::new(),
535 array: Vec::new(),
536 index: FxHashMap::default(),
537 cache_empty: FxHashMap::default(),
538 tr_array: Vec::new(),
539 tr_cache: FxHashMap::default(),
540 flags: BuilderFlags::ZERO,
541 lookahead_context_max: 800,
542 num_created: 0,
543 metadata: Vec::new(),
544 reversed: Vec::new(),
545 tr_der_center: Vec::new(),
546 tr_der_begin: Vec::new(),
547 temp_vec: Vec::new(),
548 mk_binary_memo: FxHashMap::default(),
549 clean_cache: FxHashMap::default(),
550 };
551 inst.array.push(NodeKey::default());
552 inst.mk_pred(TSetId::EMPTY);
553 inst.mk_star(NodeId::BOT);
554 inst.mk_pred(TSetId::FULL);
555 inst.mk_star(NodeId::TOP);
556 let top_plus_id = inst.mk_plus(NodeId::TOP);
557 inst.mk_unset(Kind::Begin);
558 inst.mk_unset(Kind::End);
559 debug_assert!(top_plus_id == NodeId::TOPPLUS);
560
561 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
562 inst.mk_leaf(NodeId::BOT);
563 inst.mk_leaf(NodeId::EPS);
564 inst.mk_leaf(NodeId::TOP);
565 inst.mk_leaf(NodeId::TS);
566
567 inst.flags = BuilderFlags::SUBSUME;
568 inst
569 }
570
571 #[inline]
572 pub fn solver_ref(&self) -> &Solver {
573 &self.mb.solver
574 }
575
576 #[inline]
577 pub fn solver(&mut self) -> &mut Solver {
578 &mut self.mb.solver
579 }
580
581 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
582 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
583 self.tr_cache.insert(inst.clone(), new_id);
584 self.tr_array.push(inst);
585 new_id
586 }
587
588 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
589 match self.tr_cache.get(&inst) {
590 Some(&id) => id,
591 None => self.tr_init(inst),
592 }
593 }
594
595 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
596 &self.tr_array[inst.0 as usize]
597 }
598
599 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
600 let solv = self.solver();
601 !solv.is_sat_id(cond1, cond2)
602 }
603
604 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
605 let term = TRegex::<TSetId>::Leaf(node_id);
606 self.get_tregex_id(term)
607 }
608
609 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
610 let tmp_inst = TRegex::ITE(cond, _then, _else);
611 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
612 return *cached;
613 }
614 if _then == _else {
615 return _then;
616 }
617 if self.solver().is_full_id(cond) {
618 return _then;
619 }
620 if self.solver().is_empty_id(cond) {
621 return _else;
622 }
623 let _clean_then = match self.tr_array[_then.0 as usize] {
624 TRegex::Leaf(_) => _then,
625 _ => self.clean(cond, _then),
626 };
627 let notcond = self.solver().not_id(cond);
628 let _clean_else = match self.tr_array[_else.0 as usize] {
629 TRegex::Leaf(_) => _else,
630 _ => self.clean(notcond, _else),
631 };
632
633 if _clean_then == _clean_else {
634 return _clean_then;
635 }
636 match self.get_tregex(_clean_then) {
638 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
639 let _cond_then = *leftcond;
640 let new_then = *_inner_then;
641 let sand = self.solver().and_id(cond, _cond_then);
642 return self.mk_ite(sand, new_then, _clean_else);
643 }
644 _ => {}
645 }
646 match self.get_tregex(_clean_else) {
648 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
650 let e2clone = *_e2;
651 let new_cond = self.mb.solver.or_id(cond, *_c2);
652 return self.mk_ite(new_cond, _clean_then, e2clone);
653 }
654 _ => {}
655 }
656
657 if _clean_then == TRegexId::BOT {
658 let flip_cond = self.solver().not_id(cond);
659 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
660 return cleaned_id;
661 }
662
663 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
664 }
665
666 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
667 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
668 return cached;
669 }
670 let result = match *self.get_tregex(tterm) {
671 TRegex::Leaf(_) => tterm,
672 TRegex::ITE(alpha, _then_id, _else_id) => {
673 let notalpha = self.mb.solver.not_id(alpha);
674 if self.mb.solver.unsat_id(beta, alpha) {
675 self.clean(beta, _else_id)
677 } else if self.unsat(beta, notalpha) {
678 self.clean(beta, _then_id)
680 } else {
681 let tc = self.mb.solver.and_id(beta, alpha);
682 let ec = self.mb.solver.and_id(beta, notalpha);
683 let new_then = self.clean(tc, _then_id);
684 let new_else = self.clean(ec, _else_id);
685 self.mk_ite(alpha, new_then, new_else)
686 }
687 }
688 };
689 self.clean_cache.insert((beta, tterm), result);
690 result
691 }
692
693 fn mk_unary(
694 &mut self,
695 term: TRegexId,
696 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
697 ) -> TRegexId {
698 match self.tr_array[term.0 as usize] {
699 TRegex::Leaf(node_id) => {
700 let applied = apply(self, node_id);
701 self.mk_leaf(applied)
702 }
703 TRegex::ITE(c1, _then, _else) => {
704 let _then1 = self.mk_unary(_then, apply);
705 let _else1 = self.mk_unary(_else, apply);
706 self.mk_ite(c1, _then1, _else1)
707 }
708 }
709 }
710
711 fn mk_binary_result(
712 &mut self,
713 left: TRegexId,
714 right: TRegexId,
715 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
716 ) -> Result<TRegexId, AlgebraError> {
717 match self.tr_array[left.0 as usize] {
718 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
719 TRegex::Leaf(right_node_id) => {
720 let applied = apply(self, left_node_id, right_node_id)?;
721 Ok(self.mk_leaf(applied))
722 }
723 TRegex::ITE(c2, _then, _else) => {
724 let then2 = self.mk_binary_result(left, _then, apply)?;
725 let else2 = self.mk_binary_result(left, _else, apply)?;
726 Ok(self.mk_ite(c2, then2, else2))
727 }
728 },
729 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
730 TRegex::Leaf(_) => {
731 let then2 = self.mk_binary_result(_then1, right, apply)?;
732 let else2 = self.mk_binary_result(_else1, right, apply)?;
733 Ok(self.mk_ite(c1, then2, else2))
734 }
735 TRegex::ITE(c2, _then2, _else2) => {
736 if c1 == c2 {
737 let _then = self.mk_binary_result(_then1, _then2, apply)?;
738 let _else = self.mk_binary_result(_else1, _else2, apply)?;
739 return Ok(self.mk_ite(c1, _then, _else));
740 }
741 if c1.0 > c2.0 {
742 let _then = self.mk_binary_result(_then1, right, apply)?;
743 let _else = self.mk_binary_result(_else1, right, apply)?;
744 Ok(self.mk_ite(c1, _then, _else))
745 } else {
746 let _then = self.mk_binary_result(left, _then2, apply)?;
747 let _else = self.mk_binary_result(left, _else2, apply)?;
748 Ok(self.mk_ite(c2, _then, _else))
749 }
750 }
751 },
752 }
753 }
754
755 fn mk_binary(
756 &mut self,
757 left: TRegexId,
758 right: TRegexId,
759 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
760 ) -> TRegexId {
761 self.mk_binary_memo.clear();
762 self.mk_binary_inner(left, right, apply)
763 }
764
765 fn mk_binary_inner(
766 &mut self,
767 left: TRegexId,
768 right: TRegexId,
769 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
770 ) -> TRegexId {
771 if left == right {
772 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
773 }
774 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
775 return cached;
776 }
777 let result = match self.tr_array[left.0 as usize] {
778 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
779 TRegex::Leaf(right_node_id) => {
780 let applied = apply(self, left_node_id, right_node_id);
781 self.mk_leaf(applied)
782 }
783 TRegex::ITE(c2, _then, _else) => {
784 let then2 = self.mk_binary_inner(left, _then, apply);
785 let else2 = self.mk_binary_inner(left, _else, apply);
786 self.mk_ite(c2, then2, else2)
787 }
788 },
789 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
790 TRegex::Leaf(_) => {
791 let then2 = self.mk_binary_inner(_then1, right, apply);
792 let else2 = self.mk_binary_inner(_else1, right, apply);
793 self.mk_ite(c1, then2, else2)
794 }
795 TRegex::ITE(c2, _then2, _else2) => {
796 if c1 == c2 {
797 let _then = self.mk_binary_inner(_then1, _then2, apply);
798 let _else = self.mk_binary_inner(_else1, _else2, apply);
799 self.mk_ite(c1, _then, _else)
800 } else if c1.0 > c2.0 {
801 let _then = self.mk_binary_inner(_then1, right, apply);
802 let _else = self.mk_binary_inner(_else1, right, apply);
803 self.mk_ite(c1, _then, _else)
804 } else {
805 let _then = self.mk_binary_inner(left, _then2, apply);
806 let _else = self.mk_binary_inner(left, _else2, apply);
807 self.mk_ite(c2, _then, _else)
808 }
809 }
810 },
811 };
812 self.mk_binary_memo.insert((left, right), result);
813 result
814 }
815
816 pub fn get_nulls(
817 &mut self,
818 pending_rel: u32,
819 mask: Nullability,
820 acc: &mut BTreeSet<NullState>,
821 node_id: NodeId,
822 ) {
823 debug_assert!(node_id != NodeId::MISSING);
824 if !self.is_nullable(node_id, mask) {
825 return;
826 }
827 match self.get_kind(node_id) {
828 Kind::Pred => {}
829 Kind::End => {
830 if mask.has(Nullability::END) {
831 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
832 }
833 }
834 Kind::Begin => {
835 if mask.has(Nullability::BEGIN) {
836 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
837 }
838 }
839 Kind::Concat => {
840 let new_mask = self.nullability(node_id).and(mask);
841 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
842 if self.is_nullable(node_id.left(self), mask) {
843 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
844 }
845 }
846 Kind::Union => {
847 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
848 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
849 }
850 Kind::Inter => {
851 let new_mask = self.nullability(node_id).and(mask);
852 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
853 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
854 }
855 Kind::Star => {
856 acc.insert(NullState::new(mask, pending_rel));
857 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
858 }
859 Kind::Compl => {
860 if !self.is_nullable(node_id.left(self), mask) {
861 acc.insert(NullState::new(mask, 0));
862 }
863 }
864 Kind::Lookbehind => {
865 let new_mask = self.nullability(node_id).and(mask);
866 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
867 if node_id.right(self) != NodeId::MISSING {
868 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
869 }
870 }
871 Kind::Lookahead => {
872 let la_inner = self.get_lookahead_inner(node_id);
873 if self.is_nullable(la_inner, mask) {
874 let rel = self.get_lookahead_rel(node_id);
875 if rel != u32::MAX {
876 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
877 }
878 let la_tail = self.get_lookahead_tail(node_id);
880 if la_tail != NodeId::MISSING {
881 self.get_nulls(pending_rel, mask, acc, la_tail);
882 }
883 }
884 }
885 Kind::Counted => {
886 let packed = self.get_extra(node_id);
887 let best = packed >> 16;
888 if best > 0 {
889 acc.insert(NullState::new(mask, pending_rel + best));
890 }
891 }
892 }
893 }
894
895 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
896 self.get_meta_flags(node_id)
897 .has(MetaFlags::CONTAINS_LOOKAROUND)
898 }
899
900 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
902 self.get_meta_flags(node_id)
903 .has(MetaFlags::CONTAINS_ANCHORS)
904 }
905
906 pub fn is_infinite(&self, node_id: NodeId) -> bool {
907 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
908 }
909
910 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
912 if self.is_infinite(node_id) {
913 if self.get_kind(node_id) == Kind::Inter {
914 self.get_bounded_length(node_id)
915 } else {
916 (self.get_min_length_only(node_id), u32::MAX)
917 }
918 } else {
919 self.get_bounded_length(node_id)
920 }
921 }
922
923 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
924 if node_id == NodeId::EPS {
925 return (0, 0);
926 }
927 match self.get_kind(node_id) {
928 Kind::End | Kind::Begin => (0, 0),
929 Kind::Pred => (1, 1),
930 Kind::Concat => {
931 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
932 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
933 (lmin + rmin, lmax.saturating_add(rmax))
934 }
935 Kind::Union => {
936 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
937 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
938 (lmin.min(rmin), lmax.max(rmax))
939 }
940 Kind::Inter => {
941 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
942 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
943 (lmin.max(rmin), lmax.min(rmax))
944 }
945 Kind::Lookahead => {
946 let body = node_id.left(self);
947 if self.is_infinite(body) {
948 return (0, u32::MAX);
949 }
950 let right = node_id.right(self);
951 if right.is_missing() {
952 (0, 0)
953 } else {
954 self.get_min_max_length(right)
955 }
956 }
957 Kind::Counted => (0, 0),
958 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
959 }
960 }
961
962 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
963 match self.get_kind(node_id) {
964 Kind::End | Kind::Begin => Some(0),
965 Kind::Pred => Some(1),
966 Kind::Concat => {
967 let l = self.get_fixed_length(node_id.left(self))?;
968 let r = self.get_fixed_length(node_id.right(self))?;
969 Some(l + r)
970 }
971 Kind::Union => {
972 let l = self.get_fixed_length(node_id.left(self))?;
973 let r = self.get_fixed_length(node_id.right(self))?;
974 if l == r {
975 Some(l)
976 } else {
977 None
978 }
979 }
980 Kind::Inter => {
981 let l = self.get_fixed_length(node_id.left(self))?;
982 let r = self.get_fixed_length(node_id.right(self))?;
983 if l == r {
984 Some(l)
985 } else {
986 None
987 }
988 }
989 Kind::Lookahead => {
990 let right = node_id.right(self);
991 if right.is_missing() {
992 Some(0)
993 } else {
994 self.get_fixed_length(right)
995 }
996 }
997 Kind::Counted => Some(0),
998 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
999 }
1000 }
1001
1002 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1003 match self.get_kind(node_id) {
1004 Kind::End | Kind::Begin => 0,
1005 Kind::Pred => 1,
1006 Kind::Concat => {
1007 self.get_min_length_only(node_id.left(self))
1008 + self.get_min_length_only(node_id.right(self))
1009 }
1010 Kind::Union => self
1011 .get_min_length_only(node_id.left(self))
1012 .min(self.get_min_length_only(node_id.right(self))),
1013 Kind::Inter => self
1014 .get_min_length_only(node_id.left(self))
1015 .max(self.get_min_length_only(node_id.right(self))),
1016 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1017 Kind::Compl => {
1018 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1019 0
1020 } else {
1021 1
1022 }
1023 }
1024 }
1025 }
1026
1027 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1028 if node_id == NodeId::TS {
1029 return true;
1030 }
1031 match self.get_kind(node_id) {
1032 Kind::Inter => {
1033 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1034 }
1035 Kind::Union => {
1036 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1037 }
1038 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1039 _ => false,
1040 }
1041 }
1042
1043 #[inline]
1044 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1045 if self.get_kind(node_id) == Kind::Concat {
1046 self.ends_with_ts(node_id.right(self))
1047 } else {
1048 node_id == NodeId::TS
1049 }
1050 }
1051
1052 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1053 debug_assert!(node_id != NodeId::MISSING);
1054 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1055 }
1056
1057 pub(crate) fn cache_der(
1058 &mut self,
1059 node_id: NodeId,
1060 result: TRegexId,
1061 mask: Nullability,
1062 ) -> TRegexId {
1063 if mask == Nullability::CENTER {
1064 self.tr_der_center[node_id.0 as usize] = result
1065 } else {
1066 self.tr_der_begin[node_id.0 as usize] = result
1067 };
1068 result
1069 }
1070
1071 pub(crate) fn try_cached_der(
1072 &mut self,
1073 node_id: NodeId,
1074 mask: Nullability,
1075 ) -> Option<TRegexId> {
1076 let cache = if mask == Nullability::CENTER {
1077 &mut self.tr_der_center
1078 } else {
1079 &mut self.tr_der_begin
1080 };
1081 match cache.get(node_id.0 as usize) {
1082 Some(&TRegexId::MISSING) => {}
1083 Some(&result) => {
1084 return Some(result);
1085 }
1086 None => {
1087 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1088 }
1089 }
1090 None
1091 }
1092
1093 pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1094 let mut term = self.get_tregex(der);
1095 loop {
1096 match *term {
1097 TRegex::Leaf(node_id) => return node_id,
1098 TRegex::ITE(cond, _then, _else) => {
1099 if self.solver().is_sat_id(set, cond) {
1100 term = self.get_tregex(_then);
1101 } else {
1102 term = self.get_tregex(_else);
1103 }
1104 }
1105 }
1106 }
1107 }
1108
1109 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1110 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1111 debug_assert!(
1112 node_id != NodeId::MISSING,
1113 "attempting to derive missing node"
1114 );
1115 if let Some(result) = self.try_cached_der(node_id, mask) {
1116 return Ok(result);
1117 }
1118
1119 let result = match node_id.kind(self) {
1120 Kind::Compl => {
1121 let leftd = node_id.left(self).der(self, mask)?;
1122 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1123 }
1124 Kind::Inter => {
1125 let leftd = node_id.left(self).der(self, mask)?;
1126 let rightd = node_id.right(self).der(self, mask)?;
1127 {
1128 let this = &mut *self;
1129 this.mk_binary(
1130 leftd,
1131 rightd,
1132 &mut (|b, left, right| b.mk_inter(left, right)),
1133 )
1134 }
1135 }
1136 Kind::Union => {
1137 let leftd = self.der(node_id.left(self), mask)?;
1138 let rightd = self.der(node_id.right(self), mask)?;
1139 {
1140 let this = &mut *self;
1141 this.mk_binary(
1142 leftd,
1143 rightd,
1144 &mut (|b, left, right| b.mk_union(left, right)),
1145 )
1146 }
1147 }
1148 Kind::Concat => {
1149 let head = node_id.left(self);
1150 let tail = node_id.right(self);
1151 let tail_leaf = self.mk_leaf(tail);
1152 let head_der = self.der(head, mask)?;
1153
1154 if self.is_nullable(head, mask) {
1155 let rightd = self.der(tail, mask)?;
1156 let ldr = {
1157 let this = &mut *self;
1158 this.mk_binary(
1159 head_der,
1160 tail_leaf,
1161 &mut (|b, left, right| b.mk_concat(left, right)),
1162 )
1163 };
1164 {
1165 let this = &mut *self;
1166 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1167 }
1168 } else {
1169 let this = &mut *self;
1170 this.mk_binary(
1171 head_der,
1172 tail_leaf,
1173 &mut (|b, left, right| b.mk_concat(left, right)),
1174 )
1175 }
1176 }
1177 Kind::Star => {
1178 if node_id == NodeId::EPS {
1179 TRegexId::BOT
1180 } else {
1181 let left = node_id.left(self);
1182 let r_decr_leaf = self.mk_leaf(node_id);
1183 let r_der = self.der(left, mask)?;
1184 let this = &mut *self;
1185 this.mk_binary(
1186 r_der,
1187 r_decr_leaf,
1188 &mut (|b, left, right| b.mk_concat(left, right)),
1189 )
1190 }
1191 }
1192 Kind::Lookbehind => {
1193 let lb_prev_der = {
1194 let lb_prev = self.get_lookbehind_prev(node_id);
1195 if lb_prev == NodeId::MISSING {
1196 TRegexId::MISSING
1197 } else {
1198 self.der(lb_prev, mask)?
1199 }
1200 };
1201 let lb_inner = self.get_lookbehind_inner(node_id);
1202 let lb_inner_der = self.der(lb_inner, mask)?;
1203 {
1204 let this = &mut *self;
1205 this.mk_binary_result(
1206 lb_inner_der,
1207 lb_prev_der,
1208 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1209 )?
1210 }
1211 }
1212 Kind::Lookahead => {
1213 let la_tail = self.get_lookahead_tail(node_id);
1214 let la_body = node_id.left(self);
1215 let rel = self.get_lookahead_rel(node_id);
1216
1217 if self.is_nullable(la_body, mask) {
1218 let right = node_id.right(self).missing_to_eps();
1220 let rder = self.der(right, mask).clone();
1221 return rder;
1222 }
1223
1224 if rel == u32::MAX {
1225 let la_body_der = self.der(la_body, mask)?;
1226 if la_tail.is_kind(self, Kind::Pred) {
1227 let transitioned =
1228 self.transition_term(la_body_der, la_tail.pred_tset(self));
1229 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1230 let concated = self.mk_concat(la_tail, new_la);
1231 return self.der(concated, mask);
1232 }
1233 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1234 let left = la_tail.left(self);
1235 let tset = left.pred_tset(self);
1236 let transitioned = self.transition_term(la_body_der, tset);
1237 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1238 let tail_right = la_tail.right(self);
1239 let concated = self.mk_concat(new_la, tail_right);
1240 let concated = self.mk_concat(left, concated);
1241 return self.der(concated, mask);
1242 }
1243 }
1244
1245 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1246 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1247 let concated = self.mk_concat(la_body, nulls_mask);
1248 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1249 let non_nulled = self.mk_non_nullable_safe(la_tail);
1250 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1251 let new_union = self.mk_union(concated_look, new_look);
1252 return self.der(new_union, mask);
1253 }
1254
1255 let la_tail_der = if la_tail == NodeId::MISSING {
1256 TRegexId::MISSING
1257 } else {
1258 if self.is_nullable(la_tail, mask) {
1259 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1260 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1261 let la_union = self.mk_union(la_tail, nulls_la);
1262 self.der(la_union, mask)?
1263 } else {
1264 self.der(la_tail, mask)?
1265 }
1266 };
1267
1268 let la_body_der = self.der(la_body, mask)?;
1269
1270 if rel != u32::MAX && rel > self.lookahead_context_max {
1271 return Err(AlgebraError::AnchorLimit);
1272 }
1273
1274 let la = {
1275 let this = &mut *self;
1276 let rel = helpers::incr_rel(rel);
1277 this.mk_binary(
1278 la_body_der,
1279 la_tail_der,
1280 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1281 )
1282 };
1283
1284 if rel != u32::MAX
1285 && la_tail_der != TRegexId::MISSING
1286 && self.is_nullable(la_tail, mask)
1287 {
1288 let look_only = {
1289 let this = &mut *self;
1290 let right = TRegexId::MISSING;
1291 let rel = helpers::incr_rel(rel);
1292 this.mk_binary(
1293 la_body_der,
1294 right,
1295 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1296 )
1297 };
1298 {
1299 let this = &mut *self;
1300 this.mk_binary(
1301 look_only,
1302 la,
1303 &mut (|b, left, right| b.mk_union(left, right)),
1304 )
1305 }
1306 } else {
1307 la
1308 }
1309 }
1310 Kind::Counted => {
1311 let body = node_id.left(self);
1312 let chain = node_id.right(self);
1313 let packed = self.get_extra(node_id);
1314 let step = (packed & 0xFFFF) as u16;
1315 let best = (packed >> 16) as u16;
1316
1317 let mid_best = if self.is_nullable(body, mask) && step >= best {
1318 step
1319 } else {
1320 best
1321 };
1322
1323 let body_der = self.der(body, mask)?;
1324 let new_step = step.saturating_add(1);
1325 self.mk_unary(body_der, &mut |b, new_body| {
1326 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1327 new_step
1328 } else {
1329 mid_best
1330 };
1331 let packed = (final_best as u32) << 16 | new_step as u32;
1332 b.mk_counted(new_body, chain, packed)
1333 })
1334 }
1335 Kind::Begin | Kind::End => TRegexId::BOT,
1336 Kind::Pred => {
1337 let psi = node_id.pred_tset(self);
1338 if psi == TSetId::EMPTY {
1339 TRegexId::BOT
1340 } else {
1341 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1342 }
1343 }
1344 };
1345
1346 self.cache_der(node_id, result, mask);
1350 Ok(result)
1351 }
1352
1353 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1354 debug_assert!(meta_id != MetadataId::MISSING);
1355 match self.metadata.get_mut(node_id.0 as usize) {
1356 Some(v) => *v = meta_id,
1357 None => {
1358 self.metadata
1359 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1360 self.metadata[node_id.0 as usize] = meta_id;
1361 }
1362 }
1363 }
1364
1365 fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1366 debug_assert!(reversed_id != NodeId::MISSING);
1367 match self.reversed.get_mut(node_id.0 as usize) {
1368 Some(v) => *v = reversed_id,
1369 None => {
1370 self.reversed
1371 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1372 self.reversed[node_id.0 as usize] = reversed_id;
1373 }
1374 }
1375 }
1376
1377 fn init(&mut self, inst: NodeKey) -> NodeId {
1378 self.num_created += 1;
1379 let node_id = NodeId(self.num_created);
1380 self.index.insert(inst.clone(), node_id);
1381 match inst.kind {
1382 Kind::Pred => {
1383 let meta_id = self.mb.get_meta_id(Metadata {
1384 flags: MetaFlags::ZERO,
1385 nulls: NullsId::EMPTY,
1386 });
1387 self.init_metadata(node_id, meta_id);
1388 }
1389 Kind::Begin => {
1390 let meta_id = self.mb.get_meta_id(Metadata {
1391 flags: MetaFlags::with_nullability(
1392 Nullability::BEGIN,
1393 MetaFlags::CONTAINS_ANCHORS,
1394 ),
1395 nulls: NullsId::BEGIN0,
1396 });
1397 self.init_metadata(node_id, meta_id);
1398 }
1399 Kind::End => {
1400 let meta_id = self.mb.get_meta_id(Metadata {
1401 flags: MetaFlags::with_nullability(
1402 Nullability::END,
1403 MetaFlags::CONTAINS_ANCHORS,
1404 ),
1405 nulls: NullsId::END0,
1406 });
1407 self.init_metadata(node_id, meta_id);
1408 }
1409 Kind::Inter => {
1410 let m1 = self.get_node_meta_id(inst.left);
1411 let m2 = self.get_node_meta_id(inst.right);
1412 let meta_id = {
1413 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1414 let mask_l = inst.left.nullability(self);
1415 let mask_r = inst.right.nullability(self);
1416 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1417 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1418 nulls = self.mb.nb.and_mask(nulls, mask_l);
1419 nulls = self.mb.nb.and_mask(nulls, mask_r);
1420 let new_meta = Metadata {
1421 flags: self.mb.flags_inter(m1, m2),
1422 nulls,
1423 };
1424 self.mb.get_meta_id(new_meta)
1425 };
1426 self.init_metadata(node_id, meta_id);
1427 }
1428 Kind::Union => {
1429 let m1 = self.get_node_meta_id(inst.left);
1430 let m2 = self.get_node_meta_id(inst.right);
1431 let meta_id = {
1432 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1433 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1434 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1435 let new_meta = Metadata {
1436 flags: self.mb.flags_union(m1, m2),
1437 nulls,
1438 };
1439 self.mb.get_meta_id(new_meta)
1440 };
1441 self.init_metadata(node_id, meta_id);
1442 }
1443 Kind::Concat => {
1444 let flags = self.mb.flags_concat(
1445 self.get_node_meta_id(inst.left),
1446 self.get_node_meta_id(inst.right),
1447 );
1448
1449 let right_nullability = inst.right.nullability(self);
1450 let left_nullability = inst.left.nullability(self);
1451 let nulls_left = self.get_nulls_id(inst.left);
1452 let nulls_right = self.get_nulls_id(inst.right);
1453 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1454 let mask = right_nullability.and(left_nullability);
1455 nulls = self.mb.nb.and_mask(nulls, mask);
1456
1457 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1458 self.init_metadata(node_id, new_id);
1459 }
1460 Kind::Star => {
1461 let left_nulls = self.get_nulls_id(inst.left);
1462 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1463 let meta_id = self.mb.get_meta_id(Metadata {
1464 flags: self
1465 .mb
1466 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1467 nulls,
1468 });
1469 self.init_metadata(node_id, meta_id);
1470 }
1471 Kind::Compl => {
1472 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1473 let meta_id = self.mb.get_meta_id(Metadata {
1474 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1475 nulls,
1476 });
1477 self.init_metadata(node_id, meta_id);
1478 }
1479 Kind::Lookbehind => {
1480 let mut null = self.get_meta_flags(inst.left).nullability();
1481 let mut contains_flags = self.get_flags_contains(inst.left);
1482 if !inst.right.is_missing() {
1483 null = null.and(self.get_meta_flags(inst.right).nullability());
1484 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1485 }
1486
1487 let left_nulls = self.get_nulls_id(inst.left);
1488 let right_nulls = self.get_nulls_id(inst.right);
1489 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1490 let meta_id = self.mb.get_meta_id(Metadata {
1491 flags: MetaFlags::with_nullability(
1492 null,
1493 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1494 ),
1495 nulls,
1496 });
1497 self.init_metadata(node_id, meta_id);
1498 }
1499 Kind::Lookahead => {
1500 let mut nulls = self.get_nulls_id(inst.left);
1501 let left_nullability = inst.left.nullability(self);
1502 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1503 nulls = self.mb.nb.or_id(nulls, nulls_right);
1504 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1505
1506 let la_inner = inst.left;
1507 let la_tail = inst.right;
1508 let null = self
1509 .get_meta_flags(la_inner)
1510 .nullability()
1511 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1512 let contains_flags = self
1513 .get_flags_contains(la_inner)
1514 .or(self.get_flags_contains(la_tail));
1515
1516 let meta_id = self.mb.get_meta_id(Metadata {
1517 flags: MetaFlags::with_nullability(
1518 null,
1519 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1520 ),
1521 nulls,
1522 });
1523 self.init_metadata(node_id, meta_id);
1524 }
1525 Kind::Counted => {
1526 let best = inst.extra >> 16;
1527 let (null, nulls) = if best > 0 {
1528 let mut ns = BTreeSet::new();
1529 ns.insert(NullState::new(Nullability::CENTER, best));
1530 (Nullability::CENTER, self.mb.nb.get_id(ns))
1531 } else {
1532 (Nullability::NEVER, NullsId::EMPTY)
1533 };
1534 let meta_id = self.mb.get_meta_id(Metadata {
1535 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1536 nulls,
1537 });
1538 self.init_metadata(node_id, meta_id);
1539 }
1540 }
1541
1542 self.array.push(inst);
1543
1544 if let Some(rw) = self.post_init_simplify(node_id) {
1545 self.override_as(node_id, rw)
1546 } else {
1547 node_id
1548 }
1549 }
1550
1551 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1552 match self.get_kind(node_id) {
1553 Kind::Concat => {
1554 let lhs = node_id.left(self);
1555 let rhs = node_id.right(self);
1556 if lhs.is_pred_star(self).is_some() {
1557 if let Some(opttail) = rhs.is_opt_v(self) {
1558 if let Some(true) = self.subsumes(lhs, opttail) {
1559 return Some(lhs);
1561 }
1562 }
1563 }
1564 }
1565 Kind::Union => {
1566 let lhs = node_id.left(self);
1567 let rhs = node_id.right(self);
1568 let mut subsumed = false;
1569 rhs.iter_union_while(self, &mut |b, branch| {
1570 if b.nullable_subsumes(branch, lhs) {
1571 subsumed = true;
1572 }
1573 !subsumed
1574 });
1575 if subsumed {
1576 return Some(rhs);
1577 }
1578 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1579 return Some(rhs);
1580 }
1581 }
1582 _ => {}
1583 }
1584
1585 None
1586 }
1587
1588 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1590 if self.get_kind(lhs) != Kind::Union {
1591 return false; }
1593 let mut rhs_branches = Vec::new();
1594 let mut curr = rhs;
1595 while self.get_kind(curr) == Kind::Union {
1596 rhs_branches.push(self.get_left(curr));
1597 curr = self.get_right(curr);
1598 }
1599 rhs_branches.push(curr);
1600
1601 curr = lhs;
1602 while self.get_kind(curr) == Kind::Union {
1603 if !rhs_branches.contains(&self.get_left(curr)) {
1604 return false;
1605 }
1606 curr = self.get_right(curr);
1607 }
1608 rhs_branches.contains(&curr)
1609 }
1610
1611 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1613 if node == target {
1614 return true;
1615 }
1616 match self.get_kind(node) {
1617 Kind::Union => {
1618 self.nullable_subsumes(self.get_left(node), target)
1619 || self.nullable_subsumes(self.get_right(node), target)
1620 }
1621 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1622 self.nullable_subsumes(self.get_right(node), target)
1623 }
1624 _ => false,
1625 }
1626 }
1627
1628 pub fn num_nodes(&self) -> u32 {
1629 self.num_created
1630 }
1631 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1632 match self.index.get(&inst) {
1633 Some(&id) => id,
1634 None => self.init(inst),
1635 }
1636 }
1637 #[inline]
1638 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1639 self.index.get(inst)
1640 }
1641
1642 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1643 self.index.insert(key, subsumed);
1644 subsumed
1645 }
1646
1647 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1648 let key = &self.array[key.0 as usize];
1649 let entry = self.index.get_mut(key).unwrap();
1650 *entry = subsumed;
1651 subsumed
1652 }
1653
1654 #[inline]
1655 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1656 self.array[node_id.0 as usize].left
1657 }
1658
1659 #[inline]
1660 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1661 self.array[node_id.0 as usize].right
1662 }
1663
1664 #[inline]
1665 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1666 self.array[node_id.0 as usize].extra
1667 }
1668
1669 #[inline]
1670 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1671 debug_assert!(self.get_kind(node_id) == Kind::Concat);
1672 let mut curr = node_id;
1673 while self.get_kind(curr) == Kind::Concat {
1674 curr = curr.right(self);
1675 }
1676 curr
1677 }
1678
1679 fn has_trailing_la(&self, node: NodeId) -> bool {
1680 let end = match self.get_kind(node) {
1681 Kind::Concat => self.get_concat_end(node),
1682 Kind::Lookahead => node,
1683 _ => return false,
1684 };
1685 self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1686 }
1687
1688 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1689 if self.get_kind(node) == Kind::Lookahead {
1690 return (NodeId::EPS, node);
1691 }
1692 debug_assert!(self.get_kind(node) == Kind::Concat);
1693 let right = node.right(self);
1694 if self.get_kind(right) != Kind::Concat {
1695 return (node.left(self), right);
1696 }
1697 let (stripped, la) = self.strip_trailing_la(right);
1698 (self.mk_concat(node.left(self), stripped), la)
1699 }
1700 #[inline]
1701 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1702 debug_assert!(matches!(
1703 self.get_kind(lookahead_node_id),
1704 Kind::Lookahead | Kind::Counted
1705 ));
1706 lookahead_node_id.left(self)
1707 }
1708 #[inline]
1709 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1710 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1711 lookahead_node_id.right(self)
1712 }
1713 #[inline]
1714 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1715 debug_assert!(
1716 matches!(
1717 self.get_kind(lookahead_node_id),
1718 Kind::Lookahead | Kind::Counted
1719 ),
1720 "not lookahead/counted: {:?}",
1721 self.pp(lookahead_node_id)
1722 );
1723 self.get_extra(lookahead_node_id)
1724 }
1725 #[inline]
1726 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1727 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1728 lookbehind_node_id.left(self)
1729 }
1730 #[inline]
1731 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1732 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1733 lookbehind_node_id.right(self)
1734 }
1735
1736 #[inline]
1737 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1738 debug_assert!(
1739 self.array.len() > node_id.0 as usize,
1740 "array len: {:?}",
1741 node_id
1742 );
1743 self.array[node_id.0 as usize].kind
1744 }
1745
1746 #[inline]
1747 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1748 &self.array[node_id.0 as usize]
1749 }
1750
1751 #[inline]
1752 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1753 self.metadata[node_id.0 as usize]
1754 }
1755
1756 #[inline]
1757 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1758 debug_assert!(node_id.0 != u32::MAX);
1759 let meta_id = self.get_node_meta_id(node_id);
1760 debug_assert!(meta_id != MetadataId::MISSING);
1761 &self.mb.array[meta_id.0 as usize]
1762 }
1763
1764 #[inline]
1765 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1766 if node_id == NodeId::MISSING {
1767 NullsId::EMPTY
1768 } else {
1769 self.get_meta(node_id).nulls
1770 }
1771 }
1772
1773 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1774 self.mb
1775 .nb
1776 .array
1777 .iter()
1778 .map(|set| set.iter().cloned().collect())
1779 .collect()
1780 }
1781
1782 pub fn nulls_count(&self) -> usize {
1783 self.mb.nb.array.len()
1784 }
1785
1786 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1787 self.mb.nb.array[id as usize].iter().cloned().collect()
1788 }
1789
1790 #[inline]
1791 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1792 if node_id == NodeId::MISSING {
1793 NullsId::EMPTY
1794 } else {
1795 let nulls = self.get_meta(node_id).nulls;
1796 self.mb.nb.and_mask(nulls, mask)
1797 }
1798 }
1799
1800 #[inline]
1801 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1802 let meta_id = self.get_node_meta_id(node_id);
1803 let meta = &self.mb.array[meta_id.0 as usize];
1804 meta.flags
1805 }
1806
1807 #[inline]
1808 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1809 self.get_meta(node_id).flags.nullability()
1810 }
1811
1812 #[inline]
1813 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1814 let meta_id = self.get_node_meta_id(node_id);
1815 let meta = &self.mb.array[meta_id.0 as usize];
1816 meta.flags.all_contains_flags()
1817 }
1818
1819 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1820 if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1821 return self.strip_lb(node_id.right(self));
1822 }
1823 self.strip_lb_inner(node_id)
1824 }
1825
1826 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1827 if !self.contains_look(node_id) {
1828 return Ok(node_id);
1829 }
1830 if self.get_kind(node_id) == Kind::Concat
1831 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1832 {
1833 return self.strip_lb_inner(node_id.right(self));
1834 }
1835 if self.get_kind(node_id) == Kind::Inter {
1836 let left = self.strip_lb_inner(node_id.left(self))?;
1837 let right = self.strip_lb_inner(node_id.right(self))?;
1838 return Ok(self.mk_inter(left, right));
1839 }
1840 if self.get_kind(node_id) == Kind::Union {
1841 let left = self.strip_lb_inner(node_id.left(self))?;
1842 let right = self.strip_lb_inner(node_id.right(self))?;
1843 return Ok(self.mk_union(left, right));
1844 }
1845 match self.get_kind(node_id) {
1846 Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1847 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1848 Err(AlgebraError::UnsupportedPattern)
1849 }
1850 _ => Ok(node_id),
1851 }
1852 }
1853
1854 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1856 if !self.contains_anchors(node_id) {
1857 return node_id;
1858 }
1859 match self.get_kind(node_id) {
1860 Kind::Begin => NodeId::BOT,
1861 Kind::Concat => {
1862 let left = self.nonbegins(node_id.left(self));
1863 if left == NodeId::BOT {
1864 return NodeId::BOT;
1865 }
1866 self.mk_concat(left, node_id.right(self))
1867 }
1868 Kind::Union => {
1869 let left = self.nonbegins(node_id.left(self));
1870 let right = self.nonbegins(node_id.right(self));
1871 self.mk_union(left, right)
1872 }
1873 _ => node_id,
1874 }
1875 }
1876
1877 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1878 match self.get_kind(node_id) {
1879 Kind::Concat => {
1880 let head = node_id.left(self);
1881 match self.get_kind(head) {
1882 _ if self.any_nonbegin_nullable(head) => {
1883 self.strip_prefix_safe(node_id.right(self))
1884 }
1885 _ => node_id,
1886 }
1887 }
1888 _ => node_id,
1889 }
1890 }
1891 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1892 match self.get_kind(node_id) {
1893 Kind::Begin => NodeId::BOT,
1894 Kind::Concat => {
1895 let head = self.prune_begin(node_id.left(self));
1896 let tail = self.prune_begin(node_id.right(self));
1897 self.mk_concat(head, tail)
1898 }
1899 Kind::Lookbehind => {
1900 if !node_id.right(self).is_missing() {
1901 return node_id;
1902 }
1903 let head = self.prune_begin(node_id.left(self));
1904 head
1905 }
1906 Kind::Union => {
1907 let left = self.prune_begin(node_id.left(self));
1908 let right = self.prune_begin(node_id.right(self));
1909 self.mk_union(left, right)
1910 }
1911 _ => node_id,
1912 }
1913 }
1914
1915 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1916 if !self.contains_look(node_id) {
1917 return Ok(node_id);
1918 }
1919 if self.get_kind(node_id) == Kind::Concat
1920 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1921 {
1922 let left = node_id.left(self);
1923 let ll = left.left(self).missing_to_eps();
1924 let lr = left.right(self).missing_to_eps();
1925 let new_l = self.mk_concat(ll, lr);
1926 let new = self.mk_concat(new_l, node_id.right(self));
1927 return Ok(new);
1928 }
1929 if self.get_kind(node_id) == Kind::Inter {
1930 let left = self.normalize_rev(node_id.left(self))?;
1931 let right = self.normalize_rev(node_id.right(self))?;
1932 return Ok(self.mk_inter(left, right));
1933 }
1934 if self.get_kind(node_id) == Kind::Union {
1935 let left = self.normalize_rev(node_id.left(self))?;
1936 let right = self.normalize_rev(node_id.right(self))?;
1937 return Ok(self.mk_union(left, right));
1938 }
1939 match self.get_kind(node_id) {
1940 Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1941 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1942 Err(AlgebraError::UnsupportedPattern)
1943 }
1944 _ => Ok(node_id),
1945 }
1946 }
1947
1948 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1949 debug_assert!(node_id != NodeId::MISSING);
1950 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1951 if *rev != NodeId::MISSING {
1952 return Ok(*rev);
1953 }
1954 }
1955 let rw = match self.get_kind(node_id) {
1956 Kind::End => NodeId::BEGIN,
1957 Kind::Begin => NodeId::END,
1958 Kind::Pred => node_id,
1959 Kind::Concat => {
1960 let left = self.reverse(node_id.left(self))?;
1961 let right = self.reverse(node_id.right(self))?;
1962 self.mk_concat(right, left)
1963 }
1964 Kind::Union => {
1965 let left = self.reverse(node_id.left(self))?;
1966 let right = self.reverse(node_id.right(self))?;
1967 self.mk_union(left, right)
1968 }
1969 Kind::Inter => {
1970 let left = self.reverse(node_id.left(self))?;
1971 let right = self.reverse(node_id.right(self))?;
1972 self.mk_inter(left, right)
1973 }
1974 Kind::Star => {
1975 let body = self.reverse(node_id.left(self))?;
1976 self.mk_star(body)
1977 }
1978 Kind::Compl => {
1979 if self.contains_look(node_id.left(self)) {
1980 return Err(AlgebraError::UnsupportedPattern);
1981 }
1982 let body = self.reverse(node_id.left(self))?;
1983 self.mk_compl(body)
1984 }
1985 Kind::Lookbehind => {
1986 let prev = self.get_lookbehind_prev(node_id);
1987 let inner_id = self.get_lookbehind_inner(node_id);
1988 let rev_inner = self.reverse(inner_id)?;
1989 let rev_prev = if prev != NodeId::MISSING {
1990 self.reverse(prev)?
1991 } else {
1992 NodeId::MISSING
1993 };
1994 self.mk_lookahead(rev_inner, rev_prev, 0)
1995 }
1996 Kind::Lookahead => {
1997 let rel = self.get_lookahead_rel(node_id);
1998 if rel == u32::MAX {
1999 let lbody = self.get_lookahead_inner(node_id);
2001 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2002 let lbody_ts = self.mk_concat(lbody, NodeId::TS);
2003 let ltail_ts = self.mk_concat(ltail, NodeId::TS);
2004 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2005 let rev = self.reverse(as_inter)?;
2006 self.init_reversed(node_id, rev);
2007 return Ok(rev);
2008 }
2009 if rel != 0 {
2010 return Err(AlgebraError::UnsupportedPattern);
2011 }
2012 let tail_node = self.get_lookahead_tail(node_id);
2013 let rev_tail = if tail_node != NodeId::MISSING {
2014 self.reverse(tail_node)?
2015 } else {
2016 NodeId::MISSING
2017 };
2018 let inner_id = self.get_lookahead_inner(node_id);
2019 let rev_inner = self.reverse(inner_id)?;
2020 self.mk_lookbehind(rev_inner, rev_tail)
2021 }
2022 Kind::Counted => {
2023 return Err(AlgebraError::UnsupportedPattern);
2024 }
2025 };
2026 self.init_reversed(node_id, rw);
2027 Ok(rw)
2028 }
2029
2030 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2031 let node = NodeKey {
2032 kind: Kind::Pred,
2033 left: NodeId::MISSING,
2034 right: NodeId::MISSING,
2035 extra: pred.0,
2036 };
2037 self.get_node_id(node)
2038 }
2039
2040 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2041 let key = NodeKey {
2042 kind: Kind::Compl,
2043 left: body,
2044 right: NodeId::MISSING,
2045 extra: u32::MAX,
2046 };
2047 if let Some(id) = self.key_is_created(&key) {
2048 return *id;
2049 }
2050 if body == NodeId::BOT {
2051 return NodeId::TS;
2052 }
2053 if body == NodeId::TS {
2054 return NodeId::BOT;
2055 }
2056
2057 if let Some(contains_body) = body.is_contains(self) {
2058 if contains_body.is_pred(self) {
2059 let pred = contains_body.pred_tset(self);
2060 let notpred = self.mk_pred_not(pred);
2061 let node = self.mk_star(notpred);
2062 return self.init_as(key, node);
2063 } else if contains_body == NodeId::END {
2064 return self.init_as(key, NodeId::BOT);
2065 }
2066 };
2067
2068 if self.get_kind(body) == Kind::Compl {
2069 return self.get_node(body).left;
2070 }
2071
2072 self.get_node_id(key)
2073 }
2074
2075 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2076 let nid = self.get_nulls_id(body);
2077 let nref = self.mb.nb.get_set_ref(nid).clone();
2078 let mut futures = NodeId::BOT;
2079 for n in nref.iter() {
2080 if !n.is_mask_nullable(mask) {
2081 continue;
2082 }
2083
2084 let eff = if n.rel == 0 {
2085 NodeId::EPS
2086 } else {
2087 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2088 };
2089 futures = self.mk_union(futures, eff)
2090 }
2091 futures
2092 }
2093
2094 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2095 if cfg!(feature = "norewrite") {
2096 return None;
2097 }
2098
2099 if self.get_kind(tail) == Kind::Lookbehind {
2100 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2101 return self
2102 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2103 .ok();
2104 }
2105 if self.get_kind(head) == Kind::Lookahead {
2106 let la_tail = self.get_lookahead_tail(head);
2107 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2108 if new_la_tail.is_center_nullable(self) {
2109 let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2110 if non_null_tail == NodeId::BOT {
2111 return None;
2112 }
2113 let la_body = self.get_lookahead_inner(head);
2114 return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2115 }
2116 let la_body = self.get_lookahead_inner(head);
2117 let la_rel = self.get_lookahead_rel(head);
2118 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2119 let tail_rel = self.get_lookahead_rel(new_la_tail);
2120 tail_rel + la_rel
2121 } else {
2122 u32::MAX
2123 };
2124
2125 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2126 }
2127
2128 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2129 return Some(head);
2130 }
2131 if head == NodeId::TS && tail == NodeId::END {
2132 return Some(head);
2133 }
2134
2135 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2136 return Some(NodeId::TS);
2137 }
2138
2139 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2140 return Some(NodeId::TS);
2141 }
2142
2143 if self.get_kind(tail) == Kind::Union && head == NodeId::TS {
2144 let mut should_distribute_top = false;
2145 self.iter_unions(tail, |v| {
2146 if v == NodeId::BEGIN || self.starts_with_ts(v) {
2147 should_distribute_top = true;
2148 }
2149 });
2150 if should_distribute_top {
2151 let mut new_union = NodeId::BOT;
2152 let mut curr = tail;
2153 while self.get_kind(curr) == Kind::Union {
2154 let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2155 new_union = self.mk_union(new_node, new_union);
2156 curr = curr.right(self);
2157 }
2158 let new_node = self.mk_concat(NodeId::TS, curr);
2159 new_union = self.mk_union(new_union, new_node);
2160 return Some(new_union);
2161 }
2162 }
2163
2164 if self.get_kind(head) == Kind::Union && head == NodeId::TS {
2165 let mut should_distribute_top = false;
2166 self.iter_unions(head, |v| {
2167 if v == NodeId::END || self.starts_with_ts(v) {
2168 should_distribute_top = true;
2169 }
2170 });
2171 if should_distribute_top {
2172 let mut new_union = NodeId::BOT;
2173 let mut curr = head;
2174 while self.get_kind(curr) == Kind::Union {
2175 let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2176 new_union = self.mk_union(new_node, new_union);
2177 curr = curr.right(self);
2178 }
2179 let new_node = self.mk_concat(curr, NodeId::TS);
2180 new_union = self.mk_union(new_union, new_node);
2181 return Some(new_union);
2182 }
2183 }
2184
2185 if self.get_kind(head) == Kind::Inter && tail == NodeId::TS {
2186 let mut alltopstar = true;
2187 iter_inter!(self, head, |v| {
2188 alltopstar = self.ends_with_ts(v);
2189 });
2190 if alltopstar {
2191 return Some(head);
2192 }
2193 }
2194
2195 if head.is_star(self) && head == tail {
2196 return Some(head);
2197 }
2198
2199 None
2200 }
2201
2202 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2203 use Kind::*;
2204
2205 if cfg!(feature = "norewrite") {
2206 return None;
2207 }
2208 if left == right {
2209 return Some(left);
2210 }
2211
2212 if right.is_kind(self, Kind::Union) && left == right.left(self) {
2213 return Some(right);
2214 }
2215
2216 if self.get_kind(left) == Kind::Pred && self.get_kind(right) == Kind::Pred {
2217 let l = left.pred_tset(self);
2218 let r = right.pred_tset(self);
2219 let solver = self.solver();
2220 let psi = solver.or_id(l, r);
2221 let rewrite = self.mk_pred(psi);
2222 return Some(rewrite);
2223 }
2224
2225 if left == NodeId::EPS
2226 && self.get_extra(left) == 0
2227 && self.nullability(right) == Nullability::ALWAYS
2228 {
2229 return Some(right);
2230 }
2231
2232 if self.get_kind(left) == Kind::Lookahead && self.get_kind(right) == Kind::Lookahead {
2233 let lb = left.left(self);
2234 let lt = left.right(self);
2235 let lrel = left.extra(self);
2236
2237 let rb = right.left(self);
2238 let rt = right.right(self);
2239 let rrel = right.extra(self);
2240
2241 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2242 let unioned = self.mk_union(lb, rb);
2243 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2244 return Some(node);
2245 }
2246 }
2247
2248 if right.is_kind(self, Concat) {
2249 if left == NodeId::END
2250 && right.left(self) == NodeId::END
2251 && self.nullability(right).has(Nullability::END)
2252 {
2253 return Some(right);
2254 }
2255 if left == right.left(self) {
2257 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2258 let rw = self.mk_concat(left, rhs);
2259 return Some(rw);
2260 }
2261 if left.is_kind(self, Concat) {
2262 let head1 = left.left(self);
2263 let head2 = right.left(self);
2264
2265 if head1 == head2 {
2266 let t1 = left.right(self);
2267 let t2 = right.right(self);
2268 if head1 == NodeId::TS {
2270 if t2.has_concat_tail(self, t1) {
2271 return Some(left);
2272 }
2273 if t1.has_concat_tail(self, t2) {
2274 return Some(right);
2275 }
2276 }
2277 let un = self.mk_union(t1, t2);
2278 return Some(self.mk_concat(left.left(self), un));
2279 }
2280
2281 if false {
2284 let end1 = self.get_concat_end(left);
2285 let end2 = self.get_concat_end(right);
2286 if end1 == end2 {
2287 let flags = left.flags_contains(self).or(right.flags_contains(self));
2288 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2289 let rev1 = self.reverse(left).unwrap();
2290 let rev2 = self.reverse(right).unwrap();
2291
2292 let union_rev = self.mk_union(rev1, rev2);
2293 return Some(self.reverse(union_rev).unwrap());
2294 }
2295 }
2296 }
2297 }
2298 if left.is_pred(self) && left == right.left(self) {
2299 let un = self.mk_opt(right.right(self));
2300 let conc = self.mk_concat(left, un);
2301 return Some(conc);
2302 }
2303 }
2304
2305 if left == NodeId::EPS && right.is_plus(self) {
2306 let result = self.mk_star(right.left(self));
2307 return Some(result);
2308 }
2309
2310 if self.flags == BuilderFlags::SUBSUME {
2313 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2314 return Some(rw);
2315 }
2316 }
2317
2318 None
2319 }
2320
2321 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2322 let mut curr = node;
2323 while self.get_kind(curr) == Kind::Inter {
2324 out.push(self.get_left(curr));
2325 curr = self.get_right(curr);
2326 }
2327 out.push(curr);
2328 }
2329
2330 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2331 let mut curr = node;
2332 let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2333 if has_prefix {
2334 curr = self.get_right(curr);
2335 }
2336 let mut count = 0u32;
2337 let mut pred_set = None;
2338 while self.get_kind(curr) == Kind::Concat {
2339 let head = self.get_left(curr);
2340 if self.get_kind(head) != Kind::Pred {
2341 return None;
2342 }
2343 let ps = head.pred_tset(self);
2344 match pred_set {
2345 None => pred_set = Some(ps),
2346 Some(existing) => {
2347 if existing != ps {
2348 return None;
2349 }
2350 }
2351 }
2352 curr = self.get_right(curr);
2353 count += 1;
2354 }
2355 if count == 0 || self.get_kind(curr) != Kind::Star {
2356 return None;
2357 }
2358 Some((has_prefix, pred_set.unwrap(), curr, count))
2359 }
2360
2361 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2362 let mut j = 0;
2363 for &s in sub {
2364 while j < sup.len() && sup[j] < s {
2365 j += 1;
2366 }
2367 if j >= sup.len() || sup[j] != s {
2368 return false;
2369 }
2370 j += 1;
2371 }
2372 true
2373 }
2374
2375 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2376 if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2377 return None;
2378 }
2379
2380 let mut lc: Vec<NodeId> = Vec::new();
2381 let mut rc: Vec<NodeId> = Vec::new();
2382 self.collect_inter_components(left, &mut lc);
2383 self.collect_inter_components(right, &mut rc);
2384
2385 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2387 return Some(left);
2388 }
2389 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2391 return Some(right);
2392 }
2393
2394 if lc.len() == rc.len() {
2395 let mut diff_idx = None;
2396 for i in 0..lc.len() {
2397 if lc[i] != rc[i] {
2398 if diff_idx.is_some() {
2399 return None;
2400 }
2401 diff_idx = Some(i);
2402 }
2403 }
2404 if let Some(idx) = diff_idx {
2405 let a = lc[idx];
2406 let b = rc[idx];
2407 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2408 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2409 {
2410 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2411 return if ca < cb { Some(left) } else { Some(right) };
2412 }
2413 }
2414 }
2415 }
2416
2417 None
2418 }
2419
2420 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2421 if cfg!(feature = "norewrite") {
2422 return None;
2423 }
2424 if left == right {
2425 return Some(left);
2426 }
2427
2428 if self.get_kind(right) == Kind::Union {
2429 let mut result = NodeId::BOT;
2430 self.iter_unions_b(
2431 right,
2432 &mut (|b, v| {
2433 let new_inter = b.mk_inter(v, left);
2434 result = b.mk_union(result, new_inter);
2435 }),
2436 );
2437 return Some(result);
2438 }
2439 if self.get_kind(left) == Kind::Union {
2440 let mut result = NodeId::BOT;
2441 self.iter_unions_b(
2442 left,
2443 &mut (|b, v| {
2444 let new_inter = b.mk_inter(v, right);
2445 result = b.mk_union(result, new_inter);
2446 }),
2447 );
2448 return Some(result);
2449 }
2450
2451 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2452 return Some(NodeId::BOT);
2453 }
2454
2455 if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2456 let bodies = self.mk_union(left.left(self), right.left(self));
2457 return Some(self.mk_compl(bodies));
2458 }
2459
2460 if left == NodeId::TOPPLUS {
2461 if right.is_pred_star(self).is_some() {
2462 let newloop = self.mk_plus(right.left(self));
2463 return Some(newloop);
2464 }
2465 if right.is_never_nullable(self) {
2466 return Some(right);
2467 }
2468 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2469 return Some(NodeId::BOT);
2470 }
2471 if right.is_kind(self, Kind::Concat) {}
2472 }
2473
2474 {
2475 let l_is_la = left.is_lookahead(self);
2476 let r_is_la = right.is_lookahead(self);
2477 let l_is_cla = !l_is_la
2478 && self.get_kind(left) == Kind::Concat
2479 && self.get_kind(left.left(self)) == Kind::Lookahead;
2480 let r_is_cla = !r_is_la
2481 && self.get_kind(right) == Kind::Concat
2482 && self.get_kind(right.left(self)) == Kind::Lookahead;
2483 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2484 let (la_node, other, concat_body) = if r_is_la {
2485 (right, left, NodeId::MISSING)
2486 } else if l_is_la {
2487 (left, right, NodeId::MISSING)
2488 } else if r_is_cla {
2489 (right.left(self), left, right.right(self))
2490 } else {
2491 (left.left(self), right, left.right(self))
2492 };
2493 let la_body = la_node.left(self);
2494 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2495 let inter_right = if concat_body.is_missing() {
2496 la_tail
2497 } else {
2498 self.mk_concat(la_tail, concat_body)
2499 };
2500 let new_body = self.mk_inter(other, inter_right);
2501 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2502 return Some(self.mk_concat(la, new_body));
2503 }
2504 }
2505
2506 if self.get_kind(right) == Kind::Compl {
2507 let compl_body = right.left(self);
2508 if left == compl_body {
2509 return Some(NodeId::BOT);
2510 }
2511 if self.get_kind(compl_body) == Kind::Concat {
2512 let compl_head = compl_body.left(self);
2513 if compl_body.right(self) == NodeId::TS && compl_head == left {
2514 return Some(NodeId::BOT);
2515 }
2516 }
2517 }
2518
2519 if let Some(pleft) = left.is_pred_star(self) {
2520 if let Some(pright) = right.is_pred_star(self) {
2521 let merged = self.mk_inter(pleft, pright);
2522 return Some(self.mk_star(merged));
2523 }
2524 }
2525
2526 {
2527 let l_is_clb = self.get_kind(left) == Kind::Concat
2528 && self.get_kind(left.left(self)) == Kind::Lookbehind;
2529 let r_is_clb = self.get_kind(right) == Kind::Concat
2530 && self.get_kind(right.left(self)) == Kind::Lookbehind;
2531 if l_is_clb || r_is_clb {
2532 let (lb, body) = if l_is_clb && r_is_clb {
2533 let lb1 = left.left(self);
2534 let lb2 = right.left(self);
2535 let inner = self.mk_inter(
2536 self.get_lookbehind_inner(lb1),
2537 self.get_lookbehind_inner(lb2),
2538 );
2539 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2540 let body = self.mk_inter(left.right(self), right.right(self));
2541 (lb, body)
2542 } else if l_is_clb {
2543 let lb = left.left(self);
2544 let body = self.mk_inter(left.right(self), right);
2545 (lb, body)
2546 } else {
2547 let lb = right.left(self);
2548 let body = self.mk_inter(left, right.right(self));
2549 (lb, body)
2550 };
2551 return Some(self.mk_concat(lb, body));
2552 }
2553 }
2554
2555 {
2556 let l_has_la = self.has_trailing_la(left);
2557 let r_has_la = self.has_trailing_la(right);
2558 if l_has_la || r_has_la {
2559 let (body, la) = if l_has_la && r_has_la {
2560 let (lbody, l_la) = self.strip_trailing_la(left);
2561 let (rbody, r_la) = self.strip_trailing_la(right);
2562 let inner = self.mk_inter(
2563 self.get_lookahead_inner(l_la),
2564 self.get_lookahead_inner(r_la),
2565 );
2566 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2567 let body = self.mk_inter(lbody, rbody);
2568 (body, la)
2569 } else if l_has_la {
2570 let (lbody, la) = self.strip_trailing_la(left);
2571 let body = self.mk_inter(lbody, right);
2572 (body, la)
2573 } else {
2574 let (rbody, la) = self.strip_trailing_la(right);
2575 let body = self.mk_inter(left, rbody);
2576 (body, la)
2577 };
2578 return Some(self.mk_concat(body, la));
2579 }
2580 }
2581
2582 None
2583 }
2584
2585 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2586 if cfg!(feature = "norewrite") {
2587 return None;
2588 }
2589 debug_assert!(self.get_kind(right_union) == Kind::Union);
2590
2591 let mut rewritten = None;
2592 right_union.iter_union_while(
2593 self,
2594 &mut (|b, v| {
2595 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2596 rewritten = Some((v, rw));
2597 false
2598 } else {
2599 true
2600 }
2601 }),
2602 );
2603
2604 if let Some(rw) = rewritten {
2605 let mut new_union = NodeId::BOT;
2606 right_union.iter_union(
2607 self,
2608 &mut (|b, v| {
2609 if v == rw.0 {
2610 new_union = b.mk_union(rw.1, new_union)
2611 } else {
2612 new_union = b.mk_union(v, new_union)
2613 }
2614 }),
2615 );
2616 return Some(new_union);
2617 };
2618
2619 None
2620 }
2621
2622 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2623 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2624 debug_assert!(tail != NodeId::MISSING);
2625 let key = NodeKey {
2626 kind: Kind::Concat,
2627 left: head,
2628 right: tail,
2629 extra: u32::MAX,
2630 };
2631 if let Some(id) = self.key_is_created(&key) {
2632 return *id;
2633 }
2634
2635 if head == NodeId::BOT || tail == NodeId::BOT {
2636 return NodeId::BOT;
2637 }
2638 if head == NodeId::EPS {
2639 return tail;
2640 }
2641 if tail == NodeId::EPS {
2642 return head;
2643 }
2644
2645 match tail {
2646 NodeId::BEGIN => {
2648 if !self.is_nullable(head, Nullability::BEGIN) {
2649 return NodeId::BOT;
2650 } else {
2651 return NodeId::BEGIN;
2652 }
2653 }
2654 _ => {}
2655 }
2656
2657 if head.is_kind(self, Kind::Concat) {
2659 let left = head.left(self);
2660 let newright = self.mk_concat(head.right(self), tail);
2661 let updated = self.mk_concat(left, newright);
2662 return self.init_as(key, updated);
2663 }
2664
2665 if head == NodeId::TS && tail == NodeId::END {
2666 return NodeId::TS;
2667 }
2668
2669 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2670 return NodeId::BOT;
2671 }
2672
2673 if self.get_kind(tail) == Kind::Concat {
2674 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2675 let upd = self.mk_concat(rw, tail.right(self));
2676 return self.init_as(key, upd);
2677 }
2678 }
2679
2680 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2681 return self.init_as(key, new);
2682 }
2683
2684 match (self.get_kind(head), self.get_kind(tail)) {
2685 (Kind::Star, Kind::Concat) if head.is_star(self) => {
2687 let rl = tail.left(self);
2688 if head == rl {
2689 return self.init_as(key, tail);
2690 }
2691 }
2692 (_, Kind::Concat) => {
2694 let curr = head;
2695 let h2 = self.mk_concat(curr, tail.left(self));
2696 let tr = tail.right(self);
2697 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2698 return self.init_as(key, new);
2699 }
2700 }
2701 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2702 return self.init_as(key, tail);
2703 }
2704 _ => {}
2705 }
2706
2707 self.init(key)
2708 }
2709
2710 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2711 let lb_body = {
2713 match self.starts_with_ts(lb_body) {
2714 true => lb_body,
2715 false => self.mk_concat(NodeId::TS, lb_body),
2716 }
2717 };
2718 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2720 }
2721
2722 fn mk_lookbehind_internal(
2723 &mut self,
2724 lb_body: NodeId,
2725 lb_prev: NodeId,
2726 ) -> Result<NodeId, AlgebraError> {
2727 debug_assert!(lb_body != NodeId::MISSING);
2728 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2729 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2730 return Ok(NodeId::BOT);
2731 }
2732 if lb_body == NodeId::TS {
2733 return Ok(lb_prev);
2734 }
2735 if lb_body == NodeId::EPS {
2736 match lb_prev {
2737 NodeId::MISSING => return Ok(NodeId::EPS),
2738 NodeId::EPS => return Ok(NodeId::EPS),
2739 _ => return Ok(lb_prev),
2740 }
2741 }
2742
2743 let key = NodeKey {
2744 kind: Kind::Lookbehind,
2745 left: lb_body,
2746 right: lb_prev,
2747 extra: u32::MAX,
2748 };
2749 match self.key_is_created(&key) {
2750 Some(id) => Ok(*id),
2751 None => {
2752 if lb_prev == NodeId::TS {
2753 return Ok(self.mk_concat(lb_prev, lb_body));
2754 }
2755
2756 Ok(self.init(key))
2757 }
2758 }
2759 }
2760
2761 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2762 let la_body = {
2764 match self.ends_with_ts(la_body) {
2765 true => la_body,
2766 false => self.mk_concat(la_body, NodeId::TS),
2767 }
2768 };
2769 let rel = if NodeId::MISSING == la_tail {
2770 rel
2771 } else {
2772 match la_tail.is_center_nullable(self) {
2773 false => u32::MAX,
2774 true => rel,
2775 }
2776 };
2777
2778 self.mk_lookahead_internal(la_body, la_tail, rel)
2779 }
2780
2781 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2783 let key = NodeKey {
2784 kind: Kind::Lookahead,
2785 left: la_body,
2786 right: la_tail,
2787 extra: rel,
2788 };
2789 if let Some(id) = self.key_is_created(&key) {
2790 return *id;
2791 }
2792 if la_body == NodeId::TS {
2793 if rel == 0 {
2794 return la_tail.missing_to_eps();
2795 } else {
2796 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2797 }
2798 }
2799 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2800 return NodeId::BOT;
2801 }
2802 if la_tail.is_missing() && rel == u32::MAX {
2803 return NodeId::BOT;
2804 }
2805
2806 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2807 return la_body;
2808 }
2809
2810 if la_tail == NodeId::TS {
2811 if rel == 0 || rel == u32::MAX {
2812 return self.mk_concat(la_body, NodeId::TS);
2813 } else if rel == u32::MAX {
2814 return self.mk_begins_with(la_body);
2815 }
2816 }
2817
2818 if rel == u32::MAX {
2819 if la_tail.is_missing() {
2820 return NodeId::BOT;
2821 }
2822
2823 if self.is_always_nullable(la_body) {
2824 return la_tail.missing_to_eps();
2825 }
2826
2827 if la_tail != NodeId::MISSING {
2828 match self.get_kind(la_tail) {
2829 _ => {
2830 if la_body.is_compl_plus_end(self) {
2831 let minlen = self.get_min_length_only(la_tail);
2832 if minlen >= 1 {
2833 return NodeId::BOT;
2834 }
2835 }
2836 }
2837 }
2838 }
2839 }
2840
2841 if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2842 let la_body2 = self.get_lookahead_inner(la_tail);
2843 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2844 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2845 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2846 let new_la_rel = self.get_lookahead_rel(la_tail);
2847 let new_la_tail = self.get_lookahead_tail(la_tail);
2848 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2849 }
2850
2851 if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2852 let la_body_right = la_body.right(self);
2853 if self.is_always_nullable(la_body_right) {
2854 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2855 }
2856 if la_body.right(self) == NodeId::END {
2857 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2858 }
2859 let bodyright = la_body.right(self);
2860 if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2861 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2862 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2863 }
2864 }
2865
2866 if la_tail != NodeId::MISSING {
2867 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2868 let lpred = la_body.left(self);
2869 if self.get_kind(lpred) == Kind::Pred {
2870 let l = lpred.pred_tset(self);
2871 let r = la_tail.pred_tset(self);
2872 let psi_and = self.solver().and_id(l, r);
2873 let rewrite = self.mk_pred(psi_and);
2874 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2875 let new_right =
2876 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2877 return self.mk_concat(rewrite, new_right);
2878 }
2879 }
2880 }
2881
2882 self.get_node_id(key)
2883 }
2884
2885 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2886 let has_match = (packed >> 16) > 0;
2887 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2888 return NodeId::BOT;
2889 }
2890 debug_assert!(
2891 body == NodeId::BOT || !self.is_infinite(body),
2892 "Counted body must have finite max length"
2893 );
2894 let chain = self.prune_counted_chain(body, chain);
2895 let key = NodeKey {
2896 kind: Kind::Counted,
2897 left: body,
2898 right: chain,
2899 extra: packed,
2900 };
2901 if let Some(id) = self.key_is_created(&key) {
2902 return *id;
2903 }
2904 self.get_node_id(key)
2905 }
2906
2907 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2908 if chain == NodeId::MISSING || body == NodeId::BOT {
2909 return chain;
2910 }
2911 if self.nullability(body) != Nullability::NEVER {
2912 return NodeId::MISSING;
2913 }
2914 let chain_body = chain.left(self);
2915 if chain_body == NodeId::BOT {
2916 return chain;
2917 }
2918 let not_begins = self.mk_not_begins_with(body);
2919 let inter = self.mk_inter(chain_body, not_begins);
2920 let is_empty = inter == NodeId::BOT;
2921 if is_empty {
2922 self.prune_counted_chain(body, chain.right(self))
2923 } else {
2924 chain
2925 }
2926 }
2927
2928 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2929 let neg_inner = self.mk_concat(body, NodeId::TS);
2930 let neg_part = self.mk_compl(neg_inner);
2931 let conc = self.mk_concat(neg_part, NodeId::END);
2932 self.mk_lookahead(conc, NodeId::MISSING, rel)
2933 }
2934
2935 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2936 match self.get_node(body).kind {
2937 Kind::Pred => {
2938 let psi = body.pred_tset(self);
2939 let negated = self.mk_pred_not(psi);
2940 let union = self.mk_union(NodeId::BEGIN, negated);
2941 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2943 }
2944 _ => {
2945 let uc = crate::unicode_classes::utf8_char(self);
2948 let neg = self.mk_compl(body);
2949 let negated = self.mk_inter(neg, uc);
2950 let union = self.mk_union(NodeId::BEGIN, negated);
2951 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2953 }
2954 }
2955 }
2956
2957 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2958 debug_assert!(left != NodeId::MISSING);
2959 debug_assert!(right != NodeId::MISSING);
2960 if left > right {
2961 return self.mk_union(right, left);
2962 }
2963 let key = NodeKey {
2964 kind: Kind::Union,
2965 left,
2966 right,
2967 extra: u32::MAX,
2968 };
2969 if let Some(id) = self.key_is_created(&key) {
2970 return *id;
2971 }
2972
2973 if left == right {
2974 return left;
2975 }
2976 if left == NodeId::BOT {
2977 return right;
2978 }
2979 if right == NodeId::BOT {
2980 return left;
2981 }
2982 if right == NodeId::TS {
2983 return right;
2984 }
2985 if left == NodeId::TS {
2986 return left;
2987 }
2988
2989 match (self.get_kind(left), self.get_kind(right)) {
2990 (Kind::Union, _) => {
2991 self.iter_unions_b(left, &mut |b, v| {
2992 b.temp_vec.push(v);
2993 });
2994 self.iter_unions_b(right, &mut |b, v| {
2995 b.temp_vec.push(v);
2996 });
2997 self.temp_vec.sort();
2998 let tree = self.temp_vec.clone();
2999 self.temp_vec.clear();
3000 let newnode = tree
3001 .iter()
3002 .rev()
3003 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3004 return self.init_as(key, newnode);
3005 }
3006 (_, Kind::Union) => {
3007 let rleft = right.left(self);
3008 if left > rleft {
3010 self.iter_unions_b(left, &mut |b, v| {
3011 b.temp_vec.push(v);
3012 });
3013 self.iter_unions_b(right, &mut |b, v| {
3014 b.temp_vec.push(v);
3015 });
3016 self.temp_vec.sort();
3017 let tree = self.temp_vec.clone();
3018 self.temp_vec.clear();
3019 let newnode = tree
3020 .iter()
3021 .rev()
3022 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3023 return self.init_as(key, newnode);
3024 } else {
3025 if let Some(rw) = self.attempt_rw_unions(left, right) {
3026 return self.init_as(key, rw);
3027 }
3028 }
3029 }
3030 _ => {}
3031 }
3032
3033 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3034 return self.init_as(key, rw);
3035 }
3036 self.init(key)
3037 }
3038
3039 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3040 debug_assert!(left_id != NodeId::MISSING);
3041 debug_assert!(right_id != NodeId::MISSING);
3042 if left_id == right_id {
3043 return left_id;
3044 }
3045 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3046 return NodeId::BOT;
3047 }
3048 if left_id == NodeId::TS {
3049 return right_id;
3050 }
3051 if right_id == NodeId::TS {
3052 return left_id;
3053 }
3054 if left_id > right_id {
3055 return self.mk_inter(right_id, left_id);
3056 }
3057 let key = NodeKey {
3058 kind: Kind::Inter,
3059 left: left_id,
3060 right: right_id,
3061 extra: u32::MAX,
3062 };
3063 if let Some(id) = self.key_is_created(&key) {
3064 return *id;
3065 }
3066
3067 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3068 return self.init_as(key, rw);
3069 }
3070
3071 self.init(key)
3072 }
3073
3074 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3075 let node = NodeKey {
3076 kind,
3077 left: NodeId::MISSING,
3078 right: NodeId::MISSING,
3079 extra: u32::MAX,
3080 };
3081 self.init(node)
3082 }
3083
3084 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3085 let star = self.mk_star(body_id);
3086 self.mk_concat(body_id, star)
3087 }
3088 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3089 let opt = self.mk_opt(body_id);
3090 let mut nodes1 = vec![];
3091 for _ in lower..upper {
3092 nodes1.push(opt);
3093 }
3094 for _ in 0..lower {
3095 nodes1.push(body_id);
3096 }
3097 self.mk_concats(nodes1.into_iter())
3098 }
3099 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3100 self.mk_union(NodeId::EPS, body_id)
3101 }
3102
3103 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3104 let key = NodeKey {
3105 kind: Kind::Star,
3106 left: body_id,
3107 right: NodeId::MISSING,
3108 extra: 0,
3109 };
3110 if let Some(id) = self.key_is_created(&key) {
3111 return *id;
3112 }
3113 if body_id.is_kind(self, Kind::Star) {
3115 return body_id;
3116 }
3117 self.get_node_id(key)
3118 }
3119
3120 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3123 match self.get_kind(node_id) {
3124 Kind::End => Nullability::EMPTYSTRING,
3125 Kind::Begin => Nullability::EMPTYSTRING,
3126 Kind::Pred => Nullability::NEVER,
3127 Kind::Star => Nullability::ALWAYS,
3128 Kind::Inter | Kind::Concat => {
3129 let lnull = self.nullability_emptystring(node_id.left(self));
3130 let rnull = self.nullability_emptystring(node_id.right(self));
3131 lnull.and(rnull) }
3133 Kind::Union => {
3134 let lnull = self.nullability_emptystring(node_id.left(self));
3135 let rnull = self.nullability_emptystring(node_id.right(self));
3136 lnull.or(rnull)
3137 }
3138 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3139 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3140 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3141 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3142 }
3143 }
3144
3145 #[inline(always)]
3146 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3147 self.get_meta(node_id)
3148 .flags
3149 .nullability()
3150 .has(Nullability::CENTER.or(Nullability::END))
3151 }
3152
3153 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3154 self.get_only_nullability(node_id)
3155 }
3156
3157 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3158 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3159 }
3160
3161 pub fn pp(&self, node_id: NodeId) -> String {
3162 let mut s = String::new();
3163 self.ppw(&mut s, node_id).unwrap();
3164 s
3165 }
3166
3167 #[allow(dead_code)]
3168 pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3169 let nu = self.get_nulls_id(node_id);
3170 let nr = self.mb.nb.get_set_ref(nu);
3171 let s1 = format!("{:?}", nr);
3172 s1
3173 }
3174
3175 #[allow(dead_code)]
3176 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3177 match self.get_tregex(term_id) {
3178 TRegex::Leaf(node_id) => {
3179 let mut s = String::new();
3180 self.ppw(&mut s, *node_id).unwrap();
3181 s
3182 }
3183 TRegex::ITE(cond, then_id, else_id) => {
3184 format!(
3185 "ITE({},{},{})",
3186 self.solver_ref().pp(*cond),
3187 self.ppt(*then_id),
3188 self.ppt(*else_id)
3189 )
3190 }
3191 }
3192 }
3193
3194 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3195 if cfg!(feature = "graphviz") {
3196 match node_id {
3197 NodeId::MISSING => return write!(s, "MISSING"),
3198 NodeId::BOT => return write!(s, "⊥"),
3199 NodeId::TS => return write!(s, "_*"),
3200 NodeId::TOP => return write!(s, "_"),
3201 NodeId::EPS => return write!(s, "ε"),
3202 _ => {}
3203 }
3204 }
3205
3206 match node_id {
3207 NodeId::MISSING => return write!(s, "MISSING"),
3208 NodeId::BOT => return write!(s, "⊥"),
3209 NodeId::TS => return write!(s, "_*"),
3210 NodeId::TOP => return write!(s, "_"),
3211 NodeId::EPS => return write!(s, ""),
3212 _ => {}
3213 }
3214
3215 match self.get_kind(node_id) {
3216 Kind::End => write!(s, r"\z"),
3217 Kind::Begin => write!(s, r"\A"),
3218 Kind::Pred => {
3219 let psi = node_id.pred_tset(self);
3220 if psi == TSetId::EMPTY {
3221 write!(s, r"⊥")
3222 } else if psi == TSetId::FULL {
3223 write!(s, r"_")
3224 } else {
3225 write!(s, "{}", self.solver_ref().pp(psi))
3226 }
3227 }
3228 Kind::Inter => {
3229 write!(s, "(")?;
3230 self.ppw(s, node_id.left(self))?;
3231 write!(s, "&")?;
3232 let mut curr = node_id.right(self);
3233 while self.get_kind(curr) == Kind::Inter {
3234 let n = curr.left(self);
3235 self.ppw(s, n)?;
3236 write!(s, "&")?;
3237 curr = curr.right(self);
3238 }
3239 self.ppw(s, curr)?;
3240 write!(s, ")")
3241 }
3242 Kind::Union => {
3243 let left = node_id.left(self);
3244 let right = node_id.right(self);
3245 write!(s, "(")?;
3246 self.ppw(s, left)?;
3247 write!(s, "|")?;
3248 let mut curr = right;
3249 while self.get_kind(curr) == Kind::Union {
3250 let n = curr.left(self);
3251 self.ppw(s, n)?;
3252 write!(s, "|")?;
3253 curr = curr.right(self);
3254 }
3255 self.ppw(s, curr)?;
3256 write!(s, ")")
3257 }
3258 Kind::Concat => {
3259 let left = node_id.left(self);
3260 let right = node_id.right(self);
3261 if right.is_star(self) && right.left(self) == left {
3262 self.ppw(s, left)?;
3263 write!(s, "+")?;
3264 return Ok(());
3265 }
3266 if right.is_concat(self) {
3267 let rl = right.left(self);
3268 if rl.is_star(self) && rl.left(self) == left {
3269 self.ppw(s, left)?;
3270 write!(s, "+")?;
3271 return self.ppw(s, right.right(self));
3272 }
3273 }
3274 if right.is_concat(self) && right.left(self) == left {
3275 let mut num = 1;
3276 let mut right = right;
3277 while right.is_concat(self) && right.left(self) == left {
3278 num += 1;
3279 right = right.right(self);
3280 }
3281 if let Some(inner) = left.is_opt_v(self) {
3283 let mut inner_count = 0;
3284 let mut right2 = right;
3285 while right2.is_concat(self) && right2.left(self) == inner {
3286 inner_count += 1;
3287 right2 = right2.right(self);
3288 }
3289 if right2 == inner {
3290 inner_count += 1;
3291 self.ppw(s, inner)?;
3292 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3293 }
3294 if inner_count > 0 {
3295 self.ppw(s, inner)?;
3296 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3297 return self.ppw(s, right2);
3298 }
3299 }
3300 self.ppw(s, left)?;
3301 if right == left {
3302 num += 1;
3303 return write!(s, "{{{}}}", num);
3304 }
3305 if num <= 3 && left.is_pred(self) {
3306 for _ in 1..num {
3307 self.ppw(s, left)?;
3308 }
3309 return self.ppw(s, right);
3310 } else {
3311 write!(s, "{{{}}}", num)?;
3312 return self.ppw(s, right);
3313 }
3314 }
3315 self.ppw(s, left)?;
3316 self.ppw(s, right)
3317 }
3318 Kind::Star => {
3319 let left = node_id.left(self);
3320 let leftkind = self.get_kind(left);
3321 match leftkind {
3322 Kind::Concat | Kind::Star | Kind::Compl => {
3323 write!(s, "(")?;
3324 self.ppw(s, left)?;
3325 write!(s, ")")?;
3326 }
3327 _ => {
3328 self.ppw(s, left)?;
3329 }
3330 };
3331 write!(s, "*")
3332 }
3333 Kind::Compl => {
3334 write!(s, "~(")?;
3335 self.ppw(s, node_id.left(self))?;
3336 write!(s, ")")
3337 }
3338 Kind::Lookbehind => {
3339 let lbleft = self.get_lookbehind_prev(node_id);
3340 let lbinner = self.get_lookbehind_inner(node_id);
3341 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3342 if lbleft != NodeId::MISSING {
3343 write!(s, "「")?;
3344 self.ppw(s, lbleft)?;
3345 write!(s, "」")?;
3346 }
3347
3348 write!(s, "(?<=")?;
3349 self.ppw(s, lbinner)?;
3350 write!(s, ")")
3351 }
3352 Kind::Lookahead => {
3353 let inner = self.get_lookahead_inner(node_id);
3354 write!(s, "(?=")?;
3355 self.ppw(s, inner)?;
3356 write!(s, ")")?;
3357 if self.get_lookahead_rel(node_id) != 0 {
3358 write!(s, "{{")?;
3359 let rel = self.get_lookahead_rel(node_id);
3360 if rel == u32::MAX {
3361 write!(s, "∅")?;
3362 } else {
3363 write!(s, "{}", rel)?;
3364 }
3365 write!(s, "}}")?;
3366 }
3367 if node_id.right(self) == NodeId::MISSING {
3368 Ok(())
3369 } else {
3370 write!(s, "『")?;
3371 self.ppw(s, node_id.right(self))?;
3372 write!(s, "』")
3373 }
3374 }
3375 Kind::Counted => {
3376 let body = node_id.left(self);
3377 let packed = self.get_extra(node_id);
3378 let step = packed & 0xFFFF;
3379 let best = packed >> 16;
3380 write!(s, "#(")?;
3381 self.ppw(s, body)?;
3382 write!(s, ")s{}b{}", step, best)
3383 }
3384 }
3385 }
3386
3387 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3388 self.mk_concat(node, NodeId::TS)
3389 }
3390
3391 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3392 let node_ts = self.mk_concat(node, NodeId::TS);
3393 self.mk_compl(node_ts)
3394 }
3395
3396 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3397 let notset = self.solver().not_id(set);
3398 self.mk_pred(notset)
3399 }
3400
3401 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3402 let set_id = self.solver().u8_to_set_id(char);
3403 self.mk_pred(set_id)
3404 }
3405
3406 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3407 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3408 self.mk_pred(rangeset)
3409 }
3410
3411 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3412 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3413 for &(lo, hi) in &ranges[1..] {
3414 let r = self.mk_range_u8(lo, hi);
3415 node = self.mk_union(node, r);
3416 }
3417 node
3418 }
3419
3420 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3421 let mut prefix = Vec::new();
3422 let mut curr = node;
3423 loop {
3424 if curr == NodeId::EPS {
3425 let full = !prefix.is_empty();
3426 return (prefix, full);
3427 }
3428 if curr == NodeId::BOT {
3429 break;
3430 }
3431 if self.get_kind(curr) == Kind::Pred {
3432 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3433 Some(byte) => {
3434 prefix.push(byte);
3435 return (prefix, true);
3436 }
3437 None => break, }
3439 }
3440 if self.get_kind(curr) != Kind::Concat {
3441 break;
3442 }
3443 let left = curr.left(self);
3444 if self.get_kind(left) != Kind::Pred {
3445 break;
3446 }
3447 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3448 Some(byte) => prefix.push(byte),
3449 None => break,
3450 }
3451 curr = curr.right(self);
3452 }
3453 (prefix, false)
3454 }
3455
3456 #[allow(dead_code)]
3457 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3458 let mut result = NodeId::EPS;
3459 for byte in raw_str.iter().rev() {
3460 let node = self.mk_u8(*byte);
3461 result = self.mk_concat(node, result);
3462 }
3463 result
3464 }
3465
3466 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3467 let mut result = NodeId::EPS;
3468 for byte in raw_str.bytes().rev() {
3469 let node = self.mk_u8(byte);
3470 result = self.mk_concat(node, result);
3471 }
3472 result
3473 }
3474
3475 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3476 let mut sorted: Vec<NodeId> = nodes.collect();
3477 if sorted.len() <= 1 {
3478 return sorted.pop().unwrap_or(NodeId::BOT);
3479 }
3480 sorted.sort();
3481 sorted.dedup();
3482 sorted.retain(|&x| x != NodeId::BOT);
3483 if sorted.is_empty() {
3484 return NodeId::BOT;
3485 }
3486 if sorted.len() > 16 {
3487 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3488 let mut non_concat: Vec<NodeId> = Vec::new();
3489 for &n in &sorted {
3490 if self.get_kind(n) == Kind::Concat {
3491 by_head.entry(self.get_left(n)).or_default().push(n);
3492 } else {
3493 non_concat.push(n);
3494 }
3495 }
3496 let mut absorbed: Vec<NodeId> = Vec::new();
3497 for &n in &non_concat {
3498 if by_head.contains_key(&n) {
3499 absorbed.push(n);
3500 }
3501 }
3502 if !absorbed.is_empty() {
3503 non_concat.retain(|n| !absorbed.contains(n));
3504 }
3505 if by_head.len() < sorted.len() {
3506 let mut groups: Vec<NodeId> = non_concat;
3507 for (head, tails) in by_head {
3508 let mut tail_nodes: Vec<NodeId> =
3509 tails.iter().map(|&n| self.get_right(n)).collect();
3510 if absorbed.contains(&head) {
3511 tail_nodes.push(NodeId::EPS);
3512 }
3513 let tail_union = self.mk_unions(tail_nodes.into_iter());
3514 let factored = self.mk_concat(head, tail_union);
3515 groups.push(factored);
3516 }
3517 groups.sort();
3518 groups.dedup();
3519 return self.mk_unions_balanced(&groups);
3520 }
3521 }
3522 self.mk_unions_balanced(&sorted)
3523 }
3524
3525 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3526 match nodes.len() {
3527 0 => NodeId::BOT,
3528 1 => nodes[0],
3529 n => {
3530 let mid = n / 2;
3531 let left = self.mk_unions_balanced(&nodes[..mid]);
3532 let right = self.mk_unions_balanced(&nodes[mid..]);
3533 self.mk_union(left, right)
3534 }
3535 }
3536 }
3537
3538 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3539 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3540 }
3541
3542 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3543 nodes
3544 .rev()
3545 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3546 }
3547}
3548
3549impl RegexBuilder {
3550 #[allow(dead_code)]
3551 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3552 match self.get_tregex(term_id).clone() {
3553 TRegex::Leaf(node_id) => {
3554 if NodeId::BOT == node_id {
3555 vec![]
3556 } else {
3557 vec![node_id]
3558 }
3559 }
3560 TRegex::ITE(_, then_id, else_id) => {
3561 let mut then_nodes = self.extract_sat(then_id);
3562 let mut else_nodes = self.extract_sat(else_id);
3563 then_nodes.append(&mut else_nodes);
3564 then_nodes
3565 }
3566 }
3567 }
3568
3569 pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3570 debug_assert!(self.get_kind(start) == Kind::Union);
3571 let mut curr = start;
3572 while self.get_kind(curr) == Kind::Union {
3573 f(curr.left(self));
3574 curr = curr.right(self);
3575 }
3576 f(curr);
3577 }
3578
3579 pub(crate) fn iter_unions_b(
3580 &mut self,
3581 curr: NodeId,
3582 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3583 ) {
3584 let mut curr = curr;
3585 while self.get_kind(curr) == Kind::Union {
3586 f(self, curr.left(self));
3587 curr = curr.right(self);
3588 }
3589 f(self, curr);
3590 }
3591
3592 pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3593 if !self.contains_look(node_id) {
3594 return Some(node_id);
3595 }
3596 match self.get_kind(node_id) {
3597 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3598 Kind::Concat => {
3599 let left = node_id.left(self);
3600 let right = node_id.right(self);
3601 let elim_l = self.try_elim_lookarounds(left)?;
3602 let elim_r = self.try_elim_lookarounds(right)?;
3603 let rw = self.mk_concat(elim_l, elim_r);
3604 Some(rw)
3605 }
3606 Kind::Union => {
3607 let left = node_id.left(self);
3608 let right = node_id.right(self);
3609 let elim_l = self.try_elim_lookarounds(left)?;
3610 let elim_r = self.try_elim_lookarounds(right)?;
3611 let rw = self.mk_union(elim_l, elim_r);
3612 Some(rw)
3613 }
3614
3615 Kind::Star => {
3616 let body = node_id.left(self);
3617 let elim_l = self.try_elim_lookarounds(body)?;
3618 Some(self.mk_star(elim_l))
3619 }
3620 Kind::Compl => {
3621 let left = node_id.left(self);
3622 let elim_l = self.try_elim_lookarounds(left)?;
3623 Some(self.mk_compl(elim_l))
3624 }
3625 Kind::Lookahead => {
3626 let rel = self.get_lookahead_rel(node_id);
3627 if rel != 0 {
3628 return None;
3629 }
3630 let lbody = self.get_lookahead_inner(node_id);
3631 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3632 let elim_l = self.try_elim_lookarounds(lbody)?;
3633 let elim_r = self.try_elim_lookarounds(ltail)?;
3634 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3635 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3636 let rw = self.mk_inter(lbody_ts, ltail_ts);
3637 Some(rw)
3638 }
3639 Kind::Lookbehind => {
3640 let linner = self.get_lookbehind_inner(node_id);
3641 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3642 let elim_l = self.try_elim_lookarounds(linner)?;
3643 let elim_r = self.try_elim_lookarounds(lprev)?;
3644 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3645 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3646 let rw = self.mk_inter(lbody_ts, ltail_ts);
3647 Some(rw)
3648 }
3649 Kind::Inter => {
3650 let left = node_id.left(self);
3651 let right = node_id.right(self);
3652 let elim_l = self.try_elim_lookarounds(left)?;
3653 let elim_r = self.try_elim_lookarounds(right)?;
3654 let rw = self.mk_inter(elim_l, elim_r);
3655 Some(rw)
3656 }
3657 Kind::Counted => None,
3658 }
3659 }
3660
3661 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3663 if self.nullability(node) == Nullability::NEVER {
3664 node
3665 } else {
3666 self.mk_inter(NodeId::TOPPLUS, node)
3667 }
3668 }
3669
3670 fn iter_find_stack(
3671 &self,
3672 stack: &mut Vec<TRegexId>,
3673 mut f: impl FnMut(NodeId) -> bool,
3674 ) -> bool {
3675 loop {
3676 match stack.pop() {
3677 None => return false,
3678 Some(curr) => match self.get_tregex(curr) {
3679 TRegex::Leaf(n) => {
3680 let mut curr = *n;
3681 while curr != NodeId::BOT {
3682 match self.get_kind(curr) {
3683 Kind::Union => {
3684 if f(curr.left(self)) {
3685 return true;
3686 }
3687 curr = curr.right(self);
3688 }
3689 _ => {
3690 if f(*n) {
3691 return true;
3692 }
3693 curr = NodeId::BOT;
3694 }
3695 }
3696 }
3697 }
3698 TRegex::ITE(_, then_id, else_id) => {
3699 if *else_id != TRegexId::BOT {
3700 stack.push(*else_id);
3701 }
3702 stack.push(*then_id);
3703 }
3704 },
3705 }
3706 }
3707 }
3708
3709 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3710 if node == NodeId::BOT {
3711 return Some(true);
3712 }
3713 if self.nullability(node) != Nullability::NEVER {
3714 return Some(false);
3715 }
3716 if let Some(cached) = self.cache_empty.get(&node) {
3717 if cached.is_checked() {
3718 return Some(cached.is_empty());
3719 }
3720 }
3721 let node = if !self.contains_look(node) {
3722 node
3723 } else {
3724 self.try_elim_lookarounds(node)?
3725 };
3726 let isempty_flag = self.is_empty_lang_internal(node);
3727
3728 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3729 }
3730
3731 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3732 if !self.get_meta_flags(initial_node).contains_inter() {
3734 return Ok(NodeFlags::ZERO);
3735 }
3736
3737 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3738 let mut worklist: VecDeque<NodeId> = VecDeque::new();
3739 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3740 let mut stack = Vec::new();
3741 stack.push(begin_der);
3742 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3743 visited.insert(node, initial_node);
3744 let nullability = self.nullability(node);
3745 if nullability != Nullability::NEVER {
3746 true
3747 } else {
3748 worklist.push_back(node);
3749 false
3750 }
3751 });
3752 if found_nullable_right_away {
3753 return Ok(NodeFlags::ZERO);
3754 }
3755
3756 worklist.push_back(initial_node);
3757 let isempty_flag: NodeFlags;
3758 let mut found_node = NodeId::BOT;
3759
3760 loop {
3761 match worklist.pop_front() {
3762 None => {
3763 isempty_flag = NodeFlags::IS_EMPTY;
3764 break;
3765 }
3766 Some(outer) => {
3767 if let Some(cached) = self.cache_empty.get(&outer) {
3768 if cached.is_checked() {
3769 if cached.is_empty() {
3770 continue;
3771 } else {
3772 return Ok(NodeFlags::ZERO);
3773 }
3774 }
3775 }
3776
3777 let derivative = self.der(outer, Nullability::CENTER)?;
3778
3779 stack.push(derivative);
3780
3781 let found_null = self.iter_find_stack(&mut stack, |node| {
3782 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3783 found_node = node;
3784 if !self.get_meta_flags(node).contains_inter() {
3785 true
3786 } else {
3787 e.insert(outer);
3788 worklist.push_front(node);
3789 self.any_nonbegin_nullable(node)
3790 }
3791 } else {
3792 false
3793 }
3794 });
3795 if found_null {
3796 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3797 isempty_flag = NodeFlags::ZERO;
3798 break;
3799 }
3800 }
3801 }
3802 }
3803
3804 self.cache_empty.insert(
3805 initial_node,
3806 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3807 );
3808 Ok(isempty_flag)
3809 }
3810
3811 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3813 if larger_lang == smaller_lang {
3814 return Some(true);
3815 }
3816
3817 if self
3819 .nullability(larger_lang)
3820 .not()
3821 .and(self.nullability(smaller_lang))
3822 != Nullability::NEVER
3823 {
3824 return Some(false);
3825 }
3826
3827 let (smaller_lang, larger_lang) =
3832 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3833 let wrap = |b: &mut Self, n: NodeId| {
3834 let tmp = b.mk_concat(n, NodeId::TS);
3835 b.mk_concat(NodeId::TS, tmp)
3836 };
3837 (wrap(self, smaller_lang), wrap(self, larger_lang))
3838 } else {
3839 (smaller_lang, larger_lang)
3840 };
3841
3842 let nota = self.mk_compl(larger_lang);
3843 let diff = self.mk_inter(smaller_lang, nota);
3844 self.is_empty_lang(diff)
3845 }
3846}