1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_lookaround(self) -> bool {
134 self.has(MetaFlags::CONTAINS_LOOKAROUND)
135 }
136 pub(crate) fn contains_inter(self) -> bool {
137 self.has(MetaFlags::CONTAINS_INTER)
138 }
139
140 pub(crate) fn all_contains_flags(self) -> MetaFlags {
141 self.and(
142 MetaFlags::CONTAINS_LOOKAROUND
143 .or(MetaFlags::CONTAINS_ANCHORS)
144 .or(MetaFlags::CONTAINS_INTER),
145 )
146 }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152 #[default]
153 Pred,
154 Star,
155 Begin,
156 End,
157 Concat,
158 Union,
159 Compl,
160 Lookbehind,
161 Lookahead,
162 Inter,
163 Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168 flags: MetaFlags,
169 nulls: NullsId,
170}
171
172struct MetadataBuilder {
173 num_created: u32,
174 solver: Solver,
175 nb: NullsBuilder,
176 index: FxHashMap<Metadata, MetadataId>,
177 pub array: Vec<Metadata>,
178}
179
180mod helpers {
181 pub(crate) fn incr_rel(n1: u32) -> u32 {
182 match n1.overflowing_add(1) {
183 (_, true) => u32::MAX,
184 (res, false) => res,
185 }
186 }
187}
188
189impl MetadataBuilder {
190 fn new() -> MetadataBuilder {
191 Self {
192 index: FxHashMap::default(),
193 array: vec![Metadata {
194 flags: MetaFlags::ZERO,
195 nulls: NullsId::EMPTY,
196 }],
197 solver: Solver::new(),
198 num_created: 0,
199 nb: NullsBuilder::new(),
200 }
201 }
202
203 fn init(&mut self, inst: Metadata) -> MetadataId {
204 self.num_created += 1;
205 let new_id = MetadataId(self.num_created);
206 self.index.insert(inst.clone(), new_id);
207 self.array.push(inst);
208 new_id
209 }
210
211 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212 match self.index.get(&inst) {
213 Some(&id) => id,
214 None => self.init(inst),
215 }
216 }
217
218 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219 &self.array[inst.0 as usize]
220 }
221
222 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223 setflags.all_contains_flags()
224 }
225
226 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227 let left = &self.array[body.0 as usize].flags;
228 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229 let inf = if body_id == NodeId::BOT {
231 MetaFlags::ZERO
232 } else {
233 MetaFlags::INFINITE_LENGTH
234 };
235 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236 }
237
238 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239 let left = &self.array[left_id.0 as usize].flags;
240 let null = left.nullability().not().and(Nullability::ALWAYS);
241 let contains = self.get_contains(*left);
242 MetaFlags::with_nullability(
243 null,
244 contains
245 .or(MetaFlags::INFINITE_LENGTH)
246 .or(MetaFlags::CONTAINS_COMPL),
247 )
248 }
249
250 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251 let left = &self.array[left_id.0 as usize].flags;
252 let right = &self.array[right_id.0 as usize].flags;
253 let null = left.nullability().and(right.nullability());
254 let contains = self.get_contains(left.or(*right));
255 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256 MetaFlags::with_nullability(null, contains.or(len))
257 }
258
259 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260 let left = &self.array[left_id.0 as usize].flags;
261 let right = &self.array[right_id.0 as usize].flags;
262 let null = left.nullability().and(right.nullability());
263 let contains = self
264 .get_contains(left.or(*right))
265 .or(MetaFlags::CONTAINS_INTER);
266 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267 MetaFlags::with_nullability(null, contains.or(len))
268 }
269
270 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271 let left = &self.array[left_id.0 as usize].flags;
272 let right = &self.array[right_id.0 as usize].flags;
273 let null = left.nullability().or(right.nullability());
274 let contains = self.get_contains(left.or(*right));
275 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276 MetaFlags::with_nullability(null, contains.or(len))
277 }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282 pub(crate) kind: Kind,
283 pub(crate) left: NodeId,
284 pub(crate) right: NodeId,
285 pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290 Leaf(NodeId),
291 ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301 fn is_checked(self) -> bool {
302 self.0 >= NodeFlags::IS_CHECKED.0
303 }
304 fn is_empty(self) -> bool {
305 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306 }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317 mb: MetadataBuilder,
318 temp_vec: Vec<NodeId>,
319 num_created: u32,
320 index: FxHashMap<NodeKey, NodeId>,
321 array: Vec<NodeKey>,
322 metadata: Vec<MetadataId>,
323 reversed: Vec<NodeId>,
324 cache_empty: FxHashMap<NodeId, NodeFlags>,
325 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326 tr_array: Vec<TRegex<TSetId>>,
327 tr_der_center: Vec<TRegexId>,
328 tr_der_begin: Vec<TRegexId>,
329 flags: BuilderFlags,
330 pub lookahead_context_max: u32,
332 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336macro_rules! iter_inter {
337 ($compiler:ident, $start:ident, $expression:expr) => {
338 let mut curr = $start;
339 while $compiler.get_kind(curr) == Kind::Inter {
340 let left = $compiler.get_left(curr);
341 $expression(left);
342 curr = $compiler.get_right(curr);
343 }
344 $expression(curr);
345 };
346}
347
348impl NodeId {
349 fn is_missing(&self) -> bool {
350 *self == NodeId::MISSING
351 }
352 #[inline]
353 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
354 b.get_flags_contains(self)
355 }
356 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
357 if self == tail {
358 true
359 } else if self.is_kind(b, Kind::Concat) {
360 self.right(b).has_concat_tail(b, tail)
361 } else {
362 false
363 }
364 }
365 fn missing_to_eps(&self) -> NodeId {
366 if *self == NodeId::MISSING {
367 NodeId::EPS
368 } else {
369 *self
370 }
371 }
372
373 #[inline]
374 fn kind(self, b: &RegexBuilder) -> Kind {
375 b.get_kind(self)
376 }
377 #[inline]
378 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
379 b.get_kind(self) == k
380 }
381 #[inline]
382 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
383 b.nullability(self) == Nullability::NEVER
384 }
385 #[inline]
386 fn nullability(self, b: &RegexBuilder) -> Nullability {
387 b.nullability(self)
388 }
389
390 #[inline]
391 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
392 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
393 }
394 #[inline]
395 pub fn left(self, b: &RegexBuilder) -> NodeId {
396 b.get_left(self)
397 }
398
399 #[inline]
400 pub fn right(self, b: &RegexBuilder) -> NodeId {
401 b.get_right(self)
402 }
403
404 #[inline]
405 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
406 b.der(self, mask)
407 }
408
409 #[inline]
410 fn extra(self, b: &RegexBuilder) -> u32 {
411 b.get_extra(self)
412 }
413
414 #[inline]
415 fn is_pred(self, b: &RegexBuilder) -> bool {
416 b.get_kind(self) == Kind::Pred
417 }
418 #[inline]
419 fn is_lookahead(self, b: &RegexBuilder) -> bool {
420 b.get_kind(self) == Kind::Lookahead
421 }
422 #[inline]
423 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
424 debug_assert!(self.is_pred(b));
425 TSetId(b.get_extra(self))
426 }
427 #[inline]
428 fn is_star(self, b: &RegexBuilder) -> bool {
429 if NodeId::EPS == self {
430 return false;
431 }
432 b.get_kind(self) == Kind::Star
433 }
434
435 #[inline]
436 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
437 if self.is_concat(b) {
438 let r = self.right(b);
439 return r.is_star(b) && r.left(b) == self.left(b);
440 }
441 false
442 }
443
444 #[inline]
445 fn is_concat(self, b: &RegexBuilder) -> bool {
446 b.get_kind(self) == Kind::Concat
447 }
448
449 #[inline]
450 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
451 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
452 Some(b.get_right(self))
453 } else {
454 None
455 }
456 }
457
458 #[inline]
459 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
460 if b.get_kind(self) == Kind::Concat {
461 let left = self.left(b);
462 let right = self.right(b);
463 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
464 return right == NodeId::END;
465 }
466 }
467 false
468 }
469
470 #[inline]
471 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
472 if NodeId::EPS == self {
473 return None;
474 }
475 if self.is_star(b) && self.left(b).is_pred(b) {
476 Some(self.left(b))
477 } else {
478 None
479 }
480 }
481
482 #[inline]
483 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
484 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
485 let middle = self.right(b);
486 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
487 return Some(middle.left(b));
488 }
489 }
490 None
491 }
492
493 #[inline]
494 pub(crate) fn iter_union(
495 self,
496 b: &mut RegexBuilder,
497 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
498 ) {
499 let mut curr: NodeId = self;
500 while curr.kind(b) == Kind::Union {
501 f(b, curr.left(b));
502 curr = curr.right(b);
503 }
504 f(b, curr);
505 }
506
507 #[inline]
508 pub(crate) fn iter_union_while(
509 self,
510 b: &mut RegexBuilder,
511 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
512 ) {
513 let mut curr: NodeId = self;
514 let mut continue_loop = true;
515 while continue_loop && curr.kind(b) == Kind::Union {
516 continue_loop = f(b, curr.left(b));
517 curr = curr.right(b);
518 }
519 if continue_loop {
520 f(b, curr);
521 }
522 }
523}
524
525impl Default for RegexBuilder {
526 fn default() -> Self {
527 Self::new()
528 }
529}
530
531impl RegexBuilder {
532 pub fn new() -> RegexBuilder {
533 let mut inst = Self {
534 mb: MetadataBuilder::new(),
535 array: Vec::new(),
536 index: FxHashMap::default(),
537 cache_empty: FxHashMap::default(),
538 tr_array: Vec::new(),
539 tr_cache: FxHashMap::default(),
540 flags: BuilderFlags::ZERO,
541 lookahead_context_max: 800,
542 num_created: 0,
543 metadata: Vec::new(),
544 reversed: Vec::new(),
545 tr_der_center: Vec::new(),
546 tr_der_begin: Vec::new(),
547 temp_vec: Vec::new(),
548 mk_binary_memo: FxHashMap::default(),
549 clean_cache: FxHashMap::default(),
550 };
551 inst.array.push(NodeKey::default());
552 inst.mk_pred(TSetId::EMPTY);
553 inst.mk_star(NodeId::BOT);
554 inst.mk_pred(TSetId::FULL);
555 inst.mk_star(NodeId::TOP);
556 let top_plus_id = inst.mk_plus(NodeId::TOP);
557 inst.mk_unset(Kind::Begin);
558 inst.mk_unset(Kind::End);
559 debug_assert!(top_plus_id == NodeId::TOPPLUS);
560
561 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
562 inst.mk_leaf(NodeId::BOT);
563 inst.mk_leaf(NodeId::EPS);
564 inst.mk_leaf(NodeId::TOP);
565 inst.mk_leaf(NodeId::TS);
566
567 inst.flags = BuilderFlags::SUBSUME;
568 inst
569 }
570
571 #[inline]
572 pub fn solver_ref(&self) -> &Solver {
573 &self.mb.solver
574 }
575
576 #[inline]
577 pub fn solver(&mut self) -> &mut Solver {
578 &mut self.mb.solver
579 }
580
581 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
582 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
583 self.tr_cache.insert(inst.clone(), new_id);
584 self.tr_array.push(inst);
585 new_id
586 }
587
588 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
589 match self.tr_cache.get(&inst) {
590 Some(&id) => id,
591 None => self.tr_init(inst),
592 }
593 }
594
595 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
596 &self.tr_array[inst.0 as usize]
597 }
598
599 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
600 let solv = self.solver();
601 !solv.is_sat_id(cond1, cond2)
602 }
603
604 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
605 let term = TRegex::<TSetId>::Leaf(node_id);
606 self.get_tregex_id(term)
607 }
608
609 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
610 let tmp_inst = TRegex::ITE(cond, _then, _else);
611 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
612 return *cached;
613 }
614 if _then == _else {
615 return _then;
616 }
617 if self.solver().is_full_id(cond) {
618 return _then;
619 }
620 if self.solver().is_empty_id(cond) {
621 return _else;
622 }
623 let _clean_then = match self.tr_array[_then.0 as usize] {
624 TRegex::Leaf(_) => _then,
625 _ => self.clean(cond, _then),
626 };
627 let notcond = self.solver().not_id(cond);
628 let _clean_else = match self.tr_array[_else.0 as usize] {
629 TRegex::Leaf(_) => _else,
630 _ => self.clean(notcond, _else),
631 };
632
633 if _clean_then == _clean_else {
634 return _clean_then;
635 }
636 match self.get_tregex(_clean_then) {
638 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
639 let _cond_then = *leftcond;
640 let new_then = *_inner_then;
641 let sand = self.solver().and_id(cond, _cond_then);
642 return self.mk_ite(sand, new_then, _clean_else);
643 }
644 _ => {}
645 }
646 match self.get_tregex(_clean_else) {
648 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
650 let e2clone = *_e2;
651 let new_cond = self.mb.solver.or_id(cond, *_c2);
652 return self.mk_ite(new_cond, _clean_then, e2clone);
653 }
654 _ => {}
655 }
656
657 if _clean_then == TRegexId::BOT {
658 let flip_cond = self.solver().not_id(cond);
659 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
660 return cleaned_id;
661 }
662
663 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
664 }
665
666 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
667 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
668 return cached;
669 }
670 let result = match *self.get_tregex(tterm) {
671 TRegex::Leaf(_) => tterm,
672 TRegex::ITE(alpha, _then_id, _else_id) => {
673 let notalpha = self.mb.solver.not_id(alpha);
674 if self.mb.solver.unsat_id(beta, alpha) {
675 self.clean(beta, _else_id)
677 } else if self.unsat(beta, notalpha) {
678 self.clean(beta, _then_id)
680 } else {
681 let tc = self.mb.solver.and_id(beta, alpha);
682 let ec = self.mb.solver.and_id(beta, notalpha);
683 let new_then = self.clean(tc, _then_id);
684 let new_else = self.clean(ec, _else_id);
685 self.mk_ite(alpha, new_then, new_else)
686 }
687 }
688 };
689 self.clean_cache.insert((beta, tterm), result);
690 result
691 }
692
693 fn mk_unary(
694 &mut self,
695 term: TRegexId,
696 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
697 ) -> TRegexId {
698 match self.tr_array[term.0 as usize] {
699 TRegex::Leaf(node_id) => {
700 let applied = apply(self, node_id);
701 self.mk_leaf(applied)
702 }
703 TRegex::ITE(c1, _then, _else) => {
704 let _then1 = self.mk_unary(_then, apply);
705 let _else1 = self.mk_unary(_else, apply);
706 self.mk_ite(c1, _then1, _else1)
707 }
708 }
709 }
710
711 fn mk_binary_result(
712 &mut self,
713 left: TRegexId,
714 right: TRegexId,
715 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
716 ) -> Result<TRegexId, AlgebraError> {
717 match self.tr_array[left.0 as usize] {
718 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
719 TRegex::Leaf(right_node_id) => {
720 let applied = apply(self, left_node_id, right_node_id)?;
721 Ok(self.mk_leaf(applied))
722 }
723 TRegex::ITE(c2, _then, _else) => {
724 let then2 = self.mk_binary_result(left, _then, apply)?;
725 let else2 = self.mk_binary_result(left, _else, apply)?;
726 Ok(self.mk_ite(c2, then2, else2))
727 }
728 },
729 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
730 TRegex::Leaf(_) => {
731 let then2 = self.mk_binary_result(_then1, right, apply)?;
732 let else2 = self.mk_binary_result(_else1, right, apply)?;
733 Ok(self.mk_ite(c1, then2, else2))
734 }
735 TRegex::ITE(c2, _then2, _else2) => {
736 if c1 == c2 {
737 let _then = self.mk_binary_result(_then1, _then2, apply)?;
738 let _else = self.mk_binary_result(_else1, _else2, apply)?;
739 return Ok(self.mk_ite(c1, _then, _else));
740 }
741 if c1.0 > c2.0 {
742 let _then = self.mk_binary_result(_then1, right, apply)?;
743 let _else = self.mk_binary_result(_else1, right, apply)?;
744 Ok(self.mk_ite(c1, _then, _else))
745 } else {
746 let _then = self.mk_binary_result(left, _then2, apply)?;
747 let _else = self.mk_binary_result(left, _else2, apply)?;
748 Ok(self.mk_ite(c2, _then, _else))
749 }
750 }
751 },
752 }
753 }
754
755 fn mk_binary(
756 &mut self,
757 left: TRegexId,
758 right: TRegexId,
759 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
760 ) -> TRegexId {
761 self.mk_binary_memo.clear();
762 self.mk_binary_inner(left, right, apply)
763 }
764
765 fn mk_binary_inner(
766 &mut self,
767 left: TRegexId,
768 right: TRegexId,
769 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
770 ) -> TRegexId {
771 if left == right {
772 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
773 }
774 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
775 return cached;
776 }
777 let result = match self.tr_array[left.0 as usize] {
778 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
779 TRegex::Leaf(right_node_id) => {
780 let applied = apply(self, left_node_id, right_node_id);
781 self.mk_leaf(applied)
782 }
783 TRegex::ITE(c2, _then, _else) => {
784 let then2 = self.mk_binary_inner(left, _then, apply);
785 let else2 = self.mk_binary_inner(left, _else, apply);
786 self.mk_ite(c2, then2, else2)
787 }
788 },
789 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
790 TRegex::Leaf(_) => {
791 let then2 = self.mk_binary_inner(_then1, right, apply);
792 let else2 = self.mk_binary_inner(_else1, right, apply);
793 self.mk_ite(c1, then2, else2)
794 }
795 TRegex::ITE(c2, _then2, _else2) => {
796 if c1 == c2 {
797 let _then = self.mk_binary_inner(_then1, _then2, apply);
798 let _else = self.mk_binary_inner(_else1, _else2, apply);
799 self.mk_ite(c1, _then, _else)
800 } else if c1.0 > c2.0 {
801 let _then = self.mk_binary_inner(_then1, right, apply);
802 let _else = self.mk_binary_inner(_else1, right, apply);
803 self.mk_ite(c1, _then, _else)
804 } else {
805 let _then = self.mk_binary_inner(left, _then2, apply);
806 let _else = self.mk_binary_inner(left, _else2, apply);
807 self.mk_ite(c2, _then, _else)
808 }
809 }
810 },
811 };
812 self.mk_binary_memo.insert((left, right), result);
813 result
814 }
815
816 pub fn get_nulls(
817 &mut self,
818 pending_rel: u32,
819 mask: Nullability,
820 acc: &mut BTreeSet<NullState>,
821 node_id: NodeId,
822 ) {
823 debug_assert!(node_id != NodeId::MISSING);
824 if !self.is_nullable(node_id, mask) {
825 return;
826 }
827 match self.get_kind(node_id) {
828 Kind::Pred => {}
829 Kind::End => {
830 if mask.has(Nullability::END) {
831 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
832 }
833 }
834 Kind::Begin => {
835 if mask.has(Nullability::BEGIN) {
836 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
837 }
838 }
839 Kind::Concat => {
840 let new_mask = self.nullability(node_id).and(mask);
841 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
842 if self.is_nullable(node_id.left(self), mask) {
843 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
844 }
845 }
846 Kind::Union => {
847 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
848 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
849 }
850 Kind::Inter => {
851 let new_mask = self.nullability(node_id).and(mask);
852 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
853 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
854 }
855 Kind::Star => {
856 acc.insert(NullState::new(mask, pending_rel));
857 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
858 }
859 Kind::Compl => {
860 if !self.is_nullable(node_id.left(self), mask) {
861 acc.insert(NullState::new(mask, 0));
862 }
863 }
864 Kind::Lookbehind => {
865 let new_mask = self.nullability(node_id).and(mask);
866 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
867 if node_id.right(self) != NodeId::MISSING {
868 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
869 }
870 }
871 Kind::Lookahead => {
872 let la_inner = self.get_lookahead_inner(node_id);
873 if self.is_nullable(la_inner, mask) {
874 let rel = self.get_lookahead_rel(node_id);
875 if rel != u32::MAX {
876 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
877 }
878 let la_tail = self.get_lookahead_tail(node_id);
880 if la_tail != NodeId::MISSING {
881 self.get_nulls(pending_rel, mask, acc, la_tail);
882 }
883 }
884 }
885 Kind::Counted => {
886 let packed = self.get_extra(node_id);
887 let best = packed >> 16;
888 if best > 0 {
889 acc.insert(NullState::new(mask, pending_rel + best));
890 }
891 }
892 }
893 }
894
895 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
896 self.get_meta_flags(node_id)
897 .has(MetaFlags::CONTAINS_LOOKAROUND)
898 }
899
900 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
902 self.get_meta_flags(node_id)
903 .has(MetaFlags::CONTAINS_ANCHORS)
904 }
905
906 pub fn is_infinite(&self, node_id: NodeId) -> bool {
907 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
908 }
909
910 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
912 if self.is_infinite(node_id) {
913 if self.get_kind(node_id) == Kind::Inter {
914 self.get_bounded_length(node_id)
915 } else {
916 (self.get_min_length_only(node_id), u32::MAX)
917 }
918 } else {
919 self.get_bounded_length(node_id)
920 }
921 }
922
923 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
924 if node_id == NodeId::EPS {
925 return (0, 0);
926 }
927 match self.get_kind(node_id) {
928 Kind::End | Kind::Begin => (0, 0),
929 Kind::Pred => (1, 1),
930 Kind::Concat => {
931 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
932 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
933 (lmin + rmin, lmax.saturating_add(rmax))
934 }
935 Kind::Union => {
936 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
937 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
938 (lmin.min(rmin), lmax.max(rmax))
939 }
940 Kind::Inter => {
941 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
942 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
943 (lmin.max(rmin), lmax.min(rmax))
944 }
945 Kind::Lookahead => {
946 let body = node_id.left(self);
947 if self.is_infinite(body) {
948 return (0, u32::MAX);
949 }
950 let right = node_id.right(self);
951 if right.is_missing() {
952 (0, 0)
953 } else {
954 self.get_min_max_length(right)
955 }
956 }
957 Kind::Counted => (0, 0),
958 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
959 }
960 }
961
962 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
963 match self.get_kind(node_id) {
964 Kind::End | Kind::Begin => Some(0),
965 Kind::Pred => Some(1),
966 Kind::Concat => {
967 let l = self.get_fixed_length(node_id.left(self))?;
968 let r = self.get_fixed_length(node_id.right(self))?;
969 Some(l + r)
970 }
971 Kind::Union => {
972 let l = self.get_fixed_length(node_id.left(self))?;
973 let r = self.get_fixed_length(node_id.right(self))?;
974 if l == r {
975 Some(l)
976 } else {
977 None
978 }
979 }
980 Kind::Inter => {
981 let l = self.get_fixed_length(node_id.left(self))?;
982 let r = self.get_fixed_length(node_id.right(self))?;
983 if l == r {
984 Some(l)
985 } else {
986 None
987 }
988 }
989 Kind::Lookahead => {
990 let right = node_id.right(self);
991 if right.is_missing() {
992 Some(0)
993 } else {
994 self.get_fixed_length(right)
995 }
996 }
997 Kind::Counted => Some(0),
998 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
999 }
1000 }
1001
1002 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1003 match self.get_kind(node_id) {
1004 Kind::End | Kind::Begin => 0,
1005 Kind::Pred => 1,
1006 Kind::Concat => {
1007 self.get_min_length_only(node_id.left(self))
1008 + self.get_min_length_only(node_id.right(self))
1009 }
1010 Kind::Union => self
1011 .get_min_length_only(node_id.left(self))
1012 .min(self.get_min_length_only(node_id.right(self))),
1013 Kind::Inter => self
1014 .get_min_length_only(node_id.left(self))
1015 .max(self.get_min_length_only(node_id.right(self))),
1016 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1017 Kind::Compl => {
1018 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1019 0
1020 } else {
1021 1
1022 }
1023 }
1024 }
1025 }
1026
1027 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1028 if node_id == NodeId::TS {
1029 return true;
1030 }
1031 match self.get_kind(node_id) {
1032 Kind::Inter => {
1033 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1034 }
1035 Kind::Union => {
1036 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1037 }
1038 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1039 _ => false,
1040 }
1041 }
1042
1043 #[inline]
1044 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1045 if self.get_kind(node_id) == Kind::Concat {
1046 self.ends_with_ts(node_id.right(self))
1047 } else {
1048 node_id == NodeId::TS
1049 }
1050 }
1051
1052 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1053 debug_assert!(node_id != NodeId::MISSING);
1054 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1055 }
1056
1057 pub(crate) fn cache_der(
1058 &mut self,
1059 node_id: NodeId,
1060 result: TRegexId,
1061 mask: Nullability,
1062 ) -> TRegexId {
1063 if mask == Nullability::CENTER {
1064 self.tr_der_center[node_id.0 as usize] = result
1065 } else {
1066 self.tr_der_begin[node_id.0 as usize] = result
1067 };
1068 result
1069 }
1070
1071 pub(crate) fn try_cached_der(
1072 &mut self,
1073 node_id: NodeId,
1074 mask: Nullability,
1075 ) -> Option<TRegexId> {
1076 let cache = if mask == Nullability::CENTER {
1077 &mut self.tr_der_center
1078 } else {
1079 &mut self.tr_der_begin
1080 };
1081 match cache.get(node_id.0 as usize) {
1082 Some(&TRegexId::MISSING) => {}
1083 Some(&result) => {
1084 return Some(result);
1085 }
1086 None => {
1087 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1088 }
1089 }
1090 None
1091 }
1092
1093 pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1094 let mut term = self.get_tregex(der);
1095 loop {
1096 match *term {
1097 TRegex::Leaf(node_id) => return node_id,
1098 TRegex::ITE(cond, _then, _else) => {
1099 if self.solver().is_sat_id(set, cond) {
1100 term = self.get_tregex(_then);
1101 } else {
1102 term = self.get_tregex(_else);
1103 }
1104 }
1105 }
1106 }
1107 }
1108
1109 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1110 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1111 debug_assert!(
1112 node_id != NodeId::MISSING,
1113 "attempting to derive missing node"
1114 );
1115 if let Some(result) = self.try_cached_der(node_id, mask) {
1116 return Ok(result);
1117 }
1118
1119 let result = match node_id.kind(self) {
1120 Kind::Compl => {
1121 let leftd = node_id.left(self).der(self, mask)?;
1122 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1123 }
1124 Kind::Inter => {
1125 let leftd = node_id.left(self).der(self, mask)?;
1126 let rightd = node_id.right(self).der(self, mask)?;
1127 {
1128 let this = &mut *self;
1129 this.mk_binary(
1130 leftd,
1131 rightd,
1132 &mut (|b, left, right| b.mk_inter(left, right)),
1133 )
1134 }
1135 }
1136 Kind::Union => {
1137 let leftd = self.der(node_id.left(self), mask)?;
1138 let rightd = self.der(node_id.right(self), mask)?;
1139 {
1140 let this = &mut *self;
1141 this.mk_binary(
1142 leftd,
1143 rightd,
1144 &mut (|b, left, right| b.mk_union(left, right)),
1145 )
1146 }
1147 }
1148 Kind::Concat => {
1149 let head = node_id.left(self);
1150 let tail = node_id.right(self);
1151 let tail_leaf = self.mk_leaf(tail);
1152 let head_der = self.der(head, mask)?;
1153
1154 if self.is_nullable(head, mask) {
1155 let rightd = self.der(tail, mask)?;
1156 let ldr = {
1157 let this = &mut *self;
1158 this.mk_binary(
1159 head_der,
1160 tail_leaf,
1161 &mut (|b, left, right| b.mk_concat(left, right)),
1162 )
1163 };
1164 {
1165 let this = &mut *self;
1166 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1167 }
1168 } else {
1169 let this = &mut *self;
1170 this.mk_binary(
1171 head_der,
1172 tail_leaf,
1173 &mut (|b, left, right| b.mk_concat(left, right)),
1174 )
1175 }
1176 }
1177 Kind::Star => {
1178 if node_id == NodeId::EPS {
1179 TRegexId::BOT
1180 } else {
1181 let left = node_id.left(self);
1182 let r_decr_leaf = self.mk_leaf(node_id);
1183 let r_der = self.der(left, mask)?;
1184 let this = &mut *self;
1185 this.mk_binary(
1186 r_der,
1187 r_decr_leaf,
1188 &mut (|b, left, right| b.mk_concat(left, right)),
1189 )
1190 }
1191 }
1192 Kind::Lookbehind => {
1193 let lb_prev_der = {
1194 let lb_prev = self.get_lookbehind_prev(node_id);
1195 if lb_prev == NodeId::MISSING {
1196 TRegexId::MISSING
1197 } else {
1198 self.der(lb_prev, mask)?
1199 }
1200 };
1201 let lb_inner = self.get_lookbehind_inner(node_id);
1202 let lb_inner_der = self.der(lb_inner, mask)?;
1203 {
1204 let this = &mut *self;
1205 this.mk_binary_result(
1206 lb_inner_der,
1207 lb_prev_der,
1208 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1209 )?
1210 }
1211 }
1212 Kind::Lookahead => {
1213 let la_tail = self.get_lookahead_tail(node_id);
1214 let la_body = node_id.left(self);
1215 let rel = self.get_lookahead_rel(node_id);
1216
1217 if self.is_nullable(la_body, mask) {
1218 let right = node_id.right(self).missing_to_eps();
1220 let rder = self.der(right, mask).clone();
1221 return rder;
1222 }
1223
1224 if rel == u32::MAX {
1225 let la_body_der = self.der(la_body, mask)?;
1226 if la_tail.is_kind(self, Kind::Pred) {
1227 let transitioned =
1228 self.transition_term(la_body_der, la_tail.pred_tset(self));
1229 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1230 let concated = self.mk_concat(la_tail, new_la);
1231 return self.der(concated, mask);
1232 }
1233 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1234 let left = la_tail.left(self);
1235 let tset = left.pred_tset(self);
1236 let transitioned = self.transition_term(la_body_der, tset);
1237 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1238 let tail_right = la_tail.right(self);
1239 let concated = self.mk_concat(new_la, tail_right);
1240 let concated = self.mk_concat(left, concated);
1241 return self.der(concated, mask);
1242 }
1243 }
1244
1245 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1246 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1247 let concated = self.mk_concat(la_body, nulls_mask);
1248 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1249 let non_nulled = self.mk_non_nullable_safe(la_tail);
1250 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1251 let new_union = self.mk_union(concated_look, new_look);
1252 return self.der(new_union, mask);
1253 }
1254
1255 let la_tail_der = if la_tail == NodeId::MISSING {
1256 TRegexId::MISSING
1257 } else {
1258 if self.is_nullable(la_tail, mask) {
1259 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1260 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1261 let la_union = self.mk_union(la_tail, nulls_la);
1262 self.der(la_union, mask)?
1263 } else {
1264 self.der(la_tail, mask)?
1265 }
1266 };
1267
1268 let la_body_der = self.der(la_body, mask)?;
1269
1270 if rel != u32::MAX && rel > self.lookahead_context_max {
1271 return Err(AlgebraError::AnchorLimit);
1272 }
1273
1274 let la = {
1275 let this = &mut *self;
1276 let rel = helpers::incr_rel(rel);
1277 this.mk_binary(
1278 la_body_der,
1279 la_tail_der,
1280 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1281 )
1282 };
1283
1284 if rel != u32::MAX
1285 && la_tail_der != TRegexId::MISSING
1286 && self.is_nullable(la_tail, mask)
1287 {
1288 let look_only = {
1289 let this = &mut *self;
1290 let right = TRegexId::MISSING;
1291 let rel = helpers::incr_rel(rel);
1292 this.mk_binary(
1293 la_body_der,
1294 right,
1295 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1296 )
1297 };
1298 {
1299 let this = &mut *self;
1300 this.mk_binary(
1301 look_only,
1302 la,
1303 &mut (|b, left, right| b.mk_union(left, right)),
1304 )
1305 }
1306 } else {
1307 la
1308 }
1309 }
1310 Kind::Counted => {
1311 let body = node_id.left(self);
1312 let chain = node_id.right(self);
1313 let packed = self.get_extra(node_id);
1314 let step = (packed & 0xFFFF) as u16;
1315 let best = (packed >> 16) as u16;
1316
1317 let mid_best = if self.is_nullable(body, mask) && step >= best {
1318 step
1319 } else {
1320 best
1321 };
1322
1323 let body_der = self.der(body, mask)?;
1324 let new_step = step.saturating_add(1);
1325 self.mk_unary(body_der, &mut |b, new_body| {
1326 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1327 new_step
1328 } else {
1329 mid_best
1330 };
1331 let packed = (final_best as u32) << 16 | new_step as u32;
1332 b.mk_counted(new_body, chain, packed)
1333 })
1334 }
1335 Kind::Begin | Kind::End => TRegexId::BOT,
1336 Kind::Pred => {
1337 let psi = node_id.pred_tset(self);
1338 if psi == TSetId::EMPTY {
1339 TRegexId::BOT
1340 } else {
1341 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1342 }
1343 }
1344 };
1345
1346 self.cache_der(node_id, result, mask);
1350 Ok(result)
1351 }
1352
1353 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1354 debug_assert!(meta_id != MetadataId::MISSING);
1355 match self.metadata.get_mut(node_id.0 as usize) {
1356 Some(v) => *v = meta_id,
1357 None => {
1358 self.metadata
1359 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1360 self.metadata[node_id.0 as usize] = meta_id;
1361 }
1362 }
1363 }
1364
1365 fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1366 debug_assert!(reversed_id != NodeId::MISSING);
1367 match self.reversed.get_mut(node_id.0 as usize) {
1368 Some(v) => *v = reversed_id,
1369 None => {
1370 self.reversed
1371 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1372 self.reversed[node_id.0 as usize] = reversed_id;
1373 }
1374 }
1375 }
1376
1377 fn init(&mut self, inst: NodeKey) -> NodeId {
1378 self.num_created += 1;
1379 let node_id = NodeId(self.num_created);
1380 self.index.insert(inst.clone(), node_id);
1381 match inst.kind {
1382 Kind::Pred => {
1383 let meta_id = self.mb.get_meta_id(Metadata {
1384 flags: MetaFlags::ZERO,
1385 nulls: NullsId::EMPTY,
1386 });
1387 self.init_metadata(node_id, meta_id);
1388 }
1389 Kind::Begin => {
1390 let meta_id = self.mb.get_meta_id(Metadata {
1391 flags: MetaFlags::with_nullability(
1392 Nullability::BEGIN,
1393 MetaFlags::CONTAINS_ANCHORS,
1394 ),
1395 nulls: NullsId::BEGIN0,
1396 });
1397 self.init_metadata(node_id, meta_id);
1398 }
1399 Kind::End => {
1400 let meta_id = self.mb.get_meta_id(Metadata {
1401 flags: MetaFlags::with_nullability(
1402 Nullability::END,
1403 MetaFlags::CONTAINS_ANCHORS,
1404 ),
1405 nulls: NullsId::END0,
1406 });
1407 self.init_metadata(node_id, meta_id);
1408 }
1409 Kind::Inter => {
1410 let m1 = self.get_node_meta_id(inst.left);
1411 let m2 = self.get_node_meta_id(inst.right);
1412 let meta_id = {
1413 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1414 let mask_l = inst.left.nullability(self);
1415 let mask_r = inst.right.nullability(self);
1416 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1417 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1418 nulls = self.mb.nb.and_mask(nulls, mask_l);
1419 nulls = self.mb.nb.and_mask(nulls, mask_r);
1420 let new_meta = Metadata {
1421 flags: self.mb.flags_inter(m1, m2),
1422 nulls,
1423 };
1424 self.mb.get_meta_id(new_meta)
1425 };
1426 self.init_metadata(node_id, meta_id);
1427 }
1428 Kind::Union => {
1429 let m1 = self.get_node_meta_id(inst.left);
1430 let m2 = self.get_node_meta_id(inst.right);
1431 let meta_id = {
1432 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1433 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1434 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1435 let new_meta = Metadata {
1436 flags: self.mb.flags_union(m1, m2),
1437 nulls,
1438 };
1439 self.mb.get_meta_id(new_meta)
1440 };
1441 self.init_metadata(node_id, meta_id);
1442 }
1443 Kind::Concat => {
1444 let flags = self.mb.flags_concat(
1445 self.get_node_meta_id(inst.left),
1446 self.get_node_meta_id(inst.right),
1447 );
1448
1449 let right_nullability = inst.right.nullability(self);
1450 let left_nullability = inst.left.nullability(self);
1451 let nulls_left = self.get_nulls_id(inst.left);
1452 let nulls_right = self.get_nulls_id(inst.right);
1453 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1454 let mask = right_nullability.and(left_nullability);
1455 nulls = self.mb.nb.and_mask(nulls, mask);
1456
1457 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1458 self.init_metadata(node_id, new_id);
1459 }
1460 Kind::Star => {
1461 let left_nulls = self.get_nulls_id(inst.left);
1462 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1463 let meta_id = self.mb.get_meta_id(Metadata {
1464 flags: self
1465 .mb
1466 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1467 nulls,
1468 });
1469 self.init_metadata(node_id, meta_id);
1470 }
1471 Kind::Compl => {
1472 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1473 let meta_id = self.mb.get_meta_id(Metadata {
1474 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1475 nulls,
1476 });
1477 self.init_metadata(node_id, meta_id);
1478 }
1479 Kind::Lookbehind => {
1480 let mut null = self.get_meta_flags(inst.left).nullability();
1481 let mut contains_flags = self.get_flags_contains(inst.left);
1482 if !inst.right.is_missing() {
1483 null = null.and(self.get_meta_flags(inst.right).nullability());
1484 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1485 }
1486
1487 let left_nulls = self.get_nulls_id(inst.left);
1488 let right_nulls = self.get_nulls_id(inst.right);
1489 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1490 let meta_id = self.mb.get_meta_id(Metadata {
1491 flags: MetaFlags::with_nullability(
1492 null,
1493 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1494 ),
1495 nulls,
1496 });
1497 self.init_metadata(node_id, meta_id);
1498 }
1499 Kind::Lookahead => {
1500 let mut nulls = self.get_nulls_id(inst.left);
1501 let left_nullability = inst.left.nullability(self);
1502 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1503 nulls = self.mb.nb.or_id(nulls, nulls_right);
1504 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1505
1506 let la_inner = inst.left;
1507 let la_tail = inst.right;
1508 let null = self
1509 .get_meta_flags(la_inner)
1510 .nullability()
1511 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1512 let contains_flags = self
1513 .get_flags_contains(la_inner)
1514 .or(self.get_flags_contains(la_tail));
1515
1516 let meta_id = self.mb.get_meta_id(Metadata {
1517 flags: MetaFlags::with_nullability(
1518 null,
1519 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1520 ),
1521 nulls,
1522 });
1523 self.init_metadata(node_id, meta_id);
1524 }
1525 Kind::Counted => {
1526 let best = inst.extra >> 16;
1527 let (null, nulls) = if best > 0 {
1528 let mut ns = BTreeSet::new();
1529 ns.insert(NullState::new(Nullability::CENTER, best));
1530 (Nullability::CENTER, self.mb.nb.get_id(ns))
1531 } else {
1532 (Nullability::NEVER, NullsId::EMPTY)
1533 };
1534 let meta_id = self.mb.get_meta_id(Metadata {
1535 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1536 nulls,
1537 });
1538 self.init_metadata(node_id, meta_id);
1539 }
1540 }
1541
1542 self.array.push(inst);
1543
1544 if let Some(rw) = self.post_init_simplify(node_id) {
1545 self.override_as(node_id, rw)
1546 } else {
1547 node_id
1548 }
1549 }
1550
1551 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1552 match self.get_kind(node_id) {
1553 Kind::Concat => {
1554 let lhs = node_id.left(self);
1555 let rhs = node_id.right(self);
1556 if lhs.is_pred_star(self).is_some() {
1557 if let Some(opttail) = rhs.is_opt_v(self) {
1558 if let Some(true) = self.subsumes(lhs, opttail) {
1559 return Some(lhs);
1561 }
1562 }
1563 }
1564 }
1565 Kind::Union => {
1566 let lhs = node_id.left(self);
1567 let rhs = node_id.right(self);
1568 let mut subsumed = false;
1569 rhs.iter_union_while(self, &mut |b, branch| {
1570 if b.nullable_subsumes(branch, lhs) {
1571 subsumed = true;
1572 }
1573 !subsumed
1574 });
1575 if subsumed {
1576 return Some(rhs);
1577 }
1578 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1579 return Some(rhs);
1580 }
1581 }
1582 _ => {}
1583 }
1584
1585 None
1586 }
1587
1588 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1590 if self.get_kind(lhs) != Kind::Union {
1591 return false; }
1593 let mut rhs_branches = Vec::new();
1594 let mut curr = rhs;
1595 while self.get_kind(curr) == Kind::Union {
1596 rhs_branches.push(self.get_left(curr));
1597 curr = self.get_right(curr);
1598 }
1599 rhs_branches.push(curr);
1600
1601 curr = lhs;
1602 while self.get_kind(curr) == Kind::Union {
1603 if !rhs_branches.contains(&self.get_left(curr)) {
1604 return false;
1605 }
1606 curr = self.get_right(curr);
1607 }
1608 rhs_branches.contains(&curr)
1609 }
1610
1611 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1613 if node == target {
1614 return true;
1615 }
1616 match self.get_kind(node) {
1617 Kind::Union => {
1618 self.nullable_subsumes(self.get_left(node), target)
1619 || self.nullable_subsumes(self.get_right(node), target)
1620 }
1621 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1622 self.nullable_subsumes(self.get_right(node), target)
1623 }
1624 _ => false,
1625 }
1626 }
1627
1628 pub fn num_nodes(&self) -> u32 {
1629 self.num_created
1630 }
1631 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1632 match self.index.get(&inst) {
1633 Some(&id) => id,
1634 None => self.init(inst),
1635 }
1636 }
1637 #[inline]
1638 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1639 self.index.get(inst)
1640 }
1641
1642 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1643 self.index.insert(key, subsumed);
1644 subsumed
1645 }
1646
1647 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1648 let key = &self.array[key.0 as usize];
1649 let entry = self.index.get_mut(key).unwrap();
1650 *entry = subsumed;
1651 subsumed
1652 }
1653
1654 #[inline]
1655 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1656 self.array[node_id.0 as usize].left
1657 }
1658
1659 #[inline]
1660 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1661 self.array[node_id.0 as usize].right
1662 }
1663
1664 #[inline]
1665 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1666 self.array[node_id.0 as usize].extra
1667 }
1668
1669 #[inline]
1670 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1671 debug_assert!(self.get_kind(node_id) == Kind::Concat);
1672 let mut curr = node_id;
1673 while self.get_kind(curr) == Kind::Concat {
1674 curr = curr.right(self);
1675 }
1676 curr
1677 }
1678
1679 fn has_trailing_la(&self, node: NodeId) -> bool {
1681 let end = match self.get_kind(node) {
1682 Kind::Concat => self.get_concat_end(node),
1683 Kind::Lookahead => node,
1684 _ => return false,
1685 };
1686 self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1687 }
1688
1689 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1691 if self.get_kind(node) == Kind::Lookahead {
1692 return (NodeId::EPS, node);
1693 }
1694 debug_assert!(self.get_kind(node) == Kind::Concat);
1695 let right = node.right(self);
1696 if self.get_kind(right) != Kind::Concat {
1697 return (node.left(self), right);
1699 }
1700 let (stripped, la) = self.strip_trailing_la(right);
1701 (self.mk_concat(node.left(self), stripped), la)
1702 }
1703 #[inline]
1704 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1705 debug_assert!(matches!(
1706 self.get_kind(lookahead_node_id),
1707 Kind::Lookahead | Kind::Counted
1708 ));
1709 lookahead_node_id.left(self)
1710 }
1711 #[inline]
1712 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1713 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1714 lookahead_node_id.right(self)
1715 }
1716 #[inline]
1717 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1718 debug_assert!(
1719 matches!(
1720 self.get_kind(lookahead_node_id),
1721 Kind::Lookahead | Kind::Counted
1722 ),
1723 "not lookahead/counted: {:?}",
1724 self.pp(lookahead_node_id)
1725 );
1726 self.get_extra(lookahead_node_id)
1727 }
1728 #[inline]
1729 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1730 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1731 lookbehind_node_id.left(self)
1732 }
1733 #[inline]
1734 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1735 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1736 lookbehind_node_id.right(self)
1737 }
1738
1739 #[inline]
1740 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1741 debug_assert!(
1742 self.array.len() > node_id.0 as usize,
1743 "array len: {:?}",
1744 node_id
1745 );
1746 self.array[node_id.0 as usize].kind
1747 }
1748
1749 #[inline]
1750 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1751 &self.array[node_id.0 as usize]
1752 }
1753
1754 #[inline]
1755 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1756 self.metadata[node_id.0 as usize]
1757 }
1758
1759 #[inline]
1760 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1761 debug_assert!(node_id.0 != u32::MAX);
1762 let meta_id = self.get_node_meta_id(node_id);
1763 debug_assert!(meta_id != MetadataId::MISSING);
1764 &self.mb.array[meta_id.0 as usize]
1765 }
1766
1767 #[inline]
1768 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1769 if node_id == NodeId::MISSING {
1770 NullsId::EMPTY
1771 } else {
1772 self.get_meta(node_id).nulls
1773 }
1774 }
1775
1776 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1777 self.mb
1778 .nb
1779 .array
1780 .iter()
1781 .map(|set| set.iter().cloned().collect())
1782 .collect()
1783 }
1784
1785 pub fn nulls_count(&self) -> usize {
1786 self.mb.nb.array.len()
1787 }
1788
1789 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1790 self.mb.nb.array[id as usize].iter().cloned().collect()
1791 }
1792
1793 #[inline]
1794 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1795 if node_id == NodeId::MISSING {
1796 NullsId::EMPTY
1797 } else {
1798 let nulls = self.get_meta(node_id).nulls;
1799 self.mb.nb.and_mask(nulls, mask)
1800 }
1801 }
1802
1803 #[inline]
1804 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1805 let meta_id = self.get_node_meta_id(node_id);
1806 let meta = &self.mb.array[meta_id.0 as usize];
1807 meta.flags
1808 }
1809
1810 #[inline]
1811 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1812 self.get_meta(node_id).flags.nullability()
1813 }
1814
1815 #[inline]
1816 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1817 let meta_id = self.get_node_meta_id(node_id);
1818 let meta = &self.mb.array[meta_id.0 as usize];
1819 meta.flags.all_contains_flags()
1820 }
1821
1822 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1823 if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1824 return self.strip_lb(node_id.right(self));
1825 }
1826 self.strip_lb_inner(node_id)
1827 }
1828
1829 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1830 if !self.contains_look(node_id) {
1831 return Ok(node_id);
1832 }
1833 if self.get_kind(node_id) == Kind::Concat
1834 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1835 {
1836 return self.strip_lb_inner(node_id.right(self));
1837 }
1838 if self.get_kind(node_id) == Kind::Inter {
1839 let left = self.strip_lb_inner(node_id.left(self))?;
1840 let right = self.strip_lb_inner(node_id.right(self))?;
1841 return Ok(self.mk_inter(left, right));
1842 }
1843 if self.get_kind(node_id) == Kind::Union {
1844 let left = self.strip_lb_inner(node_id.left(self))?;
1845 let right = self.strip_lb_inner(node_id.right(self))?;
1846 return Ok(self.mk_union(left, right));
1847 }
1848 match self.get_kind(node_id) {
1849 Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1850 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1851 Err(AlgebraError::UnsupportedPattern)
1852 }
1853 _ => Ok(node_id),
1854 }
1855 }
1856
1857 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1859 if !self.contains_anchors(node_id) {
1860 return node_id;
1861 }
1862 match self.get_kind(node_id) {
1863 Kind::Begin => NodeId::BOT,
1864 Kind::Concat => {
1865 let left = self.nonbegins(node_id.left(self));
1866 if left == NodeId::BOT {
1867 return NodeId::BOT;
1868 }
1869 self.mk_concat(left, node_id.right(self))
1870 }
1871 Kind::Union => {
1872 let left = self.nonbegins(node_id.left(self));
1873 let right = self.nonbegins(node_id.right(self));
1874 self.mk_union(left, right)
1875 }
1876 _ => node_id,
1877 }
1878 }
1879
1880 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1881 match self.get_kind(node_id) {
1882 Kind::Concat => {
1883 let head = node_id.left(self);
1884 match self.get_kind(head) {
1885 _ if self.any_nonbegin_nullable(head) => {
1886 self.strip_prefix_safe(node_id.right(self))
1887 }
1888 _ => node_id,
1889 }
1890 }
1891 _ => node_id,
1892 }
1893 }
1894 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1895 match self.get_kind(node_id) {
1896 Kind::Begin => NodeId::BOT,
1897 Kind::Concat => {
1898 let head = self.prune_begin(node_id.left(self));
1899 let tail = self.prune_begin(node_id.right(self));
1900 self.mk_concat(head, tail)
1901 }
1902 Kind::Lookbehind => {
1903 if !node_id.right(self).is_missing() {
1904 return node_id;
1905 }
1906 let head = self.prune_begin(node_id.left(self));
1907 head
1908 }
1909 Kind::Union => {
1910 let left = self.prune_begin(node_id.left(self));
1911 let right = self.prune_begin(node_id.right(self));
1912 self.mk_union(left, right)
1913 }
1914 _ => node_id,
1915 }
1916 }
1917
1918 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1919 if !self.contains_look(node_id) {
1920 return Ok(node_id);
1921 }
1922 if self.get_kind(node_id) == Kind::Concat
1923 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1924 {
1925 let left = node_id.left(self);
1926 let ll = left.left(self).missing_to_eps();
1927 let lr = left.right(self).missing_to_eps();
1928 let new_l = self.mk_concat(ll, lr);
1929 let new = self.mk_concat(new_l, node_id.right(self));
1930 return Ok(new);
1931 }
1932 if self.get_kind(node_id) == Kind::Inter {
1933 let left = self.normalize_rev(node_id.left(self))?;
1934 let right = self.normalize_rev(node_id.right(self))?;
1935 return Ok(self.mk_inter(left, right));
1936 }
1937 if self.get_kind(node_id) == Kind::Union {
1938 let left = self.normalize_rev(node_id.left(self))?;
1939 let right = self.normalize_rev(node_id.right(self))?;
1940 return Ok(self.mk_union(left, right));
1941 }
1942 match self.get_kind(node_id) {
1943 Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1944 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1945 Err(AlgebraError::UnsupportedPattern)
1946 }
1947 _ => Ok(node_id),
1948 }
1949 }
1950
1951 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1952 debug_assert!(node_id != NodeId::MISSING);
1953 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1954 if *rev != NodeId::MISSING {
1955 return Ok(*rev);
1956 }
1957 }
1958 let rw = match self.get_kind(node_id) {
1959 Kind::End => NodeId::BEGIN,
1960 Kind::Begin => NodeId::END,
1961 Kind::Pred => node_id,
1962 Kind::Concat => {
1963 let left = self.reverse(node_id.left(self))?;
1964 let right = self.reverse(node_id.right(self))?;
1965 self.mk_concat(right, left)
1966 }
1967 Kind::Union => {
1968 let left = self.reverse(node_id.left(self))?;
1969 let right = self.reverse(node_id.right(self))?;
1970 self.mk_union(left, right)
1971 }
1972 Kind::Inter => {
1973 let left = self.reverse(node_id.left(self))?;
1974 let right = self.reverse(node_id.right(self))?;
1975 self.mk_inter(left, right)
1976 }
1977 Kind::Star => {
1978 let body = self.reverse(node_id.left(self))?;
1979 self.mk_star(body)
1980 }
1981 Kind::Compl => {
1982 if self.contains_look(node_id.left(self)) {
1983 return Err(AlgebraError::UnsupportedPattern);
1984 }
1985 let body = self.reverse(node_id.left(self))?;
1986 self.mk_compl(body)
1987 }
1988 Kind::Lookbehind => {
1989 let prev = self.get_lookbehind_prev(node_id);
1990 let inner_id = self.get_lookbehind_inner(node_id);
1991 let rev_inner = self.reverse(inner_id)?;
1992 let rev_prev = if prev != NodeId::MISSING {
1993 self.reverse(prev)?
1994 } else {
1995 NodeId::MISSING
1996 };
1997 self.mk_lookahead(rev_inner, rev_prev, 0)
1998 }
1999 Kind::Lookahead => {
2000 let rel = self.get_lookahead_rel(node_id);
2001 if rel == u32::MAX {
2002 let lbody = self.get_lookahead_inner(node_id);
2004 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2005 let lbody_ts = self.mk_concat(lbody, NodeId::TS);
2006 let ltail_ts = self.mk_concat(ltail, NodeId::TS);
2007 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2008 let rev = self.reverse(as_inter)?;
2009 self.init_reversed(node_id, rev);
2010 return Ok(rev);
2011 }
2012 if rel != 0 {
2013 return Err(AlgebraError::UnsupportedPattern);
2014 }
2015 let tail_node = self.get_lookahead_tail(node_id);
2016 let rev_tail = if tail_node != NodeId::MISSING {
2017 self.reverse(tail_node)?
2018 } else {
2019 NodeId::MISSING
2020 };
2021 let inner_id = self.get_lookahead_inner(node_id);
2022 let rev_inner = self.reverse(inner_id)?;
2023 self.mk_lookbehind(rev_inner, rev_tail)
2024 }
2025 Kind::Counted => {
2026 return Err(AlgebraError::UnsupportedPattern);
2027 }
2028 };
2029 self.init_reversed(node_id, rw);
2030 Ok(rw)
2031 }
2032
2033 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2034 let node = NodeKey {
2035 kind: Kind::Pred,
2036 left: NodeId::MISSING,
2037 right: NodeId::MISSING,
2038 extra: pred.0,
2039 };
2040 self.get_node_id(node)
2041 }
2042
2043 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2044 let key = NodeKey {
2045 kind: Kind::Compl,
2046 left: body,
2047 right: NodeId::MISSING,
2048 extra: u32::MAX,
2049 };
2050 if let Some(id) = self.key_is_created(&key) {
2051 return *id;
2052 }
2053 if body == NodeId::BOT {
2054 return NodeId::TS;
2055 }
2056 if body == NodeId::TS {
2057 return NodeId::BOT;
2058 }
2059
2060 if let Some(contains_body) = body.is_contains(self) {
2061 if contains_body.is_pred(self) {
2062 let pred = contains_body.pred_tset(self);
2063 let notpred = self.mk_pred_not(pred);
2064 let node = self.mk_star(notpred);
2065 return self.init_as(key, node);
2066 } else if contains_body == NodeId::END {
2067 return self.init_as(key, NodeId::BOT);
2068 }
2069 };
2070
2071 if self.get_kind(body) == Kind::Compl {
2072 return self.get_node(body).left;
2073 }
2074
2075 self.get_node_id(key)
2076 }
2077
2078 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2079 let nid = self.get_nulls_id(body);
2080 let nref = self.mb.nb.get_set_ref(nid).clone();
2081 let mut futures = NodeId::BOT;
2082 for n in nref.iter() {
2083 if !n.is_mask_nullable(mask) {
2084 continue;
2085 }
2086
2087 let eff = if n.rel == 0 {
2088 NodeId::EPS
2089 } else {
2090 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2091 };
2092 futures = self.mk_union(futures, eff)
2093 }
2094 futures
2095 }
2096
2097 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2098 if cfg!(feature = "norewrite") {
2099 return None;
2100 }
2101
2102 if self.get_kind(tail) == Kind::Lookbehind {
2103 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2104 return self
2105 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2106 .ok();
2107 }
2108 if self.get_kind(head) == Kind::Lookahead {
2109 let la_tail = self.get_lookahead_tail(head);
2110 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2111 if new_la_tail.is_center_nullable(self) {
2112 let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2113 if non_null_tail == NodeId::BOT {
2114 return None;
2115 }
2116 let la_body = self.get_lookahead_inner(head);
2117 return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2118 }
2119 let la_body = self.get_lookahead_inner(head);
2120 let la_rel = self.get_lookahead_rel(head);
2121 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2122 let tail_rel = self.get_lookahead_rel(new_la_tail);
2123 tail_rel + la_rel
2124 } else {
2125 u32::MAX
2126 };
2127
2128 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2129 }
2130
2131 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2132 return Some(head);
2133 }
2134 if head == NodeId::TS && tail == NodeId::END {
2135 return Some(head);
2136 }
2137
2138 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2139 return Some(NodeId::TS);
2140 }
2141
2142 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2143 return Some(NodeId::TS);
2144 }
2145
2146 if self.get_kind(tail) == Kind::Union && head == NodeId::TS {
2147 let mut should_distribute_top = false;
2148 self.iter_unions(tail, |v| {
2149 if v == NodeId::BEGIN || self.starts_with_ts(v) {
2150 should_distribute_top = true;
2151 }
2152 });
2153 if should_distribute_top {
2154 let mut new_union = NodeId::BOT;
2155 let mut curr = tail;
2156 while self.get_kind(curr) == Kind::Union {
2157 let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2158 new_union = self.mk_union(new_node, new_union);
2159 curr = curr.right(self);
2160 }
2161 let new_node = self.mk_concat(NodeId::TS, curr);
2162 new_union = self.mk_union(new_union, new_node);
2163 return Some(new_union);
2164 }
2165 }
2166
2167 if self.get_kind(head) == Kind::Union && head == NodeId::TS {
2168 let mut should_distribute_top = false;
2169 self.iter_unions(head, |v| {
2170 if v == NodeId::END || self.starts_with_ts(v) {
2171 should_distribute_top = true;
2172 }
2173 });
2174 if should_distribute_top {
2175 let mut new_union = NodeId::BOT;
2176 let mut curr = head;
2177 while self.get_kind(curr) == Kind::Union {
2178 let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2179 new_union = self.mk_union(new_node, new_union);
2180 curr = curr.right(self);
2181 }
2182 let new_node = self.mk_concat(curr, NodeId::TS);
2183 new_union = self.mk_union(new_union, new_node);
2184 return Some(new_union);
2185 }
2186 }
2187
2188 if self.get_kind(head) == Kind::Inter && tail == NodeId::TS {
2189 let mut alltopstar = true;
2190 iter_inter!(self, head, |v| {
2191 alltopstar = self.ends_with_ts(v);
2192 });
2193 if alltopstar {
2194 return Some(head);
2195 }
2196 }
2197
2198 if head.is_star(self) && head == tail {
2199 return Some(head);
2200 }
2201
2202 None
2203 }
2204
2205 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2206 use Kind::*;
2207
2208 if cfg!(feature = "norewrite") {
2209 return None;
2210 }
2211 if left == right {
2212 return Some(left);
2213 }
2214
2215 if right.is_kind(self, Kind::Union) && left == right.left(self) {
2216 return Some(right);
2217 }
2218
2219 if self.get_kind(left) == Kind::Pred && self.get_kind(right) == Kind::Pred {
2220 let l = left.pred_tset(self);
2221 let r = right.pred_tset(self);
2222 let solver = self.solver();
2223 let psi = solver.or_id(l, r);
2224 let rewrite = self.mk_pred(psi);
2225 return Some(rewrite);
2226 }
2227
2228 if left == NodeId::EPS
2229 && self.get_extra(left) == 0
2230 && self.nullability(right) == Nullability::ALWAYS
2231 {
2232 return Some(right);
2233 }
2234
2235 if self.get_kind(left) == Kind::Lookahead && self.get_kind(right) == Kind::Lookahead {
2236 let lb = left.left(self);
2237 let lt = left.right(self);
2238 let lrel = left.extra(self);
2239
2240 let rb = right.left(self);
2241 let rt = right.right(self);
2242 let rrel = right.extra(self);
2243
2244 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2245 let unioned = self.mk_union(lb, rb);
2246 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2247 return Some(node);
2248 }
2249 }
2250
2251 if right.is_kind(self, Concat) {
2252 if left == NodeId::END
2253 && right.left(self) == NodeId::END
2254 && self.nullability(right).has(Nullability::END)
2255 {
2256 return Some(right);
2257 }
2258 if left == right.left(self) {
2260 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2261 let rw = self.mk_concat(left, rhs);
2262 return Some(rw);
2263 }
2264 if left.is_kind(self, Concat) {
2265 let head1 = left.left(self);
2266 let head2 = right.left(self);
2267
2268 if head1 == head2 {
2269 let t1 = left.right(self);
2270 let t2 = right.right(self);
2271 if head1 == NodeId::TS {
2273 if t2.has_concat_tail(self, t1) {
2274 return Some(left);
2275 }
2276 if t1.has_concat_tail(self, t2) {
2277 return Some(right);
2278 }
2279 }
2280 let un = self.mk_union(t1, t2);
2281 return Some(self.mk_concat(left.left(self), un));
2282 }
2283
2284 if false {
2287 let end1 = self.get_concat_end(left);
2288 let end2 = self.get_concat_end(right);
2289 if end1 == end2 {
2290 let flags = left.flags_contains(self).or(right.flags_contains(self));
2291 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2292 let rev1 = self.reverse(left).unwrap();
2293 let rev2 = self.reverse(right).unwrap();
2294
2295 let union_rev = self.mk_union(rev1, rev2);
2296 return Some(self.reverse(union_rev).unwrap());
2297 }
2298 }
2299 }
2300 }
2301 if left.is_pred(self) && left == right.left(self) {
2302 let un = self.mk_opt(right.right(self));
2303 let conc = self.mk_concat(left, un);
2304 return Some(conc);
2305 }
2306 }
2307
2308 if left == NodeId::EPS && right.is_plus(self) {
2309 let result = self.mk_star(right.left(self));
2310 return Some(result);
2311 }
2312
2313 if self.flags == BuilderFlags::SUBSUME {
2315 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2316 return Some(rw);
2317 }
2318 }
2319
2320 None
2321 }
2322
2323 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2324 let mut curr = node;
2325 while self.get_kind(curr) == Kind::Inter {
2326 out.push(self.get_left(curr));
2327 curr = self.get_right(curr);
2328 }
2329 out.push(curr);
2330 }
2331
2332 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2335 let mut curr = node;
2336 let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2337 if has_prefix {
2338 curr = self.get_right(curr);
2339 }
2340 let mut count = 0u32;
2341 let mut pred_set = None;
2342 while self.get_kind(curr) == Kind::Concat {
2343 let head = self.get_left(curr);
2344 if self.get_kind(head) != Kind::Pred {
2345 return None;
2346 }
2347 let ps = head.pred_tset(self);
2348 match pred_set {
2349 None => pred_set = Some(ps),
2350 Some(existing) => {
2351 if existing != ps {
2352 return None;
2353 }
2354 }
2355 }
2356 curr = self.get_right(curr);
2357 count += 1;
2358 }
2359 if count == 0 || self.get_kind(curr) != Kind::Star {
2360 return None;
2361 }
2362 Some((has_prefix, pred_set.unwrap(), curr, count))
2363 }
2364
2365 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2366 let mut j = 0;
2367 for &s in sub {
2368 while j < sup.len() && sup[j] < s {
2369 j += 1;
2370 }
2371 if j >= sup.len() || sup[j] != s {
2372 return false;
2373 }
2374 j += 1;
2375 }
2376 true
2377 }
2378
2379 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2380 if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2381 return None;
2382 }
2383
2384 let mut lc: Vec<NodeId> = Vec::new();
2385 let mut rc: Vec<NodeId> = Vec::new();
2386 self.collect_inter_components(left, &mut lc);
2387 self.collect_inter_components(right, &mut rc);
2388
2389 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2392 return Some(left);
2393 }
2394 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2396 return Some(right);
2397 }
2398
2399 if lc.len() == rc.len() {
2400 let mut diff_idx = None;
2401 for i in 0..lc.len() {
2402 if lc[i] != rc[i] {
2403 if diff_idx.is_some() {
2404 return None;
2405 }
2406 diff_idx = Some(i);
2407 }
2408 }
2409 if let Some(idx) = diff_idx {
2410 let a = lc[idx];
2411 let b = rc[idx];
2412 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2413 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2414 {
2415 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2416 return if ca < cb { Some(left) } else { Some(right) };
2417 }
2418 }
2419 }
2420 }
2421
2422 None
2423 }
2424
2425 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2426 if cfg!(feature = "norewrite") {
2427 return None;
2428 }
2429 if left == right {
2430 return Some(left);
2431 }
2432
2433 if self.get_kind(right) == Kind::Union {
2434 let mut result = NodeId::BOT;
2435 self.iter_unions_b(
2436 right,
2437 &mut (|b, v| {
2438 let new_inter = b.mk_inter(v, left);
2439 result = b.mk_union(result, new_inter);
2440 }),
2441 );
2442 return Some(result);
2443 }
2444 if self.get_kind(left) == Kind::Union {
2445 let mut result = NodeId::BOT;
2446 self.iter_unions_b(
2447 left,
2448 &mut (|b, v| {
2449 let new_inter = b.mk_inter(v, right);
2450 result = b.mk_union(result, new_inter);
2451 }),
2452 );
2453 return Some(result);
2454 }
2455
2456 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2457 return Some(NodeId::BOT);
2458 }
2459
2460 if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2461 let bodies = self.mk_union(left.left(self), right.left(self));
2462 return Some(self.mk_compl(bodies));
2463 }
2464
2465 if left == NodeId::TOPPLUS {
2466 if right.is_pred_star(self).is_some() {
2467 let newloop = self.mk_plus(right.left(self));
2468 return Some(newloop);
2469 }
2470 if right.is_never_nullable(self) {
2471 return Some(right);
2472 }
2473 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2474 return Some(NodeId::BOT);
2475 }
2476 if right.is_kind(self, Kind::Concat) {}
2477 }
2478
2479 {
2483 let l_is_la = left.is_lookahead(self);
2484 let r_is_la = right.is_lookahead(self);
2485 let l_is_cla = !l_is_la
2486 && self.get_kind(left) == Kind::Concat
2487 && self.get_kind(left.left(self)) == Kind::Lookahead;
2488 let r_is_cla = !r_is_la
2489 && self.get_kind(right) == Kind::Concat
2490 && self.get_kind(right.left(self)) == Kind::Lookahead;
2491 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2492 let (la_node, other, concat_body) = if r_is_la {
2493 (right, left, NodeId::MISSING)
2494 } else if l_is_la {
2495 (left, right, NodeId::MISSING)
2496 } else if r_is_cla {
2497 (right.left(self), left, right.right(self))
2498 } else {
2499 (left.left(self), right, left.right(self))
2500 };
2501 let la_body = la_node.left(self);
2502 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2503 let inter_right = if concat_body.is_missing() {
2504 la_tail
2505 } else {
2506 self.mk_concat(la_tail, concat_body)
2507 };
2508 let new_body = self.mk_inter(other, inter_right);
2509 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2510 return Some(self.mk_concat(la, new_body));
2511 }
2512 }
2513
2514 if self.get_kind(right) == Kind::Compl {
2515 let compl_body = right.left(self);
2516 if left == compl_body {
2517 return Some(NodeId::BOT);
2518 }
2519 if self.get_kind(compl_body) == Kind::Concat {
2520 let compl_head = compl_body.left(self);
2521 if compl_body.right(self) == NodeId::TS && compl_head == left {
2523 return Some(NodeId::BOT);
2524 }
2525 }
2526 }
2527
2528 if let Some(pleft) = left.is_pred_star(self) {
2529 if let Some(pright) = right.is_pred_star(self) {
2530 let merged = self.mk_inter(pleft, pright);
2531 return Some(self.mk_star(merged));
2532 }
2533 }
2534
2535 {
2539 let l_is_clb = self.get_kind(left) == Kind::Concat
2540 && self.get_kind(left.left(self)) == Kind::Lookbehind;
2541 let r_is_clb = self.get_kind(right) == Kind::Concat
2542 && self.get_kind(right.left(self)) == Kind::Lookbehind;
2543 if l_is_clb || r_is_clb {
2544 let (lb, body) = if l_is_clb && r_is_clb {
2545 let lb1 = left.left(self);
2546 let lb2 = right.left(self);
2547 let inner = self.mk_inter(
2548 self.get_lookbehind_inner(lb1),
2549 self.get_lookbehind_inner(lb2),
2550 );
2551 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2553 let body = self.mk_inter(left.right(self), right.right(self));
2554 (lb, body)
2555 } else if l_is_clb {
2556 let lb = left.left(self);
2557 let body = self.mk_inter(left.right(self), right);
2558 (lb, body)
2559 } else {
2560 let lb = right.left(self);
2561 let body = self.mk_inter(left, right.right(self));
2562 (lb, body)
2563 };
2564 return Some(self.mk_concat(lb, body));
2565 }
2566 }
2567
2568 {
2572 let l_has_la = self.has_trailing_la(left);
2573 let r_has_la = self.has_trailing_la(right);
2574 if l_has_la || r_has_la {
2575 let (body, la) = if l_has_la && r_has_la {
2576 let (lbody, l_la) = self.strip_trailing_la(left);
2577 let (rbody, r_la) = self.strip_trailing_la(right);
2578 let inner = self.mk_inter(
2579 self.get_lookahead_inner(l_la),
2580 self.get_lookahead_inner(r_la),
2581 );
2582 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2583 let body = self.mk_inter(lbody, rbody);
2584 (body, la)
2585 } else if l_has_la {
2586 let (lbody, la) = self.strip_trailing_la(left);
2587 let body = self.mk_inter(lbody, right);
2588 (body, la)
2589 } else {
2590 let (rbody, la) = self.strip_trailing_la(right);
2591 let body = self.mk_inter(left, rbody);
2592 (body, la)
2593 };
2594 return Some(self.mk_concat(body, la));
2595 }
2596 }
2597
2598 None
2599 }
2600
2601 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2602 if cfg!(feature = "norewrite") {
2603 return None;
2604 }
2605 debug_assert!(self.get_kind(right_union) == Kind::Union);
2606
2607 let mut rewritten = None;
2608 right_union.iter_union_while(
2609 self,
2610 &mut (|b, v| {
2611 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2612 rewritten = Some((v, rw));
2613 false
2614 } else {
2615 true
2616 }
2617 }),
2618 );
2619
2620 if let Some(rw) = rewritten {
2621 let mut new_union = NodeId::BOT;
2622 right_union.iter_union(
2623 self,
2624 &mut (|b, v| {
2625 if v == rw.0 {
2626 new_union = b.mk_union(rw.1, new_union)
2627 } else {
2628 new_union = b.mk_union(v, new_union)
2629 }
2630 }),
2631 );
2632 return Some(new_union);
2633 };
2634
2635 None
2636 }
2637
2638 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2639 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2640 debug_assert!(tail != NodeId::MISSING);
2641 let key = NodeKey {
2642 kind: Kind::Concat,
2643 left: head,
2644 right: tail,
2645 extra: u32::MAX,
2646 };
2647 if let Some(id) = self.key_is_created(&key) {
2648 return *id;
2649 }
2650
2651 if head == NodeId::BOT || tail == NodeId::BOT {
2652 return NodeId::BOT;
2653 }
2654 if head == NodeId::EPS {
2655 return tail;
2656 }
2657 if tail == NodeId::EPS {
2658 return head;
2659 }
2660
2661 match tail {
2662 NodeId::BEGIN => {
2664 if !self.is_nullable(head, Nullability::BEGIN) {
2665 return NodeId::BOT;
2666 } else {
2667 return NodeId::BEGIN;
2668 }
2669 }
2670 _ => {}
2671 }
2672
2673 if head.is_kind(self, Kind::Concat) {
2675 let left = head.left(self);
2676 let newright = self.mk_concat(head.right(self), tail);
2677 let updated = self.mk_concat(left, newright);
2678 return self.init_as(key, updated);
2679 }
2680
2681 if head == NodeId::TS && tail == NodeId::END {
2682 return NodeId::TS;
2683 }
2684
2685 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2686 return NodeId::BOT;
2687 }
2688
2689 if self.get_kind(tail) == Kind::Concat {
2690 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2691 let upd = self.mk_concat(rw, tail.right(self));
2692 return self.init_as(key, upd);
2693 }
2694 }
2695
2696 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2697 return self.init_as(key, new);
2698 }
2699
2700 match (self.get_kind(head), self.get_kind(tail)) {
2701 (Kind::Star, Kind::Concat) if head.is_star(self) => {
2703 let rl = tail.left(self);
2704 if head == rl {
2705 return self.init_as(key, tail);
2706 }
2707 }
2708 (_, Kind::Concat) => {
2710 let curr = head;
2711 let h2 = self.mk_concat(curr, tail.left(self));
2712 let tr = tail.right(self);
2713 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2714 return self.init_as(key, new);
2715 }
2716 }
2717 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2718 return self.init_as(key, tail);
2719 }
2720 _ => {}
2721 }
2722
2723 self.init(key)
2724 }
2725
2726 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2727 let lb_body = {
2729 match self.starts_with_ts(lb_body) {
2730 true => lb_body,
2731 false => self.mk_concat(NodeId::TS, lb_body),
2732 }
2733 };
2734 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2736 }
2737
2738 fn mk_lookbehind_internal(
2739 &mut self,
2740 lb_body: NodeId,
2741 lb_prev: NodeId,
2742 ) -> Result<NodeId, AlgebraError> {
2743 debug_assert!(lb_body != NodeId::MISSING);
2744 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2745 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2746 return Ok(NodeId::BOT);
2747 }
2748 if lb_body == NodeId::TS {
2749 return Ok(lb_prev);
2750 }
2751 if lb_body == NodeId::EPS {
2752 match lb_prev {
2753 NodeId::MISSING => return Ok(NodeId::EPS),
2754 NodeId::EPS => return Ok(NodeId::EPS),
2755 _ => return Ok(lb_prev),
2756 }
2757 }
2758
2759 let key = NodeKey {
2760 kind: Kind::Lookbehind,
2761 left: lb_body,
2762 right: lb_prev,
2763 extra: u32::MAX,
2764 };
2765 match self.key_is_created(&key) {
2766 Some(id) => Ok(*id),
2767 None => {
2768 if lb_prev == NodeId::TS {
2769 return Ok(self.mk_concat(lb_prev, lb_body));
2770 }
2771
2772 Ok(self.init(key))
2773 }
2774 }
2775 }
2776
2777 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2778 let la_body = {
2780 match self.ends_with_ts(la_body) {
2781 true => la_body,
2782 false => self.mk_concat(la_body, NodeId::TS),
2783 }
2784 };
2785 let rel = if NodeId::MISSING == la_tail {
2786 rel
2787 } else {
2788 match la_tail.is_center_nullable(self) {
2789 false => u32::MAX,
2790 true => rel,
2791 }
2792 };
2793
2794 self.mk_lookahead_internal(la_body, la_tail, rel)
2795 }
2796
2797 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2799 let key = NodeKey {
2800 kind: Kind::Lookahead,
2801 left: la_body,
2802 right: la_tail,
2803 extra: rel,
2804 };
2805 if let Some(id) = self.key_is_created(&key) {
2806 return *id;
2807 }
2808 if la_body == NodeId::TS {
2809 if rel == 0 {
2810 return la_tail.missing_to_eps();
2811 } else {
2812 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2813 }
2814 }
2815 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2816 return NodeId::BOT;
2817 }
2818 if la_tail.is_missing() && rel == u32::MAX {
2819 return NodeId::BOT;
2820 }
2821
2822 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2823 return la_body;
2824 }
2825
2826 if la_tail == NodeId::TS {
2827 if rel == 0 || rel == u32::MAX {
2828 return self.mk_concat(la_body, NodeId::TS);
2829 } else if rel == u32::MAX {
2830 return self.mk_begins_with(la_body);
2831 }
2832 }
2833
2834 if rel == u32::MAX {
2835 if la_tail.is_missing() {
2836 return NodeId::BOT;
2837 }
2838
2839 if self.is_always_nullable(la_body) {
2840 return la_tail.missing_to_eps();
2841 }
2842
2843 if la_tail != NodeId::MISSING {
2844 match self.get_kind(la_tail) {
2845 _ => {
2846 if la_body.is_compl_plus_end(self) {
2847 let minlen = self.get_min_length_only(la_tail);
2848 if minlen >= 1 {
2849 return NodeId::BOT;
2850 }
2851 }
2852 }
2853 }
2854 }
2855 }
2856
2857 if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2859 let la_body2 = self.get_lookahead_inner(la_tail);
2860 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2861 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2862 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2863 let new_la_rel = self.get_lookahead_rel(la_tail);
2864 let new_la_tail = self.get_lookahead_tail(la_tail);
2865 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2866 }
2867
2868 if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2870 let la_body_right = la_body.right(self);
2871 if self.is_always_nullable(la_body_right) {
2872 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2873 }
2874 if la_body.right(self) == NodeId::END {
2875 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2876 }
2877 let bodyright = la_body.right(self);
2878 if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2879 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2880 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2881 }
2882 }
2883
2884 if la_tail != NodeId::MISSING {
2885 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2886 let lpred = la_body.left(self);
2887 if self.get_kind(lpred) == Kind::Pred {
2888 let l = lpred.pred_tset(self);
2889 let r = la_tail.pred_tset(self);
2890 let psi_and = self.solver().and_id(l, r);
2891 let rewrite = self.mk_pred(psi_and);
2892 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2893 let new_right =
2894 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2895 return self.mk_concat(rewrite, new_right);
2896 }
2897 }
2898 }
2899
2900 self.get_node_id(key)
2901 }
2902
2903 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2904 let has_match = (packed >> 16) > 0;
2905 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2906 return NodeId::BOT;
2907 }
2908 debug_assert!(
2909 body == NodeId::BOT || !self.is_infinite(body),
2910 "Counted body must have finite max length"
2911 );
2912 let chain = self.prune_counted_chain(body, chain);
2913 let key = NodeKey {
2914 kind: Kind::Counted,
2915 left: body,
2916 right: chain,
2917 extra: packed,
2918 };
2919 if let Some(id) = self.key_is_created(&key) {
2920 return *id;
2921 }
2922 self.get_node_id(key)
2923 }
2924
2925 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2926 if chain == NodeId::MISSING || body == NodeId::BOT {
2927 return chain;
2928 }
2929 if self.nullability(body) != Nullability::NEVER {
2931 return NodeId::MISSING;
2932 }
2933 let chain_body = chain.left(self);
2934 if chain_body == NodeId::BOT {
2935 return chain; }
2937 let not_begins = self.mk_not_begins_with(body);
2938 let inter = self.mk_inter(chain_body, not_begins);
2939 let is_empty = inter == NodeId::BOT;
2940 if is_empty {
2941 self.prune_counted_chain(body, chain.right(self))
2942 } else {
2943 chain
2944 }
2945 }
2946
2947 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2948 let neg_inner = self.mk_concat(body, NodeId::TS);
2949 let neg_part = self.mk_compl(neg_inner);
2950 let conc = self.mk_concat(neg_part, NodeId::END);
2951 self.mk_lookahead(conc, NodeId::MISSING, rel)
2952 }
2953
2954 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2955 match self.get_node(body).kind {
2956 Kind::Pred => {
2957 let psi = body.pred_tset(self);
2958 let negated = self.mk_pred_not(psi);
2959 let union = self.mk_union(NodeId::BEGIN, negated);
2960 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2962 }
2963 _ => {
2964 let uc = crate::unicode_classes::utf8_char(self);
2967 let neg = self.mk_compl(body);
2968 let negated = self.mk_inter(neg, uc);
2969 let union = self.mk_union(NodeId::BEGIN, negated);
2970 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2972 }
2973 }
2974 }
2975
2976 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2977 debug_assert!(left != NodeId::MISSING);
2978 debug_assert!(right != NodeId::MISSING);
2979 if left > right {
2980 return self.mk_union(right, left);
2981 }
2982 let key = NodeKey {
2983 kind: Kind::Union,
2984 left,
2985 right,
2986 extra: u32::MAX,
2987 };
2988 if let Some(id) = self.key_is_created(&key) {
2989 return *id;
2990 }
2991
2992 if left == right {
2993 return left;
2994 }
2995 if left == NodeId::BOT {
2996 return right;
2997 }
2998 if right == NodeId::BOT {
2999 return left;
3000 }
3001 if right == NodeId::TS {
3002 return right;
3003 }
3004 if left == NodeId::TS {
3005 return left;
3006 }
3007
3008 match (self.get_kind(left), self.get_kind(right)) {
3009 (Kind::Union, _) => {
3010 self.iter_unions_b(left, &mut |b, v| {
3011 b.temp_vec.push(v);
3012 });
3013 self.iter_unions_b(right, &mut |b, v| {
3014 b.temp_vec.push(v);
3015 });
3016 self.temp_vec.sort();
3017 let tree = self.temp_vec.clone();
3018 self.temp_vec.clear();
3019 let newnode = tree
3020 .iter()
3021 .rev()
3022 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3023 return self.init_as(key, newnode);
3024 }
3025 (_, Kind::Union) => {
3026 let rleft = right.left(self);
3027 if left > rleft {
3029 self.iter_unions_b(left, &mut |b, v| {
3030 b.temp_vec.push(v);
3031 });
3032 self.iter_unions_b(right, &mut |b, v| {
3033 b.temp_vec.push(v);
3034 });
3035 self.temp_vec.sort();
3036 let tree = self.temp_vec.clone();
3037 self.temp_vec.clear();
3038 let newnode = tree
3039 .iter()
3040 .rev()
3041 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3042 return self.init_as(key, newnode);
3043 } else {
3044 if let Some(rw) = self.attempt_rw_unions(left, right) {
3045 return self.init_as(key, rw);
3046 }
3047 }
3048 }
3049 _ => {}
3050 }
3051
3052 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3053 return self.init_as(key, rw);
3054 }
3055 self.init(key)
3056 }
3057
3058 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3059 debug_assert!(left_id != NodeId::MISSING);
3060 debug_assert!(right_id != NodeId::MISSING);
3061 if left_id == right_id {
3062 return left_id;
3063 }
3064 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3065 return NodeId::BOT;
3066 }
3067 if left_id == NodeId::TS {
3068 return right_id;
3069 }
3070 if right_id == NodeId::TS {
3071 return left_id;
3072 }
3073 if left_id > right_id {
3074 return self.mk_inter(right_id, left_id);
3075 }
3076 let key = NodeKey {
3077 kind: Kind::Inter,
3078 left: left_id,
3079 right: right_id,
3080 extra: u32::MAX,
3081 };
3082 if let Some(id) = self.key_is_created(&key) {
3083 return *id;
3084 }
3085
3086 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3087 return self.init_as(key, rw);
3088 }
3089
3090 self.init(key)
3091 }
3092
3093 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3094 let node = NodeKey {
3095 kind,
3096 left: NodeId::MISSING,
3097 right: NodeId::MISSING,
3098 extra: u32::MAX,
3099 };
3100 self.init(node)
3101 }
3102
3103 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3104 let star = self.mk_star(body_id);
3105 self.mk_concat(body_id, star)
3106 }
3107 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3108 let opt = self.mk_opt(body_id);
3109 let mut nodes1 = vec![];
3110 for _ in lower..upper {
3111 nodes1.push(opt);
3112 }
3113 for _ in 0..lower {
3114 nodes1.push(body_id);
3115 }
3116 self.mk_concats(nodes1.into_iter())
3117 }
3118 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3119 self.mk_union(NodeId::EPS, body_id)
3120 }
3121
3122 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3123 let key = NodeKey {
3124 kind: Kind::Star,
3125 left: body_id,
3126 right: NodeId::MISSING,
3127 extra: 0,
3128 };
3129 if let Some(id) = self.key_is_created(&key) {
3130 return *id;
3131 }
3132 if body_id.is_kind(self, Kind::Star) {
3134 return body_id;
3135 }
3136 self.get_node_id(key)
3137 }
3138
3139 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3142 match self.get_kind(node_id) {
3143 Kind::End => Nullability::EMPTYSTRING,
3144 Kind::Begin => Nullability::EMPTYSTRING,
3145 Kind::Pred => Nullability::NEVER,
3146 Kind::Star => Nullability::ALWAYS,
3147 Kind::Inter | Kind::Concat => {
3148 let lnull = self.nullability_emptystring(node_id.left(self));
3149 let rnull = self.nullability_emptystring(node_id.right(self));
3150 lnull.and(rnull) }
3152 Kind::Union => {
3153 let lnull = self.nullability_emptystring(node_id.left(self));
3154 let rnull = self.nullability_emptystring(node_id.right(self));
3155 lnull.or(rnull)
3156 }
3157 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3158 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3159 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3160 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3161 }
3162 }
3163
3164 #[inline(always)]
3165 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3166 self.get_meta(node_id)
3167 .flags
3168 .nullability()
3169 .has(Nullability::CENTER.or(Nullability::END))
3170 }
3171
3172 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3173 self.get_only_nullability(node_id)
3174 }
3175
3176 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3177 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3178 }
3179
3180 pub fn pp(&self, node_id: NodeId) -> String {
3181 let mut s = String::new();
3182 self.ppw(&mut s, node_id).unwrap();
3183 s
3184 }
3185
3186 #[allow(dead_code)]
3187 pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3188 let nu = self.get_nulls_id(node_id);
3189 let nr = self.mb.nb.get_set_ref(nu);
3190 let s1 = format!("{:?}", nr);
3191 s1
3192 }
3193
3194 #[allow(dead_code)]
3195 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3196 match self.get_tregex(term_id) {
3197 TRegex::Leaf(node_id) => {
3198 let mut s = String::new();
3199 self.ppw(&mut s, *node_id).unwrap();
3200 s
3201 }
3202 TRegex::ITE(cond, then_id, else_id) => {
3203 format!(
3204 "ITE({},{},{})",
3205 self.solver_ref().pp(*cond),
3206 self.ppt(*then_id),
3207 self.ppt(*else_id)
3208 )
3209 }
3210 }
3211 }
3212
3213 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3214 if cfg!(feature = "graphviz") {
3215 match node_id {
3216 NodeId::MISSING => return write!(s, "MISSING"),
3217 NodeId::BOT => return write!(s, "⊥"),
3218 NodeId::TS => return write!(s, "_*"),
3219 NodeId::TOP => return write!(s, "_"),
3220 NodeId::EPS => return write!(s, "ε"),
3221 _ => {}
3222 }
3223 }
3224
3225 match node_id {
3226 NodeId::MISSING => return write!(s, "MISSING"),
3227 NodeId::BOT => return write!(s, "⊥"),
3228 NodeId::TS => return write!(s, "_*"),
3229 NodeId::TOP => return write!(s, "_"),
3230 NodeId::EPS => return write!(s, ""),
3231 _ => {}
3232 }
3233
3234 match self.get_kind(node_id) {
3235 Kind::End => write!(s, r"\z"),
3236 Kind::Begin => write!(s, r"\A"),
3237 Kind::Pred => {
3238 let psi = node_id.pred_tset(self);
3239 if psi == TSetId::EMPTY {
3240 write!(s, r"⊥")
3241 } else if psi == TSetId::FULL {
3242 write!(s, r"_")
3243 } else {
3244 write!(s, "{}", self.solver_ref().pp(psi))
3245 }
3246 }
3247 Kind::Inter => {
3248 write!(s, "(")?;
3249 self.ppw(s, node_id.left(self))?;
3250 write!(s, "&")?;
3251 let mut curr = node_id.right(self);
3252 while self.get_kind(curr) == Kind::Inter {
3253 let n = curr.left(self);
3254 self.ppw(s, n)?;
3255 write!(s, "&")?;
3256 curr = curr.right(self);
3257 }
3258 self.ppw(s, curr)?;
3259 write!(s, ")")
3260 }
3261 Kind::Union => {
3262 let left = node_id.left(self);
3263 let right = node_id.right(self);
3264 write!(s, "(")?;
3265 self.ppw(s, left)?;
3266 write!(s, "|")?;
3267 let mut curr = right;
3268 while self.get_kind(curr) == Kind::Union {
3269 let n = curr.left(self);
3270 self.ppw(s, n)?;
3271 write!(s, "|")?;
3272 curr = curr.right(self);
3273 }
3274 self.ppw(s, curr)?;
3275 write!(s, ")")
3276 }
3277 Kind::Concat => {
3278 let left = node_id.left(self);
3279 let right = node_id.right(self);
3280 if right.is_star(self) && right.left(self) == left {
3281 self.ppw(s, left)?;
3282 write!(s, "+")?;
3283 return Ok(());
3284 }
3285 if right.is_concat(self) {
3286 let rl = right.left(self);
3287 if rl.is_star(self) && rl.left(self) == left {
3288 self.ppw(s, left)?;
3289 write!(s, "+")?;
3290 return self.ppw(s, right.right(self));
3291 }
3292 }
3293 if right.is_concat(self) && right.left(self) == left {
3294 let mut num = 1;
3295 let mut right = right;
3296 while right.is_concat(self) && right.left(self) == left {
3297 num += 1;
3298 right = right.right(self);
3299 }
3300 if let Some(inner) = left.is_opt_v(self) {
3302 let mut inner_count = 0;
3303 let mut right2 = right;
3304 while right2.is_concat(self) && right2.left(self) == inner {
3305 inner_count += 1;
3306 right2 = right2.right(self);
3307 }
3308 if right2 == inner {
3309 inner_count += 1;
3310 self.ppw(s, inner)?;
3311 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3312 }
3313 if inner_count > 0 {
3314 self.ppw(s, inner)?;
3315 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3316 return self.ppw(s, right2);
3317 }
3318 }
3319 self.ppw(s, left)?;
3320 if right == left {
3321 num += 1;
3322 return write!(s, "{{{}}}", num);
3323 }
3324 if num <= 3 && left.is_pred(self) {
3325 for _ in 1..num {
3326 self.ppw(s, left)?;
3327 }
3328 return self.ppw(s, right);
3329 } else {
3330 write!(s, "{{{}}}", num)?;
3331 return self.ppw(s, right);
3332 }
3333 }
3334 self.ppw(s, left)?;
3335 self.ppw(s, right)
3336 }
3337 Kind::Star => {
3338 let left = node_id.left(self);
3339 let leftkind = self.get_kind(left);
3340 match leftkind {
3341 Kind::Concat | Kind::Star | Kind::Compl => {
3342 write!(s, "(")?;
3343 self.ppw(s, left)?;
3344 write!(s, ")")?;
3345 }
3346 _ => {
3347 self.ppw(s, left)?;
3348 }
3349 };
3350 write!(s, "*")
3351 }
3352 Kind::Compl => {
3353 write!(s, "~(")?;
3354 self.ppw(s, node_id.left(self))?;
3355 write!(s, ")")
3356 }
3357 Kind::Lookbehind => {
3358 let lbleft = self.get_lookbehind_prev(node_id);
3359 let lbinner = self.get_lookbehind_inner(node_id);
3360 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3361 if lbleft != NodeId::MISSING {
3362 write!(s, "「")?;
3363 self.ppw(s, lbleft)?;
3364 write!(s, "」")?;
3365 }
3366
3367 write!(s, "(?<=")?;
3368 self.ppw(s, lbinner)?;
3369 write!(s, ")")
3370 }
3371 Kind::Lookahead => {
3372 let inner = self.get_lookahead_inner(node_id);
3373 write!(s, "(?=")?;
3374 self.ppw(s, inner)?;
3375 write!(s, ")")?;
3376 if self.get_lookahead_rel(node_id) != 0 {
3377 write!(s, "{{")?;
3378 let rel = self.get_lookahead_rel(node_id);
3379 if rel == u32::MAX {
3380 write!(s, "∅")?;
3381 } else {
3382 write!(s, "{}", rel)?;
3383 }
3384 write!(s, "}}")?;
3385 }
3386 if node_id.right(self) == NodeId::MISSING {
3387 Ok(())
3388 } else {
3389 write!(s, "『")?;
3390 self.ppw(s, node_id.right(self))?;
3391 write!(s, "』")
3392 }
3393 }
3394 Kind::Counted => {
3395 let body = node_id.left(self);
3396 let packed = self.get_extra(node_id);
3397 let step = packed & 0xFFFF;
3398 let best = packed >> 16;
3399 write!(s, "#(")?;
3400 self.ppw(s, body)?;
3401 write!(s, ")s{}b{}", step, best)
3402 }
3403 }
3404 }
3405
3406 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3407 self.mk_concat(node, NodeId::TS)
3408 }
3409
3410 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3411 let node_ts = self.mk_concat(node, NodeId::TS);
3412 self.mk_compl(node_ts)
3413 }
3414
3415 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3416 let notset = self.solver().not_id(set);
3417 self.mk_pred(notset)
3418 }
3419
3420 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3421 let set_id = self.solver().u8_to_set_id(char);
3422 self.mk_pred(set_id)
3423 }
3424
3425 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3426 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3427 self.mk_pred(rangeset)
3428 }
3429
3430 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3431 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3432 for &(lo, hi) in &ranges[1..] {
3433 let r = self.mk_range_u8(lo, hi);
3434 node = self.mk_union(node, r);
3435 }
3436 node
3437 }
3438
3439 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3440 let mut prefix = Vec::new();
3441 let mut curr = node;
3442 loop {
3443 if curr == NodeId::EPS {
3444 let full = !prefix.is_empty();
3445 return (prefix, full);
3446 }
3447 if curr == NodeId::BOT {
3448 break;
3449 }
3450 if self.get_kind(curr) == Kind::Pred {
3451 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3452 Some(byte) => {
3453 prefix.push(byte);
3454 return (prefix, true);
3455 }
3456 None => break, }
3458 }
3459 if self.get_kind(curr) != Kind::Concat {
3460 break;
3461 }
3462 let left = curr.left(self);
3463 if self.get_kind(left) != Kind::Pred {
3464 break;
3465 }
3466 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3467 Some(byte) => prefix.push(byte),
3468 None => break,
3469 }
3470 curr = curr.right(self);
3471 }
3472 (prefix, false)
3473 }
3474
3475 #[allow(dead_code)]
3476 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3477 let mut result = NodeId::EPS;
3478 for byte in raw_str.iter().rev() {
3479 let node = self.mk_u8(*byte);
3480 result = self.mk_concat(node, result);
3481 }
3482 result
3483 }
3484
3485 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3486 let mut result = NodeId::EPS;
3487 for byte in raw_str.bytes().rev() {
3488 let node = self.mk_u8(byte);
3489 result = self.mk_concat(node, result);
3490 }
3491 result
3492 }
3493
3494 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3495 let mut sorted: Vec<NodeId> = nodes.collect();
3496 if sorted.len() <= 1 {
3497 return sorted.pop().unwrap_or(NodeId::BOT);
3498 }
3499 sorted.sort();
3500 sorted.dedup();
3501 sorted.retain(|&x| x != NodeId::BOT);
3502 if sorted.is_empty() {
3503 return NodeId::BOT;
3504 }
3505 if sorted.len() > 16 {
3506 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3507 let mut non_concat: Vec<NodeId> = Vec::new();
3508 for &n in &sorted {
3509 if self.get_kind(n) == Kind::Concat {
3510 by_head.entry(self.get_left(n)).or_default().push(n);
3511 } else {
3512 non_concat.push(n);
3513 }
3514 }
3515 let mut absorbed: Vec<NodeId> = Vec::new();
3520 for &n in &non_concat {
3521 if by_head.contains_key(&n) {
3522 absorbed.push(n);
3523 }
3524 }
3525 if !absorbed.is_empty() {
3526 non_concat.retain(|n| !absorbed.contains(n));
3527 }
3528 if by_head.len() < sorted.len() {
3529 let mut groups: Vec<NodeId> = non_concat;
3530 for (head, tails) in by_head {
3531 let mut tail_nodes: Vec<NodeId> =
3532 tails.iter().map(|&n| self.get_right(n)).collect();
3533 if absorbed.contains(&head) {
3534 tail_nodes.push(NodeId::EPS);
3535 }
3536 let tail_union = self.mk_unions(tail_nodes.into_iter());
3537 let factored = self.mk_concat(head, tail_union);
3538 groups.push(factored);
3539 }
3540 groups.sort();
3541 groups.dedup();
3542 return self.mk_unions_balanced(&groups);
3543 }
3544 }
3545 self.mk_unions_balanced(&sorted)
3546 }
3547
3548 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3549 match nodes.len() {
3550 0 => NodeId::BOT,
3551 1 => nodes[0],
3552 n => {
3553 let mid = n / 2;
3554 let left = self.mk_unions_balanced(&nodes[..mid]);
3555 let right = self.mk_unions_balanced(&nodes[mid..]);
3556 self.mk_union(left, right)
3557 }
3558 }
3559 }
3560
3561 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3562 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3563 }
3564
3565 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3566 nodes
3567 .rev()
3568 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3569 }
3570}
3571
3572impl RegexBuilder {
3573 #[allow(dead_code)]
3574 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3575 match self.get_tregex(term_id).clone() {
3576 TRegex::Leaf(node_id) => {
3577 if NodeId::BOT == node_id {
3578 vec![]
3579 } else {
3580 vec![node_id]
3581 }
3582 }
3583 TRegex::ITE(_, then_id, else_id) => {
3584 let mut then_nodes = self.extract_sat(then_id);
3585 let mut else_nodes = self.extract_sat(else_id);
3586 then_nodes.append(&mut else_nodes);
3587 then_nodes
3588 }
3589 }
3590 }
3591
3592 pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3593 debug_assert!(self.get_kind(start) == Kind::Union);
3594 let mut curr = start;
3595 while self.get_kind(curr) == Kind::Union {
3596 f(curr.left(self));
3597 curr = curr.right(self);
3598 }
3599 f(curr);
3600 }
3601
3602 pub(crate) fn iter_unions_b(
3603 &mut self,
3604 curr: NodeId,
3605 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3606 ) {
3607 let mut curr = curr;
3608 while self.get_kind(curr) == Kind::Union {
3609 f(self, curr.left(self));
3610 curr = curr.right(self);
3611 }
3612 f(self, curr);
3613 }
3614
3615 pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3616 if !self.contains_look(node_id) {
3617 return Some(node_id);
3618 }
3619 match self.get_kind(node_id) {
3620 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3621 Kind::Concat => {
3622 let left = node_id.left(self);
3623 let right = node_id.right(self);
3624 let elim_l = self.try_elim_lookarounds(left)?;
3625 let elim_r = self.try_elim_lookarounds(right)?;
3626 let rw = self.mk_concat(elim_l, elim_r);
3627 Some(rw)
3628 }
3629 Kind::Union => {
3630 let left = node_id.left(self);
3631 let right = node_id.right(self);
3632 let elim_l = self.try_elim_lookarounds(left)?;
3633 let elim_r = self.try_elim_lookarounds(right)?;
3634 let rw = self.mk_union(elim_l, elim_r);
3635 Some(rw)
3636 }
3637
3638 Kind::Star => {
3639 let body = node_id.left(self);
3640 let elim_l = self.try_elim_lookarounds(body)?;
3641 Some(self.mk_star(elim_l))
3642 }
3643 Kind::Compl => {
3644 let left = node_id.left(self);
3645 let elim_l = self.try_elim_lookarounds(left)?;
3646 Some(self.mk_compl(elim_l))
3647 }
3648 Kind::Lookahead => {
3649 let rel = self.get_lookahead_rel(node_id);
3650 if rel != 0 {
3651 return None;
3652 }
3653 let lbody = self.get_lookahead_inner(node_id);
3654 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3655 let elim_l = self.try_elim_lookarounds(lbody)?;
3656 let elim_r = self.try_elim_lookarounds(ltail)?;
3657 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3658 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3659 let rw = self.mk_inter(lbody_ts, ltail_ts);
3660 Some(rw)
3661 }
3662 Kind::Lookbehind => {
3663 let linner = self.get_lookbehind_inner(node_id);
3664 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3665 let elim_l = self.try_elim_lookarounds(linner)?;
3666 let elim_r = self.try_elim_lookarounds(lprev)?;
3667 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3668 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3669 let rw = self.mk_inter(lbody_ts, ltail_ts);
3670 Some(rw)
3671 }
3672 Kind::Inter => {
3673 let left = node_id.left(self);
3674 let right = node_id.right(self);
3675 let elim_l = self.try_elim_lookarounds(left)?;
3676 let elim_r = self.try_elim_lookarounds(right)?;
3677 let rw = self.mk_inter(elim_l, elim_r);
3678 Some(rw)
3679 }
3680 Kind::Counted => None,
3681 }
3682 }
3683
3684 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3686 if self.nullability(node) == Nullability::NEVER {
3687 node
3688 } else {
3689 self.mk_inter(NodeId::TOPPLUS, node)
3690 }
3691 }
3692
3693 fn iter_find_stack(
3694 &self,
3695 stack: &mut Vec<TRegexId>,
3696 mut f: impl FnMut(NodeId) -> bool,
3697 ) -> bool {
3698 loop {
3699 match stack.pop() {
3700 None => return false,
3701 Some(curr) => match self.get_tregex(curr) {
3702 TRegex::Leaf(n) => {
3703 let mut curr = *n;
3704 while curr != NodeId::BOT {
3705 match self.get_kind(curr) {
3706 Kind::Union => {
3707 if f(curr.left(self)) {
3708 return true;
3709 }
3710 curr = curr.right(self);
3711 }
3712 _ => {
3713 if f(*n) {
3714 return true;
3715 }
3716 curr = NodeId::BOT;
3717 }
3718 }
3719 }
3720 }
3721 TRegex::ITE(_, then_id, else_id) => {
3722 if *else_id != TRegexId::BOT {
3723 stack.push(*else_id);
3724 }
3725 stack.push(*then_id);
3726 }
3727 },
3728 }
3729 }
3730 }
3731
3732 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3733 if node == NodeId::BOT {
3734 return Some(true);
3735 }
3736 if self.nullability(node) != Nullability::NEVER {
3737 return Some(false);
3738 }
3739 if let Some(cached) = self.cache_empty.get(&node) {
3740 if cached.is_checked() {
3741 return Some(cached.is_empty());
3742 }
3743 }
3744 let node = if !self.contains_look(node) {
3745 node
3746 } else {
3747 self.try_elim_lookarounds(node)?
3748 };
3749 let isempty_flag = self.is_empty_lang_internal(node);
3750
3751 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3752 }
3753
3754 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3755 if !self.get_meta_flags(initial_node).contains_inter() {
3757 return Ok(NodeFlags::ZERO);
3758 }
3759
3760 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3761 let mut worklist: VecDeque<NodeId> = VecDeque::new();
3762 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3763 let mut stack = Vec::new();
3764 stack.push(begin_der);
3765 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3766 visited.insert(node, initial_node);
3767 let nullability = self.nullability(node);
3768 if nullability != Nullability::NEVER {
3769 true
3770 } else {
3771 worklist.push_back(node);
3772 false
3773 }
3774 });
3775 if found_nullable_right_away {
3776 return Ok(NodeFlags::ZERO);
3777 }
3778
3779 worklist.push_back(initial_node);
3780 let isempty_flag: NodeFlags;
3781 let mut found_node = NodeId::BOT;
3782
3783 loop {
3784 match worklist.pop_front() {
3785 None => {
3786 isempty_flag = NodeFlags::IS_EMPTY;
3787 break;
3788 }
3789 Some(outer) => {
3790 if let Some(cached) = self.cache_empty.get(&outer) {
3791 if cached.is_checked() {
3792 if cached.is_empty() {
3793 continue;
3794 } else {
3795 return Ok(NodeFlags::ZERO);
3796 }
3797 }
3798 }
3799
3800 let derivative = self.der(outer, Nullability::CENTER)?;
3801
3802 stack.push(derivative);
3803
3804 let found_null = self.iter_find_stack(&mut stack, |node| {
3805 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3806 found_node = node;
3807 if !self.get_meta_flags(node).contains_inter() {
3808 true
3809 } else {
3810 e.insert(outer);
3811 worklist.push_front(node);
3812 self.any_nonbegin_nullable(node)
3813 }
3814 } else {
3815 false
3816 }
3817 });
3818 if found_null {
3819 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3820 isempty_flag = NodeFlags::ZERO;
3821 break;
3822 }
3823 }
3824 }
3825 }
3826
3827 self.cache_empty.insert(
3828 initial_node,
3829 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3830 );
3831 Ok(isempty_flag)
3832 }
3833
3834 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3836 if larger_lang == smaller_lang {
3837 return Some(true);
3838 }
3839
3840 if self
3842 .nullability(larger_lang)
3843 .not()
3844 .and(self.nullability(smaller_lang))
3845 != Nullability::NEVER
3846 {
3847 return Some(false);
3848 }
3849
3850 let (smaller_lang, larger_lang) =
3855 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3856 let wrap = |b: &mut Self, n: NodeId| {
3857 let tmp = b.mk_concat(n, NodeId::TS);
3858 b.mk_concat(NodeId::TS, tmp)
3859 };
3860 (wrap(self, smaller_lang), wrap(self, larger_lang))
3861 } else {
3862 (smaller_lang, larger_lang)
3863 };
3864
3865 let nota = self.mk_compl(larger_lang);
3866 let diff = self.mk_inter(smaller_lang, nota);
3867 self.is_empty_lang(diff)
3868 }
3869}