1#![warn(dead_code)]
7
8pub mod unicode_classes;
9pub use unicode_classes::UnicodeClassCache;
10use solver::{Solver, TSetId};
11use rustc_hash::FxHashMap;
12use std::collections::{BTreeSet, VecDeque};
13use std::fmt::Debug;
14use std::fmt::Write;
15use std::hash::Hash;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_lookaround(self) -> bool {
134 self.has(MetaFlags::CONTAINS_LOOKAROUND)
135 }
136 pub(crate) fn contains_inter(self) -> bool {
137 self.has(MetaFlags::CONTAINS_INTER)
138 }
139
140 pub(crate) fn all_contains_flags(self) -> MetaFlags {
141 self.and(
142 MetaFlags::CONTAINS_LOOKAROUND
143 .or(MetaFlags::CONTAINS_ANCHORS)
144 .or(MetaFlags::CONTAINS_INTER),
145 )
146 }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152 #[default]
153 Pred,
154 Star,
155 Begin,
156 End,
157 Concat,
158 Union,
159 Compl,
160 Lookbehind,
161 Lookahead,
162 Inter,
163 Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168 flags: MetaFlags,
169 nulls: NullsId,
170}
171
172struct MetadataBuilder {
173 num_created: u32,
174 solver: Solver,
175 nb: NullsBuilder,
176 index: FxHashMap<Metadata, MetadataId>,
177 pub array: Vec<Metadata>,
178}
179
180mod helpers {
181 pub(crate) fn incr_rel(n1: u32) -> u32 {
182 match n1.overflowing_add(1) {
183 (_, true) => u32::MAX,
184 (res, false) => res,
185 }
186 }
187}
188
189impl MetadataBuilder {
190 fn new() -> MetadataBuilder {
191 Self {
192 index: FxHashMap::default(),
193 array: vec![Metadata {
194 flags: MetaFlags::ZERO,
195 nulls: NullsId::EMPTY,
196 }],
197 solver: Solver::new(),
198 num_created: 0,
199 nb: NullsBuilder::new(),
200 }
201 }
202
203 fn init(&mut self, inst: Metadata) -> MetadataId {
204 self.num_created += 1;
205 let new_id = MetadataId(self.num_created);
206 self.index.insert(inst.clone(), new_id);
207 self.array.push(inst);
208 new_id
209 }
210
211 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212 match self.index.get(&inst) {
213 Some(&id) => id,
214 None => self.init(inst),
215 }
216 }
217
218 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219 &self.array[inst.0 as usize]
220 }
221
222 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223 setflags.all_contains_flags()
224 }
225
226 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227 let left = &self.array[body.0 as usize].flags;
228 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229 let inf = if body_id == NodeId::BOT {
231 MetaFlags::ZERO
232 } else {
233 MetaFlags::INFINITE_LENGTH
234 };
235 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236 }
237
238 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239 let left = &self.array[left_id.0 as usize].flags;
240 let null = left.nullability().not().and(Nullability::ALWAYS);
241 let contains = self.get_contains(*left);
242 MetaFlags::with_nullability(
243 null,
244 contains
245 .or(MetaFlags::INFINITE_LENGTH)
246 .or(MetaFlags::CONTAINS_COMPL),
247 )
248 }
249
250 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251 let left = &self.array[left_id.0 as usize].flags;
252 let right = &self.array[right_id.0 as usize].flags;
253 let null = left.nullability().and(right.nullability());
254 let contains = self.get_contains(left.or(*right));
255 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256 MetaFlags::with_nullability(null, contains.or(len))
257 }
258
259 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260 let left = &self.array[left_id.0 as usize].flags;
261 let right = &self.array[right_id.0 as usize].flags;
262 let null = left.nullability().and(right.nullability());
263 let contains = self
264 .get_contains(left.or(*right))
265 .or(MetaFlags::CONTAINS_INTER);
266 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267 MetaFlags::with_nullability(null, contains.or(len))
268 }
269
270 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271 let left = &self.array[left_id.0 as usize].flags;
272 let right = &self.array[right_id.0 as usize].flags;
273 let null = left.nullability().or(right.nullability());
274 let contains = self.get_contains(left.or(*right));
275 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276 MetaFlags::with_nullability(null, contains.or(len))
277 }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282 pub(crate) kind: Kind,
283 pub(crate) left: NodeId,
284 pub(crate) right: NodeId,
285 pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290 Leaf(NodeId),
291 ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301 fn is_checked(self) -> bool {
302 self.0 >= NodeFlags::IS_CHECKED.0
303 }
304 fn is_empty(self) -> bool {
305 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306 }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317 mb: MetadataBuilder,
318 temp_vec: Vec<NodeId>,
319 num_created: u32,
320 index: FxHashMap<NodeKey, NodeId>,
321 array: Vec<NodeKey>,
322 metadata: Vec<MetadataId>,
323 reversed: Vec<NodeId>,
324 cache_empty: FxHashMap<NodeId, NodeFlags>,
325 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326 tr_array: Vec<TRegex<TSetId>>,
327 tr_der_center: Vec<TRegexId>,
328 tr_der_begin: Vec<TRegexId>,
329 flags: BuilderFlags,
330 pub lookahead_context_max: u32,
332 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336
337macro_rules! iter_inter {
338 ($compiler:ident, $start:ident, $expression:expr) => {
339 let mut curr = $start;
340 while $compiler.get_kind(curr) == Kind::Inter {
341 let left = $compiler.get_left(curr);
342 $expression(left);
343 curr = $compiler.get_right(curr);
344 }
345 $expression(curr);
346 };
347}
348
349impl NodeId {
350 fn is_missing(&self) -> bool {
351 *self == NodeId::MISSING
352 }
353 #[inline]
354 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
355 b.get_flags_contains(self)
356 }
357 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
358 if self == tail {
359 true
360 } else if self.is_kind(b, Kind::Concat) {
361 self.right(b).has_concat_tail(b, tail)
362 } else {
363 false
364 }
365 }
366 fn missing_to_eps(&self) -> NodeId {
367 if *self == NodeId::MISSING {
368 NodeId::EPS
369 } else {
370 *self
371 }
372 }
373
374 #[inline]
375 fn kind(self, b: &RegexBuilder) -> Kind {
376 b.get_kind(self)
377 }
378 #[inline]
379 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
380 b.get_kind(self) == k
381 }
382 #[inline]
383 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
384 b.nullability(self) == Nullability::NEVER
385 }
386 #[inline]
387 fn nullability(self, b: &RegexBuilder) -> Nullability {
388 b.nullability(self)
389 }
390
391 #[inline]
392 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
393 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
394 }
395 #[inline]
396 pub fn left(self, b: &RegexBuilder) -> NodeId {
397 b.get_left(self)
398 }
399
400 #[inline]
401 pub fn right(self, b: &RegexBuilder) -> NodeId {
402 b.get_right(self)
403 }
404
405 #[inline]
406 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
407 b.der(self, mask)
408 }
409
410 #[inline]
411 fn extra(self, b: &RegexBuilder) -> u32 {
412 b.get_extra(self)
413 }
414
415 #[inline]
416 fn is_pred(self, b: &RegexBuilder) -> bool {
417 b.get_kind(self) == Kind::Pred
418 }
419 #[inline]
420 fn is_lookahead(self, b: &RegexBuilder) -> bool {
421 b.get_kind(self) == Kind::Lookahead
422 }
423 #[inline]
424 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
425 debug_assert!(self.is_pred(b));
426 TSetId(b.get_extra(self))
427 }
428 #[inline]
429 fn is_star(self, b: &RegexBuilder) -> bool {
430 if NodeId::EPS == self {
431 return false;
432 }
433 b.get_kind(self) == Kind::Star
434 }
435
436 #[inline]
437 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
438 if self.is_concat(b) {
439 let r = self.right(b);
440 return r.is_star(b) && r.left(b) == self.left(b);
441 }
442 false
443 }
444
445 #[inline]
446 fn is_concat(self, b: &RegexBuilder) -> bool {
447 b.get_kind(self) == Kind::Concat
448 }
449
450 #[inline]
451 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
452 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
453 Some(b.get_right(self))
454 } else {
455 None
456 }
457 }
458
459 #[inline]
460 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
461 if b.get_kind(self) == Kind::Concat {
462 let left = self.left(b);
463 let right = self.right(b);
464 if left.is_kind(b, Kind::Compl)
465 && left.left(b) == NodeId::TOPPLUS {
466 return right == NodeId::END;
467 }
468 }
469 false
470 }
471
472 #[inline]
473 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
474 if NodeId::EPS == self {
475 return None;
476 }
477 if self.is_star(b) && self.left(b).is_pred(b) {
478 Some(self.left(b))
479 } else {
480 None
481 }
482 }
483
484 #[inline]
485 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
486 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
487 let middle = self.right(b);
488 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
489 return Some(middle.left(b));
490 }
491 }
492 None
493 }
494
495 #[inline]
496 pub(crate) fn iter_union(
497 self,
498 b: &mut RegexBuilder,
499 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
500 ) {
501 let mut curr: NodeId = self;
502 while curr.kind(b) == Kind::Union {
503 f(b, curr.left(b));
504 curr = curr.right(b);
505 }
506 f(b, curr);
507 }
508
509 #[inline]
510 pub(crate) fn iter_union_while(
511 self,
512 b: &mut RegexBuilder,
513 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
514 ) {
515 let mut curr: NodeId = self;
516 let mut continue_loop = true;
517 while continue_loop && curr.kind(b) == Kind::Union {
518 continue_loop = f(b, curr.left(b));
519 curr = curr.right(b);
520 }
521 if continue_loop {
522 f(b, curr);
523 }
524 }
525}
526
527impl Default for RegexBuilder {
528 fn default() -> Self {
529 Self::new()
530 }
531}
532
533impl RegexBuilder {
534 pub fn new() -> RegexBuilder {
535 let mut inst = Self {
536 mb: MetadataBuilder::new(),
537 array: Vec::new(),
538 index: FxHashMap::default(),
539 cache_empty: FxHashMap::default(),
540 tr_array: Vec::new(),
541 tr_cache: FxHashMap::default(),
542 flags: BuilderFlags::ZERO,
543 lookahead_context_max: 800,
544 num_created: 0,
545 metadata: Vec::new(),
546 reversed: Vec::new(),
547 tr_der_center: Vec::new(),
548 tr_der_begin: Vec::new(),
549 temp_vec: Vec::new(),
550 mk_binary_memo: FxHashMap::default(),
551 clean_cache: FxHashMap::default(),
552 };
553 inst.array.push(NodeKey::default());
554 inst.mk_pred(TSetId::EMPTY);
555 inst.mk_star(NodeId::BOT);
556 inst.mk_pred(TSetId::FULL);
557 inst.mk_star(NodeId::TOP);
558 let top_plus_id = inst.mk_plus(NodeId::TOP);
559 inst.mk_unset(Kind::Begin);
560 inst.mk_unset(Kind::End);
561 debug_assert!(top_plus_id == NodeId::TOPPLUS);
562
563 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
564 inst.mk_leaf(NodeId::BOT);
565 inst.mk_leaf(NodeId::EPS);
566 inst.mk_leaf(NodeId::TOP);
567 inst.mk_leaf(NodeId::TS);
568
569 inst.flags = BuilderFlags::SUBSUME;
570 inst
571 }
572
573 #[inline]
574 pub fn solver_ref(&self) -> &Solver {
575 &self.mb.solver
576 }
577
578 #[inline]
579 pub fn solver(&mut self) -> &mut Solver {
580 &mut self.mb.solver
581 }
582
583 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
584 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
585 self.tr_cache.insert(inst.clone(), new_id);
586 self.tr_array.push(inst);
587 new_id
588 }
589
590 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
591 match self.tr_cache.get(&inst) {
592 Some(&id) => id,
593 None => self.tr_init(inst),
594 }
595 }
596
597 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
598 &self.tr_array[inst.0 as usize]
599 }
600
601 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
602 let solv = self.solver();
603 !solv.is_sat_id(cond1, cond2)
604 }
605
606 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
607 let term = TRegex::<TSetId>::Leaf(node_id);
608 self.get_tregex_id(term)
609 }
610
611 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
612 let tmp_inst = TRegex::ITE(cond, _then, _else);
613 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
614 return *cached;
615 }
616 if _then == _else {
617 return _then;
618 }
619 if self.solver().is_full_id(cond) {
620 return _then;
621 }
622 if self.solver().is_empty_id(cond) {
623 return _else;
624 }
625 let _clean_then = match self.tr_array[_then.0 as usize] {
626 TRegex::Leaf(_) => _then,
627 _ => self.clean(cond, _then),
628 };
629 let notcond = self.solver().not_id(cond);
630 let _clean_else = match self.tr_array[_else.0 as usize] {
631 TRegex::Leaf(_) => _else,
632 _ => self.clean(notcond, _else),
633 };
634
635 if _clean_then == _clean_else {
636 return _clean_then;
637 }
638 match self.get_tregex(_clean_then) {
640 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
641 let _cond_then = *leftcond;
642 let new_then = *_inner_then;
643 let sand = self.solver().and_id(cond, _cond_then);
644 return self.mk_ite(sand, new_then, _clean_else);
645 }
646 _ => {}
647 }
648 match self.get_tregex(_clean_else) {
650 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
652 let e2clone = *_e2;
653 let new_cond = self.mb.solver.or_id(cond, *_c2);
654 return self.mk_ite(new_cond, _clean_then, e2clone);
655 }
656 _ => {}
657 }
658
659 if _clean_then == TRegexId::BOT {
660 let flip_cond = self.solver().not_id(cond);
661 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
662 return cleaned_id;
663 }
664
665 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
666 }
667
668 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
669 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
670 return cached;
671 }
672 let result = match *self.get_tregex(tterm) {
673 TRegex::Leaf(_) => tterm,
674 TRegex::ITE(alpha, _then_id, _else_id) => {
675 let notalpha = self.mb.solver.not_id(alpha);
676 if self.mb.solver.unsat_id(beta, alpha) {
677 self.clean(beta, _else_id)
679 } else if self.unsat(beta, notalpha) {
680 self.clean(beta, _then_id)
682 } else {
683 let tc = self.mb.solver.and_id(beta, alpha);
684 let ec = self.mb.solver.and_id(beta, notalpha);
685 let new_then = self.clean(tc, _then_id);
686 let new_else = self.clean(ec, _else_id);
687 self.mk_ite(alpha, new_then, new_else)
688 }
689 }
690 };
691 self.clean_cache.insert((beta, tterm), result);
692 result
693 }
694
695 fn mk_unary(
696 &mut self,
697 term: TRegexId,
698 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
699 ) -> TRegexId {
700 match self.tr_array[term.0 as usize] {
701 TRegex::Leaf(node_id) => {
702 let applied = apply(self, node_id);
703 self.mk_leaf(applied)
704 }
705 TRegex::ITE(c1, _then, _else) => {
706 let _then1 = self.mk_unary(_then, apply);
707 let _else1 = self.mk_unary(_else, apply);
708 self.mk_ite(c1, _then1, _else1)
709 }
710 }
711 }
712
713 fn mk_binary_result(
714 &mut self,
715 left: TRegexId,
716 right: TRegexId,
717 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
718 ) -> Result<TRegexId, AlgebraError> {
719 match self.tr_array[left.0 as usize] {
720 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
721 TRegex::Leaf(right_node_id) => {
722 let applied = apply(self, left_node_id, right_node_id)?;
723 Ok(self.mk_leaf(applied))
724 }
725 TRegex::ITE(c2, _then, _else) => {
726 let then2 = self.mk_binary_result(left, _then, apply)?;
727 let else2 = self.mk_binary_result(left, _else, apply)?;
728 Ok(self.mk_ite(c2, then2, else2))
729 }
730 },
731 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
732 TRegex::Leaf(_) => {
733 let then2 = self.mk_binary_result(_then1, right, apply)?;
734 let else2 = self.mk_binary_result(_else1, right, apply)?;
735 Ok(self.mk_ite(c1, then2, else2))
736 }
737 TRegex::ITE(c2, _then2, _else2) => {
738 if c1 == c2 {
739 let _then = self.mk_binary_result(_then1, _then2, apply)?;
740 let _else = self.mk_binary_result(_else1, _else2, apply)?;
741 return Ok(self.mk_ite(c1, _then, _else));
742 }
743 if c1.0 > c2.0 {
744 let _then = self.mk_binary_result(_then1, right, apply)?;
745 let _else = self.mk_binary_result(_else1, right, apply)?;
746 Ok(self.mk_ite(c1, _then, _else))
747 } else {
748 let _then = self.mk_binary_result(left, _then2, apply)?;
749 let _else = self.mk_binary_result(left, _else2, apply)?;
750 Ok(self.mk_ite(c2, _then, _else))
751 }
752 }
753 },
754 }
755 }
756
757 fn mk_binary(
758 &mut self,
759 left: TRegexId,
760 right: TRegexId,
761 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
762 ) -> TRegexId {
763 self.mk_binary_memo.clear();
764 self.mk_binary_inner(left, right, apply)
765 }
766
767 fn mk_binary_inner(
768 &mut self,
769 left: TRegexId,
770 right: TRegexId,
771 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
772 ) -> TRegexId {
773 if left == right {
774 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
775 }
776 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
777 return cached;
778 }
779 let result = match self.tr_array[left.0 as usize] {
780 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
781 TRegex::Leaf(right_node_id) => {
782 let applied = apply(self, left_node_id, right_node_id);
783 self.mk_leaf(applied)
784 }
785 TRegex::ITE(c2, _then, _else) => {
786 let then2 = self.mk_binary_inner(left, _then, apply);
787 let else2 = self.mk_binary_inner(left, _else, apply);
788 self.mk_ite(c2, then2, else2)
789 }
790 },
791 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
792 TRegex::Leaf(_) => {
793 let then2 = self.mk_binary_inner(_then1, right, apply);
794 let else2 = self.mk_binary_inner(_else1, right, apply);
795 self.mk_ite(c1, then2, else2)
796 }
797 TRegex::ITE(c2, _then2, _else2) => {
798 if c1 == c2 {
799 let _then = self.mk_binary_inner(_then1, _then2, apply);
800 let _else = self.mk_binary_inner(_else1, _else2, apply);
801 self.mk_ite(c1, _then, _else)
802 } else if c1.0 > c2.0 {
803 let _then = self.mk_binary_inner(_then1, right, apply);
804 let _else = self.mk_binary_inner(_else1, right, apply);
805 self.mk_ite(c1, _then, _else)
806 } else {
807 let _then = self.mk_binary_inner(left, _then2, apply);
808 let _else = self.mk_binary_inner(left, _else2, apply);
809 self.mk_ite(c2, _then, _else)
810 }
811 }
812 },
813 };
814 self.mk_binary_memo.insert((left, right), result);
815 result
816 }
817
818 pub fn get_nulls(
819 &mut self,
820 pending_rel: u32,
821 mask: Nullability,
822 acc: &mut BTreeSet<NullState>,
823 node_id: NodeId,
824 ) {
825 debug_assert!(node_id != NodeId::MISSING);
826 if !self.is_nullable(node_id, mask) {
827 return;
828 }
829 match self.get_kind(node_id) {
830 Kind::Pred => {}
831 Kind::End => {
832 if mask.has(Nullability::END) {
833 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
834 }
835 }
836 Kind::Begin => {
837 if mask.has(Nullability::BEGIN) {
838 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
839 }
840 }
841 Kind::Concat => {
842 let new_mask = self.nullability(node_id).and(mask);
843 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
844 if self.is_nullable(node_id.left(self), mask) {
845 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
846 }
847 }
848 Kind::Union => {
849 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
850 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
851 }
852 Kind::Inter => {
853 let new_mask = self.nullability(node_id).and(mask);
854 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
855 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
856 }
857 Kind::Star => {
858 acc.insert(NullState::new(mask, pending_rel));
859 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
860 }
861 Kind::Compl => {
862 if !self.is_nullable(node_id.left(self), mask) {
863 acc.insert(NullState::new(mask, 0));
864 }
865 }
866 Kind::Lookbehind => {
867 let new_mask = self.nullability(node_id).and(mask);
868 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
869 if node_id.right(self) != NodeId::MISSING {
870 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
871 }
872 }
873 Kind::Lookahead => {
874 let la_inner = self.get_lookahead_inner(node_id);
875 if self.is_nullable(la_inner, mask) {
876 let rel = self.get_lookahead_rel(node_id);
877 if rel != u32::MAX {
878 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
879 }
880 let la_tail = self.get_lookahead_tail(node_id);
882 if la_tail != NodeId::MISSING {
883 self.get_nulls(pending_rel, mask, acc, la_tail);
884 }
885 }
886 }
887 Kind::Counted => {
888 let packed = self.get_extra(node_id);
889 let best = packed >> 16;
890 if best > 0 {
891 acc.insert(NullState::new(mask, pending_rel + best));
892 }
893 }
894 }
895 }
896
897 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
898 self.get_meta_flags(node_id)
899 .has(MetaFlags::CONTAINS_LOOKAROUND)
900 }
901
902 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
904 self.get_meta_flags(node_id)
905 .has(MetaFlags::CONTAINS_ANCHORS)
906 }
907
908
909 pub fn is_infinite(&self, node_id: NodeId) -> bool {
910 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
911 }
912
913 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
915 if self.is_infinite(node_id) {
916 if self.get_kind(node_id) == Kind::Inter {
917 self.get_bounded_length(node_id)
918 } else {
919 (self.get_min_length_only(node_id), u32::MAX)
920 }
921 } else {
922 self.get_bounded_length(node_id)
923 }
924 }
925
926 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
927 if node_id == NodeId::EPS {
928 return (0, 0);
929 }
930 match self.get_kind(node_id) {
931 Kind::End | Kind::Begin => (0, 0),
932 Kind::Pred => (1, 1),
933 Kind::Concat => {
934 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
935 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
936 (lmin + rmin, lmax.saturating_add(rmax))
937 }
938 Kind::Union => {
939 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
940 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
941 (lmin.min(rmin), lmax.max(rmax))
942 }
943 Kind::Inter => {
944 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
945 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
946 (lmin.max(rmin), lmax.min(rmax))
947 }
948 Kind::Lookahead => {
949 let body = node_id.left(self);
950 if self.is_infinite(body) {
951 return (0, u32::MAX);
952 }
953 let right = node_id.right(self);
954 if right.is_missing() {
955 (0, 0)
956 } else {
957 self.get_min_max_length(right)
958 }
959 }
960 Kind::Counted => (0, 0),
961 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
962 }
963 }
964
965 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
966 match self.get_kind(node_id) {
967 Kind::End | Kind::Begin => Some(0),
968 Kind::Pred => Some(1),
969 Kind::Concat => {
970 let l = self.get_fixed_length(node_id.left(self))?;
971 let r = self.get_fixed_length(node_id.right(self))?;
972 Some(l + r)
973 }
974 Kind::Union => {
975 let l = self.get_fixed_length(node_id.left(self))?;
976 let r = self.get_fixed_length(node_id.right(self))?;
977 if l == r {
978 Some(l)
979 } else {
980 None
981 }
982 }
983 Kind::Inter => {
984 let l = self.get_fixed_length(node_id.left(self))?;
985 let r = self.get_fixed_length(node_id.right(self))?;
986 if l == r {
987 Some(l)
988 } else {
989 None
990 }
991 }
992 Kind::Lookahead => {
993 let right = node_id.right(self);
994 if right.is_missing() {
995 Some(0)
996 } else {
997 self.get_fixed_length(right)
998 }
999 }
1000 Kind::Counted => Some(0),
1001 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1002 }
1003 }
1004
1005 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1006 match self.get_kind(node_id) {
1007 Kind::End | Kind::Begin => 0,
1008 Kind::Pred => 1,
1009 Kind::Concat => {
1010 self.get_min_length_only(node_id.left(self))
1011 + self.get_min_length_only(node_id.right(self))
1012 }
1013 Kind::Union => self
1014 .get_min_length_only(node_id.left(self))
1015 .min(self.get_min_length_only(node_id.right(self))),
1016 Kind::Inter => self
1017 .get_min_length_only(node_id.left(self))
1018 .max(self.get_min_length_only(node_id.right(self))),
1019 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1020 Kind::Compl => {
1021 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1022 0
1023 } else {
1024 1
1025 }
1026 }
1027 }
1028 }
1029
1030 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1031 if node_id == NodeId::TS {
1032 return true;
1033 }
1034 match self.get_kind(node_id) {
1035 Kind::Inter => {
1036 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1037 }
1038 Kind::Union => {
1039 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1040 }
1041 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1042 _ => false,
1043 }
1044 }
1045
1046 #[inline]
1047 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1048 if self.get_kind(node_id) == Kind::Concat {
1049 self.ends_with_ts(node_id.right(self))
1050 } else {
1051 node_id == NodeId::TS
1052 }
1053 }
1054
1055 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1056 debug_assert!(node_id != NodeId::MISSING);
1057 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1058 }
1059
1060 pub(crate) fn cache_der(
1061 &mut self,
1062 node_id: NodeId,
1063 result: TRegexId,
1064 mask: Nullability,
1065 ) -> TRegexId {
1066 if mask == Nullability::CENTER {
1067 self.tr_der_center[node_id.0 as usize] = result
1068 } else {
1069 self.tr_der_begin[node_id.0 as usize] = result
1070 };
1071 result
1072 }
1073
1074 pub(crate) fn try_cached_der(
1075 &mut self,
1076 node_id: NodeId,
1077 mask: Nullability,
1078 ) -> Option<TRegexId> {
1079 let cache = if mask == Nullability::CENTER {
1080 &mut self.tr_der_center
1081 } else {
1082 &mut self.tr_der_begin
1083 };
1084 match cache.get(node_id.0 as usize) {
1085 Some(&TRegexId::MISSING) => {}
1086 Some(&result) => {
1087 return Some(result);
1088 }
1089 None => {
1090 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1091 }
1092 }
1093 None
1094 }
1095
1096 pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1097 let mut term = self.get_tregex(der);
1098 loop {
1099 match *term {
1100 TRegex::Leaf(node_id) => return node_id,
1101 TRegex::ITE(cond, _then, _else) => {
1102 if self.solver().is_sat_id(set, cond) {
1103 term = self.get_tregex(_then);
1104 } else {
1105 term = self.get_tregex(_else);
1106 }
1107 }
1108 }
1109 }
1110 }
1111
1112 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1113 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1114 debug_assert!(
1115 node_id != NodeId::MISSING,
1116 "attempting to derive missing node"
1117 );
1118 if let Some(result) = self.try_cached_der(node_id, mask) {
1119 return Ok(result);
1120 }
1121
1122 let result = match node_id.kind(self) {
1123 Kind::Compl => {
1124 let leftd = node_id.left(self).der(self, mask)?;
1125 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1126 }
1127 Kind::Inter => {
1128 let leftd = node_id.left(self).der(self, mask)?;
1129 let rightd = node_id.right(self).der(self, mask)?;
1130 {
1131 let this = &mut *self;
1132 this.mk_binary(
1133 leftd,
1134 rightd,
1135 &mut (|b, left, right| b.mk_inter(left, right)),
1136 )
1137 }
1138 }
1139 Kind::Union => {
1140 let leftd = self.der(node_id.left(self), mask)?;
1141 let rightd = self.der(node_id.right(self), mask)?;
1142 {
1143 let this = &mut *self;
1144 this.mk_binary(
1145 leftd,
1146 rightd,
1147 &mut (|b, left, right| b.mk_union(left, right)),
1148 )
1149 }
1150 }
1151 Kind::Concat => {
1152 let head = node_id.left(self);
1153 let tail = node_id.right(self);
1154 let tail_leaf = self.mk_leaf(tail);
1155 let head_der = self.der(head, mask)?;
1156
1157 if self.is_nullable(head, mask) {
1158 let rightd = self.der(tail, mask)?;
1159 let ldr = {
1160 let this = &mut *self;
1161 this.mk_binary(
1162 head_der,
1163 tail_leaf,
1164 &mut (|b, left, right| b.mk_concat(left, right)),
1165 )
1166 };
1167 {
1168 let this = &mut *self;
1169 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1170 }
1171 } else {
1172 let this = &mut *self;
1173 this.mk_binary(
1174 head_der,
1175 tail_leaf,
1176 &mut (|b, left, right| b.mk_concat(left, right)),
1177 )
1178 }
1179 }
1180 Kind::Star => {
1181 if node_id == NodeId::EPS {
1182 TRegexId::BOT
1183 } else {
1184 let left = node_id.left(self);
1185 let r_decr_leaf = self.mk_leaf(node_id);
1186 let r_der = self.der(left, mask)?;
1187 let this = &mut *self;
1188 this.mk_binary(
1189 r_der,
1190 r_decr_leaf,
1191 &mut (|b, left, right| b.mk_concat(left, right)),
1192 )
1193 }
1194 }
1195 Kind::Lookbehind => {
1196 let lb_prev_der = {
1197 let lb_prev = self.get_lookbehind_prev(node_id);
1198 if lb_prev == NodeId::MISSING {
1199 TRegexId::MISSING
1200 } else {
1201 self.der(lb_prev, mask)?
1202 }
1203 };
1204 let lb_inner = self.get_lookbehind_inner(node_id);
1205 let lb_inner_der = self.der(lb_inner, mask)?;
1206 {
1207 let this = &mut *self;
1208 this.mk_binary_result(
1209 lb_inner_der,
1210 lb_prev_der,
1211 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1212 )?
1213 }
1214 }
1215 Kind::Lookahead => {
1216 let la_tail = self.get_lookahead_tail(node_id);
1217 let la_body = node_id.left(self);
1218 let rel = self.get_lookahead_rel(node_id);
1219
1220 if self.is_nullable(la_body, mask) {
1221 let right = node_id.right(self).missing_to_eps();
1223 let rder = self.der(right, mask).clone();
1224 return rder;
1225 }
1226
1227 if rel == u32::MAX {
1228 let la_body_der = self.der(la_body, mask)?;
1229 if la_tail.is_kind(self, Kind::Pred) {
1230 let transitioned =
1231 self.transition_term(la_body_der, la_tail.pred_tset(self));
1232 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1233 let concated = self.mk_concat(la_tail, new_la);
1234 return self.der(concated, mask);
1235 }
1236 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1237 let left = la_tail.left(self);
1238 let tset = left.pred_tset(self);
1239 let transitioned = self.transition_term(la_body_der, tset);
1240 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1241 let tail_right = la_tail.right(self);
1242 let concated = self.mk_concat(new_la, tail_right);
1243 let concated = self.mk_concat(left, concated);
1244 return self.der(concated, mask);
1245 }
1246 }
1247
1248 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1249 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1250 let concated = self.mk_concat(la_body, nulls_mask);
1251 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1252 let non_nulled = self.mk_non_nullable_safe(la_tail);
1253 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1254 let new_union = self.mk_union(concated_look, new_look);
1255 return self.der(new_union, mask);
1256 }
1257
1258 let la_tail_der = if la_tail == NodeId::MISSING {
1259 TRegexId::MISSING
1260 } else {
1261 if self.is_nullable(la_tail, mask) {
1262 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1263 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1264 let la_union = self.mk_union(la_tail, nulls_la);
1265 self.der(la_union, mask)?
1266 } else {
1267 self.der(la_tail, mask)?
1268 }
1269 };
1270
1271 let la_body_der = self.der(la_body, mask)?;
1272
1273 if rel != u32::MAX && rel > self.lookahead_context_max {
1274 return Err(AlgebraError::AnchorLimit);
1275 }
1276
1277 let la = {
1278 let this = &mut *self;
1279 let rel = helpers::incr_rel(rel);
1280 this.mk_binary(
1281 la_body_der,
1282 la_tail_der,
1283 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1284 )
1285 };
1286
1287 if rel != u32::MAX
1288 && la_tail_der != TRegexId::MISSING
1289 && self.is_nullable(la_tail, mask)
1290 {
1291 let look_only = {
1292 let this = &mut *self;
1293 let right = TRegexId::MISSING;
1294 let rel = helpers::incr_rel(rel);
1295 this.mk_binary(
1296 la_body_der,
1297 right,
1298 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1299 )
1300 };
1301 {
1302 let this = &mut *self;
1303 this.mk_binary(
1304 look_only,
1305 la,
1306 &mut (|b, left, right| b.mk_union(left, right)),
1307 )
1308 }
1309 } else {
1310 la
1311 }
1312 }
1313 Kind::Counted => {
1314 let body = node_id.left(self);
1315 let chain = node_id.right(self);
1316 let packed = self.get_extra(node_id);
1317 let step = (packed & 0xFFFF) as u16;
1318 let best = (packed >> 16) as u16;
1319
1320 let mid_best = if self.is_nullable(body, mask) && step >= best {
1321 step
1322 } else {
1323 best
1324 };
1325
1326 let body_der = self.der(body, mask)?;
1327 let new_step = step.saturating_add(1);
1328 self.mk_unary(body_der, &mut |b, new_body| {
1329 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1330 new_step
1331 } else {
1332 mid_best
1333 };
1334 let packed = (final_best as u32) << 16 | new_step as u32;
1335 b.mk_counted(new_body, chain, packed)
1336 })
1337 }
1338 Kind::Begin | Kind::End => TRegexId::BOT,
1339 Kind::Pred => {
1340 let psi = node_id.pred_tset(self);
1341 if psi == TSetId::EMPTY {
1342 TRegexId::BOT
1343 } else {
1344 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1345 }
1346 }
1347 };
1348
1349 self.cache_der(node_id, result, mask);
1353 Ok(result)
1354 }
1355
1356 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1357 debug_assert!(meta_id != MetadataId::MISSING);
1358 match self.metadata.get_mut(node_id.0 as usize) {
1359 Some(v) => *v = meta_id,
1360 None => {
1361 self.metadata
1362 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1363 self.metadata[node_id.0 as usize] = meta_id;
1364 }
1365 }
1366 }
1367
1368 fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1369 debug_assert!(reversed_id != NodeId::MISSING);
1370 match self.reversed.get_mut(node_id.0 as usize) {
1371 Some(v) => *v = reversed_id,
1372 None => {
1373 self.reversed
1374 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1375 self.reversed[node_id.0 as usize] = reversed_id;
1376 }
1377 }
1378 }
1379
1380 fn init(&mut self, inst: NodeKey) -> NodeId {
1381 self.num_created += 1;
1382 let node_id = NodeId(self.num_created);
1383 self.index.insert(inst.clone(), node_id);
1384 match inst.kind {
1385 Kind::Pred => {
1386 let meta_id = self.mb.get_meta_id(Metadata {
1387 flags: MetaFlags::ZERO,
1388 nulls: NullsId::EMPTY,
1389 });
1390 self.init_metadata(node_id, meta_id);
1391 }
1392 Kind::Begin => {
1393 let meta_id = self.mb.get_meta_id(Metadata {
1394 flags: MetaFlags::with_nullability(
1395 Nullability::BEGIN,
1396 MetaFlags::CONTAINS_ANCHORS,
1397 ),
1398 nulls: NullsId::BEGIN0,
1399 });
1400 self.init_metadata(node_id, meta_id);
1401 }
1402 Kind::End => {
1403 let meta_id = self.mb.get_meta_id(Metadata {
1404 flags: MetaFlags::with_nullability(
1405 Nullability::END,
1406 MetaFlags::CONTAINS_ANCHORS,
1407 ),
1408 nulls: NullsId::END0,
1409 });
1410 self.init_metadata(node_id, meta_id);
1411 }
1412 Kind::Inter => {
1413 let m1 = self.get_node_meta_id(inst.left);
1414 let m2 = self.get_node_meta_id(inst.right);
1415 let meta_id = {
1416 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1417 let mask_l = inst.left.nullability(self);
1418 let mask_r = inst.right.nullability(self);
1419 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1420 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1421 nulls = self.mb.nb.and_mask(nulls, mask_l);
1422 nulls = self.mb.nb.and_mask(nulls, mask_r);
1423 let new_meta = Metadata {
1424 flags: self.mb.flags_inter(m1, m2),
1425 nulls,
1426 };
1427 self.mb.get_meta_id(new_meta)
1428 };
1429 self.init_metadata(node_id, meta_id);
1430 }
1431 Kind::Union => {
1432 let m1 = self.get_node_meta_id(inst.left);
1433 let m2 = self.get_node_meta_id(inst.right);
1434 let meta_id = {
1435 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1436 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1437 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1438 let new_meta = Metadata {
1439 flags: self.mb.flags_union(m1, m2),
1440 nulls,
1441 };
1442 self.mb.get_meta_id(new_meta)
1443 };
1444 self.init_metadata(node_id, meta_id);
1445 }
1446 Kind::Concat => {
1447 let flags = self.mb.flags_concat(
1448 self.get_node_meta_id(inst.left),
1449 self.get_node_meta_id(inst.right),
1450 );
1451
1452 let right_nullability = inst.right.nullability(self);
1453 let left_nullability = inst.left.nullability(self);
1454 let nulls_left = self.get_nulls_id(inst.left);
1455 let nulls_right = self.get_nulls_id(inst.right);
1456 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1457 let mask = right_nullability.and(left_nullability);
1458 nulls = self.mb.nb.and_mask(nulls, mask);
1459
1460 let new_id = self.mb.get_meta_id(Metadata {
1461 flags,
1462 nulls,
1463 });
1464 self.init_metadata(node_id, new_id);
1465 }
1466 Kind::Star => {
1467 let left_nulls = self.get_nulls_id(inst.left);
1468 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1469 let meta_id = self.mb.get_meta_id(Metadata {
1470 flags: self
1471 .mb
1472 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1473 nulls,
1474 });
1475 self.init_metadata(node_id, meta_id);
1476 }
1477 Kind::Compl => {
1478 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1479 let meta_id = self.mb.get_meta_id(Metadata {
1480 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1481 nulls,
1482 });
1483 self.init_metadata(node_id, meta_id);
1484 }
1485 Kind::Lookbehind => {
1486 let mut null = self.get_meta_flags(inst.left).nullability();
1487 let mut contains_flags = self.get_flags_contains(inst.left);
1488 if !inst.right.is_missing() {
1489 null = null.and(self.get_meta_flags(inst.right).nullability());
1490 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1491 }
1492
1493 let left_nulls = self.get_nulls_id(inst.left);
1494 let right_nulls = self.get_nulls_id(inst.right);
1495 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1496 let meta_id = self.mb.get_meta_id(Metadata {
1497 flags: MetaFlags::with_nullability(
1498 null,
1499 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1500 ),
1501 nulls,
1502 });
1503 self.init_metadata(node_id, meta_id);
1504 }
1505 Kind::Lookahead => {
1506 let mut nulls = self.get_nulls_id(inst.left);
1507 let left_nullability = inst.left.nullability(self);
1508 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1509 nulls = self.mb.nb.or_id(nulls, nulls_right);
1510 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1511
1512 let la_inner = inst.left;
1513 let la_tail = inst.right;
1514 let null = self
1515 .get_meta_flags(la_inner)
1516 .nullability()
1517 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1518 let contains_flags = self
1519 .get_flags_contains(la_inner)
1520 .or(self.get_flags_contains(la_tail));
1521
1522 let meta_id = self.mb.get_meta_id(Metadata {
1523 flags: MetaFlags::with_nullability(
1524 null,
1525 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1526 ),
1527 nulls,
1528 });
1529 self.init_metadata(node_id, meta_id);
1530 }
1531 Kind::Counted => {
1532 let best = inst.extra >> 16;
1533 let (null, nulls) = if best > 0 {
1534 let mut ns = BTreeSet::new();
1535 ns.insert(NullState::new(Nullability::CENTER, best));
1536 (Nullability::CENTER, self.mb.nb.get_id(ns))
1537 } else {
1538 (Nullability::NEVER, NullsId::EMPTY)
1539 };
1540 let meta_id = self.mb.get_meta_id(Metadata {
1541 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1542 nulls,
1543 });
1544 self.init_metadata(node_id, meta_id);
1545 }
1546 }
1547
1548 self.array.push(inst);
1549
1550 if let Some(rw) = self.post_init_simplify(node_id) {
1551 self.override_as(node_id, rw)
1552 } else {
1553 node_id
1554 }
1555 }
1556
1557 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1558 match self.get_kind(node_id) {
1559 Kind::Concat => {
1560 let lhs = node_id.left(self);
1561 let rhs = node_id.right(self);
1562 if lhs.is_pred_star(self).is_some() {
1563 if let Some(opttail) = rhs.is_opt_v(self) {
1564 if let Some(true) = self.subsumes(lhs, opttail) {
1565 return Some(lhs);
1567 }
1568 }
1569 }
1570 }
1571 Kind::Union => {
1572 let lhs = node_id.left(self);
1573 let rhs = node_id.right(self);
1574 let mut subsumed = false;
1575 rhs.iter_union_while(self, &mut |b, branch| {
1576 if b.nullable_subsumes(branch, lhs) {
1577 subsumed = true;
1578 }
1579 !subsumed
1580 });
1581 if subsumed {
1582 return Some(rhs);
1583 }
1584 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1585 return Some(rhs);
1586 }
1587 }
1588 _ => {}
1589 }
1590
1591 None
1592 }
1593
1594 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1596 if self.get_kind(lhs) != Kind::Union {
1597 return false; }
1599 let mut rhs_branches = Vec::new();
1600 let mut curr = rhs;
1601 while self.get_kind(curr) == Kind::Union {
1602 rhs_branches.push(self.get_left(curr));
1603 curr = self.get_right(curr);
1604 }
1605 rhs_branches.push(curr);
1606
1607 curr = lhs;
1608 while self.get_kind(curr) == Kind::Union {
1609 if !rhs_branches.contains(&self.get_left(curr)) {
1610 return false;
1611 }
1612 curr = self.get_right(curr);
1613 }
1614 rhs_branches.contains(&curr)
1615 }
1616
1617 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1619 if node == target {
1620 return true;
1621 }
1622 match self.get_kind(node) {
1623 Kind::Union => {
1624 self.nullable_subsumes(self.get_left(node), target)
1625 || self.nullable_subsumes(self.get_right(node), target)
1626 }
1627 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1628 self.nullable_subsumes(self.get_right(node), target)
1629 }
1630 _ => false,
1631 }
1632 }
1633
1634 pub fn num_nodes(&self) -> u32 {
1635 self.num_created
1636 }
1637 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1638 match self.index.get(&inst) {
1639 Some(&id) => id,
1640 None => self.init(inst),
1641 }
1642 }
1643 #[inline]
1644 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1645 self.index.get(inst)
1646 }
1647
1648 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1649 self.index.insert(key, subsumed);
1650 subsumed
1651 }
1652
1653 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1654 let key = &self.array[key.0 as usize];
1655 let entry = self.index.get_mut(key).unwrap();
1656 *entry = subsumed;
1657 subsumed
1658 }
1659
1660 #[inline]
1661 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1662 self.array[node_id.0 as usize].left
1663 }
1664
1665 #[inline]
1666 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1667 self.array[node_id.0 as usize].right
1668 }
1669
1670 #[inline]
1671 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1672 self.array[node_id.0 as usize].extra
1673 }
1674
1675 #[inline]
1676 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1677 debug_assert!(self.get_kind(node_id) == Kind::Concat);
1678 let mut curr = node_id;
1679 while self.get_kind(curr) == Kind::Concat {
1680 curr = curr.right(self);
1681 }
1682 curr
1683 }
1684
1685 fn has_trailing_la(&self, node: NodeId) -> bool {
1687 let end = match self.get_kind(node) {
1688 Kind::Concat => self.get_concat_end(node),
1689 Kind::Lookahead => node,
1690 _ => return false,
1691 };
1692 self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1693 }
1694
1695 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1697 if self.get_kind(node) == Kind::Lookahead {
1698 return (NodeId::EPS, node);
1699 }
1700 debug_assert!(self.get_kind(node) == Kind::Concat);
1701 let right = node.right(self);
1702 if self.get_kind(right) != Kind::Concat {
1703 return (node.left(self), right);
1705 }
1706 let (stripped, la) = self.strip_trailing_la(right);
1707 (self.mk_concat(node.left(self), stripped), la)
1708 }
1709 #[inline]
1710 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1711 debug_assert!(matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted));
1712 lookahead_node_id.left(self)
1713 }
1714 #[inline]
1715 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1716 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1717 lookahead_node_id.right(self)
1718 }
1719 #[inline]
1720 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1721 debug_assert!(
1722 matches!(self.get_kind(lookahead_node_id), Kind::Lookahead | Kind::Counted),
1723 "not lookahead/counted: {:?}",
1724 self.pp(lookahead_node_id)
1725 );
1726 self.get_extra(lookahead_node_id)
1727 }
1728 #[inline]
1729 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1730 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1731 lookbehind_node_id.left(self)
1732 }
1733 #[inline]
1734 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1735 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1736 lookbehind_node_id.right(self)
1737 }
1738
1739 #[inline]
1740 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1741 debug_assert!(
1742 self.array.len() > node_id.0 as usize,
1743 "array len: {:?}",
1744 node_id
1745 );
1746 self.array[node_id.0 as usize].kind
1747 }
1748
1749 #[inline]
1750 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1751 &self.array[node_id.0 as usize]
1752 }
1753
1754 #[inline]
1755 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1756 self.metadata[node_id.0 as usize]
1757 }
1758
1759 #[inline]
1760 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1761 debug_assert!(node_id.0 != u32::MAX);
1762 let meta_id = self.get_node_meta_id(node_id);
1763 debug_assert!(meta_id != MetadataId::MISSING);
1764 &self.mb.array[meta_id.0 as usize]
1765 }
1766
1767 #[inline]
1768 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1769 if node_id == NodeId::MISSING {
1770 NullsId::EMPTY
1771 } else {
1772 self.get_meta(node_id).nulls
1773 }
1774 }
1775
1776 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1777 self.mb
1778 .nb
1779 .array
1780 .iter()
1781 .map(|set| set.iter().cloned().collect())
1782 .collect()
1783 }
1784
1785 pub fn nulls_count(&self) -> usize {
1786 self.mb.nb.array.len()
1787 }
1788
1789 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1790 self.mb.nb.array[id as usize].iter().cloned().collect()
1791 }
1792
1793 #[inline]
1794 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1795 if node_id == NodeId::MISSING {
1796 NullsId::EMPTY
1797 } else {
1798 let nulls = self.get_meta(node_id).nulls;
1799 self.mb.nb.and_mask(nulls, mask)
1800 }
1801 }
1802
1803 #[inline]
1804 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1805 let meta_id = self.get_node_meta_id(node_id);
1806 let meta = &self.mb.array[meta_id.0 as usize];
1807 meta.flags
1808 }
1809
1810 #[inline]
1811 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1812 self.get_meta(node_id).flags.nullability()
1813 }
1814
1815 #[inline]
1816 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1817 let meta_id = self.get_node_meta_id(node_id);
1818 let meta = &self.mb.array[meta_id.0 as usize];
1819 meta.flags.all_contains_flags()
1820 }
1821
1822 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1823 if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1824 return self.strip_lb(node_id.right(self));
1825 }
1826 self.strip_lb_inner(node_id)
1827 }
1828
1829 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1830 if !self.contains_look(node_id) {
1831 return Ok(node_id);
1832 }
1833 if self.get_kind(node_id) == Kind::Concat
1834 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1835 {
1836 return self.strip_lb_inner(node_id.right(self));
1837 }
1838 if self.get_kind(node_id) == Kind::Inter {
1839 let left = self.strip_lb_inner(node_id.left(self))?;
1840 let right = self.strip_lb_inner(node_id.right(self))?;
1841 return Ok(self.mk_inter(left, right));
1842 }
1843 if self.get_kind(node_id) == Kind::Union {
1844 let left = self.strip_lb_inner(node_id.left(self))?;
1845 let right = self.strip_lb_inner(node_id.right(self))?;
1846 return Ok(self.mk_union(left, right));
1847 }
1848 match self.get_kind(node_id) {
1849 Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1850 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1851 Err(AlgebraError::UnsupportedPattern)
1852 }
1853 _ => Ok(node_id),
1854 }
1855 }
1856
1857 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1858 debug_assert!(node_id != NodeId::MISSING);
1859 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1860 if *rev != NodeId::MISSING {
1861 return Ok(*rev);
1862 }
1863 }
1864 let rw = match self.get_kind(node_id) {
1865 Kind::End => NodeId::BEGIN,
1866 Kind::Begin => NodeId::END,
1867 Kind::Pred => node_id,
1868 Kind::Concat => {
1869 let left = self.reverse(node_id.left(self))?;
1870 let right = self.reverse(node_id.right(self))?;
1871 self.mk_concat(right, left)
1872 }
1873 Kind::Union => {
1874 let left = self.reverse(node_id.left(self))?;
1875 let right = self.reverse(node_id.right(self))?;
1876 self.mk_union(left, right)
1877 }
1878 Kind::Inter => {
1879 let left = self.reverse(node_id.left(self))?;
1880 let right = self.reverse(node_id.right(self))?;
1881 self.mk_inter(left, right)
1882 }
1883 Kind::Star => {
1884 let body = self.reverse(node_id.left(self))?;
1885 self.mk_star(body)
1886 }
1887 Kind::Compl => {
1888 if self.contains_look(node_id.left(self)) {
1889 return Err(AlgebraError::UnsupportedPattern);
1890 }
1891 let body = self.reverse(node_id.left(self))?;
1892 self.mk_compl(body)
1893 }
1894 Kind::Lookbehind => {
1895 let prev = self.get_lookbehind_prev(node_id);
1897 let inner_id = self.get_lookbehind_inner(node_id);
1898 let rev_inner = self.reverse(inner_id)?;
1899 let rev_prev = if prev != NodeId::MISSING {
1900 self.reverse(prev)?
1901 } else {
1902 NodeId::MISSING
1903 };
1904 self.mk_lookahead(rev_inner, rev_prev, 0)
1905 }
1906 Kind::Lookahead => {
1907 let rel = self.get_lookahead_rel(node_id);
1908 if rel == u32::MAX {
1909 let lbody = self.get_lookahead_inner(node_id);
1911 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
1912 let lbody_ts = self.mk_concat(lbody, NodeId::TS);
1913 let ltail_ts = self.mk_concat(ltail, NodeId::TS);
1914 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1915 let rev = self.reverse(as_inter)?;
1916 self.init_reversed(node_id, rev);
1917 return Ok(rev);
1918 }
1919 if rel != 0 {
1920 return Err(AlgebraError::UnsupportedPattern);
1921 }
1922 let tail_node = self.get_lookahead_tail(node_id);
1924 let rev_tail = if tail_node != NodeId::MISSING {
1925 self.reverse(tail_node)?
1926 } else {
1927 NodeId::MISSING
1928 };
1929 let inner_id = self.get_lookahead_inner(node_id);
1930 let rev_inner = self.reverse(inner_id)?;
1931 self.mk_lookbehind(rev_inner, rev_tail)
1932 }
1933 Kind::Counted => {
1934 return Err(AlgebraError::UnsupportedPattern);
1935 }
1936 };
1937 self.init_reversed(node_id, rw);
1938 Ok(rw)
1939 }
1940
1941 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
1942 let node = NodeKey {
1943 kind: Kind::Pred,
1944 left: NodeId::MISSING,
1945 right: NodeId::MISSING,
1946 extra: pred.0,
1947 };
1948 self.get_node_id(node)
1949 }
1950
1951 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
1952 let key = NodeKey {
1953 kind: Kind::Compl,
1954 left: body,
1955 right: NodeId::MISSING,
1956 extra: u32::MAX,
1957 };
1958 if let Some(id) = self.key_is_created(&key) {
1959 return *id;
1960 }
1961 if body == NodeId::BOT {
1962 return NodeId::TS;
1963 }
1964 if body == NodeId::TS {
1965 return NodeId::BOT;
1966 }
1967
1968 if let Some(contains_body) = body.is_contains(self) {
1969 if contains_body.is_pred(self) {
1970 let pred = contains_body.pred_tset(self);
1971 let notpred = self.mk_pred_not(pred);
1972 let node = self.mk_star(notpred);
1973 return self.init_as(key, node);
1974 } else if contains_body == NodeId::END {
1975 return self.init_as(key, NodeId::BOT);
1976 }
1977 };
1978
1979 if self.get_kind(body) == Kind::Compl {
1980 return self.get_node(body).left;
1981 }
1982
1983 self.get_node_id(key)
1984 }
1985
1986 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
1987 let nid = self.get_nulls_id(body);
1988 let nref = self.mb.nb.get_set_ref(nid).clone();
1989 let mut futures = NodeId::BOT;
1990 for n in nref.iter() {
1991 if !n.is_mask_nullable(mask) {
1992 continue;
1993 }
1994
1995 let eff = if n.rel == 0 {
1996 NodeId::EPS
1997 } else {
1998 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
1999 };
2000 futures = self.mk_union(futures, eff)
2001 }
2002 futures
2003 }
2004
2005 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2006 if cfg!(feature = "norewrite") {
2007 return None;
2008 }
2009
2010 if self.get_kind(tail) == Kind::Lookbehind {
2011 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2012 return self
2013 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft).ok();
2014 }
2015 if self.get_kind(head) == Kind::Lookahead {
2016 let la_tail = self.get_lookahead_tail(head);
2017 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2018 if new_la_tail.is_center_nullable(self) {
2019 let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2020 if non_null_tail == NodeId::BOT {
2021 return None;
2022 }
2023 let la_body = self.get_lookahead_inner(head);
2024 return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2025 }
2026 let la_body = self.get_lookahead_inner(head);
2027 let la_rel = self.get_lookahead_rel(head);
2028 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2029 let tail_rel = self.get_lookahead_rel(new_la_tail);
2030 tail_rel + la_rel
2031 } else {
2032 u32::MAX
2033 };
2034
2035 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2036 }
2037
2038 if head.is_kind(self, Kind::End)
2039 && tail == NodeId::TS {
2040 return Some(head);
2041 }
2042 if head == NodeId::TS
2043 && tail == NodeId::END {
2044 return Some(head);
2045 }
2046
2047 if head == NodeId::TS
2048 && self.nullability(tail) == Nullability::ALWAYS {
2049 return Some(NodeId::TS);
2050 }
2051
2052 if tail == NodeId::TS
2053 && self.nullability(head) == Nullability::ALWAYS {
2054 return Some(NodeId::TS);
2055 }
2056
2057 if self.get_kind(tail) == Kind::Union
2058 && head == NodeId::TS {
2059 let mut should_distribute_top = false;
2060 self.iter_unions(tail, |v| {
2061 if v == NodeId::BEGIN || self.starts_with_ts(v) {
2062 should_distribute_top = true;
2063 }
2064 });
2065 if should_distribute_top {
2066 let mut new_union = NodeId::BOT;
2067 let mut curr = tail;
2068 while self.get_kind(curr) == Kind::Union {
2069 let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2070 new_union = self.mk_union(new_node, new_union);
2071 curr = curr.right(self);
2072 }
2073 let new_node = self.mk_concat(NodeId::TS, curr);
2074 new_union = self.mk_union(new_union, new_node);
2075 return Some(new_union);
2076 }
2077 }
2078
2079 if self.get_kind(head) == Kind::Union
2080 && head == NodeId::TS {
2081 let mut should_distribute_top = false;
2082 self.iter_unions(head, |v| {
2083 if v == NodeId::END || self.starts_with_ts(v) {
2084 should_distribute_top = true;
2085 }
2086 });
2087 if should_distribute_top {
2088 let mut new_union = NodeId::BOT;
2089 let mut curr = head;
2090 while self.get_kind(curr) == Kind::Union {
2091 let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2092 new_union = self.mk_union(new_node, new_union);
2093 curr = curr.right(self);
2094 }
2095 let new_node = self.mk_concat(curr, NodeId::TS);
2096 new_union = self.mk_union(new_union, new_node);
2097 return Some(new_union);
2098 }
2099 }
2100
2101 if self.get_kind(head) == Kind::Inter
2102 && tail == NodeId::TS {
2103 let mut alltopstar = true;
2104 iter_inter!(self, head, |v| {
2105 alltopstar = self.ends_with_ts(v);
2106 });
2107 if alltopstar {
2108 return Some(head);
2109 }
2110 }
2111
2112 if head.is_star(self) && head == tail {
2113 return Some(head);
2114 }
2115
2116 None
2117 }
2118
2119 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2120 use Kind::*;
2121
2122 if cfg!(feature = "norewrite") {
2123 return None;
2124 }
2125 if left == right {
2126 return Some(left);
2127 }
2128
2129 if right.is_kind(self, Kind::Union) && left == right.left(self) {
2130 return Some(right);
2131 }
2132
2133 if self.get_kind(left) == Kind::Pred
2134 && self.get_kind(right) == Kind::Pred {
2135 let l = left.pred_tset(self);
2136 let r = right.pred_tset(self);
2137 let solver = self.solver();
2138 let psi = solver.or_id(l, r);
2139 let rewrite = self.mk_pred(psi);
2140 return Some(rewrite);
2141 }
2142
2143 if left == NodeId::EPS
2144 && self.get_extra(left) == 0
2145 && self.nullability(right) == Nullability::ALWAYS {
2146 return Some(right);
2147 }
2148
2149 if self.get_kind(left) == Kind::Lookahead
2150 && self.get_kind(right) == Kind::Lookahead {
2151 let lb = left.left(self);
2152 let lt = left.right(self);
2153 let lrel = left.extra(self);
2154
2155 let rb = right.left(self);
2156 let rt = right.right(self);
2157 let rrel = right.extra(self);
2158
2159 if lrel == rrel
2160
2161 && lt.is_missing()
2162 && rt.is_missing() {
2163 let unioned = self.mk_union(lb, rb);
2164 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2165 return Some(node);
2166 }
2167 }
2168
2169 if right.is_kind(self, Concat) {
2170 if left == NodeId::END
2171 && right.left(self) == NodeId::END
2172 && self.nullability(right).has(Nullability::END) {
2173 return Some(right);
2174 }
2175 if left == right.left(self) {
2177 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2178 let rw = self.mk_concat(left, rhs);
2179 return Some(rw);
2180 }
2181 if left.is_kind(self, Concat) {
2182 let head1 = left.left(self);
2183 let head2 = right.left(self);
2184
2185 if head1 == head2 {
2186 let t1 = left.right(self);
2187 let t2 = right.right(self);
2188 if head1 == NodeId::TS {
2190 if t2.has_concat_tail(self, t1) {
2191 return Some(left);
2192 }
2193 if t1.has_concat_tail(self, t2) {
2194 return Some(right);
2195 }
2196 }
2197 let un = self.mk_union(t1, t2);
2198 return Some(self.mk_concat(left.left(self), un));
2199 }
2200
2201 if false {
2204 let end1 = self.get_concat_end(left);
2205 let end2 = self.get_concat_end(right);
2206 if end1 == end2 {
2207 let flags = left.flags_contains(self).or(right.flags_contains(self));
2208 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2209 let rev1 = self.reverse(left).unwrap();
2210 let rev2 = self.reverse(right).unwrap();
2211
2212 let union_rev = self.mk_union(rev1, rev2);
2213 return Some(self.reverse(union_rev).unwrap());
2214 }
2215 }
2216 }
2217 }
2218 if self.get_kind(left) == Kind::Pred
2219 && left == right.left(self) {
2220 let un = self.mk_opt(right.right(self));
2221 let conc = self.mk_concat(left, un);
2222 return Some(conc);
2223 }
2224 }
2225
2226 if left == NodeId::EPS
2227 && right.is_plus(self) {
2228 let result = self.mk_star(right.left(self));
2229 return Some(result);
2230 }
2231
2232 if self.flags == BuilderFlags::SUBSUME {
2234 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2235 return Some(rw);
2236 }
2237 }
2238
2239 None
2240 }
2241
2242 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2243 let mut curr = node;
2244 while self.get_kind(curr) == Kind::Inter {
2245 out.push(self.get_left(curr));
2246 curr = self.get_right(curr);
2247 }
2248 out.push(curr);
2249 }
2250
2251 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2254 let mut curr = node;
2255 let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2256 if has_prefix {
2257 curr = self.get_right(curr);
2258 }
2259 let mut count = 0u32;
2260 let mut pred_set = None;
2261 while self.get_kind(curr) == Kind::Concat {
2262 let head = self.get_left(curr);
2263 if self.get_kind(head) != Kind::Pred {
2264 return None;
2265 }
2266 let ps = head.pred_tset(self);
2267 match pred_set {
2268 None => pred_set = Some(ps),
2269 Some(existing) => {
2270 if existing != ps {
2271 return None;
2272 }
2273 }
2274 }
2275 curr = self.get_right(curr);
2276 count += 1;
2277 }
2278 if count == 0 || self.get_kind(curr) != Kind::Star {
2279 return None;
2280 }
2281 Some((has_prefix, pred_set.unwrap(), curr, count))
2282 }
2283
2284 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2285 let mut j = 0;
2286 for &s in sub {
2287 while j < sup.len() && sup[j] < s {
2288 j += 1;
2289 }
2290 if j >= sup.len() || sup[j] != s {
2291 return false;
2292 }
2293 j += 1;
2294 }
2295 true
2296 }
2297
2298 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2299 if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2300 return None;
2301 }
2302
2303 let mut lc: Vec<NodeId> = Vec::new();
2304 let mut rc: Vec<NodeId> = Vec::new();
2305 self.collect_inter_components(left, &mut lc);
2306 self.collect_inter_components(right, &mut rc);
2307
2308 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2311 return Some(left);
2312 }
2313 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2315 return Some(right);
2316 }
2317
2318 if lc.len() == rc.len() {
2319 let mut diff_idx = None;
2320 for i in 0..lc.len() {
2321 if lc[i] != rc[i] {
2322 if diff_idx.is_some() {
2323 return None;
2324 }
2325 diff_idx = Some(i);
2326 }
2327 }
2328 if let Some(idx) = diff_idx {
2329 let a = lc[idx];
2330 let b = rc[idx];
2331 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2332 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2333 {
2334 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2335 return if ca < cb { Some(left) } else { Some(right) };
2336 }
2337 }
2338 }
2339 }
2340
2341 None
2342 }
2343
2344 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2345 if cfg!(feature = "norewrite") {
2346 return None;
2347 }
2348 if left == right {
2349 return Some(left);
2350 }
2351
2352 if self.get_kind(right) == Kind::Union {
2353 let mut result = NodeId::BOT;
2354 self.iter_unions_b(
2355 right,
2356 &mut (|b, v| {
2357 let new_inter = b.mk_inter(v, left);
2358 result = b.mk_union(result, new_inter);
2359 }),
2360 );
2361 return Some(result);
2362 }
2363 if self.get_kind(left) == Kind::Union {
2364 let mut result = NodeId::BOT;
2365 self.iter_unions_b(
2366 left,
2367 &mut (|b, v| {
2368 let new_inter = b.mk_inter(v, right);
2369 result = b.mk_union(result, new_inter);
2370 }),
2371 );
2372 return Some(result);
2373 }
2374
2375 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2376 return Some(NodeId::BOT);
2377 }
2378
2379 if left.kind(self) == Kind::Compl
2380 && right.kind(self) == Kind::Compl {
2381 let bodies = self.mk_union(left.left(self), right.left(self));
2382 return Some(self.mk_compl(bodies));
2383 }
2384
2385 if left == NodeId::TOPPLUS {
2386 if right.is_pred_star(self).is_some() {
2387 let newloop = self.mk_plus(right.left(self));
2388 return Some(newloop);
2389 }
2390 if right.is_never_nullable(self) {
2391 return Some(right);
2392 }
2393 if right.is_kind(self, Kind::Lookahead)
2394 && self.get_lookahead_tail(right).is_missing() {
2395 return Some(NodeId::BOT);
2396 }
2397 if right.is_kind(self, Kind::Concat) {}
2398 }
2399
2400 {
2404 let l_is_la = left.is_lookahead(self);
2405 let r_is_la = right.is_lookahead(self);
2406 let l_is_cla = !l_is_la
2407 && self.get_kind(left) == Kind::Concat
2408 && self.get_kind(left.left(self)) == Kind::Lookahead;
2409 let r_is_cla = !r_is_la
2410 && self.get_kind(right) == Kind::Concat
2411 && self.get_kind(right.left(self)) == Kind::Lookahead;
2412 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2413 let (la_node, other, concat_body) = if r_is_la {
2414 (right, left, NodeId::MISSING)
2415 } else if l_is_la {
2416 (left, right, NodeId::MISSING)
2417 } else if r_is_cla {
2418 (right.left(self), left, right.right(self))
2419 } else {
2420 (left.left(self), right, left.right(self))
2421 };
2422 let la_body = la_node.left(self);
2423 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2424 let inter_right = if concat_body.is_missing() {
2425 la_tail
2426 } else {
2427 self.mk_concat(la_tail, concat_body)
2428 };
2429 let new_body = self.mk_inter(other, inter_right);
2430 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2431 return Some(self.mk_concat(la, new_body));
2432 }
2433 }
2434
2435 if self.get_kind(right) == Kind::Compl {
2436 let compl_body = right.left(self);
2437 if left == compl_body {
2438 return Some(NodeId::BOT);
2439 }
2440 if self.get_kind(compl_body) == Kind::Concat {
2441 let compl_head = compl_body.left(self);
2442 if compl_body.right(self) == NodeId::TS
2444 && compl_head == left {
2445 return Some(NodeId::BOT);
2446 }
2447 }
2448 }
2449
2450 if let Some(pleft) = left.is_pred_star(self) {
2451 if let Some(pright) = right.is_pred_star(self) {
2452 let merged = self.mk_inter(pleft, pright);
2453 return Some(self.mk_star(merged));
2454 }
2455 }
2456
2457 {
2461 let l_is_clb = self.get_kind(left) == Kind::Concat
2462 && self.get_kind(left.left(self)) == Kind::Lookbehind;
2463 let r_is_clb = self.get_kind(right) == Kind::Concat
2464 && self.get_kind(right.left(self)) == Kind::Lookbehind;
2465 if l_is_clb || r_is_clb {
2466 let (lb, body) = if l_is_clb && r_is_clb {
2467 let lb1 = left.left(self);
2468 let lb2 = right.left(self);
2469 let inner = self.mk_inter(
2470 self.get_lookbehind_inner(lb1),
2471 self.get_lookbehind_inner(lb2),
2472 );
2473 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2475 let body = self.mk_inter(left.right(self), right.right(self));
2476 (lb, body)
2477 } else if l_is_clb {
2478 let lb = left.left(self);
2479 let body = self.mk_inter(left.right(self), right);
2480 (lb, body)
2481 } else {
2482 let lb = right.left(self);
2483 let body = self.mk_inter(left, right.right(self));
2484 (lb, body)
2485 };
2486 return Some(self.mk_concat(lb, body));
2487 }
2488 }
2489
2490 {
2494 let l_has_la = self.has_trailing_la(left);
2495 let r_has_la = self.has_trailing_la(right);
2496 if l_has_la || r_has_la {
2497 let (body, la) = if l_has_la && r_has_la {
2498 let (lbody, l_la) = self.strip_trailing_la(left);
2499 let (rbody, r_la) = self.strip_trailing_la(right);
2500 let inner = self.mk_inter(
2501 self.get_lookahead_inner(l_la),
2502 self.get_lookahead_inner(r_la),
2503 );
2504 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2505 let body = self.mk_inter(lbody, rbody);
2506 (body, la)
2507 } else if l_has_la {
2508 let (lbody, la) = self.strip_trailing_la(left);
2509 let body = self.mk_inter(lbody, right);
2510 (body, la)
2511 } else {
2512 let (rbody, la) = self.strip_trailing_la(right);
2513 let body = self.mk_inter(left, rbody);
2514 (body, la)
2515 };
2516 return Some(self.mk_concat(body, la));
2517 }
2518 }
2519
2520 None
2521 }
2522
2523 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2524 if cfg!(feature = "norewrite") {
2525 return None;
2526 }
2527 debug_assert!(self.get_kind(right_union) == Kind::Union);
2528
2529 let mut rewritten = None;
2530 right_union.iter_union_while(
2531 self,
2532 &mut (|b, v| {
2533 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2534 rewritten = Some((v, rw));
2535 false
2536 } else {
2537 true
2538 }
2539 }),
2540 );
2541
2542 if let Some(rw) = rewritten {
2543 let mut new_union = NodeId::BOT;
2544 right_union.iter_union(
2545 self,
2546 &mut (|b, v| {
2547 if v == rw.0 {
2548 new_union = b.mk_union(rw.1, new_union)
2549 } else {
2550 new_union = b.mk_union(v, new_union)
2551 }
2552 }),
2553 );
2554 return Some(new_union);
2555 };
2556
2557 None
2558 }
2559
2560 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2561 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2562 debug_assert!(tail != NodeId::MISSING);
2563 let key = NodeKey {
2564 kind: Kind::Concat,
2565 left: head,
2566 right: tail,
2567 extra: u32::MAX,
2568 };
2569 if let Some(id) = self.key_is_created(&key) {
2570 return *id;
2571 }
2572
2573 if head == NodeId::BOT || tail == NodeId::BOT {
2574 return NodeId::BOT;
2575 }
2576 if head == NodeId::EPS {
2577 return tail;
2578 }
2579 if tail == NodeId::EPS {
2580 return head;
2581 }
2582
2583 match tail {
2584 NodeId::BEGIN => {
2586 if !self.is_nullable(head, Nullability::BEGIN) {
2587 return NodeId::BOT;
2588 } else {
2589 return NodeId::BEGIN;
2590 }
2591 }
2592 _ => {}
2593 }
2594
2595 if head.is_kind(self, Kind::Concat) {
2597 let left = head.left(self);
2598 let newright = self.mk_concat(head.right(self), tail);
2599 let updated = self.mk_concat(left, newright);
2600 return self.init_as(key, updated);
2601 }
2602
2603 if head == NodeId::TS && tail == NodeId::END {
2604 return NodeId::TS;
2605 }
2606
2607 if self.get_kind(head) == Kind::End
2608 && !self.is_nullable(tail, Nullability::END) {
2609 return NodeId::BOT;
2610 }
2611
2612 if self.get_kind(tail) == Kind::Concat {
2613 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2614 let upd = self.mk_concat(rw, tail.right(self));
2615 return self.init_as(key, upd);
2616 }
2617 }
2618
2619 if let Some(new) = self.attempt_rw_concat_2(head, tail) { return self.init_as(key, new) }
2620
2621 match (self.get_kind(head), self.get_kind(tail)) {
2622 (Kind::Star, Kind::Concat) if head.is_star(self) => {
2624 let rl = tail.left(self);
2625 if head == rl {
2626 return self.init_as(key, tail);
2627 }
2628 }
2629 (_, Kind::Concat) => {
2631 let curr = head;
2632 let h2 = self.mk_concat(curr, tail.left(self));
2633 let tr = tail.right(self);
2634 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2635 return self.init_as(key, new);
2636 }
2637 }
2638 _ if head == NodeId::TS
2639 && self.starts_with_ts(tail) => {
2640 return self.init_as(key, tail);
2641 }
2642 _ => {}
2643 }
2644
2645 self.init(key)
2646 }
2647
2648 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2649 let lb_body = {
2651 match self.starts_with_ts(lb_body) {
2652 true => lb_body,
2653 false => self.mk_concat(NodeId::TS, lb_body),
2654 }
2655 };
2656 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2658 }
2659
2660 fn mk_lookbehind_internal(
2661 &mut self,
2662 lb_body: NodeId,
2663 lb_prev: NodeId,
2664 ) -> Result<NodeId, AlgebraError> {
2665 debug_assert!(lb_body != NodeId::MISSING);
2666 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2667 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2668 return Ok(NodeId::BOT);
2669 }
2670 if lb_body == NodeId::TS {
2671 return Ok(lb_prev);
2672 }
2673 if lb_body == NodeId::EPS {
2674 match lb_prev {
2675 NodeId::MISSING => return Ok(NodeId::EPS),
2676 NodeId::EPS => return Ok(NodeId::EPS),
2677 _ => return Err(AlgebraError::UnsupportedPattern),
2678 }
2679 }
2680
2681 let key = NodeKey {
2682 kind: Kind::Lookbehind,
2683 left: lb_body,
2684 right: lb_prev,
2685 extra: u32::MAX,
2686 };
2687 match self.key_is_created(&key) {
2688 Some(id) => Ok(*id),
2689 None => {
2690 if lb_prev == NodeId::TS {
2691 return Ok(self.mk_concat(lb_prev, lb_body));
2692 }
2693
2694 Ok(self.init(key))
2695 }
2696 }
2697 }
2698
2699 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2700 let la_body = {
2702 match self.ends_with_ts(la_body) {
2703 true => la_body,
2704 false => self.mk_concat(la_body, NodeId::TS),
2705 }
2706 };
2707 let rel = if NodeId::MISSING == la_tail {
2708 rel
2709 } else {
2710 match la_tail.is_center_nullable(self) {
2711 false => u32::MAX,
2712 true => rel,
2713 }
2714 };
2715
2716 self.mk_lookahead_internal(la_body, la_tail, rel)
2717 }
2718
2719 pub fn mk_lookahead_internal(
2721 &mut self,
2722 la_body: NodeId,
2723 la_tail: NodeId,
2724 rel: u32,
2725 ) -> NodeId {
2726 let key = NodeKey {
2727 kind: Kind::Lookahead,
2728 left: la_body,
2729 right: la_tail,
2730 extra: rel,
2731 };
2732 if let Some(id) = self.key_is_created(&key) {
2733 return *id;
2734 }
2735 if la_body == NodeId::TS {
2736 if rel == 0 {
2737 return la_tail.missing_to_eps();
2738 } else {
2739 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2740 }
2741 }
2742 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2743 return NodeId::BOT;
2744 }
2745 if la_tail.is_missing() && rel == u32::MAX {
2746 return NodeId::BOT;
2747 }
2748
2749 if la_body == NodeId::EPS && la_tail.is_missing()
2750 && rel == 0 {
2751 return la_body;
2752 }
2753
2754 if la_tail == NodeId::TS {
2755 if rel == 0 || rel == u32::MAX {
2756 return self.mk_concat(la_body, NodeId::TS);
2757 } else if rel == u32::MAX {
2758 return self.mk_begins_with(la_body);
2759 }
2760 }
2761
2762 if rel == u32::MAX {
2763 if la_tail.is_missing() {
2764 return NodeId::BOT;
2765 }
2766
2767 if self.is_always_nullable(la_body) {
2768 return la_tail.missing_to_eps();
2769 }
2770
2771 if la_tail != NodeId::MISSING {
2772 match self.get_kind(la_tail) {
2773 _ => {
2774 if la_body.is_compl_plus_end(self) {
2775 let minlen = self.get_min_length_only(la_tail);
2776 if minlen >= 1 {
2777 return NodeId::BOT;
2778 }
2779 }
2780 }
2781 }
2782 }
2783 }
2784
2785 if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2787 let la_body2 = self.get_lookahead_inner(la_tail);
2788 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2789 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2790 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2791 let new_la_rel = self.get_lookahead_rel(la_tail);
2792 let new_la_tail = self.get_lookahead_tail(la_tail);
2793 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2794 }
2795
2796 if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2798 let la_body_right = la_body.right(self);
2799 if self.is_always_nullable(la_body_right) {
2800 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2801 }
2802 if la_body.right(self) == NodeId::END {
2803 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2804 }
2805 let bodyright = la_body.right(self);
2806 if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2807 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2808 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2809 }
2810 }
2811
2812 if la_tail != NodeId::MISSING {
2813 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2814 let lpred = la_body.left(self);
2815 if self.get_kind(lpred) == Kind::Pred {
2816 let l = lpred.pred_tset(self);
2817 let r = la_tail.pred_tset(self);
2818 let psi_and = self.solver().and_id(l, r);
2819 let rewrite = self.mk_pred(psi_and);
2820 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2821 let new_right = self.mk_lookahead_internal(
2822 la_body.right(self),
2823 NodeId::MISSING,
2824 new_rel,
2825 );
2826 return self.mk_concat(rewrite, new_right);
2827 }
2828 }
2829 }
2830
2831 self.get_node_id(key)
2832 }
2833
2834 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2835 let has_match = (packed >> 16) > 0;
2836 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2837 return NodeId::BOT;
2838 }
2839 debug_assert!(
2840 body == NodeId::BOT || !self.is_infinite(body),
2841 "Counted body must have finite max length"
2842 );
2843 let chain = self.prune_counted_chain(body, chain);
2844 let key = NodeKey {
2845 kind: Kind::Counted,
2846 left: body,
2847 right: chain,
2848 extra: packed,
2849 };
2850 if let Some(id) = self.key_is_created(&key) {
2851 return *id;
2852 }
2853 self.get_node_id(key)
2854 }
2855
2856 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2857 if chain == NodeId::MISSING || body == NodeId::BOT {
2858 return chain;
2859 }
2860 if self.nullability(body) != Nullability::NEVER {
2862 return NodeId::MISSING;
2863 }
2864 let chain_body = chain.left(self);
2865 if chain_body == NodeId::BOT {
2866 return chain; }
2868 let not_begins = self.mk_not_begins_with(body);
2869 let inter = self.mk_inter(chain_body, not_begins);
2870 let is_empty = inter == NodeId::BOT;
2871 if is_empty {
2872 self.prune_counted_chain(body, chain.right(self))
2873 } else {
2874 chain
2875 }
2876 }
2877
2878 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2879 let neg_inner = self.mk_concat(body, NodeId::TS);
2880 let neg_part = self.mk_compl(neg_inner);
2881 let conc = self.mk_concat(neg_part, NodeId::END);
2882 self.mk_lookahead(conc, NodeId::MISSING, rel)
2883 }
2884
2885 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2886 match self.get_node(body).kind {
2887 Kind::Pred => {
2888 let psi = body.pred_tset(self);
2889 let negated = self.mk_pred_not(psi);
2890 let union = self.mk_union(NodeId::BEGIN, negated);
2891 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2893 }
2894 _ => {
2895 let neg_inner = self.mk_concat(NodeId::TS, body);
2896 let neg_part = self.mk_compl(neg_inner);
2897 let conc = self.mk_concat(NodeId::BEGIN, neg_part);
2898 self.mk_lookbehind_internal(conc, NodeId::MISSING).unwrap()
2900 }
2901 }
2902 }
2903
2904 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2905 debug_assert!(left != NodeId::MISSING);
2906 debug_assert!(right != NodeId::MISSING);
2907 if left > right {
2908 return self.mk_union(right, left);
2909 }
2910 let key = NodeKey {
2911 kind: Kind::Union,
2912 left,
2913 right,
2914 extra: u32::MAX,
2915 };
2916 if let Some(id) = self.key_is_created(&key) {
2917 return *id;
2918 }
2919
2920 if left == right {
2921 return left;
2922 }
2923 if left == NodeId::BOT {
2924 return right;
2925 }
2926 if right == NodeId::BOT {
2927 return left;
2928 }
2929 if right == NodeId::TS {
2930 return right;
2931 }
2932 if left == NodeId::TS {
2933 return left;
2934 }
2935
2936 match (self.get_kind(left), self.get_kind(right)) {
2937 (Kind::Union, _) => {
2938 self.iter_unions_b(left, &mut |b, v| {
2939 b.temp_vec.push(v);
2940 });
2941 self.iter_unions_b(right, &mut |b, v| {
2942 b.temp_vec.push(v);
2943 });
2944 self.temp_vec.sort();
2945 let tree = self.temp_vec.clone();
2946 self.temp_vec.clear();
2947 let newnode = tree
2948 .iter()
2949 .rev()
2950 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2951 return self.init_as(key, newnode);
2952 }
2953 (_, Kind::Union) => {
2954 let rleft = right.left(self);
2955 if left > rleft {
2957 self.iter_unions_b(left, &mut |b, v| {
2958 b.temp_vec.push(v);
2959 });
2960 self.iter_unions_b(right, &mut |b, v| {
2961 b.temp_vec.push(v);
2962 });
2963 self.temp_vec.sort();
2964 let tree = self.temp_vec.clone();
2965 self.temp_vec.clear();
2966 let newnode = tree
2967 .iter()
2968 .rev()
2969 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2970 return self.init_as(key, newnode);
2971 } else {
2972 if let Some(rw) = self.attempt_rw_unions(left, right) {
2973 return self.init_as(key, rw);
2974 }
2975 }
2976 }
2977 _ => {}
2978 }
2979
2980 if let Some(rw) = self.attempt_rw_union_2(left, right) {
2981 return self.init_as(key, rw);
2982 }
2983 self.init(key)
2984 }
2985
2986 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
2987 debug_assert!(left_id != NodeId::MISSING);
2988 debug_assert!(right_id != NodeId::MISSING);
2989 if left_id == right_id {
2990 return left_id;
2991 }
2992 if left_id == NodeId::BOT || right_id == NodeId::BOT {
2993 return NodeId::BOT;
2994 }
2995 if left_id == NodeId::TS {
2996 return right_id;
2997 }
2998 if right_id == NodeId::TS {
2999 return left_id;
3000 }
3001 if left_id > right_id {
3002 return self.mk_inter(right_id, left_id);
3003 }
3004 let key = NodeKey {
3005 kind: Kind::Inter,
3006 left: left_id,
3007 right: right_id,
3008 extra: u32::MAX,
3009 };
3010 if let Some(id) = self.key_is_created(&key) {
3011 return *id;
3012 }
3013
3014 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3015 return self.init_as(key, rw);
3016 }
3017
3018 self.init(key)
3019 }
3020
3021 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3022 let node = NodeKey {
3023 kind,
3024 left: NodeId::MISSING,
3025 right: NodeId::MISSING,
3026 extra: u32::MAX,
3027 };
3028 self.init(node)
3029 }
3030
3031 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3032 let star = self.mk_star(body_id);
3033 self.mk_concat(body_id, star)
3034 }
3035 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3036 let opt = self.mk_opt(body_id);
3037 let mut nodes1 = vec![];
3038 for _ in lower..upper {
3039 nodes1.push(opt);
3040 }
3041 for _ in 0..lower {
3042 nodes1.push(body_id);
3043 }
3044 self.mk_concats(nodes1.into_iter())
3045 }
3046 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3047 self.mk_union(NodeId::EPS, body_id)
3048 }
3049
3050 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3051 let key = NodeKey {
3052 kind: Kind::Star,
3053 left: body_id,
3054 right: NodeId::MISSING,
3055 extra: 0,
3056 };
3057 if let Some(id) = self.key_is_created(&key) {
3058 return *id;
3059 }
3060 if body_id.is_kind(self, Kind::Star) {
3062 return body_id;
3063 }
3064 self.get_node_id(key)
3065 }
3066
3067 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3070 match self.get_kind(node_id) {
3071 Kind::End => Nullability::EMPTYSTRING,
3072 Kind::Begin => Nullability::EMPTYSTRING,
3073 Kind::Pred => Nullability::NEVER,
3074 Kind::Star => Nullability::ALWAYS,
3075 Kind::Inter | Kind::Concat => {
3076 let lnull = self.nullability_emptystring(node_id.left(self));
3077 let rnull = self.nullability_emptystring(node_id.right(self));
3078 lnull.and(rnull) }
3080 Kind::Union => {
3081 let lnull = self.nullability_emptystring(node_id.left(self));
3082 let rnull = self.nullability_emptystring(node_id.right(self));
3083 lnull.or(rnull)
3084 }
3085 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3086 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3087 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3088 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3089 }
3090 }
3091
3092 #[inline(always)]
3093 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3094 self.get_meta(node_id)
3095 .flags
3096 .nullability()
3097 .has(Nullability::CENTER.or(Nullability::END))
3098 }
3099
3100 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3101 self.get_only_nullability(node_id)
3102 }
3103
3104 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3105 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3106 }
3107
3108 pub fn pp(&self, node_id: NodeId) -> String {
3109 let mut s = String::new();
3110 self.ppw(&mut s, node_id).unwrap();
3111 s
3112 }
3113
3114 #[allow(dead_code)]
3115 pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
3116 let nu = self.get_nulls_id(node_id);
3117 let nr = self.mb.nb.get_set_ref(nu);
3118 let s1 = format!("{:?}", nr);
3119 s1
3120 }
3121
3122 #[allow(dead_code)]
3123 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3124 match self.get_tregex(term_id) {
3125 TRegex::Leaf(node_id) => {
3126 let mut s = String::new();
3127 self.ppw(&mut s, *node_id).unwrap();
3128 s
3129 }
3130 TRegex::ITE(cond, then_id, else_id) => {
3131 format!(
3132 "ITE({},{},{})",
3133 self.solver_ref().pp(*cond),
3134 self.ppt(*then_id),
3135 self.ppt(*else_id)
3136 )
3137 }
3138 }
3139 }
3140
3141 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3142 if cfg!(feature = "graphviz") {
3143 match node_id {
3144 NodeId::MISSING => return write!(s, "MISSING"),
3145 NodeId::BOT => return write!(s, "⊥"),
3146 NodeId::TS => return write!(s, "_*"),
3147 NodeId::TOP => return write!(s, "_"),
3148 NodeId::EPS => return write!(s, "ε"),
3149 _ => {}
3150 }
3151 }
3152
3153 match node_id {
3154 NodeId::MISSING => return write!(s, "MISSING"),
3155 NodeId::BOT => return write!(s, "⊥"),
3156 NodeId::TS => return write!(s, "_*"),
3157 NodeId::TOP => return write!(s, "_"),
3158 NodeId::EPS => return write!(s, ""),
3159 _ => {}
3160 }
3161
3162 match self.get_kind(node_id) {
3163 Kind::End => write!(s, r"\z"),
3164 Kind::Begin => write!(s, r"\A"),
3165 Kind::Pred => {
3166 let psi = node_id.pred_tset(self);
3167 if psi == TSetId::EMPTY {
3168 write!(s, r"⊥")
3169 } else if psi == TSetId::FULL {
3170 write!(s, r"_")
3171 } else {
3172 write!(s, "{}", self.solver_ref().pp(psi))
3173 }
3174 }
3175 Kind::Inter => {
3176 write!(s, "(")?;
3177 self.ppw(s, node_id.left(self))?;
3178 write!(s, "&")?;
3179 let mut curr = node_id.right(self);
3180 while self.get_kind(curr) == Kind::Inter {
3181 let n = curr.left(self);
3182 self.ppw(s, n)?;
3183 write!(s, "&")?;
3184 curr = curr.right(self);
3185 }
3186 self.ppw(s, curr)?;
3187 write!(s, ")")
3188 }
3189 Kind::Union => {
3190 let left = node_id.left(self);
3191 let right = node_id.right(self);
3192 write!(s, "(")?;
3193 self.ppw(s, left)?;
3194 write!(s, "|")?;
3195 let mut curr = right;
3196 while self.get_kind(curr) == Kind::Union {
3197 let n = curr.left(self);
3198 self.ppw(s, n)?;
3199 write!(s, "|")?;
3200 curr = curr.right(self);
3201 }
3202 self.ppw(s, curr)?;
3203 write!(s, ")")
3204 }
3205 Kind::Concat => {
3206 let left = node_id.left(self);
3207 let right = node_id.right(self);
3208 if right.is_star(self) && right.left(self) == left {
3209 self.ppw(s, left)?;
3210 write!(s, "+")?;
3211 return Ok(());
3212 }
3213 if right.is_concat(self) {
3214 let rl = right.left(self);
3215 if rl.is_star(self) && rl.left(self) == left {
3216 self.ppw(s, left)?;
3217 write!(s, "+")?;
3218 return self.ppw(s, right.right(self));
3219 }
3220 }
3221 if right.is_concat(self) && right.left(self) == left {
3222 let mut num = 1;
3223 let mut right = right;
3224 while right.is_concat(self) && right.left(self) == left {
3225 num += 1;
3226 right = right.right(self);
3227 }
3228 if let Some(inner) = left.is_opt_v(self) {
3230 let mut inner_count = 0;
3231 let mut right2 = right;
3232 while right2.is_concat(self) && right2.left(self) == inner {
3233 inner_count += 1;
3234 right2 = right2.right(self);
3235 }
3236 if right2 == inner {
3237 inner_count += 1;
3238 self.ppw(s, inner)?;
3239 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3240 }
3241 if inner_count > 0 {
3242 self.ppw(s, inner)?;
3243 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3244 return self.ppw(s, right2);
3245 }
3246 }
3247 self.ppw(s, left)?;
3248 if right == left {
3249 num += 1;
3250 return write!(s, "{{{}}}", num);
3251 }
3252 if num <= 3 && left.is_pred(self) {
3253 for _ in 1..num {
3254 self.ppw(s, left)?;
3255 }
3256 return self.ppw(s, right);
3257 } else {
3258 write!(s, "{{{}}}", num)?;
3259 return self.ppw(s, right);
3260 }
3261 }
3262 self.ppw(s, left)?;
3263 self.ppw(s, right)
3264 }
3265 Kind::Star => {
3266 let left = node_id.left(self);
3267 let leftkind = self.get_kind(left);
3268 match leftkind {
3269 Kind::Concat | Kind::Star | Kind::Compl => {
3270 write!(s, "(")?;
3271 self.ppw(s, left)?;
3272 write!(s, ")")?;
3273 }
3274 _ => {
3275 self.ppw(s, left)?;
3276 }
3277 };
3278 write!(s, "*")
3279 }
3280 Kind::Compl => {
3281 write!(s, "~(")?;
3282 self.ppw(s, node_id.left(self))?;
3283 write!(s, ")")
3284 }
3285 Kind::Lookbehind => {
3286 let lbleft = self.get_lookbehind_prev(node_id);
3287 let lbinner = self.get_lookbehind_inner(node_id);
3288 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3289 if lbleft != NodeId::MISSING {
3290 write!(s, "「")?;
3291 self.ppw(s, lbleft)?;
3292 write!(s, "」")?;
3293 }
3294
3295 write!(s, "(?<=")?;
3296 self.ppw(s, lbinner)?;
3297 write!(s, ")")
3298 }
3299 Kind::Lookahead => {
3300 let inner = self.get_lookahead_inner(node_id);
3301 write!(s, "(?=")?;
3302 self.ppw(s, inner)?;
3303 write!(s, ")")?;
3304 if self.get_lookahead_rel(node_id) != 0 {
3305 write!(s, "{{")?;
3306 let rel = self.get_lookahead_rel(node_id);
3307 if rel == u32::MAX {
3308 write!(s, "∅")?;
3309 } else {
3310 write!(s, "{}", rel)?;
3311 }
3312 write!(s, "}}")?;
3313 }
3314 if node_id.right(self) == NodeId::MISSING {
3315 Ok(())
3316 } else {
3317 write!(s, "『")?;
3318 self.ppw(s, node_id.right(self))?;
3319 write!(s, "』")
3320 }
3321 }
3322 Kind::Counted => {
3323 let body = node_id.left(self);
3324 let packed = self.get_extra(node_id);
3325 let step = packed & 0xFFFF;
3326 let best = packed >> 16;
3327 write!(s, "#(")?;
3328 self.ppw(s, body)?;
3329 write!(s, ")s{}b{}", step, best)
3330 }
3331 }
3332 }
3333
3334 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3335 self.mk_concat(node, NodeId::TS)
3336 }
3337
3338 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3339 let node_ts = self.mk_concat(node, NodeId::TS);
3340 self.mk_compl(node_ts)
3341 }
3342
3343
3344 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3345 let notset = self.solver().not_id(set);
3346 self.mk_pred(notset)
3347 }
3348
3349 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3350 let set_id = self.solver().u8_to_set_id(char);
3351 self.mk_pred(set_id)
3352 }
3353
3354 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3355 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3356 self.mk_pred(rangeset)
3357 }
3358
3359 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3360 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3361 for &(lo, hi) in &ranges[1..] {
3362 let r = self.mk_range_u8(lo, hi);
3363 node = self.mk_union(node, r);
3364 }
3365 node
3366 }
3367
3368 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3369 let mut prefix = Vec::new();
3370 let mut curr = node;
3371 loop {
3372 if curr == NodeId::EPS {
3373 let full = !prefix.is_empty();
3374 return (prefix, full);
3375 }
3376 if curr == NodeId::BOT {
3377 break;
3378 }
3379 if self.get_kind(curr) == Kind::Pred {
3380 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3381 Some(byte) => {
3382 prefix.push(byte);
3383 return (prefix, true);
3384 }
3385 None => break, }
3387 }
3388 if self.get_kind(curr) != Kind::Concat {
3389 break;
3390 }
3391 let left = curr.left(self);
3392 if self.get_kind(left) != Kind::Pred {
3393 break;
3394 }
3395 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3396 Some(byte) => prefix.push(byte),
3397 None => break,
3398 }
3399 curr = curr.right(self);
3400 }
3401 (prefix, false)
3402 }
3403
3404 #[allow(dead_code)]
3405 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3406 let mut result = NodeId::EPS;
3407 for byte in raw_str.iter().rev() {
3408 let node = self.mk_u8(*byte);
3409 result = self.mk_concat(node, result);
3410 }
3411 result
3412 }
3413
3414 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3415 let mut result = NodeId::EPS;
3416 for byte in raw_str.bytes().rev() {
3417 let node = self.mk_u8(byte);
3418 result = self.mk_concat(node, result);
3419 }
3420 result
3421 }
3422
3423 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3424 let mut sorted: Vec<NodeId> = nodes.collect();
3425 if sorted.len() <= 1 {
3426 return sorted.pop().unwrap_or(NodeId::BOT);
3427 }
3428 sorted.sort();
3429 sorted.dedup();
3430 sorted.retain(|&x| x != NodeId::BOT);
3431 if sorted.is_empty() {
3432 return NodeId::BOT;
3433 }
3434 if sorted.len() > 16 {
3435 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3436 let mut non_concat: Vec<NodeId> = Vec::new();
3437 for &n in &sorted {
3438 if self.get_kind(n) == Kind::Concat {
3439 by_head.entry(self.get_left(n)).or_default().push(n);
3440 } else {
3441 non_concat.push(n);
3442 }
3443 }
3444 let mut absorbed: Vec<NodeId> = Vec::new();
3449 for &n in &non_concat {
3450 if by_head.contains_key(&n) {
3451 absorbed.push(n);
3452 }
3453 }
3454 if !absorbed.is_empty() {
3455 non_concat.retain(|n| !absorbed.contains(n));
3456 }
3457 if by_head.len() < sorted.len() {
3458 let mut groups: Vec<NodeId> = non_concat;
3459 for (head, tails) in by_head {
3460 let mut tail_nodes: Vec<NodeId> =
3461 tails.iter().map(|&n| self.get_right(n)).collect();
3462 if absorbed.contains(&head) {
3463 tail_nodes.push(NodeId::EPS);
3464 }
3465 let tail_union = self.mk_unions(tail_nodes.into_iter());
3466 let factored = self.mk_concat(head, tail_union);
3467 groups.push(factored);
3468 }
3469 groups.sort();
3470 groups.dedup();
3471 return self.mk_unions_balanced(&groups);
3472 }
3473 }
3474 self.mk_unions_balanced(&sorted)
3475 }
3476
3477 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3478 match nodes.len() {
3479 0 => NodeId::BOT,
3480 1 => nodes[0],
3481 n => {
3482 let mid = n / 2;
3483 let left = self.mk_unions_balanced(&nodes[..mid]);
3484 let right = self.mk_unions_balanced(&nodes[mid..]);
3485 self.mk_union(left, right)
3486 }
3487 }
3488 }
3489
3490 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3491 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3492 }
3493
3494 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3495 nodes
3496 .rev()
3497 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3498 }
3499}
3500
3501impl RegexBuilder {
3502 #[allow(dead_code)]
3503 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3504 match self.get_tregex(term_id).clone() {
3505 TRegex::Leaf(node_id) => {
3506 if NodeId::BOT == node_id {
3507 vec![]
3508 } else {
3509 vec![node_id]
3510 }
3511 }
3512 TRegex::ITE(_, then_id, else_id) => {
3513 let mut then_nodes = self.extract_sat(then_id);
3514 let mut else_nodes = self.extract_sat(else_id);
3515 then_nodes.append(&mut else_nodes);
3516 then_nodes
3517 }
3518 }
3519 }
3520
3521 pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3522 debug_assert!(self.get_kind(start) == Kind::Union);
3523 let mut curr = start;
3524 while self.get_kind(curr) == Kind::Union {
3525 f(curr.left(self));
3526 curr = curr.right(self);
3527 }
3528 f(curr);
3529 }
3530
3531 pub(crate) fn iter_unions_b(
3532 &mut self,
3533 curr: NodeId,
3534 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3535 ) {
3536 let mut curr = curr;
3537 while self.get_kind(curr) == Kind::Union {
3538 f(self, curr.left(self));
3539 curr = curr.right(self);
3540 }
3541 f(self, curr);
3542 }
3543
3544 pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3545 if !self.contains_look(node_id) {
3546 return Some(node_id);
3547 }
3548 match self.get_kind(node_id) {
3549 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3550 Kind::Concat => {
3551 let left = node_id.left(self);
3552 let right = node_id.right(self);
3553 let elim_l = self.try_elim_lookarounds(left)?;
3554 let elim_r = self.try_elim_lookarounds(right)?;
3555 let rw = self.mk_concat(elim_l, elim_r);
3556 Some(rw)
3557 }
3558 Kind::Union => {
3559 let left = node_id.left(self);
3560 let right = node_id.right(self);
3561 let elim_l = self.try_elim_lookarounds(left)?;
3562 let elim_r = self.try_elim_lookarounds(right)?;
3563 let rw = self.mk_union(elim_l, elim_r);
3564 Some(rw)
3565 }
3566
3567 Kind::Star => {
3568 let body = node_id.left(self);
3569 let elim_l = self.try_elim_lookarounds(body)?;
3570 Some(self.mk_star(elim_l))
3571 }
3572 Kind::Compl => {
3573 let left = node_id.left(self);
3574 let elim_l = self.try_elim_lookarounds(left)?;
3575 Some(self.mk_compl(elim_l))
3576 }
3577 Kind::Lookahead => {
3578 let rel = self.get_lookahead_rel(node_id);
3579 if rel != 0 {
3580 return None;
3581 }
3582 let lbody = self.get_lookahead_inner(node_id);
3583 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3584 let elim_l = self.try_elim_lookarounds(lbody)?;
3585 let elim_r = self.try_elim_lookarounds(ltail)?;
3586 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3587 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3588 let rw = self.mk_inter(lbody_ts, ltail_ts);
3589 Some(rw)
3590 }
3591 Kind::Lookbehind => {
3592 let linner = self.get_lookbehind_inner(node_id);
3593 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3594 let elim_l = self.try_elim_lookarounds(linner)?;
3595 let elim_r = self.try_elim_lookarounds(lprev)?;
3596 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3597 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3598 let rw = self.mk_inter(lbody_ts, ltail_ts);
3599 Some(rw)
3600 }
3601 Kind::Inter => {
3602 let left = node_id.left(self);
3603 let right = node_id.right(self);
3604 let elim_l = self.try_elim_lookarounds(left)?;
3605 let elim_r = self.try_elim_lookarounds(right)?;
3606 let rw = self.mk_inter(elim_l, elim_r);
3607 Some(rw)
3608 }
3609 Kind::Counted => None,
3610 }
3611 }
3612
3613 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3615 if self.nullability(node) == Nullability::NEVER {
3616 node
3617 } else {
3618 self.mk_inter(NodeId::TOPPLUS, node)
3619 }
3620 }
3621
3622 fn iter_find_stack(
3623 &self,
3624 stack: &mut Vec<TRegexId>,
3625 mut f: impl FnMut(NodeId) -> bool,
3626 ) -> bool {
3627 loop {
3628 match stack.pop() {
3629 None => return false,
3630 Some(curr) => match self.get_tregex(curr) {
3631 TRegex::Leaf(n) => {
3632 let mut curr = *n;
3633 while curr != NodeId::BOT {
3634 match self.get_kind(curr) {
3635 Kind::Union => {
3636 if f(curr.left(self)) {
3637 return true;
3638 }
3639 curr = curr.right(self);
3640 }
3641 _ => {
3642 if f(*n) {
3643 return true;
3644 }
3645 curr = NodeId::BOT;
3646 }
3647 }
3648 }
3649 }
3650 TRegex::ITE(_, then_id, else_id) => {
3651 if *else_id != TRegexId::BOT {
3652 stack.push(*else_id);
3653 }
3654 stack.push(*then_id);
3655 }
3656 },
3657 }
3658 }
3659 }
3660
3661 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3662 if node == NodeId::BOT {
3663 return Some(true);
3664 }
3665 if self.nullability(node) != Nullability::NEVER {
3666 return Some(false);
3667 }
3668 if let Some(cached) = self.cache_empty.get(&node) {
3669 if cached.is_checked() {
3670 return Some(cached.is_empty());
3671 }
3672 }
3673 let node = if !self.contains_look(node) {
3674 node
3675 } else {
3676 self.try_elim_lookarounds(node)?
3677 };
3678 let isempty_flag = self.is_empty_lang_internal(node);
3679
3680 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3681 }
3682
3683 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3684 if !self.get_meta_flags(initial_node).contains_inter() {
3686 return Ok(NodeFlags::ZERO);
3687 }
3688
3689 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3690 let mut todo: VecDeque<NodeId> = VecDeque::new();
3691 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3692 let mut stack = Vec::new();
3693 stack.push(begin_der);
3694 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3695 visited.insert(node, initial_node);
3696 let nullability = self.nullability(node);
3697 if nullability != Nullability::NEVER {
3698 true
3699 } else {
3700 todo.push_back(node);
3701 false
3702 }
3703 });
3704 if found_nullable_right_away {
3705 return Ok(NodeFlags::ZERO);
3706 }
3707
3708 todo.push_back(initial_node);
3709 let isempty_flag: NodeFlags;
3710 let mut found_node = NodeId::BOT;
3711
3712 loop {
3713 match todo.pop_front() {
3714 None => {
3715 isempty_flag = NodeFlags::IS_EMPTY;
3716 break;
3717 }
3718 Some(outer) => {
3719 if let Some(cached) = self.cache_empty.get(&outer) {
3720 if cached.is_checked() {
3721 if cached.is_empty() {
3722 continue;
3723 } else {
3724 return Ok(NodeFlags::ZERO);
3725 }
3726 }
3727 }
3728
3729 let derivative = self.der(outer, Nullability::CENTER)?;
3730
3731 stack.push(derivative);
3732
3733 let found_null = self.iter_find_stack(&mut stack, |node| {
3734 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3735 found_node = node;
3736 if !self.get_meta_flags(node).contains_inter() {
3737 true
3738 } else {
3739 e.insert(outer);
3740 todo.push_front(node);
3741 self.any_nonbegin_nullable(node)
3742 }
3743 } else {
3744 false
3745 }
3746 });
3747 if found_null {
3748 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3749 isempty_flag = NodeFlags::ZERO;
3750 break;
3751 }
3752 }
3753 }
3754 }
3755
3756 self.cache_empty.insert(
3757 initial_node,
3758 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3759 );
3760 Ok(isempty_flag)
3761 }
3762
3763 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3765 if larger_lang == smaller_lang {
3766 return Some(true);
3767 }
3768
3769 if self
3771 .nullability(larger_lang)
3772 .not()
3773 .and(self.nullability(smaller_lang))
3774 != Nullability::NEVER
3775 {
3776 return Some(false);
3777 }
3778
3779 let (smaller_lang, larger_lang) =
3784 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3785 let wrap = |b: &mut Self, n: NodeId| {
3786 let tmp = b.mk_concat(n, NodeId::TS);
3787 b.mk_concat(NodeId::TS, tmp)
3788 };
3789 (wrap(self, smaller_lang), wrap(self, larger_lang))
3790 } else {
3791 (smaller_lang, larger_lang)
3792 };
3793
3794 let nota = self.mk_compl(larger_lang);
3795 let diff = self.mk_inter(smaller_lang, nota);
3796 self.is_empty_lang(diff)
3797 }
3798}