1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::UnicodeClassCache;
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum AlgebraError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for AlgebraError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 AlgebraError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for AlgebraError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_lookaround(self) -> bool {
134 self.has(MetaFlags::CONTAINS_LOOKAROUND)
135 }
136 pub(crate) fn contains_inter(self) -> bool {
137 self.has(MetaFlags::CONTAINS_INTER)
138 }
139
140 pub(crate) fn all_contains_flags(self) -> MetaFlags {
141 self.and(
142 MetaFlags::CONTAINS_LOOKAROUND
143 .or(MetaFlags::CONTAINS_ANCHORS)
144 .or(MetaFlags::CONTAINS_INTER),
145 )
146 }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
150#[repr(u8)]
151pub enum Kind {
152 #[default]
153 Pred,
154 Star,
155 Begin,
156 End,
157 Concat,
158 Union,
159 Compl,
160 Lookbehind,
161 Lookahead,
162 Inter,
163 Counted,
164}
165
166#[derive(Eq, Hash, PartialEq, Clone)]
167struct Metadata {
168 flags: MetaFlags,
169 nulls: NullsId,
170}
171
172struct MetadataBuilder {
173 num_created: u32,
174 solver: Solver,
175 nb: NullsBuilder,
176 index: FxHashMap<Metadata, MetadataId>,
177 pub array: Vec<Metadata>,
178}
179
180mod helpers {
181 pub(crate) fn incr_rel(n1: u32) -> u32 {
182 match n1.overflowing_add(1) {
183 (_, true) => u32::MAX,
184 (res, false) => res,
185 }
186 }
187}
188
189impl MetadataBuilder {
190 fn new() -> MetadataBuilder {
191 Self {
192 index: FxHashMap::default(),
193 array: vec![Metadata {
194 flags: MetaFlags::ZERO,
195 nulls: NullsId::EMPTY,
196 }],
197 solver: Solver::new(),
198 num_created: 0,
199 nb: NullsBuilder::new(),
200 }
201 }
202
203 fn init(&mut self, inst: Metadata) -> MetadataId {
204 self.num_created += 1;
205 let new_id = MetadataId(self.num_created);
206 self.index.insert(inst.clone(), new_id);
207 self.array.push(inst);
208 new_id
209 }
210
211 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
212 match self.index.get(&inst) {
213 Some(&id) => id,
214 None => self.init(inst),
215 }
216 }
217
218 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
219 &self.array[inst.0 as usize]
220 }
221
222 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
223 setflags.all_contains_flags()
224 }
225
226 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
227 let left = &self.array[body.0 as usize].flags;
228 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
229 let inf = if body_id == NodeId::BOT {
231 MetaFlags::ZERO
232 } else {
233 MetaFlags::INFINITE_LENGTH
234 };
235 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
236 }
237
238 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
239 let left = &self.array[left_id.0 as usize].flags;
240 let null = left.nullability().not().and(Nullability::ALWAYS);
241 let contains = self.get_contains(*left);
242 MetaFlags::with_nullability(
243 null,
244 contains
245 .or(MetaFlags::INFINITE_LENGTH)
246 .or(MetaFlags::CONTAINS_COMPL),
247 )
248 }
249
250 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
251 let left = &self.array[left_id.0 as usize].flags;
252 let right = &self.array[right_id.0 as usize].flags;
253 let null = left.nullability().and(right.nullability());
254 let contains = self.get_contains(left.or(*right));
255 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
256 MetaFlags::with_nullability(null, contains.or(len))
257 }
258
259 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260 let left = &self.array[left_id.0 as usize].flags;
261 let right = &self.array[right_id.0 as usize].flags;
262 let null = left.nullability().and(right.nullability());
263 let contains = self
264 .get_contains(left.or(*right))
265 .or(MetaFlags::CONTAINS_INTER);
266 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
267 MetaFlags::with_nullability(null, contains.or(len))
268 }
269
270 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
271 let left = &self.array[left_id.0 as usize].flags;
272 let right = &self.array[right_id.0 as usize].flags;
273 let null = left.nullability().or(right.nullability());
274 let contains = self.get_contains(left.or(*right));
275 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
276 MetaFlags::with_nullability(null, contains.or(len))
277 }
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
281pub struct NodeKey {
282 pub(crate) kind: Kind,
283 pub(crate) left: NodeId,
284 pub(crate) right: NodeId,
285 pub(crate) extra: u32,
286}
287
288#[derive(Eq, Hash, PartialEq, Clone, Debug)]
289pub enum TRegex<TSet> {
290 Leaf(NodeId),
291 ITE(TSet, TRegexId, TRegexId),
292}
293
294#[derive(Eq, Hash, PartialEq, Clone, Copy)]
295pub(crate) struct NodeFlags(u8);
296impl NodeFlags {
297 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
298 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
299 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
300
301 fn is_checked(self) -> bool {
302 self.0 >= NodeFlags::IS_CHECKED.0
303 }
304 fn is_empty(self) -> bool {
305 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
306 }
307}
308
309#[derive(Eq, Hash, PartialEq, Clone, Copy)]
310pub(crate) struct BuilderFlags(u8);
311impl BuilderFlags {
312 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
313 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
314}
315
316pub struct RegexBuilder {
317 mb: MetadataBuilder,
318 temp_vec: Vec<NodeId>,
319 num_created: u32,
320 index: FxHashMap<NodeKey, NodeId>,
321 array: Vec<NodeKey>,
322 metadata: Vec<MetadataId>,
323 reversed: Vec<NodeId>,
324 cache_empty: FxHashMap<NodeId, NodeFlags>,
325 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
326 tr_array: Vec<TRegex<TSetId>>,
327 tr_der_center: Vec<TRegexId>,
328 tr_der_begin: Vec<TRegexId>,
329 flags: BuilderFlags,
330 pub lookahead_context_max: u32,
332 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
333 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
334}
335
336macro_rules! iter_inter {
337 ($compiler:ident, $start:ident, $expression:expr) => {
338 let mut curr = $start;
339 while $compiler.get_kind(curr) == Kind::Inter {
340 let left = $compiler.get_left(curr);
341 $expression(left);
342 curr = $compiler.get_right(curr);
343 }
344 $expression(curr);
345 };
346}
347
348impl NodeId {
349 fn is_missing(&self) -> bool {
350 *self == NodeId::MISSING
351 }
352 #[inline]
353 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
354 b.get_flags_contains(self)
355 }
356 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
357 if self == tail {
358 true
359 } else if self.is_kind(b, Kind::Concat) {
360 self.right(b).has_concat_tail(b, tail)
361 } else {
362 false
363 }
364 }
365 fn missing_to_eps(&self) -> NodeId {
366 if *self == NodeId::MISSING {
367 NodeId::EPS
368 } else {
369 *self
370 }
371 }
372
373 #[inline]
374 fn kind(self, b: &RegexBuilder) -> Kind {
375 b.get_kind(self)
376 }
377 #[inline]
378 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
379 b.get_kind(self) == k
380 }
381 #[inline]
382 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
383 b.nullability(self) == Nullability::NEVER
384 }
385 #[inline]
386 fn nullability(self, b: &RegexBuilder) -> Nullability {
387 b.nullability(self)
388 }
389
390 #[inline]
391 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
392 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
393 }
394 #[inline]
395 pub fn left(self, b: &RegexBuilder) -> NodeId {
396 b.get_left(self)
397 }
398
399 #[inline]
400 pub fn right(self, b: &RegexBuilder) -> NodeId {
401 b.get_right(self)
402 }
403
404 #[inline]
405 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
406 b.der(self, mask)
407 }
408
409 #[inline]
410 fn extra(self, b: &RegexBuilder) -> u32 {
411 b.get_extra(self)
412 }
413
414 #[inline]
415 fn is_pred(self, b: &RegexBuilder) -> bool {
416 b.get_kind(self) == Kind::Pred
417 }
418 #[inline]
419 fn is_lookahead(self, b: &RegexBuilder) -> bool {
420 b.get_kind(self) == Kind::Lookahead
421 }
422 #[inline]
423 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
424 debug_assert!(self.is_pred(b));
425 TSetId(b.get_extra(self))
426 }
427 #[inline]
428 fn is_star(self, b: &RegexBuilder) -> bool {
429 if NodeId::EPS == self {
430 return false;
431 }
432 b.get_kind(self) == Kind::Star
433 }
434
435 #[inline]
436 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
437 b.get_kind(self) == Kind::Inter
438 }
439
440 #[inline]
441 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
442 if self.is_concat(b) {
443 let r = self.right(b);
444 return r.is_star(b) && r.left(b) == self.left(b);
445 }
446 false
447 }
448
449 #[inline]
450 fn is_concat(self, b: &RegexBuilder) -> bool {
451 b.get_kind(self) == Kind::Concat
452 }
453
454 #[inline]
455 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
456 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
457 Some(b.get_right(self))
458 } else {
459 None
460 }
461 }
462
463 #[inline]
464 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
465 if b.get_kind(self) == Kind::Concat {
466 let left = self.left(b);
467 let right = self.right(b);
468 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
469 return right == NodeId::END;
470 }
471 }
472 false
473 }
474
475 #[inline]
476 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
477 if NodeId::EPS == self {
478 return None;
479 }
480 if self.is_star(b) && self.left(b).is_pred(b) {
481 Some(self.left(b))
482 } else {
483 None
484 }
485 }
486
487 #[inline]
488 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
489 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
490 let middle = self.right(b);
491 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
492 return Some(middle.left(b));
493 }
494 }
495 None
496 }
497
498 #[inline]
499 pub(crate) fn iter_union(
500 self,
501 b: &mut RegexBuilder,
502 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
503 ) {
504 let mut curr: NodeId = self;
505 while curr.kind(b) == Kind::Union {
506 f(b, curr.left(b));
507 curr = curr.right(b);
508 }
509 f(b, curr);
510 }
511
512 #[inline]
513 pub(crate) fn iter_union_while(
514 self,
515 b: &mut RegexBuilder,
516 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
517 ) {
518 let mut curr: NodeId = self;
519 let mut continue_loop = true;
520 while continue_loop && curr.kind(b) == Kind::Union {
521 continue_loop = f(b, curr.left(b));
522 curr = curr.right(b);
523 }
524 if continue_loop {
525 f(b, curr);
526 }
527 }
528}
529
530impl Default for RegexBuilder {
531 fn default() -> Self {
532 Self::new()
533 }
534}
535
536impl RegexBuilder {
537 pub fn new() -> RegexBuilder {
538 let mut inst = Self {
539 mb: MetadataBuilder::new(),
540 array: Vec::new(),
541 index: FxHashMap::default(),
542 cache_empty: FxHashMap::default(),
543 tr_array: Vec::new(),
544 tr_cache: FxHashMap::default(),
545 flags: BuilderFlags::ZERO,
546 lookahead_context_max: 800,
547 num_created: 0,
548 metadata: Vec::new(),
549 reversed: Vec::new(),
550 tr_der_center: Vec::new(),
551 tr_der_begin: Vec::new(),
552 temp_vec: Vec::new(),
553 mk_binary_memo: FxHashMap::default(),
554 clean_cache: FxHashMap::default(),
555 };
556 inst.array.push(NodeKey::default());
557 inst.mk_pred(TSetId::EMPTY);
558 inst.mk_star(NodeId::BOT);
559 inst.mk_pred(TSetId::FULL);
560 inst.mk_star(NodeId::TOP);
561 let top_plus_id = inst.mk_plus(NodeId::TOP);
562 inst.mk_unset(Kind::Begin);
563 inst.mk_unset(Kind::End);
564 debug_assert!(top_plus_id == NodeId::TOPPLUS);
565
566 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
567 inst.mk_leaf(NodeId::BOT);
568 inst.mk_leaf(NodeId::EPS);
569 inst.mk_leaf(NodeId::TOP);
570 inst.mk_leaf(NodeId::TS);
571
572 inst.flags = BuilderFlags::SUBSUME;
573 inst
574 }
575
576 #[inline]
577 pub fn solver_ref(&self) -> &Solver {
578 &self.mb.solver
579 }
580
581 #[inline]
582 pub fn solver(&mut self) -> &mut Solver {
583 &mut self.mb.solver
584 }
585
586 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
587 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
588 self.tr_cache.insert(inst.clone(), new_id);
589 self.tr_array.push(inst);
590 new_id
591 }
592
593 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
594 match self.tr_cache.get(&inst) {
595 Some(&id) => id,
596 None => self.tr_init(inst),
597 }
598 }
599
600 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
601 &self.tr_array[inst.0 as usize]
602 }
603
604 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
605 let solv = self.solver();
606 !solv.is_sat_id(cond1, cond2)
607 }
608
609 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
610 let term = TRegex::<TSetId>::Leaf(node_id);
611 self.get_tregex_id(term)
612 }
613
614 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
615 let tmp_inst = TRegex::ITE(cond, _then, _else);
616 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
617 return *cached;
618 }
619 if _then == _else {
620 return _then;
621 }
622 if self.solver().is_full_id(cond) {
623 return _then;
624 }
625 if self.solver().is_empty_id(cond) {
626 return _else;
627 }
628 let _clean_then = match self.tr_array[_then.0 as usize] {
629 TRegex::Leaf(_) => _then,
630 _ => self.clean(cond, _then),
631 };
632 let notcond = self.solver().not_id(cond);
633 let _clean_else = match self.tr_array[_else.0 as usize] {
634 TRegex::Leaf(_) => _else,
635 _ => self.clean(notcond, _else),
636 };
637
638 if _clean_then == _clean_else {
639 return _clean_then;
640 }
641 match self.get_tregex(_clean_then) {
643 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
644 let _cond_then = *leftcond;
645 let new_then = *_inner_then;
646 let sand = self.solver().and_id(cond, _cond_then);
647 return self.mk_ite(sand, new_then, _clean_else);
648 }
649 _ => {}
650 }
651 match self.get_tregex(_clean_else) {
653 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
655 let e2clone = *_e2;
656 let new_cond = self.mb.solver.or_id(cond, *_c2);
657 return self.mk_ite(new_cond, _clean_then, e2clone);
658 }
659 _ => {}
660 }
661
662 if _clean_then == TRegexId::BOT {
663 let flip_cond = self.solver().not_id(cond);
664 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
665 return cleaned_id;
666 }
667
668 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
669 }
670
671 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
672 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
673 return cached;
674 }
675 let result = match *self.get_tregex(tterm) {
676 TRegex::Leaf(_) => tterm,
677 TRegex::ITE(alpha, _then_id, _else_id) => {
678 let notalpha = self.mb.solver.not_id(alpha);
679 if self.mb.solver.unsat_id(beta, alpha) {
680 self.clean(beta, _else_id)
682 } else if self.unsat(beta, notalpha) {
683 self.clean(beta, _then_id)
685 } else {
686 let tc = self.mb.solver.and_id(beta, alpha);
687 let ec = self.mb.solver.and_id(beta, notalpha);
688 let new_then = self.clean(tc, _then_id);
689 let new_else = self.clean(ec, _else_id);
690 self.mk_ite(alpha, new_then, new_else)
691 }
692 }
693 };
694 self.clean_cache.insert((beta, tterm), result);
695 result
696 }
697
698 fn mk_unary(
699 &mut self,
700 term: TRegexId,
701 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
702 ) -> TRegexId {
703 match self.tr_array[term.0 as usize] {
704 TRegex::Leaf(node_id) => {
705 let applied = apply(self, node_id);
706 self.mk_leaf(applied)
707 }
708 TRegex::ITE(c1, _then, _else) => {
709 let _then1 = self.mk_unary(_then, apply);
710 let _else1 = self.mk_unary(_else, apply);
711 self.mk_ite(c1, _then1, _else1)
712 }
713 }
714 }
715
716 fn mk_binary_result(
717 &mut self,
718 left: TRegexId,
719 right: TRegexId,
720 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
721 ) -> Result<TRegexId, AlgebraError> {
722 match self.tr_array[left.0 as usize] {
723 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
724 TRegex::Leaf(right_node_id) => {
725 let applied = apply(self, left_node_id, right_node_id)?;
726 Ok(self.mk_leaf(applied))
727 }
728 TRegex::ITE(c2, _then, _else) => {
729 let then2 = self.mk_binary_result(left, _then, apply)?;
730 let else2 = self.mk_binary_result(left, _else, apply)?;
731 Ok(self.mk_ite(c2, then2, else2))
732 }
733 },
734 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
735 TRegex::Leaf(_) => {
736 let then2 = self.mk_binary_result(_then1, right, apply)?;
737 let else2 = self.mk_binary_result(_else1, right, apply)?;
738 Ok(self.mk_ite(c1, then2, else2))
739 }
740 TRegex::ITE(c2, _then2, _else2) => {
741 if c1 == c2 {
742 let _then = self.mk_binary_result(_then1, _then2, apply)?;
743 let _else = self.mk_binary_result(_else1, _else2, apply)?;
744 return Ok(self.mk_ite(c1, _then, _else));
745 }
746 if c1.0 > c2.0 {
747 let _then = self.mk_binary_result(_then1, right, apply)?;
748 let _else = self.mk_binary_result(_else1, right, apply)?;
749 Ok(self.mk_ite(c1, _then, _else))
750 } else {
751 let _then = self.mk_binary_result(left, _then2, apply)?;
752 let _else = self.mk_binary_result(left, _else2, apply)?;
753 Ok(self.mk_ite(c2, _then, _else))
754 }
755 }
756 },
757 }
758 }
759
760 fn mk_binary(
761 &mut self,
762 left: TRegexId,
763 right: TRegexId,
764 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
765 ) -> TRegexId {
766 self.mk_binary_memo.clear();
767 self.mk_binary_inner(left, right, apply)
768 }
769
770 fn mk_binary_inner(
771 &mut self,
772 left: TRegexId,
773 right: TRegexId,
774 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
775 ) -> TRegexId {
776 if left == right {
777 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
778 }
779 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
780 return cached;
781 }
782 let result = match self.tr_array[left.0 as usize] {
783 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
784 TRegex::Leaf(right_node_id) => {
785 let applied = apply(self, left_node_id, right_node_id);
786 self.mk_leaf(applied)
787 }
788 TRegex::ITE(c2, _then, _else) => {
789 let then2 = self.mk_binary_inner(left, _then, apply);
790 let else2 = self.mk_binary_inner(left, _else, apply);
791 self.mk_ite(c2, then2, else2)
792 }
793 },
794 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
795 TRegex::Leaf(_) => {
796 let then2 = self.mk_binary_inner(_then1, right, apply);
797 let else2 = self.mk_binary_inner(_else1, right, apply);
798 self.mk_ite(c1, then2, else2)
799 }
800 TRegex::ITE(c2, _then2, _else2) => {
801 if c1 == c2 {
802 let _then = self.mk_binary_inner(_then1, _then2, apply);
803 let _else = self.mk_binary_inner(_else1, _else2, apply);
804 self.mk_ite(c1, _then, _else)
805 } else if c1.0 > c2.0 {
806 let _then = self.mk_binary_inner(_then1, right, apply);
807 let _else = self.mk_binary_inner(_else1, right, apply);
808 self.mk_ite(c1, _then, _else)
809 } else {
810 let _then = self.mk_binary_inner(left, _then2, apply);
811 let _else = self.mk_binary_inner(left, _else2, apply);
812 self.mk_ite(c2, _then, _else)
813 }
814 }
815 },
816 };
817 self.mk_binary_memo.insert((left, right), result);
818 result
819 }
820
821 pub fn get_nulls(
822 &mut self,
823 pending_rel: u32,
824 mask: Nullability,
825 acc: &mut BTreeSet<NullState>,
826 node_id: NodeId,
827 ) {
828 debug_assert!(node_id != NodeId::MISSING);
829 if !self.is_nullable(node_id, mask) {
830 return;
831 }
832 match self.get_kind(node_id) {
833 Kind::Pred => {}
834 Kind::End => {
835 if mask.has(Nullability::END) {
836 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
837 }
838 }
839 Kind::Begin => {
840 if mask.has(Nullability::BEGIN) {
841 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
842 }
843 }
844 Kind::Concat => {
845 let new_mask = self.nullability(node_id).and(mask);
846 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
847 if self.is_nullable(node_id.left(self), mask) {
848 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
849 }
850 }
851 Kind::Union => {
852 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
853 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
854 }
855 Kind::Inter => {
856 let new_mask = self.nullability(node_id).and(mask);
857 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
858 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
859 }
860 Kind::Star => {
861 acc.insert(NullState::new(mask, pending_rel));
862 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
863 }
864 Kind::Compl => {
865 if !self.is_nullable(node_id.left(self), mask) {
866 acc.insert(NullState::new(mask, 0));
867 }
868 }
869 Kind::Lookbehind => {
870 let new_mask = self.nullability(node_id).and(mask);
871 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
872 if node_id.right(self) != NodeId::MISSING {
873 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
874 }
875 }
876 Kind::Lookahead => {
877 let la_inner = self.get_lookahead_inner(node_id);
878 if self.is_nullable(la_inner, mask) {
879 let rel = self.get_lookahead_rel(node_id);
880 if rel != u32::MAX {
881 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
882 }
883 let la_tail = self.get_lookahead_tail(node_id);
885 if la_tail != NodeId::MISSING {
886 self.get_nulls(pending_rel, mask, acc, la_tail);
887 }
888 }
889 }
890 Kind::Counted => {
891 let packed = self.get_extra(node_id);
892 let best = packed >> 16;
893 if best > 0 {
894 acc.insert(NullState::new(mask, pending_rel + best));
895 }
896 }
897 }
898 }
899
900 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
901 self.get_meta_flags(node_id)
902 .has(MetaFlags::CONTAINS_LOOKAROUND)
903 }
904
905 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
907 self.get_meta_flags(node_id)
908 .has(MetaFlags::CONTAINS_ANCHORS)
909 }
910
911 pub fn is_infinite(&self, node_id: NodeId) -> bool {
912 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
913 }
914
915 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
917 if self.is_infinite(node_id) {
918 if self.get_kind(node_id) == Kind::Inter {
919 self.get_bounded_length(node_id)
920 } else {
921 (self.get_min_length_only(node_id), u32::MAX)
922 }
923 } else {
924 self.get_bounded_length(node_id)
925 }
926 }
927
928 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
929 if node_id == NodeId::EPS {
930 return (0, 0);
931 }
932 match self.get_kind(node_id) {
933 Kind::End | Kind::Begin => (0, 0),
934 Kind::Pred => (1, 1),
935 Kind::Concat => {
936 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
937 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
938 (lmin + rmin, lmax.saturating_add(rmax))
939 }
940 Kind::Union => {
941 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
942 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
943 (lmin.min(rmin), lmax.max(rmax))
944 }
945 Kind::Inter => {
946 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
947 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
948 (lmin.max(rmin), lmax.min(rmax))
949 }
950 Kind::Lookahead => {
951 let body = node_id.left(self);
952 if self.is_infinite(body) {
953 return (0, u32::MAX);
954 }
955 let right = node_id.right(self);
956 if right.is_missing() {
957 (0, 0)
958 } else {
959 self.get_min_max_length(right)
960 }
961 }
962 Kind::Counted => (0, 0),
963 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
964 }
965 }
966
967 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
968 match self.get_kind(node_id) {
969 Kind::End | Kind::Begin => Some(0),
970 Kind::Pred => Some(1),
971 Kind::Concat => {
972 let l = self.get_fixed_length(node_id.left(self))?;
973 let r = self.get_fixed_length(node_id.right(self))?;
974 Some(l + r)
975 }
976 Kind::Union => {
977 let l = self.get_fixed_length(node_id.left(self))?;
978 let r = self.get_fixed_length(node_id.right(self))?;
979 if l == r {
980 Some(l)
981 } else {
982 None
983 }
984 }
985 Kind::Inter => {
986 let l = self.get_fixed_length(node_id.left(self))?;
987 let r = self.get_fixed_length(node_id.right(self))?;
988 if l == r {
989 Some(l)
990 } else {
991 None
992 }
993 }
994 Kind::Lookahead => {
995 let right = node_id.right(self);
996 if right.is_missing() {
997 Some(0)
998 } else {
999 self.get_fixed_length(right)
1000 }
1001 }
1002 Kind::Counted => Some(0),
1003 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1004 }
1005 }
1006
1007 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1008 match self.get_kind(node_id) {
1009 Kind::End | Kind::Begin => 0,
1010 Kind::Pred => 1,
1011 Kind::Concat => {
1012 self.get_min_length_only(node_id.left(self))
1013 + self.get_min_length_only(node_id.right(self))
1014 }
1015 Kind::Union => self
1016 .get_min_length_only(node_id.left(self))
1017 .min(self.get_min_length_only(node_id.right(self))),
1018 Kind::Inter => self
1019 .get_min_length_only(node_id.left(self))
1020 .max(self.get_min_length_only(node_id.right(self))),
1021 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1022 Kind::Compl => {
1023 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1024 0
1025 } else {
1026 1
1027 }
1028 }
1029 }
1030 }
1031
1032 fn starts_with_ts(&self, node_id: NodeId) -> bool {
1033 if node_id == NodeId::TS {
1034 return true;
1035 }
1036 match self.get_kind(node_id) {
1037 Kind::Inter => {
1038 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1039 }
1040 Kind::Union => {
1041 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1042 }
1043 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1044 _ => false,
1045 }
1046 }
1047
1048 #[inline]
1049 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
1050 if self.get_kind(node_id) == Kind::Concat {
1051 self.ends_with_ts(node_id.right(self))
1052 } else {
1053 node_id == NodeId::TS
1054 }
1055 }
1056
1057 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1058 debug_assert!(node_id != NodeId::MISSING);
1059 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1060 }
1061
1062 pub(crate) fn cache_der(
1063 &mut self,
1064 node_id: NodeId,
1065 result: TRegexId,
1066 mask: Nullability,
1067 ) -> TRegexId {
1068 if mask == Nullability::CENTER {
1069 self.tr_der_center[node_id.0 as usize] = result
1070 } else {
1071 self.tr_der_begin[node_id.0 as usize] = result
1072 };
1073 result
1074 }
1075
1076 pub(crate) fn try_cached_der(
1077 &mut self,
1078 node_id: NodeId,
1079 mask: Nullability,
1080 ) -> Option<TRegexId> {
1081 let cache = if mask == Nullability::CENTER {
1082 &mut self.tr_der_center
1083 } else {
1084 &mut self.tr_der_begin
1085 };
1086 match cache.get(node_id.0 as usize) {
1087 Some(&TRegexId::MISSING) => {}
1088 Some(&result) => {
1089 return Some(result);
1090 }
1091 None => {
1092 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1093 }
1094 }
1095 None
1096 }
1097
1098 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1099 let mut term = self.get_tregex(der);
1100 loop {
1101 match *term {
1102 TRegex::Leaf(node_id) => return node_id,
1103 TRegex::ITE(cond, _then, _else) => {
1104 if self.solver().is_sat_id(set, cond) {
1105 term = self.get_tregex(_then);
1106 } else {
1107 term = self.get_tregex(_else);
1108 }
1109 }
1110 }
1111 }
1112 }
1113
1114 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
1115 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1116 debug_assert!(
1117 node_id != NodeId::MISSING,
1118 "attempting to derive missing node"
1119 );
1120 if let Some(result) = self.try_cached_der(node_id, mask) {
1121 return Ok(result);
1122 }
1123
1124 let result = match node_id.kind(self) {
1125 Kind::Compl => {
1126 let leftd = node_id.left(self).der(self, mask)?;
1127 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1128 }
1129 Kind::Inter => {
1130 let leftd = node_id.left(self).der(self, mask)?;
1131 let rightd = node_id.right(self).der(self, mask)?;
1132 {
1133 let this = &mut *self;
1134 this.mk_binary(
1135 leftd,
1136 rightd,
1137 &mut (|b, left, right| b.mk_inter(left, right)),
1138 )
1139 }
1140 }
1141 Kind::Union => {
1142 let leftd = self.der(node_id.left(self), mask)?;
1143 let rightd = self.der(node_id.right(self), mask)?;
1144 {
1145 let this = &mut *self;
1146 this.mk_binary(
1147 leftd,
1148 rightd,
1149 &mut (|b, left, right| b.mk_union(left, right)),
1150 )
1151 }
1152 }
1153 Kind::Concat => {
1154 let head = node_id.left(self);
1155 let tail = node_id.right(self);
1156 let tail_leaf = self.mk_leaf(tail);
1157 let head_der = self.der(head, mask)?;
1158
1159 if self.is_nullable(head, mask) {
1160 let rightd = self.der(tail, mask)?;
1161 let ldr = {
1162 let this = &mut *self;
1163 this.mk_binary(
1164 head_der,
1165 tail_leaf,
1166 &mut (|b, left, right| b.mk_concat(left, right)),
1167 )
1168 };
1169 {
1170 let this = &mut *self;
1171 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1172 }
1173 } else {
1174 let this = &mut *self;
1175 this.mk_binary(
1176 head_der,
1177 tail_leaf,
1178 &mut (|b, left, right| b.mk_concat(left, right)),
1179 )
1180 }
1181 }
1182 Kind::Star => {
1183 if node_id == NodeId::EPS {
1184 TRegexId::BOT
1185 } else {
1186 let left = node_id.left(self);
1187 let r_decr_leaf = self.mk_leaf(node_id);
1188 let r_der = self.der(left, mask)?;
1189 let this = &mut *self;
1190 this.mk_binary(
1191 r_der,
1192 r_decr_leaf,
1193 &mut (|b, left, right| b.mk_concat(left, right)),
1194 )
1195 }
1196 }
1197 Kind::Lookbehind => {
1198 let lb_prev_der = {
1199 let lb_prev = self.get_lookbehind_prev(node_id);
1200 if lb_prev == NodeId::MISSING {
1201 TRegexId::MISSING
1202 } else {
1203 self.der(lb_prev, mask)?
1204 }
1205 };
1206 let lb_inner = self.get_lookbehind_inner(node_id);
1207 let lb_inner_der = self.der(lb_inner, mask)?;
1208 {
1209 let this = &mut *self;
1210 this.mk_binary_result(
1211 lb_inner_der,
1212 lb_prev_der,
1213 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1214 )?
1215 }
1216 }
1217 Kind::Lookahead => {
1218 let la_tail = self.get_lookahead_tail(node_id);
1219 let la_body = node_id.left(self);
1220 let rel = self.get_lookahead_rel(node_id);
1221
1222 if self.is_nullable(la_body, mask) {
1223 let right = node_id.right(self).missing_to_eps();
1225 let rder = self.der(right, mask).clone();
1226 return rder;
1227 }
1228
1229 if rel == u32::MAX {
1230 let la_body_der = self.der(la_body, mask)?;
1231 if la_tail.is_kind(self, Kind::Pred) {
1232 let transitioned =
1233 self.transition_term(la_body_der, la_tail.pred_tset(self));
1234 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1235 let concated = self.mk_concat(la_tail, new_la);
1236 return self.der(concated, mask);
1237 }
1238 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1239 let left = la_tail.left(self);
1240 let tset = left.pred_tset(self);
1241 let transitioned = self.transition_term(la_body_der, tset);
1242 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1243 let tail_right = la_tail.right(self);
1244 let concated = self.mk_concat(new_la, tail_right);
1245 let concated = self.mk_concat(left, concated);
1246 return self.der(concated, mask);
1247 }
1248 }
1249
1250 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1251 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1252 let concated = self.mk_concat(la_body, nulls_mask);
1253 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1254 let non_nulled = self.mk_non_nullable_safe(la_tail);
1255 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1256 let new_union = self.mk_union(concated_look, new_look);
1257 return self.der(new_union, mask);
1258 }
1259
1260 let la_tail_der = if la_tail == NodeId::MISSING {
1261 TRegexId::MISSING
1262 } else {
1263 if self.is_nullable(la_tail, mask) {
1264 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1265 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1266 let la_union = self.mk_union(la_tail, nulls_la);
1267 self.der(la_union, mask)?
1268 } else {
1269 self.der(la_tail, mask)?
1270 }
1271 };
1272
1273 let la_body_der = self.der(la_body, mask)?;
1274
1275 if rel != u32::MAX && rel > self.lookahead_context_max {
1276 return Err(AlgebraError::AnchorLimit);
1277 }
1278
1279 let la = {
1280 let this = &mut *self;
1281 let rel = helpers::incr_rel(rel);
1282 this.mk_binary(
1283 la_body_der,
1284 la_tail_der,
1285 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1286 )
1287 };
1288
1289 if rel != u32::MAX
1290 && la_tail_der != TRegexId::MISSING
1291 && self.is_nullable(la_tail, mask)
1292 {
1293 let look_only = {
1294 let this = &mut *self;
1295 let right = TRegexId::MISSING;
1296 let rel = helpers::incr_rel(rel);
1297 this.mk_binary(
1298 la_body_der,
1299 right,
1300 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1301 )
1302 };
1303 {
1304 let this = &mut *self;
1305 this.mk_binary(
1306 look_only,
1307 la,
1308 &mut (|b, left, right| b.mk_union(left, right)),
1309 )
1310 }
1311 } else {
1312 la
1313 }
1314 }
1315 Kind::Counted => {
1316 let body = node_id.left(self);
1317 let chain = node_id.right(self);
1318 let packed = self.get_extra(node_id);
1319 let step = (packed & 0xFFFF) as u16;
1320 let best = (packed >> 16) as u16;
1321
1322 let mid_best = if self.is_nullable(body, mask) && step >= best {
1323 step
1324 } else {
1325 best
1326 };
1327
1328 let body_der = self.der(body, mask)?;
1329 let new_step = step.saturating_add(1);
1330 self.mk_unary(body_der, &mut |b, new_body| {
1331 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1332 new_step
1333 } else {
1334 mid_best
1335 };
1336 let packed = (final_best as u32) << 16 | new_step as u32;
1337 b.mk_counted(new_body, chain, packed)
1338 })
1339 }
1340 Kind::Begin | Kind::End => TRegexId::BOT,
1341 Kind::Pred => {
1342 let psi = node_id.pred_tset(self);
1343 if psi == TSetId::EMPTY {
1344 TRegexId::BOT
1345 } else {
1346 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1347 }
1348 }
1349 };
1350
1351 self.cache_der(node_id, result, mask);
1355 Ok(result)
1356 }
1357
1358 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1359 debug_assert!(meta_id != MetadataId::MISSING);
1360 match self.metadata.get_mut(node_id.0 as usize) {
1361 Some(v) => *v = meta_id,
1362 None => {
1363 self.metadata
1364 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1365 self.metadata[node_id.0 as usize] = meta_id;
1366 }
1367 }
1368 }
1369
1370 fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1371 debug_assert!(reversed_id != NodeId::MISSING);
1372 match self.reversed.get_mut(node_id.0 as usize) {
1373 Some(v) => *v = reversed_id,
1374 None => {
1375 self.reversed
1376 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1377 self.reversed[node_id.0 as usize] = reversed_id;
1378 }
1379 }
1380 }
1381
1382 fn init(&mut self, inst: NodeKey) -> NodeId {
1383 self.num_created += 1;
1384 let node_id = NodeId(self.num_created);
1385 self.index.insert(inst.clone(), node_id);
1386 match inst.kind {
1387 Kind::Pred => {
1388 let meta_id = self.mb.get_meta_id(Metadata {
1389 flags: MetaFlags::ZERO,
1390 nulls: NullsId::EMPTY,
1391 });
1392 self.init_metadata(node_id, meta_id);
1393 }
1394 Kind::Begin => {
1395 let meta_id = self.mb.get_meta_id(Metadata {
1396 flags: MetaFlags::with_nullability(
1397 Nullability::BEGIN,
1398 MetaFlags::CONTAINS_ANCHORS,
1399 ),
1400 nulls: NullsId::BEGIN0,
1401 });
1402 self.init_metadata(node_id, meta_id);
1403 }
1404 Kind::End => {
1405 let meta_id = self.mb.get_meta_id(Metadata {
1406 flags: MetaFlags::with_nullability(
1407 Nullability::END,
1408 MetaFlags::CONTAINS_ANCHORS,
1409 ),
1410 nulls: NullsId::END0,
1411 });
1412 self.init_metadata(node_id, meta_id);
1413 }
1414 Kind::Inter => {
1415 let m1 = self.get_node_meta_id(inst.left);
1416 let m2 = self.get_node_meta_id(inst.right);
1417 let meta_id = {
1418 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1419 let mask_l = inst.left.nullability(self);
1420 let mask_r = inst.right.nullability(self);
1421 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1422 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1423 nulls = self.mb.nb.and_mask(nulls, mask_l);
1424 nulls = self.mb.nb.and_mask(nulls, mask_r);
1425 let new_meta = Metadata {
1426 flags: self.mb.flags_inter(m1, m2),
1427 nulls,
1428 };
1429 self.mb.get_meta_id(new_meta)
1430 };
1431 self.init_metadata(node_id, meta_id);
1432 }
1433 Kind::Union => {
1434 let m1 = self.get_node_meta_id(inst.left);
1435 let m2 = self.get_node_meta_id(inst.right);
1436 let meta_id = {
1437 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1438 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1439 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1440 let new_meta = Metadata {
1441 flags: self.mb.flags_union(m1, m2),
1442 nulls,
1443 };
1444 self.mb.get_meta_id(new_meta)
1445 };
1446 self.init_metadata(node_id, meta_id);
1447 }
1448 Kind::Concat => {
1449 let flags = self.mb.flags_concat(
1450 self.get_node_meta_id(inst.left),
1451 self.get_node_meta_id(inst.right),
1452 );
1453
1454 let right_nullability = inst.right.nullability(self);
1455 let left_nullability = inst.left.nullability(self);
1456 let nulls_left = self.get_nulls_id(inst.left);
1457 let nulls_right = self.get_nulls_id(inst.right);
1458 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1459 let mask = right_nullability.and(left_nullability);
1460 nulls = self.mb.nb.and_mask(nulls, mask);
1461
1462 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1463 self.init_metadata(node_id, new_id);
1464 }
1465 Kind::Star => {
1466 let left_nulls = self.get_nulls_id(inst.left);
1467 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1468 let meta_id = self.mb.get_meta_id(Metadata {
1469 flags: self
1470 .mb
1471 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1472 nulls,
1473 });
1474 self.init_metadata(node_id, meta_id);
1475 }
1476 Kind::Compl => {
1477 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1478 let meta_id = self.mb.get_meta_id(Metadata {
1479 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1480 nulls,
1481 });
1482 self.init_metadata(node_id, meta_id);
1483 }
1484 Kind::Lookbehind => {
1485 let mut null = self.get_meta_flags(inst.left).nullability();
1486 let mut contains_flags = self.get_flags_contains(inst.left);
1487 if !inst.right.is_missing() {
1488 null = null.and(self.get_meta_flags(inst.right).nullability());
1489 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1490 }
1491
1492 let left_nulls = self.get_nulls_id(inst.left);
1493 let right_nulls = self.get_nulls_id(inst.right);
1494 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1495 let meta_id = self.mb.get_meta_id(Metadata {
1496 flags: MetaFlags::with_nullability(
1497 null,
1498 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1499 ),
1500 nulls,
1501 });
1502 self.init_metadata(node_id, meta_id);
1503 }
1504 Kind::Lookahead => {
1505 let mut nulls = self.get_nulls_id(inst.left);
1506 let left_nullability = inst.left.nullability(self);
1507 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1508 nulls = self.mb.nb.or_id(nulls, nulls_right);
1509 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1510
1511 let la_inner = inst.left;
1512 let la_tail = inst.right;
1513 let null = self
1514 .get_meta_flags(la_inner)
1515 .nullability()
1516 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1517 let contains_flags = self
1518 .get_flags_contains(la_inner)
1519 .or(self.get_flags_contains(la_tail));
1520
1521 let meta_id = self.mb.get_meta_id(Metadata {
1522 flags: MetaFlags::with_nullability(
1523 null,
1524 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1525 ),
1526 nulls,
1527 });
1528 self.init_metadata(node_id, meta_id);
1529 }
1530 Kind::Counted => {
1531 let best = inst.extra >> 16;
1532 let (null, nulls) = if best > 0 {
1533 let mut ns = BTreeSet::new();
1534 ns.insert(NullState::new(Nullability::CENTER, best));
1535 (Nullability::CENTER, self.mb.nb.get_id(ns))
1536 } else {
1537 (Nullability::NEVER, NullsId::EMPTY)
1538 };
1539 let meta_id = self.mb.get_meta_id(Metadata {
1540 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1541 nulls,
1542 });
1543 self.init_metadata(node_id, meta_id);
1544 }
1545 }
1546
1547 self.array.push(inst);
1548
1549 if let Some(rw) = self.post_init_simplify(node_id) {
1550 self.override_as(node_id, rw)
1551 } else {
1552 node_id
1553 }
1554 }
1555
1556 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1557 match self.get_kind(node_id) {
1558 Kind::Concat => {
1559 let lhs = node_id.left(self);
1560 let rhs = node_id.right(self);
1561 if lhs.is_pred_star(self).is_some() {
1562 if let Some(opttail) = rhs.is_opt_v(self) {
1563 if let Some(true) = self.subsumes(lhs, opttail) {
1564 return Some(lhs);
1566 }
1567 }
1568 }
1569 }
1570 Kind::Union => {
1571 let lhs = node_id.left(self);
1572 let rhs = node_id.right(self);
1573 let mut subsumed = false;
1574 rhs.iter_union_while(self, &mut |b, branch| {
1575 if b.nullable_subsumes(branch, lhs) {
1576 subsumed = true;
1577 }
1578 !subsumed
1579 });
1580 if subsumed {
1581 return Some(rhs);
1582 }
1583 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1584 return Some(rhs);
1585 }
1586 }
1587 _ => {}
1588 }
1589
1590 None
1591 }
1592
1593 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1595 if self.get_kind(lhs) != Kind::Union {
1596 return false; }
1598 let mut rhs_branches = Vec::new();
1599 let mut curr = rhs;
1600 while self.get_kind(curr) == Kind::Union {
1601 rhs_branches.push(self.get_left(curr));
1602 curr = self.get_right(curr);
1603 }
1604 rhs_branches.push(curr);
1605
1606 curr = lhs;
1607 while self.get_kind(curr) == Kind::Union {
1608 if !rhs_branches.contains(&self.get_left(curr)) {
1609 return false;
1610 }
1611 curr = self.get_right(curr);
1612 }
1613 rhs_branches.contains(&curr)
1614 }
1615
1616 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1618 if node == target {
1619 return true;
1620 }
1621 match self.get_kind(node) {
1622 Kind::Union => {
1623 self.nullable_subsumes(self.get_left(node), target)
1624 || self.nullable_subsumes(self.get_right(node), target)
1625 }
1626 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1627 self.nullable_subsumes(self.get_right(node), target)
1628 }
1629 _ => false,
1630 }
1631 }
1632
1633 pub fn num_nodes(&self) -> u32 {
1634 self.num_created
1635 }
1636 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1637 match self.index.get(&inst) {
1638 Some(&id) => id,
1639 None => self.init(inst),
1640 }
1641 }
1642 #[inline]
1643 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1644 self.index.get(inst)
1645 }
1646
1647 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1648 self.index.insert(key, subsumed);
1649 subsumed
1650 }
1651
1652 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1653 let key = &self.array[key.0 as usize];
1654 let entry = self.index.get_mut(key).unwrap();
1655 *entry = subsumed;
1656 subsumed
1657 }
1658
1659 #[inline]
1660 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1661 self.array[node_id.0 as usize].left
1662 }
1663
1664 #[inline]
1665 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1666 self.array[node_id.0 as usize].right
1667 }
1668
1669 #[inline]
1670 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1671 self.array[node_id.0 as usize].extra
1672 }
1673
1674 #[inline]
1675 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1676 debug_assert!(self.get_kind(node_id) == Kind::Concat);
1677 let mut curr = node_id;
1678 while self.get_kind(curr) == Kind::Concat {
1679 curr = curr.right(self);
1680 }
1681 curr
1682 }
1683
1684 fn has_trailing_la(&self, node: NodeId) -> bool {
1685 let end = match self.get_kind(node) {
1686 Kind::Concat => self.get_concat_end(node),
1687 Kind::Lookahead => node,
1688 _ => return false,
1689 };
1690 self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1691 }
1692
1693 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1694 if self.get_kind(node) == Kind::Lookahead {
1695 return (NodeId::EPS, node);
1696 }
1697 debug_assert!(self.get_kind(node) == Kind::Concat);
1698 let right = node.right(self);
1699 if self.get_kind(right) != Kind::Concat {
1700 return (node.left(self), right);
1701 }
1702 let (stripped, la) = self.strip_trailing_la(right);
1703 (self.mk_concat(node.left(self), stripped), la)
1704 }
1705 #[inline]
1706 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1707 debug_assert!(matches!(
1708 self.get_kind(lookahead_node_id),
1709 Kind::Lookahead | Kind::Counted
1710 ));
1711 lookahead_node_id.left(self)
1712 }
1713 #[inline]
1714 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1715 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1716 lookahead_node_id.right(self)
1717 }
1718 #[inline]
1719 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1720 debug_assert!(
1721 matches!(
1722 self.get_kind(lookahead_node_id),
1723 Kind::Lookahead | Kind::Counted
1724 ),
1725 "not lookahead/counted: {:?}",
1726 self.pp(lookahead_node_id)
1727 );
1728 self.get_extra(lookahead_node_id)
1729 }
1730 #[inline]
1731 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1732 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1733 lookbehind_node_id.left(self)
1734 }
1735 #[inline]
1736 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1737 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1738 lookbehind_node_id.right(self)
1739 }
1740
1741 #[inline]
1742 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1743 debug_assert!(
1744 self.array.len() > node_id.0 as usize,
1745 "array len: {:?}",
1746 node_id
1747 );
1748 self.array[node_id.0 as usize].kind
1749 }
1750
1751 #[inline]
1752 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1753 &self.array[node_id.0 as usize]
1754 }
1755
1756 #[inline]
1757 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1758 self.metadata[node_id.0 as usize]
1759 }
1760
1761 #[inline]
1762 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1763 debug_assert!(node_id.0 != u32::MAX);
1764 let meta_id = self.get_node_meta_id(node_id);
1765 debug_assert!(meta_id != MetadataId::MISSING);
1766 &self.mb.array[meta_id.0 as usize]
1767 }
1768
1769 #[inline]
1770 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1771 if node_id == NodeId::MISSING {
1772 NullsId::EMPTY
1773 } else {
1774 self.get_meta(node_id).nulls
1775 }
1776 }
1777
1778 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1779 self.mb
1780 .nb
1781 .array
1782 .iter()
1783 .map(|set| set.iter().cloned().collect())
1784 .collect()
1785 }
1786
1787 pub fn nulls_count(&self) -> usize {
1788 self.mb.nb.array.len()
1789 }
1790
1791 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1792 self.mb.nb.array[id as usize].iter().cloned().collect()
1793 }
1794
1795 #[inline]
1796 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1797 if node_id == NodeId::MISSING {
1798 NullsId::EMPTY
1799 } else {
1800 let nulls = self.get_meta(node_id).nulls;
1801 self.mb.nb.and_mask(nulls, mask)
1802 }
1803 }
1804
1805 #[inline]
1806 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1807 let meta_id = self.get_node_meta_id(node_id);
1808 let meta = &self.mb.array[meta_id.0 as usize];
1809 meta.flags
1810 }
1811
1812 #[inline]
1813 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1814 self.get_meta(node_id).flags.nullability()
1815 }
1816
1817 #[inline]
1818 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1819 let meta_id = self.get_node_meta_id(node_id);
1820 let meta = &self.mb.array[meta_id.0 as usize];
1821 meta.flags.all_contains_flags()
1822 }
1823
1824 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1825 if self.get_kind(node_id) == Kind::Concat && node_id.left(self) == NodeId::BEGIN {
1826 return self.strip_lb(node_id.right(self));
1827 }
1828 self.strip_lb_inner(node_id)
1829 }
1830
1831 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1832 if !self.contains_look(node_id) {
1833 return Ok(node_id);
1834 }
1835 if self.get_kind(node_id) == Kind::Concat
1836 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1837 {
1838 let lb = node_id.left(self);
1839 let prev = self.get_lookbehind_prev(lb);
1840 let tail = self.strip_lb_inner(node_id.right(self))?;
1841 if prev != NodeId::MISSING {
1842 let stripped_prev = self.strip_lb_inner(prev)?;
1843 return Ok(self.mk_concat(stripped_prev, tail));
1844 }
1845 return Ok(tail);
1846 }
1847 if self.get_kind(node_id) == Kind::Inter {
1848 let left = self.strip_lb_inner(node_id.left(self))?;
1849 let right = self.strip_lb_inner(node_id.right(self))?;
1850 return Ok(self.mk_inter(left, right));
1851 }
1852 if self.get_kind(node_id) == Kind::Union {
1853 let left = self.strip_lb_inner(node_id.left(self))?;
1854 let right = self.strip_lb_inner(node_id.right(self))?;
1855 return Ok(self.mk_union(left, right));
1856 }
1857 match self.get_kind(node_id) {
1858 Kind::Lookbehind => {
1859 let prev = self.get_lookbehind_prev(node_id);
1860 if prev != NodeId::MISSING {
1861 self.strip_lb_inner(prev)
1862 } else {
1863 Ok(NodeId::EPS)
1864 }
1865 }
1866 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1867 Err(AlgebraError::UnsupportedPattern)
1868 }
1869 _ => Ok(node_id),
1870 }
1871 }
1872
1873 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
1875 if !self.contains_anchors(node_id) {
1876 return node_id;
1877 }
1878 match self.get_kind(node_id) {
1879 Kind::Begin => NodeId::BOT,
1880 Kind::Concat => {
1881 let left = self.nonbegins(node_id.left(self));
1882 if left == NodeId::BOT {
1883 return NodeId::BOT;
1884 }
1885 self.mk_concat(left, node_id.right(self))
1886 }
1887 Kind::Union => {
1888 let left = self.nonbegins(node_id.left(self));
1889 let right = self.nonbegins(node_id.right(self));
1890 self.mk_union(left, right)
1891 }
1892 _ => node_id,
1893 }
1894 }
1895
1896 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
1897 match self.get_kind(node_id) {
1898 Kind::Concat => {
1899 let head = node_id.left(self);
1900 match self.get_kind(head) {
1901 _ if self.any_nonbegin_nullable(head) => {
1902 self.strip_prefix_safe(node_id.right(self))
1903 }
1904 _ => node_id,
1905 }
1906 }
1907 _ => node_id,
1908 }
1909 }
1910 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
1911 match self.get_kind(node_id) {
1912 Kind::Begin => NodeId::BOT,
1913 Kind::Concat => {
1914 let head = self.prune_begin(node_id.left(self));
1915 let tail = self.prune_begin(node_id.right(self));
1916 self.mk_concat(head, tail)
1917 }
1918 Kind::Lookbehind => {
1919 if !node_id.right(self).is_missing() {
1920 return node_id;
1921 }
1922 let head = self.prune_begin(node_id.left(self));
1923 head
1924 }
1925 Kind::Union => {
1926 let left = self.prune_begin(node_id.left(self));
1927 let right = self.prune_begin(node_id.right(self));
1928 self.mk_union(left, right)
1929 }
1930 _ => node_id,
1931 }
1932 }
1933
1934 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1935 if !self.contains_look(node_id) {
1936 return Ok(node_id);
1937 }
1938 if self.get_kind(node_id) == Kind::Concat
1939 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1940 {
1941 let left = node_id.left(self);
1942 let ll = left.left(self).missing_to_eps();
1943 let lr = left.right(self).missing_to_eps();
1944 let new_l = self.mk_concat(ll, lr);
1945 let new = self.mk_concat(new_l, node_id.right(self));
1946 return Ok(new);
1947 }
1948 if self.get_kind(node_id) == Kind::Inter {
1949 let left = self.normalize_rev(node_id.left(self))?;
1950 let right = self.normalize_rev(node_id.right(self))?;
1951 return Ok(self.mk_inter(left, right));
1952 }
1953 if self.get_kind(node_id) == Kind::Union {
1954 let left = self.normalize_rev(node_id.left(self))?;
1955 let right = self.normalize_rev(node_id.right(self))?;
1956 return Ok(self.mk_union(left, right));
1957 }
1958 match self.get_kind(node_id) {
1959 Kind::Lookbehind => Err(AlgebraError::UnsupportedPattern),
1960 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
1961 Err(AlgebraError::UnsupportedPattern)
1962 }
1963 _ => Ok(node_id),
1964 }
1965 }
1966
1967 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1968 debug_assert!(node_id != NodeId::MISSING);
1969 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1970 if *rev != NodeId::MISSING {
1971 return Ok(*rev);
1972 }
1973 }
1974 let rw = match self.get_kind(node_id) {
1975 Kind::End => NodeId::BEGIN,
1976 Kind::Begin => NodeId::END,
1977 Kind::Pred => node_id,
1978 Kind::Concat => {
1979 let left = self.reverse(node_id.left(self))?;
1980 let right = self.reverse(node_id.right(self))?;
1981 self.mk_concat(right, left)
1982 }
1983 Kind::Union => {
1984 let left = self.reverse(node_id.left(self))?;
1985 let right = self.reverse(node_id.right(self))?;
1986 self.mk_union(left, right)
1987 }
1988 Kind::Inter => {
1989 let left = self.reverse(node_id.left(self))?;
1990 let right = self.reverse(node_id.right(self))?;
1991 self.mk_inter(left, right)
1992 }
1993 Kind::Star => {
1994 let body = self.reverse(node_id.left(self))?;
1995 self.mk_star(body)
1996 }
1997 Kind::Compl => {
1998 if self.contains_look(node_id.left(self)) {
1999 return Err(AlgebraError::UnsupportedPattern);
2000 }
2001 let body = self.reverse(node_id.left(self))?;
2002 self.mk_compl(body)
2003 }
2004 Kind::Lookbehind => {
2005 let prev = self.get_lookbehind_prev(node_id);
2006 let inner_id = self.get_lookbehind_inner(node_id);
2007 let rev_inner = self.reverse(inner_id)?;
2008 let rev_prev = if prev != NodeId::MISSING {
2009 self.reverse(prev)?
2010 } else {
2011 NodeId::MISSING
2012 };
2013 self.mk_lookahead(rev_inner, rev_prev, 0)
2014 }
2015 Kind::Lookahead => {
2016 let rel = self.get_lookahead_rel(node_id);
2017 if rel == u32::MAX {
2018 let lbody = self.get_lookahead_inner(node_id);
2020 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2021 let lbody_ts = self.mk_concat(lbody, NodeId::TS);
2022 let ltail_ts = self.mk_concat(ltail, NodeId::TS);
2023 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2024 let rev = self.reverse(as_inter)?;
2025 self.init_reversed(node_id, rev);
2026 return Ok(rev);
2027 }
2028 if rel != 0 {
2029 return Err(AlgebraError::UnsupportedPattern);
2030 }
2031 let tail_node = self.get_lookahead_tail(node_id);
2032 let rev_tail = if tail_node != NodeId::MISSING {
2033 self.reverse(tail_node)?
2034 } else {
2035 NodeId::MISSING
2036 };
2037 let inner_id = self.get_lookahead_inner(node_id);
2038 let rev_inner = self.reverse(inner_id)?;
2039 self.mk_lookbehind(rev_inner, rev_tail)
2040 }
2041 Kind::Counted => {
2042 return Err(AlgebraError::UnsupportedPattern);
2043 }
2044 };
2045 self.init_reversed(node_id, rw);
2046 Ok(rw)
2047 }
2048
2049 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2050 let node = NodeKey {
2051 kind: Kind::Pred,
2052 left: NodeId::MISSING,
2053 right: NodeId::MISSING,
2054 extra: pred.0,
2055 };
2056 self.get_node_id(node)
2057 }
2058
2059 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2060 let key = NodeKey {
2061 kind: Kind::Compl,
2062 left: body,
2063 right: NodeId::MISSING,
2064 extra: u32::MAX,
2065 };
2066 if let Some(id) = self.key_is_created(&key) {
2067 return *id;
2068 }
2069 if body == NodeId::BOT {
2070 return NodeId::TS;
2071 }
2072 if body == NodeId::TS {
2073 return NodeId::BOT;
2074 }
2075
2076 if let Some(contains_body) = body.is_contains(self) {
2077 if contains_body.is_pred(self) {
2078 let pred = contains_body.pred_tset(self);
2079 let notpred = self.mk_pred_not(pred);
2080 let node = self.mk_star(notpred);
2081 return self.init_as(key, node);
2082 } else if contains_body == NodeId::END {
2083 return self.init_as(key, NodeId::BOT);
2084 }
2085 };
2086
2087 if self.get_kind(body) == Kind::Compl {
2088 return self.get_node(body).left;
2089 }
2090
2091 self.get_node_id(key)
2092 }
2093
2094 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2095 let nid = self.get_nulls_id(body);
2096 let nref = self.mb.nb.get_set_ref(nid).clone();
2097 let mut futures = NodeId::BOT;
2098 for n in nref.iter() {
2099 if !n.is_mask_nullable(mask) {
2100 continue;
2101 }
2102
2103 let eff = if n.rel == 0 {
2104 NodeId::EPS
2105 } else {
2106 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2107 };
2108 futures = self.mk_union(futures, eff)
2109 }
2110 futures
2111 }
2112
2113 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2114 if cfg!(feature = "norewrite") {
2115 return None;
2116 }
2117
2118 if self.get_kind(tail) == Kind::Lookbehind {
2119 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2120 return self
2121 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2122 .ok();
2123 }
2124 if self.get_kind(head) == Kind::Lookahead {
2125 let la_tail = self.get_lookahead_tail(head);
2126 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2127 if new_la_tail.is_center_nullable(self) {
2128 let non_null_tail = self.mk_non_nullable_safe(new_la_tail);
2129 if non_null_tail == NodeId::BOT {
2130 return None;
2131 }
2132 let la_body = self.get_lookahead_inner(head);
2133 return Some(self.mk_lookahead_internal(la_body, non_null_tail, u32::MAX));
2134 }
2135 let la_body = self.get_lookahead_inner(head);
2136 let la_rel = self.get_lookahead_rel(head);
2137 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2138 let tail_rel = self.get_lookahead_rel(new_la_tail);
2139 tail_rel + la_rel
2140 } else {
2141 u32::MAX
2142 };
2143
2144 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2145 }
2146
2147 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2148 return Some(head);
2149 }
2150 if head == NodeId::TS && tail == NodeId::END {
2151 return Some(head);
2152 }
2153
2154 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2155 return Some(NodeId::TS);
2156 }
2157
2158 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2159 return Some(NodeId::TS);
2160 }
2161
2162 if self.get_kind(tail) == Kind::Union && head == NodeId::TS {
2163 let mut should_distribute_top = false;
2164 self.iter_unions(tail, |v| {
2165 if v == NodeId::BEGIN || self.starts_with_ts(v) {
2166 should_distribute_top = true;
2167 }
2168 });
2169 if should_distribute_top {
2170 let mut new_union = NodeId::BOT;
2171 let mut curr = tail;
2172 while self.get_kind(curr) == Kind::Union {
2173 let new_node = self.mk_concat(NodeId::TS, curr.left(self));
2174 new_union = self.mk_union(new_node, new_union);
2175 curr = curr.right(self);
2176 }
2177 let new_node = self.mk_concat(NodeId::TS, curr);
2178 new_union = self.mk_union(new_union, new_node);
2179 return Some(new_union);
2180 }
2181 }
2182
2183 if self.get_kind(head) == Kind::Union && head == NodeId::TS {
2184 let mut should_distribute_top = false;
2185 self.iter_unions(head, |v| {
2186 if v == NodeId::END || self.starts_with_ts(v) {
2187 should_distribute_top = true;
2188 }
2189 });
2190 if should_distribute_top {
2191 let mut new_union = NodeId::BOT;
2192 let mut curr = head;
2193 while self.get_kind(curr) == Kind::Union {
2194 let new_node = self.mk_concat(curr.left(self), NodeId::TS);
2195 new_union = self.mk_union(new_node, new_union);
2196 curr = curr.right(self);
2197 }
2198 let new_node = self.mk_concat(curr, NodeId::TS);
2199 new_union = self.mk_union(new_union, new_node);
2200 return Some(new_union);
2201 }
2202 }
2203
2204 if self.get_kind(head) == Kind::Inter && tail == NodeId::TS {
2205 let mut alltopstar = true;
2206 iter_inter!(self, head, |v| {
2207 alltopstar = self.ends_with_ts(v);
2208 });
2209 if alltopstar {
2210 return Some(head);
2211 }
2212 }
2213
2214 if head.is_star(self) && head == tail {
2215 return Some(head);
2216 }
2217
2218 None
2219 }
2220
2221 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2222 use Kind::*;
2223
2224 if cfg!(feature = "norewrite") {
2225 return None;
2226 }
2227 if left == right {
2228 return Some(left);
2229 }
2230
2231 if right.is_kind(self, Kind::Union) && left == right.left(self) {
2232 return Some(right);
2233 }
2234
2235 if self.get_kind(left) == Kind::Pred && self.get_kind(right) == Kind::Pred {
2236 let l = left.pred_tset(self);
2237 let r = right.pred_tset(self);
2238 let solver = self.solver();
2239 let psi = solver.or_id(l, r);
2240 let rewrite = self.mk_pred(psi);
2241 return Some(rewrite);
2242 }
2243
2244 if left == NodeId::EPS
2245 && self.get_extra(left) == 0
2246 && self.nullability(right) == Nullability::ALWAYS
2247 {
2248 return Some(right);
2249 }
2250
2251 if self.get_kind(left) == Kind::Lookahead && self.get_kind(right) == Kind::Lookahead {
2252 let lb = left.left(self);
2253 let lt = left.right(self);
2254 let lrel = left.extra(self);
2255
2256 let rb = right.left(self);
2257 let rt = right.right(self);
2258 let rrel = right.extra(self);
2259
2260 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2261 let unioned = self.mk_union(lb, rb);
2262 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2263 return Some(node);
2264 }
2265 }
2266
2267 if right.is_kind(self, Concat) {
2268 if left == NodeId::END
2269 && right.left(self) == NodeId::END
2270 && self.nullability(right).has(Nullability::END)
2271 {
2272 return Some(right);
2273 }
2274 if left == right.left(self) {
2276 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2277 let rw = self.mk_concat(left, rhs);
2278 return Some(rw);
2279 }
2280 if left.is_kind(self, Concat) {
2281 let head1 = left.left(self);
2282 let head2 = right.left(self);
2283
2284 if head1 == head2 {
2285 let t1 = left.right(self);
2286 let t2 = right.right(self);
2287 if head1 == NodeId::TS {
2289 if t2.has_concat_tail(self, t1) {
2290 return Some(left);
2291 }
2292 if t1.has_concat_tail(self, t2) {
2293 return Some(right);
2294 }
2295 }
2296 let un = self.mk_union(t1, t2);
2297 return Some(self.mk_concat(left.left(self), un));
2298 }
2299
2300 if false {
2304 let end1 = self.get_concat_end(left);
2305 let end2 = self.get_concat_end(right);
2306 if end1 == end2 {
2307 let flags = left.flags_contains(self).or(right.flags_contains(self));
2308 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2309 let rev1 = self.reverse(left).unwrap();
2310 let rev2 = self.reverse(right).unwrap();
2311
2312 let union_rev = self.mk_union(rev1, rev2);
2313 return Some(self.reverse(union_rev).unwrap());
2314 }
2315 }
2316 }
2317 }
2318 if left.is_pred(self) && left == right.left(self) {
2319 let un = self.mk_opt(right.right(self));
2320 let conc = self.mk_concat(left, un);
2321 return Some(conc);
2322 }
2323 }
2324
2325 if left == NodeId::EPS && right.is_plus(self) {
2326 let result = self.mk_star(right.left(self));
2327 return Some(result);
2328 }
2329
2330 if left.is_inter(self) && right.is_inter(self) {
2332 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2333 return Some(rw);
2334 }
2335 }
2336
2337 None
2338 }
2339
2340 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2341 let mut curr = node;
2342 while self.get_kind(curr) == Kind::Inter {
2343 out.push(self.get_left(curr));
2344 curr = self.get_right(curr);
2345 }
2346 out.push(curr);
2347 }
2348
2349 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2350 let mut curr = node;
2351 let has_prefix = self.get_kind(curr) == Kind::Concat && self.get_left(curr) == NodeId::TS;
2352 if has_prefix {
2353 curr = self.get_right(curr);
2354 }
2355 let mut count = 0u32;
2356 let mut pred_set = None;
2357 while self.get_kind(curr) == Kind::Concat {
2358 let head = self.get_left(curr);
2359 if self.get_kind(head) != Kind::Pred {
2360 return None;
2361 }
2362 let ps = head.pred_tset(self);
2363 match pred_set {
2364 None => pred_set = Some(ps),
2365 Some(existing) => {
2366 if existing != ps {
2367 return None;
2368 }
2369 }
2370 }
2371 curr = self.get_right(curr);
2372 count += 1;
2373 }
2374 if count == 0 || self.get_kind(curr) != Kind::Star {
2375 return None;
2376 }
2377 Some((has_prefix, pred_set.unwrap(), curr, count))
2378 }
2379
2380 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2381 let mut j = 0;
2382 for &s in sub {
2383 while j < sup.len() && sup[j] < s {
2384 j += 1;
2385 }
2386 if j >= sup.len() || sup[j] != s {
2387 return false;
2388 }
2389 j += 1;
2390 }
2391 true
2392 }
2393
2394 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2395 if self.get_kind(left) != Kind::Inter || self.get_kind(right) != Kind::Inter {
2396 return None;
2397 }
2398
2399 let mut lc: Vec<NodeId> = Vec::new();
2400 let mut rc: Vec<NodeId> = Vec::new();
2401 self.collect_inter_components(left, &mut lc);
2402 self.collect_inter_components(right, &mut rc);
2403
2404 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2406 return Some(left);
2407 }
2408 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2410 return Some(right);
2411 }
2412
2413 if lc.len() == rc.len() {
2414 let mut diff_idx = None;
2415 for i in 0..lc.len() {
2416 if lc[i] != rc[i] {
2417 if diff_idx.is_some() {
2418 return None;
2419 }
2420 diff_idx = Some(i);
2421 }
2422 }
2423 if let Some(idx) = diff_idx {
2424 let a = lc[idx];
2425 let b = rc[idx];
2426 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2427 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2428 {
2429 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2430 return if ca < cb { Some(left) } else { Some(right) };
2431 }
2432 }
2433 }
2434 }
2435
2436 None
2437 }
2438
2439 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2440 if cfg!(feature = "norewrite") {
2441 return None;
2442 }
2443 if left == right {
2444 return Some(left);
2445 }
2446
2447 if self.get_kind(right) == Kind::Union {
2448 let mut result = NodeId::BOT;
2449 self.iter_unions_b(
2450 right,
2451 &mut (|b, v| {
2452 let new_inter = b.mk_inter(v, left);
2453 result = b.mk_union(result, new_inter);
2454 }),
2455 );
2456 return Some(result);
2457 }
2458 if self.get_kind(left) == Kind::Union {
2459 let mut result = NodeId::BOT;
2460 self.iter_unions_b(
2461 left,
2462 &mut (|b, v| {
2463 let new_inter = b.mk_inter(v, right);
2464 result = b.mk_union(result, new_inter);
2465 }),
2466 );
2467 return Some(result);
2468 }
2469
2470 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2471 return Some(NodeId::BOT);
2472 }
2473
2474 if left.kind(self) == Kind::Compl && right.kind(self) == Kind::Compl {
2475 let bodies = self.mk_union(left.left(self), right.left(self));
2476 return Some(self.mk_compl(bodies));
2477 }
2478
2479 if left == NodeId::TOPPLUS {
2480 if right.is_pred_star(self).is_some() {
2481 let newloop = self.mk_plus(right.left(self));
2482 return Some(newloop);
2483 }
2484 if right.is_never_nullable(self) {
2485 return Some(right);
2486 }
2487 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2488 return Some(NodeId::BOT);
2489 }
2490 if right.is_kind(self, Kind::Concat) {}
2491 }
2492
2493 {
2494 let l_is_la = left.is_lookahead(self);
2495 let r_is_la = right.is_lookahead(self);
2496 let l_is_cla = !l_is_la
2497 && self.get_kind(left) == Kind::Concat
2498 && self.get_kind(left.left(self)) == Kind::Lookahead;
2499 let r_is_cla = !r_is_la
2500 && self.get_kind(right) == Kind::Concat
2501 && self.get_kind(right.left(self)) == Kind::Lookahead;
2502 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2503 let (la_node, other, concat_body) = if r_is_la {
2504 (right, left, NodeId::MISSING)
2505 } else if l_is_la {
2506 (left, right, NodeId::MISSING)
2507 } else if r_is_cla {
2508 (right.left(self), left, right.right(self))
2509 } else {
2510 (left.left(self), right, left.right(self))
2511 };
2512 let la_body = la_node.left(self);
2513 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2514 let inter_right = if concat_body.is_missing() {
2515 la_tail
2516 } else {
2517 self.mk_concat(la_tail, concat_body)
2518 };
2519 let new_body = self.mk_inter(other, inter_right);
2520 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2521 return Some(self.mk_concat(la, new_body));
2522 }
2523 }
2524
2525 if self.get_kind(right) == Kind::Compl {
2526 let compl_body = right.left(self);
2527 if left == compl_body {
2528 return Some(NodeId::BOT);
2529 }
2530 if self.get_kind(compl_body) == Kind::Concat {
2531 let compl_head = compl_body.left(self);
2532 if compl_body.right(self) == NodeId::TS && compl_head == left {
2533 return Some(NodeId::BOT);
2534 }
2535 }
2536 }
2537
2538 if let Some(pleft) = left.is_pred_star(self) {
2539 if let Some(pright) = right.is_pred_star(self) {
2540 let merged = self.mk_inter(pleft, pright);
2541 return Some(self.mk_star(merged));
2542 }
2543 }
2544
2545 {
2546 let l_is_clb = self.get_kind(left) == Kind::Concat
2547 && self.get_kind(left.left(self)) == Kind::Lookbehind;
2548 let r_is_clb = self.get_kind(right) == Kind::Concat
2549 && self.get_kind(right.left(self)) == Kind::Lookbehind;
2550 if l_is_clb || r_is_clb {
2551 let (lb, body) = if l_is_clb && r_is_clb {
2552 let lb1 = left.left(self);
2553 let lb2 = right.left(self);
2554 let inner = self.mk_inter(
2555 self.get_lookbehind_inner(lb1),
2556 self.get_lookbehind_inner(lb2),
2557 );
2558 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2559 let body = self.mk_inter(left.right(self), right.right(self));
2560 (lb, body)
2561 } else if l_is_clb {
2562 let lb = left.left(self);
2563 let body = self.mk_inter(left.right(self), right);
2564 (lb, body)
2565 } else {
2566 let lb = right.left(self);
2567 let body = self.mk_inter(left, right.right(self));
2568 (lb, body)
2569 };
2570 return Some(self.mk_concat(lb, body));
2571 }
2572 }
2573
2574 {
2575 let l_has_la = self.has_trailing_la(left);
2576 let r_has_la = self.has_trailing_la(right);
2577 if l_has_la || r_has_la {
2578 let (body, la) = if l_has_la && r_has_la {
2579 let (lbody, l_la) = self.strip_trailing_la(left);
2580 let (rbody, r_la) = self.strip_trailing_la(right);
2581 let inner = self.mk_inter(
2582 self.get_lookahead_inner(l_la),
2583 self.get_lookahead_inner(r_la),
2584 );
2585 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2586 let body = self.mk_inter(lbody, rbody);
2587 (body, la)
2588 } else if l_has_la {
2589 let (lbody, la) = self.strip_trailing_la(left);
2590 let body = self.mk_inter(lbody, right);
2591 (body, la)
2592 } else {
2593 let (rbody, la) = self.strip_trailing_la(right);
2594 let body = self.mk_inter(left, rbody);
2595 (body, la)
2596 };
2597 return Some(self.mk_concat(body, la));
2598 }
2599 }
2600
2601 None
2602 }
2603
2604 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2605 if cfg!(feature = "norewrite") {
2606 return None;
2607 }
2608 debug_assert!(self.get_kind(right_union) == Kind::Union);
2609
2610 let mut rewritten = None;
2611 right_union.iter_union_while(
2612 self,
2613 &mut (|b, v| {
2614 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2615 rewritten = Some((v, rw));
2616 false
2617 } else {
2618 true
2619 }
2620 }),
2621 );
2622
2623 if let Some(rw) = rewritten {
2624 let mut new_union = NodeId::BOT;
2625 right_union.iter_union(
2626 self,
2627 &mut (|b, v| {
2628 if v == rw.0 {
2629 new_union = b.mk_union(rw.1, new_union)
2630 } else {
2631 new_union = b.mk_union(v, new_union)
2632 }
2633 }),
2634 );
2635 return Some(new_union);
2636 };
2637
2638 None
2639 }
2640
2641 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2642 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2643 debug_assert!(tail != NodeId::MISSING);
2644 let key = NodeKey {
2645 kind: Kind::Concat,
2646 left: head,
2647 right: tail,
2648 extra: u32::MAX,
2649 };
2650 if let Some(id) = self.key_is_created(&key) {
2651 return *id;
2652 }
2653
2654 if head == NodeId::BOT || tail == NodeId::BOT {
2655 return NodeId::BOT;
2656 }
2657 if head == NodeId::EPS {
2658 return tail;
2659 }
2660 if tail == NodeId::EPS {
2661 return head;
2662 }
2663
2664 match tail {
2665 NodeId::BEGIN => {
2667 if !self.is_nullable(head, Nullability::BEGIN) {
2668 return NodeId::BOT;
2669 } else {
2670 return NodeId::BEGIN;
2671 }
2672 }
2673 _ => {}
2674 }
2675
2676 if head.is_kind(self, Kind::Concat) {
2678 let left = head.left(self);
2679 let newright = self.mk_concat(head.right(self), tail);
2680 let updated = self.mk_concat(left, newright);
2681 return self.init_as(key, updated);
2682 }
2683
2684 if head == NodeId::TS && tail == NodeId::END {
2685 return NodeId::TS;
2686 }
2687
2688 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
2689 return NodeId::BOT;
2690 }
2691
2692 if self.get_kind(tail) == Kind::Concat {
2693 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2694 let upd = self.mk_concat(rw, tail.right(self));
2695 return self.init_as(key, upd);
2696 }
2697 }
2698
2699 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
2700 return self.init_as(key, new);
2701 }
2702
2703 match (self.get_kind(head), self.get_kind(tail)) {
2704 (Kind::Star, Kind::Concat) if head.is_star(self) => {
2706 let rl = tail.left(self);
2707 if head == rl {
2708 return self.init_as(key, tail);
2709 }
2710 }
2711 (_, Kind::Concat) => {
2713 let curr = head;
2714 let h2 = self.mk_concat(curr, tail.left(self));
2715 let tr = tail.right(self);
2716 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
2717 return self.init_as(key, new);
2718 }
2719 }
2720 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
2721 return self.init_as(key, tail);
2722 }
2723 _ => {}
2724 }
2725
2726 self.init(key)
2727 }
2728
2729 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2730 let lb_body = {
2732 match self.starts_with_ts(lb_body) {
2733 true => lb_body,
2734 false => self.mk_concat(NodeId::TS, lb_body),
2735 }
2736 };
2737 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2739 }
2740
2741 fn mk_lookbehind_internal(
2742 &mut self,
2743 lb_body: NodeId,
2744 lb_prev: NodeId,
2745 ) -> Result<NodeId, AlgebraError> {
2746 debug_assert!(lb_body != NodeId::MISSING);
2747 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2748 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2749 return Ok(NodeId::BOT);
2750 }
2751 if lb_body == NodeId::TS {
2752 return Ok(lb_prev);
2753 }
2754 if lb_body == NodeId::EPS {
2755 match lb_prev {
2756 NodeId::MISSING => return Ok(NodeId::EPS),
2757 NodeId::EPS => return Ok(NodeId::EPS),
2758 _ => return Ok(lb_prev),
2759 }
2760 }
2761
2762 let key = NodeKey {
2763 kind: Kind::Lookbehind,
2764 left: lb_body,
2765 right: lb_prev,
2766 extra: u32::MAX,
2767 };
2768 match self.key_is_created(&key) {
2769 Some(id) => Ok(*id),
2770 None => {
2771 if lb_prev == NodeId::TS {
2772 return Ok(self.mk_concat(lb_prev, lb_body));
2773 }
2774
2775 Ok(self.init(key))
2776 }
2777 }
2778 }
2779
2780 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2781 let la_body = {
2783 match self.ends_with_ts(la_body) {
2784 true => la_body,
2785 false => self.mk_concat(la_body, NodeId::TS),
2786 }
2787 };
2788 let rel = if NodeId::MISSING == la_tail {
2789 rel
2790 } else {
2791 match la_tail.is_center_nullable(self) {
2792 false => u32::MAX,
2793 true => rel,
2794 }
2795 };
2796
2797 self.mk_lookahead_internal(la_body, la_tail, rel)
2798 }
2799
2800 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2802 let key = NodeKey {
2803 kind: Kind::Lookahead,
2804 left: la_body,
2805 right: la_tail,
2806 extra: rel,
2807 };
2808 if let Some(id) = self.key_is_created(&key) {
2809 return *id;
2810 }
2811 if la_body == NodeId::TS {
2812 if rel == 0 {
2813 return la_tail.missing_to_eps();
2814 } else {
2815 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2816 }
2817 }
2818 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2819 return NodeId::BOT;
2820 }
2821 if la_tail.is_missing() && rel == u32::MAX {
2822 return NodeId::BOT;
2823 }
2824
2825 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
2826 return la_body;
2827 }
2828
2829 if la_tail == NodeId::TS {
2830 if rel == 0 || rel == u32::MAX {
2831 return self.mk_concat(la_body, NodeId::TS);
2832 } else if rel == u32::MAX {
2833 return self.mk_begins_with(la_body);
2834 }
2835 }
2836
2837 if rel == u32::MAX {
2838 if la_tail.is_missing() {
2839 return NodeId::BOT;
2840 }
2841
2842 if self.is_always_nullable(la_body) {
2843 return la_tail.missing_to_eps();
2844 }
2845
2846 if la_tail != NodeId::MISSING {
2847 match self.get_kind(la_tail) {
2848 _ => {
2849 if la_body.is_compl_plus_end(self) {
2850 let minlen = self.get_min_length_only(la_tail);
2851 if minlen >= 1 {
2852 return NodeId::BOT;
2853 }
2854 }
2855 }
2856 }
2857 }
2858 }
2859
2860 if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2861 let la_body2 = self.get_lookahead_inner(la_tail);
2862 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2863 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2864 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2865 let new_la_rel = self.get_lookahead_rel(la_tail);
2866 let new_la_tail = self.get_lookahead_tail(la_tail);
2867 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2868 }
2869
2870 if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2871 let la_body_right = la_body.right(self);
2872 if self.is_always_nullable(la_body_right) {
2873 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2874 }
2875 if la_body.right(self) == NodeId::END {
2876 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2877 }
2878 let bodyright = la_body.right(self);
2879 if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2880 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2881 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2882 }
2883 }
2884
2885 if la_tail != NodeId::MISSING {
2886 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
2887 let lpred = la_body.left(self);
2888 if self.get_kind(lpred) == Kind::Pred {
2889 let l = lpred.pred_tset(self);
2890 let r = la_tail.pred_tset(self);
2891 let psi_and = self.solver().and_id(l, r);
2892 let rewrite = self.mk_pred(psi_and);
2893 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2894 let new_right =
2895 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
2896 return self.mk_concat(rewrite, new_right);
2897 }
2898 }
2899 }
2900
2901 self.get_node_id(key)
2902 }
2903
2904 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
2905 let has_match = (packed >> 16) > 0;
2906 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
2907 return NodeId::BOT;
2908 }
2909 debug_assert!(
2910 body == NodeId::BOT || !self.is_infinite(body),
2911 "Counted body must have finite max length"
2912 );
2913 let chain = self.prune_counted_chain(body, chain);
2914 let key = NodeKey {
2915 kind: Kind::Counted,
2916 left: body,
2917 right: chain,
2918 extra: packed,
2919 };
2920 if let Some(id) = self.key_is_created(&key) {
2921 return *id;
2922 }
2923 self.get_node_id(key)
2924 }
2925
2926 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
2927 if chain == NodeId::MISSING || body == NodeId::BOT {
2928 return chain;
2929 }
2930 if self.nullability(body) != Nullability::NEVER {
2931 return NodeId::MISSING;
2932 }
2933 let chain_body = chain.left(self);
2934 if chain_body == NodeId::BOT {
2935 return chain;
2936 }
2937 let not_begins = self.mk_not_begins_with(body);
2938 let inter = self.mk_inter(chain_body, not_begins);
2939 let is_empty = inter == NodeId::BOT;
2940 if is_empty {
2941 self.prune_counted_chain(body, chain.right(self))
2942 } else {
2943 chain
2944 }
2945 }
2946
2947 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2948 let neg_inner = self.mk_concat(body, NodeId::TS);
2949 let neg_part = self.mk_compl(neg_inner);
2950 let conc = self.mk_concat(neg_part, NodeId::END);
2951 self.mk_lookahead(conc, NodeId::MISSING, rel)
2952 }
2953
2954 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2955 match self.get_node(body).kind {
2956 Kind::Pred => {
2957 let psi = body.pred_tset(self);
2958 let negated = self.mk_pred_not(psi);
2959 let union = self.mk_union(NodeId::BEGIN, negated);
2960 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2962 }
2963 _ => {
2964 let uc = crate::unicode_classes::utf8_char(self);
2967 let neg = self.mk_compl(body);
2968 let negated = self.mk_inter(neg, uc);
2969 let union = self.mk_union(NodeId::BEGIN, negated);
2970 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2972 }
2973 }
2974 }
2975
2976 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2977 debug_assert!(left != NodeId::MISSING);
2978 debug_assert!(right != NodeId::MISSING);
2979 if left > right {
2980 return self.mk_union(right, left);
2981 }
2982 let key = NodeKey {
2983 kind: Kind::Union,
2984 left,
2985 right,
2986 extra: u32::MAX,
2987 };
2988 if let Some(id) = self.key_is_created(&key) {
2989 return *id;
2990 }
2991
2992 if left == right {
2993 return left;
2994 }
2995 if left == NodeId::BOT {
2996 return right;
2997 }
2998 if right == NodeId::BOT {
2999 return left;
3000 }
3001 if right == NodeId::TS {
3002 return right;
3003 }
3004 if left == NodeId::TS {
3005 return left;
3006 }
3007
3008 match (self.get_kind(left), self.get_kind(right)) {
3009 (Kind::Union, _) => {
3010 self.iter_unions_b(left, &mut |b, v| {
3011 b.temp_vec.push(v);
3012 });
3013 self.iter_unions_b(right, &mut |b, v| {
3014 b.temp_vec.push(v);
3015 });
3016 self.temp_vec.sort();
3017 let tree = self.temp_vec.clone();
3018 self.temp_vec.clear();
3019 let newnode = tree
3020 .iter()
3021 .rev()
3022 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3023 return self.init_as(key, newnode);
3024 }
3025 (_, Kind::Union) => {
3026 let rleft = right.left(self);
3027 if left > rleft {
3029 self.iter_unions_b(left, &mut |b, v| {
3030 b.temp_vec.push(v);
3031 });
3032 self.iter_unions_b(right, &mut |b, v| {
3033 b.temp_vec.push(v);
3034 });
3035 self.temp_vec.sort();
3036 let tree = self.temp_vec.clone();
3037 self.temp_vec.clear();
3038 let newnode = tree
3039 .iter()
3040 .rev()
3041 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3042 return self.init_as(key, newnode);
3043 } else {
3044 if let Some(rw) = self.attempt_rw_unions(left, right) {
3045 return self.init_as(key, rw);
3046 }
3047 }
3048 }
3049 _ => {}
3050 }
3051
3052 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3053 return self.init_as(key, rw);
3054 }
3055 self.init(key)
3056 }
3057
3058 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3059 debug_assert!(left_id != NodeId::MISSING);
3060 debug_assert!(right_id != NodeId::MISSING);
3061 if left_id == right_id {
3062 return left_id;
3063 }
3064 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3065 return NodeId::BOT;
3066 }
3067 if left_id == NodeId::TS {
3068 return right_id;
3069 }
3070 if right_id == NodeId::TS {
3071 return left_id;
3072 }
3073 if left_id > right_id {
3074 return self.mk_inter(right_id, left_id);
3075 }
3076 let key = NodeKey {
3077 kind: Kind::Inter,
3078 left: left_id,
3079 right: right_id,
3080 extra: u32::MAX,
3081 };
3082 if let Some(id) = self.key_is_created(&key) {
3083 return *id;
3084 }
3085
3086 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3087 return self.init_as(key, rw);
3088 }
3089
3090 self.init(key)
3091 }
3092
3093 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3094 let node = NodeKey {
3095 kind,
3096 left: NodeId::MISSING,
3097 right: NodeId::MISSING,
3098 extra: u32::MAX,
3099 };
3100 self.init(node)
3101 }
3102
3103 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3104 let star = self.mk_star(body_id);
3105 self.mk_concat(body_id, star)
3106 }
3107 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3108 let opt = self.mk_opt(body_id);
3109 let mut nodes1 = vec![];
3110 for _ in lower..upper {
3111 nodes1.push(opt);
3112 }
3113 for _ in 0..lower {
3114 nodes1.push(body_id);
3115 }
3116 self.mk_concats(nodes1.into_iter())
3117 }
3118 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3119 self.mk_union(NodeId::EPS, body_id)
3120 }
3121
3122 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3123 let key = NodeKey {
3124 kind: Kind::Star,
3125 left: body_id,
3126 right: NodeId::MISSING,
3127 extra: 0,
3128 };
3129 if let Some(id) = self.key_is_created(&key) {
3130 return *id;
3131 }
3132 if body_id.is_kind(self, Kind::Star) {
3134 return body_id;
3135 }
3136 self.get_node_id(key)
3137 }
3138
3139 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3142 match self.get_kind(node_id) {
3143 Kind::End => Nullability::EMPTYSTRING,
3144 Kind::Begin => Nullability::EMPTYSTRING,
3145 Kind::Pred => Nullability::NEVER,
3146 Kind::Star => Nullability::ALWAYS,
3147 Kind::Inter | Kind::Concat => {
3148 let lnull = self.nullability_emptystring(node_id.left(self));
3149 let rnull = self.nullability_emptystring(node_id.right(self));
3150 lnull.and(rnull) }
3152 Kind::Union => {
3153 let lnull = self.nullability_emptystring(node_id.left(self));
3154 let rnull = self.nullability_emptystring(node_id.right(self));
3155 lnull.or(rnull)
3156 }
3157 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3158 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3159 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3160 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3161 }
3162 }
3163
3164 #[inline(always)]
3165 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3166 self.get_meta(node_id)
3167 .flags
3168 .nullability()
3169 .has(Nullability::CENTER.or(Nullability::END))
3170 }
3171
3172 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3173 self.get_only_nullability(node_id)
3174 }
3175
3176 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3177 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3178 }
3179
3180 pub fn pp(&self, node_id: NodeId) -> String {
3181 let mut s = String::new();
3182 self.ppw(&mut s, node_id).unwrap();
3183 s
3184 }
3185
3186 #[allow(dead_code)]
3187 pub fn pp_nulls(&self, node_id: NodeId) -> String {
3188 let nu = self.get_nulls_id(node_id);
3189 let nr = self.mb.nb.get_set_ref(nu);
3190 let s1 = format!("{:?}", nr);
3191 s1
3192 }
3193
3194 #[allow(dead_code)]
3195 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3196 match self.get_tregex(term_id) {
3197 TRegex::Leaf(node_id) => {
3198 let mut s = String::new();
3199 self.ppw(&mut s, *node_id).unwrap();
3200 s
3201 }
3202 TRegex::ITE(cond, then_id, else_id) => {
3203 format!(
3204 "ITE({},{},{})",
3205 self.solver_ref().pp(*cond),
3206 self.ppt(*then_id),
3207 self.ppt(*else_id)
3208 )
3209 }
3210 }
3211 }
3212
3213 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3214 if cfg!(feature = "graphviz") {
3215 match node_id {
3216 NodeId::MISSING => return write!(s, "MISSING"),
3217 NodeId::BOT => return write!(s, "⊥"),
3218 NodeId::TS => return write!(s, "_*"),
3219 NodeId::TOP => return write!(s, "_"),
3220 NodeId::EPS => return write!(s, "ε"),
3221 _ => {}
3222 }
3223 }
3224
3225 match node_id {
3226 NodeId::MISSING => return write!(s, "MISSING"),
3227 NodeId::BOT => return write!(s, "⊥"),
3228 NodeId::TS => return write!(s, "_*"),
3229 NodeId::TOP => return write!(s, "_"),
3230 NodeId::EPS => return write!(s, ""),
3231 _ => {}
3232 }
3233
3234 match self.get_kind(node_id) {
3235 Kind::End => write!(s, r"\z"),
3236 Kind::Begin => write!(s, r"\A"),
3237 Kind::Pred => {
3238 let psi = node_id.pred_tset(self);
3239 if psi == TSetId::EMPTY {
3240 write!(s, r"⊥")
3241 } else if psi == TSetId::FULL {
3242 write!(s, r"_")
3243 } else {
3244 write!(s, "{}", self.solver_ref().pp(psi))
3245 }
3246 }
3247 Kind::Inter => {
3248 write!(s, "(")?;
3249 self.ppw(s, node_id.left(self))?;
3250 write!(s, "&")?;
3251 let mut curr = node_id.right(self);
3252 while self.get_kind(curr) == Kind::Inter {
3253 let n = curr.left(self);
3254 self.ppw(s, n)?;
3255 write!(s, "&")?;
3256 curr = curr.right(self);
3257 }
3258 self.ppw(s, curr)?;
3259 write!(s, ")")
3260 }
3261 Kind::Union => {
3262 let left = node_id.left(self);
3263 let right = node_id.right(self);
3264 write!(s, "(")?;
3265 self.ppw(s, left)?;
3266 write!(s, "|")?;
3267 let mut curr = right;
3268 while self.get_kind(curr) == Kind::Union {
3269 let n = curr.left(self);
3270 self.ppw(s, n)?;
3271 write!(s, "|")?;
3272 curr = curr.right(self);
3273 }
3274 self.ppw(s, curr)?;
3275 write!(s, ")")
3276 }
3277 Kind::Concat => {
3278 let left = node_id.left(self);
3279 let right = node_id.right(self);
3280 if right.is_star(self) && right.left(self) == left {
3281 self.ppw(s, left)?;
3282 write!(s, "+")?;
3283 return Ok(());
3284 }
3285 if right.is_concat(self) {
3286 let rl = right.left(self);
3287 if rl.is_star(self) && rl.left(self) == left {
3288 self.ppw(s, left)?;
3289 write!(s, "+")?;
3290 return self.ppw(s, right.right(self));
3291 }
3292 }
3293 if right.is_concat(self) && right.left(self) == left {
3294 let mut num = 1;
3295 let mut right = right;
3296 while right.is_concat(self) && right.left(self) == left {
3297 num += 1;
3298 right = right.right(self);
3299 }
3300 if let Some(inner) = left.is_opt_v(self) {
3302 let mut inner_count = 0;
3303 let mut right2 = right;
3304 while right2.is_concat(self) && right2.left(self) == inner {
3305 inner_count += 1;
3306 right2 = right2.right(self);
3307 }
3308 if right2 == inner {
3309 inner_count += 1;
3310 self.ppw(s, inner)?;
3311 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3312 }
3313 if inner_count > 0 {
3314 self.ppw(s, inner)?;
3315 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3316 return self.ppw(s, right2);
3317 }
3318 }
3319 self.ppw(s, left)?;
3320 if right == left {
3321 num += 1;
3322 return write!(s, "{{{}}}", num);
3323 }
3324 if num <= 3 && left.is_pred(self) {
3325 for _ in 1..num {
3326 self.ppw(s, left)?;
3327 }
3328 return self.ppw(s, right);
3329 } else {
3330 write!(s, "{{{}}}", num)?;
3331 return self.ppw(s, right);
3332 }
3333 }
3334 self.ppw(s, left)?;
3335 self.ppw(s, right)
3336 }
3337 Kind::Star => {
3338 let left = node_id.left(self);
3339 let leftkind = self.get_kind(left);
3340 match leftkind {
3341 Kind::Concat | Kind::Star | Kind::Compl => {
3342 write!(s, "(")?;
3343 self.ppw(s, left)?;
3344 write!(s, ")")?;
3345 }
3346 _ => {
3347 self.ppw(s, left)?;
3348 }
3349 };
3350 write!(s, "*")
3351 }
3352 Kind::Compl => {
3353 write!(s, "~(")?;
3354 self.ppw(s, node_id.left(self))?;
3355 write!(s, ")")
3356 }
3357 Kind::Lookbehind => {
3358 let lbleft = self.get_lookbehind_prev(node_id);
3359 let lbinner = self.get_lookbehind_inner(node_id);
3360 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3361 if lbleft != NodeId::MISSING {
3362 write!(s, "「")?;
3363 self.ppw(s, lbleft)?;
3364 write!(s, "」")?;
3365 }
3366
3367 write!(s, "(?<=")?;
3368 self.ppw(s, lbinner)?;
3369 write!(s, ")")
3370 }
3371 Kind::Lookahead => {
3372 let inner = self.get_lookahead_inner(node_id);
3373 write!(s, "(?=")?;
3374 self.ppw(s, inner)?;
3375 write!(s, ")")?;
3376 if self.get_lookahead_rel(node_id) != 0 {
3377 write!(s, "{{")?;
3378 let rel = self.get_lookahead_rel(node_id);
3379 if rel == u32::MAX {
3380 write!(s, "∅")?;
3381 } else {
3382 write!(s, "{}", rel)?;
3383 }
3384 write!(s, "}}")?;
3385 }
3386 if node_id.right(self) == NodeId::MISSING {
3387 Ok(())
3388 } else {
3389 write!(s, "『")?;
3390 self.ppw(s, node_id.right(self))?;
3391 write!(s, "』")
3392 }
3393 }
3394 Kind::Counted => {
3395 let body = node_id.left(self);
3396 let packed = self.get_extra(node_id);
3397 let step = packed & 0xFFFF;
3398 let best = packed >> 16;
3399 write!(s, "#(")?;
3400 self.ppw(s, body)?;
3401 write!(s, ")s{}b{}", step, best)
3402 }
3403 }
3404 }
3405
3406 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3407 self.mk_concat(node, NodeId::TS)
3408 }
3409
3410 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3411 let node_ts = self.mk_concat(node, NodeId::TS);
3412 self.mk_compl(node_ts)
3413 }
3414
3415 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3416 let notset = self.solver().not_id(set);
3417 self.mk_pred(notset)
3418 }
3419
3420 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3421 let set_id = self.solver().u8_to_set_id(char);
3422 self.mk_pred(set_id)
3423 }
3424
3425 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3426 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3427 self.mk_pred(rangeset)
3428 }
3429
3430 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3431 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3432 for &(lo, hi) in &ranges[1..] {
3433 let r = self.mk_range_u8(lo, hi);
3434 node = self.mk_union(node, r);
3435 }
3436 node
3437 }
3438
3439 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3440 let mut prefix = Vec::new();
3441 let mut curr = node;
3442 loop {
3443 if curr == NodeId::EPS {
3444 let full = !prefix.is_empty();
3445 return (prefix, full);
3446 }
3447 if curr == NodeId::BOT {
3448 break;
3449 }
3450 if self.get_kind(curr) == Kind::Pred {
3451 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3452 Some(byte) => {
3453 prefix.push(byte);
3454 return (prefix, true);
3455 }
3456 None => break, }
3458 }
3459 if self.get_kind(curr) != Kind::Concat {
3460 break;
3461 }
3462 let left = curr.left(self);
3463 if self.get_kind(left) != Kind::Pred {
3464 break;
3465 }
3466 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3467 Some(byte) => prefix.push(byte),
3468 None => break,
3469 }
3470 curr = curr.right(self);
3471 }
3472 (prefix, false)
3473 }
3474
3475 #[allow(dead_code)]
3476 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3477 let mut result = NodeId::EPS;
3478 for byte in raw_str.iter().rev() {
3479 let node = self.mk_u8(*byte);
3480 result = self.mk_concat(node, result);
3481 }
3482 result
3483 }
3484
3485 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3486 let mut result = NodeId::EPS;
3487 for byte in raw_str.bytes().rev() {
3488 let node = self.mk_u8(byte);
3489 result = self.mk_concat(node, result);
3490 }
3491 result
3492 }
3493
3494 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3495 let mut sorted: Vec<NodeId> = nodes.collect();
3496 if sorted.len() <= 1 {
3497 return sorted.pop().unwrap_or(NodeId::BOT);
3498 }
3499 sorted.sort();
3500 sorted.dedup();
3501 sorted.retain(|&x| x != NodeId::BOT);
3502 if sorted.is_empty() {
3503 return NodeId::BOT;
3504 }
3505 if sorted.len() > 16 {
3506 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
3507 let mut non_concat: Vec<NodeId> = Vec::new();
3508 for &n in &sorted {
3509 if self.get_kind(n) == Kind::Concat {
3510 by_head.entry(self.get_left(n)).or_default().push(n);
3511 } else {
3512 non_concat.push(n);
3513 }
3514 }
3515 let mut absorbed: Vec<NodeId> = Vec::new();
3516 for &n in &non_concat {
3517 if by_head.contains_key(&n) {
3518 absorbed.push(n);
3519 }
3520 }
3521 if !absorbed.is_empty() {
3522 non_concat.retain(|n| !absorbed.contains(n));
3523 }
3524 if by_head.len() < sorted.len() {
3525 let mut groups: Vec<NodeId> = non_concat;
3526 for (head, tails) in by_head {
3527 let mut tail_nodes: Vec<NodeId> =
3528 tails.iter().map(|&n| self.get_right(n)).collect();
3529 if absorbed.contains(&head) {
3530 tail_nodes.push(NodeId::EPS);
3531 }
3532 let tail_union = self.mk_unions(tail_nodes.into_iter());
3533 let factored = self.mk_concat(head, tail_union);
3534 groups.push(factored);
3535 }
3536 groups.sort();
3537 groups.dedup();
3538 return self.mk_unions_balanced(&groups);
3539 }
3540 }
3541 self.mk_unions_balanced(&sorted)
3542 }
3543
3544 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3545 match nodes.len() {
3546 0 => NodeId::BOT,
3547 1 => nodes[0],
3548 n => {
3549 let mid = n / 2;
3550 let left = self.mk_unions_balanced(&nodes[..mid]);
3551 let right = self.mk_unions_balanced(&nodes[mid..]);
3552 self.mk_union(left, right)
3553 }
3554 }
3555 }
3556
3557 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3558 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3559 }
3560
3561 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3562 nodes
3563 .rev()
3564 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3565 }
3566}
3567
3568impl RegexBuilder {
3569 #[allow(dead_code)]
3570 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3571 match self.get_tregex(term_id).clone() {
3572 TRegex::Leaf(node_id) => {
3573 if NodeId::BOT == node_id {
3574 vec![]
3575 } else {
3576 vec![node_id]
3577 }
3578 }
3579 TRegex::ITE(_, then_id, else_id) => {
3580 let mut then_nodes = self.extract_sat(then_id);
3581 let mut else_nodes = self.extract_sat(else_id);
3582 then_nodes.append(&mut else_nodes);
3583 then_nodes
3584 }
3585 }
3586 }
3587
3588 pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3589 debug_assert!(self.get_kind(start) == Kind::Union);
3590 let mut curr = start;
3591 while self.get_kind(curr) == Kind::Union {
3592 f(curr.left(self));
3593 curr = curr.right(self);
3594 }
3595 f(curr);
3596 }
3597
3598 pub(crate) fn iter_unions_b(
3599 &mut self,
3600 curr: NodeId,
3601 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3602 ) {
3603 let mut curr = curr;
3604 while self.get_kind(curr) == Kind::Union {
3605 f(self, curr.left(self));
3606 curr = curr.right(self);
3607 }
3608 f(self, curr);
3609 }
3610
3611 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3612 if !self.contains_look(node_id) {
3613 return Some(node_id);
3614 }
3615 match self.get_kind(node_id) {
3616 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
3617 Kind::Concat => {
3618 let left = node_id.left(self);
3619 let right = node_id.right(self);
3620 let elim_l = self.try_elim_lookarounds(left)?;
3621 let elim_r = self.try_elim_lookarounds(right)?;
3622 let rw = self.mk_concat(elim_l, elim_r);
3623 Some(rw)
3624 }
3625 Kind::Union => {
3626 let left = node_id.left(self);
3627 let right = node_id.right(self);
3628 let elim_l = self.try_elim_lookarounds(left)?;
3629 let elim_r = self.try_elim_lookarounds(right)?;
3630 let rw = self.mk_union(elim_l, elim_r);
3631 Some(rw)
3632 }
3633
3634 Kind::Star => {
3635 let body = node_id.left(self);
3636 let elim_l = self.try_elim_lookarounds(body)?;
3637 Some(self.mk_star(elim_l))
3638 }
3639 Kind::Compl => {
3640 let left = node_id.left(self);
3641 let elim_l = self.try_elim_lookarounds(left)?;
3642 Some(self.mk_compl(elim_l))
3643 }
3644 Kind::Lookahead => {
3645 let rel = self.get_lookahead_rel(node_id);
3646 if rel != 0 {
3647 return None;
3648 }
3649 let lbody = self.get_lookahead_inner(node_id);
3650 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3651 let elim_l = self.try_elim_lookarounds(lbody)?;
3652 let elim_r = self.try_elim_lookarounds(ltail)?;
3653 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3654 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3655 let rw = self.mk_inter(lbody_ts, ltail_ts);
3656 Some(rw)
3657 }
3658 Kind::Lookbehind => {
3659 let linner = self.get_lookbehind_inner(node_id);
3660 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3661 let elim_l = self.try_elim_lookarounds(linner)?;
3662 let elim_r = self.try_elim_lookarounds(lprev)?;
3663 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3664 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3665 let rw = self.mk_inter(lbody_ts, ltail_ts);
3666 Some(rw)
3667 }
3668 Kind::Inter => {
3669 let left = node_id.left(self);
3670 let right = node_id.right(self);
3671 let elim_l = self.try_elim_lookarounds(left)?;
3672 let elim_r = self.try_elim_lookarounds(right)?;
3673 let rw = self.mk_inter(elim_l, elim_r);
3674 Some(rw)
3675 }
3676 Kind::Counted => None,
3677 }
3678 }
3679
3680 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3682 if self.nullability(node) == Nullability::NEVER {
3683 node
3684 } else {
3685 self.mk_inter(NodeId::TOPPLUS, node)
3686 }
3687 }
3688
3689 fn iter_find_stack(
3690 &self,
3691 stack: &mut Vec<TRegexId>,
3692 mut f: impl FnMut(NodeId) -> bool,
3693 ) -> bool {
3694 loop {
3695 match stack.pop() {
3696 None => return false,
3697 Some(curr) => match self.get_tregex(curr) {
3698 TRegex::Leaf(n) => {
3699 let mut curr = *n;
3700 while curr != NodeId::BOT {
3701 match self.get_kind(curr) {
3702 Kind::Union => {
3703 if f(curr.left(self)) {
3704 return true;
3705 }
3706 curr = curr.right(self);
3707 }
3708 _ => {
3709 if f(*n) {
3710 return true;
3711 }
3712 curr = NodeId::BOT;
3713 }
3714 }
3715 }
3716 }
3717 TRegex::ITE(_, then_id, else_id) => {
3718 if *else_id != TRegexId::BOT {
3719 stack.push(*else_id);
3720 }
3721 stack.push(*then_id);
3722 }
3723 },
3724 }
3725 }
3726 }
3727
3728 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3729 if node == NodeId::BOT {
3730 return Some(true);
3731 }
3732 if self.nullability(node) != Nullability::NEVER {
3733 return Some(false);
3734 }
3735 if let Some(cached) = self.cache_empty.get(&node) {
3736 if cached.is_checked() {
3737 return Some(cached.is_empty());
3738 }
3739 }
3740 let node = if !self.contains_look(node) {
3741 node
3742 } else {
3743 self.try_elim_lookarounds(node)?
3744 };
3745 let isempty_flag = self.is_empty_lang_internal(node);
3746
3747 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3748 }
3749
3750 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3751 if !self.get_meta_flags(initial_node).contains_inter() {
3753 return Ok(NodeFlags::ZERO);
3754 }
3755
3756 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3757 let mut worklist: VecDeque<NodeId> = VecDeque::new();
3758 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3759 let mut stack = Vec::new();
3760 stack.push(begin_der);
3761 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3762 visited.insert(node, initial_node);
3763 let nullability = self.nullability(node);
3764 if nullability != Nullability::NEVER {
3765 true
3766 } else {
3767 worklist.push_back(node);
3768 false
3769 }
3770 });
3771 if found_nullable_right_away {
3772 return Ok(NodeFlags::ZERO);
3773 }
3774
3775 worklist.push_back(initial_node);
3776 let isempty_flag: NodeFlags;
3777 let mut found_node = NodeId::BOT;
3778
3779 loop {
3780 match worklist.pop_front() {
3781 None => {
3782 isempty_flag = NodeFlags::IS_EMPTY;
3783 break;
3784 }
3785 Some(outer) => {
3786 if let Some(cached) = self.cache_empty.get(&outer) {
3787 if cached.is_checked() {
3788 if cached.is_empty() {
3789 continue;
3790 } else {
3791 return Ok(NodeFlags::ZERO);
3792 }
3793 }
3794 }
3795
3796 let derivative = self.der(outer, Nullability::CENTER)?;
3797
3798 stack.push(derivative);
3799
3800 let found_null = self.iter_find_stack(&mut stack, |node| {
3801 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
3802 found_node = node;
3803 if !self.get_meta_flags(node).contains_inter() {
3804 true
3805 } else {
3806 e.insert(outer);
3807 worklist.push_front(node);
3808 self.any_nonbegin_nullable(node)
3809 }
3810 } else {
3811 false
3812 }
3813 });
3814 if found_null {
3815 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3816 isempty_flag = NodeFlags::ZERO;
3817 break;
3818 }
3819 }
3820 }
3821 }
3822
3823 self.cache_empty.insert(
3824 initial_node,
3825 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3826 );
3827 Ok(isempty_flag)
3828 }
3829
3830 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3832 if larger_lang == smaller_lang {
3833 return Some(true);
3834 }
3835
3836 if self
3838 .nullability(larger_lang)
3839 .not()
3840 .and(self.nullability(smaller_lang))
3841 != Nullability::NEVER
3842 {
3843 return Some(false);
3844 }
3845
3846 let (smaller_lang, larger_lang) =
3851 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3852 let wrap = |b: &mut Self, n: NodeId| {
3853 let tmp = b.mk_concat(n, NodeId::TS);
3854 b.mk_concat(NodeId::TS, tmp)
3855 };
3856 (wrap(self, smaller_lang), wrap(self, larger_lang))
3857 } else {
3858 (smaller_lang, larger_lang)
3859 };
3860
3861 let nota = self.mk_compl(larger_lang);
3862 let diff = self.mk_inter(smaller_lang, nota);
3863 self.is_empty_lang(diff)
3864 }
3865}