1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10#[cfg(feature = "reentrant-assert")]
11use rustc_hash::FxHashSet;
12use solver::{Solver, TSetId};
13use std::collections::{BTreeSet, VecDeque};
14use std::fmt::Debug;
15use std::fmt::Write;
16use std::hash::Hash;
17pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
18
19use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
20pub mod nulls;
21pub mod solver;
22
23const IS_EMPTY_LANG_NODE_BUDGET: usize = 10_000;
24
25#[derive(Debug, Clone, PartialEq, Eq)]
26pub enum ResharpError {
27 AnchorLimit,
28 StateSpaceExplosion,
29 UnsupportedPattern,
30}
31
32impl std::fmt::Display for ResharpError {
33 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34 match self {
35 ResharpError::AnchorLimit => write!(f, "anchor limit exceeded"),
36 ResharpError::StateSpaceExplosion => {
37 write!(f, "too many states, likely infinite state space")
38 }
39 ResharpError::UnsupportedPattern => write!(
40 f,
41 "unsupported pattern: eg. lookaround, `\\b`/`^`/`$` inside a complement `~(...)` or a star `*`"
42 ),
43 }
44 }
45}
46
47impl std::error::Error for ResharpError {}
48
49mod id {
50 pub const MISSING: u32 = 0;
51 pub const BOT: u32 = 1;
52 pub const EPS: u32 = 2;
53 pub const TOP: u32 = 3;
54 pub const TOPSTAR: u32 = 4;
55 pub const TOPPLUS: u32 = 5;
56 pub const BEGIN: u32 = 6;
57 pub const END: u32 = 7;
58}
59
60#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
61pub(crate) struct MetadataId(u32);
62impl MetadataId {
63 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
64}
65
66#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
67#[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))]
68pub struct NodeId(pub u32);
69impl NodeId {
70 pub const MISSING: NodeId = NodeId(id::MISSING);
71 pub const BOT: NodeId = NodeId(id::BOT);
72 pub const EPS: NodeId = NodeId(id::EPS);
73 pub const TOP: NodeId = NodeId(id::TOP);
74 pub const TS: NodeId = NodeId(id::TOPSTAR);
75 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
76 pub const BEGIN: NodeId = NodeId(id::BEGIN);
77 pub const END: NodeId = NodeId(id::END);
78
79 #[inline]
80 pub fn as_u32(self) -> u32 {
81 self.0
82 }
83
84 #[inline]
85 pub fn from_u32(v: u32) -> NodeId {
86 NodeId(v)
87 }
88}
89impl Debug for NodeId {
90 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91 let num = &self.0;
92 f.write_str(format!("{num}").as_str())
93 }
94}
95
96#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
97pub struct TRegexId(u32);
98impl TRegexId {
99 pub const MISSING: TRegexId = TRegexId(id::MISSING);
100 pub const EPS: TRegexId = TRegexId(id::EPS);
101 pub const BOT: TRegexId = TRegexId(id::BOT);
102 pub const TOP: TRegexId = TRegexId(id::TOP);
103 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
104}
105
106#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
107pub(crate) struct MetaFlags(u8);
108impl MetaFlags {
109 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
112 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 3);
113 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 4);
114 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 5);
115 pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 6);
116 pub(crate) const CONTAINS_LOOKAHEAD: MetaFlags = MetaFlags(1 << 7);
117
118 #[inline]
119 pub(crate) fn nullability(self) -> Nullability {
120 Nullability(self.0 & Self::NULL_MASK)
121 }
122
123 #[inline]
124 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
125 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
126 }
127
128 #[inline]
129 pub(crate) fn has(self, flag: MetaFlags) -> bool {
130 self.0 & flag.0 != 0
131 }
132 #[inline]
133 const fn and(self, other: MetaFlags) -> MetaFlags {
134 MetaFlags(self.0 & other.0)
135 }
136 #[inline]
137 const fn or(self, other: MetaFlags) -> MetaFlags {
138 MetaFlags(self.0 | other.0)
139 }
140
141 pub(crate) fn contains_inter(self) -> bool {
142 self.has(MetaFlags::CONTAINS_INTER)
143 }
144
145 pub(crate) fn all_contains_flags(self) -> MetaFlags {
146 self.and(
147 MetaFlags::CONTAINS_ANCHORS
148 .or(MetaFlags::CONTAINS_INTER)
149 .or(MetaFlags::CONTAINS_LOOKBEHIND)
150 .or(MetaFlags::CONTAINS_LOOKAHEAD),
151 )
152 }
153}
154
155#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
156#[repr(u8)]
157pub enum Kind {
158 #[default]
159 Pred,
160 Star,
161 Begin,
162 End,
163 Concat,
164 Union,
165 Compl,
166 Lookbehind,
167 Lookahead,
168 Inter,
169 Ordered,
170}
171
172#[derive(Clone, Copy)]
173struct NodeLen {
174 mml_min: u32,
175 mml_max: u32,
176 es_null: Nullability,
177}
178
179impl Default for NodeLen {
180 fn default() -> Self {
181 NodeLen {
182 mml_min: 0,
183 mml_max: 0,
184 es_null: Nullability::NEVER,
185 }
186 }
187}
188
189#[derive(Eq, Hash, PartialEq, Clone)]
190struct Metadata {
191 flags: MetaFlags,
192 nulls: NullsId,
193}
194
195struct MetadataBuilder {
196 num_created: u32,
197 solver: Solver,
198 nb: NullsBuilder,
199 index: FxHashMap<Metadata, MetadataId>,
200 pub array: Vec<Metadata>,
201}
202
203impl MetadataBuilder {
204 fn new() -> MetadataBuilder {
205 Self {
206 index: FxHashMap::default(),
207 array: vec![Metadata {
208 flags: MetaFlags::ZERO,
209 nulls: NullsId::EMPTY,
210 }],
211 solver: Solver::new(),
212 num_created: 0,
213 nb: NullsBuilder::new(),
214 }
215 }
216
217 fn init(&mut self, inst: Metadata) -> MetadataId {
218 self.num_created += 1;
219 let new_id = MetadataId(self.num_created);
220 self.index.insert(inst.clone(), new_id);
221 self.array.push(inst);
222 new_id
223 }
224
225 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
226 match self.index.get(&inst) {
227 Some(&id) => id,
228 None => self.init(inst),
229 }
230 }
231
232 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
233 &self.array[inst.0 as usize]
234 }
235
236 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
237 setflags.all_contains_flags()
238 }
239
240 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
241 let left = &self.array[body.0 as usize].flags;
242 let contains = self.get_contains(*left);
243 let inf = if body_id == NodeId::BOT {
245 MetaFlags::ZERO
246 } else {
247 MetaFlags::INFINITE_LENGTH
248 };
249 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
250 }
251
252 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
253 let left = &self.array[left_id.0 as usize].flags;
254 let null = left.nullability().not().and(Nullability::ALWAYS);
255 let contains = self.get_contains(*left);
256 MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
257 }
258
259 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
260 let left = &self.array[left_id.0 as usize].flags;
261 let right = &self.array[right_id.0 as usize].flags;
262 let null = left.nullability().and(right.nullability());
263 let contains = self.get_contains(left.or(*right));
264 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
265 MetaFlags::with_nullability(null, contains.or(len))
266 }
267
268 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
269 let left = &self.array[left_id.0 as usize].flags;
270 let right = &self.array[right_id.0 as usize].flags;
271 let null = left.nullability().and(right.nullability());
272 let contains = self
273 .get_contains(left.or(*right))
274 .or(MetaFlags::CONTAINS_INTER);
275 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
276 MetaFlags::with_nullability(null, contains.or(len))
277 }
278
279 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
280 let left = &self.array[left_id.0 as usize].flags;
281 let right = &self.array[right_id.0 as usize].flags;
282 let null = left.nullability().or(right.nullability());
283 let contains = self.get_contains(left.or(*right));
284 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
285 MetaFlags::with_nullability(null, contains.or(len))
286 }
287}
288
289#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
290pub struct NodeKey {
291 pub(crate) kind: Kind,
292 pub(crate) left: NodeId,
293 pub(crate) right: NodeId,
294 pub(crate) extra: u32,
295}
296
297#[derive(Eq, Hash, PartialEq, Clone, Debug)]
298pub enum TRegex<TSet> {
299 Leaf(NodeId),
300 ITE(TSet, TRegexId, TRegexId),
301}
302
303#[derive(Eq, Hash, PartialEq, Clone, Copy)]
304pub(crate) struct NodeFlags(u8);
305impl NodeFlags {
306 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
307 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
308 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
309
310 fn is_checked(self) -> bool {
311 self.0 >= NodeFlags::IS_CHECKED.0
312 }
313 fn is_empty(self) -> bool {
314 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
315 }
316}
317
318#[derive(Eq, Hash, PartialEq, Clone, Copy)]
319pub(crate) struct BuilderFlags(u8);
320impl BuilderFlags {
321 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
322 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
323}
324
325pub struct RegexBuilder {
326 mb: MetadataBuilder,
327 temp_vec: Vec<NodeId>,
328 num_created: u32,
329 index: FxHashMap<NodeKey, NodeId>,
330 array: Vec<NodeKey>,
331 metadata: Vec<MetadataId>,
332 reversed: Vec<NodeId>,
333 cache_empty: FxHashMap<NodeId, NodeFlags>,
334 lengths: Vec<NodeLen>,
335 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
336 tr_array: Vec<TRegex<TSetId>>,
337 tr_der_center: Vec<TRegexId>,
338 tr_der_begin: Vec<TRegexId>,
339 flags: BuilderFlags,
340 pub lookahead_context_max: u32,
342 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
343 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
344 #[cfg(feature = "reentrant-assert")]
345 rw_active: FxHashSet<(Kind, NodeId, NodeId)>,
346}
347
348impl NodeId {
349 fn is_missing(&self) -> bool {
350 *self == NodeId::MISSING
351 }
352 fn is_eps(&self) -> bool {
353 *self == NodeId::EPS
354 }
355 #[inline]
356 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
357 b.get_flags_contains(self)
358 }
359 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
360 if self == tail {
361 true
362 } else if self.is_kind(b, Kind::Concat) {
363 self.right(b).has_concat_tail(b, tail)
364 } else {
365 false
366 }
367 }
368 fn missing_to_eps(&self) -> NodeId {
369 if *self == NodeId::MISSING {
370 NodeId::EPS
371 } else {
372 *self
373 }
374 }
375
376 #[inline]
377 fn kind(self, b: &RegexBuilder) -> Kind {
378 b.get_kind(self)
379 }
380 #[inline]
381 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
382 b.get_kind(self) == k
383 }
384 #[inline]
385 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
386 b.nullability(self) == Nullability::NEVER
387 }
388 #[inline]
389 pub fn nullability(self, b: &RegexBuilder) -> Nullability {
390 b.nullability(self)
391 }
392
393 #[inline]
394 pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
395 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
396 }
397 #[inline]
398 pub fn is_begin_nullable(self, b: &RegexBuilder) -> bool {
399 b.nullability(self).has(Nullability::BEGIN)
400 }
401 #[inline]
402 pub fn left(self, b: &RegexBuilder) -> NodeId {
403 b.get_left(self)
404 }
405
406 #[inline]
407 pub fn right(self, b: &RegexBuilder) -> NodeId {
408 b.get_right(self)
409 }
410
411 #[inline]
412 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, ResharpError> {
413 b.der(self, mask)
414 }
415
416 #[inline]
417 fn extra(self, b: &RegexBuilder) -> u32 {
418 b.get_extra(self)
419 }
420
421 #[inline]
422 fn is_pred(self, b: &RegexBuilder) -> bool {
423 b.get_kind(self) == Kind::Pred
424 }
425
426 #[inline]
427 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
428 debug_assert!(self.is_pred(b));
429 TSetId(b.get_extra(self))
430 }
431 #[inline]
432 pub fn is_star(self, b: &RegexBuilder) -> bool {
433 if NodeId::EPS == self {
434 return false;
435 }
436 b.get_kind(self) == Kind::Star
437 }
438
439 pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
440 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
441 }
442
443 pub fn contains_lookahead(self, b: &RegexBuilder) -> bool {
444 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAHEAD)
445 }
446
447 pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
448 b.get_meta_flags(self)
449 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
450 }
451
452 #[inline]
453 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
454 b.get_kind(self) == Kind::Inter
455 }
456
457 #[inline]
458 pub(crate) fn is_compl(self, b: &RegexBuilder) -> bool {
459 b.get_kind(self) == Kind::Compl
460 }
461
462 #[inline]
463 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
464 if self.is_concat(b) {
465 let r = self.right(b);
466 return r.is_star(b) && r.left(b) == self.left(b);
467 }
468 false
469 }
470
471 #[inline]
472 fn is_begin(self) -> bool {
473 self == NodeId::BEGIN
474 }
475
476 #[inline]
477 fn is_end(self) -> bool {
478 self == NodeId::END
479 }
480
481 #[inline]
482 fn is_union(self, b: &RegexBuilder) -> bool {
483 b.get_kind(self) == Kind::Union
484 }
485
486 #[inline]
487 pub fn is_concat(self, b: &RegexBuilder) -> bool {
488 b.get_kind(self) == Kind::Concat
489 }
490
491 #[inline]
492 fn is_lookahead(self, b: &RegexBuilder) -> bool {
493 b.get_kind(self) == Kind::Lookahead
494 }
495
496 #[inline]
497 fn is_lookbehind(self, b: &RegexBuilder) -> bool {
498 b.get_kind(self) == Kind::Lookbehind
499 }
500
501 #[inline]
502 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
503 if b.get_kind(self) == Kind::Union && b.get_left(self).is_eps() {
504 Some(b.get_right(self))
505 } else {
506 None
507 }
508 }
509
510 #[inline]
511 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
512 if self.is_concat(b) {
513 let left = self.left(b);
514 let right = self.right(b);
515 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
516 return right == NodeId::END;
517 }
518 }
519 false
520 }
521
522 #[inline]
523 #[allow(dead_code)]
524 fn is_ts(self) -> bool {
525 NodeId::TS == self
526 }
527
528 #[inline]
529 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
530 if NodeId::EPS == self {
531 return None;
532 }
533 if self.is_star(b) && self.left(b).is_pred(b) {
534 Some(self.left(b))
535 } else {
536 None
537 }
538 }
539
540 #[inline]
541 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
542 if self.is_concat(b) && self.left(b) == NodeId::TS {
543 let middle = self.right(b);
544 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
545 return Some(middle.left(b));
546 }
547 }
548 None
549 }
550
551 #[inline]
552 pub(crate) fn iter_union(
553 self,
554 b: &mut RegexBuilder,
555 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
556 ) {
557 let mut curr: NodeId = self;
558 while curr.kind(b) == Kind::Union {
559 f(b, curr.left(b));
560 curr = curr.right(b);
561 }
562 f(b, curr);
563 }
564
565 #[inline]
566 pub(crate) fn iter_inter(
567 self,
568 b: &mut RegexBuilder,
569 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
570 ) {
571 let mut curr: NodeId = self;
572 while curr.kind(b) == Kind::Inter {
573 f(b, curr.left(b));
574 curr = curr.right(b);
575 }
576 f(b, curr);
577 }
578
579 #[inline]
580 pub fn iter_union_while(
581 self,
582 b: &mut RegexBuilder,
583 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
584 ) {
585 let mut curr: NodeId = self;
586 let mut continue_loop = true;
587 while continue_loop && curr.kind(b) == Kind::Union {
588 continue_loop = f(b, curr.left(b));
589 curr = curr.right(b);
590 }
591 if continue_loop {
592 f(b, curr);
593 }
594 }
595
596 #[inline]
597 pub(crate) fn any_inter_component(
598 self,
599 b: &RegexBuilder,
600 mut f: impl FnMut(NodeId) -> bool,
601 ) -> bool {
602 debug_assert!(self.kind(b) == Kind::Inter);
603 let mut cur = self;
604 while cur.kind(b) == Kind::Inter {
605 if f(cur.left(b)) {
606 return true;
607 }
608 cur = cur.right(b);
609 }
610 f(cur)
611 }
612
613 #[inline]
614 pub(crate) fn any_union_component(
615 self,
616 b: &RegexBuilder,
617 mut f: impl FnMut(NodeId) -> bool,
618 ) -> bool {
619 let mut cur = self;
620 while cur.kind(b) == Kind::Union {
621 if f(cur.left(b)) {
622 return true;
623 }
624 cur = cur.right(b);
625 }
626 f(cur)
627 }
628}
629
630impl Default for RegexBuilder {
631 fn default() -> Self {
632 Self::new()
633 }
634}
635
636impl RegexBuilder {
637 pub fn new() -> RegexBuilder {
638 let mut inst = Self {
639 mb: MetadataBuilder::new(),
640 array: Vec::new(),
641 index: FxHashMap::default(),
642 cache_empty: FxHashMap::default(),
643 lengths: vec![NodeLen::default()],
644 tr_array: Vec::new(),
645 tr_cache: FxHashMap::default(),
646 flags: BuilderFlags::ZERO,
647 lookahead_context_max: 800,
648 num_created: 0,
649 metadata: Vec::new(),
650 reversed: Vec::new(),
651 tr_der_center: Vec::new(),
652 tr_der_begin: Vec::new(),
653 temp_vec: Vec::new(),
654 mk_binary_memo: FxHashMap::default(),
655 clean_cache: FxHashMap::default(),
656 #[cfg(feature = "reentrant-assert")]
657 rw_active: FxHashSet::default(),
658 };
659 inst.array.push(NodeKey::default());
660 inst.mk_pred(TSetId::EMPTY);
661 inst.mk_star(NodeId::BOT);
662 inst.mk_pred(TSetId::FULL);
663 inst.mk_star(NodeId::TOP);
664 let top_plus_id = inst.mk_plus(NodeId::TOP);
665 inst.mk_unset(Kind::Begin);
666 inst.mk_unset(Kind::End);
667 debug_assert!(top_plus_id == NodeId::TOPPLUS);
668
669 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
670 inst.mk_leaf(NodeId::BOT);
671 inst.mk_leaf(NodeId::EPS);
672 inst.mk_leaf(NodeId::TOP);
673 inst.mk_leaf(NodeId::TS);
674
675 inst.flags = BuilderFlags::SUBSUME;
676 inst
677 }
678
679 #[inline]
680 pub fn solver_ref(&self) -> &Solver {
681 &self.mb.solver
682 }
683
684 #[inline]
685 pub fn solver(&mut self) -> &mut Solver {
686 &mut self.mb.solver
687 }
688
689 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
690 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
691 self.tr_cache.insert(inst.clone(), new_id);
692 self.tr_array.push(inst);
693 new_id
694 }
695
696 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
697 match self.tr_cache.get(&inst) {
698 Some(&id) => id,
699 None => self.tr_init(inst),
700 }
701 }
702
703 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
704 &self.tr_array[inst.0 as usize]
705 }
706
707 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
708 let solv = self.solver();
709 !solv.is_sat_id(cond1, cond2)
710 }
711
712 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
713 let term = TRegex::<TSetId>::Leaf(node_id);
714 self.get_tregex_id(term)
715 }
716
717 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
718 let tmp_inst = TRegex::ITE(cond, _then, _else);
719 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
720 return *cached;
721 }
722 if _then == _else {
723 return _then;
724 }
725 if self.solver().is_full_id(cond) {
726 return _then;
727 }
728 if self.solver().is_empty_id(cond) {
729 return _else;
730 }
731 let _clean_then = match self.tr_array[_then.0 as usize] {
732 TRegex::Leaf(_) => _then,
733 _ => self.clean(cond, _then),
734 };
735 let notcond = self.solver().not_id(cond);
736 let _clean_else = match self.tr_array[_else.0 as usize] {
737 TRegex::Leaf(_) => _else,
738 _ => self.clean(notcond, _else),
739 };
740
741 if _clean_then == _clean_else {
742 return _clean_then;
743 }
744 match self.get_tregex(_clean_then) {
746 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
747 let _cond_then = *leftcond;
748 let new_then = *_inner_then;
749 let sand = self.solver().and_id(cond, _cond_then);
750 return self.mk_ite(sand, new_then, _clean_else);
751 }
752 _ => {}
753 }
754 match self.get_tregex(_clean_else) {
756 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
758 let e2clone = *_e2;
759 let new_cond = self.mb.solver.or_id(cond, *_c2);
760 return self.mk_ite(new_cond, _clean_then, e2clone);
761 }
762 _ => {}
763 }
764
765 if _clean_then == TRegexId::BOT {
766 let flip_cond = self.solver().not_id(cond);
767 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
768 return cleaned_id;
769 }
770
771 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
772 }
773
774 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
775 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
776 return cached;
777 }
778 let result = match *self.get_tregex(tterm) {
779 TRegex::Leaf(_) => tterm,
780 TRegex::ITE(alpha, _then_id, _else_id) => {
781 let notalpha = self.mb.solver.not_id(alpha);
782 if self.mb.solver.unsat_id(beta, alpha) {
783 self.clean(beta, _else_id)
785 } else if self.unsat(beta, notalpha) {
786 self.clean(beta, _then_id)
788 } else {
789 let tc = self.mb.solver.and_id(beta, alpha);
790 let ec = self.mb.solver.and_id(beta, notalpha);
791 let new_then = self.clean(tc, _then_id);
792 let new_else = self.clean(ec, _else_id);
793 self.mk_ite(alpha, new_then, new_else)
794 }
795 }
796 };
797 self.clean_cache.insert((beta, tterm), result);
798 result
799 }
800
801 fn mk_unary(
802 &mut self,
803 term: TRegexId,
804 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
805 ) -> TRegexId {
806 match self.tr_array[term.0 as usize] {
807 TRegex::Leaf(node_id) => {
808 let applied = apply(self, node_id);
809 self.mk_leaf(applied)
810 }
811 TRegex::ITE(c1, _then, _else) => {
812 let _then1 = self.mk_unary(_then, apply);
813 let _else1 = self.mk_unary(_else, apply);
814 self.mk_ite(c1, _then1, _else1)
815 }
816 }
817 }
818
819 fn mk_binary_result(
820 &mut self,
821 left: TRegexId,
822 right: TRegexId,
823 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, ResharpError>,
824 ) -> Result<TRegexId, ResharpError> {
825 match self.tr_array[left.0 as usize] {
826 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
827 TRegex::Leaf(right_node_id) => {
828 let applied = apply(self, left_node_id, right_node_id)?;
829 Ok(self.mk_leaf(applied))
830 }
831 TRegex::ITE(c2, _then, _else) => {
832 let then2 = self.mk_binary_result(left, _then, apply)?;
833 let else2 = self.mk_binary_result(left, _else, apply)?;
834 Ok(self.mk_ite(c2, then2, else2))
835 }
836 },
837 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
838 TRegex::Leaf(_) => {
839 let then2 = self.mk_binary_result(_then1, right, apply)?;
840 let else2 = self.mk_binary_result(_else1, right, apply)?;
841 Ok(self.mk_ite(c1, then2, else2))
842 }
843 TRegex::ITE(c2, _then2, _else2) => {
844 if c1 == c2 {
845 let _then = self.mk_binary_result(_then1, _then2, apply)?;
846 let _else = self.mk_binary_result(_else1, _else2, apply)?;
847 return Ok(self.mk_ite(c1, _then, _else));
848 }
849 if c1.0 > c2.0 {
850 let _then = self.mk_binary_result(_then1, right, apply)?;
851 let _else = self.mk_binary_result(_else1, right, apply)?;
852 Ok(self.mk_ite(c1, _then, _else))
853 } else {
854 let _then = self.mk_binary_result(left, _then2, apply)?;
855 let _else = self.mk_binary_result(left, _else2, apply)?;
856 Ok(self.mk_ite(c2, _then, _else))
857 }
858 }
859 },
860 }
861 }
862
863 fn mk_binary(
864 &mut self,
865 left: TRegexId,
866 right: TRegexId,
867 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
868 ) -> TRegexId {
869 self.mk_binary_memo.clear();
870 self.mk_binary_inner(left, right, apply)
871 }
872
873 fn mk_binary_inner(
874 &mut self,
875 left: TRegexId,
876 right: TRegexId,
877 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
878 ) -> TRegexId {
879 if left == right {
880 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
881 }
882 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
883 return cached;
884 }
885 let result = match self.tr_array[left.0 as usize] {
886 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
887 TRegex::Leaf(right_node_id) => {
888 let applied = apply(self, left_node_id, right_node_id);
889 self.mk_leaf(applied)
890 }
891 TRegex::ITE(c2, _then, _else) => {
892 let then2 = self.mk_binary_inner(left, _then, apply);
893 let else2 = self.mk_binary_inner(left, _else, apply);
894 self.mk_ite(c2, then2, else2)
895 }
896 },
897 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
898 TRegex::Leaf(_) => {
899 let then2 = self.mk_binary_inner(_then1, right, apply);
900 let else2 = self.mk_binary_inner(_else1, right, apply);
901 self.mk_ite(c1, then2, else2)
902 }
903 TRegex::ITE(c2, _then2, _else2) => {
904 if c1 == c2 {
905 let _then = self.mk_binary_inner(_then1, _then2, apply);
906 let _else = self.mk_binary_inner(_else1, _else2, apply);
907 self.mk_ite(c1, _then, _else)
908 } else if c1.0 > c2.0 {
909 let _then = self.mk_binary_inner(_then1, right, apply);
910 let _else = self.mk_binary_inner(_else1, right, apply);
911 self.mk_ite(c1, _then, _else)
912 } else {
913 let _then = self.mk_binary_inner(left, _then2, apply);
914 let _else = self.mk_binary_inner(left, _else2, apply);
915 self.mk_ite(c2, _then, _else)
916 }
917 }
918 },
919 };
920 self.mk_binary_memo.insert((left, right), result);
921 result
922 }
923
924 pub fn get_nulls(
925 &mut self,
926 pending_rel: u32,
927 mask: Nullability,
928 acc: &mut BTreeSet<NullState>,
929 node_id: NodeId,
930 ) {
931 debug_assert!(node_id != NodeId::MISSING);
932 if !self.is_nullable(node_id, mask) {
933 return;
934 }
935 match self.get_kind(node_id) {
936 Kind::Pred => {}
937 Kind::End => {
938 if mask.has(Nullability::END) {
939 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
940 }
941 }
942 Kind::Begin => {
943 if mask.has(Nullability::BEGIN) {
944 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
945 }
946 }
947 Kind::Concat => {
948 let new_mask = self.nullability(node_id).and(mask);
949 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
950 if self.is_nullable(node_id.left(self), mask) {
951 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
952 }
953 }
954 Kind::Union => {
955 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
956 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
957 }
958 Kind::Inter => {
959 let new_mask = self.nullability(node_id).and(mask);
960 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
961 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
962 }
963 Kind::Star => {
964 acc.insert(NullState::new(mask, pending_rel));
965 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
966 }
967 Kind::Compl => {
968 if !self.is_nullable(node_id.left(self), mask) {
969 acc.insert(NullState::new(mask, 0));
970 }
971 }
972 Kind::Lookbehind => {
973 let new_mask = self.nullability(node_id).and(mask);
974 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
975 if node_id.right(self) != NodeId::MISSING {
976 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
977 }
978 }
979 Kind::Lookahead => {
980 let la_inner = self.get_lookahead_inner(node_id);
981 if self.is_nullable(la_inner, mask) {
982 let lo = self.get_lookahead_rel_lo(node_id);
983 let hi = self.get_lookahead_rel(node_id);
984 if hi != u32::MAX {
985 for rel in lo..=hi {
986 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
987 }
988 }
989 let la_tail = self.get_lookahead_tail(node_id);
991 if la_tail != NodeId::MISSING {
992 self.get_nulls(pending_rel, mask, acc, la_tail);
993 }
994 }
995 }
996 Kind::Ordered => {
997 let packed = self.get_extra(node_id);
998 let best = packed >> 16;
999 if best > 0 {
1000 acc.insert(NullState::new(mask, pending_rel + best));
1001 }
1002 }
1003 }
1004 }
1005
1006 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
1007 self.get_meta_flags(node_id)
1008 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
1009 }
1010
1011 pub fn contains_lookbehind(&mut self, node_id: NodeId) -> bool {
1012 self.get_meta_flags(node_id)
1013 .has(MetaFlags::CONTAINS_LOOKBEHIND)
1014 }
1015
1016 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
1017 self.get_meta_flags(node_id)
1018 .has(MetaFlags::CONTAINS_ANCHORS)
1019 }
1020
1021 pub fn is_infinite(&self, node_id: NodeId) -> bool {
1022 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
1023 }
1024
1025 pub fn max_lookahead_body_len(&self, node_id: NodeId) -> u32 {
1031 let mut visited: FxHashMap<NodeId, ()> = FxHashMap::default();
1032 let mut stack = vec![node_id];
1033 let mut best: u32 = 0;
1034 while let Some(n) = stack.pop() {
1035 if n == NodeId::MISSING || visited.insert(n, ()).is_some() {
1036 continue;
1037 }
1038 let kind = self.get_kind(n);
1039 if kind == Kind::Lookahead {
1040 let mut body = self.get_lookahead_inner(n);
1041 if body.is_concat(self) && body.right(self) == NodeId::TS {
1044 body = body.left(self);
1045 }
1046 let (_, mx) = self.get_min_max_length(body);
1047 if mx > best {
1048 best = mx;
1049 }
1050 }
1051 match kind {
1052 Kind::Pred | Kind::Begin | Kind::End => {}
1053 Kind::Star | Kind::Compl => stack.push(n.left(self)),
1054 _ => {
1055 stack.push(n.left(self));
1056 stack.push(n.right(self));
1057 }
1058 }
1059 }
1060 best
1061 }
1062
1063 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
1064 let nl = &self.lengths[node_id.0 as usize];
1065 (nl.mml_min, nl.mml_max)
1066 }
1067
1068 #[inline]
1069 fn mml_of(&self, node_id: NodeId) -> (u32, u32) {
1070 let nl = &self.lengths[node_id.0 as usize];
1071 (nl.mml_min, nl.mml_max)
1072 }
1073
1074 fn compute_node_len(&self, node_id: NodeId) -> NodeLen {
1075 let es_null = self.compute_es_null(node_id);
1076 if node_id == NodeId::EPS {
1077 return NodeLen {
1078 mml_min: 0,
1079 mml_max: 0,
1080 es_null,
1081 };
1082 }
1083 let kind = self.get_kind(node_id);
1084 let l = node_id.left(self);
1085 let r = node_id.right(self);
1086 let min_of = |n: NodeId| self.lengths[n.0 as usize].mml_min;
1087
1088 let mml_min = match kind {
1089 Kind::End | Kind::Begin => 0,
1090 Kind::Pred => 1,
1091 Kind::Concat => min_of(l) + min_of(r),
1092 Kind::Union => min_of(l).min(min_of(r)),
1093 Kind::Inter => min_of(l).max(min_of(r)),
1094 Kind::Star | Kind::Lookbehind | Kind::Lookahead => 0,
1095 Kind::Ordered => min_of(l),
1096 Kind::Compl => {
1097 if self.nullability(l) == Nullability::ALWAYS {
1098 1
1099 } else {
1100 0
1101 }
1102 }
1103 };
1104
1105 let bounded_max = match kind {
1106 Kind::End | Kind::Begin => 0,
1107 Kind::Pred => 1,
1108 Kind::Concat => self.mml_of(l).1.saturating_add(self.mml_of(r).1),
1109 Kind::Union => self.mml_of(l).1.max(self.mml_of(r).1),
1110 Kind::Inter => self.mml_of(l).1.min(self.mml_of(r).1),
1111 Kind::Lookahead => self.mml_of(r.missing_to_eps()).1,
1112 Kind::Ordered => self.mml_of(l).1,
1113 Kind::Star | Kind::Compl => u32::MAX,
1114 Kind::Lookbehind => self.mml_of(r.missing_to_eps()).1,
1115 };
1116
1117 let mml_max = if self.is_infinite(node_id) && kind != Kind::Inter {
1118 u32::MAX
1119 } else {
1120 bounded_max
1121 };
1122
1123 NodeLen {
1124 mml_min,
1125 mml_max,
1126 es_null,
1127 }
1128 }
1129
1130 fn compute_es_null(&self, node_id: NodeId) -> Nullability {
1131 let es = |n: NodeId| self.lengths[n.0 as usize].es_null;
1132 match self.get_kind(node_id) {
1133 Kind::End | Kind::Begin => Nullability::EMPTYSTRING,
1134 Kind::Pred => Nullability::NEVER,
1135 Kind::Star => Nullability::ALWAYS,
1136 Kind::Inter | Kind::Concat => es(node_id.left(self)).and(es(node_id.right(self))),
1137 Kind::Union => es(node_id.left(self)).or(es(node_id.right(self))),
1138 Kind::Compl => es(node_id.left(self)).not(),
1139 Kind::Lookbehind => es(node_id.left(self)),
1140 Kind::Lookahead => {
1141 let body_null = es(node_id.left(self));
1142 let la_tail = self.get_lookahead_tail(node_id);
1143 if la_tail == NodeId::MISSING {
1144 body_null
1145 } else {
1146 body_null.and(es(la_tail))
1147 }
1148 }
1149 Kind::Ordered => es(node_id.left(self)),
1150 }
1151 }
1152
1153 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1154 if node_id == NodeId::EPS {
1155 return Some(0);
1156 }
1157 match self.get_kind(node_id) {
1158 Kind::End | Kind::Begin => Some(0),
1159 Kind::Pred => Some(1),
1160 Kind::Concat => {
1161 let l = self.get_fixed_length(node_id.left(self))?;
1162 let r = self.get_fixed_length(node_id.right(self))?;
1163 Some(l + r)
1164 }
1165 Kind::Union => {
1166 let l = self.get_fixed_length(node_id.left(self))?;
1167 let r = self.get_fixed_length(node_id.right(self))?;
1168 if l == r {
1169 Some(l)
1170 } else {
1171 None
1172 }
1173 }
1174 Kind::Inter => {
1175 let l = self.get_fixed_length(node_id.left(self));
1176 let r = self.get_fixed_length(node_id.right(self));
1177 match (l, r) {
1178 (Some(a), Some(b)) if a == b => Some(a),
1179 (Some(_), Some(_)) => None,
1180 (Some(a), None) | (None, Some(a)) => Some(a),
1181 (None, None) => None,
1182 }
1183 }
1184 Kind::Lookahead | Kind::Lookbehind => {
1185 let prev = node_id.right(self).missing_to_eps();
1186 self.get_fixed_length(prev)
1187 }
1188 Kind::Ordered => self.get_fixed_length(node_id.left(self)),
1189 Kind::Star | Kind::Compl => None,
1190 }
1191 }
1192
1193 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1194 self.lengths[node_id.0 as usize].mml_min
1195 }
1196
1197 pub fn starts_with_ts(&self, node_id: NodeId) -> bool {
1198 if node_id == NodeId::TS {
1199 return true;
1200 }
1201 match self.get_kind(node_id) {
1202 Kind::Inter => {
1203 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1204 }
1205 Kind::Union => {
1206 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1207 }
1208 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1209 _ => false,
1210 }
1211 }
1212
1213 #[inline]
1214 pub fn ends_with_ts(&self, node_id: NodeId) -> bool {
1215 if node_id.is_concat(self) {
1216 return self.ends_with_ts(node_id.right(self));
1217 }
1218 if node_id.is_lookahead(self) {
1219 let tail = self.get_lookahead_tail(node_id);
1220 if !tail.is_missing() {
1221 return self.ends_with_ts(tail);
1222 }
1223 }
1224 node_id == NodeId::TS
1225 }
1226
1227 pub fn ends_with_end(&self, node_id: NodeId) -> bool {
1228 if node_id.is_concat(self) {
1229 return self.ends_with_end(node_id.right(self));
1230 }
1231 node_id.is_end()
1232 }
1233
1234 pub fn is_begin_anchored(&self, node_id: NodeId) -> bool {
1235 if node_id == NodeId::BEGIN {
1236 return true;
1237 }
1238 match self.get_kind(node_id) {
1239 Kind::Concat => self.is_begin_anchored(node_id.left(self)),
1240 Kind::Union => {
1241 self.is_begin_anchored(node_id.left(self))
1242 && self.is_begin_anchored(node_id.right(self))
1243 }
1244 Kind::Lookahead => {
1245 if self.is_begin_anchored(self.get_lookahead_inner(node_id)) {
1246 return true;
1247 }
1248 let tail = self.get_lookahead_tail(node_id);
1249 !tail.is_missing() && self.is_begin_anchored(tail)
1250 }
1251 _ => false,
1252 }
1253 }
1254
1255
1256 pub fn ends_with_ts_any_branch(&self, node_id: NodeId) -> bool {
1257 if node_id == NodeId::TS {
1258 return true;
1259 }
1260 match self.get_kind(node_id) {
1261 Kind::Concat => self.ends_with_ts_any_branch(node_id.right(self)),
1262 Kind::Union => {
1263 self.ends_with_ts_any_branch(node_id.left(self))
1264 || self.ends_with_ts_any_branch(node_id.right(self))
1265 }
1266 Kind::Lookahead => {
1267 let tail = self.get_lookahead_tail(node_id);
1268 !tail.is_missing() && self.ends_with_ts_any_branch(tail)
1269 }
1270 _ => false,
1271 }
1272 }
1273
1274 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1275 debug_assert!(node_id != NodeId::MISSING);
1276 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1277 }
1278
1279 pub(crate) fn cache_der(
1280 &mut self,
1281 node_id: NodeId,
1282 result: TRegexId,
1283 mask: Nullability,
1284 ) -> TRegexId {
1285 if mask == Nullability::CENTER {
1286 self.tr_der_center[node_id.0 as usize] = result
1287 } else {
1288 self.tr_der_begin[node_id.0 as usize] = result
1289 };
1290 result
1291 }
1292
1293 pub(crate) fn try_cached_der(
1294 &mut self,
1295 node_id: NodeId,
1296 mask: Nullability,
1297 ) -> Option<TRegexId> {
1298 let cache = if mask == Nullability::CENTER {
1299 &mut self.tr_der_center
1300 } else {
1301 &mut self.tr_der_begin
1302 };
1303 match cache.get(node_id.0 as usize) {
1304 Some(&TRegexId::MISSING) => {}
1305 Some(&result) => {
1306 return Some(result);
1307 }
1308 None => {
1309 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1310 }
1311 }
1312 None
1313 }
1314
1315 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1316 let mut term = self.get_tregex(der);
1317 loop {
1318 match *term {
1319 TRegex::Leaf(node_id) => return node_id,
1320 TRegex::ITE(cond, _then, _else) => {
1321 if self.solver().is_sat_id(set, cond) {
1322 term = self.get_tregex(_then);
1323 } else {
1324 term = self.get_tregex(_else);
1325 }
1326 }
1327 }
1328 }
1329 }
1330
1331 pub fn transition_term_uniform(&mut self, der: TRegexId, set: TSetId) -> Option<NodeId> {
1332 let mut term = self.get_tregex(der);
1333 loop {
1334 match *term {
1335 TRegex::Leaf(node_id) => return Some(node_id),
1336 TRegex::ITE(cond, _then, _else) => {
1337 let solver = self.solver();
1338 let hits_then = solver.is_sat_id(set, cond);
1339 let fully_in_then = solver.contains_id(cond, set);
1340 if fully_in_then {
1341 term = self.get_tregex(_then);
1342 } else if !hits_then {
1343 term = self.get_tregex(_else);
1344 } else {
1345 return None;
1346 }
1347 }
1348 }
1349 }
1350 }
1351
1352 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, ResharpError> {
1353 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1354 debug_assert!(
1355 node_id != NodeId::MISSING,
1356 "attempting to derive missing node"
1357 );
1358 if let Some(result) = self.try_cached_der(node_id, mask) {
1359 return Ok(result);
1360 }
1361
1362 let result = match node_id.kind(self) {
1363 Kind::Compl => {
1364 let leftd = node_id.left(self).der(self, mask)?;
1365 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1366 }
1367 Kind::Inter => {
1368 let leftd = node_id.left(self).der(self, mask)?;
1369 let rightd = node_id.right(self).der(self, mask)?;
1370 {
1371 let this = &mut *self;
1372 this.mk_binary(
1373 leftd,
1374 rightd,
1375 &mut (|b, left, right| b.mk_inter(left, right)),
1376 )
1377 }
1378 }
1379 Kind::Union => {
1380 let leftd = self.der(node_id.left(self), mask)?;
1381 let rightd = self.der(node_id.right(self), mask)?;
1382 {
1383 let this = &mut *self;
1384 this.mk_binary(
1385 leftd,
1386 rightd,
1387 &mut (|b, left, right| b.mk_union(left, right)),
1388 )
1389 }
1390 }
1391 Kind::Concat => {
1392 let head = node_id.left(self);
1393 let tail = node_id.right(self);
1394 let tail_leaf = self.mk_leaf(tail);
1395 let head_der = self.der(head, mask)?;
1396
1397 if self.is_nullable(head, mask) {
1398 let rightd = self.der(tail, mask)?;
1399 let ldr = {
1400 let this = &mut *self;
1401 this.mk_binary(
1402 head_der,
1403 tail_leaf,
1404 &mut (|b, left, right| b.mk_concat(left, right)),
1405 )
1406 };
1407 {
1408 let this = &mut *self;
1409 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1410 }
1411 } else {
1412 let this = &mut *self;
1413 this.mk_binary(
1414 head_der,
1415 tail_leaf,
1416 &mut (|b, left, right| b.mk_concat(left, right)),
1417 )
1418 }
1419 }
1420 Kind::Star => {
1421 if node_id == NodeId::EPS {
1422 TRegexId::BOT
1423 } else {
1424 let left = node_id.left(self);
1425 let r_decr_leaf = self.mk_leaf(node_id);
1426 let r_der = self.der(left, mask)?;
1427 let this = &mut *self;
1428 this.mk_binary(
1429 r_der,
1430 r_decr_leaf,
1431 &mut (|b, left, right| b.mk_concat(left, right)),
1432 )
1433 }
1434 }
1435 Kind::Lookbehind => {
1436 let lb_prev_der = {
1437 let lb_prev = self.get_lookbehind_prev(node_id);
1438 if lb_prev == NodeId::MISSING {
1439 TRegexId::MISSING
1440 } else {
1441 self.der(lb_prev, mask)?
1442 }
1443 };
1444 let lb_inner = self.get_lookbehind_inner(node_id);
1445 let lb_inner_der = self.der(lb_inner, mask)?;
1446 {
1447 let this = &mut *self;
1448 this.mk_binary_result(
1449 lb_inner_der,
1450 lb_prev_der,
1451 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1452 )?
1453 }
1454 }
1455 Kind::Lookahead => {
1456 let la_tail = self.get_lookahead_tail(node_id);
1457 let la_body = node_id.left(self);
1458 let rel = self.get_lookahead_rel(node_id);
1459 let cur_rels = if rel == u32::MAX {
1460 NullsId(u32::MAX)
1461 } else {
1462 NullsId(self.get_extra(node_id))
1463 };
1464
1465 if self.is_nullable(la_body, mask) {
1466 let right = node_id.right(self).missing_to_eps();
1468 let rder = self.der(right, mask).clone();
1469 return rder;
1470 }
1471
1472 if rel == u32::MAX {
1473 let la_body_der = self.der(la_body, mask)?;
1474 if la_tail.is_kind(self, Kind::Pred) {
1475 if let Some(transitioned) =
1476 self.transition_term_uniform(la_body_der, la_tail.pred_tset(self))
1477 {
1478 let new_la =
1479 self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1480 let concated = self.mk_concat(la_tail, new_la);
1481 return self.der(concated, mask);
1482 }
1483 }
1484 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1485 let left = la_tail.left(self);
1486 let tset = left.pred_tset(self);
1487 if let Some(transitioned) = self.transition_term_uniform(la_body_der, tset)
1488 {
1489 let new_la =
1490 self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1491 let tail_right = la_tail.right(self);
1492 let concated = self.mk_concat(new_la, tail_right);
1493 let concated = self.mk_concat(left, concated);
1494 return self.der(concated, mask);
1495 }
1496 }
1497 }
1498
1499 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1500 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1501 let concated = self.mk_concat(la_body, nulls_mask);
1502 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1503 let non_nulled = self.mk_non_nullable_safe(la_tail);
1504 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1505 let new_union = self.mk_union(concated_look, new_look);
1506 return self.der(new_union, mask);
1507 }
1508
1509 let la_tail_der = if la_tail == NodeId::MISSING {
1510 TRegexId::MISSING
1511 } else {
1512 if self.is_nullable(la_tail, mask) {
1513 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1514 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1515 let la_union = self.mk_union(la_tail, nulls_la);
1516 self.der(la_union, mask)?
1517 } else {
1518 self.der(la_tail, mask)?
1519 }
1520 };
1521
1522 let la_body_der = self.der(la_body, mask)?;
1523
1524 if rel != u32::MAX && rel > self.lookahead_context_max {
1525 return Err(ResharpError::AnchorLimit);
1526 }
1527
1528 let new_rels = if cur_rels.0 == u32::MAX {
1529 NullsId(u32::MAX)
1530 } else {
1531 self.mb.nb.add_rel(cur_rels, 1)
1532 };
1533 let single_rel = if new_rels.0 == u32::MAX {
1534 Some(u32::MAX)
1535 } else {
1536 let lo = self.mb.nb.min_rel(new_rels);
1537 let hi = self.mb.nb.max_rel(new_rels);
1538 if lo == hi { Some(hi) } else { None }
1539 };
1540
1541 let la = {
1542 let this = &mut *self;
1543 this.mk_binary(
1544 la_body_der,
1545 la_tail_der,
1546 &mut (|b, left, right| match single_rel {
1547 Some(r) => b.mk_lookahead_internal(left, right, r),
1548 None => b.mk_lookahead_nid(left, right, new_rels),
1549 }),
1550 )
1551 };
1552
1553 if rel != u32::MAX
1554 && la_tail_der != TRegexId::MISSING
1555 && self.is_nullable(la_tail, mask)
1556 {
1557 let look_only = {
1558 let this = &mut *self;
1559 let right = TRegexId::MISSING;
1560 this.mk_binary(
1561 la_body_der,
1562 right,
1563 &mut (|b, left, right| match single_rel {
1564 Some(r) => b.mk_lookahead_internal(left, right, r),
1565 None => b.mk_lookahead_nid(left, right, new_rels),
1566 }),
1567 )
1568 };
1569 {
1570 let this = &mut *self;
1571 this.mk_binary(
1572 look_only,
1573 la,
1574 &mut (|b, left, right| b.mk_union(left, right)),
1575 )
1576 }
1577 } else {
1578 la
1579 }
1580 }
1581 Kind::Ordered => {
1582 let body = node_id.left(self);
1583 let chain = node_id.right(self);
1584 let packed = self.get_extra(node_id);
1585 let step = (packed & 0xFFFF) as u16;
1586 let best = (packed >> 16) as u16;
1587
1588 let mid_best = if self.is_nullable(body, mask) && step >= best {
1589 step
1590 } else {
1591 best
1592 };
1593
1594 let body_der = self.der(body, mask)?;
1595 let new_step = step.saturating_add(1);
1596 self.mk_unary(body_der, &mut |b, new_body| {
1597 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1598 new_step
1599 } else {
1600 mid_best
1601 };
1602 let packed = (final_best as u32) << 16 | new_step as u32;
1603 b.mk_ordered(new_body, chain, packed)
1604 })
1605 }
1606 Kind::Begin | Kind::End => TRegexId::BOT,
1607 Kind::Pred => {
1608 let psi = node_id.pred_tset(self);
1609 if psi == TSetId::EMPTY {
1610 TRegexId::BOT
1611 } else {
1612 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1613 }
1614 }
1615 };
1616
1617 self.cache_der(node_id, result, mask);
1621 Ok(result)
1622 }
1623
1624 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1625 debug_assert!(meta_id != MetadataId::MISSING);
1626 match self.metadata.get_mut(node_id.0 as usize) {
1627 Some(v) => *v = meta_id,
1628 None => {
1629 self.metadata
1630 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1631 self.metadata[node_id.0 as usize] = meta_id;
1632 }
1633 }
1634 }
1635
1636 fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1637 debug_assert!(reversed_id != NodeId::MISSING);
1638 match self.reversed.get_mut(node_id.0 as usize) {
1639 Some(v) => *v = reversed_id,
1640 None => {
1641 self.reversed
1642 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1643 self.reversed[node_id.0 as usize] = reversed_id;
1644 }
1645 }
1646 return reversed_id;
1647 }
1648
1649 fn init(&mut self, inst: NodeKey) -> NodeId {
1650 self.num_created += 1;
1651 let node_id = NodeId(self.num_created);
1652 self.index.insert(inst.clone(), node_id);
1653 match inst.kind {
1654 Kind::Pred => {
1655 let meta_id = self.mb.get_meta_id(Metadata {
1656 flags: MetaFlags::ZERO,
1657 nulls: NullsId::EMPTY,
1658 });
1659 self.init_metadata(node_id, meta_id);
1660 }
1661 Kind::Begin => {
1662 let meta_id = self.mb.get_meta_id(Metadata {
1663 flags: MetaFlags::with_nullability(
1664 Nullability::BEGIN,
1665 MetaFlags::CONTAINS_ANCHORS,
1666 ),
1667 nulls: NullsId::BEGIN0,
1668 });
1669 self.init_metadata(node_id, meta_id);
1670 }
1671 Kind::End => {
1672 let meta_id = self.mb.get_meta_id(Metadata {
1673 flags: MetaFlags::with_nullability(
1674 Nullability::END,
1675 MetaFlags::CONTAINS_ANCHORS,
1676 ),
1677 nulls: NullsId::END0,
1678 });
1679 self.init_metadata(node_id, meta_id);
1680 }
1681 Kind::Inter => {
1682 let m1 = self.get_node_meta_id(inst.left);
1683 let m2 = self.get_node_meta_id(inst.right);
1684 let meta_id = {
1685 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1686 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1687 let nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1688 let new_meta = Metadata {
1689 flags: self.mb.flags_inter(m1, m2),
1690 nulls,
1691 };
1692 self.mb.get_meta_id(new_meta)
1693 };
1694 self.init_metadata(node_id, meta_id);
1695 }
1696 Kind::Union => {
1697 let m1 = self.get_node_meta_id(inst.left);
1698 let m2 = self.get_node_meta_id(inst.right);
1699 let meta_id = {
1700 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1701 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1702 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1703 let new_meta = Metadata {
1704 flags: self.mb.flags_union(m1, m2),
1705 nulls,
1706 };
1707 self.mb.get_meta_id(new_meta)
1708 };
1709 self.init_metadata(node_id, meta_id);
1710 }
1711 Kind::Concat => {
1712 let flags = self.mb.flags_concat(
1713 self.get_node_meta_id(inst.left),
1714 self.get_node_meta_id(inst.right),
1715 );
1716
1717 let right_nullability = inst.right.nullability(self);
1718 let left_nullability = inst.left.nullability(self);
1719 let nulls_left = self.get_nulls_id(inst.left);
1720 let nulls_right = self.get_nulls_id(inst.right);
1721 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1722 let mask = right_nullability.and(left_nullability);
1723 nulls = self.mb.nb.and_mask(nulls, mask);
1724
1725 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1726 self.init_metadata(node_id, new_id);
1727 }
1728 Kind::Star => {
1729 let left_nulls = self.get_nulls_id(inst.left);
1730 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1731 let meta_id = self.mb.get_meta_id(Metadata {
1732 flags: self
1733 .mb
1734 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1735 nulls,
1736 });
1737 self.init_metadata(node_id, meta_id);
1738 }
1739 Kind::Compl => {
1740 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1741 let meta_id = self.mb.get_meta_id(Metadata {
1742 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1743 nulls,
1744 });
1745 self.init_metadata(node_id, meta_id);
1746 }
1747 Kind::Lookbehind => {
1748 let mut left_nullability = self.nullability(inst.left);
1749 let mut contains_flags = self.get_flags_contains(inst.left);
1750 let nulls = if inst.right.is_missing() {
1751 self.get_nulls_id(inst.left)
1752 } else {
1753 let right_nullability = self.nullability(inst.right);
1754 let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1755 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1756 left_nullability = left_nullability.and(right_nullability);
1757 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1758 self.mb.nb.and_id(nulls_left, nulls_right)
1759 };
1760
1761 let meta_id = self.mb.get_meta_id(Metadata {
1762 flags: MetaFlags::with_nullability(
1763 left_nullability,
1764 contains_flags.or(MetaFlags::CONTAINS_LOOKBEHIND),
1765 ),
1766 nulls,
1767 });
1768 self.init_metadata(node_id, meta_id);
1769 }
1770 Kind::Lookahead => {
1771 let mut nulls = if inst.extra == u32::MAX {
1772 NullsId::EMPTY
1773 } else {
1774 let rels = NullsId(inst.extra);
1775 let body_nulls = self.get_nulls_id(inst.left);
1776 let rel_vals: Vec<u32> =
1777 self.mb.nb.get_set_ref(rels).iter().rev().map(|ns| ns.rel).collect();
1778 let mut combined = NullsId::EMPTY;
1779 for &rel in &rel_vals {
1780 let part = self.mb.nb.add_rel(body_nulls, rel);
1781 combined = self.mb.nb.or_id(combined, part);
1782 }
1783 combined
1784 };
1785 let left_nullability = inst.left.nullability(self);
1786 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1787
1788 nulls = self.mb.nb.or_id(nulls, nulls_right);
1789
1790 let la_inner = inst.left;
1791 let la_tail = inst.right;
1792 let null = self
1793 .get_meta_flags(la_inner)
1794 .nullability()
1795 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1796 let contains_flags = self
1797 .get_flags_contains(la_inner)
1798 .or(self.get_flags_contains(la_tail));
1799
1800 let meta_id = self.mb.get_meta_id(Metadata {
1801 flags: MetaFlags::with_nullability(
1802 null,
1803 contains_flags.or(MetaFlags::CONTAINS_LOOKAHEAD),
1804 ),
1805 nulls,
1806 });
1807 self.init_metadata(node_id, meta_id);
1808 }
1809 Kind::Ordered => {
1810 let best = inst.extra >> 16;
1811 let (null, nulls) = if best > 0 {
1812 let mut ns = BTreeSet::new();
1813 ns.insert(NullState::new(Nullability::CENTER, best));
1814 (Nullability::CENTER, self.mb.nb.get_id(ns))
1815 } else {
1816 (Nullability::NEVER, NullsId::EMPTY)
1817 };
1818 let meta_id = self.mb.get_meta_id(Metadata {
1819 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1820 nulls,
1821 });
1822 self.init_metadata(node_id, meta_id);
1823 }
1824 }
1825
1826 self.array.push(inst);
1827
1828 let nl = self.compute_node_len(node_id);
1829 debug_assert!(self.lengths.len() == node_id.0 as usize);
1830 self.lengths.push(nl);
1831
1832 if let Some(rw) = self.post_init_simplify(node_id) {
1833 self.override_as(node_id, rw)
1834 } else {
1835 node_id
1836 }
1837 }
1838
1839 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1840 match self.get_kind(node_id) {
1841 Kind::Concat => {
1842 let lhs = node_id.left(self);
1843 let rhs = node_id.right(self);
1844 if let Some(_) = lhs.is_pred_star(self) {
1845 if let Some(opttail) = rhs.is_opt_v(self) {
1846 let (_, max) = self.get_min_max_length(node_id);
1847 if max < u32::MAX {
1848 if let Some(true) = self.subsumes(lhs, opttail) {
1849 return Some(lhs);
1850 }
1851 }
1852 }
1853 }
1854 }
1855 Kind::Union => {
1856 let lhs = node_id.left(self);
1857 let rhs = node_id.right(self);
1858 let mut subsumed = false;
1859 rhs.iter_union_while(self, &mut |b, branch| {
1860 if b.nullable_subsumes(branch, lhs) {
1861 subsumed = true;
1862 }
1863 !subsumed
1864 });
1865 if subsumed {
1866 return Some(rhs);
1867 }
1868 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1869 return Some(rhs);
1870 }
1871 }
1872 _ => {}
1873 }
1874
1875 None
1876 }
1877
1878 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1880 if self.get_kind(lhs) != Kind::Union {
1881 return false; }
1883 let mut rhs_branches = Vec::new();
1884 let mut curr = rhs;
1885 while self.get_kind(curr) == Kind::Union {
1886 rhs_branches.push(self.get_left(curr));
1887 curr = self.get_right(curr);
1888 }
1889 rhs_branches.push(curr);
1890
1891 curr = lhs;
1892 while self.get_kind(curr) == Kind::Union {
1893 if !rhs_branches.contains(&self.get_left(curr)) {
1894 return false;
1895 }
1896 curr = self.get_right(curr);
1897 }
1898 rhs_branches.contains(&curr)
1899 }
1900
1901 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1903 if node == target {
1904 return true;
1905 }
1906 match self.get_kind(node) {
1907 Kind::Union => {
1908 self.nullable_subsumes(self.get_left(node), target)
1909 || self.nullable_subsumes(self.get_right(node), target)
1910 }
1911 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1912 self.nullable_subsumes(self.get_right(node), target)
1913 }
1914 _ => false,
1915 }
1916 }
1917
1918 pub fn num_nodes(&self) -> u32 {
1919 self.num_created
1920 }
1921
1922 pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1923 let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1924 let mut stack: Vec<NodeId> = vec![root];
1925 while let Some(n) = stack.pop() {
1926 if n == NodeId::MISSING
1927 || n == NodeId::BOT
1928 || n == NodeId::EPS
1929 || n == NodeId::TS
1930 || n == NodeId::BEGIN
1931 || n == NodeId::END
1932 {
1933 continue;
1934 }
1935 if seen.insert(n, ()).is_some() {
1936 continue;
1937 }
1938 if seen.len() >= limit {
1939 return limit;
1940 }
1941 stack.push(self.get_left(n));
1942 stack.push(self.get_right(n));
1943 }
1944 seen.len()
1945 }
1946 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1947 match self.index.get(&inst) {
1948 Some(&id) => id,
1949 None => self.init(inst),
1950 }
1951 }
1952 #[inline]
1953 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1954 self.index.get(inst)
1955 }
1956
1957 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1958 self.index.insert(key, subsumed);
1959 subsumed
1960 }
1961
1962 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1963 let key = &self.array[key.0 as usize];
1964 let entry = self.index.get_mut(key).unwrap();
1965 *entry = subsumed;
1966 subsumed
1967 }
1968
1969 #[inline]
1970 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1971 self.array[node_id.0 as usize].left
1972 }
1973
1974 #[inline]
1975 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1976 self.array[node_id.0 as usize].right
1977 }
1978
1979 #[inline]
1980 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1981 self.array[node_id.0 as usize].extra
1982 }
1983
1984 #[inline]
1985 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1986 debug_assert!(node_id.is_concat(self));
1987 let mut curr = node_id;
1988 while curr.is_concat(self) {
1989 curr = curr.right(self);
1990 }
1991 curr
1992 }
1993
1994 fn has_trailing_la(&self, node: NodeId) -> bool {
1995 let end = match self.get_kind(node) {
1996 Kind::Concat => self.get_concat_end(node),
1997 Kind::Lookahead => node,
1998 _ => return false,
1999 };
2000 end.is_lookahead(self) && end.right(self).is_missing()
2001 }
2002
2003 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
2004 if node.is_lookahead(self) {
2005 return (NodeId::EPS, node);
2006 }
2007 debug_assert!(node.is_concat(self));
2008 let right = node.right(self);
2009 if !right.is_concat(self) {
2010 return (node.left(self), right);
2011 }
2012 let (stripped, la) = self.strip_trailing_la(right);
2013 (self.mk_concat(node.left(self), stripped), la)
2014 }
2015 #[inline]
2016 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
2017 debug_assert!(matches!(
2018 self.get_kind(lookahead_node_id),
2019 Kind::Lookahead | Kind::Ordered
2020 ));
2021 lookahead_node_id.left(self)
2022 }
2023 #[inline]
2024 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
2025 debug_assert!(lookahead_node_id.is_lookahead(self));
2026 lookahead_node_id.right(self)
2027 }
2028 #[inline]
2029 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
2030 debug_assert!(
2031 matches!(
2032 self.get_kind(lookahead_node_id),
2033 Kind::Lookahead | Kind::Ordered
2034 ),
2035 "not lookahead/counted: {:?}",
2036 self.pp(lookahead_node_id)
2037 );
2038 let extra = self.get_extra(lookahead_node_id);
2039 if extra == u32::MAX {
2040 u32::MAX
2041 } else {
2042 self.mb.nb.max_rel(NullsId(extra))
2043 }
2044 }
2045 #[inline]
2046 pub(crate) fn get_lookahead_rel_lo(&self, lookahead_node_id: NodeId) -> u32 {
2047 debug_assert!(lookahead_node_id.is_lookahead(self));
2048 let extra = self.get_extra(lookahead_node_id);
2049 if extra == u32::MAX {
2050 u32::MAX
2051 } else {
2052 self.mb.nb.min_rel(NullsId(extra))
2053 }
2054 }
2055 #[inline]
2056 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
2057 debug_assert!(lookbehind_node_id.is_lookbehind(self));
2058 lookbehind_node_id.left(self)
2059 }
2060 #[inline]
2061 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
2062 debug_assert!(lookbehind_node_id.is_lookbehind(self));
2063 lookbehind_node_id.right(self)
2064 }
2065
2066 #[inline]
2067 pub fn get_kind(&self, node_id: NodeId) -> Kind {
2068 debug_assert!(
2069 self.array.len() > node_id.0 as usize,
2070 "array len: {:?}",
2071 node_id
2072 );
2073 self.array[node_id.0 as usize].kind
2074 }
2075
2076 #[inline]
2077 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
2078 &self.array[node_id.0 as usize]
2079 }
2080
2081 #[inline]
2082 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
2083 self.metadata[node_id.0 as usize]
2084 }
2085
2086 #[inline]
2087 fn get_meta(&self, node_id: NodeId) -> &Metadata {
2088 debug_assert!(node_id.0 != u32::MAX);
2089 let meta_id = self.get_node_meta_id(node_id);
2090 debug_assert!(meta_id != MetadataId::MISSING);
2091 &self.mb.array[meta_id.0 as usize]
2092 }
2093
2094 #[inline]
2095 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
2096 if node_id == NodeId::MISSING {
2097 NullsId::EMPTY
2098 } else {
2099 self.get_meta(node_id).nulls
2100 }
2101 }
2102
2103 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
2104 self.mb
2105 .nb
2106 .array
2107 .iter()
2108 .map(|set| set.iter().cloned().collect())
2109 .collect()
2110 }
2111
2112 pub fn nulls_count(&self) -> usize {
2113 self.mb.nb.array.len()
2114 }
2115
2116 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
2117 self.mb.nb.array[id as usize].iter().cloned().collect()
2118 }
2119
2120 pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
2121 self.mb.nb.and_mask(nid, Nullability::CENTER)
2122 }
2123
2124 #[inline]
2125 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
2126 if node_id == NodeId::MISSING {
2127 NullsId::EMPTY
2128 } else {
2129 let nulls = self.get_meta(node_id).nulls;
2130 self.mb.nb.and_mask(nulls, mask)
2131 }
2132 }
2133
2134 #[inline]
2135 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
2136 let meta_id = self.get_node_meta_id(node_id);
2137 let meta = &self.mb.array[meta_id.0 as usize];
2138 meta.flags
2139 }
2140
2141 #[inline]
2142 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
2143 self.get_meta(node_id).flags.nullability()
2144 }
2145
2146 #[inline]
2147 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
2148 let meta_id = self.get_node_meta_id(node_id);
2149 let meta = &self.mb.array[meta_id.0 as usize];
2150 meta.flags.all_contains_flags()
2151 }
2152
2153 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2154 if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2155 return self.strip_lb(node_id.right(self));
2156 }
2157 let result = self.strip_lb_inner(true, node_id)?;
2158 if self.contains_lookbehind(result) {
2159 return Err(ResharpError::UnsupportedPattern);
2160 }
2161 Ok(result)
2162 }
2163
2164 fn strip_lb_inner(&mut self, allowed: bool, node_id: NodeId) -> Result<NodeId, ResharpError> {
2165 if !self.contains_lookbehind(node_id) {
2166 return Ok(node_id);
2167 }
2168 if !allowed {
2169 return Result::Err(ResharpError::UnsupportedPattern);
2170 }
2171 if node_id.is_inter(self) {
2172 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2173 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2174 return Ok(self.mk_inter(left, right));
2175 }
2176 if node_id.is_union(self) {
2177 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2178 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2179 return Ok(self.mk_union(left, right));
2180 }
2181 match self.get_kind(node_id) {
2182 Kind::Compl => Result::Err(ResharpError::UnsupportedPattern),
2183 Kind::Concat => {
2184 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2185 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2186 Ok(self.mk_concat(left, right))
2187 }
2188 Kind::Lookbehind => {
2189 let prev = self.get_lookbehind_prev(node_id);
2190 if prev != NodeId::MISSING {
2191 self.strip_lb_inner(allowed, prev)
2192 } else {
2193 Ok(NodeId::EPS)
2194 }
2195 }
2196 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => Ok(node_id),
2197 _ => Ok(node_id),
2198 }
2199 }
2200
2201 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
2203 if !self.contains_anchors(node_id) {
2204 return node_id;
2205 }
2206 match self.get_kind(node_id) {
2207 Kind::Begin => NodeId::BOT,
2208 Kind::Concat => {
2209 let left = self.nonbegins(node_id.left(self));
2210 if left == NodeId::BOT {
2211 return NodeId::BOT;
2212 }
2213 self.mk_concat(left, node_id.right(self))
2214 }
2215 Kind::Union => {
2216 let left = self.nonbegins(node_id.left(self));
2217 let right = self.nonbegins(node_id.right(self));
2218 self.mk_union(left, right)
2219 }
2220 _ => node_id,
2221 }
2222 }
2223
2224 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
2225 match self.get_kind(node_id) {
2226 Kind::Concat => {
2227 let head = node_id.left(self);
2228 match self.get_kind(head) {
2229 _ if self.any_nonbegin_nullable(head) => {
2230 self.strip_prefix_safe(node_id.right(self))
2231 }
2232 _ => node_id,
2233 }
2234 }
2235 _ => node_id,
2236 }
2237 }
2238 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2239 match self.get_kind(node_id) {
2240 Kind::Begin => NodeId::BOT,
2241 Kind::Concat => {
2242 let head = self.prune_begin(node_id.left(self));
2243 let tail = self.prune_begin(node_id.right(self));
2244 self.mk_concat(head, tail)
2245 }
2246 Kind::Lookbehind => {
2247 if !node_id.right(self).is_missing() {
2248 return node_id;
2249 }
2250 let head = self.prune_begin(node_id.left(self));
2251 head
2252 }
2253 Kind::Union => {
2254 let left = self.prune_begin(node_id.left(self));
2255 let right = self.prune_begin(node_id.right(self));
2256 self.mk_union(left, right)
2257 }
2258 _ => node_id,
2259 }
2260 }
2261 pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2262 match self.get_kind(node_id) {
2263 Kind::Begin => NodeId::EPS,
2264 Kind::Concat => {
2265 let head = self.prune_begin_eps(node_id.left(self));
2266 let tail = self.prune_begin_eps(node_id.right(self));
2267 self.mk_concat(head, tail)
2268 }
2269 Kind::Lookbehind => {
2270 if !node_id.right(self).is_missing() {
2271 return node_id;
2272 }
2273 let head = self.prune_begin_eps(node_id.left(self));
2274 head
2275 }
2276 Kind::Union => {
2277 let left = self.prune_begin_eps(node_id.left(self));
2278 let right = self.prune_begin_eps(node_id.right(self));
2279 self.mk_union(left, right)
2280 }
2281 _ => node_id,
2282 }
2283 }
2284
2285 pub fn normalize_rev(&mut self, node_id: NodeId, depth: usize) -> Result<NodeId, ResharpError> {
2286 if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2287 return Ok(node_id);
2288 }
2289 let next_depth = depth + 1;
2290 let result = match self.get_kind(node_id) {
2291 Kind::Concat => {
2292 let left = self.normalize_rev(node_id.left(self), depth)?;
2293 let right = self.normalize_rev(node_id.right(self), next_depth)?;
2294 self.mk_concat(left, right)
2295 }
2296 Kind::Inter => {
2297 let left = self.normalize_rev(node_id.left(self), depth)?;
2298 let right = self.normalize_rev(node_id.right(self), depth)?;
2299 self.mk_inter(left, right)
2300 }
2301 Kind::Union => {
2302 let left = self.normalize_rev(node_id.left(self), depth)?;
2303 let right = self.normalize_rev(node_id.right(self), depth)?;
2304 self.mk_union(left, right)
2305 }
2306 Kind::Lookbehind if depth == 0 => {
2307 let left = self.normalize_rev(node_id.left(self), depth)?;
2308 let right = self.normalize_rev(node_id.right(self).missing_to_eps(), depth)?;
2309 let lbody_ts = self.mk_concat(NodeId::TS, left);
2310 let ltail_ts = self.mk_concat(NodeId::TS, right);
2311 self.mk_inter(lbody_ts, ltail_ts)
2312 }
2313 Kind::Lookbehind => {
2314 let left = self.normalize_rev(node_id.left(self), depth)?;
2315 let right = self.normalize_rev(node_id.right(self), depth)?;
2316 self.mk_lookbehind(left, right)
2317 }
2318 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2319 return Err(ResharpError::UnsupportedPattern)
2320 }
2321 _ => node_id,
2322 };
2323 Ok(result)
2324 }
2325
2326 pub fn collect_der_targets(
2327 &mut self,
2328 der: TRegexId,
2329 path_set: TSetId,
2330 out: &mut Vec<(NodeId, TSetId)>,
2331 ) {
2332 match *self.get_tregex(der) {
2333 TRegex::Leaf(target) => {
2334 if let Some(entry) = out.iter_mut().find(|(t, _)| *t == target) {
2335 entry.1 = self.solver().or_id(entry.1, path_set);
2336 } else {
2337 out.push((target, path_set));
2338 }
2339 }
2340 TRegex::ITE(cond, then_b, else_b) => {
2341 let then_path = self.solver().and_id(path_set, cond);
2342 self.collect_der_targets(then_b, then_path, out);
2343 let not_cond = self.solver().not_id(cond);
2344 let else_path = self.solver().and_id(path_set, not_cond);
2345 self.collect_der_targets(else_b, else_path, out);
2346 }
2347 }
2348 }
2349
2350 pub fn prefix_ts(&mut self, node_id: NodeId) -> NodeId {
2351 let with_ts = if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2352 node_id
2353 } else {
2354 self.mk_concat(NodeId::TS, node_id)
2355 };
2356 with_ts
2357 }
2358
2359 pub fn ts_rev_start(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2360 let rev = self.reverse(node_id)?;
2361 let rev = self.normalize_rev(rev, 0)?;
2362 let with_ts = self.prefix_ts(rev);
2363 Ok(self.simplify_rev_initial(with_ts))
2364 }
2365
2366 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2367 debug_assert!(node_id != NodeId::MISSING);
2368 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2369 if *rev != NodeId::MISSING {
2370 return Ok(*rev);
2371 }
2372 }
2373 let rw = match self.get_kind(node_id) {
2374 Kind::End => NodeId::BEGIN,
2375 Kind::Begin => NodeId::END,
2376 Kind::Pred => node_id,
2377 Kind::Concat => {
2378 let new_right = self.reverse(node_id.left(self))?;
2379 let new_left = self.reverse(node_id.right(self))?;
2380 if new_right.is_union(self) {
2381 let union = new_right;
2382 if union.left(self).contains_lookbehind(self)
2383 || union.right(self).contains_lookbehind(self)
2384 {
2385 if new_left.is_ts() {
2386 self.mk_concat(new_left, new_right)
2387 } else {
2388 #[cfg(feature = "debug")]
2389 {
2390 eprintln!(
2391 "left: {:?}, right: {:?}",
2392 self.pp(new_left),
2393 self.pp(union)
2394 );
2395 }
2396 return Err(ResharpError::UnsupportedPattern);
2397 }
2398 } else {
2399 self.mk_concat(new_left, new_right)
2400 }
2401 } else if node_id.right(self).is_union(self) {
2402 let union = node_id.right(self);
2405 if union.left(self).contains_lookbehind(self)
2406 || union.right(self).contains_lookbehind(self)
2407 {
2408 if new_left.is_ts() {
2409 self.mk_concat(new_left, new_right)
2410 } else {
2411 return Err(ResharpError::UnsupportedPattern);
2412 }
2413 } else {
2414 self.mk_concat(new_left, new_right)
2415 }
2416 } else {
2417 self.mk_concat(new_left, new_right)
2418 }
2419 }
2420 Kind::Union => {
2421 let left = self.reverse(node_id.left(self))?;
2422 let right = self.reverse(node_id.right(self))?;
2423 self.mk_union(left, right)
2424 }
2425 Kind::Inter => {
2426 let left = self.reverse(node_id.left(self))?;
2427 let right = self.reverse(node_id.right(self))?;
2428 self.mk_inter(left, right)
2429 }
2430 Kind::Star => {
2431 let body = self.reverse(node_id.left(self))?;
2432 self.mk_star(body)
2433 }
2434 Kind::Compl => {
2435 if self.contains_look(node_id.left(self)) {
2436 return Err(ResharpError::UnsupportedPattern);
2437 }
2438 let body = self.reverse(node_id.left(self))?;
2439 self.mk_compl(body)
2440 }
2441 Kind::Lookbehind => {
2442 let prev = self.get_lookbehind_prev(node_id);
2443 let inner_id = self.get_lookbehind_inner(node_id);
2444 let rev_inner = self.reverse(inner_id)?;
2445 let rev_prev = if prev != NodeId::MISSING {
2446 self.reverse(prev)?
2447 } else {
2448 NodeId::MISSING
2449 };
2450 self.mk_lookahead(rev_inner, rev_prev, 0)
2451 }
2452 Kind::Lookahead => {
2453 let rel = self.get_lookahead_rel(node_id);
2454 if rel == u32::MAX {
2456 let lbody = self.get_lookahead_inner(node_id);
2457 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2458 let rbody = self.reverse(lbody);
2459 let rtail = self.reverse(ltail);
2460 let rev = self.mk_lookbehind(rbody?, rtail?);
2461 return Ok(self.cache_reversed(node_id, rev));
2462 }
2463 if rel != 0 {
2464 return Err(ResharpError::UnsupportedPattern);
2465 }
2466 let tail_node = self.get_lookahead_tail(node_id);
2467 let rev_tail = if tail_node != NodeId::MISSING {
2468 self.reverse(tail_node)?
2469 } else {
2470 NodeId::MISSING
2471 };
2472 let inner_id = self.get_lookahead_inner(node_id);
2473 let rev_inner = self.reverse(inner_id)?;
2474 self.mk_lookbehind(rev_inner, rev_tail)
2475 }
2476 Kind::Ordered => {
2477 return Err(ResharpError::UnsupportedPattern);
2478 }
2479 };
2480 self.cache_reversed(node_id, rw);
2482 Ok(rw)
2483 }
2484
2485 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2486 let node = NodeKey {
2487 kind: Kind::Pred,
2488 left: NodeId::MISSING,
2489 right: NodeId::MISSING,
2490 extra: pred.0,
2491 };
2492 self.get_node_id(node)
2493 }
2494
2495 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2496 let key = NodeKey {
2497 kind: Kind::Compl,
2498 left: body,
2499 right: NodeId::MISSING,
2500 extra: u32::MAX,
2501 };
2502 if let Some(id) = self.key_is_created(&key) {
2503 return *id;
2504 }
2505 if body == NodeId::BOT {
2506 return NodeId::TS;
2507 }
2508 if body == NodeId::TS {
2509 return NodeId::BOT;
2510 }
2511
2512 if let Some(contains_body) = body.is_contains(self) {
2513 if contains_body.is_pred(self) {
2514 let pred = contains_body.pred_tset(self);
2515 let notpred = self.mk_pred_not(pred);
2516 let node = self.mk_star(notpred);
2517 return self.init_as(key, node);
2518 } else if contains_body == NodeId::END {
2519 return self.init_as(key, NodeId::BOT);
2520 }
2521 };
2522
2523 if self.get_kind(body) == Kind::Compl {
2524 return self.get_node(body).left;
2525 }
2526
2527 self.get_node_id(key)
2528 }
2529
2530 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2531 let nid = self.get_nulls_id(body);
2532 let nref = self.mb.nb.get_set_ref(nid).clone();
2533 let mut futures = NodeId::BOT;
2534 for n in nref.iter() {
2535 if !n.is_mask_nullable(mask) {
2536 continue;
2537 }
2538
2539 let eff = if n.rel == 0 {
2540 NodeId::EPS
2541 } else {
2542 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2543 };
2544 futures = self.mk_union(futures, eff)
2545 }
2546 futures
2547 }
2548
2549 fn strip_ts_max_len(&self, node: NodeId) -> Option<u32> {
2551 let mut cur = node;
2552 let mut total: u32 = 0;
2553 loop {
2554 if !cur.is_concat(self) {
2555 return None;
2556 }
2557 let r = cur.right(self);
2558 let (_, lmax) = self.get_min_max_length(cur.left(self));
2559 if lmax == u32::MAX {
2560 return None;
2561 }
2562 total = total.saturating_add(lmax);
2563 if r == NodeId::TS {
2564 return Some(total);
2565 }
2566 cur = r;
2567 }
2568 }
2569
2570 fn peel_head_pred(&self, node: NodeId) -> Option<(TSetId, NodeId)> {
2571 if node.is_pred(self) {
2572 Some((node.pred_tset(self), NodeId::EPS))
2573 } else if node.is_concat(self) && node.left(self).is_pred(self) {
2574 Some((node.left(self).pred_tset(self), node.right(self)))
2575 } else {
2576 None
2577 }
2578 }
2579
2580 fn strip_end_from_la_head(&mut self, node: NodeId) -> NodeId {
2582 let (head, rest) = if node.is_concat(self) {
2583 (node.left(self), node.right(self))
2584 } else {
2585 (node, NodeId::EPS)
2586 };
2587 if !head.is_kind(self, Kind::Union) {
2588 return node;
2589 }
2590 let l = head.left(self);
2591 if !l.is_end() {
2592 return node;
2593 }
2594 let r = head.right(self);
2595 self.mk_concat(r, rest)
2596 }
2597
2598 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2599 if cfg!(feature = "norewrite") {
2600 return None;
2601 }
2602 if tail.is_lookbehind(self) {
2603 let tail_prev = self.get_lookbehind_prev(tail);
2604 if head.is_end() && (tail_prev.is_missing()) {
2606 return Some(self.mk_concat(tail, head));
2607 }
2608 if head.is_lookahead(self)
2609 && self.get_lookahead_rel(head) == 0
2610 && self.get_lookahead_tail(head).is_missing()
2611 && (tail_prev == NodeId::MISSING || tail_prev == NodeId::EPS)
2612 {
2613 return Some(self.mk_concat(tail, head));
2614 }
2615 if head.is_lookbehind(self) {
2617 let head_prev = self.get_lookbehind_prev(head);
2618 if (head_prev.is_missing()) && (tail_prev.is_missing()) {
2619 let head_inner = self.get_lookbehind_inner(head).missing_to_eps();
2620 let tail_inner = self.get_lookbehind_inner(tail).missing_to_eps();
2621 let merged = self.mk_inter(head_inner, tail_inner);
2622 return self.mk_lookbehind_internal(merged, NodeId::MISSING).ok();
2623 }
2624 }
2625 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2626 return self
2627 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2628 .ok();
2629 }
2630 if head.is_lookahead(self) {
2631 let la_tail = self.get_lookahead_tail(head);
2632 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2633 let la_body = self.get_lookahead_inner(head);
2634 let la_body = if new_la_tail.is_never_nullable(self) {
2636 self.strip_end_from_la_head(la_body)
2637 } else {
2638 la_body
2639 };
2640 if la_body == NodeId::BOT {
2641 return Some(NodeId::BOT);
2642 }
2643 if let (Some((p_l, body_rest)), Some((p_b, tail_rest))) = (
2644 self.peel_head_pred(la_body),
2645 self.peel_head_pred(new_la_tail),
2646 ) {
2647 let p = self.solver().and_id(p_l, p_b);
2648 let merged = self.mk_pred(p);
2649 if merged == NodeId::BOT {
2650 return Some(NodeId::BOT);
2651 }
2652 let new_la = if body_rest == NodeId::EPS {
2653 NodeId::EPS
2654 } else {
2655 self.mk_lookahead(body_rest, NodeId::MISSING, 0)
2656 };
2657 let after = self.mk_concat(new_la, tail_rest);
2658 return Some(self.mk_concat(merged, after));
2659 }
2660
2661 if la_body.is_concat(self)
2662 && la_body.right(self).is_end()
2663 && la_body.left(self).is_compl(self)
2664 {
2665 let not_p_ts = la_body.left(self); if let Some(p_max) = self.strip_ts_max_len(not_p_ts.left(self)) {
2667 let (tail_min, _) = self.get_min_max_length(new_la_tail);
2668 if p_max <= tail_min {
2669 return Some(self.mk_inter(not_p_ts, new_la_tail));
2670 }
2671 }
2672 }
2673
2674 if new_la_tail.is_center_nullable(self) {
2675 let la_new = self.mk_lookahead_internal(la_body, new_la_tail, u32::MAX);
2676 return Some(la_new);
2677 }
2678 let la_rel = self.get_lookahead_rel(head);
2679 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2680 let tail_rel = self.get_lookahead_rel(new_la_tail);
2681 tail_rel.saturating_add(la_rel)
2682 } else {
2683 u32::MAX
2684 };
2685
2686 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2687 }
2688
2689 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2690 return Some(head);
2691 }
2692
2693 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2694 return Some(NodeId::TS);
2695 }
2696
2697 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2698 return Some(NodeId::TS);
2699 }
2700
2701 if head.is_inter(self) && tail == NodeId::TS {
2702 let mut alltopstar = true;
2703 head.iter_inter(
2704 self,
2705 &mut (|b, v| {
2706 alltopstar = b.ends_with_ts(v);
2707 }),
2708 );
2709
2710 if alltopstar {
2711 return Some(head);
2712 }
2713 }
2714
2715 if head.is_star(self) && head == tail {
2716 return Some(head);
2717 }
2718
2719 None
2720 }
2721
2722 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2723 #[cfg(feature = "reentrant-assert")]
2724 if !self.rw_active.insert((Kind::Union, left, right)) {
2725 #[cfg(debug_assertions)]
2726 panic!(
2727 "reentrant union rewrite {:?} | {:?}, this is a bug, please file an issue with the pattern",
2728 self.pp(left),
2729 self.pp(right)
2730 );
2731 #[cfg(not(debug_assertions))]
2732 return None;
2733 }
2734 let r = self.attempt_rw_union_2_inner(left, right);
2735 #[cfg(feature = "reentrant-assert")]
2736 self.rw_active.remove(&(Kind::Union, left, right));
2737 r
2738 }
2739
2740 fn attempt_rw_union_2_inner(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2741 use Kind::*;
2742
2743 if cfg!(feature = "norewrite") {
2744 return None;
2745 }
2746 if left == right {
2747 return Some(left);
2748 }
2749
2750 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2751 return Some(left);
2752 }
2753 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2754 return Some(right);
2755 }
2756
2757 if left.is_pred(self) && right.is_pred(self) {
2758 let l = left.pred_tset(self);
2759 let r = right.pred_tset(self);
2760 let solver = self.solver();
2761 let psi = solver.or_id(l, r);
2762 let rewrite = self.mk_pred(psi);
2763 return Some(rewrite);
2764 }
2765
2766 if left == NodeId::EPS && self.nullability(right) == Nullability::ALWAYS {
2767 let right_nulls_id = self.get_nulls_id(right);
2768 if self.mb.nb.array[right_nulls_id.0 as usize]
2769 .iter()
2770 .any(|n| n.rel == 0 && n.mask.has(Nullability::ALWAYS))
2771 {
2772 return Some(right);
2773 }
2774 }
2775
2776 if left.is_lookahead(self) && right.is_lookahead(self) {
2777 let lb = left.left(self);
2778 let lt = left.right(self);
2779 let lrel_extra = left.extra(self);
2780
2781 let rb = right.left(self);
2782 let rt = right.right(self);
2783 let rrel_extra = right.extra(self);
2784
2785 if lrel_extra == rrel_extra && lt.is_missing() && rt.is_missing() {
2786 let unioned = self.mk_union(lb, rb);
2787 let node = if lrel_extra == u32::MAX {
2788 self.mk_lookahead_internal(unioned, NodeId::MISSING, u32::MAX)
2789 } else {
2790 let rels = NullsId(lrel_extra);
2791 let lo = self.mb.nb.min_rel(rels);
2792 let hi = self.mb.nb.max_rel(rels);
2793 if lo == hi {
2794 self.mk_lookahead_internal(unioned, NodeId::MISSING, hi)
2795 } else {
2796 self.mk_lookahead_nid(unioned, NodeId::MISSING, rels)
2797 }
2798 };
2799 return Some(node);
2800 }
2801 }
2802
2803 if right.is_kind(self, Concat) {
2804 if left == NodeId::END
2805 && right.left(self) == NodeId::END
2806 && self.nullability(right).has(Nullability::END)
2807 {
2808 return Some(right);
2809 }
2810 if left == right.left(self) {
2812 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2813 let rw = self.mk_concat(left, rhs);
2814 return Some(rw);
2815 }
2816 if left.is_kind(self, Concat) {
2817 let head1 = left.left(self);
2818 let head2 = right.left(self);
2819
2820 if head1 == head2 {
2821 let t1 = left.right(self);
2822 let t2 = right.right(self);
2823 if head1 == NodeId::TS {
2825 if t2.has_concat_tail(self, t1) {
2826 return Some(left);
2827 }
2828 if t1.has_concat_tail(self, t2) {
2829 return Some(right);
2830 }
2831 }
2832 let un = self.mk_union(t1, t2);
2833 return Some(self.mk_concat(left.left(self), un));
2834 }
2835
2836 {
2838 let t1 = left.right(self);
2839 let t2 = right.right(self);
2840 if t1 == t2 {
2841 let begin_lb = (head1 == NodeId::BEGIN
2842 && head2.is_kind(self, Kind::Lookbehind))
2843 || (head2 == NodeId::BEGIN && head1.is_kind(self, Kind::Lookbehind));
2844 if begin_lb {
2845 let un = self.mk_union(head1, head2);
2846 return Some(self.mk_concat(un, t1));
2847 }
2848 }
2849 }
2850
2851 if false {
2855 let end1 = self.get_concat_end(left);
2856 let end2 = self.get_concat_end(right);
2857 if end1 == end2 {
2858 let contains_lookarounds =
2859 left.contains_lookaround(self) || right.contains_lookaround(self);
2860 let flags = left.flags_contains(self).or(right.flags_contains(self));
2861 if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2862 let rev1 = self.reverse(left).unwrap();
2863 let rev2 = self.reverse(right).unwrap();
2864
2865 let union_rev = self.mk_union(rev1, rev2);
2866 return Some(self.reverse(union_rev).unwrap());
2867 }
2868 }
2869 }
2870 }
2871 if left.is_pred(self) && left == right.left(self) {
2872 let un = self.mk_opt(right.right(self));
2873 let conc = self.mk_concat(left, un);
2874 return Some(conc);
2875 }
2876 }
2877
2878 if left == NodeId::EPS && right.is_plus(self) {
2879 let result = self.mk_star(right.left(self));
2880 return Some(result);
2881 }
2882
2883 if left.is_inter(self) && right.is_inter(self) {
2885 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2886 return Some(rw);
2887 }
2888 }
2889
2890 None
2891 }
2892
2893 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2894 let mut curr = node;
2895 while curr.is_inter(self) {
2896 out.push(self.get_left(curr));
2897 curr = self.get_right(curr);
2898 }
2899 out.push(curr);
2900 }
2901
2902 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2903 let mut curr = node;
2904 let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2905 if has_prefix {
2906 curr = self.get_right(curr);
2907 }
2908 let mut count = 0u32;
2909 let mut pred_set = None;
2910 while curr.is_concat(self) {
2911 let head = self.get_left(curr);
2912 if !head.is_pred(self) {
2913 return None;
2914 }
2915 let ps = head.pred_tset(self);
2916 match pred_set {
2917 None => pred_set = Some(ps),
2918 Some(existing) => {
2919 if existing != ps {
2920 return None;
2921 }
2922 }
2923 }
2924 curr = self.get_right(curr);
2925 count += 1;
2926 }
2927 if count == 0 || !curr.is_star(self) {
2928 return None;
2929 }
2930 Some((has_prefix, pred_set.unwrap(), curr, count))
2931 }
2932
2933 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2934 let mut j = 0;
2935 for &s in sub {
2936 while j < sup.len() && sup[j] < s {
2937 j += 1;
2938 }
2939 if j >= sup.len() || sup[j] != s {
2940 return false;
2941 }
2942 j += 1;
2943 }
2944 true
2945 }
2946
2947 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2948 if !left.is_inter(self) || !right.is_inter(self) {
2949 return None;
2950 }
2951
2952 let mut lc: Vec<NodeId> = Vec::new();
2953 let mut rc: Vec<NodeId> = Vec::new();
2954 self.collect_inter_components(left, &mut lc);
2955 self.collect_inter_components(right, &mut rc);
2956
2957 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2959 return Some(left);
2960 }
2961 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2963 return Some(right);
2964 }
2965
2966 if lc.len() == rc.len() {
2967 let mut diff_idx = None;
2968 for i in 0..lc.len() {
2969 if lc[i] != rc[i] {
2970 if diff_idx.is_some() {
2971 return None;
2972 }
2973 diff_idx = Some(i);
2974 }
2975 }
2976 if let Some(idx) = diff_idx {
2977 let a = lc[idx];
2978 let b = rc[idx];
2979 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2980 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2981 {
2982 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2983 return if ca < cb { Some(left) } else { Some(right) };
2984 }
2985 }
2986 }
2987 }
2988
2989 None
2990 }
2991
2992 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2993 #[cfg(feature = "reentrant-assert")]
2994 if !self.rw_active.insert((Kind::Inter, left, right)) {
2995 #[cfg(debug_assertions)]
2996 panic!(
2997 "reentrant inter rewrite {:?} & {:?}, this is a bug, please file an issue with the pattern",
2998 self.pp(left),
2999 self.pp(right)
3000 );
3001 #[cfg(not(debug_assertions))]
3002 return None;
3003 }
3004 let r = self.attempt_rw_inter_2_inner(left, right);
3005 #[cfg(feature = "reentrant-assert")]
3006 self.rw_active.remove(&(Kind::Inter, left, right));
3007 r
3008 }
3009
3010 fn attempt_rw_inter_2_inner(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
3011 if cfg!(feature = "norewrite") {
3012 return None;
3013 }
3014 if left == right {
3015 return Some(left);
3016 }
3017 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
3018 return Some(right);
3019 }
3020 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
3021 return Some(left);
3022 }
3023
3024 {
3025 let (l_min, l_max) = self.get_min_max_length(left);
3026 let (r_min, r_max) = self.get_min_max_length(right);
3027 if l_max < r_min || r_max < l_min {
3028 return Some(NodeId::BOT);
3029 }
3030 }
3031
3032 if left.is_pred(self) && right.is_pred(self) {
3033 let l = left.pred_tset(self);
3034 let r = right.pred_tset(self);
3035 let solver = self.solver();
3036 let psi = solver.and_id(l, r);
3037 let rewrite = self.mk_pred(psi);
3038 return Some(rewrite);
3039 }
3040
3041 for (a, b) in [(left, right), (right, left)] {
3042 if a.is_pred(self) && b.is_compl(self) {
3043 let cbody = b.left(self);
3044 if cbody.is_concat(self)
3045 && cbody.right(self) == NodeId::TS
3046 && cbody.left(self).is_pred(self)
3047 {
3048 let q = a.pred_tset(self);
3049 let p = cbody.left(self).pred_tset(self);
3050 let solver = self.solver();
3051 let notp = solver.not_id(p);
3052 let anded = solver.and_id(q, notp);
3053 return Some(self.mk_pred(anded));
3054 }
3055 }
3056 }
3057
3058 if left.is_concat(self) && right.is_concat(self) {
3059 if left.left(self) == right.left(self) {
3060 if left.left(self).is_pred(self) {
3061 let new_right = self.mk_inter(left.right(self), right.right(self));
3062 return Some(self.mk_concat(left.left(self), new_right));
3063 }
3064 }
3065 }
3066
3067 if right.is_compl(self) && right.left(self) == left {
3068 return Some(NodeId::BOT);
3069 }
3070
3071 if left.is_compl(self) && right.is_compl(self) {
3072 let bodies = self.mk_union(left.left(self), right.left(self));
3073 return Some(self.mk_compl(bodies));
3074 }
3075
3076 if left == NodeId::TOPPLUS {
3077 if right.is_pred_star(self).is_some() {
3078 let newloop = self.mk_plus(right.left(self));
3079 return Some(newloop);
3080 }
3081 if right.is_never_nullable(self) {
3082 return Some(right);
3083 }
3084 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
3085 return Some(NodeId::BOT);
3086 }
3087 if right.is_kind(self, Kind::Concat) {}
3088 }
3089
3090 {
3091 let l_is_la = left.is_lookahead(self);
3092 let r_is_la = right.is_lookahead(self);
3093 let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
3094 let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
3095 if l_is_la || r_is_la || l_is_cla || r_is_cla {
3096 let (la_node, other, concat_body) = if r_is_la {
3097 (right, left, NodeId::MISSING)
3098 } else if l_is_la {
3099 (left, right, NodeId::MISSING)
3100 } else if r_is_cla {
3101 (right.left(self), left, right.right(self))
3102 } else {
3103 (left.left(self), right, left.right(self))
3104 };
3105 let la_body = la_node.left(self);
3106 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
3107 let inter_right = if concat_body.is_missing() {
3108 la_tail
3109 } else {
3110 self.mk_concat(la_tail, concat_body)
3111 };
3112 let new_body = self.mk_inter(other, inter_right);
3113 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
3114 return Some(self.mk_concat(la, new_body));
3115 }
3116 }
3117
3118 if self.get_kind(right) == Kind::Compl {
3119 let compl_body = right.left(self);
3120 if left == compl_body {
3121 return Some(NodeId::BOT);
3122 }
3123 if compl_body.is_concat(self) {
3124 let compl_head = compl_body.left(self);
3125 if compl_body.right(self) == NodeId::TS && compl_head == left {
3126 return Some(NodeId::BOT);
3127 }
3128 }
3129 }
3130
3131 if let Some(pleft) = left.is_pred_star(self) {
3132 if let Some(pright) = right.is_pred_star(self) {
3133 let merged = self.mk_inter(pleft, pright);
3134 return Some(self.mk_star(merged));
3135 }
3136 }
3137
3138 for (star, lb) in [(left, right), (right, left)] {
3139 if star.is_star(self) && lb.is_lookbehind(self) {
3140 return Some(lb);
3141 }
3142 }
3143
3144 for (star, pred) in [(left, right), (right, left)] {
3145 if let Some(star_pred) = star.is_pred_star(self) {
3146 if pred.is_pred(self) {
3147 let sp = star_pred.pred_tset(self);
3148 let op = pred.pred_tset(self);
3149 if self.solver().and_id(sp, op) == TSetId::EMPTY {
3150 return Some(NodeId::BOT);
3151 }
3152 }
3153 }
3154 }
3155
3156 {
3157 let l_union_lb = self.union_has_leading_lb(left);
3158 let r_union_lb = self.union_has_leading_lb(right);
3159 if l_union_lb || r_union_lb {
3160 let (u, other) = if l_union_lb {
3161 (left, right)
3162 } else {
3163 (right, left)
3164 };
3165 let mut acc = NodeId::BOT;
3166 u.iter_union(self, &mut |b, v| {
3167 let inter = b.mk_inter(v, other);
3168 acc = b.mk_union(inter, acc);
3169 });
3170 return Some(acc);
3171 }
3172 }
3173
3174 {
3175 let l_is_clb = left.is_concat(self)
3176 && left.left(self).is_lookbehind(self)
3177 && self.lookbehind_prev_missing(left.left(self));
3178 let r_is_clb = right.is_concat(self)
3179 && right.left(self).is_lookbehind(self)
3180 && self.lookbehind_prev_missing(right.left(self));
3181 if l_is_clb || r_is_clb {
3182 let (lb, body) = if l_is_clb && r_is_clb {
3183 let lb1 = left.left(self);
3184 let lb2 = right.left(self);
3185 let inner = self.mk_inter(
3186 self.get_lookbehind_inner(lb1),
3187 self.get_lookbehind_inner(lb2),
3188 );
3189 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
3190 let body = self.mk_inter(left.right(self), right.right(self));
3191 (lb, body)
3192 } else if l_is_clb {
3193 let lb = left.left(self);
3194 let body = self.mk_inter(left.right(self), right);
3195 (lb, body)
3196 } else {
3197 let lb = right.left(self);
3198 let body = self.mk_inter(left, right.right(self));
3199 (lb, body)
3200 };
3201 return Some(self.mk_concat(lb, body));
3202 }
3203 }
3204
3205 {
3206 let l_has_la = self.has_trailing_la(left);
3207 let r_has_la = self.has_trailing_la(right);
3208 if l_has_la || r_has_la {
3209 let (body, la) = if l_has_la && r_has_la {
3210 let (lbody, l_la) = self.strip_trailing_la(left);
3211 let (rbody, r_la) = self.strip_trailing_la(right);
3212 let inner = self.mk_inter(
3213 self.get_lookahead_inner(l_la),
3214 self.get_lookahead_inner(r_la),
3215 );
3216 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
3217 let body = self.mk_inter(lbody, rbody);
3218 (body, la)
3219 } else if l_has_la {
3220 let (lbody, la) = self.strip_trailing_la(left);
3221 let body = self.mk_inter(lbody, right);
3222 (body, la)
3223 } else {
3224 let (rbody, la) = self.strip_trailing_la(right);
3225 let body = self.mk_inter(left, rbody);
3226 (body, la)
3227 };
3228 return Some(self.mk_concat(body, la));
3229 }
3230 }
3231
3232 None
3233 }
3234
3235 fn lookbehind_prev_missing(&self, lb: NodeId) -> bool {
3236 matches!(self.get_lookbehind_prev(lb), NodeId::MISSING)
3237 }
3238
3239 fn union_has_leading_lb(&self, node: NodeId) -> bool {
3240 if !node.is_union(self) {
3241 return false;
3242 }
3243 node.any_union_component(self, |v| {
3244 v.is_lookbehind(self) || (v.is_concat(self) && v.left(self).is_lookbehind(self))
3245 })
3246 }
3247
3248 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
3249 if cfg!(feature = "norewrite") {
3250 return None;
3251 }
3252 debug_assert!(self.get_kind(right_union) == Kind::Union);
3253
3254 let mut rewritten = None;
3255 right_union.iter_union_while(
3256 self,
3257 &mut (|b, v| {
3258 if let Some(rw) = b.attempt_rw_union_2(left, v) {
3259 rewritten = Some((v, rw));
3260 false
3261 } else {
3262 true
3263 }
3264 }),
3265 );
3266
3267 if let Some(rw) = rewritten {
3268 let mut new_union = NodeId::BOT;
3269 right_union.iter_union(
3270 self,
3271 &mut (|b, v| {
3272 if v == rw.0 {
3273 new_union = b.mk_union(rw.1, new_union)
3274 } else {
3275 new_union = b.mk_union(v, new_union)
3276 }
3277 }),
3278 );
3279 return Some(new_union);
3280 };
3281
3282 None
3283 }
3284
3285 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
3286 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
3287 debug_assert!(tail != NodeId::MISSING);
3288 let key = NodeKey {
3289 kind: Kind::Concat,
3290 left: head,
3291 right: tail,
3292 extra: u32::MAX,
3293 };
3294 if let Some(id) = self.key_is_created(&key) {
3295 return *id;
3296 }
3297
3298 if head == NodeId::BOT || tail == NodeId::BOT {
3299 return NodeId::BOT;
3300 }
3301 if head == NodeId::EPS {
3302 return tail;
3303 }
3304 if tail == NodeId::EPS {
3305 return head;
3306 }
3307
3308 if head.is_kind(self, Kind::Concat) {
3310 let left = head.left(self);
3311 let newright = self.mk_concat(head.right(self), tail);
3312 let updated = self.mk_concat(left, newright);
3313 return self.init_as(key, updated);
3314 }
3315
3316 if head.is_union(self)
3327 && head.contains_lookahead(self)
3328 && self.nullability(tail) == Nullability::NEVER
3329 && !self.contains_look(tail)
3330 {
3331 let (hl, hr) = (head.left(self), head.right(self));
3332
3333 if (hl.is_eps()
3334 && hr.is_lookahead(self)
3335 && self.get_lookahead_tail(hr).is_missing())
3336 || (hr.is_eps()
3337 && hl.is_lookahead(self)
3338 && self.get_lookahead_tail(hl).is_missing())
3339 {
3340 return self.init_as(key, tail);
3341 }
3342 let l = self.mk_concat(hl, tail);
3343 let r = self.mk_concat(hr, tail);
3344 let u = self.mk_union(l, r);
3345 return self.init_as(key, u);
3346 }
3347
3348 if self.get_kind(head) == Kind::End
3349 && !tail.is_lookbehind(self)
3350 && !self.is_nullable(tail, Nullability::EMPTYSTRING)
3351 {
3352 return NodeId::BOT;
3353 }
3354
3355 if tail.is_concat(self) && !head.is_lookahead(self) {
3356 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
3357 let upd = self.mk_concat(rw, tail.right(self));
3358 return self.init_as(key, upd);
3359 }
3360 }
3361
3362 if let Some(rw) = self.attempt_rw_concat_2(head, tail) {
3363 return self.init_as(key, rw);
3364 }
3365
3366 match (self.get_kind(head), self.get_kind(tail)) {
3367 (Kind::Star, Kind::Concat) if head.is_star(self) => {
3369 let rl = tail.left(self);
3370 if head == rl {
3371 return self.init_as(key, tail);
3372 }
3373 }
3374 (_, Kind::Concat) => {
3376 let curr = head;
3377 let h2 = self.mk_concat(curr, tail.left(self));
3378 let tr = tail.right(self);
3379 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
3380 return self.init_as(key, new);
3381 }
3382 }
3383 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
3384 return self.init_as(key, tail);
3385 }
3386 _ => {}
3387 }
3388
3389 self.init(key)
3390 }
3391
3392 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
3393 let lb_body = {
3395 match self.starts_with_ts(lb_body) {
3396 true => lb_body,
3397 false => self.mk_concat(NodeId::TS, lb_body),
3398 }
3399 };
3400 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
3402 }
3403
3404 fn mk_lookbehind_internal(
3405 &mut self,
3406 lb_body: NodeId,
3407 lb_prev: NodeId,
3408 ) -> Result<NodeId, ResharpError> {
3409 debug_assert!(lb_body != NodeId::MISSING);
3410 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
3411 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
3412 return Ok(NodeId::BOT);
3413 }
3414 if lb_body == NodeId::TS {
3415 return Ok(if lb_prev == NodeId::MISSING {
3416 NodeId::EPS
3417 } else {
3418 lb_prev
3419 });
3420 }
3421 if lb_body == NodeId::EPS {
3422 match lb_prev {
3423 NodeId::MISSING => return Ok(NodeId::EPS),
3424 NodeId::EPS => return Ok(NodeId::EPS),
3425 _ => return Ok(lb_prev),
3426 }
3427 }
3428
3429 let key = NodeKey {
3430 kind: Kind::Lookbehind,
3431 left: lb_body,
3432 right: lb_prev,
3433 extra: u32::MAX,
3434 };
3435 match self.key_is_created(&key) {
3436 Some(id) => Ok(*id),
3437 None => {
3438 if lb_prev == NodeId::TS {
3439 return Ok(self.mk_concat(lb_prev, lb_body));
3440 }
3441
3442 Ok(self.init(key))
3443 }
3444 }
3445 }
3446
3447 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3448 let la_body = if la_tail.is_missing() && rel == 0 {
3449 self.flatten_la_body(la_body)
3450 } else {
3451 la_body
3452 };
3453 let la_body = {
3455 match self.ends_with_ts(la_body) {
3456 true => la_body,
3457 false => self.mk_concat(la_body, NodeId::TS),
3458 }
3459 };
3460 let rel = if NodeId::MISSING == la_tail {
3461 rel
3462 } else {
3463 match la_tail.is_center_nullable(self) {
3464 false => u32::MAX,
3465 true => rel,
3466 }
3467 };
3468
3469 self.mk_lookahead_internal(la_body, la_tail, rel)
3470 }
3471
3472 fn flatten_la_body(&mut self, node: NodeId) -> NodeId {
3473 if !self
3474 .get_meta_flags(node)
3475 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
3476 {
3477 return node;
3478 }
3479 match self.get_kind(node) {
3480 Kind::Lookahead
3481 if self.get_lookahead_tail(node).is_missing()
3482 && self.get_lookahead_rel(node) == 0 =>
3483 {
3484 let inner = self.get_lookahead_inner(node);
3485 let inner = self.strip_trailing_ts(inner);
3486 self.flatten_la_body(inner)
3487 }
3488 Kind::Union => {
3489 let l = self.flatten_la_body(node.left(self));
3490 let r = self.flatten_la_body(node.right(self));
3491 self.mk_union(l, r)
3492 }
3493 _ => node,
3494 }
3495 }
3496
3497 fn strip_trailing_ts(&self, node: NodeId) -> NodeId {
3498 if node.is_concat(self) && node.right(self) == NodeId::TS {
3499 node.left(self)
3500 } else {
3501 node
3502 }
3503 }
3504
3505 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3507 let extra = if rel == u32::MAX {
3508 u32::MAX
3509 } else {
3510 self.mb.nb.add_rel(NullsId::ALWAYS0, rel).0
3511 };
3512 let key = NodeKey {
3513 kind: Kind::Lookahead,
3514 left: la_body,
3515 right: la_tail,
3516 extra,
3517 };
3518 if let Some(id) = self.key_is_created(&key) {
3519 return *id;
3520 }
3521 if la_body == NodeId::TS {
3522 if rel == 0 {
3523 return la_tail.missing_to_eps();
3524 } else {
3525 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3526 }
3527 }
3528 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3529 return NodeId::BOT;
3530 }
3531 if la_tail.is_missing() && rel == u32::MAX {
3532 return NodeId::BOT;
3533 }
3534
3535 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
3536 return la_body;
3537 }
3538
3539 if la_tail == NodeId::TS && !self.ends_with_end(la_body) {
3540 if rel == 0 || rel == u32::MAX {
3541 return self.mk_concat(la_body, NodeId::TS);
3542 } else if rel == u32::MAX {
3543 return self.mk_begins_with(la_body);
3544 }
3545 }
3546
3547 if rel == u32::MAX {
3548 if la_tail.is_missing() {
3549 return NodeId::BOT;
3550 }
3551
3552 if self.is_always_nullable(la_body) {
3553 return la_tail.missing_to_eps();
3554 }
3555
3556 if la_tail != NodeId::MISSING {
3557 match self.get_kind(la_tail) {
3558 _ => {
3559 if la_body.is_compl_plus_end(self) {
3560 let minlen = self.get_min_length_only(la_tail);
3561 if minlen >= 1 {
3562 return NodeId::BOT;
3563 }
3564 }
3565 }
3566 }
3567 }
3568 }
3569
3570 if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3571 let la_body2 = self.get_lookahead_inner(la_tail);
3572 let body1_ts = self.mk_concat(la_body, NodeId::TS);
3573 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3574 let new_la_body = self.mk_inter(body1_ts, body2_ts);
3575 let new_la_rel = self.get_lookahead_rel(la_tail);
3576 let new_la_tail = self.get_lookahead_tail(la_tail);
3577 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3578 }
3579
3580 if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3581 let la_body_right = la_body.right(self);
3582 if self.is_always_nullable(la_body_right) {
3583 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3584 }
3585 let bodyright = la_body.right(self);
3586 if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3587 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3588 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3589 }
3590 }
3591
3592 if la_tail != NodeId::MISSING {
3593 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3594 let lpred = la_body.left(self);
3595 if lpred.is_pred(self) {
3596 let l = lpred.pred_tset(self);
3597 let r = la_tail.pred_tset(self);
3598 let psi_and = self.solver().and_id(l, r);
3599 let rewrite = self.mk_pred(psi_and);
3600 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3601 let new_right =
3602 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3603 return self.mk_concat(rewrite, new_right);
3604 }
3605 }
3606 }
3607
3608 self.get_node_id(key)
3609 }
3610
3611 pub fn mk_lookahead_range(&mut self, la_body: NodeId, la_tail: NodeId, lo: u32, hi: u32) -> NodeId {
3612 if hi == u32::MAX {
3613 return self.mk_lookahead_internal(la_body, la_tail, u32::MAX);
3614 }
3615 debug_assert!(lo <= hi);
3616 let mut rels = self.mb.nb.add_rel(NullsId::ALWAYS0, lo);
3617 for k in lo + 1..=hi {
3618 let part = self.mb.nb.add_rel(NullsId::ALWAYS0, k);
3619 rels = self.mb.nb.or_id(rels, part);
3620 }
3621 self.mk_lookahead_nid(la_body, la_tail, rels)
3622 }
3623
3624 pub fn mk_lookahead_nid(&mut self, la_body: NodeId, la_tail: NodeId, rels: NullsId) -> NodeId {
3625 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3626 return NodeId::BOT;
3627 }
3628 let key = NodeKey {
3629 kind: Kind::Lookahead,
3630 left: la_body,
3631 right: la_tail,
3632 extra: rels.0,
3633 };
3634 if let Some(id) = self.key_is_created(&key) {
3635 return *id;
3636 }
3637 self.get_node_id(key)
3638 }
3639
3640 pub fn mk_ordered(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3641 let has_match = (packed >> 16) > 0;
3642 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3643 return NodeId::BOT;
3644 }
3645 debug_assert!(
3646 body == NodeId::BOT || !self.is_infinite(body),
3647 "Counted body must have finite max length"
3648 );
3649 let chain = self.prune_counted_chain(body, chain);
3650 let key = NodeKey {
3651 kind: Kind::Ordered,
3652 left: body,
3653 right: chain,
3654 extra: packed,
3655 };
3656 if let Some(id) = self.key_is_created(&key) {
3657 return *id;
3658 }
3659 self.get_node_id(key)
3660 }
3661
3662 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3663 if chain == NodeId::MISSING || body == NodeId::BOT {
3664 return chain;
3665 }
3666 if self.nullability(body) != Nullability::NEVER {
3667 return NodeId::MISSING;
3668 }
3669 let chain_body = chain.left(self);
3670 if chain_body == NodeId::BOT {
3671 return chain;
3672 }
3673 let not_begins = self.mk_not_begins_with(body);
3674 let inter = self.mk_inter(chain_body, not_begins);
3675 let is_empty = inter == NodeId::BOT;
3676 if is_empty {
3677 self.prune_counted_chain(body, chain.right(self))
3678 } else {
3679 chain
3680 }
3681 }
3682
3683 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3684 let (_, p_max) = self.get_min_max_length(body);
3685 if p_max == 0 && !self.contains_anchors(body) {
3686 let not_body = self.mk_compl(body);
3687 return self.mk_inter(NodeId::EPS, not_body);
3688 }
3689 let neg_inner = self.mk_concat(body, NodeId::TS);
3690 let neg_part = self.mk_compl(neg_inner);
3691 let conc = self.mk_concat(neg_part, NodeId::END);
3692 self.mk_lookahead(conc, NodeId::MISSING, rel)
3693 }
3694
3695 pub fn neg_lookbehind_supported(&mut self, body: NodeId) -> bool {
3696 if body.is_center_nullable(self) {
3697 return true;
3698 }
3699 if self.get_node(body).kind == Kind::Pred {
3700 return true;
3701 }
3702 let uc = crate::unicode_classes::utf8_char(self);
3703 let allowed = self.mk_union(NodeId::BEGIN, uc);
3704 self.subsumes(allowed, body).unwrap_or(false)
3705 }
3706
3707 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3708 if body.is_center_nullable(self) {
3709 return NodeId::BOT;
3710 }
3711 match self.get_node(body).kind {
3712 _ => {
3719 let body_ts = self.mk_concat(NodeId::TS, body);
3720 let compl = self.mk_compl(body_ts);
3721 let not_begins = self.mk_concat(NodeId::BEGIN, compl);
3722 self.mk_lookbehind_internal(not_begins, NodeId::MISSING)
3723 .unwrap()
3724 }
3725 }
3726 }
3727
3728 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3729 debug_assert!(left != NodeId::MISSING);
3730 debug_assert!(right != NodeId::MISSING);
3731 if left > right {
3732 return self.mk_union(right, left);
3733 }
3734 let key = NodeKey {
3735 kind: Kind::Union,
3736 left,
3737 right,
3738 extra: u32::MAX,
3739 };
3740 if let Some(id) = self.key_is_created(&key) {
3741 return *id;
3742 }
3743 if left == right {
3745 return self.init_as(key, left);
3746 }
3747 if left == NodeId::BOT {
3748 return self.init_as(key, right);
3749 }
3750 if right == NodeId::BOT {
3751 return self.init_as(key, left);
3752 }
3753 if right == NodeId::TS {
3754 return self.init_as(key, right);
3755 }
3756 if left == NodeId::TS {
3757 return self.init_as(key, left);
3758 }
3759 if left == NodeId::EPS && right == NodeId::END {
3760 return self.init_as(key, NodeId::EPS);
3761 }
3762 if left == NodeId::BEGIN && self.get_kind(right) == Kind::Lookbehind {
3764 let lb_body = self.get_lookbehind_inner(right);
3765 let lb_prev = self.get_lookbehind_prev(right);
3766 let new_body = self.mk_union(NodeId::BEGIN, lb_body);
3767 let rewritten = self.mk_lookbehind(new_body, lb_prev);
3768 return self.init_as(key, rewritten);
3769 }
3770 match (self.get_kind(left), self.get_kind(right)) {
3771 (Kind::Union, _) => {
3772 self.iter_unions_b(left, &mut |b, v| {
3773 b.temp_vec.push(v);
3774 });
3775 self.iter_unions_b(right, &mut |b, v| {
3776 b.temp_vec.push(v);
3777 });
3778 self.temp_vec.sort();
3779 let tree = self.temp_vec.clone();
3780 self.temp_vec.clear();
3781 let newnode = tree
3782 .iter()
3783 .rev()
3784 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3785 return self.init_as(key, newnode);
3786 }
3787 (_, Kind::Union) => {
3788 let rleft = right.left(self);
3789 if left > rleft {
3791 self.iter_unions_b(left, &mut |b, v| {
3792 b.temp_vec.push(v);
3793 });
3794 self.iter_unions_b(right, &mut |b, v| {
3795 b.temp_vec.push(v);
3796 });
3797 self.temp_vec.sort();
3798 let tree = self.temp_vec.clone();
3799 self.temp_vec.clear();
3800 let newnode = tree
3801 .iter()
3802 .rev()
3803 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3804 return self.init_as(key, newnode);
3805 } else {
3806 if let Some(rw) = self.attempt_rw_unions(left, right) {
3807 return self.init_as(key, rw);
3808 }
3809 }
3810 }
3811 _ => {}
3812 }
3813 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3814 return self.init_as(key, rw);
3815 }
3816 self.init(key)
3817 }
3818
3819 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3820 debug_assert!(left_id != NodeId::MISSING);
3821 debug_assert!(right_id != NodeId::MISSING);
3822 if left_id == right_id {
3823 return left_id;
3824 }
3825 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3826 return NodeId::BOT;
3827 }
3828 if left_id == NodeId::TS {
3829 return right_id;
3830 }
3831 if right_id == NodeId::TS {
3832 return left_id;
3833 }
3834 if left_id > right_id {
3835 return self.mk_inter(right_id, left_id);
3836 }
3837 let key = NodeKey {
3838 kind: Kind::Inter,
3839 left: left_id,
3840 right: right_id,
3841 extra: u32::MAX,
3842 };
3843 if let Some(id) = self.key_is_created(&key) {
3844 return *id;
3845 }
3846
3847 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3848 return self.init_as(key, rw);
3849 }
3850
3851 self.init(key)
3852 }
3853
3854 fn eps_inter_look(&self, left_id: NodeId, right_id: NodeId) -> Option<NodeId> {
3855 let other = if left_id.is_eps() {
3856 right_id
3857 } else if right_id.is_eps() {
3858 left_id
3859 } else {
3860 return None;
3861 };
3862 if other.is_lookbehind(self) {
3863 return Some(other);
3864 }
3865 if other.is_lookahead(self) && self.get_lookahead_rel(other) == 0 {
3866 return Some(other);
3867 }
3868 None
3869 }
3870
3871 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3872 let node = NodeKey {
3873 kind,
3874 left: NodeId::MISSING,
3875 right: NodeId::MISSING,
3876 extra: u32::MAX,
3877 };
3878 self.init(node)
3879 }
3880
3881 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3882 let star = self.mk_star(body_id);
3883 self.mk_concat(body_id, star)
3884 }
3885 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3886 let opt = self.mk_opt(body_id);
3887 let mut nodes1 = vec![];
3888 for _ in lower..upper {
3889 nodes1.push(opt);
3890 }
3891 for _ in 0..lower {
3892 nodes1.push(body_id);
3893 }
3894 self.mk_concats(nodes1.into_iter())
3895 }
3896 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3897 self.mk_union(NodeId::EPS, body_id)
3898 }
3899
3900 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3901 let key = NodeKey {
3902 kind: Kind::Star,
3903 left: body_id,
3904 right: NodeId::MISSING,
3905 extra: 0,
3906 };
3907 if let Some(id) = self.key_is_created(&key) {
3908 return *id;
3909 }
3910 if body_id.is_begin() || body_id.is_end() {
3911 return NodeId::EPS;
3912 }
3913 if body_id.is_star(self) {
3915 return body_id;
3916 }
3917 if body_id.is_plus(self) {
3919 return body_id.right(self);
3920 }
3921 self.get_node_id(key)
3922 }
3923
3924 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3927 self.lengths[node_id.0 as usize].es_null
3928 }
3929
3930 #[inline(always)]
3931 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3932 self.get_meta(node_id)
3933 .flags
3934 .nullability()
3935 .has(Nullability::CENTER.or(Nullability::END))
3936 }
3937
3938 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3939 self.get_only_nullability(node_id)
3940 }
3941
3942 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3943 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3944 }
3945
3946 pub fn pp(&self, node_id: NodeId) -> String {
3947 let mut s = String::new();
3948 self.ppw(&mut s, node_id).unwrap();
3949 s
3950 }
3951
3952 pub fn factor_suffixes(&mut self, node: NodeId) -> Result<NodeId, ResharpError> {
3955 let mut memo = FxHashMap::default();
3956 self.factor_suffixes_rec(node, &mut memo)
3957 }
3958
3959 fn factor_suffixes_rec(
3960 &mut self,
3961 node: NodeId,
3962 memo: &mut FxHashMap<NodeId, NodeId>,
3963 ) -> Result<NodeId, ResharpError> {
3964 if let Some(&v) = memo.get(&node) {
3965 return Ok(v);
3966 }
3967 let result = match self.get_kind(node) {
3968 Kind::Pred | Kind::Begin | Kind::End => node,
3969 Kind::Star => {
3970 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3971 self.mk_star(inner)
3972 }
3973 Kind::Compl => {
3974 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3975 self.mk_compl(inner)
3976 }
3977 Kind::Concat => {
3978 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3979 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3980 self.mk_concat(l, r)
3981 }
3982 Kind::Inter => {
3983 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3984 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3985 self.mk_inter(l, r)
3986 }
3987 Kind::Union => {
3988 let mut comps: Vec<NodeId> = Vec::new();
3989 node.iter_union(self, &mut |_, c| comps.push(c));
3990 let mut factored: Vec<NodeId> = Vec::with_capacity(comps.len());
3991 let mut reversible = true;
3992 for c in &comps {
3993 let f = self.factor_suffixes_rec(*c, memo)?;
3994 if c.contains_lookaround(self)
3995 || c.flags_contains(self).has(MetaFlags::CONTAINS_ANCHORS)
3996 {
3997 reversible = false;
3998 }
3999 factored.push(f);
4000 }
4001 if !reversible || factored.len() < 2 {
4002 let mut acc = factored[0];
4003 for c in factored.into_iter().skip(1) {
4004 acc = self.mk_union(acc, c);
4005 }
4006 acc
4007 } else {
4008 let mut rev_comps: Vec<NodeId> = Vec::with_capacity(factored.len());
4009 for c in factored {
4010 rev_comps.push(self.reverse(c)?);
4011 }
4012 let mut acc = rev_comps[0];
4013 for c in rev_comps.into_iter().skip(1) {
4014 acc = self.mk_union(acc, c);
4015 }
4016 self.reverse(acc)?
4017 }
4018 }
4019 Kind::Lookahead => {
4020 let inner = self.get_lookahead_inner(node);
4021 let tail = self.get_lookahead_tail(node);
4022 let rel = self.get_lookahead_rel(node);
4023 let ni = self.factor_suffixes_rec(inner, memo)?;
4024 let nt = self.factor_suffixes_rec(tail, memo)?;
4025 self.mk_lookahead(ni, nt, rel)
4026 }
4027 Kind::Lookbehind => {
4028 let inner = self.get_lookbehind_inner(node);
4029 let prev = self.get_lookbehind_prev(node);
4030 let ni = self.factor_suffixes_rec(inner, memo)?;
4031 let np = if prev != NodeId::MISSING {
4032 self.factor_suffixes_rec(prev, memo)?
4033 } else {
4034 prev
4035 };
4036 self.mk_lookbehind(ni, np)
4037 }
4038 Kind::Ordered => node,
4039 };
4040 memo.insert(node, result);
4041 Ok(result)
4042 }
4043
4044 #[allow(dead_code)]
4045 pub fn pp_nulls(&self, node_id: NodeId) -> String {
4046 let nu = self.get_nulls_id(node_id);
4047 let nr = self.mb.nb.get_set_ref(nu);
4048 let s1 = format!("{:?}", nr);
4049 s1
4050 }
4051
4052 #[allow(dead_code)]
4053 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
4054 match self.get_tregex(term_id) {
4055 TRegex::Leaf(node_id) => {
4056 let mut s = String::new();
4057 self.ppw(&mut s, *node_id).unwrap();
4058 s
4059 }
4060 TRegex::ITE(cond, then_id, else_id) => {
4061 format!(
4062 "ITE({},{},{})",
4063 self.solver_ref().pp(*cond),
4064 self.ppt(*then_id),
4065 self.ppt(*else_id)
4066 )
4067 }
4068 }
4069 }
4070
4071 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
4072 if cfg!(feature = "graphviz") {
4073 match node_id {
4074 NodeId::MISSING => return write!(s, "MISSING"),
4075 NodeId::BOT => return write!(s, "⊥"),
4076 NodeId::TS => return write!(s, "_*"),
4077 NodeId::TOP => return write!(s, "_"),
4078 NodeId::EPS => return write!(s, "ε"),
4079 _ => {}
4080 }
4081 }
4082
4083 match node_id {
4084 NodeId::MISSING => return write!(s, "MISSING"),
4085 NodeId::BOT => return write!(s, "⊥"),
4086 NodeId::TS => return write!(s, "_*"),
4087 NodeId::TOP => return write!(s, "_"),
4088 NodeId::EPS => return write!(s, ""),
4089 _ => {}
4090 }
4091
4092 match self.get_kind(node_id) {
4093 Kind::End => write!(s, r"\z"),
4094 Kind::Begin => write!(s, r"\A"),
4095 Kind::Pred => {
4096 let psi = node_id.pred_tset(self);
4097 if psi == TSetId::EMPTY {
4098 write!(s, r"⊥")
4099 } else if psi == TSetId::FULL {
4100 write!(s, r"_")
4101 } else {
4102 write!(s, "{}", self.solver_ref().pp(psi))
4103 }
4104 }
4105 Kind::Inter => {
4106 write!(s, "(")?;
4107 self.ppw(s, node_id.left(self))?;
4108 write!(s, "&")?;
4109 let mut curr = node_id.right(self);
4110 while curr.is_inter(self) {
4111 let n = curr.left(self);
4112 self.ppw(s, n)?;
4113 write!(s, "&")?;
4114 curr = curr.right(self);
4115 }
4116 self.ppw(s, curr)?;
4117 write!(s, ")")
4118 }
4119 Kind::Union => {
4120 let left = node_id.left(self);
4121 let right = node_id.right(self);
4122 write!(s, "(")?;
4123 self.ppw(s, left)?;
4124 write!(s, "|")?;
4125 let mut curr = right;
4126 while self.get_kind(curr) == Kind::Union {
4127 let n = curr.left(self);
4128 self.ppw(s, n)?;
4129 write!(s, "|")?;
4130 curr = curr.right(self);
4131 }
4132 self.ppw(s, curr)?;
4133 write!(s, ")")
4134 }
4135 Kind::Concat => {
4136 let left = node_id.left(self);
4137 let right = node_id.right(self);
4138 if right.is_star(self) && right.left(self) == left {
4139 self.ppw(s, left)?;
4140 write!(s, "+")?;
4141 return Ok(());
4142 }
4143 if right.is_concat(self) {
4144 let rl = right.left(self);
4145 if rl.is_star(self) && rl.left(self) == left {
4146 self.ppw(s, left)?;
4147 write!(s, "+")?;
4148 return self.ppw(s, right.right(self));
4149 }
4150 }
4151 if right.is_concat(self) && right.left(self) == left {
4152 let mut num = 1;
4153 let mut right = right;
4154 while right.is_concat(self) && right.left(self) == left {
4155 num += 1;
4156 right = right.right(self);
4157 }
4158 if let Some(inner) = left.is_opt_v(self) {
4160 let mut inner_count = 0;
4161 let mut right2 = right;
4162 while right2.is_concat(self) && right2.left(self) == inner {
4163 inner_count += 1;
4164 right2 = right2.right(self);
4165 }
4166 if right2 == inner {
4167 inner_count += 1;
4168 self.ppw(s, inner)?;
4169 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
4170 }
4171 if inner_count > 0 {
4172 self.ppw(s, inner)?;
4173 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
4174 return self.ppw(s, right2);
4175 }
4176 }
4177 self.ppw(s, left)?;
4178 if right == left {
4179 num += 1;
4180 return write!(s, "{{{}}}", num);
4181 }
4182 if num <= 3 && left.is_pred(self) {
4183 for _ in 1..num {
4184 self.ppw(s, left)?;
4185 }
4186 return self.ppw(s, right);
4187 } else {
4188 write!(s, "{{{}}}", num)?;
4189 return self.ppw(s, right);
4190 }
4191 }
4192 self.ppw(s, left)?;
4193 self.ppw(s, right)
4194 }
4195 Kind::Star => {
4196 let left = node_id.left(self);
4197 let leftkind = self.get_kind(left);
4198 match leftkind {
4199 Kind::Concat | Kind::Star | Kind::Compl => {
4200 write!(s, "(")?;
4201 self.ppw(s, left)?;
4202 write!(s, ")")?;
4203 }
4204 _ => {
4205 self.ppw(s, left)?;
4206 }
4207 };
4208 write!(s, "*")
4209 }
4210 Kind::Compl => {
4211 write!(s, "~(")?;
4212 self.ppw(s, node_id.left(self))?;
4213 write!(s, ")")
4214 }
4215 Kind::Lookbehind => {
4216 let lbleft = self.get_lookbehind_prev(node_id);
4217 let lbinner = self.get_lookbehind_inner(node_id);
4218 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
4219 if lbleft != NodeId::MISSING {
4220 write!(s, "❮")?;
4221 self.ppw(s, lbleft)?;
4222 write!(s, "❯")?;
4223 }
4224
4225 write!(s, "(?<=")?;
4226 self.ppw(s, lbinner)?;
4227 write!(s, ")")
4228 }
4229 Kind::Lookahead => {
4230 let inner = self.get_lookahead_inner(node_id);
4231 write!(s, "(?=")?;
4232 self.ppw(s, inner)?;
4233 write!(s, ")")?;
4234 let extra = self.get_extra(node_id);
4235 if extra == u32::MAX {
4236 write!(s, "{{∅}}")?;
4237 } else {
4238 let rels = NullsId(extra);
4239 let hi = self.mb.nb.max_rel(rels);
4240 let lo = self.mb.nb.min_rel(rels);
4241 if hi != 0 {
4242 write!(s, "{{")?;
4243 if lo == hi {
4244 write!(s, "{hi}")?;
4245 } else {
4246 write!(s, "{lo}..{hi}")?;
4247 }
4248 write!(s, "}}")?;
4249 }
4250 }
4251 if node_id.right(self) == NodeId::MISSING {
4252 Ok(())
4253 } else {
4254 write!(s, "❮")?;
4255 self.ppw(s, node_id.right(self))?;
4256 write!(s, "❯")
4257 }
4258 }
4259 Kind::Ordered => {
4260 let body = node_id.left(self);
4261 let packed = self.get_extra(node_id);
4262 let step = packed & 0xFFFF;
4263 let best = packed >> 16;
4264 write!(s, "#(")?;
4265 self.ppw(s, body)?;
4266 write!(s, ")s{}b{}", step, best)
4267 }
4268 }
4269 }
4270
4271 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
4272 self.mk_concat(node, NodeId::TS)
4273 }
4274
4275 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
4276 let node_ts = self.mk_concat(node, NodeId::TS);
4277 self.mk_compl(node_ts)
4278 }
4279
4280 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
4281 let notset = self.solver().not_id(set);
4282 self.mk_pred(notset)
4283 }
4284
4285 pub fn mk_u8(&mut self, char: u8) -> NodeId {
4286 let set_id = self.solver().u8_to_set_id(char);
4287 self.mk_pred(set_id)
4288 }
4289
4290 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
4291 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
4292 self.mk_pred(rangeset)
4293 }
4294
4295 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
4296 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
4297 for &(lo, hi) in &ranges[1..] {
4298 let r = self.mk_range_u8(lo, hi);
4299 node = self.mk_union(node, r);
4300 }
4301 node
4302 }
4303
4304 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
4305 let mut prefix = Vec::new();
4306 let mut curr = node;
4307 loop {
4308 if curr == NodeId::EPS {
4309 let full = !prefix.is_empty();
4310 return (prefix, full);
4311 }
4312 if curr == NodeId::BOT {
4313 break;
4314 }
4315 if curr.is_pred(self) {
4316 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
4317 Some(byte) => {
4318 prefix.push(byte);
4319 return (prefix, true);
4320 }
4321 None => break, }
4323 }
4324 if !curr.is_concat(self) {
4325 break;
4326 }
4327 let left = curr.left(self);
4328 if !left.is_pred(self) {
4329 break;
4330 }
4331 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
4332 Some(byte) => prefix.push(byte),
4333 None => break,
4334 }
4335 curr = curr.right(self);
4336 }
4337 (prefix, false)
4338 }
4339
4340 #[allow(dead_code)]
4341 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
4342 let mut result = NodeId::EPS;
4343 for byte in raw_str.iter().rev() {
4344 let node = self.mk_u8(*byte);
4345 result = self.mk_concat(node, result);
4346 }
4347 result
4348 }
4349
4350 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
4351 let mut result = NodeId::EPS;
4352 for byte in raw_str.bytes().rev() {
4353 let node = self.mk_u8(byte);
4354 result = self.mk_concat(node, result);
4355 }
4356 result
4357 }
4358
4359 pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
4360 self.prune_rec::<true>(node_id, memo)
4361 }
4362
4363 pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
4364 self.prune_rec::<false>(node_id, memo)
4365 }
4366
4367 pub fn simplify_fwd_initial(&mut self, node_id: NodeId) -> NodeId {
4368 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4369 match self.get_kind(node_id) {
4370 Kind::Concat => {
4371 let l = self.simplify_fwd_initial_rec(node_id.left(self), &mut memo);
4372 let r = self.simplify_fwd_initial_rec(node_id.right(self), &mut memo);
4373 if r.is_concat(self) {
4374 if r.left(self).is_ts() && !r.right(self).is_lookahead(self) {
4375 if l.is_begin_nullable(self) {
4376 return r;
4377 }
4378 }
4379 }
4380 }
4381 _ => {}
4382 }
4383
4384 self.simplify_fwd_initial_rec(node_id, &mut memo)
4385 }
4386
4387 fn simplify_fwd_initial_rec(
4388 &mut self,
4389 node_id: NodeId,
4390 memo: &mut FxHashMap<NodeId, NodeId>,
4391 ) -> NodeId {
4392 if let Some(&v) = memo.get(&node_id) {
4393 return v;
4394 }
4395
4396 let out = match self.get_kind(node_id) {
4397 Kind::Union => {
4398 let mut parts: Vec<NodeId> = Vec::new();
4399 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4400 for p in &mut parts {
4401 *p = self.simplify_fwd_initial_rec(*p, memo);
4402 }
4403 parts
4404 .iter()
4405 .rev()
4406 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4407 }
4408 Kind::Inter => {
4409 let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
4410 let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
4411 let mut distributed = None;
4412 for (u, other) in [(l, r), (r, l)] {
4413 if self.get_kind(u) == Kind::Union && u.contains_lookahead(self) {
4414 let mut branches: Vec<NodeId> = Vec::new();
4415 self.iter_unions_b(u, &mut |_, v| branches.push(v));
4416 let mut acc = NodeId::BOT;
4417 for &br in branches.iter().rev() {
4418 let arm = self.mk_inter(br, other);
4419 let arm = self.simplify_fwd_initial_rec(arm, memo);
4420 acc = self.mk_union(arm, acc);
4421 }
4422 distributed = Some(acc);
4423 break;
4424 }
4425 }
4426 if let Some(acc) = distributed {
4427 acc
4428 } else if let Some(look) = self.eps_inter_look(l, r) {
4429 look
4430 } else {
4431 self.mk_inter(l, r)
4432 }
4433 }
4434 Kind::Concat => {
4435 let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
4436 let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
4437 if l == NodeId::TS
4438 && r.is_lookahead(self)
4439 && self.get_lookahead_tail(r).is_missing()
4440 && self.get_lookahead_rel(r) == 0
4441 {
4442 let body = self.get_lookahead_inner(r);
4443 if self.is_nullable(body, Nullability::END) {
4444 return NodeId::TS;
4445 }
4446 }
4447
4448 if l != NodeId::TS {
4449 return self.mk_concat(l, r);
4450 }
4451 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4452 return rewritten;
4453 }
4454 let (head, tail) = if r.is_concat(self) {
4455 (r.left(self), r.right(self))
4456 } else {
4457 (r, NodeId::EPS)
4458 };
4459 if self.get_kind(head) != Kind::Union {
4460 return self.mk_concat(l, r);
4461 }
4462 if !head.left(self).is_begin() {
4463 return self.mk_concat(l, r);
4464 }
4465 let y = head.right(self);
4466 if !y.is_pred(self) {
4467 return self.mk_concat(l, r);
4468 }
4469 if tail.is_concat(self) {
4470 let tl = tail.left(self);
4471 let tr = tail.right(self);
4472 let y_tset = y.pred_tset(self);
4473 let covers_all = if tl == NodeId::TS {
4474 true
4475 } else if let Some(x_pred) = tl.is_pred_star(self) {
4476 let x_ts = x_pred.pred_tset(self);
4477 let combined = self.solver().or_id(x_ts, y_tset);
4478 self.solver().is_full_id(combined)
4479 } else {
4480 false
4481 };
4482 if covers_all {
4483 return self.mk_concat(NodeId::TS, tr);
4484 }
4485 }
4486 self.mk_concat(l, r)
4487 }
4488 _ => node_id,
4489 };
4490 memo.insert(node_id, out);
4491 out
4492 }
4493
4494 pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
4495 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4496 self.simplify_rev_initial_rec(node_id, &mut memo)
4497 }
4498
4499 fn simplify_rev_initial_rec(
4500 &mut self,
4501 node_id: NodeId,
4502 memo: &mut FxHashMap<NodeId, NodeId>,
4503 ) -> NodeId {
4504 if let Some(&v) = memo.get(&node_id) {
4505 return v;
4506 }
4507 let out = match self.get_kind(node_id) {
4508 Kind::Union => {
4509 let mut parts: Vec<NodeId> = Vec::new();
4510 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4511 for p in &mut parts {
4512 *p = self.simplify_rev_initial_rec(*p, memo);
4513 }
4514 parts
4515 .iter()
4516 .rev()
4517 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4518 }
4519 Kind::Concat => {
4520 let l = self.simplify_rev_initial_rec(node_id.left(self), memo);
4521 let r = self.simplify_rev_initial_rec(node_id.right(self), memo);
4522
4523 if l != NodeId::TS {
4524 return self.mk_concat(l, r);
4525 }
4526 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4527 return rewritten;
4528 }
4529 let (tail1, tail2) = if r.is_concat(self) {
4530 (r.left(self), r.right(self))
4531 } else {
4532 (r, NodeId::EPS)
4533 };
4534
4535 if tail1.is_begin() {
4536 return r;
4537 }
4538
4539 if self.get_kind(tail1) != Kind::Union {
4540 return self.mk_concat(l, r);
4541 }
4542 if !tail1.left(self).is_begin() {
4543 return self.mk_concat(l, r);
4544 }
4545 let y = tail1.right(self);
4546 if !y.is_pred(self) {
4547 return self.mk_concat(l, r);
4548 }
4549 if tail2.is_concat(self) {
4550 let tl = tail2.left(self);
4551 let tr = tail2.right(self);
4552
4553 let y_tset = y.pred_tset(self);
4554 let covers_all = if tl == NodeId::TS {
4555 true
4556 } else if let Some(x_pred) = tl.is_pred_star(self) {
4557 let x_ts = x_pred.pred_tset(self);
4558 let combined = self.solver().or_id(x_ts, y_tset);
4559 self.solver().is_full_id(combined)
4560 } else {
4561 false
4562 };
4563 if covers_all {
4564 return self.mk_concat(NodeId::TS, tr);
4565 }
4566 }
4567 self.mk_concat(l, r)
4568 }
4569 _ => node_id,
4570 };
4571 memo.insert(node_id, out);
4572 out
4573 }
4574
4575 fn try_begin_neg_pred_rewrite(&mut self, r: NodeId) -> Option<NodeId> {
4576 if !r.is_concat(self) {
4577 return None;
4578 }
4579 let begin = r.left(self);
4580 let mid_tail = r.right(self);
4581 if !begin.is_begin() {
4582 return None;
4583 }
4584 if !mid_tail.is_concat(self) {
4585 return None;
4586 }
4587 let neg = mid_tail.left(self);
4588 let tail = mid_tail.right(self);
4589 if self.get_kind(neg) != Kind::Compl {
4590 return None;
4591 }
4592 let inside = neg.left(self);
4593 if !inside.is_concat(self) {
4594 return None;
4595 }
4596 let ts_part = inside.left(self);
4597 let c_pred = inside.right(self);
4598 if ts_part != NodeId::TS || !c_pred.is_pred(self) {
4599 return None;
4600 }
4601 let c_tset = c_pred.pred_tset(self);
4602 let not_c = self.mk_pred_not(c_tset);
4603 let left_branch = self.mk_concat(NodeId::BEGIN, tail);
4604 let not_c_tail = self.mk_concat(not_c, tail);
4605 let right_branch = self.mk_concat(NodeId::TS, not_c_tail);
4606 Some(self.mk_union(left_branch, right_branch))
4607 }
4608
4609 fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
4610 if !n.is_concat(self) {
4611 return n;
4612 }
4613 let l = n.left(self);
4614 let r = n.right(self);
4615 if l == NodeId::TS && r == NodeId::END {
4616 return NodeId::TS;
4617 }
4618 let new_r = self.strip_la_body_end(r);
4619 if new_r == r {
4620 n
4621 } else {
4622 self.mk_concat(l, new_r)
4623 }
4624 }
4625
4626 fn prune_rec<const FWD: bool>(
4627 &mut self,
4628 node_id: NodeId,
4629 memo: &mut FxHashMap<NodeId, NodeId>,
4630 ) -> NodeId {
4631 debug_assert!(node_id != NodeId::MISSING);
4632 if node_id == NodeId::MISSING {
4633 return node_id;
4634 }
4635 if let Some(&v) = memo.get(&node_id) {
4636 return v;
4637 }
4638 let (l, r) = (node_id.left(self), node_id.right(self));
4639 let out = match node_id.kind(self) {
4640 Kind::Union => {
4641 let l = self.prune_rec::<FWD>(l, memo);
4642 let r = self.prune_rec::<FWD>(r, memo);
4643 let mut parts: Vec<NodeId> = Vec::new();
4644 parts.push(l);
4645 self.iter_unions_b(r, &mut |_, v| parts.push(v));
4646 for p in &mut parts {
4647 *p = self.prune_rec::<FWD>(*p, memo);
4648 }
4649
4650 if FWD {
4651 let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
4653 for &p in &parts {
4654 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4655 let body = self.get_lookahead_inner(p);
4656 let lo = self.get_lookahead_rel_lo(p);
4657 best.entry(body)
4658 .and_modify(|r| *r = (*r).min(lo))
4659 .or_insert(lo);
4660 }
4661 }
4662 parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
4663 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4664 let body = self.get_lookahead_inner(p);
4665 if self.get_lookahead_rel_lo(p) != best[&body] {
4666 return acc;
4667 }
4668 }
4669 self.mk_union(p, acc)
4670 })
4671 } else {
4672 let mut rels_map: FxHashMap<NodeId, NullsId> = FxHashMap::default();
4673 for &p in &parts {
4674 if p.is_lookahead(self)
4675 && self.get_lookahead_tail(p) == NodeId::MISSING
4676 && self.get_extra(p) != u32::MAX
4677 {
4678 let body = self.get_lookahead_inner(p);
4679 let rels = NullsId(self.get_extra(p));
4680 rels_map
4681 .entry(body)
4682 .and_modify(|r| *r = self.mb.nb.or_id(*r, rels))
4683 .or_insert(rels);
4684 }
4685 }
4686 let mut la_acc = NodeId::BOT;
4687 for (body, rels) in rels_map {
4688 let lo = self.mb.nb.min_rel(rels);
4689 let hi = self.mb.nb.max_rel(rels);
4690 let node = if lo == hi {
4691 self.mk_lookahead_internal(body, NodeId::MISSING, hi)
4692 } else {
4693 self.mk_lookahead_nid(body, NodeId::MISSING, rels)
4694 };
4695 la_acc = self.mk_union(node, la_acc);
4696 }
4697 parts.iter().rev().fold(la_acc, |acc, &p| {
4698 if p.is_lookahead(self)
4699 && self.get_lookahead_tail(p) == NodeId::MISSING
4700 && self.get_extra(p) != u32::MAX
4701 {
4702 acc
4703 } else {
4704 self.mk_union(p, acc)
4705 }
4706 })
4707 }
4708 }
4709 Kind::Concat => {
4710 if FWD
4711 && l.is_ts()
4712 && r.is_lookahead(self)
4713 && self.get_lookahead_tail(r).is_missing()
4714 && self.get_lookahead_rel(r) == 0
4715 {
4716 let body = self.get_lookahead_inner(r);
4717 if self.is_nullable(body, Nullability::END) {
4718 return NodeId::TS;
4719 }
4720 }
4721 let l = self.prune_rec::<FWD>(l, memo);
4722 let r = self.prune_rec::<FWD>(r, memo);
4723 self.mk_concat(l, r)
4724 }
4725 Kind::Inter => {
4726 let l = self.prune_rec::<FWD>(l, memo);
4727 let r = self.prune_rec::<FWD>(r, memo);
4728 self.mk_inter(l, r)
4729 }
4730 Kind::Compl => {
4731 let l = self.prune_rec::<FWD>(l, memo);
4732 self.mk_compl(l)
4733 }
4734 Kind::Lookahead => {
4735 let extra = self.get_extra(node_id);
4736 let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
4737 let body = self.prune_rec::<FWD>(body, memo);
4738
4739 let tail = if self.get_lookahead_tail(node_id).is_missing() {
4740 NodeId::MISSING
4741 } else {
4742 self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo)
4743 };
4744 if extra == u32::MAX {
4745 self.mk_lookahead_internal(body, tail, u32::MAX)
4746 } else {
4747 let rels = NullsId(extra);
4748 let lo = self.mb.nb.min_rel(rels);
4749 let hi = self.mb.nb.max_rel(rels);
4750 if lo == hi {
4751 self.mk_lookahead_internal(body, tail, hi)
4752 } else {
4753 self.mk_lookahead_nid(body, tail, rels)
4754 }
4755 }
4756 }
4757 Kind::Begin => NodeId::BOT,
4758 Kind::Ordered => {
4759 let body = self.prune_rec::<FWD>(l, memo);
4760 let chain = self.prune_rec::<FWD>(r, memo);
4761 self.mk_ordered(body, chain, self.get_extra(node_id))
4762 }
4763 Kind::End | Kind::Pred => node_id,
4764 Kind::Star => {
4765 let l = self.prune_rec::<FWD>(l, memo);
4766 self.mk_star(l)
4767 }
4768 Kind::Lookbehind => {
4769 let l = self.prune_rec::<FWD>(l, memo);
4770 if r.is_missing() {
4771 l
4772 } else {
4773 node_id
4774 }
4775 }
4776 };
4777 memo.insert(node_id, out);
4778 out
4779 }
4780
4781 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4782 let mut sorted: Vec<NodeId> = nodes.collect();
4783 if sorted.len() <= 1 {
4784 return sorted.pop().unwrap_or(NodeId::BOT);
4785 }
4786 sorted.sort();
4787 sorted.dedup();
4788 sorted.retain(|&x| x != NodeId::BOT);
4789 if sorted.is_empty() {
4790 return NodeId::BOT;
4791 }
4792 if sorted.len() > 16 {
4793 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
4794 let mut non_concat: Vec<NodeId> = Vec::new();
4795 for &n in &sorted {
4796 if n.is_concat(self) {
4797 by_head.entry(self.get_left(n)).or_default().push(n);
4798 } else {
4799 non_concat.push(n);
4800 }
4801 }
4802 let mut absorbed: Vec<NodeId> = Vec::new();
4803 for &n in &non_concat {
4804 if by_head.contains_key(&n) {
4805 absorbed.push(n);
4806 }
4807 }
4808 if !absorbed.is_empty() {
4809 non_concat.retain(|n| !absorbed.contains(n));
4810 }
4811 if by_head.len() < sorted.len() {
4812 let mut groups: Vec<NodeId> = non_concat;
4813 for (head, tails) in by_head {
4814 let mut tail_nodes: Vec<NodeId> =
4815 tails.iter().map(|&n| self.get_right(n)).collect();
4816 if absorbed.contains(&head) {
4817 tail_nodes.push(NodeId::EPS);
4818 }
4819 let tail_union = self.mk_unions(tail_nodes.into_iter());
4820 let factored = self.mk_concat(head, tail_union);
4821 groups.push(factored);
4822 }
4823 groups.sort();
4824 groups.dedup();
4825 return self.mk_unions_balanced(&groups);
4826 }
4827 }
4828 self.mk_unions_balanced(&sorted)
4829 }
4830
4831 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
4832 match nodes.len() {
4833 0 => NodeId::BOT,
4834 1 => nodes[0],
4835 n => {
4836 let mid = n / 2;
4837 let left = self.mk_unions_balanced(&nodes[..mid]);
4838 let right = self.mk_unions_balanced(&nodes[mid..]);
4839 self.mk_union(left, right)
4840 }
4841 }
4842 }
4843
4844 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4845 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
4846 }
4847
4848 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4849 nodes
4850 .rev()
4851 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
4852 }
4853}
4854
4855impl RegexBuilder {
4856 pub fn iter_sat(
4857 &mut self,
4858 stack: &mut Vec<(TRegexId, TSetId)>,
4859 f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
4860 ) {
4861 debug_assert!(!stack.is_empty());
4862 loop {
4863 match stack.pop() {
4864 None => return,
4865 Some((curr, curr_set)) => match *self.get_tregex(curr) {
4866 TRegex::Leaf(n) => {
4867 let mut curr = n;
4868 while curr != NodeId::BOT {
4869 match self.get_kind(curr) {
4870 _ => {
4871 f(self, n, curr_set);
4872 curr = NodeId::BOT;
4873 }
4874 }
4875 }
4876 }
4877 TRegex::ITE(cnd, then_id, else_id) => {
4878 if else_id != TRegexId::BOT {
4879 let notcnd = self.solver().not_id(cnd);
4880 let interset1 = self.solver().and_id(curr_set, notcnd);
4881 stack.push((else_id, interset1));
4882 }
4883 let interset2 = self.solver().and_id(curr_set, cnd);
4884 stack.push((then_id, interset2));
4885 }
4886 },
4887 }
4888 }
4889 }
4890
4891 #[allow(dead_code)]
4892 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
4893 match self.get_tregex(term_id).clone() {
4894 TRegex::Leaf(node_id) => {
4895 if NodeId::BOT == node_id {
4896 vec![]
4897 } else {
4898 vec![node_id]
4899 }
4900 }
4901 TRegex::ITE(_, then_id, else_id) => {
4902 let mut then_nodes = self.extract_sat(then_id);
4903 let mut else_nodes = self.extract_sat(else_id);
4904 then_nodes.append(&mut else_nodes);
4905 then_nodes
4906 }
4907 }
4908 }
4909
4910 pub(crate) fn iter_unions_b(
4911 &mut self,
4912 curr: NodeId,
4913 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4914 ) {
4915 let mut curr = curr;
4916 while self.get_kind(curr) == Kind::Union {
4917 f(self, curr.left(self));
4918 curr = curr.right(self);
4919 }
4920 f(self, curr);
4921 }
4922
4923 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4924 if !self.contains_look(node_id) {
4925 return Some(node_id);
4926 }
4927 match self.get_kind(node_id) {
4928 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4929 Kind::Concat => {
4930 let left = node_id.left(self);
4931 let right = node_id.right(self);
4932 let elim_l = self.try_elim_lookarounds(left)?;
4933 let elim_r = self.try_elim_lookarounds(right)?;
4934 let rw = self.mk_concat(elim_l, elim_r);
4935 Some(rw)
4936 }
4937 Kind::Union => {
4938 let left = node_id.left(self);
4939 let right = node_id.right(self);
4940 let elim_l = self.try_elim_lookarounds(left)?;
4941 let elim_r = self.try_elim_lookarounds(right)?;
4942 let rw = self.mk_union(elim_l, elim_r);
4943 Some(rw)
4944 }
4945
4946 Kind::Star => {
4947 let body = node_id.left(self);
4948 let elim_l = self.try_elim_lookarounds(body)?;
4949 Some(self.mk_star(elim_l))
4950 }
4951 Kind::Compl => {
4952 let left = node_id.left(self);
4953 let elim_l = self.try_elim_lookarounds(left)?;
4954 Some(self.mk_compl(elim_l))
4955 }
4956 Kind::Lookahead => {
4957 let rel = self.get_lookahead_rel(node_id);
4958 if rel != 0 {
4959 return None;
4960 }
4961 let lbody = self.get_lookahead_inner(node_id);
4962 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4963 let elim_l = self.try_elim_lookarounds(lbody)?;
4964 let elim_r = self.try_elim_lookarounds(ltail)?;
4965 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4966 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4967 let rw = self.mk_inter(lbody_ts, ltail_ts);
4968 Some(rw)
4969 }
4970 Kind::Lookbehind => {
4971 let linner = self.get_lookbehind_inner(node_id);
4972 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4973 let elim_l = self.try_elim_lookarounds(linner)?;
4974 let elim_r = self.try_elim_lookarounds(lprev)?;
4975 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4976 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4977 let rw = self.mk_inter(lbody_ts, ltail_ts);
4978 Some(rw)
4979 }
4980 Kind::Inter => {
4981 let left = node_id.left(self);
4982 let right = node_id.right(self);
4983 let elim_l = self.try_elim_lookarounds(left)?;
4984 let elim_r = self.try_elim_lookarounds(right)?;
4985 let rw = self.mk_inter(elim_l, elim_r);
4986 Some(rw)
4987 }
4988 Kind::Ordered => None,
4989 }
4990 }
4991
4992 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4994 if self.nullability(node) == Nullability::NEVER {
4995 node
4996 } else {
4997 self.mk_inter(NodeId::TOPPLUS, node)
4998 }
4999 }
5000
5001 pub fn iter_find_stack(
5002 &self,
5003 stack: &mut Vec<TRegexId>,
5004 mut f: impl FnMut(NodeId) -> bool,
5005 ) -> bool {
5006 loop {
5007 match stack.pop() {
5008 None => return false,
5009 Some(curr) => match self.get_tregex(curr) {
5010 TRegex::Leaf(n) => {
5011 let mut curr = *n;
5012 while curr != NodeId::BOT {
5013 match self.get_kind(curr) {
5014 Kind::Union => {
5015 if f(curr.left(self)) {
5016 return true;
5017 }
5018 curr = curr.right(self);
5019 }
5020 _ => {
5021 if f(*n) {
5022 return true;
5023 }
5024 curr = NodeId::BOT;
5025 }
5026 }
5027 }
5028 }
5029 TRegex::ITE(_, then_id, else_id) => {
5030 if *else_id != TRegexId::BOT {
5031 stack.push(*else_id);
5032 }
5033 stack.push(*then_id);
5034 }
5035 },
5036 }
5037 }
5038 }
5039
5040 pub fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
5066 if node == NodeId::BOT {
5067 return Some(true);
5068 }
5069 if self.nullability(node) != Nullability::NEVER {
5070 return Some(false);
5071 }
5072 if let Some(cached) = self.cache_empty.get(&node) {
5073 if cached.is_checked() {
5074 return Some(cached.is_empty());
5075 }
5076 }
5077 let node = if !self.contains_look(node) {
5078 node
5079 } else {
5080 self.try_elim_lookarounds(node)?
5081 };
5082 match self.is_empty_lang_internal(node) {
5083 Ok(flag) => Some(flag == NodeFlags::IS_EMPTY),
5084 Err(_) => None,
5085 }
5086 }
5087
5088 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, ResharpError> {
5089 if !self.get_meta_flags(initial_node).contains_inter() {
5091 return Ok(NodeFlags::ZERO);
5092 }
5093
5094 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
5095 let mut worklist: VecDeque<NodeId> = VecDeque::new();
5096 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
5097 let mut stack = Vec::new();
5098 stack.push(begin_der);
5099 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
5100 visited.insert(node, initial_node);
5101 let nullability = self.nullability(node);
5102 if nullability != Nullability::NEVER {
5103 true
5104 } else {
5105 worklist.push_back(node);
5106 false
5107 }
5108 });
5109 if found_nullable_right_away {
5110 return Ok(NodeFlags::ZERO);
5111 }
5112
5113 worklist.push_back(initial_node);
5114 let isempty_flag: NodeFlags;
5115 let mut found_node = NodeId::BOT;
5116
5117 loop {
5118 if visited.len() > IS_EMPTY_LANG_NODE_BUDGET {
5119 return Err(ResharpError::StateSpaceExplosion);
5120 }
5121 match worklist.pop_front() {
5122 None => {
5123 isempty_flag = NodeFlags::IS_EMPTY;
5124 break;
5125 }
5126 Some(outer) => {
5127 if let Some(cached) = self.cache_empty.get(&outer) {
5128 if cached.is_checked() {
5129 if cached.is_empty() {
5130 continue;
5131 } else {
5132 return Ok(NodeFlags::ZERO);
5133 }
5134 }
5135 }
5136
5137 let derivative = self.der(outer, Nullability::CENTER)?;
5138
5139 stack.push(derivative);
5140
5141 let found_null = self.iter_find_stack(&mut stack, |node| {
5142 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
5143 found_node = node;
5144 if !self.get_meta_flags(node).contains_inter() {
5145 true
5146 } else {
5147 e.insert(outer);
5148 worklist.push_front(node);
5149 self.any_nonbegin_nullable(node)
5150 }
5151 } else {
5152 false
5153 }
5154 });
5155 if found_null {
5156 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
5157 isempty_flag = NodeFlags::ZERO;
5158 break;
5159 }
5160 }
5161 }
5162 }
5163
5164 self.cache_empty.insert(
5165 initial_node,
5166 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
5167 );
5168 Ok(isempty_flag)
5169 }
5170
5171 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
5173 if larger_lang == smaller_lang {
5174 return Some(true);
5175 }
5176
5177 if self
5179 .nullability(larger_lang)
5180 .not()
5181 .and(self.nullability(smaller_lang))
5182 != Nullability::NEVER
5183 {
5184 return Some(false);
5185 }
5186
5187 let wrap = |b: &mut Self, n: NodeId| {
5192 if b.contains_look(n) {
5193 let tmp = b.mk_concat(n, NodeId::TS);
5194 b.mk_concat(NodeId::TS, tmp)
5195 } else {
5196 n
5197 }
5198 };
5199 let smaller_lang = wrap(self, smaller_lang);
5200 let larger_lang = wrap(self, larger_lang);
5201
5202 let nota = self.mk_compl(larger_lang);
5203 let diff = self.mk_inter(smaller_lang, nota);
5204 self.is_empty_lang(diff)
5205 }
5206}