1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum ResharpError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for ResharpError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 ResharpError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 ResharpError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 ResharpError::UnsupportedPattern => write!(
36 f,
37 "unsupported lookaround pattern (lookaround, word boundary, or `^`/`$` anchor inside a complement `~(...)`, or a lookbehind operand of an intersection `&`)"
38 ),
39 }
40 }
41}
42
43impl std::error::Error for ResharpError {}
44
45mod id {
46 pub const MISSING: u32 = 0;
47 pub const BOT: u32 = 1;
48 pub const EPS: u32 = 2;
49 pub const TOP: u32 = 3;
50 pub const TOPSTAR: u32 = 4;
51 pub const TOPPLUS: u32 = 5;
52 pub const BEGIN: u32 = 6;
53 pub const END: u32 = 7;
54}
55
56#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
57pub(crate) struct MetadataId(u32);
58impl MetadataId {
59 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
60}
61
62#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
63#[cfg_attr(feature = "serialize", derive(serde::Serialize, serde::Deserialize))]
64pub struct NodeId(pub u32);
65impl NodeId {
66 pub const MISSING: NodeId = NodeId(id::MISSING);
67 pub const BOT: NodeId = NodeId(id::BOT);
68 pub const EPS: NodeId = NodeId(id::EPS);
69 pub const TOP: NodeId = NodeId(id::TOP);
70 pub const TS: NodeId = NodeId(id::TOPSTAR);
71 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
72 pub const BEGIN: NodeId = NodeId(id::BEGIN);
73 pub const END: NodeId = NodeId(id::END);
74
75 #[inline]
76 pub fn as_u32(self) -> u32 {
77 self.0
78 }
79
80 #[inline]
81 pub fn from_u32(v: u32) -> NodeId {
82 NodeId(v)
83 }
84}
85impl Debug for NodeId {
86 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87 let num = &self.0;
88 f.write_str(format!("{num}").as_str())
89 }
90}
91
92#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
93pub struct TRegexId(u32);
94impl TRegexId {
95 pub const MISSING: TRegexId = TRegexId(id::MISSING);
96 pub const EPS: TRegexId = TRegexId(id::EPS);
97 pub const BOT: TRegexId = TRegexId(id::BOT);
98 pub const TOP: TRegexId = TRegexId(id::TOP);
99 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
100}
101
102#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
103pub(crate) struct MetaFlags(u8);
104impl MetaFlags {
105 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
108 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 3);
109 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 4);
110 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 5);
111 pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 6);
112 pub(crate) const CONTAINS_LOOKAHEAD: MetaFlags = MetaFlags(1 << 7);
113
114 #[inline]
115 pub(crate) fn nullability(self) -> Nullability {
116 Nullability(self.0 & Self::NULL_MASK)
117 }
118
119 #[inline]
120 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
121 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
122 }
123
124 #[inline]
125 pub(crate) fn has(self, flag: MetaFlags) -> bool {
126 self.0 & flag.0 != 0
127 }
128 #[inline]
129 const fn and(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 & other.0)
131 }
132 #[inline]
133 const fn or(self, other: MetaFlags) -> MetaFlags {
134 MetaFlags(self.0 | other.0)
135 }
136
137 pub(crate) fn contains_inter(self) -> bool {
138 self.has(MetaFlags::CONTAINS_INTER)
139 }
140
141 pub(crate) fn all_contains_flags(self) -> MetaFlags {
142 self.and(
143 MetaFlags::CONTAINS_ANCHORS
144 .or(MetaFlags::CONTAINS_INTER)
145 .or(MetaFlags::CONTAINS_LOOKBEHIND)
146 .or(MetaFlags::CONTAINS_LOOKAHEAD),
147 )
148 }
149}
150
151#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
152#[repr(u8)]
153pub enum Kind {
154 #[default]
155 Pred,
156 Star,
157 Begin,
158 End,
159 Concat,
160 Union,
161 Compl,
162 Lookbehind,
163 Lookahead,
164 Inter,
165 Counted,
166}
167
168#[derive(Eq, Hash, PartialEq, Clone)]
169struct Metadata {
170 flags: MetaFlags,
171 nulls: NullsId,
172}
173
174struct MetadataBuilder {
175 num_created: u32,
176 solver: Solver,
177 nb: NullsBuilder,
178 index: FxHashMap<Metadata, MetadataId>,
179 pub array: Vec<Metadata>,
180}
181
182mod helpers {
183 pub(crate) fn incr_rel(n1: u32) -> u32 {
184 match n1.overflowing_add(1) {
185 (_, true) => u32::MAX,
186 (res, false) => res,
187 }
188 }
189}
190
191impl MetadataBuilder {
192 fn new() -> MetadataBuilder {
193 Self {
194 index: FxHashMap::default(),
195 array: vec![Metadata {
196 flags: MetaFlags::ZERO,
197 nulls: NullsId::EMPTY,
198 }],
199 solver: Solver::new(),
200 num_created: 0,
201 nb: NullsBuilder::new(),
202 }
203 }
204
205 fn init(&mut self, inst: Metadata) -> MetadataId {
206 self.num_created += 1;
207 let new_id = MetadataId(self.num_created);
208 self.index.insert(inst.clone(), new_id);
209 self.array.push(inst);
210 new_id
211 }
212
213 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
214 match self.index.get(&inst) {
215 Some(&id) => id,
216 None => self.init(inst),
217 }
218 }
219
220 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
221 &self.array[inst.0 as usize]
222 }
223
224 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
225 setflags.all_contains_flags()
226 }
227
228 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
229 let left = &self.array[body.0 as usize].flags;
230 let contains = self.get_contains(*left);
231 let inf = if body_id == NodeId::BOT {
233 MetaFlags::ZERO
234 } else {
235 MetaFlags::INFINITE_LENGTH
236 };
237 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
238 }
239
240 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
241 let left = &self.array[left_id.0 as usize].flags;
242 let null = left.nullability().not().and(Nullability::ALWAYS);
243 let contains = self.get_contains(*left);
244 MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
245 }
246
247 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
248 let left = &self.array[left_id.0 as usize].flags;
249 let right = &self.array[right_id.0 as usize].flags;
250 let null = left.nullability().and(right.nullability());
251 let contains = self.get_contains(left.or(*right));
252 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
253 MetaFlags::with_nullability(null, contains.or(len))
254 }
255
256 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
257 let left = &self.array[left_id.0 as usize].flags;
258 let right = &self.array[right_id.0 as usize].flags;
259 let null = left.nullability().and(right.nullability());
260 let contains = self
261 .get_contains(left.or(*right))
262 .or(MetaFlags::CONTAINS_INTER);
263 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
264 MetaFlags::with_nullability(null, contains.or(len))
265 }
266
267 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
268 let left = &self.array[left_id.0 as usize].flags;
269 let right = &self.array[right_id.0 as usize].flags;
270 let null = left.nullability().or(right.nullability());
271 let contains = self.get_contains(left.or(*right));
272 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
273 MetaFlags::with_nullability(null, contains.or(len))
274 }
275}
276
277#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
278pub struct NodeKey {
279 pub(crate) kind: Kind,
280 pub(crate) left: NodeId,
281 pub(crate) right: NodeId,
282 pub(crate) extra: u32,
283}
284
285#[derive(Eq, Hash, PartialEq, Clone, Debug)]
286pub enum TRegex<TSet> {
287 Leaf(NodeId),
288 ITE(TSet, TRegexId, TRegexId),
289}
290
291#[derive(Eq, Hash, PartialEq, Clone, Copy)]
292pub(crate) struct NodeFlags(u8);
293impl NodeFlags {
294 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
295 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
296 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
297
298 fn is_checked(self) -> bool {
299 self.0 >= NodeFlags::IS_CHECKED.0
300 }
301 fn is_empty(self) -> bool {
302 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
303 }
304}
305
306#[derive(Eq, Hash, PartialEq, Clone, Copy)]
307pub(crate) struct BuilderFlags(u8);
308impl BuilderFlags {
309 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
310 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
311}
312
313pub struct RegexBuilder {
314 mb: MetadataBuilder,
315 temp_vec: Vec<NodeId>,
316 num_created: u32,
317 index: FxHashMap<NodeKey, NodeId>,
318 array: Vec<NodeKey>,
319 metadata: Vec<MetadataId>,
320 reversed: Vec<NodeId>,
321 cache_empty: FxHashMap<NodeId, NodeFlags>,
322 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
323 tr_array: Vec<TRegex<TSetId>>,
324 tr_der_center: Vec<TRegexId>,
325 tr_der_begin: Vec<TRegexId>,
326 flags: BuilderFlags,
327 pub lookahead_context_max: u32,
329 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
330 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
331}
332
333impl NodeId {
334 fn is_missing(&self) -> bool {
335 *self == NodeId::MISSING
336 }
337 #[inline]
338 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
339 b.get_flags_contains(self)
340 }
341 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
342 if self == tail {
343 true
344 } else if self.is_kind(b, Kind::Concat) {
345 self.right(b).has_concat_tail(b, tail)
346 } else {
347 false
348 }
349 }
350 fn missing_to_eps(&self) -> NodeId {
351 if *self == NodeId::MISSING {
352 NodeId::EPS
353 } else {
354 *self
355 }
356 }
357
358 #[inline]
359 fn kind(self, b: &RegexBuilder) -> Kind {
360 b.get_kind(self)
361 }
362 #[inline]
363 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
364 b.get_kind(self) == k
365 }
366 #[inline]
367 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
368 b.nullability(self) == Nullability::NEVER
369 }
370 #[inline]
371 pub fn nullability(self, b: &RegexBuilder) -> Nullability {
372 b.nullability(self)
373 }
374
375 #[inline]
376 pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
377 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
378 }
379 #[inline]
380 pub fn is_begin_nullable(self, b: &RegexBuilder) -> bool {
381 b.nullability(self).has(Nullability::BEGIN)
382 }
383 #[inline]
384 pub fn left(self, b: &RegexBuilder) -> NodeId {
385 b.get_left(self)
386 }
387
388 #[inline]
389 pub fn right(self, b: &RegexBuilder) -> NodeId {
390 b.get_right(self)
391 }
392
393 #[inline]
394 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, ResharpError> {
395 b.der(self, mask)
396 }
397
398 #[inline]
399 fn extra(self, b: &RegexBuilder) -> u32 {
400 b.get_extra(self)
401 }
402
403 #[inline]
404 fn is_pred(self, b: &RegexBuilder) -> bool {
405 b.get_kind(self) == Kind::Pred
406 }
407
408 #[inline]
409 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
410 debug_assert!(self.is_pred(b));
411 TSetId(b.get_extra(self))
412 }
413 #[inline]
414 fn is_star(self, b: &RegexBuilder) -> bool {
415 if NodeId::EPS == self {
416 return false;
417 }
418 b.get_kind(self) == Kind::Star
419 }
420
421 pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
422 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
423 }
424
425 pub fn contains_lookahead(self, b: &RegexBuilder) -> bool {
426 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAHEAD)
427 }
428
429 pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
430 b.get_meta_flags(self)
431 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
432 }
433
434 #[inline]
435 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
436 b.get_kind(self) == Kind::Inter
437 }
438
439 #[inline]
440 pub(crate) fn is_compl(self, b: &RegexBuilder) -> bool {
441 b.get_kind(self) == Kind::Compl
442 }
443
444 #[inline]
445 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
446 if self.is_concat(b) {
447 let r = self.right(b);
448 return r.is_star(b) && r.left(b) == self.left(b);
449 }
450 false
451 }
452
453 #[inline]
454 fn is_begin(self) -> bool {
455 self == NodeId::BEGIN
456 }
457
458 #[inline]
459 fn is_end(self) -> bool {
460 self == NodeId::END
461 }
462
463 #[inline]
464 fn is_union(self, b: &RegexBuilder) -> bool {
465 b.get_kind(self) == Kind::Union
466 }
467
468 #[inline]
469 fn is_concat(self, b: &RegexBuilder) -> bool {
470 b.get_kind(self) == Kind::Concat
471 }
472
473 #[inline]
474 fn is_lookahead(self, b: &RegexBuilder) -> bool {
475 b.get_kind(self) == Kind::Lookahead
476 }
477
478 #[inline]
479 fn is_lookbehind(self, b: &RegexBuilder) -> bool {
480 b.get_kind(self) == Kind::Lookbehind
481 }
482
483 #[inline]
484 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
485 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
486 Some(b.get_right(self))
487 } else {
488 None
489 }
490 }
491
492 #[inline]
493 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
494 if self.is_concat(b) {
495 let left = self.left(b);
496 let right = self.right(b);
497 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
498 return right == NodeId::END;
499 }
500 }
501 false
502 }
503
504 #[inline]
505 #[allow(dead_code)]
506 fn is_ts(self) -> bool {
507 NodeId::TS == self
508 }
509
510 #[inline]
511 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
512 if NodeId::EPS == self {
513 return None;
514 }
515 if self.is_star(b) && self.left(b).is_pred(b) {
516 Some(self.left(b))
517 } else {
518 None
519 }
520 }
521
522 #[inline]
523 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
524 if self.is_concat(b) && self.left(b) == NodeId::TS {
525 let middle = self.right(b);
526 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
527 return Some(middle.left(b));
528 }
529 }
530 None
531 }
532
533 #[inline]
534 pub(crate) fn iter_union(
535 self,
536 b: &mut RegexBuilder,
537 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
538 ) {
539 let mut curr: NodeId = self;
540 while curr.kind(b) == Kind::Union {
541 f(b, curr.left(b));
542 curr = curr.right(b);
543 }
544 f(b, curr);
545 }
546
547 #[inline]
548 pub(crate) fn iter_inter(
549 self,
550 b: &mut RegexBuilder,
551 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
552 ) {
553 let mut curr: NodeId = self;
554 while curr.kind(b) == Kind::Inter {
555 f(b, curr.left(b));
556 curr = curr.right(b);
557 }
558 f(b, curr);
559 }
560
561 #[inline]
562 pub(crate) fn iter_union_while(
563 self,
564 b: &mut RegexBuilder,
565 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
566 ) {
567 let mut curr: NodeId = self;
568 let mut continue_loop = true;
569 while continue_loop && curr.kind(b) == Kind::Union {
570 continue_loop = f(b, curr.left(b));
571 curr = curr.right(b);
572 }
573 if continue_loop {
574 f(b, curr);
575 }
576 }
577
578 #[inline]
579 pub(crate) fn any_inter_component(
580 self,
581 b: &RegexBuilder,
582 mut f: impl FnMut(NodeId) -> bool,
583 ) -> bool {
584 debug_assert!(self.kind(b) == Kind::Inter);
585 let mut cur = self;
586 while cur.kind(b) == Kind::Inter {
587 if f(cur.left(b)) {
588 return true;
589 }
590 cur = cur.right(b);
591 }
592 f(cur)
593 }
594
595 #[inline]
596 pub(crate) fn any_union_component(
597 self,
598 b: &RegexBuilder,
599 mut f: impl FnMut(NodeId) -> bool,
600 ) -> bool {
601 let mut cur = self;
602 while cur.kind(b) == Kind::Union {
603 if f(cur.left(b)) {
604 return true;
605 }
606 cur = cur.right(b);
607 }
608 f(cur)
609 }
610}
611
612impl Default for RegexBuilder {
613 fn default() -> Self {
614 Self::new()
615 }
616}
617
618impl RegexBuilder {
619 pub fn new() -> RegexBuilder {
620 let mut inst = Self {
621 mb: MetadataBuilder::new(),
622 array: Vec::new(),
623 index: FxHashMap::default(),
624 cache_empty: FxHashMap::default(),
625 tr_array: Vec::new(),
626 tr_cache: FxHashMap::default(),
627 flags: BuilderFlags::ZERO,
628 lookahead_context_max: 800,
629 num_created: 0,
630 metadata: Vec::new(),
631 reversed: Vec::new(),
632 tr_der_center: Vec::new(),
633 tr_der_begin: Vec::new(),
634 temp_vec: Vec::new(),
635 mk_binary_memo: FxHashMap::default(),
636 clean_cache: FxHashMap::default(),
637 };
638 inst.array.push(NodeKey::default());
639 inst.mk_pred(TSetId::EMPTY);
640 inst.mk_star(NodeId::BOT);
641 inst.mk_pred(TSetId::FULL);
642 inst.mk_star(NodeId::TOP);
643 let top_plus_id = inst.mk_plus(NodeId::TOP);
644 inst.mk_unset(Kind::Begin);
645 inst.mk_unset(Kind::End);
646 debug_assert!(top_plus_id == NodeId::TOPPLUS);
647
648 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
649 inst.mk_leaf(NodeId::BOT);
650 inst.mk_leaf(NodeId::EPS);
651 inst.mk_leaf(NodeId::TOP);
652 inst.mk_leaf(NodeId::TS);
653
654 inst.flags = BuilderFlags::SUBSUME;
655 inst
656 }
657
658 #[inline]
659 pub fn solver_ref(&self) -> &Solver {
660 &self.mb.solver
661 }
662
663 #[inline]
664 pub fn solver(&mut self) -> &mut Solver {
665 &mut self.mb.solver
666 }
667
668 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
669 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
670 self.tr_cache.insert(inst.clone(), new_id);
671 self.tr_array.push(inst);
672 new_id
673 }
674
675 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
676 match self.tr_cache.get(&inst) {
677 Some(&id) => id,
678 None => self.tr_init(inst),
679 }
680 }
681
682 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
683 &self.tr_array[inst.0 as usize]
684 }
685
686 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
687 let solv = self.solver();
688 !solv.is_sat_id(cond1, cond2)
689 }
690
691 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
692 let term = TRegex::<TSetId>::Leaf(node_id);
693 self.get_tregex_id(term)
694 }
695
696 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
697 let tmp_inst = TRegex::ITE(cond, _then, _else);
698 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
699 return *cached;
700 }
701 if _then == _else {
702 return _then;
703 }
704 if self.solver().is_full_id(cond) {
705 return _then;
706 }
707 if self.solver().is_empty_id(cond) {
708 return _else;
709 }
710 let _clean_then = match self.tr_array[_then.0 as usize] {
711 TRegex::Leaf(_) => _then,
712 _ => self.clean(cond, _then),
713 };
714 let notcond = self.solver().not_id(cond);
715 let _clean_else = match self.tr_array[_else.0 as usize] {
716 TRegex::Leaf(_) => _else,
717 _ => self.clean(notcond, _else),
718 };
719
720 if _clean_then == _clean_else {
721 return _clean_then;
722 }
723 match self.get_tregex(_clean_then) {
725 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
726 let _cond_then = *leftcond;
727 let new_then = *_inner_then;
728 let sand = self.solver().and_id(cond, _cond_then);
729 return self.mk_ite(sand, new_then, _clean_else);
730 }
731 _ => {}
732 }
733 match self.get_tregex(_clean_else) {
735 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
737 let e2clone = *_e2;
738 let new_cond = self.mb.solver.or_id(cond, *_c2);
739 return self.mk_ite(new_cond, _clean_then, e2clone);
740 }
741 _ => {}
742 }
743
744 if _clean_then == TRegexId::BOT {
745 let flip_cond = self.solver().not_id(cond);
746 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
747 return cleaned_id;
748 }
749
750 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
751 }
752
753 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
754 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
755 return cached;
756 }
757 let result = match *self.get_tregex(tterm) {
758 TRegex::Leaf(_) => tterm,
759 TRegex::ITE(alpha, _then_id, _else_id) => {
760 let notalpha = self.mb.solver.not_id(alpha);
761 if self.mb.solver.unsat_id(beta, alpha) {
762 self.clean(beta, _else_id)
764 } else if self.unsat(beta, notalpha) {
765 self.clean(beta, _then_id)
767 } else {
768 let tc = self.mb.solver.and_id(beta, alpha);
769 let ec = self.mb.solver.and_id(beta, notalpha);
770 let new_then = self.clean(tc, _then_id);
771 let new_else = self.clean(ec, _else_id);
772 self.mk_ite(alpha, new_then, new_else)
773 }
774 }
775 };
776 self.clean_cache.insert((beta, tterm), result);
777 result
778 }
779
780 fn mk_unary(
781 &mut self,
782 term: TRegexId,
783 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
784 ) -> TRegexId {
785 match self.tr_array[term.0 as usize] {
786 TRegex::Leaf(node_id) => {
787 let applied = apply(self, node_id);
788 self.mk_leaf(applied)
789 }
790 TRegex::ITE(c1, _then, _else) => {
791 let _then1 = self.mk_unary(_then, apply);
792 let _else1 = self.mk_unary(_else, apply);
793 self.mk_ite(c1, _then1, _else1)
794 }
795 }
796 }
797
798 fn mk_binary_result(
799 &mut self,
800 left: TRegexId,
801 right: TRegexId,
802 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, ResharpError>,
803 ) -> Result<TRegexId, ResharpError> {
804 match self.tr_array[left.0 as usize] {
805 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
806 TRegex::Leaf(right_node_id) => {
807 let applied = apply(self, left_node_id, right_node_id)?;
808 Ok(self.mk_leaf(applied))
809 }
810 TRegex::ITE(c2, _then, _else) => {
811 let then2 = self.mk_binary_result(left, _then, apply)?;
812 let else2 = self.mk_binary_result(left, _else, apply)?;
813 Ok(self.mk_ite(c2, then2, else2))
814 }
815 },
816 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
817 TRegex::Leaf(_) => {
818 let then2 = self.mk_binary_result(_then1, right, apply)?;
819 let else2 = self.mk_binary_result(_else1, right, apply)?;
820 Ok(self.mk_ite(c1, then2, else2))
821 }
822 TRegex::ITE(c2, _then2, _else2) => {
823 if c1 == c2 {
824 let _then = self.mk_binary_result(_then1, _then2, apply)?;
825 let _else = self.mk_binary_result(_else1, _else2, apply)?;
826 return Ok(self.mk_ite(c1, _then, _else));
827 }
828 if c1.0 > c2.0 {
829 let _then = self.mk_binary_result(_then1, right, apply)?;
830 let _else = self.mk_binary_result(_else1, right, apply)?;
831 Ok(self.mk_ite(c1, _then, _else))
832 } else {
833 let _then = self.mk_binary_result(left, _then2, apply)?;
834 let _else = self.mk_binary_result(left, _else2, apply)?;
835 Ok(self.mk_ite(c2, _then, _else))
836 }
837 }
838 },
839 }
840 }
841
842 fn mk_binary(
843 &mut self,
844 left: TRegexId,
845 right: TRegexId,
846 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
847 ) -> TRegexId {
848 self.mk_binary_memo.clear();
849 self.mk_binary_inner(left, right, apply)
850 }
851
852 fn mk_binary_inner(
853 &mut self,
854 left: TRegexId,
855 right: TRegexId,
856 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
857 ) -> TRegexId {
858 if left == right {
859 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
860 }
861 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
862 return cached;
863 }
864 let result = match self.tr_array[left.0 as usize] {
865 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
866 TRegex::Leaf(right_node_id) => {
867 let applied = apply(self, left_node_id, right_node_id);
868 self.mk_leaf(applied)
869 }
870 TRegex::ITE(c2, _then, _else) => {
871 let then2 = self.mk_binary_inner(left, _then, apply);
872 let else2 = self.mk_binary_inner(left, _else, apply);
873 self.mk_ite(c2, then2, else2)
874 }
875 },
876 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
877 TRegex::Leaf(_) => {
878 let then2 = self.mk_binary_inner(_then1, right, apply);
879 let else2 = self.mk_binary_inner(_else1, right, apply);
880 self.mk_ite(c1, then2, else2)
881 }
882 TRegex::ITE(c2, _then2, _else2) => {
883 if c1 == c2 {
884 let _then = self.mk_binary_inner(_then1, _then2, apply);
885 let _else = self.mk_binary_inner(_else1, _else2, apply);
886 self.mk_ite(c1, _then, _else)
887 } else if c1.0 > c2.0 {
888 let _then = self.mk_binary_inner(_then1, right, apply);
889 let _else = self.mk_binary_inner(_else1, right, apply);
890 self.mk_ite(c1, _then, _else)
891 } else {
892 let _then = self.mk_binary_inner(left, _then2, apply);
893 let _else = self.mk_binary_inner(left, _else2, apply);
894 self.mk_ite(c2, _then, _else)
895 }
896 }
897 },
898 };
899 self.mk_binary_memo.insert((left, right), result);
900 result
901 }
902
903 pub fn get_nulls(
904 &mut self,
905 pending_rel: u32,
906 mask: Nullability,
907 acc: &mut BTreeSet<NullState>,
908 node_id: NodeId,
909 ) {
910 debug_assert!(node_id != NodeId::MISSING);
911 if !self.is_nullable(node_id, mask) {
912 return;
913 }
914 match self.get_kind(node_id) {
915 Kind::Pred => {}
916 Kind::End => {
917 if mask.has(Nullability::END) {
918 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
919 }
920 }
921 Kind::Begin => {
922 if mask.has(Nullability::BEGIN) {
923 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
924 }
925 }
926 Kind::Concat => {
927 let new_mask = self.nullability(node_id).and(mask);
928 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
929 if self.is_nullable(node_id.left(self), mask) {
930 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
931 }
932 }
933 Kind::Union => {
934 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
935 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
936 }
937 Kind::Inter => {
938 let new_mask = self.nullability(node_id).and(mask);
939 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
940 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
941 }
942 Kind::Star => {
943 acc.insert(NullState::new(mask, pending_rel));
944 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
945 }
946 Kind::Compl => {
947 if !self.is_nullable(node_id.left(self), mask) {
948 acc.insert(NullState::new(mask, 0));
949 }
950 }
951 Kind::Lookbehind => {
952 let new_mask = self.nullability(node_id).and(mask);
953 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
954 if node_id.right(self) != NodeId::MISSING {
955 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
956 }
957 }
958 Kind::Lookahead => {
959 let la_inner = self.get_lookahead_inner(node_id);
960 if self.is_nullable(la_inner, mask) {
961 let rel = self.get_lookahead_rel(node_id);
962 if rel != u32::MAX {
963 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
964 }
965 let la_tail = self.get_lookahead_tail(node_id);
967 if la_tail != NodeId::MISSING {
968 self.get_nulls(pending_rel, mask, acc, la_tail);
969 }
970 }
971 }
972 Kind::Counted => {
973 let packed = self.get_extra(node_id);
974 let best = packed >> 16;
975 if best > 0 {
976 acc.insert(NullState::new(mask, pending_rel + best));
977 }
978 }
979 }
980 }
981
982 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
983 self.get_meta_flags(node_id)
984 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
985 }
986
987 pub fn contains_lookbehind(&mut self, node_id: NodeId) -> bool {
988 self.get_meta_flags(node_id)
989 .has(MetaFlags::CONTAINS_LOOKBEHIND)
990 }
991
992 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
993 self.get_meta_flags(node_id)
994 .has(MetaFlags::CONTAINS_ANCHORS)
995 }
996
997 pub fn is_infinite(&self, node_id: NodeId) -> bool {
998 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
999 }
1000
1001 pub fn max_lookahead_body_len(&self, node_id: NodeId) -> u32 {
1007 let mut visited: FxHashMap<NodeId, ()> = FxHashMap::default();
1008 let mut stack = vec![node_id];
1009 let mut best: u32 = 0;
1010 while let Some(n) = stack.pop() {
1011 if n == NodeId::MISSING || visited.insert(n, ()).is_some() {
1012 continue;
1013 }
1014 let kind = self.get_kind(n);
1015 if kind == Kind::Lookahead {
1016 let mut body = self.get_lookahead_inner(n);
1017 if body.is_concat(self) && body.right(self) == NodeId::TS {
1020 body = body.left(self);
1021 }
1022 let (_, mx) = self.get_min_max_length(body);
1023 if mx > best {
1024 best = mx;
1025 }
1026 }
1027 match kind {
1028 Kind::Pred | Kind::Begin | Kind::End => {}
1029 Kind::Star | Kind::Compl => stack.push(n.left(self)),
1030 _ => {
1031 stack.push(n.left(self));
1032 stack.push(n.right(self));
1033 }
1034 }
1035 }
1036 best
1037 }
1038
1039 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
1040 if self.is_infinite(node_id) {
1041 if node_id.is_inter(self) {
1042 self.get_bounded_length(node_id)
1043 } else {
1044 (self.get_min_length_only(node_id), u32::MAX)
1045 }
1046 } else {
1047 self.get_bounded_length(node_id)
1048 }
1049 }
1050
1051 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
1052 if node_id == NodeId::EPS {
1053 return (0, 0);
1054 }
1055 match self.get_kind(node_id) {
1056 Kind::End | Kind::Begin => (0, 0),
1057 Kind::Pred => (1, 1),
1058 Kind::Concat => {
1059 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
1060 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1061 (lmin + rmin, lmax.saturating_add(rmax))
1062 }
1063 Kind::Union => {
1064 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
1065 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1066 (lmin.min(rmin), lmax.max(rmax))
1067 }
1068 Kind::Inter => {
1069 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
1070 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
1071 (lmin.max(rmin), lmax.min(rmax))
1072 }
1073 Kind::Lookahead => {
1074 let body = node_id.left(self);
1075 if self.is_infinite(body) {
1076 return (0, u32::MAX);
1077 }
1078 let right = node_id.right(self);
1079 if right.is_missing() {
1080 (0, 0)
1081 } else {
1082 self.get_min_max_length(right)
1083 }
1084 }
1085 Kind::Counted => self.get_min_max_length(node_id.left(self)),
1086 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
1087 }
1088 }
1089
1090 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1091 if node_id == NodeId::EPS {
1092 return Some(0);
1093 }
1094 match self.get_kind(node_id) {
1095 Kind::End | Kind::Begin => Some(0),
1096 Kind::Pred => Some(1),
1097 Kind::Concat => {
1098 let l = self.get_fixed_length(node_id.left(self))?;
1099 let r = self.get_fixed_length(node_id.right(self))?;
1100 Some(l + r)
1101 }
1102 Kind::Union => {
1103 let l = self.get_fixed_length(node_id.left(self))?;
1104 let r = self.get_fixed_length(node_id.right(self))?;
1105 if l == r {
1106 Some(l)
1107 } else {
1108 None
1109 }
1110 }
1111 Kind::Inter => {
1112 let l = self.get_fixed_length(node_id.left(self));
1113 let r = self.get_fixed_length(node_id.right(self));
1114 match (l, r) {
1115 (Some(a), Some(b)) if a == b => Some(a),
1116 (Some(_), Some(_)) => None,
1117 (Some(a), None) | (None, Some(a)) => Some(a),
1118 (None, None) => None,
1119 }
1120 }
1121 Kind::Lookahead | Kind::Lookbehind => {
1122 let prev = node_id.right(self).missing_to_eps();
1123 self.get_fixed_length(prev)
1124 }
1125 Kind::Counted => self.get_fixed_length(node_id.left(self)),
1126 Kind::Star | Kind::Compl => None,
1127 }
1128 }
1129
1130 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1131 match self.get_kind(node_id) {
1132 Kind::End | Kind::Begin => 0,
1133 Kind::Pred => 1,
1134 Kind::Concat => {
1135 self.get_min_length_only(node_id.left(self))
1136 + self.get_min_length_only(node_id.right(self))
1137 }
1138 Kind::Union => self
1139 .get_min_length_only(node_id.left(self))
1140 .min(self.get_min_length_only(node_id.right(self))),
1141 Kind::Inter => self
1142 .get_min_length_only(node_id.left(self))
1143 .max(self.get_min_length_only(node_id.right(self))),
1144 Kind::Star | Kind::Lookbehind | Kind::Lookahead => 0,
1145 Kind::Counted => self.get_min_length_only(node_id.left(self)),
1146 Kind::Compl => {
1147 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1148 0
1149 } else {
1150 1
1151 }
1152 }
1153 }
1154 }
1155
1156 pub fn starts_with_ts(&self, node_id: NodeId) -> bool {
1157 if node_id == NodeId::TS {
1158 return true;
1159 }
1160 match self.get_kind(node_id) {
1161 Kind::Inter => {
1162 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1163 }
1164 Kind::Union => {
1165 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1166 }
1167 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1168 _ => false,
1169 }
1170 }
1171
1172 #[inline]
1173 pub fn ends_with_ts(&self, node_id: NodeId) -> bool {
1174 if node_id.is_concat(self) {
1175 return self.ends_with_ts(node_id.right(self));
1176 }
1177 if node_id.is_lookahead(self) {
1178 let tail = self.get_lookahead_tail(node_id);
1179 if !tail.is_missing() {
1180 return self.ends_with_ts(tail);
1181 }
1182 }
1183 node_id == NodeId::TS
1184 }
1185
1186 pub fn ends_with_ts_any_branch(&self, node_id: NodeId) -> bool {
1187 if node_id == NodeId::TS {
1188 return true;
1189 }
1190 match self.get_kind(node_id) {
1191 Kind::Concat => self.ends_with_ts_any_branch(node_id.right(self)),
1192 Kind::Union => {
1193 self.ends_with_ts_any_branch(node_id.left(self))
1194 || self.ends_with_ts_any_branch(node_id.right(self))
1195 }
1196 Kind::Lookahead => {
1197 let tail = self.get_lookahead_tail(node_id);
1198 !tail.is_missing() && self.ends_with_ts_any_branch(tail)
1199 }
1200 _ => false,
1201 }
1202 }
1203
1204 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1205 debug_assert!(node_id != NodeId::MISSING);
1206 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1207 }
1208
1209 pub(crate) fn cache_der(
1210 &mut self,
1211 node_id: NodeId,
1212 result: TRegexId,
1213 mask: Nullability,
1214 ) -> TRegexId {
1215 if mask == Nullability::CENTER {
1216 self.tr_der_center[node_id.0 as usize] = result
1217 } else {
1218 self.tr_der_begin[node_id.0 as usize] = result
1219 };
1220 result
1221 }
1222
1223 pub(crate) fn try_cached_der(
1224 &mut self,
1225 node_id: NodeId,
1226 mask: Nullability,
1227 ) -> Option<TRegexId> {
1228 let cache = if mask == Nullability::CENTER {
1229 &mut self.tr_der_center
1230 } else {
1231 &mut self.tr_der_begin
1232 };
1233 match cache.get(node_id.0 as usize) {
1234 Some(&TRegexId::MISSING) => {}
1235 Some(&result) => {
1236 return Some(result);
1237 }
1238 None => {
1239 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1240 }
1241 }
1242 None
1243 }
1244
1245 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1246 let mut term = self.get_tregex(der);
1247 loop {
1248 match *term {
1249 TRegex::Leaf(node_id) => return node_id,
1250 TRegex::ITE(cond, _then, _else) => {
1251 if self.solver().is_sat_id(set, cond) {
1252 term = self.get_tregex(_then);
1253 } else {
1254 term = self.get_tregex(_else);
1255 }
1256 }
1257 }
1258 }
1259 }
1260
1261 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, ResharpError> {
1262 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1263 debug_assert!(
1264 node_id != NodeId::MISSING,
1265 "attempting to derive missing node"
1266 );
1267 if let Some(result) = self.try_cached_der(node_id, mask) {
1268 return Ok(result);
1269 }
1270
1271 let result = match node_id.kind(self) {
1272 Kind::Compl => {
1273 let leftd = node_id.left(self).der(self, mask)?;
1274 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1275 }
1276 Kind::Inter => {
1277 let leftd = node_id.left(self).der(self, mask)?;
1278 let rightd = node_id.right(self).der(self, mask)?;
1279 {
1280 let this = &mut *self;
1281 this.mk_binary(
1282 leftd,
1283 rightd,
1284 &mut (|b, left, right| b.mk_inter(left, right)),
1285 )
1286 }
1287 }
1288 Kind::Union => {
1289 let leftd = self.der(node_id.left(self), mask)?;
1290 let rightd = self.der(node_id.right(self), mask)?;
1291 {
1292 let this = &mut *self;
1293 this.mk_binary(
1294 leftd,
1295 rightd,
1296 &mut (|b, left, right| b.mk_union(left, right)),
1297 )
1298 }
1299 }
1300 Kind::Concat => {
1301 let head = node_id.left(self);
1302 let tail = node_id.right(self);
1303 let tail_leaf = self.mk_leaf(tail);
1304 let head_der = self.der(head, mask)?;
1305
1306 if self.is_nullable(head, mask) {
1307 let rightd = self.der(tail, mask)?;
1308 let ldr = {
1309 let this = &mut *self;
1310 this.mk_binary(
1311 head_der,
1312 tail_leaf,
1313 &mut (|b, left, right| b.mk_concat(left, right)),
1314 )
1315 };
1316 {
1317 let this = &mut *self;
1318 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1319 }
1320 } else {
1321 let this = &mut *self;
1322 this.mk_binary(
1323 head_der,
1324 tail_leaf,
1325 &mut (|b, left, right| b.mk_concat(left, right)),
1326 )
1327 }
1328 }
1329 Kind::Star => {
1330 if node_id == NodeId::EPS {
1331 TRegexId::BOT
1332 } else {
1333 let left = node_id.left(self);
1334 let r_decr_leaf = self.mk_leaf(node_id);
1335 let r_der = self.der(left, mask)?;
1336 let this = &mut *self;
1337 this.mk_binary(
1338 r_der,
1339 r_decr_leaf,
1340 &mut (|b, left, right| b.mk_concat(left, right)),
1341 )
1342 }
1343 }
1344 Kind::Lookbehind => {
1345 let lb_prev_der = {
1346 let lb_prev = self.get_lookbehind_prev(node_id);
1347 if lb_prev == NodeId::MISSING {
1348 TRegexId::MISSING
1349 } else {
1350 self.der(lb_prev, mask)?
1351 }
1352 };
1353 let lb_inner = self.get_lookbehind_inner(node_id);
1354 let lb_inner_der = self.der(lb_inner, mask)?;
1355 {
1356 let this = &mut *self;
1357 this.mk_binary_result(
1358 lb_inner_der,
1359 lb_prev_der,
1360 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1361 )?
1362 }
1363 }
1364 Kind::Lookahead => {
1365 let la_tail = self.get_lookahead_tail(node_id);
1366 let la_body = node_id.left(self);
1367 let rel = self.get_lookahead_rel(node_id);
1368
1369 if self.is_nullable(la_body, mask) {
1370 let right = node_id.right(self).missing_to_eps();
1372 let rder = self.der(right, mask).clone();
1373 return rder;
1374 }
1375
1376 if rel == u32::MAX {
1377 let la_body_der = self.der(la_body, mask)?;
1378 if la_tail.is_kind(self, Kind::Pred) {
1379 let transitioned =
1380 self.transition_term(la_body_der, la_tail.pred_tset(self));
1381 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1382 let concated = self.mk_concat(la_tail, new_la);
1383 return self.der(concated, mask);
1384 }
1385 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1386 let left = la_tail.left(self);
1387 let tset = left.pred_tset(self);
1388 let transitioned = self.transition_term(la_body_der, tset);
1389 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1390 let tail_right = la_tail.right(self);
1391 let concated = self.mk_concat(new_la, tail_right);
1392 let concated = self.mk_concat(left, concated);
1393 return self.der(concated, mask);
1394 }
1395 }
1396
1397 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1398 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1399 let concated = self.mk_concat(la_body, nulls_mask);
1400 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1401 let non_nulled = self.mk_non_nullable_safe(la_tail);
1402 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1403 let new_union = self.mk_union(concated_look, new_look);
1404 return self.der(new_union, mask);
1405 }
1406
1407 let la_tail_der = if la_tail == NodeId::MISSING {
1408 TRegexId::MISSING
1409 } else {
1410 if self.is_nullable(la_tail, mask) {
1411 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1412 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1413 let la_union = self.mk_union(la_tail, nulls_la);
1414 self.der(la_union, mask)?
1415 } else {
1416 self.der(la_tail, mask)?
1417 }
1418 };
1419
1420 let la_body_der = self.der(la_body, mask)?;
1421
1422 if rel != u32::MAX && rel > self.lookahead_context_max {
1423 return Err(ResharpError::AnchorLimit);
1424 }
1425
1426 let la = {
1427 let this = &mut *self;
1428 let rel = helpers::incr_rel(rel);
1429 this.mk_binary(
1430 la_body_der,
1431 la_tail_der,
1432 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1433 )
1434 };
1435
1436 if rel != u32::MAX
1437 && la_tail_der != TRegexId::MISSING
1438 && self.is_nullable(la_tail, mask)
1439 {
1440 let look_only = {
1441 let this = &mut *self;
1442 let right = TRegexId::MISSING;
1443 let rel = helpers::incr_rel(rel);
1444 this.mk_binary(
1445 la_body_der,
1446 right,
1447 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1448 )
1449 };
1450 {
1451 let this = &mut *self;
1452 this.mk_binary(
1453 look_only,
1454 la,
1455 &mut (|b, left, right| b.mk_union(left, right)),
1456 )
1457 }
1458 } else {
1459 la
1460 }
1461 }
1462 Kind::Counted => {
1463 let body = node_id.left(self);
1464 let chain = node_id.right(self);
1465 let packed = self.get_extra(node_id);
1466 let step = (packed & 0xFFFF) as u16;
1467 let best = (packed >> 16) as u16;
1468
1469 let mid_best = if self.is_nullable(body, mask) && step >= best {
1470 step
1471 } else {
1472 best
1473 };
1474
1475 let body_der = self.der(body, mask)?;
1476 let new_step = step.saturating_add(1);
1477 self.mk_unary(body_der, &mut |b, new_body| {
1478 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1479 new_step
1480 } else {
1481 mid_best
1482 };
1483 let packed = (final_best as u32) << 16 | new_step as u32;
1484 b.mk_counted(new_body, chain, packed)
1485 })
1486 }
1487 Kind::Begin | Kind::End => TRegexId::BOT,
1488 Kind::Pred => {
1489 let psi = node_id.pred_tset(self);
1490 if psi == TSetId::EMPTY {
1491 TRegexId::BOT
1492 } else {
1493 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1494 }
1495 }
1496 };
1497
1498 self.cache_der(node_id, result, mask);
1502 Ok(result)
1503 }
1504
1505 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1506 debug_assert!(meta_id != MetadataId::MISSING);
1507 match self.metadata.get_mut(node_id.0 as usize) {
1508 Some(v) => *v = meta_id,
1509 None => {
1510 self.metadata
1511 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1512 self.metadata[node_id.0 as usize] = meta_id;
1513 }
1514 }
1515 }
1516
1517 fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1518 debug_assert!(reversed_id != NodeId::MISSING);
1519 match self.reversed.get_mut(node_id.0 as usize) {
1520 Some(v) => *v = reversed_id,
1521 None => {
1522 self.reversed
1523 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1524 self.reversed[node_id.0 as usize] = reversed_id;
1525 }
1526 }
1527 return reversed_id;
1528 }
1529
1530 fn init(&mut self, inst: NodeKey) -> NodeId {
1531 self.num_created += 1;
1532 let node_id = NodeId(self.num_created);
1533 self.index.insert(inst.clone(), node_id);
1534 match inst.kind {
1535 Kind::Pred => {
1536 let meta_id = self.mb.get_meta_id(Metadata {
1537 flags: MetaFlags::ZERO,
1538 nulls: NullsId::EMPTY,
1539 });
1540 self.init_metadata(node_id, meta_id);
1541 }
1542 Kind::Begin => {
1543 let meta_id = self.mb.get_meta_id(Metadata {
1544 flags: MetaFlags::with_nullability(
1545 Nullability::BEGIN,
1546 MetaFlags::CONTAINS_ANCHORS,
1547 ),
1548 nulls: NullsId::BEGIN0,
1549 });
1550 self.init_metadata(node_id, meta_id);
1551 }
1552 Kind::End => {
1553 let meta_id = self.mb.get_meta_id(Metadata {
1554 flags: MetaFlags::with_nullability(
1555 Nullability::END,
1556 MetaFlags::CONTAINS_ANCHORS,
1557 ),
1558 nulls: NullsId::END0,
1559 });
1560 self.init_metadata(node_id, meta_id);
1561 }
1562 Kind::Inter => {
1563 let m1 = self.get_node_meta_id(inst.left);
1564 let m2 = self.get_node_meta_id(inst.right);
1565 let meta_id = {
1566 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1567 let mask_l = inst.left.nullability(self);
1568 let mask_r = inst.right.nullability(self);
1569 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1570 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1571 nulls = self.mb.nb.and_mask(nulls, mask_l);
1572 nulls = self.mb.nb.and_mask(nulls, mask_r);
1573 let new_meta = Metadata {
1574 flags: self.mb.flags_inter(m1, m2),
1575 nulls,
1576 };
1577 self.mb.get_meta_id(new_meta)
1578 };
1579 self.init_metadata(node_id, meta_id);
1580 }
1581 Kind::Union => {
1582 let m1 = self.get_node_meta_id(inst.left);
1583 let m2 = self.get_node_meta_id(inst.right);
1584 let meta_id = {
1585 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1586 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1587 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1588 let new_meta = Metadata {
1589 flags: self.mb.flags_union(m1, m2),
1590 nulls,
1591 };
1592 self.mb.get_meta_id(new_meta)
1593 };
1594 self.init_metadata(node_id, meta_id);
1595 }
1596 Kind::Concat => {
1597 let flags = self.mb.flags_concat(
1598 self.get_node_meta_id(inst.left),
1599 self.get_node_meta_id(inst.right),
1600 );
1601
1602 let right_nullability = inst.right.nullability(self);
1603 let left_nullability = inst.left.nullability(self);
1604 let nulls_left = self.get_nulls_id(inst.left);
1605 let nulls_right = self.get_nulls_id(inst.right);
1606 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1607 let mask = right_nullability.and(left_nullability);
1608 nulls = self.mb.nb.and_mask(nulls, mask);
1609
1610 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1611 self.init_metadata(node_id, new_id);
1612 }
1613 Kind::Star => {
1614 let left_nulls = self.get_nulls_id(inst.left);
1615 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1616 let meta_id = self.mb.get_meta_id(Metadata {
1617 flags: self
1618 .mb
1619 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1620 nulls,
1621 });
1622 self.init_metadata(node_id, meta_id);
1623 }
1624 Kind::Compl => {
1625 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1626 let meta_id = self.mb.get_meta_id(Metadata {
1627 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1628 nulls,
1629 });
1630 self.init_metadata(node_id, meta_id);
1631 }
1632 Kind::Lookbehind => {
1633 let mut left_nullability = self.nullability(inst.left);
1634 let mut contains_flags = self.get_flags_contains(inst.left);
1635 let nulls = if inst.right.is_missing() {
1636 self.get_nulls_id(inst.left)
1637 } else {
1638 let right_nullability = self.nullability(inst.right);
1639 let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1640 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1641 left_nullability = left_nullability.and(right_nullability);
1642 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1643 self.mb.nb.and_id(nulls_left, nulls_right)
1644 };
1645
1646 let meta_id = self.mb.get_meta_id(Metadata {
1647 flags: MetaFlags::with_nullability(
1648 left_nullability,
1649 contains_flags.or(MetaFlags::CONTAINS_LOOKBEHIND),
1650 ),
1651 nulls,
1652 });
1653 self.init_metadata(node_id, meta_id);
1654 }
1655 Kind::Lookahead => {
1656 let mut nulls = self.get_nulls_id(inst.left);
1657 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1659 let left_nullability = inst.left.nullability(self);
1660 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1661
1662 nulls = self.mb.nb.or_id(nulls, nulls_right);
1663
1664 let la_inner = inst.left;
1665 let la_tail = inst.right;
1666 let null = self
1667 .get_meta_flags(la_inner)
1668 .nullability()
1669 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1670 let contains_flags = self
1671 .get_flags_contains(la_inner)
1672 .or(self.get_flags_contains(la_tail));
1673
1674 let meta_id = self.mb.get_meta_id(Metadata {
1675 flags: MetaFlags::with_nullability(
1676 null,
1677 contains_flags.or(MetaFlags::CONTAINS_LOOKAHEAD),
1678 ),
1679 nulls,
1680 });
1681 self.init_metadata(node_id, meta_id);
1682 }
1683 Kind::Counted => {
1684 let best = inst.extra >> 16;
1685 let (null, nulls) = if best > 0 {
1686 let mut ns = BTreeSet::new();
1687 ns.insert(NullState::new(Nullability::CENTER, best));
1688 (Nullability::CENTER, self.mb.nb.get_id(ns))
1689 } else {
1690 (Nullability::NEVER, NullsId::EMPTY)
1691 };
1692 let meta_id = self.mb.get_meta_id(Metadata {
1693 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1694 nulls,
1695 });
1696 self.init_metadata(node_id, meta_id);
1697 }
1698 }
1699
1700 self.array.push(inst);
1701
1702 if let Some(rw) = self.post_init_simplify(node_id) {
1703 self.override_as(node_id, rw)
1704 } else {
1705 node_id
1706 }
1707 }
1708
1709 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1710 match self.get_kind(node_id) {
1711 Kind::Concat => {
1712 let lhs = node_id.left(self);
1713 let rhs = node_id.right(self);
1714 if lhs.is_pred_star(self).is_some() {
1715 if let Some(opttail) = rhs.is_opt_v(self) {
1716 if let Some(true) = self.subsumes(lhs, opttail) {
1717 return Some(lhs);
1718 }
1719 }
1720 }
1721 }
1722 Kind::Union => {
1723 let lhs = node_id.left(self);
1724 let rhs = node_id.right(self);
1725 let mut subsumed = false;
1726 rhs.iter_union_while(self, &mut |b, branch| {
1727 if b.nullable_subsumes(branch, lhs) {
1728 subsumed = true;
1729 }
1730 !subsumed
1731 });
1732 if subsumed {
1733 return Some(rhs);
1734 }
1735 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1736 return Some(rhs);
1737 }
1738 }
1739 _ => {}
1740 }
1741
1742 None
1743 }
1744
1745 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1747 if self.get_kind(lhs) != Kind::Union {
1748 return false; }
1750 let mut rhs_branches = Vec::new();
1751 let mut curr = rhs;
1752 while self.get_kind(curr) == Kind::Union {
1753 rhs_branches.push(self.get_left(curr));
1754 curr = self.get_right(curr);
1755 }
1756 rhs_branches.push(curr);
1757
1758 curr = lhs;
1759 while self.get_kind(curr) == Kind::Union {
1760 if !rhs_branches.contains(&self.get_left(curr)) {
1761 return false;
1762 }
1763 curr = self.get_right(curr);
1764 }
1765 rhs_branches.contains(&curr)
1766 }
1767
1768 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1770 if node == target {
1771 return true;
1772 }
1773 match self.get_kind(node) {
1774 Kind::Union => {
1775 self.nullable_subsumes(self.get_left(node), target)
1776 || self.nullable_subsumes(self.get_right(node), target)
1777 }
1778 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1779 self.nullable_subsumes(self.get_right(node), target)
1780 }
1781 _ => false,
1782 }
1783 }
1784
1785 pub fn num_nodes(&self) -> u32 {
1786 self.num_created
1787 }
1788
1789 pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1790 let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1791 let mut stack: Vec<NodeId> = vec![root];
1792 while let Some(n) = stack.pop() {
1793 if n == NodeId::MISSING
1794 || n == NodeId::BOT
1795 || n == NodeId::EPS
1796 || n == NodeId::TS
1797 || n == NodeId::BEGIN
1798 || n == NodeId::END
1799 {
1800 continue;
1801 }
1802 if seen.insert(n, ()).is_some() {
1803 continue;
1804 }
1805 if seen.len() >= limit {
1806 return limit;
1807 }
1808 stack.push(self.get_left(n));
1809 stack.push(self.get_right(n));
1810 }
1811 seen.len()
1812 }
1813 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1814 match self.index.get(&inst) {
1815 Some(&id) => id,
1816 None => self.init(inst),
1817 }
1818 }
1819 #[inline]
1820 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1821 self.index.get(inst)
1822 }
1823
1824 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1825 self.index.insert(key, subsumed);
1826 subsumed
1827 }
1828
1829 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1830 let key = &self.array[key.0 as usize];
1831 let entry = self.index.get_mut(key).unwrap();
1832 *entry = subsumed;
1833 subsumed
1834 }
1835
1836 #[inline]
1837 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1838 self.array[node_id.0 as usize].left
1839 }
1840
1841 #[inline]
1842 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1843 self.array[node_id.0 as usize].right
1844 }
1845
1846 #[inline]
1847 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1848 self.array[node_id.0 as usize].extra
1849 }
1850
1851 #[inline]
1852 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1853 debug_assert!(node_id.is_concat(self));
1854 let mut curr = node_id;
1855 while curr.is_concat(self) {
1856 curr = curr.right(self);
1857 }
1858 curr
1859 }
1860
1861 fn has_trailing_la(&self, node: NodeId) -> bool {
1862 let end = match self.get_kind(node) {
1863 Kind::Concat => self.get_concat_end(node),
1864 Kind::Lookahead => node,
1865 _ => return false,
1866 };
1867 end.is_lookahead(self) && end.right(self).is_missing()
1868 }
1869
1870 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1871 if node.is_lookahead(self) {
1872 return (NodeId::EPS, node);
1873 }
1874 debug_assert!(node.is_concat(self));
1875 let right = node.right(self);
1876 if !right.is_concat(self) {
1877 return (node.left(self), right);
1878 }
1879 let (stripped, la) = self.strip_trailing_la(right);
1880 (self.mk_concat(node.left(self), stripped), la)
1881 }
1882 #[inline]
1883 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1884 debug_assert!(matches!(
1885 self.get_kind(lookahead_node_id),
1886 Kind::Lookahead | Kind::Counted
1887 ));
1888 lookahead_node_id.left(self)
1889 }
1890 #[inline]
1891 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1892 debug_assert!(lookahead_node_id.is_lookahead(self));
1893 lookahead_node_id.right(self)
1894 }
1895 #[inline]
1896 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1897 debug_assert!(
1898 matches!(
1899 self.get_kind(lookahead_node_id),
1900 Kind::Lookahead | Kind::Counted
1901 ),
1902 "not lookahead/counted: {:?}",
1903 self.pp(lookahead_node_id)
1904 );
1905 self.get_extra(lookahead_node_id)
1906 }
1907 #[inline]
1908 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1909 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1910 lookbehind_node_id.left(self)
1911 }
1912 #[inline]
1913 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1914 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1915 lookbehind_node_id.right(self)
1916 }
1917
1918 #[inline]
1919 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1920 debug_assert!(
1921 self.array.len() > node_id.0 as usize,
1922 "array len: {:?}",
1923 node_id
1924 );
1925 self.array[node_id.0 as usize].kind
1926 }
1927
1928 #[inline]
1929 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1930 &self.array[node_id.0 as usize]
1931 }
1932
1933 #[inline]
1934 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1935 self.metadata[node_id.0 as usize]
1936 }
1937
1938 #[inline]
1939 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1940 debug_assert!(node_id.0 != u32::MAX);
1941 let meta_id = self.get_node_meta_id(node_id);
1942 debug_assert!(meta_id != MetadataId::MISSING);
1943 &self.mb.array[meta_id.0 as usize]
1944 }
1945
1946 #[inline]
1947 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1948 if node_id == NodeId::MISSING {
1949 NullsId::EMPTY
1950 } else {
1951 self.get_meta(node_id).nulls
1952 }
1953 }
1954
1955 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1956 self.mb
1957 .nb
1958 .array
1959 .iter()
1960 .map(|set| set.iter().cloned().collect())
1961 .collect()
1962 }
1963
1964 pub fn nulls_count(&self) -> usize {
1965 self.mb.nb.array.len()
1966 }
1967
1968 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1969 self.mb.nb.array[id as usize].iter().cloned().collect()
1970 }
1971
1972 pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
1973 self.mb.nb.and_mask(nid, Nullability::CENTER)
1974 }
1975
1976 #[inline]
1977 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1978 if node_id == NodeId::MISSING {
1979 NullsId::EMPTY
1980 } else {
1981 let nulls = self.get_meta(node_id).nulls;
1982 self.mb.nb.and_mask(nulls, mask)
1983 }
1984 }
1985
1986 #[inline]
1987 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1988 let meta_id = self.get_node_meta_id(node_id);
1989 let meta = &self.mb.array[meta_id.0 as usize];
1990 meta.flags
1991 }
1992
1993 #[inline]
1994 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1995 self.get_meta(node_id).flags.nullability()
1996 }
1997
1998 #[inline]
1999 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
2000 let meta_id = self.get_node_meta_id(node_id);
2001 let meta = &self.mb.array[meta_id.0 as usize];
2002 meta.flags.all_contains_flags()
2003 }
2004
2005 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2006 if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2007 return self.strip_lb(node_id.right(self));
2008 }
2009 let result = self.strip_lb_inner(true, node_id)?;
2010 if self.contains_lookbehind(result) {
2011 return Err(ResharpError::UnsupportedPattern);
2012 }
2013 Ok(result)
2014 }
2015
2016 fn strip_lb_inner(&mut self, allowed: bool, node_id: NodeId) -> Result<NodeId, ResharpError> {
2017 if !self.contains_lookbehind(node_id) {
2018 return Ok(node_id);
2019 }
2020 if !allowed {
2021 return Result::Err(ResharpError::UnsupportedPattern);
2022 }
2023 if node_id.is_inter(self) {
2024 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2025 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2026 return Ok(self.mk_inter(left, right));
2027 }
2028 if node_id.is_union(self) {
2029 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2030 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2031 return Ok(self.mk_union(left, right));
2032 }
2033 match self.get_kind(node_id) {
2034 Kind::Compl => Result::Err(ResharpError::UnsupportedPattern),
2035 Kind::Concat => {
2036 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2037 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2038 Ok(self.mk_concat(left, right))
2039 }
2040 Kind::Lookbehind => {
2041 let prev = self.get_lookbehind_prev(node_id);
2042 if prev != NodeId::MISSING {
2043 self.strip_lb_inner(allowed, prev)
2044 } else {
2045 Ok(NodeId::EPS)
2046 }
2047 }
2048 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => Ok(node_id),
2049 _ => Ok(node_id),
2050 }
2051 }
2052
2053 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
2055 if !self.contains_anchors(node_id) {
2056 return node_id;
2057 }
2058 match self.get_kind(node_id) {
2059 Kind::Begin => NodeId::BOT,
2060 Kind::Concat => {
2061 let left = self.nonbegins(node_id.left(self));
2062 if left == NodeId::BOT {
2063 return NodeId::BOT;
2064 }
2065 self.mk_concat(left, node_id.right(self))
2066 }
2067 Kind::Union => {
2068 let left = self.nonbegins(node_id.left(self));
2069 let right = self.nonbegins(node_id.right(self));
2070 self.mk_union(left, right)
2071 }
2072 _ => node_id,
2073 }
2074 }
2075
2076 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
2077 match self.get_kind(node_id) {
2078 Kind::Concat => {
2079 let head = node_id.left(self);
2080 match self.get_kind(head) {
2081 _ if self.any_nonbegin_nullable(head) => {
2082 self.strip_prefix_safe(node_id.right(self))
2083 }
2084 _ => node_id,
2085 }
2086 }
2087 _ => node_id,
2088 }
2089 }
2090 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2091 match self.get_kind(node_id) {
2092 Kind::Begin => NodeId::BOT,
2093 Kind::Concat => {
2094 let head = self.prune_begin(node_id.left(self));
2095 let tail = self.prune_begin(node_id.right(self));
2096 self.mk_concat(head, tail)
2097 }
2098 Kind::Lookbehind => {
2099 if !node_id.right(self).is_missing() {
2100 return node_id;
2101 }
2102 let head = self.prune_begin(node_id.left(self));
2103 head
2104 }
2105 Kind::Union => {
2106 let left = self.prune_begin(node_id.left(self));
2107 let right = self.prune_begin(node_id.right(self));
2108 self.mk_union(left, right)
2109 }
2110 _ => node_id,
2111 }
2112 }
2113 pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2114 match self.get_kind(node_id) {
2115 Kind::Begin => NodeId::EPS,
2116 Kind::Concat => {
2117 let head = self.prune_begin_eps(node_id.left(self));
2118 let tail = self.prune_begin_eps(node_id.right(self));
2119 self.mk_concat(head, tail)
2120 }
2121 Kind::Lookbehind => {
2122 if !node_id.right(self).is_missing() {
2123 return node_id;
2124 }
2125 let head = self.prune_begin_eps(node_id.left(self));
2126 head
2127 }
2128 Kind::Union => {
2129 let left = self.prune_begin_eps(node_id.left(self));
2130 let right = self.prune_begin_eps(node_id.right(self));
2131 self.mk_union(left, right)
2132 }
2133 _ => node_id,
2134 }
2135 }
2136
2137 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2138 if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2139 return Ok(node_id);
2140 }
2141 let result = match self.get_kind(node_id) {
2142 Kind::Concat => {
2143 let left = self.normalize_rev(node_id.left(self))?;
2144 let right = self.normalize_rev(node_id.right(self))?;
2145 self.mk_concat(left, right)
2146 }
2147 Kind::Inter => {
2148 let left = self.normalize_rev(node_id.left(self))?;
2149 let right = self.normalize_rev(node_id.right(self))?;
2150 self.mk_inter(left, right)
2151 }
2152 Kind::Union => {
2153 let left = self.normalize_rev(node_id.left(self))?;
2154 let right = self.normalize_rev(node_id.right(self))?;
2155 self.mk_union(left, right)
2156 }
2157 Kind::Lookbehind => {
2158 let left = self.normalize_rev(node_id.left(self))?;
2159 let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2160 let lbody_ts = self.mk_concat(NodeId::TS, left);
2161 let ltail_ts = self.mk_concat(NodeId::TS, right);
2162 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2163 as_inter
2164 }
2165 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2166 return Err(ResharpError::UnsupportedPattern)
2167 }
2168 _ => node_id,
2169 };
2170 Ok(result)
2171 }
2172
2173 pub fn collect_der_targets(
2174 &mut self,
2175 der: TRegexId,
2176 path_set: TSetId,
2177 out: &mut Vec<(NodeId, TSetId)>,
2178 ) {
2179 match *self.get_tregex(der) {
2180 TRegex::Leaf(target) => {
2181 if let Some(entry) = out.iter_mut().find(|(t, _)| *t == target) {
2182 entry.1 = self.solver().or_id(entry.1, path_set);
2183 } else {
2184 out.push((target, path_set));
2185 }
2186 }
2187 TRegex::ITE(cond, then_b, else_b) => {
2188 let then_path = self.solver().and_id(path_set, cond);
2189 self.collect_der_targets(then_b, then_path, out);
2190 let not_cond = self.solver().not_id(cond);
2191 let else_path = self.solver().and_id(path_set, not_cond);
2192 self.collect_der_targets(else_b, else_path, out);
2193 }
2194 }
2195 }
2196
2197 pub fn prefix_ts(&mut self, node_id: NodeId) -> NodeId {
2198 let with_ts = if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2199 node_id
2200 } else {
2201 self.mk_concat(NodeId::TS, node_id)
2202 };
2203 with_ts
2204 }
2205
2206 pub fn ts_rev_start(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2207 let rev = self.reverse(node_id)?;
2208 let rev = self.normalize_rev(rev)?;
2209 let with_ts = self.prefix_ts(rev);
2210 Ok(self.simplify_rev_initial(with_ts))
2211 }
2212
2213 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2214 debug_assert!(node_id != NodeId::MISSING);
2215 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2216 if *rev != NodeId::MISSING {
2217 return Ok(*rev);
2218 }
2219 }
2220 let rw = match self.get_kind(node_id) {
2221 Kind::End => NodeId::BEGIN,
2222 Kind::Begin => NodeId::END,
2223 Kind::Pred => node_id,
2224 Kind::Concat => {
2225 let left = self.reverse(node_id.left(self))?;
2226 let right = self.reverse(node_id.right(self))?;
2227 self.mk_concat(right, left)
2228 }
2229 Kind::Union => {
2230 let left = self.reverse(node_id.left(self))?;
2231 let right = self.reverse(node_id.right(self))?;
2232 self.mk_union(left, right)
2233 }
2234 Kind::Inter => {
2235 let left = self.reverse(node_id.left(self))?;
2236 let right = self.reverse(node_id.right(self))?;
2237 self.mk_inter(left, right)
2238 }
2239 Kind::Star => {
2240 let body = self.reverse(node_id.left(self))?;
2241 self.mk_star(body)
2242 }
2243 Kind::Compl => {
2244 if self.contains_look(node_id.left(self)) {
2245 return Err(ResharpError::UnsupportedPattern);
2246 }
2247 let body = self.reverse(node_id.left(self))?;
2248 self.mk_compl(body)
2249 }
2250 Kind::Lookbehind => {
2251 let prev = self.get_lookbehind_prev(node_id);
2252 let inner_id = self.get_lookbehind_inner(node_id);
2253 let rev_inner = self.reverse(inner_id)?;
2254 let rev_prev = if prev != NodeId::MISSING {
2255 self.reverse(prev)?
2256 } else {
2257 NodeId::MISSING
2258 };
2259 self.mk_lookahead(rev_inner, rev_prev, 0)
2260 }
2261 Kind::Lookahead => {
2262 let rel = self.get_lookahead_rel(node_id);
2263 if rel == u32::MAX {
2265 let lbody = self.get_lookahead_inner(node_id);
2266 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2267 let rbody = self.reverse(lbody);
2268 let rtail = self.reverse(ltail);
2269 let rev = self.mk_lookbehind(rbody?, rtail?);
2270 return Ok(self.cache_reversed(node_id, rev));
2271 }
2272 if rel != 0 {
2273 return Err(ResharpError::UnsupportedPattern);
2274 }
2275 let tail_node = self.get_lookahead_tail(node_id);
2276 let rev_tail = if tail_node != NodeId::MISSING {
2277 self.reverse(tail_node)?
2278 } else {
2279 NodeId::MISSING
2280 };
2281 let inner_id = self.get_lookahead_inner(node_id);
2282 let rev_inner = self.reverse(inner_id)?;
2283 self.mk_lookbehind(rev_inner, rev_tail)
2284 }
2285 Kind::Counted => {
2286 return Err(ResharpError::UnsupportedPattern);
2287 }
2288 };
2289 self.cache_reversed(node_id, rw);
2290 Ok(rw)
2291 }
2292
2293 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2294 let node = NodeKey {
2295 kind: Kind::Pred,
2296 left: NodeId::MISSING,
2297 right: NodeId::MISSING,
2298 extra: pred.0,
2299 };
2300 self.get_node_id(node)
2301 }
2302
2303 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2304 let key = NodeKey {
2305 kind: Kind::Compl,
2306 left: body,
2307 right: NodeId::MISSING,
2308 extra: u32::MAX,
2309 };
2310 if let Some(id) = self.key_is_created(&key) {
2311 return *id;
2312 }
2313 if body == NodeId::BOT {
2314 return NodeId::TS;
2315 }
2316 if body == NodeId::TS {
2317 return NodeId::BOT;
2318 }
2319
2320 if let Some(contains_body) = body.is_contains(self) {
2321 if contains_body.is_pred(self) {
2322 let pred = contains_body.pred_tset(self);
2323 let notpred = self.mk_pred_not(pred);
2324 let node = self.mk_star(notpred);
2325 return self.init_as(key, node);
2326 } else if contains_body == NodeId::END {
2327 return self.init_as(key, NodeId::BOT);
2328 }
2329 };
2330
2331 if self.get_kind(body) == Kind::Compl {
2332 return self.get_node(body).left;
2333 }
2334
2335 self.get_node_id(key)
2336 }
2337
2338 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2339 let nid = self.get_nulls_id(body);
2340 let nref = self.mb.nb.get_set_ref(nid).clone();
2341 let mut futures = NodeId::BOT;
2342 for n in nref.iter() {
2343 if !n.is_mask_nullable(mask) {
2344 continue;
2345 }
2346
2347 let eff = if n.rel == 0 {
2348 NodeId::EPS
2349 } else {
2350 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2351 };
2352 futures = self.mk_union(futures, eff)
2353 }
2354 futures
2355 }
2356
2357 fn strip_ts_max_len(&self, node: NodeId) -> Option<u32> {
2359 let mut cur = node;
2360 let mut total: u32 = 0;
2361 loop {
2362 if !cur.is_concat(self) {
2363 return None;
2364 }
2365 let r = cur.right(self);
2366 let (_, lmax) = self.get_min_max_length(cur.left(self));
2367 if lmax == u32::MAX {
2368 return None;
2369 }
2370 total = total.saturating_add(lmax);
2371 if r == NodeId::TS {
2372 return Some(total);
2373 }
2374 cur = r;
2375 }
2376 }
2377
2378 fn peel_head_pred(&self, node: NodeId) -> Option<(TSetId, NodeId)> {
2379 if node.is_pred(self) {
2380 Some((node.pred_tset(self), NodeId::EPS))
2381 } else if node.is_concat(self) && node.left(self).is_pred(self) {
2382 Some((node.left(self).pred_tset(self), node.right(self)))
2383 } else {
2384 None
2385 }
2386 }
2387
2388 fn strip_end_from_la_head(&mut self, node: NodeId) -> NodeId {
2390 let (head, rest) = if node.is_concat(self) {
2391 (node.left(self), node.right(self))
2392 } else {
2393 (node, NodeId::EPS)
2394 };
2395 if !head.is_kind(self, Kind::Union) {
2396 return node;
2397 }
2398 let l = head.left(self);
2399 if !l.is_end() {
2400 return node;
2401 }
2402 let r = head.right(self);
2403 self.mk_concat(r, rest)
2404 }
2405
2406 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2407 if cfg!(feature = "norewrite") {
2408 return None;
2409 }
2410
2411 if tail.is_lookbehind(self) {
2424 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2425 return self
2426 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2427 .ok();
2428 }
2429 if head.is_lookahead(self) {
2430 let la_tail = self.get_lookahead_tail(head);
2431 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2432 let la_body = self.get_lookahead_inner(head);
2433 let la_body = if new_la_tail.is_never_nullable(self) {
2435 self.strip_end_from_la_head(la_body)
2436 } else {
2437 la_body
2438 };
2439 if la_body == NodeId::BOT {
2440 return Some(NodeId::BOT);
2441 }
2442 if let (Some((p_l, body_rest)), Some((p_b, tail_rest))) = (
2443 self.peel_head_pred(la_body),
2444 self.peel_head_pred(new_la_tail),
2445 ) {
2446 let p = self.solver().and_id(p_l, p_b);
2447 let merged = self.mk_pred(p);
2448 if merged == NodeId::BOT {
2449 return Some(NodeId::BOT);
2450 }
2451 let new_la = if body_rest == NodeId::EPS {
2452 NodeId::EPS
2453 } else {
2454 self.mk_lookahead(body_rest, NodeId::MISSING, 0)
2455 };
2456 let after = self.mk_concat(new_la, tail_rest);
2457 return Some(self.mk_concat(merged, after));
2458 }
2459
2460 if la_body.is_concat(self)
2461 && la_body.right(self).is_end()
2462 && la_body.left(self).is_compl(self)
2463 {
2464 let not_p_ts = la_body.left(self); if let Some(p_max) = self.strip_ts_max_len(not_p_ts.left(self)) {
2466 let (tail_min, _) = self.get_min_max_length(new_la_tail);
2467 if p_max <= tail_min {
2468 return Some(self.mk_inter(not_p_ts, new_la_tail));
2469 }
2470 }
2471 }
2472
2473 if new_la_tail.is_center_nullable(self) {
2474 let la_new = self.mk_lookahead_internal(la_body, new_la_tail, u32::MAX);
2475 return Some(la_new);
2476 }
2477 let la_rel = self.get_lookahead_rel(head);
2478 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2479 let tail_rel = self.get_lookahead_rel(new_la_tail);
2480 tail_rel.saturating_add(la_rel)
2481 } else {
2482 u32::MAX
2483 };
2484
2485 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2486 }
2487
2488 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2489 return Some(head);
2490 }
2491
2492 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2493 return Some(NodeId::TS);
2494 }
2495
2496 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2497 return Some(NodeId::TS);
2498 }
2499
2500 if head.is_inter(self) && tail == NodeId::TS {
2501 let mut alltopstar = true;
2502 head.iter_inter(
2503 self,
2504 &mut (|b, v| {
2505 alltopstar = b.ends_with_ts(v);
2506 }),
2507 );
2508
2509 if alltopstar {
2510 return Some(head);
2511 }
2512 }
2513
2514 if head.is_star(self) && head == tail {
2515 return Some(head);
2516 }
2517
2518 None
2519 }
2520
2521 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2522 use Kind::*;
2523
2524 if cfg!(feature = "norewrite") {
2525 return None;
2526 }
2527 if left == right {
2528 return Some(left);
2529 }
2530
2531 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2532 return Some(left);
2533 }
2534 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2535 return Some(right);
2536 }
2537
2538 if left.is_inter(self) && right.is_inter(self) {
2539 let mut lconj: Vec<NodeId> = Vec::new();
2540 left.any_inter_component(self, |v| {
2541 lconj.push(v);
2542 false
2543 });
2544 let lconj_initial = lconj.len();
2545 let mut common = NodeId::TS;
2546 let mut r_rest = NodeId::TS;
2547 let mut cur = right;
2548 loop {
2549 let (v, next) = if cur.kind(self) == Inter {
2550 (cur.left(self), Some(cur.right(self)))
2551 } else {
2552 (cur, None)
2553 };
2554 if let Some(pos) = lconj.iter().position(|&x| x == v) {
2555 lconj.swap_remove(pos);
2556 common = self.mk_inter(v, common);
2557 } else {
2558 r_rest = self.mk_inter(v, r_rest);
2559 }
2560 match next {
2561 Some(n) => cur = n,
2562 None => break,
2563 }
2564 }
2565 if lconj.len() < lconj_initial {
2566 let l_rest = lconj
2567 .iter()
2568 .fold(NodeId::TS, |acc, &v| self.mk_inter(v, acc));
2569 let inner_union = self.mk_union(l_rest, r_rest);
2570 return Some(self.mk_inter(common, inner_union));
2571 }
2572 }
2573
2574 if left.is_pred(self) && right.is_pred(self) {
2575 let l = left.pred_tset(self);
2576 let r = right.pred_tset(self);
2577 let solver = self.solver();
2578 let psi = solver.or_id(l, r);
2579 let rewrite = self.mk_pred(psi);
2580 return Some(rewrite);
2581 }
2582
2583 if left == NodeId::EPS && self.nullability(right) == Nullability::ALWAYS {
2584 let right_nulls_id = self.get_nulls_id(right);
2585 if self.mb.nb.array[right_nulls_id.0 as usize]
2586 .iter()
2587 .any(|n| n.rel == 0 && n.mask.has(Nullability::ALWAYS))
2588 {
2589 return Some(right);
2590 }
2591 }
2592
2593 if left.is_lookahead(self) && right.is_lookahead(self) {
2594 let lb = left.left(self);
2595 let lt = left.right(self);
2596 let lrel = left.extra(self);
2597
2598 let rb = right.left(self);
2599 let rt = right.right(self);
2600 let rrel = right.extra(self);
2601
2602 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2603 let unioned = self.mk_union(lb, rb);
2604 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2605 return Some(node);
2606 }
2607 }
2608
2609 if right.is_kind(self, Concat) {
2610 if left == NodeId::END
2611 && right.left(self) == NodeId::END
2612 && self.nullability(right).has(Nullability::END)
2613 {
2614 return Some(right);
2615 }
2616 if left == right.left(self) {
2618 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2619 let rw = self.mk_concat(left, rhs);
2620 return Some(rw);
2621 }
2622 if left.is_kind(self, Concat) {
2623 let head1 = left.left(self);
2624 let head2 = right.left(self);
2625
2626 if head1 == head2 {
2627 let t1 = left.right(self);
2628 let t2 = right.right(self);
2629 if head1 == NodeId::TS {
2631 if t2.has_concat_tail(self, t1) {
2632 return Some(left);
2633 }
2634 if t1.has_concat_tail(self, t2) {
2635 return Some(right);
2636 }
2637 }
2638 let un = self.mk_union(t1, t2);
2639 return Some(self.mk_concat(left.left(self), un));
2640 }
2641
2642 if false {
2646 let end1 = self.get_concat_end(left);
2647 let end2 = self.get_concat_end(right);
2648 if end1 == end2 {
2649 let contains_lookarounds =
2650 left.contains_lookaround(self) || right.contains_lookaround(self);
2651 let flags = left.flags_contains(self).or(right.flags_contains(self));
2652 if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2653 let rev1 = self.reverse(left).unwrap();
2654 let rev2 = self.reverse(right).unwrap();
2655
2656 let union_rev = self.mk_union(rev1, rev2);
2657 return Some(self.reverse(union_rev).unwrap());
2658 }
2659 }
2660 }
2661 }
2662 if left.is_pred(self) && left == right.left(self) {
2663 let un = self.mk_opt(right.right(self));
2664 let conc = self.mk_concat(left, un);
2665 return Some(conc);
2666 }
2667 }
2668
2669 if left == NodeId::EPS && right.is_plus(self) {
2670 let result = self.mk_star(right.left(self));
2671 return Some(result);
2672 }
2673
2674 if left.is_inter(self) && right.is_inter(self) {
2676 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2677 return Some(rw);
2678 }
2679 }
2680
2681 None
2682 }
2683
2684 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2685 let mut curr = node;
2686 while curr.is_inter(self) {
2687 out.push(self.get_left(curr));
2688 curr = self.get_right(curr);
2689 }
2690 out.push(curr);
2691 }
2692
2693 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2694 let mut curr = node;
2695 let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2696 if has_prefix {
2697 curr = self.get_right(curr);
2698 }
2699 let mut count = 0u32;
2700 let mut pred_set = None;
2701 while curr.is_concat(self) {
2702 let head = self.get_left(curr);
2703 if !head.is_pred(self) {
2704 return None;
2705 }
2706 let ps = head.pred_tset(self);
2707 match pred_set {
2708 None => pred_set = Some(ps),
2709 Some(existing) => {
2710 if existing != ps {
2711 return None;
2712 }
2713 }
2714 }
2715 curr = self.get_right(curr);
2716 count += 1;
2717 }
2718 if count == 0 || !curr.is_star(self) {
2719 return None;
2720 }
2721 Some((has_prefix, pred_set.unwrap(), curr, count))
2722 }
2723
2724 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2725 let mut j = 0;
2726 for &s in sub {
2727 while j < sup.len() && sup[j] < s {
2728 j += 1;
2729 }
2730 if j >= sup.len() || sup[j] != s {
2731 return false;
2732 }
2733 j += 1;
2734 }
2735 true
2736 }
2737
2738 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2739 if !left.is_inter(self) || !right.is_inter(self) {
2740 return None;
2741 }
2742
2743 let mut lc: Vec<NodeId> = Vec::new();
2744 let mut rc: Vec<NodeId> = Vec::new();
2745 self.collect_inter_components(left, &mut lc);
2746 self.collect_inter_components(right, &mut rc);
2747
2748 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2750 return Some(left);
2751 }
2752 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2754 return Some(right);
2755 }
2756
2757 if lc.len() == rc.len() {
2758 let mut diff_idx = None;
2759 for i in 0..lc.len() {
2760 if lc[i] != rc[i] {
2761 if diff_idx.is_some() {
2762 return None;
2763 }
2764 diff_idx = Some(i);
2765 }
2766 }
2767 if let Some(idx) = diff_idx {
2768 let a = lc[idx];
2769 let b = rc[idx];
2770 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2771 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2772 {
2773 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2774 return if ca < cb { Some(left) } else { Some(right) };
2775 }
2776 }
2777 }
2778 }
2779
2780 None
2781 }
2782
2783 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2784 if cfg!(feature = "norewrite") {
2785 return None;
2786 }
2787 if left == right {
2788 return Some(left);
2789 }
2790 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2791 return Some(right);
2792 }
2793 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2794 return Some(left);
2795 }
2796
2797 if left.is_pred(self) && right.is_pred(self) {
2798 let l = left.pred_tset(self);
2799 let r = right.pred_tset(self);
2800 let solver = self.solver();
2801 let psi = solver.and_id(l, r);
2802 let rewrite = self.mk_pred(psi);
2803 return Some(rewrite);
2804 }
2805
2806 for (a, b) in [(left, right), (right, left)] {
2807 if a.is_pred(self) && b.is_compl(self) {
2808 let cbody = b.left(self);
2809 if cbody.is_concat(self)
2810 && cbody.right(self) == NodeId::TS
2811 && cbody.left(self).is_pred(self)
2812 {
2813 let q = a.pred_tset(self);
2814 let p = cbody.left(self).pred_tset(self);
2815 let solver = self.solver();
2816 let notp = solver.not_id(p);
2817 let anded = solver.and_id(q, notp);
2818 return Some(self.mk_pred(anded));
2819 }
2820 }
2821 }
2822
2823 if left.is_concat(self) && right.is_concat(self) {
2824 if left.left(self) == right.left(self) {
2825 if left.left(self).is_pred(self) {
2826 let new_right = self.mk_inter(left.right(self), right.right(self));
2827 return Some(self.mk_concat(left.left(self), new_right));
2828 }
2829 }
2830 }
2831
2832 if right.is_compl(self) && right.left(self) == left {
2833 return Some(NodeId::BOT);
2834 }
2835
2836 if left.is_compl(self) && right.is_compl(self) {
2837 let bodies = self.mk_union(left.left(self), right.left(self));
2838 return Some(self.mk_compl(bodies));
2839 }
2840
2841 if left == NodeId::TOPPLUS {
2842 if right.is_pred_star(self).is_some() {
2843 let newloop = self.mk_plus(right.left(self));
2844 return Some(newloop);
2845 }
2846 if right.is_never_nullable(self) {
2847 return Some(right);
2848 }
2849 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2850 return Some(NodeId::BOT);
2851 }
2852 if right.is_kind(self, Kind::Concat) {}
2853 }
2854
2855 {
2856 let l_is_la = left.is_lookahead(self);
2857 let r_is_la = right.is_lookahead(self);
2858 let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2859 let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2860 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2861 let (la_node, other, concat_body) = if r_is_la {
2862 (right, left, NodeId::MISSING)
2863 } else if l_is_la {
2864 (left, right, NodeId::MISSING)
2865 } else if r_is_cla {
2866 (right.left(self), left, right.right(self))
2867 } else {
2868 (left.left(self), right, left.right(self))
2869 };
2870 let la_body = la_node.left(self);
2871 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2872 let inter_right = if concat_body.is_missing() {
2873 la_tail
2874 } else {
2875 self.mk_concat(la_tail, concat_body)
2876 };
2877 let new_body = self.mk_inter(other, inter_right);
2878 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2879 return Some(self.mk_concat(la, new_body));
2880 }
2881 }
2882
2883 if self.get_kind(right) == Kind::Compl {
2884 let compl_body = right.left(self);
2885 if left == compl_body {
2886 return Some(NodeId::BOT);
2887 }
2888 if compl_body.is_concat(self) {
2889 let compl_head = compl_body.left(self);
2890 if compl_body.right(self) == NodeId::TS && compl_head == left {
2891 return Some(NodeId::BOT);
2892 }
2893 }
2894 }
2895
2896 if let Some(pleft) = left.is_pred_star(self) {
2897 if let Some(pright) = right.is_pred_star(self) {
2898 let merged = self.mk_inter(pleft, pright);
2899 return Some(self.mk_star(merged));
2900 }
2901 }
2902
2903 {
2904 let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2905 let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2906 if l_is_clb || r_is_clb {
2907 let (lb, body) = if l_is_clb && r_is_clb {
2908 let lb1 = left.left(self);
2909 let lb2 = right.left(self);
2910 let inner = self.mk_inter(
2911 self.get_lookbehind_inner(lb1),
2912 self.get_lookbehind_inner(lb2),
2913 );
2914 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2915 let body = self.mk_inter(left.right(self), right.right(self));
2916 (lb, body)
2917 } else if l_is_clb {
2918 let lb = left.left(self);
2919 let body = self.mk_inter(left.right(self), right);
2920 (lb, body)
2921 } else {
2922 let lb = right.left(self);
2923 let body = self.mk_inter(left, right.right(self));
2924 (lb, body)
2925 };
2926 return Some(self.mk_concat(lb, body));
2927 }
2928 }
2929
2930 {
2931 let l_has_la = self.has_trailing_la(left);
2932 let r_has_la = self.has_trailing_la(right);
2933 if l_has_la || r_has_la {
2934 let (body, la) = if l_has_la && r_has_la {
2935 let (lbody, l_la) = self.strip_trailing_la(left);
2936 let (rbody, r_la) = self.strip_trailing_la(right);
2937 let inner = self.mk_inter(
2938 self.get_lookahead_inner(l_la),
2939 self.get_lookahead_inner(r_la),
2940 );
2941 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2942 let body = self.mk_inter(lbody, rbody);
2943 (body, la)
2944 } else if l_has_la {
2945 let (lbody, la) = self.strip_trailing_la(left);
2946 let body = self.mk_inter(lbody, right);
2947 (body, la)
2948 } else {
2949 let (rbody, la) = self.strip_trailing_la(right);
2950 let body = self.mk_inter(left, rbody);
2951 (body, la)
2952 };
2953 return Some(self.mk_concat(body, la));
2954 }
2955 }
2956
2957 None
2958 }
2959
2960 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2961 if cfg!(feature = "norewrite") {
2962 return None;
2963 }
2964 debug_assert!(self.get_kind(right_union) == Kind::Union);
2965
2966 let mut rewritten = None;
2967 right_union.iter_union_while(
2968 self,
2969 &mut (|b, v| {
2970 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2971 rewritten = Some((v, rw));
2972 false
2973 } else {
2974 true
2975 }
2976 }),
2977 );
2978
2979 if let Some(rw) = rewritten {
2980 let mut new_union = NodeId::BOT;
2981 right_union.iter_union(
2982 self,
2983 &mut (|b, v| {
2984 if v == rw.0 {
2985 new_union = b.mk_union(rw.1, new_union)
2986 } else {
2987 new_union = b.mk_union(v, new_union)
2988 }
2989 }),
2990 );
2991 return Some(new_union);
2992 };
2993
2994 None
2995 }
2996
2997 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2998 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2999 debug_assert!(tail != NodeId::MISSING);
3000 let key = NodeKey {
3001 kind: Kind::Concat,
3002 left: head,
3003 right: tail,
3004 extra: u32::MAX,
3005 };
3006 if let Some(id) = self.key_is_created(&key) {
3007 return *id;
3008 }
3009
3010 if head == NodeId::BOT || tail == NodeId::BOT {
3011 return NodeId::BOT;
3012 }
3013 if head == NodeId::EPS {
3014 return tail;
3015 }
3016 if tail == NodeId::EPS {
3017 return head;
3018 }
3019
3020 if head.is_kind(self, Kind::Concat) {
3022 let left = head.left(self);
3023 let newright = self.mk_concat(head.right(self), tail);
3024 let updated = self.mk_concat(left, newright);
3025 return self.init_as(key, updated);
3026 }
3027
3028 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
3029 return NodeId::BOT;
3030 }
3031
3032 if tail.is_concat(self) {
3033 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
3034 let upd = self.mk_concat(rw, tail.right(self));
3035 return self.init_as(key, upd);
3036 }
3037 }
3038
3039 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
3040 return self.init_as(key, new);
3041 }
3042
3043 match (self.get_kind(head), self.get_kind(tail)) {
3044 (Kind::Star, Kind::Concat) if head.is_star(self) => {
3046 let rl = tail.left(self);
3047 if head == rl {
3048 return self.init_as(key, tail);
3049 }
3050 }
3051 (_, Kind::Concat) => {
3053 let curr = head;
3054 let h2 = self.mk_concat(curr, tail.left(self));
3055 let tr = tail.right(self);
3056 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
3057 return self.init_as(key, new);
3058 }
3059 }
3060 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
3061 return self.init_as(key, tail);
3062 }
3063 _ => {}
3064 }
3065
3066 self.init(key)
3067 }
3068
3069 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
3070 let lb_body = {
3072 match self.starts_with_ts(lb_body) {
3073 true => lb_body,
3074 false => self.mk_concat(NodeId::TS, lb_body),
3075 }
3076 };
3077 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
3079 }
3080
3081 fn mk_lookbehind_internal(
3082 &mut self,
3083 lb_body: NodeId,
3084 lb_prev: NodeId,
3085 ) -> Result<NodeId, ResharpError> {
3086 debug_assert!(lb_body != NodeId::MISSING);
3087 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
3088 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
3089 return Ok(NodeId::BOT);
3090 }
3091 if lb_body == NodeId::TS {
3092 return Ok(if lb_prev == NodeId::MISSING {
3093 NodeId::EPS
3094 } else {
3095 lb_prev
3096 });
3097 }
3098 if lb_body == NodeId::EPS {
3099 match lb_prev {
3100 NodeId::MISSING => return Ok(NodeId::EPS),
3101 NodeId::EPS => return Ok(NodeId::EPS),
3102 _ => return Ok(lb_prev),
3103 }
3104 }
3105
3106 let key = NodeKey {
3107 kind: Kind::Lookbehind,
3108 left: lb_body,
3109 right: lb_prev,
3110 extra: u32::MAX,
3111 };
3112 match self.key_is_created(&key) {
3113 Some(id) => Ok(*id),
3114 None => {
3115 if lb_prev == NodeId::TS {
3116 return Ok(self.mk_concat(lb_prev, lb_body));
3117 }
3118
3119 Ok(self.init(key))
3120 }
3121 }
3122 }
3123
3124 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3125 let la_body = if la_tail.is_missing() && rel == 0 {
3126 self.flatten_la_body(la_body)
3127 } else {
3128 la_body
3129 };
3130 let la_body = {
3132 match self.ends_with_ts(la_body) {
3133 true => la_body,
3134 false => self.mk_concat(la_body, NodeId::TS),
3135 }
3136 };
3137 let rel = if NodeId::MISSING == la_tail {
3138 rel
3139 } else {
3140 match la_tail.is_center_nullable(self) {
3141 false => u32::MAX,
3142 true => rel,
3143 }
3144 };
3145
3146 self.mk_lookahead_internal(la_body, la_tail, rel)
3147 }
3148
3149 fn flatten_la_body(&mut self, node: NodeId) -> NodeId {
3150 if !self
3151 .get_meta_flags(node)
3152 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
3153 {
3154 return node;
3155 }
3156 match self.get_kind(node) {
3157 Kind::Lookahead
3158 if self.get_lookahead_tail(node).is_missing()
3159 && self.get_lookahead_rel(node) == 0 =>
3160 {
3161 let inner = self.get_lookahead_inner(node);
3162 let inner = self.strip_trailing_ts(inner);
3163 self.flatten_la_body(inner)
3164 }
3165 Kind::Union => {
3166 let l = self.flatten_la_body(node.left(self));
3167 let r = self.flatten_la_body(node.right(self));
3168 self.mk_union(l, r)
3169 }
3170 _ => node,
3171 }
3172 }
3173
3174 fn strip_trailing_ts(&self, node: NodeId) -> NodeId {
3175 if node.is_concat(self) && node.right(self) == NodeId::TS {
3176 node.left(self)
3177 } else {
3178 node
3179 }
3180 }
3181
3182 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3184 let key = NodeKey {
3185 kind: Kind::Lookahead,
3186 left: la_body,
3187 right: la_tail,
3188 extra: rel,
3189 };
3190 if let Some(id) = self.key_is_created(&key) {
3191 return *id;
3192 }
3193 if la_body == NodeId::TS {
3194 if rel == 0 {
3195 return la_tail.missing_to_eps();
3196 } else {
3197 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3198 }
3199 }
3200 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3201 return NodeId::BOT;
3202 }
3203 if la_tail.is_missing() && rel == u32::MAX {
3204 return NodeId::BOT;
3205 }
3206
3207 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
3208 return la_body;
3209 }
3210
3211 if la_tail == NodeId::TS {
3212 if rel == 0 || rel == u32::MAX {
3213 return self.mk_concat(la_body, NodeId::TS);
3214 } else if rel == u32::MAX {
3215 return self.mk_begins_with(la_body);
3216 }
3217 }
3218
3219 if rel == u32::MAX {
3220 if la_tail.is_missing() {
3221 return NodeId::BOT;
3222 }
3223
3224 if self.is_always_nullable(la_body) {
3225 return la_tail.missing_to_eps();
3226 }
3227
3228 if la_tail != NodeId::MISSING {
3229 match self.get_kind(la_tail) {
3230 _ => {
3231 if la_body.is_compl_plus_end(self) {
3232 let minlen = self.get_min_length_only(la_tail);
3233 if minlen >= 1 {
3234 return NodeId::BOT;
3235 }
3236 }
3237 }
3238 }
3239 }
3240 }
3241
3242 if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3243 let la_body2 = self.get_lookahead_inner(la_tail);
3244 let body1_ts = self.mk_concat(la_body, NodeId::TS);
3245 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3246 let new_la_body = self.mk_inter(body1_ts, body2_ts);
3247 let new_la_rel = self.get_lookahead_rel(la_tail);
3248 let new_la_tail = self.get_lookahead_tail(la_tail);
3249 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3250 }
3251
3252 if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3253 let la_body_right = la_body.right(self);
3254 if self.is_always_nullable(la_body_right) {
3255 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3256 }
3257 let bodyright = la_body.right(self);
3258 if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3259 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3260 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3261 }
3262 }
3263
3264 if la_tail != NodeId::MISSING {
3265 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3266 let lpred = la_body.left(self);
3267 if lpred.is_pred(self) {
3268 let l = lpred.pred_tset(self);
3269 let r = la_tail.pred_tset(self);
3270 let psi_and = self.solver().and_id(l, r);
3271 let rewrite = self.mk_pred(psi_and);
3272 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3273 let new_right =
3274 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3275 return self.mk_concat(rewrite, new_right);
3276 }
3277 }
3278 }
3279
3280 self.get_node_id(key)
3281 }
3282
3283 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3284 let has_match = (packed >> 16) > 0;
3285 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3286 return NodeId::BOT;
3287 }
3288 debug_assert!(
3289 body == NodeId::BOT || !self.is_infinite(body),
3290 "Counted body must have finite max length"
3291 );
3292 let chain = self.prune_counted_chain(body, chain);
3293 let key = NodeKey {
3294 kind: Kind::Counted,
3295 left: body,
3296 right: chain,
3297 extra: packed,
3298 };
3299 if let Some(id) = self.key_is_created(&key) {
3300 return *id;
3301 }
3302 self.get_node_id(key)
3303 }
3304
3305 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3306 if chain == NodeId::MISSING || body == NodeId::BOT {
3307 return chain;
3308 }
3309 if self.nullability(body) != Nullability::NEVER {
3310 return NodeId::MISSING;
3311 }
3312 let chain_body = chain.left(self);
3313 if chain_body == NodeId::BOT {
3314 return chain;
3315 }
3316 let not_begins = self.mk_not_begins_with(body);
3317 let inter = self.mk_inter(chain_body, not_begins);
3318 let is_empty = inter == NodeId::BOT;
3319 if is_empty {
3320 self.prune_counted_chain(body, chain.right(self))
3321 } else {
3322 chain
3323 }
3324 }
3325
3326 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3327 let neg_inner = self.mk_concat(body, NodeId::TS);
3328 let neg_part = self.mk_compl(neg_inner);
3329 let conc = self.mk_concat(neg_part, NodeId::END);
3330 self.mk_lookahead(conc, NodeId::MISSING, rel)
3331 }
3332
3333 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3334 match self.get_node(body).kind {
3335 Kind::Pred => {
3336 let psi = body.pred_tset(self);
3337 let negated = self.mk_pred_not(psi);
3338 let union = self.mk_union(NodeId::BEGIN, negated);
3339 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3340 }
3341 _ => {
3342 let uc = crate::unicode_classes::utf8_char(self);
3345 let neg = self.mk_compl(body);
3346 let negated = self.mk_inter(neg, uc);
3347 let union = self.mk_union(NodeId::BEGIN, negated);
3348 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3349 }
3350 }
3351 }
3352
3353 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3354 debug_assert!(left != NodeId::MISSING);
3355 debug_assert!(right != NodeId::MISSING);
3356 if left > right {
3357 return self.mk_union(right, left);
3358 }
3359 let key = NodeKey {
3360 kind: Kind::Union,
3361 left,
3362 right,
3363 extra: u32::MAX,
3364 };
3365 if let Some(id) = self.key_is_created(&key) {
3366 return *id;
3367 }
3368 if left == right {
3369 return self.init_as(key, left);
3370 }
3371 if left == NodeId::BOT {
3372 return self.init_as(key, right);
3373 }
3374 if right == NodeId::BOT {
3375 return self.init_as(key, left);
3376 }
3377 if right == NodeId::TS {
3378 return self.init_as(key, right);
3379 }
3380 if left == NodeId::TS {
3381 return self.init_as(key, left);
3382 }
3383 match (self.get_kind(left), self.get_kind(right)) {
3384 (Kind::Union, _) => {
3385 self.iter_unions_b(left, &mut |b, v| {
3386 b.temp_vec.push(v);
3387 });
3388 self.iter_unions_b(right, &mut |b, v| {
3389 b.temp_vec.push(v);
3390 });
3391 self.temp_vec.sort();
3392 let tree = self.temp_vec.clone();
3393 self.temp_vec.clear();
3394 let newnode = tree
3395 .iter()
3396 .rev()
3397 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3398 return self.init_as(key, newnode);
3399 }
3400 (_, Kind::Union) => {
3401 let rleft = right.left(self);
3402 if left > rleft {
3404 self.iter_unions_b(left, &mut |b, v| {
3405 b.temp_vec.push(v);
3406 });
3407 self.iter_unions_b(right, &mut |b, v| {
3408 b.temp_vec.push(v);
3409 });
3410 self.temp_vec.sort();
3411 let tree = self.temp_vec.clone();
3412 self.temp_vec.clear();
3413 let newnode = tree
3414 .iter()
3415 .rev()
3416 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3417 return self.init_as(key, newnode);
3418 } else {
3419 if let Some(rw) = self.attempt_rw_unions(left, right) {
3420 return self.init_as(key, rw);
3421 }
3422 }
3423 }
3424 _ => {}
3425 }
3426
3427 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3428 return self.init_as(key, rw);
3429 }
3430 self.init(key)
3431 }
3432
3433 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3434 debug_assert!(left_id != NodeId::MISSING);
3435 debug_assert!(right_id != NodeId::MISSING);
3436 if left_id == right_id {
3437 return left_id;
3438 }
3439 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3440 return NodeId::BOT;
3441 }
3442 if left_id == NodeId::TS {
3443 return right_id;
3444 }
3445 if right_id == NodeId::TS {
3446 return left_id;
3447 }
3448 if left_id > right_id {
3449 return self.mk_inter(right_id, left_id);
3450 }
3451 let key = NodeKey {
3452 kind: Kind::Inter,
3453 left: left_id,
3454 right: right_id,
3455 extra: u32::MAX,
3456 };
3457 if let Some(id) = self.key_is_created(&key) {
3458 return *id;
3459 }
3460
3461 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3462 return self.init_as(key, rw);
3463 }
3464
3465 self.init(key)
3466 }
3467
3468 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3469 let node = NodeKey {
3470 kind,
3471 left: NodeId::MISSING,
3472 right: NodeId::MISSING,
3473 extra: u32::MAX,
3474 };
3475 self.init(node)
3476 }
3477
3478 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3479 let star = self.mk_star(body_id);
3480 self.mk_concat(body_id, star)
3481 }
3482 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3483 let opt = self.mk_opt(body_id);
3484 let mut nodes1 = vec![];
3485 for _ in lower..upper {
3486 nodes1.push(opt);
3487 }
3488 for _ in 0..lower {
3489 nodes1.push(body_id);
3490 }
3491 self.mk_concats(nodes1.into_iter())
3492 }
3493 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3494 self.mk_union(NodeId::EPS, body_id)
3495 }
3496
3497 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3498 let key = NodeKey {
3499 kind: Kind::Star,
3500 left: body_id,
3501 right: NodeId::MISSING,
3502 extra: 0,
3503 };
3504 if let Some(id) = self.key_is_created(&key) {
3505 return *id;
3506 }
3507 if body_id.is_kind(self, Kind::Star) {
3509 return body_id;
3510 }
3511 self.get_node_id(key)
3512 }
3513
3514 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3517 match self.get_kind(node_id) {
3518 Kind::End => Nullability::EMPTYSTRING,
3519 Kind::Begin => Nullability::EMPTYSTRING,
3520 Kind::Pred => Nullability::NEVER,
3521 Kind::Star => Nullability::ALWAYS,
3522 Kind::Inter | Kind::Concat => {
3523 let lnull = self.nullability_emptystring(node_id.left(self));
3524 let rnull = self.nullability_emptystring(node_id.right(self));
3525 lnull.and(rnull) }
3527 Kind::Union => {
3528 let lnull = self.nullability_emptystring(node_id.left(self));
3529 let rnull = self.nullability_emptystring(node_id.right(self));
3530 lnull.or(rnull)
3531 }
3532 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3533 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3534 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3535 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3536 }
3537 }
3538
3539 #[inline(always)]
3540 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3541 self.get_meta(node_id)
3542 .flags
3543 .nullability()
3544 .has(Nullability::CENTER.or(Nullability::END))
3545 }
3546
3547 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3548 self.get_only_nullability(node_id)
3549 }
3550
3551 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3552 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3553 }
3554
3555 pub fn pp(&self, node_id: NodeId) -> String {
3556 let mut s = String::new();
3557 self.ppw(&mut s, node_id).unwrap();
3558 s
3559 }
3560
3561 pub fn factor_suffixes(&mut self, node: NodeId) -> Result<NodeId, ResharpError> {
3564 let mut memo = FxHashMap::default();
3565 self.factor_suffixes_rec(node, &mut memo)
3566 }
3567
3568 fn factor_suffixes_rec(
3569 &mut self,
3570 node: NodeId,
3571 memo: &mut FxHashMap<NodeId, NodeId>,
3572 ) -> Result<NodeId, ResharpError> {
3573 if let Some(&v) = memo.get(&node) {
3574 return Ok(v);
3575 }
3576 let result = match self.get_kind(node) {
3577 Kind::Pred | Kind::Begin | Kind::End => node,
3578 Kind::Star => {
3579 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3580 self.mk_star(inner)
3581 }
3582 Kind::Compl => {
3583 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3584 self.mk_compl(inner)
3585 }
3586 Kind::Concat => {
3587 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3588 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3589 self.mk_concat(l, r)
3590 }
3591 Kind::Inter => {
3592 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3593 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3594 self.mk_inter(l, r)
3595 }
3596 Kind::Union => {
3597 let mut comps: Vec<NodeId> = Vec::new();
3598 node.iter_union(self, &mut |_, c| comps.push(c));
3599 let mut factored: Vec<NodeId> = Vec::with_capacity(comps.len());
3600 let mut reversible = true;
3601 for c in &comps {
3602 let f = self.factor_suffixes_rec(*c, memo)?;
3603 if c.contains_lookaround(self)
3604 || c.flags_contains(self).has(MetaFlags::CONTAINS_ANCHORS)
3605 {
3606 reversible = false;
3607 }
3608 factored.push(f);
3609 }
3610 if !reversible || factored.len() < 2 {
3611 let mut acc = factored[0];
3612 for c in factored.into_iter().skip(1) {
3613 acc = self.mk_union(acc, c);
3614 }
3615 acc
3616 } else {
3617 let mut rev_comps: Vec<NodeId> = Vec::with_capacity(factored.len());
3618 for c in factored {
3619 rev_comps.push(self.reverse(c)?);
3620 }
3621 let mut acc = rev_comps[0];
3622 for c in rev_comps.into_iter().skip(1) {
3623 acc = self.mk_union(acc, c);
3624 }
3625 self.reverse(acc)?
3626 }
3627 }
3628 Kind::Lookahead => {
3629 let inner = self.get_lookahead_inner(node);
3630 let tail = self.get_lookahead_tail(node);
3631 let rel = self.get_lookahead_rel(node);
3632 let ni = self.factor_suffixes_rec(inner, memo)?;
3633 let nt = self.factor_suffixes_rec(tail, memo)?;
3634 self.mk_lookahead(ni, nt, rel)
3635 }
3636 Kind::Lookbehind => {
3637 let inner = self.get_lookbehind_inner(node);
3638 let prev = self.get_lookbehind_prev(node);
3639 let ni = self.factor_suffixes_rec(inner, memo)?;
3640 let np = if prev != NodeId::MISSING {
3641 self.factor_suffixes_rec(prev, memo)?
3642 } else {
3643 prev
3644 };
3645 self.mk_lookbehind(ni, np)
3646 }
3647 Kind::Counted => node,
3648 };
3649 memo.insert(node, result);
3650 Ok(result)
3651 }
3652
3653 #[allow(dead_code)]
3654 pub fn pp_nulls(&self, node_id: NodeId) -> String {
3655 let nu = self.get_nulls_id(node_id);
3656 let nr = self.mb.nb.get_set_ref(nu);
3657 let s1 = format!("{:?}", nr);
3658 s1
3659 }
3660
3661 #[allow(dead_code)]
3662 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3663 match self.get_tregex(term_id) {
3664 TRegex::Leaf(node_id) => {
3665 let mut s = String::new();
3666 self.ppw(&mut s, *node_id).unwrap();
3667 s
3668 }
3669 TRegex::ITE(cond, then_id, else_id) => {
3670 format!(
3671 "ITE({},{},{})",
3672 self.solver_ref().pp(*cond),
3673 self.ppt(*then_id),
3674 self.ppt(*else_id)
3675 )
3676 }
3677 }
3678 }
3679
3680 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3681 if cfg!(feature = "graphviz") {
3682 match node_id {
3683 NodeId::MISSING => return write!(s, "MISSING"),
3684 NodeId::BOT => return write!(s, "⊥"),
3685 NodeId::TS => return write!(s, "_*"),
3686 NodeId::TOP => return write!(s, "_"),
3687 NodeId::EPS => return write!(s, "ε"),
3688 _ => {}
3689 }
3690 }
3691
3692 match node_id {
3693 NodeId::MISSING => return write!(s, "MISSING"),
3694 NodeId::BOT => return write!(s, "⊥"),
3695 NodeId::TS => return write!(s, "_*"),
3696 NodeId::TOP => return write!(s, "_"),
3697 NodeId::EPS => return write!(s, ""),
3698 _ => {}
3699 }
3700
3701 match self.get_kind(node_id) {
3702 Kind::End => write!(s, r"\z"),
3703 Kind::Begin => write!(s, r"\A"),
3704 Kind::Pred => {
3705 let psi = node_id.pred_tset(self);
3706 if psi == TSetId::EMPTY {
3707 write!(s, r"⊥")
3708 } else if psi == TSetId::FULL {
3709 write!(s, r"_")
3710 } else {
3711 write!(s, "{}", self.solver_ref().pp(psi))
3712 }
3713 }
3714 Kind::Inter => {
3715 write!(s, "(")?;
3716 self.ppw(s, node_id.left(self))?;
3717 write!(s, "&")?;
3718 let mut curr = node_id.right(self);
3719 while curr.is_inter(self) {
3720 let n = curr.left(self);
3721 self.ppw(s, n)?;
3722 write!(s, "&")?;
3723 curr = curr.right(self);
3724 }
3725 self.ppw(s, curr)?;
3726 write!(s, ")")
3727 }
3728 Kind::Union => {
3729 let left = node_id.left(self);
3730 let right = node_id.right(self);
3731 write!(s, "(")?;
3732 self.ppw(s, left)?;
3733 write!(s, "|")?;
3734 let mut curr = right;
3735 while self.get_kind(curr) == Kind::Union {
3736 let n = curr.left(self);
3737 self.ppw(s, n)?;
3738 write!(s, "|")?;
3739 curr = curr.right(self);
3740 }
3741 self.ppw(s, curr)?;
3742 write!(s, ")")
3743 }
3744 Kind::Concat => {
3745 let left = node_id.left(self);
3746 let right = node_id.right(self);
3747 if right.is_star(self) && right.left(self) == left {
3748 self.ppw(s, left)?;
3749 write!(s, "+")?;
3750 return Ok(());
3751 }
3752 if right.is_concat(self) {
3753 let rl = right.left(self);
3754 if rl.is_star(self) && rl.left(self) == left {
3755 self.ppw(s, left)?;
3756 write!(s, "+")?;
3757 return self.ppw(s, right.right(self));
3758 }
3759 }
3760 if right.is_concat(self) && right.left(self) == left {
3761 let mut num = 1;
3762 let mut right = right;
3763 while right.is_concat(self) && right.left(self) == left {
3764 num += 1;
3765 right = right.right(self);
3766 }
3767 if let Some(inner) = left.is_opt_v(self) {
3769 let mut inner_count = 0;
3770 let mut right2 = right;
3771 while right2.is_concat(self) && right2.left(self) == inner {
3772 inner_count += 1;
3773 right2 = right2.right(self);
3774 }
3775 if right2 == inner {
3776 inner_count += 1;
3777 self.ppw(s, inner)?;
3778 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3779 }
3780 if inner_count > 0 {
3781 self.ppw(s, inner)?;
3782 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3783 return self.ppw(s, right2);
3784 }
3785 }
3786 self.ppw(s, left)?;
3787 if right == left {
3788 num += 1;
3789 return write!(s, "{{{}}}", num);
3790 }
3791 if num <= 3 && left.is_pred(self) {
3792 for _ in 1..num {
3793 self.ppw(s, left)?;
3794 }
3795 return self.ppw(s, right);
3796 } else {
3797 write!(s, "{{{}}}", num)?;
3798 return self.ppw(s, right);
3799 }
3800 }
3801 self.ppw(s, left)?;
3802 self.ppw(s, right)
3803 }
3804 Kind::Star => {
3805 let left = node_id.left(self);
3806 let leftkind = self.get_kind(left);
3807 match leftkind {
3808 Kind::Concat | Kind::Star | Kind::Compl => {
3809 write!(s, "(")?;
3810 self.ppw(s, left)?;
3811 write!(s, ")")?;
3812 }
3813 _ => {
3814 self.ppw(s, left)?;
3815 }
3816 };
3817 write!(s, "*")
3818 }
3819 Kind::Compl => {
3820 write!(s, "~(")?;
3821 self.ppw(s, node_id.left(self))?;
3822 write!(s, ")")
3823 }
3824 Kind::Lookbehind => {
3825 let lbleft = self.get_lookbehind_prev(node_id);
3826 let lbinner = self.get_lookbehind_inner(node_id);
3827 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3828 if lbleft != NodeId::MISSING {
3829 write!(s, "❮")?;
3830 self.ppw(s, lbleft)?;
3831 write!(s, "❯")?;
3832 }
3833
3834 write!(s, "(?<=")?;
3835 self.ppw(s, lbinner)?;
3836 write!(s, ")")
3837 }
3838 Kind::Lookahead => {
3839 let inner = self.get_lookahead_inner(node_id);
3840 write!(s, "(?=")?;
3841 self.ppw(s, inner)?;
3842 write!(s, ")")?;
3843 if self.get_lookahead_rel(node_id) != 0 {
3844 write!(s, "{{")?;
3845 let rel = self.get_lookahead_rel(node_id);
3846 if rel == u32::MAX {
3847 write!(s, "∅")?;
3848 } else {
3849 write!(s, "{}", rel)?;
3850 }
3851 write!(s, "}}")?;
3852 }
3853 if node_id.right(self) == NodeId::MISSING {
3854 Ok(())
3855 } else {
3856 write!(s, "❮")?;
3857 self.ppw(s, node_id.right(self))?;
3858 write!(s, "❯")
3859 }
3860 }
3861 Kind::Counted => {
3862 let body = node_id.left(self);
3863 let packed = self.get_extra(node_id);
3864 let step = packed & 0xFFFF;
3865 let best = packed >> 16;
3866 write!(s, "#(")?;
3867 self.ppw(s, body)?;
3868 write!(s, ")s{}b{}", step, best)
3869 }
3870 }
3871 }
3872
3873 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3874 self.mk_concat(node, NodeId::TS)
3875 }
3876
3877 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3878 let node_ts = self.mk_concat(node, NodeId::TS);
3879 self.mk_compl(node_ts)
3880 }
3881
3882 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3883 let notset = self.solver().not_id(set);
3884 self.mk_pred(notset)
3885 }
3886
3887 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3888 let set_id = self.solver().u8_to_set_id(char);
3889 self.mk_pred(set_id)
3890 }
3891
3892 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3893 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3894 self.mk_pred(rangeset)
3895 }
3896
3897 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3898 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3899 for &(lo, hi) in &ranges[1..] {
3900 let r = self.mk_range_u8(lo, hi);
3901 node = self.mk_union(node, r);
3902 }
3903 node
3904 }
3905
3906 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3907 let mut prefix = Vec::new();
3908 let mut curr = node;
3909 loop {
3910 if curr == NodeId::EPS {
3911 let full = !prefix.is_empty();
3912 return (prefix, full);
3913 }
3914 if curr == NodeId::BOT {
3915 break;
3916 }
3917 if curr.is_pred(self) {
3918 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3919 Some(byte) => {
3920 prefix.push(byte);
3921 return (prefix, true);
3922 }
3923 None => break, }
3925 }
3926 if !curr.is_concat(self) {
3927 break;
3928 }
3929 let left = curr.left(self);
3930 if !left.is_pred(self) {
3931 break;
3932 }
3933 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3934 Some(byte) => prefix.push(byte),
3935 None => break,
3936 }
3937 curr = curr.right(self);
3938 }
3939 (prefix, false)
3940 }
3941
3942 #[allow(dead_code)]
3943 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3944 let mut result = NodeId::EPS;
3945 for byte in raw_str.iter().rev() {
3946 let node = self.mk_u8(*byte);
3947 result = self.mk_concat(node, result);
3948 }
3949 result
3950 }
3951
3952 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3953 let mut result = NodeId::EPS;
3954 for byte in raw_str.bytes().rev() {
3955 let node = self.mk_u8(byte);
3956 result = self.mk_concat(node, result);
3957 }
3958 result
3959 }
3960
3961 pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3962 self.prune_rec::<true>(node_id, memo)
3963 }
3964
3965 pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3966 self.prune_rec::<false>(node_id, memo)
3967 }
3968
3969 pub fn simplify_fwd_initial(&mut self, node_id: NodeId) -> NodeId {
3970 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3971 match self.get_kind(node_id) {
3972 Kind::Concat => {
3973 let l = self.simplify_fwd_initial_rec(node_id.left(self), &mut memo);
3974 let r = self.simplify_fwd_initial_rec(node_id.right(self), &mut memo);
3975 if r.is_concat(self) {
3976 if r.left(self).is_ts() && !r.right(self).is_lookahead(self) {
3977 if l.is_begin_nullable(self) {
3978 return r;
3979 }
3980 }
3981 }
3982 }
3983 _ => {}
3984 }
3985
3986 self.simplify_fwd_initial_rec(node_id, &mut memo)
3987 }
3988
3989 fn simplify_fwd_initial_rec(
3990 &mut self,
3991 node_id: NodeId,
3992 memo: &mut FxHashMap<NodeId, NodeId>,
3993 ) -> NodeId {
3994 if let Some(&v) = memo.get(&node_id) {
3995 return v;
3996 }
3997
3998 let out = match self.get_kind(node_id) {
3999 Kind::Union => {
4000 let mut parts: Vec<NodeId> = Vec::new();
4001 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4002 for p in &mut parts {
4003 *p = self.simplify_fwd_initial_rec(*p, memo);
4004 }
4005 parts
4006 .iter()
4007 .rev()
4008 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4009 }
4010 Kind::Concat => {
4011 let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
4012 let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
4013 if l == NodeId::TS
4014 && r.is_lookahead(self)
4015 && self.get_lookahead_tail(r).is_missing()
4016 && self.get_lookahead_rel(r) == 0
4017 {
4018 let body = self.get_lookahead_inner(r);
4019 if self.is_nullable(body, Nullability::END) {
4020 return NodeId::TS;
4021 }
4022 }
4023
4024 if l != NodeId::TS {
4025 return self.mk_concat(l, r);
4026 }
4027 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4028 return rewritten;
4029 }
4030 let (head, tail) = if r.is_concat(self) {
4031 (r.left(self), r.right(self))
4032 } else {
4033 (r, NodeId::EPS)
4034 };
4035 if self.get_kind(head) != Kind::Union {
4036 return self.mk_concat(l, r);
4037 }
4038 if !head.left(self).is_begin() {
4039 return self.mk_concat(l, r);
4040 }
4041 let y = head.right(self);
4042 if !y.is_pred(self) {
4043 return self.mk_concat(l, r);
4044 }
4045 if tail.is_concat(self) {
4046 let tl = tail.left(self);
4047 let tr = tail.right(self);
4048 let y_tset = y.pred_tset(self);
4049 let covers_all = if tl == NodeId::TS {
4050 true
4051 } else if let Some(x_pred) = tl.is_pred_star(self) {
4052 let x_ts = x_pred.pred_tset(self);
4053 let combined = self.solver().or_id(x_ts, y_tset);
4054 self.solver().is_full_id(combined)
4055 } else {
4056 false
4057 };
4058 if covers_all {
4059 return self.mk_concat(NodeId::TS, tr);
4060 }
4061 }
4062 self.mk_concat(l, r)
4063 }
4064 _ => node_id,
4065 };
4066 memo.insert(node_id, out);
4067 out
4068 }
4069
4070 pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
4071 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4072 self.simplify_rev_initial_rec(node_id, &mut memo)
4073 }
4074
4075 fn simplify_rev_initial_rec(
4076 &mut self,
4077 node_id: NodeId,
4078 memo: &mut FxHashMap<NodeId, NodeId>,
4079 ) -> NodeId {
4080 if let Some(&v) = memo.get(&node_id) {
4081 return v;
4082 }
4083 let out = match self.get_kind(node_id) {
4084 Kind::Union => {
4085 let mut parts: Vec<NodeId> = Vec::new();
4086 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4087 for p in &mut parts {
4088 *p = self.simplify_rev_initial_rec(*p, memo);
4089 }
4090 parts
4091 .iter()
4092 .rev()
4093 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4094 }
4095 Kind::Concat => {
4096 let l = self.simplify_rev_initial_rec(node_id.left(self), memo);
4097 let r = self.simplify_rev_initial_rec(node_id.right(self), memo);
4098
4099 if l != NodeId::TS {
4100 return self.mk_concat(l, r);
4101 }
4102 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4103 return rewritten;
4104 }
4105 let (tail1, tail2) = if r.is_concat(self) {
4106 (r.left(self), r.right(self))
4107 } else {
4108 (r, NodeId::EPS)
4109 };
4110
4111 if tail1.is_begin() {
4112 return r;
4113 }
4114
4115 if self.get_kind(tail1) != Kind::Union {
4116 return self.mk_concat(l, r);
4117 }
4118 if !tail1.left(self).is_begin() {
4119 return self.mk_concat(l, r);
4120 }
4121 let y = tail1.right(self);
4122 if !y.is_pred(self) {
4123 return self.mk_concat(l, r);
4124 }
4125 if tail2.is_concat(self) {
4126 let tl = tail2.left(self);
4127 let tr = tail2.right(self);
4128
4129 let y_tset = y.pred_tset(self);
4130 let covers_all = if tl == NodeId::TS {
4131 true
4132 } else if let Some(x_pred) = tl.is_pred_star(self) {
4133 let x_ts = x_pred.pred_tset(self);
4134 let combined = self.solver().or_id(x_ts, y_tset);
4135 self.solver().is_full_id(combined)
4136 } else {
4137 false
4138 };
4139 if covers_all {
4140 return self.mk_concat(NodeId::TS, tr);
4141 }
4142 }
4143 self.mk_concat(l, r)
4144 }
4145 _ => node_id,
4146 };
4147 memo.insert(node_id, out);
4148 out
4149 }
4150
4151 fn try_begin_neg_pred_rewrite(&mut self, r: NodeId) -> Option<NodeId> {
4152 if !r.is_concat(self) {
4153 return None;
4154 }
4155 let begin = r.left(self);
4156 let mid_tail = r.right(self);
4157 if !begin.is_begin() {
4158 return None;
4159 }
4160 if !mid_tail.is_concat(self) {
4161 return None;
4162 }
4163 let neg = mid_tail.left(self);
4164 let tail = mid_tail.right(self);
4165 if self.get_kind(neg) != Kind::Compl {
4166 return None;
4167 }
4168 let inside = neg.left(self);
4169 if !inside.is_concat(self) {
4170 return None;
4171 }
4172 let ts_part = inside.left(self);
4173 let c_pred = inside.right(self);
4174 if ts_part != NodeId::TS || !c_pred.is_pred(self) {
4175 return None;
4176 }
4177 let c_tset = c_pred.pred_tset(self);
4178 let not_c = self.mk_pred_not(c_tset);
4179 let left_branch = self.mk_concat(NodeId::BEGIN, tail);
4180 let not_c_tail = self.mk_concat(not_c, tail);
4181 let right_branch = self.mk_concat(NodeId::TS, not_c_tail);
4182 Some(self.mk_union(left_branch, right_branch))
4183 }
4184
4185 fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
4186 if !n.is_concat(self) {
4187 return n;
4188 }
4189 let l = n.left(self);
4190 let r = n.right(self);
4191 if l == NodeId::TS && r == NodeId::END {
4192 return NodeId::TS;
4193 }
4194 let new_r = self.strip_la_body_end(r);
4195 if new_r == r {
4196 n
4197 } else {
4198 self.mk_concat(l, new_r)
4199 }
4200 }
4201
4202 fn prune_rec<const FWD: bool>(
4203 &mut self,
4204 node_id: NodeId,
4205 memo: &mut FxHashMap<NodeId, NodeId>,
4206 ) -> NodeId {
4207 assert!(node_id != NodeId::MISSING);
4208 if node_id == NodeId::MISSING {
4209 return node_id;
4210 }
4211 if let Some(&v) = memo.get(&node_id) {
4212 return v;
4213 }
4214 let (l, r) = (node_id.left(self), node_id.right(self));
4215 let out = match node_id.kind(self) {
4216 Kind::Union => {
4217 let l = self.prune_rec::<FWD>(l, memo);
4218 let r = self.prune_rec::<FWD>(r, memo);
4219 let mut parts: Vec<NodeId> = Vec::new();
4220 parts.push(l);
4221 self.iter_unions_b(r, &mut |_, v| parts.push(v));
4222 for p in &mut parts {
4223 *p = self.prune_rec::<FWD>(*p, memo);
4224 }
4225
4226 if FWD {
4227 let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
4229 for &p in &parts {
4230 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4231 let body = self.get_lookahead_inner(p);
4232 let rel = self.get_lookahead_rel(p);
4233 best.entry(body)
4234 .and_modify(|r| *r = (*r).min(rel))
4235 .or_insert(rel);
4236 }
4237 }
4238 parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
4239 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4240 let body = self.get_lookahead_inner(p);
4241 if self.get_lookahead_rel(p) != best[&body] {
4242 return acc;
4243 }
4244 }
4245 self.mk_union(p, acc)
4246 })
4247 } else {
4248 parts
4249 .iter()
4250 .rev()
4251 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4252 }
4253 }
4254 Kind::Concat => {
4255 if FWD
4256 && l.is_ts()
4257 && r.is_lookahead(self)
4258 && self.get_lookahead_tail(r).is_missing()
4259 && self.get_lookahead_rel(r) == 0
4260 {
4261 let body = self.get_lookahead_inner(r);
4262 if self.is_nullable(body, Nullability::END) {
4263 return NodeId::TS;
4264 }
4265 }
4266 let l = self.prune_rec::<FWD>(l, memo);
4267 let r = self.prune_rec::<FWD>(r, memo);
4268 self.mk_concat(l, r)
4269 }
4270 Kind::Inter => {
4271 let l = self.prune_rec::<FWD>(l, memo);
4272 let r = self.prune_rec::<FWD>(r, memo);
4273 self.mk_inter(l, r)
4274 }
4275 Kind::Compl => {
4276 let l = self.prune_rec::<FWD>(l, memo);
4277 self.mk_compl(l)
4278 }
4279 Kind::Lookahead => {
4280 let lrel = self.get_lookahead_rel(node_id);
4281 let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
4282 let body = self.prune_rec::<FWD>(body, memo);
4283
4284 let tail = if self.get_lookahead_tail(node_id).is_missing() {
4285 NodeId::MISSING
4286 } else {
4287 self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo)
4288 };
4289 self.mk_lookahead_internal(body, tail, lrel)
4290 }
4291 Kind::Begin => NodeId::BOT,
4292 Kind::Counted => {
4293 let body = self.prune_rec::<FWD>(l, memo);
4294 let chain = self.prune_rec::<FWD>(r, memo);
4295 self.mk_counted(body, chain, self.get_extra(node_id))
4296 }
4297 Kind::End | Kind::Pred => node_id,
4298 Kind::Star => {
4299 let l = self.prune_rec::<FWD>(l, memo);
4300 self.mk_star(l)
4301 }
4302 Kind::Lookbehind => {
4303 let l = self.prune_rec::<FWD>(l, memo);
4304 if r.is_missing() {
4305 l
4306 } else {
4307 node_id
4308 }
4309 }
4310 };
4311 memo.insert(node_id, out);
4312 out
4313 }
4314
4315 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4316 let mut sorted: Vec<NodeId> = nodes.collect();
4317 if sorted.len() <= 1 {
4318 return sorted.pop().unwrap_or(NodeId::BOT);
4319 }
4320 sorted.sort();
4321 sorted.dedup();
4322 sorted.retain(|&x| x != NodeId::BOT);
4323 if sorted.is_empty() {
4324 return NodeId::BOT;
4325 }
4326 if sorted.len() > 16 {
4327 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
4328 let mut non_concat: Vec<NodeId> = Vec::new();
4329 for &n in &sorted {
4330 if n.is_concat(self) {
4331 by_head.entry(self.get_left(n)).or_default().push(n);
4332 } else {
4333 non_concat.push(n);
4334 }
4335 }
4336 let mut absorbed: Vec<NodeId> = Vec::new();
4337 for &n in &non_concat {
4338 if by_head.contains_key(&n) {
4339 absorbed.push(n);
4340 }
4341 }
4342 if !absorbed.is_empty() {
4343 non_concat.retain(|n| !absorbed.contains(n));
4344 }
4345 if by_head.len() < sorted.len() {
4346 let mut groups: Vec<NodeId> = non_concat;
4347 for (head, tails) in by_head {
4348 let mut tail_nodes: Vec<NodeId> =
4349 tails.iter().map(|&n| self.get_right(n)).collect();
4350 if absorbed.contains(&head) {
4351 tail_nodes.push(NodeId::EPS);
4352 }
4353 let tail_union = self.mk_unions(tail_nodes.into_iter());
4354 let factored = self.mk_concat(head, tail_union);
4355 groups.push(factored);
4356 }
4357 groups.sort();
4358 groups.dedup();
4359 return self.mk_unions_balanced(&groups);
4360 }
4361 }
4362 self.mk_unions_balanced(&sorted)
4363 }
4364
4365 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
4366 match nodes.len() {
4367 0 => NodeId::BOT,
4368 1 => nodes[0],
4369 n => {
4370 let mid = n / 2;
4371 let left = self.mk_unions_balanced(&nodes[..mid]);
4372 let right = self.mk_unions_balanced(&nodes[mid..]);
4373 self.mk_union(left, right)
4374 }
4375 }
4376 }
4377
4378 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4379 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
4380 }
4381
4382 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4383 nodes
4384 .rev()
4385 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
4386 }
4387}
4388
4389impl RegexBuilder {
4390 pub fn iter_sat(
4391 &mut self,
4392 stack: &mut Vec<(TRegexId, TSetId)>,
4393 f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
4394 ) {
4395 debug_assert!(!stack.is_empty());
4396 loop {
4397 match stack.pop() {
4398 None => return,
4399 Some((curr, curr_set)) => match *self.get_tregex(curr) {
4400 TRegex::Leaf(n) => {
4401 let mut curr = n;
4402 while curr != NodeId::BOT {
4403 match self.get_kind(curr) {
4404 _ => {
4405 f(self, n, curr_set);
4406 curr = NodeId::BOT;
4407 }
4408 }
4409 }
4410 }
4411 TRegex::ITE(cnd, then_id, else_id) => {
4412 if else_id != TRegexId::BOT {
4413 let notcnd = self.solver().not_id(cnd);
4414 let interset1 = self.solver().and_id(curr_set, notcnd);
4415 stack.push((else_id, interset1));
4416 }
4417 let interset2 = self.solver().and_id(curr_set, cnd);
4418 stack.push((then_id, interset2));
4419 }
4420 },
4421 }
4422 }
4423 }
4424
4425 #[allow(dead_code)]
4426 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
4427 match self.get_tregex(term_id).clone() {
4428 TRegex::Leaf(node_id) => {
4429 if NodeId::BOT == node_id {
4430 vec![]
4431 } else {
4432 vec![node_id]
4433 }
4434 }
4435 TRegex::ITE(_, then_id, else_id) => {
4436 let mut then_nodes = self.extract_sat(then_id);
4437 let mut else_nodes = self.extract_sat(else_id);
4438 then_nodes.append(&mut else_nodes);
4439 then_nodes
4440 }
4441 }
4442 }
4443
4444 pub(crate) fn iter_unions_b(
4445 &mut self,
4446 curr: NodeId,
4447 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4448 ) {
4449 let mut curr = curr;
4450 while self.get_kind(curr) == Kind::Union {
4451 f(self, curr.left(self));
4452 curr = curr.right(self);
4453 }
4454 f(self, curr);
4455 }
4456
4457 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4458 if !self.contains_look(node_id) {
4459 return Some(node_id);
4460 }
4461 match self.get_kind(node_id) {
4462 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4463 Kind::Concat => {
4464 let left = node_id.left(self);
4465 let right = node_id.right(self);
4466 let elim_l = self.try_elim_lookarounds(left)?;
4467 let elim_r = self.try_elim_lookarounds(right)?;
4468 let rw = self.mk_concat(elim_l, elim_r);
4469 Some(rw)
4470 }
4471 Kind::Union => {
4472 let left = node_id.left(self);
4473 let right = node_id.right(self);
4474 let elim_l = self.try_elim_lookarounds(left)?;
4475 let elim_r = self.try_elim_lookarounds(right)?;
4476 let rw = self.mk_union(elim_l, elim_r);
4477 Some(rw)
4478 }
4479
4480 Kind::Star => {
4481 let body = node_id.left(self);
4482 let elim_l = self.try_elim_lookarounds(body)?;
4483 Some(self.mk_star(elim_l))
4484 }
4485 Kind::Compl => {
4486 let left = node_id.left(self);
4487 let elim_l = self.try_elim_lookarounds(left)?;
4488 Some(self.mk_compl(elim_l))
4489 }
4490 Kind::Lookahead => {
4491 let rel = self.get_lookahead_rel(node_id);
4492 if rel != 0 {
4493 return None;
4494 }
4495 let lbody = self.get_lookahead_inner(node_id);
4496 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4497 let elim_l = self.try_elim_lookarounds(lbody)?;
4498 let elim_r = self.try_elim_lookarounds(ltail)?;
4499 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4500 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4501 let rw = self.mk_inter(lbody_ts, ltail_ts);
4502 Some(rw)
4503 }
4504 Kind::Lookbehind => {
4505 let linner = self.get_lookbehind_inner(node_id);
4506 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4507 let elim_l = self.try_elim_lookarounds(linner)?;
4508 let elim_r = self.try_elim_lookarounds(lprev)?;
4509 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4510 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4511 let rw = self.mk_inter(lbody_ts, ltail_ts);
4512 Some(rw)
4513 }
4514 Kind::Inter => {
4515 let left = node_id.left(self);
4516 let right = node_id.right(self);
4517 let elim_l = self.try_elim_lookarounds(left)?;
4518 let elim_r = self.try_elim_lookarounds(right)?;
4519 let rw = self.mk_inter(elim_l, elim_r);
4520 Some(rw)
4521 }
4522 Kind::Counted => None,
4523 }
4524 }
4525
4526 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4528 if self.nullability(node) == Nullability::NEVER {
4529 node
4530 } else {
4531 self.mk_inter(NodeId::TOPPLUS, node)
4532 }
4533 }
4534
4535 pub fn iter_find_stack(
4536 &self,
4537 stack: &mut Vec<TRegexId>,
4538 mut f: impl FnMut(NodeId) -> bool,
4539 ) -> bool {
4540 loop {
4541 match stack.pop() {
4542 None => return false,
4543 Some(curr) => match self.get_tregex(curr) {
4544 TRegex::Leaf(n) => {
4545 let mut curr = *n;
4546 while curr != NodeId::BOT {
4547 match self.get_kind(curr) {
4548 Kind::Union => {
4549 if f(curr.left(self)) {
4550 return true;
4551 }
4552 curr = curr.right(self);
4553 }
4554 _ => {
4555 if f(*n) {
4556 return true;
4557 }
4558 curr = NodeId::BOT;
4559 }
4560 }
4561 }
4562 }
4563 TRegex::ITE(_, then_id, else_id) => {
4564 if *else_id != TRegexId::BOT {
4565 stack.push(*else_id);
4566 }
4567 stack.push(*then_id);
4568 }
4569 },
4570 }
4571 }
4572 }
4573
4574 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
4600 if node == NodeId::BOT {
4601 return Some(true);
4602 }
4603 if self.nullability(node) != Nullability::NEVER {
4604 return Some(false);
4605 }
4606 if let Some(cached) = self.cache_empty.get(&node) {
4607 if cached.is_checked() {
4608 return Some(cached.is_empty());
4609 }
4610 }
4611 let node = if !self.contains_look(node) {
4612 node
4613 } else {
4614 self.try_elim_lookarounds(node)?
4615 };
4616 let isempty_flag = self.is_empty_lang_internal(node);
4617
4618 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
4619 }
4620
4621 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, ResharpError> {
4622 if !self.get_meta_flags(initial_node).contains_inter() {
4624 return Ok(NodeFlags::ZERO);
4625 }
4626
4627 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4628 let mut worklist: VecDeque<NodeId> = VecDeque::new();
4629 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
4630 let mut stack = Vec::new();
4631 stack.push(begin_der);
4632 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
4633 visited.insert(node, initial_node);
4634 let nullability = self.nullability(node);
4635 if nullability != Nullability::NEVER {
4636 true
4637 } else {
4638 worklist.push_back(node);
4639 false
4640 }
4641 });
4642 if found_nullable_right_away {
4643 return Ok(NodeFlags::ZERO);
4644 }
4645
4646 worklist.push_back(initial_node);
4647 let isempty_flag: NodeFlags;
4648 let mut found_node = NodeId::BOT;
4649
4650 loop {
4651 match worklist.pop_front() {
4652 None => {
4653 isempty_flag = NodeFlags::IS_EMPTY;
4654 break;
4655 }
4656 Some(outer) => {
4657 if let Some(cached) = self.cache_empty.get(&outer) {
4658 if cached.is_checked() {
4659 if cached.is_empty() {
4660 continue;
4661 } else {
4662 return Ok(NodeFlags::ZERO);
4663 }
4664 }
4665 }
4666
4667 let derivative = self.der(outer, Nullability::CENTER)?;
4668
4669 stack.push(derivative);
4670
4671 let found_null = self.iter_find_stack(&mut stack, |node| {
4672 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
4673 found_node = node;
4674 if !self.get_meta_flags(node).contains_inter() {
4675 true
4676 } else {
4677 e.insert(outer);
4678 worklist.push_front(node);
4679 self.any_nonbegin_nullable(node)
4680 }
4681 } else {
4682 false
4683 }
4684 });
4685 if found_null {
4686 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
4687 isempty_flag = NodeFlags::ZERO;
4688 break;
4689 }
4690 }
4691 }
4692 }
4693
4694 self.cache_empty.insert(
4695 initial_node,
4696 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
4697 );
4698 Ok(isempty_flag)
4699 }
4700
4701 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
4703 if larger_lang == smaller_lang {
4704 return Some(true);
4705 }
4706
4707 if self
4709 .nullability(larger_lang)
4710 .not()
4711 .and(self.nullability(smaller_lang))
4712 != Nullability::NEVER
4713 {
4714 return Some(false);
4715 }
4716
4717 let (smaller_lang, larger_lang) =
4722 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
4723 let wrap = |b: &mut Self, n: NodeId| {
4724 let tmp = b.mk_concat(n, NodeId::TS);
4725 b.mk_concat(NodeId::TS, tmp)
4726 };
4727 (wrap(self, smaller_lang), wrap(self, larger_lang))
4728 } else {
4729 (smaller_lang, larger_lang)
4730 };
4731
4732 let nota = self.mk_compl(larger_lang);
4733 let diff = self.mk_inter(smaller_lang, nota);
4734 self.is_empty_lang(diff)
4735 }
4736}