1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum ResharpError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for ResharpError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 ResharpError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 ResharpError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 ResharpError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for ResharpError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_LOOKAHEAD: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_inter(self) -> bool {
134 self.has(MetaFlags::CONTAINS_INTER)
135 }
136
137 pub(crate) fn all_contains_flags(self) -> MetaFlags {
138 self.and(
139 MetaFlags::CONTAINS_ANCHORS
140 .or(MetaFlags::CONTAINS_INTER)
141 .or(MetaFlags::CONTAINS_LOOKBEHIND)
142 .or(MetaFlags::CONTAINS_LOOKAHEAD),
143 )
144 }
145}
146
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
148#[repr(u8)]
149pub enum Kind {
150 #[default]
151 Pred,
152 Star,
153 Begin,
154 End,
155 Concat,
156 Union,
157 Compl,
158 Lookbehind,
159 Lookahead,
160 Inter,
161 Counted,
162}
163
164#[derive(Eq, Hash, PartialEq, Clone)]
165struct Metadata {
166 flags: MetaFlags,
167 nulls: NullsId,
168}
169
170struct MetadataBuilder {
171 num_created: u32,
172 solver: Solver,
173 nb: NullsBuilder,
174 index: FxHashMap<Metadata, MetadataId>,
175 pub array: Vec<Metadata>,
176}
177
178mod helpers {
179 pub(crate) fn incr_rel(n1: u32) -> u32 {
180 match n1.overflowing_add(1) {
181 (_, true) => u32::MAX,
182 (res, false) => res,
183 }
184 }
185}
186
187impl MetadataBuilder {
188 fn new() -> MetadataBuilder {
189 Self {
190 index: FxHashMap::default(),
191 array: vec![Metadata {
192 flags: MetaFlags::ZERO,
193 nulls: NullsId::EMPTY,
194 }],
195 solver: Solver::new(),
196 num_created: 0,
197 nb: NullsBuilder::new(),
198 }
199 }
200
201 fn init(&mut self, inst: Metadata) -> MetadataId {
202 self.num_created += 1;
203 let new_id = MetadataId(self.num_created);
204 self.index.insert(inst.clone(), new_id);
205 self.array.push(inst);
206 new_id
207 }
208
209 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210 match self.index.get(&inst) {
211 Some(&id) => id,
212 None => self.init(inst),
213 }
214 }
215
216 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217 &self.array[inst.0 as usize]
218 }
219
220 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221 setflags.all_contains_flags()
222 }
223
224 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
225 let left = &self.array[body.0 as usize].flags;
226 let contains = self.get_contains(*left);
227 let inf = if body_id == NodeId::BOT {
229 MetaFlags::ZERO
230 } else {
231 MetaFlags::INFINITE_LENGTH
232 };
233 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
234 }
235
236 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
237 let left = &self.array[left_id.0 as usize].flags;
238 let null = left.nullability().not().and(Nullability::ALWAYS);
239 let contains = self.get_contains(*left);
240 MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
241 }
242
243 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
244 let left = &self.array[left_id.0 as usize].flags;
245 let right = &self.array[right_id.0 as usize].flags;
246 let null = left.nullability().and(right.nullability());
247 let contains = self.get_contains(left.or(*right));
248 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
249 MetaFlags::with_nullability(null, contains.or(len))
250 }
251
252 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253 let left = &self.array[left_id.0 as usize].flags;
254 let right = &self.array[right_id.0 as usize].flags;
255 let null = left.nullability().and(right.nullability());
256 let contains = self
257 .get_contains(left.or(*right))
258 .or(MetaFlags::CONTAINS_INTER);
259 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
260 MetaFlags::with_nullability(null, contains.or(len))
261 }
262
263 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
264 let left = &self.array[left_id.0 as usize].flags;
265 let right = &self.array[right_id.0 as usize].flags;
266 let null = left.nullability().or(right.nullability());
267 let contains = self.get_contains(left.or(*right));
268 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
269 MetaFlags::with_nullability(null, contains.or(len))
270 }
271}
272
273#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
274pub struct NodeKey {
275 pub(crate) kind: Kind,
276 pub(crate) left: NodeId,
277 pub(crate) right: NodeId,
278 pub(crate) extra: u32,
279}
280
281#[derive(Eq, Hash, PartialEq, Clone, Debug)]
282pub enum TRegex<TSet> {
283 Leaf(NodeId),
284 ITE(TSet, TRegexId, TRegexId),
285}
286
287#[derive(Eq, Hash, PartialEq, Clone, Copy)]
288pub(crate) struct NodeFlags(u8);
289impl NodeFlags {
290 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
291 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
292 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
293
294 fn is_checked(self) -> bool {
295 self.0 >= NodeFlags::IS_CHECKED.0
296 }
297 fn is_empty(self) -> bool {
298 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
299 }
300}
301
302#[derive(Eq, Hash, PartialEq, Clone, Copy)]
303pub(crate) struct BuilderFlags(u8);
304impl BuilderFlags {
305 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
306 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
307}
308
309pub struct RegexBuilder {
310 mb: MetadataBuilder,
311 temp_vec: Vec<NodeId>,
312 num_created: u32,
313 index: FxHashMap<NodeKey, NodeId>,
314 array: Vec<NodeKey>,
315 metadata: Vec<MetadataId>,
316 reversed: Vec<NodeId>,
317 cache_empty: FxHashMap<NodeId, NodeFlags>,
318 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
319 tr_array: Vec<TRegex<TSetId>>,
320 tr_der_center: Vec<TRegexId>,
321 tr_der_begin: Vec<TRegexId>,
322 flags: BuilderFlags,
323 pub lookahead_context_max: u32,
325 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
326 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
327}
328
329impl NodeId {
330 fn is_missing(&self) -> bool {
331 *self == NodeId::MISSING
332 }
333 #[inline]
334 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
335 b.get_flags_contains(self)
336 }
337 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
338 if self == tail {
339 true
340 } else if self.is_kind(b, Kind::Concat) {
341 self.right(b).has_concat_tail(b, tail)
342 } else {
343 false
344 }
345 }
346 fn missing_to_eps(&self) -> NodeId {
347 if *self == NodeId::MISSING {
348 NodeId::EPS
349 } else {
350 *self
351 }
352 }
353
354 #[inline]
355 fn kind(self, b: &RegexBuilder) -> Kind {
356 b.get_kind(self)
357 }
358 #[inline]
359 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
360 b.get_kind(self) == k
361 }
362 #[inline]
363 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
364 b.nullability(self) == Nullability::NEVER
365 }
366 #[inline]
367 pub fn nullability(self, b: &RegexBuilder) -> Nullability {
368 b.nullability(self)
369 }
370
371 #[inline]
372 pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
373 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
374 }
375 #[inline]
376 pub fn is_begin_nullable(self, b: &RegexBuilder) -> bool {
377 b.nullability(self).has(Nullability::BEGIN)
378 }
379 #[inline]
380 pub fn left(self, b: &RegexBuilder) -> NodeId {
381 b.get_left(self)
382 }
383
384 #[inline]
385 pub fn right(self, b: &RegexBuilder) -> NodeId {
386 b.get_right(self)
387 }
388
389 #[inline]
390 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, ResharpError> {
391 b.der(self, mask)
392 }
393
394 #[inline]
395 fn extra(self, b: &RegexBuilder) -> u32 {
396 b.get_extra(self)
397 }
398
399 #[inline]
400 fn is_pred(self, b: &RegexBuilder) -> bool {
401 b.get_kind(self) == Kind::Pred
402 }
403
404 #[inline]
405 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
406 debug_assert!(self.is_pred(b));
407 TSetId(b.get_extra(self))
408 }
409 #[inline]
410 fn is_star(self, b: &RegexBuilder) -> bool {
411 if NodeId::EPS == self {
412 return false;
413 }
414 b.get_kind(self) == Kind::Star
415 }
416
417 pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
418 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
419 }
420
421 pub fn contains_lookahead(self, b: &RegexBuilder) -> bool {
422 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAHEAD)
423 }
424
425 pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
426 b.get_meta_flags(self)
427 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
428 }
429
430 #[inline]
431 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
432 b.get_kind(self) == Kind::Inter
433 }
434
435 #[inline]
436 pub(crate) fn is_compl(self, b: &RegexBuilder) -> bool {
437 b.get_kind(self) == Kind::Compl
438 }
439
440 #[inline]
441 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
442 if self.is_concat(b) {
443 let r = self.right(b);
444 return r.is_star(b) && r.left(b) == self.left(b);
445 }
446 false
447 }
448
449 #[inline]
450 fn is_begin(self) -> bool {
451 self == NodeId::BEGIN
452 }
453
454 #[inline]
455 fn is_end(self) -> bool {
456 self == NodeId::END
457 }
458
459 #[inline]
460 fn is_union(self, b: &RegexBuilder) -> bool {
461 b.get_kind(self) == Kind::Union
462 }
463
464 #[inline]
465 fn is_concat(self, b: &RegexBuilder) -> bool {
466 b.get_kind(self) == Kind::Concat
467 }
468
469 #[inline]
470 fn is_lookahead(self, b: &RegexBuilder) -> bool {
471 b.get_kind(self) == Kind::Lookahead
472 }
473
474 #[inline]
475 fn is_lookbehind(self, b: &RegexBuilder) -> bool {
476 b.get_kind(self) == Kind::Lookbehind
477 }
478
479 #[inline]
480 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
481 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
482 Some(b.get_right(self))
483 } else {
484 None
485 }
486 }
487
488 #[inline]
489 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
490 if self.is_concat(b) {
491 let left = self.left(b);
492 let right = self.right(b);
493 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
494 return right == NodeId::END;
495 }
496 }
497 false
498 }
499
500 #[inline]
501 #[allow(dead_code)]
502 fn is_ts(self) -> bool {
503 NodeId::TS == self
504 }
505
506 #[inline]
507 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
508 if NodeId::EPS == self {
509 return None;
510 }
511 if self.is_star(b) && self.left(b).is_pred(b) {
512 Some(self.left(b))
513 } else {
514 None
515 }
516 }
517
518 #[inline]
519 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
520 if self.is_concat(b) && self.left(b) == NodeId::TS {
521 let middle = self.right(b);
522 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
523 return Some(middle.left(b));
524 }
525 }
526 None
527 }
528
529 #[inline]
530 pub(crate) fn iter_union(
531 self,
532 b: &mut RegexBuilder,
533 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
534 ) {
535 let mut curr: NodeId = self;
536 while curr.kind(b) == Kind::Union {
537 f(b, curr.left(b));
538 curr = curr.right(b);
539 }
540 f(b, curr);
541 }
542
543 #[inline]
544 pub(crate) fn iter_inter(
545 self,
546 b: &mut RegexBuilder,
547 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
548 ) {
549 let mut curr: NodeId = self;
550 while curr.kind(b) == Kind::Inter {
551 f(b, curr.left(b));
552 curr = curr.right(b);
553 }
554 f(b, curr);
555 }
556
557 #[inline]
558 pub(crate) fn iter_union_while(
559 self,
560 b: &mut RegexBuilder,
561 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
562 ) {
563 let mut curr: NodeId = self;
564 let mut continue_loop = true;
565 while continue_loop && curr.kind(b) == Kind::Union {
566 continue_loop = f(b, curr.left(b));
567 curr = curr.right(b);
568 }
569 if continue_loop {
570 f(b, curr);
571 }
572 }
573
574 #[inline]
575 pub(crate) fn any_inter_component(
576 self,
577 b: &RegexBuilder,
578 mut f: impl FnMut(NodeId) -> bool,
579 ) -> bool {
580 debug_assert!(self.kind(b) == Kind::Inter);
581 let mut cur = self;
582 while cur.kind(b) == Kind::Inter {
583 if f(cur.left(b)) {
584 return true;
585 }
586 cur = cur.right(b);
587 }
588 f(cur)
589 }
590
591 #[inline]
592 pub(crate) fn any_union_component(
593 self,
594 b: &RegexBuilder,
595 mut f: impl FnMut(NodeId) -> bool,
596 ) -> bool {
597 let mut cur = self;
598 while cur.kind(b) == Kind::Union {
599 if f(cur.left(b)) {
600 return true;
601 }
602 cur = cur.right(b);
603 }
604 f(cur)
605 }
606}
607
608impl Default for RegexBuilder {
609 fn default() -> Self {
610 Self::new()
611 }
612}
613
614impl RegexBuilder {
615 pub fn new() -> RegexBuilder {
616 let mut inst = Self {
617 mb: MetadataBuilder::new(),
618 array: Vec::new(),
619 index: FxHashMap::default(),
620 cache_empty: FxHashMap::default(),
621 tr_array: Vec::new(),
622 tr_cache: FxHashMap::default(),
623 flags: BuilderFlags::ZERO,
624 lookahead_context_max: 800,
625 num_created: 0,
626 metadata: Vec::new(),
627 reversed: Vec::new(),
628 tr_der_center: Vec::new(),
629 tr_der_begin: Vec::new(),
630 temp_vec: Vec::new(),
631 mk_binary_memo: FxHashMap::default(),
632 clean_cache: FxHashMap::default(),
633 };
634 inst.array.push(NodeKey::default());
635 inst.mk_pred(TSetId::EMPTY);
636 inst.mk_star(NodeId::BOT);
637 inst.mk_pred(TSetId::FULL);
638 inst.mk_star(NodeId::TOP);
639 let top_plus_id = inst.mk_plus(NodeId::TOP);
640 inst.mk_unset(Kind::Begin);
641 inst.mk_unset(Kind::End);
642 debug_assert!(top_plus_id == NodeId::TOPPLUS);
643
644 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
645 inst.mk_leaf(NodeId::BOT);
646 inst.mk_leaf(NodeId::EPS);
647 inst.mk_leaf(NodeId::TOP);
648 inst.mk_leaf(NodeId::TS);
649
650 inst.flags = BuilderFlags::SUBSUME;
651 inst
652 }
653
654 #[inline]
655 pub fn solver_ref(&self) -> &Solver {
656 &self.mb.solver
657 }
658
659 #[inline]
660 pub fn solver(&mut self) -> &mut Solver {
661 &mut self.mb.solver
662 }
663
664 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
665 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
666 self.tr_cache.insert(inst.clone(), new_id);
667 self.tr_array.push(inst);
668 new_id
669 }
670
671 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
672 match self.tr_cache.get(&inst) {
673 Some(&id) => id,
674 None => self.tr_init(inst),
675 }
676 }
677
678 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
679 &self.tr_array[inst.0 as usize]
680 }
681
682 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
683 let solv = self.solver();
684 !solv.is_sat_id(cond1, cond2)
685 }
686
687 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
688 let term = TRegex::<TSetId>::Leaf(node_id);
689 self.get_tregex_id(term)
690 }
691
692 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
693 let tmp_inst = TRegex::ITE(cond, _then, _else);
694 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
695 return *cached;
696 }
697 if _then == _else {
698 return _then;
699 }
700 if self.solver().is_full_id(cond) {
701 return _then;
702 }
703 if self.solver().is_empty_id(cond) {
704 return _else;
705 }
706 let _clean_then = match self.tr_array[_then.0 as usize] {
707 TRegex::Leaf(_) => _then,
708 _ => self.clean(cond, _then),
709 };
710 let notcond = self.solver().not_id(cond);
711 let _clean_else = match self.tr_array[_else.0 as usize] {
712 TRegex::Leaf(_) => _else,
713 _ => self.clean(notcond, _else),
714 };
715
716 if _clean_then == _clean_else {
717 return _clean_then;
718 }
719 match self.get_tregex(_clean_then) {
721 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
722 let _cond_then = *leftcond;
723 let new_then = *_inner_then;
724 let sand = self.solver().and_id(cond, _cond_then);
725 return self.mk_ite(sand, new_then, _clean_else);
726 }
727 _ => {}
728 }
729 match self.get_tregex(_clean_else) {
731 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
733 let e2clone = *_e2;
734 let new_cond = self.mb.solver.or_id(cond, *_c2);
735 return self.mk_ite(new_cond, _clean_then, e2clone);
736 }
737 _ => {}
738 }
739
740 if _clean_then == TRegexId::BOT {
741 let flip_cond = self.solver().not_id(cond);
742 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
743 return cleaned_id;
744 }
745
746 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
747 }
748
749 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
750 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
751 return cached;
752 }
753 let result = match *self.get_tregex(tterm) {
754 TRegex::Leaf(_) => tterm,
755 TRegex::ITE(alpha, _then_id, _else_id) => {
756 let notalpha = self.mb.solver.not_id(alpha);
757 if self.mb.solver.unsat_id(beta, alpha) {
758 self.clean(beta, _else_id)
760 } else if self.unsat(beta, notalpha) {
761 self.clean(beta, _then_id)
763 } else {
764 let tc = self.mb.solver.and_id(beta, alpha);
765 let ec = self.mb.solver.and_id(beta, notalpha);
766 let new_then = self.clean(tc, _then_id);
767 let new_else = self.clean(ec, _else_id);
768 self.mk_ite(alpha, new_then, new_else)
769 }
770 }
771 };
772 self.clean_cache.insert((beta, tterm), result);
773 result
774 }
775
776 fn mk_unary(
777 &mut self,
778 term: TRegexId,
779 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
780 ) -> TRegexId {
781 match self.tr_array[term.0 as usize] {
782 TRegex::Leaf(node_id) => {
783 let applied = apply(self, node_id);
784 self.mk_leaf(applied)
785 }
786 TRegex::ITE(c1, _then, _else) => {
787 let _then1 = self.mk_unary(_then, apply);
788 let _else1 = self.mk_unary(_else, apply);
789 self.mk_ite(c1, _then1, _else1)
790 }
791 }
792 }
793
794 fn mk_binary_result(
795 &mut self,
796 left: TRegexId,
797 right: TRegexId,
798 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, ResharpError>,
799 ) -> Result<TRegexId, ResharpError> {
800 match self.tr_array[left.0 as usize] {
801 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
802 TRegex::Leaf(right_node_id) => {
803 let applied = apply(self, left_node_id, right_node_id)?;
804 Ok(self.mk_leaf(applied))
805 }
806 TRegex::ITE(c2, _then, _else) => {
807 let then2 = self.mk_binary_result(left, _then, apply)?;
808 let else2 = self.mk_binary_result(left, _else, apply)?;
809 Ok(self.mk_ite(c2, then2, else2))
810 }
811 },
812 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
813 TRegex::Leaf(_) => {
814 let then2 = self.mk_binary_result(_then1, right, apply)?;
815 let else2 = self.mk_binary_result(_else1, right, apply)?;
816 Ok(self.mk_ite(c1, then2, else2))
817 }
818 TRegex::ITE(c2, _then2, _else2) => {
819 if c1 == c2 {
820 let _then = self.mk_binary_result(_then1, _then2, apply)?;
821 let _else = self.mk_binary_result(_else1, _else2, apply)?;
822 return Ok(self.mk_ite(c1, _then, _else));
823 }
824 if c1.0 > c2.0 {
825 let _then = self.mk_binary_result(_then1, right, apply)?;
826 let _else = self.mk_binary_result(_else1, right, apply)?;
827 Ok(self.mk_ite(c1, _then, _else))
828 } else {
829 let _then = self.mk_binary_result(left, _then2, apply)?;
830 let _else = self.mk_binary_result(left, _else2, apply)?;
831 Ok(self.mk_ite(c2, _then, _else))
832 }
833 }
834 },
835 }
836 }
837
838 fn mk_binary(
839 &mut self,
840 left: TRegexId,
841 right: TRegexId,
842 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
843 ) -> TRegexId {
844 self.mk_binary_memo.clear();
845 self.mk_binary_inner(left, right, apply)
846 }
847
848 fn mk_binary_inner(
849 &mut self,
850 left: TRegexId,
851 right: TRegexId,
852 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
853 ) -> TRegexId {
854 if left == right {
855 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
856 }
857 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
858 return cached;
859 }
860 let result = match self.tr_array[left.0 as usize] {
861 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
862 TRegex::Leaf(right_node_id) => {
863 let applied = apply(self, left_node_id, right_node_id);
864 self.mk_leaf(applied)
865 }
866 TRegex::ITE(c2, _then, _else) => {
867 let then2 = self.mk_binary_inner(left, _then, apply);
868 let else2 = self.mk_binary_inner(left, _else, apply);
869 self.mk_ite(c2, then2, else2)
870 }
871 },
872 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
873 TRegex::Leaf(_) => {
874 let then2 = self.mk_binary_inner(_then1, right, apply);
875 let else2 = self.mk_binary_inner(_else1, right, apply);
876 self.mk_ite(c1, then2, else2)
877 }
878 TRegex::ITE(c2, _then2, _else2) => {
879 if c1 == c2 {
880 let _then = self.mk_binary_inner(_then1, _then2, apply);
881 let _else = self.mk_binary_inner(_else1, _else2, apply);
882 self.mk_ite(c1, _then, _else)
883 } else if c1.0 > c2.0 {
884 let _then = self.mk_binary_inner(_then1, right, apply);
885 let _else = self.mk_binary_inner(_else1, right, apply);
886 self.mk_ite(c1, _then, _else)
887 } else {
888 let _then = self.mk_binary_inner(left, _then2, apply);
889 let _else = self.mk_binary_inner(left, _else2, apply);
890 self.mk_ite(c2, _then, _else)
891 }
892 }
893 },
894 };
895 self.mk_binary_memo.insert((left, right), result);
896 result
897 }
898
899 pub fn get_nulls(
900 &mut self,
901 pending_rel: u32,
902 mask: Nullability,
903 acc: &mut BTreeSet<NullState>,
904 node_id: NodeId,
905 ) {
906 debug_assert!(node_id != NodeId::MISSING);
907 if !self.is_nullable(node_id, mask) {
908 return;
909 }
910 match self.get_kind(node_id) {
911 Kind::Pred => {}
912 Kind::End => {
913 if mask.has(Nullability::END) {
914 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
915 }
916 }
917 Kind::Begin => {
918 if mask.has(Nullability::BEGIN) {
919 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
920 }
921 }
922 Kind::Concat => {
923 let new_mask = self.nullability(node_id).and(mask);
924 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
925 if self.is_nullable(node_id.left(self), mask) {
926 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
927 }
928 }
929 Kind::Union => {
930 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
931 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
932 }
933 Kind::Inter => {
934 let new_mask = self.nullability(node_id).and(mask);
935 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
936 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
937 }
938 Kind::Star => {
939 acc.insert(NullState::new(mask, pending_rel));
940 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
941 }
942 Kind::Compl => {
943 if !self.is_nullable(node_id.left(self), mask) {
944 acc.insert(NullState::new(mask, 0));
945 }
946 }
947 Kind::Lookbehind => {
948 let new_mask = self.nullability(node_id).and(mask);
949 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
950 if node_id.right(self) != NodeId::MISSING {
951 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
952 }
953 }
954 Kind::Lookahead => {
955 let la_inner = self.get_lookahead_inner(node_id);
956 if self.is_nullable(la_inner, mask) {
957 let rel = self.get_lookahead_rel(node_id);
958 if rel != u32::MAX {
959 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
960 }
961 let la_tail = self.get_lookahead_tail(node_id);
963 if la_tail != NodeId::MISSING {
964 self.get_nulls(pending_rel, mask, acc, la_tail);
965 }
966 }
967 }
968 Kind::Counted => {
969 let packed = self.get_extra(node_id);
970 let best = packed >> 16;
971 if best > 0 {
972 acc.insert(NullState::new(mask, pending_rel + best));
973 }
974 }
975 }
976 }
977
978 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
979 self.get_meta_flags(node_id)
980 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
981 }
982
983 pub fn contains_lookbehind(&mut self, node_id: NodeId) -> bool {
984 self.get_meta_flags(node_id)
985 .has(MetaFlags::CONTAINS_LOOKBEHIND)
986 }
987
988 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
989 self.get_meta_flags(node_id)
990 .has(MetaFlags::CONTAINS_ANCHORS)
991 }
992
993 pub fn is_infinite(&self, node_id: NodeId) -> bool {
994 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
995 }
996
997 pub fn max_lookahead_body_len(&self, node_id: NodeId) -> u32 {
1003 let mut visited: FxHashMap<NodeId, ()> = FxHashMap::default();
1004 let mut stack = vec![node_id];
1005 let mut best: u32 = 0;
1006 while let Some(n) = stack.pop() {
1007 if n == NodeId::MISSING || visited.insert(n, ()).is_some() {
1008 continue;
1009 }
1010 let kind = self.get_kind(n);
1011 if kind == Kind::Lookahead {
1012 let mut body = self.get_lookahead_inner(n);
1013 if body.is_concat(self) && body.right(self) == NodeId::TS {
1016 body = body.left(self);
1017 }
1018 let (_, mx) = self.get_min_max_length(body);
1019 if mx > best {
1020 best = mx;
1021 }
1022 }
1023 match kind {
1024 Kind::Pred | Kind::Begin | Kind::End => {}
1025 Kind::Star | Kind::Compl => stack.push(n.left(self)),
1026 _ => {
1027 stack.push(n.left(self));
1028 stack.push(n.right(self));
1029 }
1030 }
1031 }
1032 best
1033 }
1034
1035 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
1036 if self.is_infinite(node_id) {
1037 if node_id.is_inter(self) {
1038 self.get_bounded_length(node_id)
1039 } else {
1040 (self.get_min_length_only(node_id), u32::MAX)
1041 }
1042 } else {
1043 self.get_bounded_length(node_id)
1044 }
1045 }
1046
1047 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
1048 if node_id == NodeId::EPS {
1049 return (0, 0);
1050 }
1051 match self.get_kind(node_id) {
1052 Kind::End | Kind::Begin => (0, 0),
1053 Kind::Pred => (1, 1),
1054 Kind::Concat => {
1055 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
1056 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1057 (lmin + rmin, lmax.saturating_add(rmax))
1058 }
1059 Kind::Union => {
1060 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
1061 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1062 (lmin.min(rmin), lmax.max(rmax))
1063 }
1064 Kind::Inter => {
1065 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
1066 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
1067 (lmin.max(rmin), lmax.min(rmax))
1068 }
1069 Kind::Lookahead => {
1070 let body = node_id.left(self);
1071 if self.is_infinite(body) {
1072 return (0, u32::MAX);
1073 }
1074 let right = node_id.right(self);
1075 if right.is_missing() {
1076 (0, 0)
1077 } else {
1078 self.get_min_max_length(right)
1079 }
1080 }
1081 Kind::Counted => self.get_min_max_length(node_id.left(self)),
1082 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
1083 }
1084 }
1085
1086 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1087 if node_id == NodeId::EPS {
1088 return Some(0);
1089 }
1090 match self.get_kind(node_id) {
1091 Kind::End | Kind::Begin => Some(0),
1092 Kind::Pred => Some(1),
1093 Kind::Concat => {
1094 let l = self.get_fixed_length(node_id.left(self))?;
1095 let r = self.get_fixed_length(node_id.right(self))?;
1096 Some(l + r)
1097 }
1098 Kind::Union => {
1099 let l = self.get_fixed_length(node_id.left(self))?;
1100 let r = self.get_fixed_length(node_id.right(self))?;
1101 if l == r {
1102 Some(l)
1103 } else {
1104 None
1105 }
1106 }
1107 Kind::Inter => {
1108 let l = self.get_fixed_length(node_id.left(self));
1109 let r = self.get_fixed_length(node_id.right(self));
1110 match (l, r) {
1111 (Some(a), Some(b)) if a == b => Some(a),
1112 (Some(_), Some(_)) => None,
1113 (Some(a), None) | (None, Some(a)) => Some(a),
1114 (None, None) => None,
1115 }
1116 }
1117 Kind::Lookahead | Kind::Lookbehind => {
1118 let prev = node_id.right(self).missing_to_eps();
1119 self.get_fixed_length(prev)
1120 }
1121 Kind::Counted => self.get_fixed_length(node_id.left(self)),
1122 Kind::Star | Kind::Compl => None,
1123 }
1124 }
1125
1126 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1127 match self.get_kind(node_id) {
1128 Kind::End | Kind::Begin => 0,
1129 Kind::Pred => 1,
1130 Kind::Concat => {
1131 self.get_min_length_only(node_id.left(self))
1132 + self.get_min_length_only(node_id.right(self))
1133 }
1134 Kind::Union => self
1135 .get_min_length_only(node_id.left(self))
1136 .min(self.get_min_length_only(node_id.right(self))),
1137 Kind::Inter => self
1138 .get_min_length_only(node_id.left(self))
1139 .max(self.get_min_length_only(node_id.right(self))),
1140 Kind::Star | Kind::Lookbehind | Kind::Lookahead => 0,
1141 Kind::Counted => self.get_min_length_only(node_id.left(self)),
1142 Kind::Compl => {
1143 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1144 0
1145 } else {
1146 1
1147 }
1148 }
1149 }
1150 }
1151
1152 pub fn starts_with_ts(&self, node_id: NodeId) -> bool {
1153 if node_id == NodeId::TS {
1154 return true;
1155 }
1156 match self.get_kind(node_id) {
1157 Kind::Inter => {
1158 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1159 }
1160 Kind::Union => {
1161 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1162 }
1163 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1164 _ => false,
1165 }
1166 }
1167
1168 #[inline]
1169 pub fn ends_with_ts(&self, node_id: NodeId) -> bool {
1170 if node_id.is_concat(self) {
1171 return self.ends_with_ts(node_id.right(self));
1172 }
1173 if node_id.is_lookahead(self) {
1174 let tail = self.get_lookahead_tail(node_id);
1175 if !tail.is_missing() {
1176 return self.ends_with_ts(tail);
1177 }
1178 }
1179 node_id == NodeId::TS
1180 }
1181
1182 pub fn ends_with_ts_any_branch(&self, node_id: NodeId) -> bool {
1183 if node_id == NodeId::TS {
1184 return true;
1185 }
1186 match self.get_kind(node_id) {
1187 Kind::Concat => self.ends_with_ts_any_branch(node_id.right(self)),
1188 Kind::Union => {
1189 self.ends_with_ts_any_branch(node_id.left(self))
1190 || self.ends_with_ts_any_branch(node_id.right(self))
1191 }
1192 Kind::Lookahead => {
1193 let tail = self.get_lookahead_tail(node_id);
1194 !tail.is_missing() && self.ends_with_ts_any_branch(tail)
1195 }
1196 _ => false,
1197 }
1198 }
1199
1200 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1201 debug_assert!(node_id != NodeId::MISSING);
1202 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1203 }
1204
1205 pub(crate) fn cache_der(
1206 &mut self,
1207 node_id: NodeId,
1208 result: TRegexId,
1209 mask: Nullability,
1210 ) -> TRegexId {
1211 if mask == Nullability::CENTER {
1212 self.tr_der_center[node_id.0 as usize] = result
1213 } else {
1214 self.tr_der_begin[node_id.0 as usize] = result
1215 };
1216 result
1217 }
1218
1219 pub(crate) fn try_cached_der(
1220 &mut self,
1221 node_id: NodeId,
1222 mask: Nullability,
1223 ) -> Option<TRegexId> {
1224 let cache = if mask == Nullability::CENTER {
1225 &mut self.tr_der_center
1226 } else {
1227 &mut self.tr_der_begin
1228 };
1229 match cache.get(node_id.0 as usize) {
1230 Some(&TRegexId::MISSING) => {}
1231 Some(&result) => {
1232 return Some(result);
1233 }
1234 None => {
1235 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1236 }
1237 }
1238 None
1239 }
1240
1241 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1242 let mut term = self.get_tregex(der);
1243 loop {
1244 match *term {
1245 TRegex::Leaf(node_id) => return node_id,
1246 TRegex::ITE(cond, _then, _else) => {
1247 if self.solver().is_sat_id(set, cond) {
1248 term = self.get_tregex(_then);
1249 } else {
1250 term = self.get_tregex(_else);
1251 }
1252 }
1253 }
1254 }
1255 }
1256
1257 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, ResharpError> {
1258 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1259 debug_assert!(
1260 node_id != NodeId::MISSING,
1261 "attempting to derive missing node"
1262 );
1263 if let Some(result) = self.try_cached_der(node_id, mask) {
1264 return Ok(result);
1265 }
1266
1267 let result = match node_id.kind(self) {
1268 Kind::Compl => {
1269 let leftd = node_id.left(self).der(self, mask)?;
1270 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1271 }
1272 Kind::Inter => {
1273 let leftd = node_id.left(self).der(self, mask)?;
1274 let rightd = node_id.right(self).der(self, mask)?;
1275 {
1276 let this = &mut *self;
1277 this.mk_binary(
1278 leftd,
1279 rightd,
1280 &mut (|b, left, right| b.mk_inter(left, right)),
1281 )
1282 }
1283 }
1284 Kind::Union => {
1285 let leftd = self.der(node_id.left(self), mask)?;
1286 let rightd = self.der(node_id.right(self), mask)?;
1287 {
1288 let this = &mut *self;
1289 this.mk_binary(
1290 leftd,
1291 rightd,
1292 &mut (|b, left, right| b.mk_union(left, right)),
1293 )
1294 }
1295 }
1296 Kind::Concat => {
1297 let head = node_id.left(self);
1298 let tail = node_id.right(self);
1299 let tail_leaf = self.mk_leaf(tail);
1300 let head_der = self.der(head, mask)?;
1301
1302 if self.is_nullable(head, mask) {
1303 let rightd = self.der(tail, mask)?;
1304 let ldr = {
1305 let this = &mut *self;
1306 this.mk_binary(
1307 head_der,
1308 tail_leaf,
1309 &mut (|b, left, right| b.mk_concat(left, right)),
1310 )
1311 };
1312 {
1313 let this = &mut *self;
1314 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1315 }
1316 } else {
1317 let this = &mut *self;
1318 this.mk_binary(
1319 head_der,
1320 tail_leaf,
1321 &mut (|b, left, right| b.mk_concat(left, right)),
1322 )
1323 }
1324 }
1325 Kind::Star => {
1326 if node_id == NodeId::EPS {
1327 TRegexId::BOT
1328 } else {
1329 let left = node_id.left(self);
1330 let r_decr_leaf = self.mk_leaf(node_id);
1331 let r_der = self.der(left, mask)?;
1332 let this = &mut *self;
1333 this.mk_binary(
1334 r_der,
1335 r_decr_leaf,
1336 &mut (|b, left, right| b.mk_concat(left, right)),
1337 )
1338 }
1339 }
1340 Kind::Lookbehind => {
1341 let lb_prev_der = {
1342 let lb_prev = self.get_lookbehind_prev(node_id);
1343 if lb_prev == NodeId::MISSING {
1344 TRegexId::MISSING
1345 } else {
1346 self.der(lb_prev, mask)?
1347 }
1348 };
1349 let lb_inner = self.get_lookbehind_inner(node_id);
1350 let lb_inner_der = self.der(lb_inner, mask)?;
1351 {
1352 let this = &mut *self;
1353 this.mk_binary_result(
1354 lb_inner_der,
1355 lb_prev_der,
1356 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1357 )?
1358 }
1359 }
1360 Kind::Lookahead => {
1361 let la_tail = self.get_lookahead_tail(node_id);
1362 let la_body = node_id.left(self);
1363 let rel = self.get_lookahead_rel(node_id);
1364
1365 if self.is_nullable(la_body, mask) {
1366 let right = node_id.right(self).missing_to_eps();
1368 let rder = self.der(right, mask).clone();
1369 return rder;
1370 }
1371
1372 if rel == u32::MAX {
1373 let la_body_der = self.der(la_body, mask)?;
1374 if la_tail.is_kind(self, Kind::Pred) {
1375 let transitioned =
1376 self.transition_term(la_body_der, la_tail.pred_tset(self));
1377 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1378 let concated = self.mk_concat(la_tail, new_la);
1379 return self.der(concated, mask);
1380 }
1381 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1382 let left = la_tail.left(self);
1383 let tset = left.pred_tset(self);
1384 let transitioned = self.transition_term(la_body_der, tset);
1385 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1386 let tail_right = la_tail.right(self);
1387 let concated = self.mk_concat(new_la, tail_right);
1388 let concated = self.mk_concat(left, concated);
1389 return self.der(concated, mask);
1390 }
1391 }
1392
1393 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1394 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1395 let concated = self.mk_concat(la_body, nulls_mask);
1396 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1397 let non_nulled = self.mk_non_nullable_safe(la_tail);
1398 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1399 let new_union = self.mk_union(concated_look, new_look);
1400 return self.der(new_union, mask);
1401 }
1402
1403 let la_tail_der = if la_tail == NodeId::MISSING {
1404 TRegexId::MISSING
1405 } else {
1406 if self.is_nullable(la_tail, mask) {
1407 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1408 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1409 let la_union = self.mk_union(la_tail, nulls_la);
1410 self.der(la_union, mask)?
1411 } else {
1412 self.der(la_tail, mask)?
1413 }
1414 };
1415
1416 let la_body_der = self.der(la_body, mask)?;
1417
1418 if rel != u32::MAX && rel > self.lookahead_context_max {
1419 return Err(ResharpError::AnchorLimit);
1420 }
1421
1422 let la = {
1423 let this = &mut *self;
1424 let rel = helpers::incr_rel(rel);
1425 this.mk_binary(
1426 la_body_der,
1427 la_tail_der,
1428 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1429 )
1430 };
1431
1432 if rel != u32::MAX
1433 && la_tail_der != TRegexId::MISSING
1434 && self.is_nullable(la_tail, mask)
1435 {
1436 let look_only = {
1437 let this = &mut *self;
1438 let right = TRegexId::MISSING;
1439 let rel = helpers::incr_rel(rel);
1440 this.mk_binary(
1441 la_body_der,
1442 right,
1443 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1444 )
1445 };
1446 {
1447 let this = &mut *self;
1448 this.mk_binary(
1449 look_only,
1450 la,
1451 &mut (|b, left, right| b.mk_union(left, right)),
1452 )
1453 }
1454 } else {
1455 la
1456 }
1457 }
1458 Kind::Counted => {
1459 let body = node_id.left(self);
1460 let chain = node_id.right(self);
1461 let packed = self.get_extra(node_id);
1462 let step = (packed & 0xFFFF) as u16;
1463 let best = (packed >> 16) as u16;
1464
1465 let mid_best = if self.is_nullable(body, mask) && step >= best {
1466 step
1467 } else {
1468 best
1469 };
1470
1471 let body_der = self.der(body, mask)?;
1472 let new_step = step.saturating_add(1);
1473 self.mk_unary(body_der, &mut |b, new_body| {
1474 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1475 new_step
1476 } else {
1477 mid_best
1478 };
1479 let packed = (final_best as u32) << 16 | new_step as u32;
1480 b.mk_counted(new_body, chain, packed)
1481 })
1482 }
1483 Kind::Begin | Kind::End => TRegexId::BOT,
1484 Kind::Pred => {
1485 let psi = node_id.pred_tset(self);
1486 if psi == TSetId::EMPTY {
1487 TRegexId::BOT
1488 } else {
1489 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1490 }
1491 }
1492 };
1493
1494 self.cache_der(node_id, result, mask);
1498 Ok(result)
1499 }
1500
1501 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1502 debug_assert!(meta_id != MetadataId::MISSING);
1503 match self.metadata.get_mut(node_id.0 as usize) {
1504 Some(v) => *v = meta_id,
1505 None => {
1506 self.metadata
1507 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1508 self.metadata[node_id.0 as usize] = meta_id;
1509 }
1510 }
1511 }
1512
1513 fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1514 debug_assert!(reversed_id != NodeId::MISSING);
1515 match self.reversed.get_mut(node_id.0 as usize) {
1516 Some(v) => *v = reversed_id,
1517 None => {
1518 self.reversed
1519 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1520 self.reversed[node_id.0 as usize] = reversed_id;
1521 }
1522 }
1523 return reversed_id;
1524 }
1525
1526 fn init(&mut self, inst: NodeKey) -> NodeId {
1527 self.num_created += 1;
1528 let node_id = NodeId(self.num_created);
1529 self.index.insert(inst.clone(), node_id);
1530 match inst.kind {
1531 Kind::Pred => {
1532 let meta_id = self.mb.get_meta_id(Metadata {
1533 flags: MetaFlags::ZERO,
1534 nulls: NullsId::EMPTY,
1535 });
1536 self.init_metadata(node_id, meta_id);
1537 }
1538 Kind::Begin => {
1539 let meta_id = self.mb.get_meta_id(Metadata {
1540 flags: MetaFlags::with_nullability(
1541 Nullability::BEGIN,
1542 MetaFlags::CONTAINS_ANCHORS,
1543 ),
1544 nulls: NullsId::BEGIN0,
1545 });
1546 self.init_metadata(node_id, meta_id);
1547 }
1548 Kind::End => {
1549 let meta_id = self.mb.get_meta_id(Metadata {
1550 flags: MetaFlags::with_nullability(
1551 Nullability::END,
1552 MetaFlags::CONTAINS_ANCHORS,
1553 ),
1554 nulls: NullsId::END0,
1555 });
1556 self.init_metadata(node_id, meta_id);
1557 }
1558 Kind::Inter => {
1559 let m1 = self.get_node_meta_id(inst.left);
1560 let m2 = self.get_node_meta_id(inst.right);
1561 let meta_id = {
1562 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1563 let mask_l = inst.left.nullability(self);
1564 let mask_r = inst.right.nullability(self);
1565 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1566 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1567 nulls = self.mb.nb.and_mask(nulls, mask_l);
1568 nulls = self.mb.nb.and_mask(nulls, mask_r);
1569 let new_meta = Metadata {
1570 flags: self.mb.flags_inter(m1, m2),
1571 nulls,
1572 };
1573 self.mb.get_meta_id(new_meta)
1574 };
1575 self.init_metadata(node_id, meta_id);
1576 }
1577 Kind::Union => {
1578 let m1 = self.get_node_meta_id(inst.left);
1579 let m2 = self.get_node_meta_id(inst.right);
1580 let meta_id = {
1581 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1582 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1583 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1584 let new_meta = Metadata {
1585 flags: self.mb.flags_union(m1, m2),
1586 nulls,
1587 };
1588 self.mb.get_meta_id(new_meta)
1589 };
1590 self.init_metadata(node_id, meta_id);
1591 }
1592 Kind::Concat => {
1593 let flags = self.mb.flags_concat(
1594 self.get_node_meta_id(inst.left),
1595 self.get_node_meta_id(inst.right),
1596 );
1597
1598 let right_nullability = inst.right.nullability(self);
1599 let left_nullability = inst.left.nullability(self);
1600 let nulls_left = self.get_nulls_id(inst.left);
1601 let nulls_right = self.get_nulls_id(inst.right);
1602 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1603 let mask = right_nullability.and(left_nullability);
1604 nulls = self.mb.nb.and_mask(nulls, mask);
1605
1606 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1607 self.init_metadata(node_id, new_id);
1608 }
1609 Kind::Star => {
1610 let left_nulls = self.get_nulls_id(inst.left);
1611 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1612 let meta_id = self.mb.get_meta_id(Metadata {
1613 flags: self
1614 .mb
1615 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1616 nulls,
1617 });
1618 self.init_metadata(node_id, meta_id);
1619 }
1620 Kind::Compl => {
1621 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1622 let meta_id = self.mb.get_meta_id(Metadata {
1623 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1624 nulls,
1625 });
1626 self.init_metadata(node_id, meta_id);
1627 }
1628 Kind::Lookbehind => {
1629 let mut left_nullability = self.nullability(inst.left);
1630 let mut contains_flags = self.get_flags_contains(inst.left);
1631 let nulls = if inst.right.is_missing() {
1632 self.get_nulls_id(inst.left)
1633 } else {
1634 let right_nullability = self.nullability(inst.right);
1635 let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1636 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1637 left_nullability = left_nullability.and(right_nullability);
1638 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1639 self.mb.nb.and_id(nulls_left, nulls_right)
1640 };
1641
1642 let meta_id = self.mb.get_meta_id(Metadata {
1643 flags: MetaFlags::with_nullability(
1644 left_nullability,
1645 contains_flags.or(MetaFlags::CONTAINS_LOOKBEHIND),
1646 ),
1647 nulls,
1648 });
1649 self.init_metadata(node_id, meta_id);
1650 }
1651 Kind::Lookahead => {
1652 let mut nulls = self.get_nulls_id(inst.left);
1653 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1655 let left_nullability = inst.left.nullability(self);
1656 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1657
1658 nulls = self.mb.nb.or_id(nulls, nulls_right);
1659
1660 let la_inner = inst.left;
1661 let la_tail = inst.right;
1662 let null = self
1663 .get_meta_flags(la_inner)
1664 .nullability()
1665 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1666 let contains_flags = self
1667 .get_flags_contains(la_inner)
1668 .or(self.get_flags_contains(la_tail));
1669
1670 let meta_id = self.mb.get_meta_id(Metadata {
1671 flags: MetaFlags::with_nullability(
1672 null,
1673 contains_flags.or(MetaFlags::CONTAINS_LOOKAHEAD),
1674 ),
1675 nulls,
1676 });
1677 self.init_metadata(node_id, meta_id);
1678 }
1679 Kind::Counted => {
1680 let best = inst.extra >> 16;
1681 let (null, nulls) = if best > 0 {
1682 let mut ns = BTreeSet::new();
1683 ns.insert(NullState::new(Nullability::CENTER, best));
1684 (Nullability::CENTER, self.mb.nb.get_id(ns))
1685 } else {
1686 (Nullability::NEVER, NullsId::EMPTY)
1687 };
1688 let meta_id = self.mb.get_meta_id(Metadata {
1689 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1690 nulls,
1691 });
1692 self.init_metadata(node_id, meta_id);
1693 }
1694 }
1695
1696 self.array.push(inst);
1697
1698 if let Some(rw) = self.post_init_simplify(node_id) {
1699 self.override_as(node_id, rw)
1700 } else {
1701 node_id
1702 }
1703 }
1704
1705 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1706 match self.get_kind(node_id) {
1707 Kind::Concat => {
1708 let lhs = node_id.left(self);
1709 let rhs = node_id.right(self);
1710 if lhs.is_pred_star(self).is_some() {
1711 if let Some(opttail) = rhs.is_opt_v(self) {
1712 if let Some(true) = self.subsumes(lhs, opttail) {
1713 return Some(lhs);
1714 }
1715 }
1716 }
1717 }
1718 Kind::Union => {
1719 let lhs = node_id.left(self);
1720 let rhs = node_id.right(self);
1721 let mut subsumed = false;
1722 rhs.iter_union_while(self, &mut |b, branch| {
1723 if b.nullable_subsumes(branch, lhs) {
1724 subsumed = true;
1725 }
1726 !subsumed
1727 });
1728 if subsumed {
1729 return Some(rhs);
1730 }
1731 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1732 return Some(rhs);
1733 }
1734 }
1735 _ => {}
1736 }
1737
1738 None
1739 }
1740
1741 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1743 if self.get_kind(lhs) != Kind::Union {
1744 return false; }
1746 let mut rhs_branches = Vec::new();
1747 let mut curr = rhs;
1748 while self.get_kind(curr) == Kind::Union {
1749 rhs_branches.push(self.get_left(curr));
1750 curr = self.get_right(curr);
1751 }
1752 rhs_branches.push(curr);
1753
1754 curr = lhs;
1755 while self.get_kind(curr) == Kind::Union {
1756 if !rhs_branches.contains(&self.get_left(curr)) {
1757 return false;
1758 }
1759 curr = self.get_right(curr);
1760 }
1761 rhs_branches.contains(&curr)
1762 }
1763
1764 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1766 if node == target {
1767 return true;
1768 }
1769 match self.get_kind(node) {
1770 Kind::Union => {
1771 self.nullable_subsumes(self.get_left(node), target)
1772 || self.nullable_subsumes(self.get_right(node), target)
1773 }
1774 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1775 self.nullable_subsumes(self.get_right(node), target)
1776 }
1777 _ => false,
1778 }
1779 }
1780
1781 pub fn num_nodes(&self) -> u32 {
1782 self.num_created
1783 }
1784
1785 pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1786 let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1787 let mut stack: Vec<NodeId> = vec![root];
1788 while let Some(n) = stack.pop() {
1789 if n == NodeId::MISSING
1790 || n == NodeId::BOT
1791 || n == NodeId::EPS
1792 || n == NodeId::TS
1793 || n == NodeId::BEGIN
1794 || n == NodeId::END
1795 {
1796 continue;
1797 }
1798 if seen.insert(n, ()).is_some() {
1799 continue;
1800 }
1801 if seen.len() >= limit {
1802 return limit;
1803 }
1804 stack.push(self.get_left(n));
1805 stack.push(self.get_right(n));
1806 }
1807 seen.len()
1808 }
1809 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1810 match self.index.get(&inst) {
1811 Some(&id) => id,
1812 None => self.init(inst),
1813 }
1814 }
1815 #[inline]
1816 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1817 self.index.get(inst)
1818 }
1819
1820 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1821 self.index.insert(key, subsumed);
1822 subsumed
1823 }
1824
1825 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1826 let key = &self.array[key.0 as usize];
1827 let entry = self.index.get_mut(key).unwrap();
1828 *entry = subsumed;
1829 subsumed
1830 }
1831
1832 #[inline]
1833 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1834 self.array[node_id.0 as usize].left
1835 }
1836
1837 #[inline]
1838 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1839 self.array[node_id.0 as usize].right
1840 }
1841
1842 #[inline]
1843 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1844 self.array[node_id.0 as usize].extra
1845 }
1846
1847 #[inline]
1848 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1849 debug_assert!(node_id.is_concat(self));
1850 let mut curr = node_id;
1851 while curr.is_concat(self) {
1852 curr = curr.right(self);
1853 }
1854 curr
1855 }
1856
1857 fn has_trailing_la(&self, node: NodeId) -> bool {
1858 let end = match self.get_kind(node) {
1859 Kind::Concat => self.get_concat_end(node),
1860 Kind::Lookahead => node,
1861 _ => return false,
1862 };
1863 end.is_lookahead(self) && end.right(self).is_missing()
1864 }
1865
1866 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1867 if node.is_lookahead(self) {
1868 return (NodeId::EPS, node);
1869 }
1870 debug_assert!(node.is_concat(self));
1871 let right = node.right(self);
1872 if !right.is_concat(self) {
1873 return (node.left(self), right);
1874 }
1875 let (stripped, la) = self.strip_trailing_la(right);
1876 (self.mk_concat(node.left(self), stripped), la)
1877 }
1878 #[inline]
1879 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1880 debug_assert!(matches!(
1881 self.get_kind(lookahead_node_id),
1882 Kind::Lookahead | Kind::Counted
1883 ));
1884 lookahead_node_id.left(self)
1885 }
1886 #[inline]
1887 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1888 debug_assert!(lookahead_node_id.is_lookahead(self));
1889 lookahead_node_id.right(self)
1890 }
1891 #[inline]
1892 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1893 debug_assert!(
1894 matches!(
1895 self.get_kind(lookahead_node_id),
1896 Kind::Lookahead | Kind::Counted
1897 ),
1898 "not lookahead/counted: {:?}",
1899 self.pp(lookahead_node_id)
1900 );
1901 self.get_extra(lookahead_node_id)
1902 }
1903 #[inline]
1904 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1905 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1906 lookbehind_node_id.left(self)
1907 }
1908 #[inline]
1909 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1910 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1911 lookbehind_node_id.right(self)
1912 }
1913
1914 #[inline]
1915 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1916 debug_assert!(
1917 self.array.len() > node_id.0 as usize,
1918 "array len: {:?}",
1919 node_id
1920 );
1921 self.array[node_id.0 as usize].kind
1922 }
1923
1924 #[inline]
1925 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1926 &self.array[node_id.0 as usize]
1927 }
1928
1929 #[inline]
1930 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1931 self.metadata[node_id.0 as usize]
1932 }
1933
1934 #[inline]
1935 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1936 debug_assert!(node_id.0 != u32::MAX);
1937 let meta_id = self.get_node_meta_id(node_id);
1938 debug_assert!(meta_id != MetadataId::MISSING);
1939 &self.mb.array[meta_id.0 as usize]
1940 }
1941
1942 #[inline]
1943 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1944 if node_id == NodeId::MISSING {
1945 NullsId::EMPTY
1946 } else {
1947 self.get_meta(node_id).nulls
1948 }
1949 }
1950
1951 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1952 self.mb
1953 .nb
1954 .array
1955 .iter()
1956 .map(|set| set.iter().cloned().collect())
1957 .collect()
1958 }
1959
1960 pub fn nulls_count(&self) -> usize {
1961 self.mb.nb.array.len()
1962 }
1963
1964 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1965 self.mb.nb.array[id as usize].iter().cloned().collect()
1966 }
1967
1968 pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
1969 self.mb.nb.and_mask(nid, Nullability::CENTER)
1970 }
1971
1972 #[inline]
1973 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1974 if node_id == NodeId::MISSING {
1975 NullsId::EMPTY
1976 } else {
1977 let nulls = self.get_meta(node_id).nulls;
1978 self.mb.nb.and_mask(nulls, mask)
1979 }
1980 }
1981
1982 #[inline]
1983 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1984 let meta_id = self.get_node_meta_id(node_id);
1985 let meta = &self.mb.array[meta_id.0 as usize];
1986 meta.flags
1987 }
1988
1989 #[inline]
1990 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1991 self.get_meta(node_id).flags.nullability()
1992 }
1993
1994 #[inline]
1995 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1996 let meta_id = self.get_node_meta_id(node_id);
1997 let meta = &self.mb.array[meta_id.0 as usize];
1998 meta.flags.all_contains_flags()
1999 }
2000
2001 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2002 if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2003 return self.strip_lb(node_id.right(self));
2004 }
2005 let result = self.strip_lb_inner(true, node_id)?;
2006 debug_assert!(
2007 !self.contains_lookbehind(result),
2008 "should not contain lookbehind: {:?}",
2009 self.pp(result)
2010 );
2011 Ok(result)
2012 }
2013
2014 fn strip_lb_inner(&mut self, allowed: bool, node_id: NodeId) -> Result<NodeId, ResharpError> {
2015 if !self.contains_lookbehind(node_id) {
2016 return Ok(node_id);
2017 }
2018 if !allowed {
2019 return Result::Err(ResharpError::UnsupportedPattern);
2020 }
2021 if node_id.is_inter(self) {
2022 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2023 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2024 return Ok(self.mk_inter(left, right));
2025 }
2026 if node_id.is_union(self) {
2027 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2028 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2029 return Ok(self.mk_union(left, right));
2030 }
2031 match self.get_kind(node_id) {
2032 Kind::Compl => Result::Err(ResharpError::UnsupportedPattern),
2033 Kind::Concat => {
2034 let left = self.strip_lb_inner(allowed, node_id.left(self))?;
2035 let right = self.strip_lb_inner(allowed, node_id.right(self))?;
2036 Ok(self.mk_concat(left, right))
2037 }
2038 Kind::Lookbehind => {
2039 let prev = self.get_lookbehind_prev(node_id);
2040 if prev != NodeId::MISSING {
2041 self.strip_lb_inner(allowed, prev)
2042 } else {
2043 Ok(NodeId::EPS)
2044 }
2045 }
2046 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => Ok(node_id),
2047 _ => Ok(node_id),
2048 }
2049 }
2050
2051 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
2053 if !self.contains_anchors(node_id) {
2054 return node_id;
2055 }
2056 match self.get_kind(node_id) {
2057 Kind::Begin => NodeId::BOT,
2058 Kind::Concat => {
2059 let left = self.nonbegins(node_id.left(self));
2060 if left == NodeId::BOT {
2061 return NodeId::BOT;
2062 }
2063 self.mk_concat(left, node_id.right(self))
2064 }
2065 Kind::Union => {
2066 let left = self.nonbegins(node_id.left(self));
2067 let right = self.nonbegins(node_id.right(self));
2068 self.mk_union(left, right)
2069 }
2070 _ => node_id,
2071 }
2072 }
2073
2074 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
2075 match self.get_kind(node_id) {
2076 Kind::Concat => {
2077 let head = node_id.left(self);
2078 match self.get_kind(head) {
2079 _ if self.any_nonbegin_nullable(head) => {
2080 self.strip_prefix_safe(node_id.right(self))
2081 }
2082 _ => node_id,
2083 }
2084 }
2085 _ => node_id,
2086 }
2087 }
2088 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2089 match self.get_kind(node_id) {
2090 Kind::Begin => NodeId::BOT,
2091 Kind::Concat => {
2092 let head = self.prune_begin(node_id.left(self));
2093 let tail = self.prune_begin(node_id.right(self));
2094 self.mk_concat(head, tail)
2095 }
2096 Kind::Lookbehind => {
2097 if !node_id.right(self).is_missing() {
2098 return node_id;
2099 }
2100 let head = self.prune_begin(node_id.left(self));
2101 head
2102 }
2103 Kind::Union => {
2104 let left = self.prune_begin(node_id.left(self));
2105 let right = self.prune_begin(node_id.right(self));
2106 self.mk_union(left, right)
2107 }
2108 _ => node_id,
2109 }
2110 }
2111 pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2112 match self.get_kind(node_id) {
2113 Kind::Begin => NodeId::EPS,
2114 Kind::Concat => {
2115 let head = self.prune_begin_eps(node_id.left(self));
2116 let tail = self.prune_begin_eps(node_id.right(self));
2117 self.mk_concat(head, tail)
2118 }
2119 Kind::Lookbehind => {
2120 if !node_id.right(self).is_missing() {
2121 return node_id;
2122 }
2123 let head = self.prune_begin_eps(node_id.left(self));
2124 head
2125 }
2126 Kind::Union => {
2127 let left = self.prune_begin_eps(node_id.left(self));
2128 let right = self.prune_begin_eps(node_id.right(self));
2129 self.mk_union(left, right)
2130 }
2131 _ => node_id,
2132 }
2133 }
2134
2135 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2136 if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2137 return Ok(node_id);
2138 }
2139 let result = match self.get_kind(node_id) {
2140 Kind::Concat => {
2141 let left = self.normalize_rev(node_id.left(self))?;
2142 let right = self.normalize_rev(node_id.right(self))?;
2143 self.mk_concat(left, right)
2144 }
2145 Kind::Inter => {
2146 let left = self.normalize_rev(node_id.left(self))?;
2147 let right = self.normalize_rev(node_id.right(self))?;
2148 self.mk_inter(left, right)
2149 }
2150 Kind::Union => {
2151 let left = self.normalize_rev(node_id.left(self))?;
2152 let right = self.normalize_rev(node_id.right(self))?;
2153 self.mk_union(left, right)
2154 }
2155 Kind::Lookbehind => {
2156 let left = self.normalize_rev(node_id.left(self))?;
2157 let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2158 let lbody_ts = self.mk_concat(NodeId::TS, left);
2159 let ltail_ts = self.mk_concat(NodeId::TS, right);
2160 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2161 as_inter
2162 }
2163 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2164 return Err(ResharpError::UnsupportedPattern)
2165 }
2166 _ => node_id,
2167 };
2168 Ok(result)
2169 }
2170
2171 pub fn collect_der_targets(
2172 &mut self,
2173 der: TRegexId,
2174 path_set: TSetId,
2175 out: &mut Vec<(NodeId, TSetId)>,
2176 ) {
2177 match *self.get_tregex(der) {
2178 TRegex::Leaf(target) => {
2179 if let Some(entry) = out.iter_mut().find(|(t, _)| *t == target) {
2180 entry.1 = self.solver().or_id(entry.1, path_set);
2181 } else {
2182 out.push((target, path_set));
2183 }
2184 }
2185 TRegex::ITE(cond, then_b, else_b) => {
2186 let then_path = self.solver().and_id(path_set, cond);
2187 self.collect_der_targets(then_b, then_path, out);
2188 let not_cond = self.solver().not_id(cond);
2189 let else_path = self.solver().and_id(path_set, not_cond);
2190 self.collect_der_targets(else_b, else_path, out);
2191 }
2192 }
2193 }
2194
2195 pub fn prefix_ts(&mut self, node_id: NodeId) -> NodeId {
2196 let with_ts = if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2197 node_id
2198 } else {
2199 self.mk_concat(NodeId::TS, node_id)
2200 };
2201 with_ts
2202 }
2203
2204 pub fn ts_rev_start(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2205 let rev = self.reverse(node_id)?;
2206 let rev = self.normalize_rev(rev)?;
2207 let with_ts = self.prefix_ts(rev);
2208 Ok(self.simplify_rev_initial(with_ts))
2209 }
2210
2211 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2212 debug_assert!(node_id != NodeId::MISSING);
2213 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2214 if *rev != NodeId::MISSING {
2215 return Ok(*rev);
2216 }
2217 }
2218 let rw = match self.get_kind(node_id) {
2219 Kind::End => NodeId::BEGIN,
2220 Kind::Begin => NodeId::END,
2221 Kind::Pred => node_id,
2222 Kind::Concat => {
2223 let left = self.reverse(node_id.left(self))?;
2224 let right = self.reverse(node_id.right(self))?;
2225 self.mk_concat(right, left)
2226 }
2227 Kind::Union => {
2228 let left = self.reverse(node_id.left(self))?;
2229 let right = self.reverse(node_id.right(self))?;
2230 self.mk_union(left, right)
2231 }
2232 Kind::Inter => {
2233 let left = self.reverse(node_id.left(self))?;
2234 let right = self.reverse(node_id.right(self))?;
2235 self.mk_inter(left, right)
2236 }
2237 Kind::Star => {
2238 let body = self.reverse(node_id.left(self))?;
2239 self.mk_star(body)
2240 }
2241 Kind::Compl => {
2242 if self.contains_look(node_id.left(self)) {
2243 return Err(ResharpError::UnsupportedPattern);
2244 }
2245 let body = self.reverse(node_id.left(self))?;
2246 self.mk_compl(body)
2247 }
2248 Kind::Lookbehind => {
2249 let prev = self.get_lookbehind_prev(node_id);
2250 let inner_id = self.get_lookbehind_inner(node_id);
2251 let rev_inner = self.reverse(inner_id)?;
2252 let rev_prev = if prev != NodeId::MISSING {
2253 self.reverse(prev)?
2254 } else {
2255 NodeId::MISSING
2256 };
2257 self.mk_lookahead(rev_inner, rev_prev, 0)
2258 }
2259 Kind::Lookahead => {
2260 let rel = self.get_lookahead_rel(node_id);
2261 if rel == u32::MAX {
2263 let lbody = self.get_lookahead_inner(node_id);
2264 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2265 let rbody = self.reverse(lbody);
2266 let rtail = self.reverse(ltail);
2267 let rev = self.mk_lookbehind(rbody?, rtail?);
2268 return Ok(self.cache_reversed(node_id, rev));
2269 }
2270 if rel != 0 {
2271 return Err(ResharpError::UnsupportedPattern);
2272 }
2273 let tail_node = self.get_lookahead_tail(node_id);
2274 let rev_tail = if tail_node != NodeId::MISSING {
2275 self.reverse(tail_node)?
2276 } else {
2277 NodeId::MISSING
2278 };
2279 let inner_id = self.get_lookahead_inner(node_id);
2280 let rev_inner = self.reverse(inner_id)?;
2281 self.mk_lookbehind(rev_inner, rev_tail)
2282 }
2283 Kind::Counted => {
2284 return Err(ResharpError::UnsupportedPattern);
2285 }
2286 };
2287 self.cache_reversed(node_id, rw);
2288 Ok(rw)
2289 }
2290
2291 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2292 let node = NodeKey {
2293 kind: Kind::Pred,
2294 left: NodeId::MISSING,
2295 right: NodeId::MISSING,
2296 extra: pred.0,
2297 };
2298 self.get_node_id(node)
2299 }
2300
2301 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2302 let key = NodeKey {
2303 kind: Kind::Compl,
2304 left: body,
2305 right: NodeId::MISSING,
2306 extra: u32::MAX,
2307 };
2308 if let Some(id) = self.key_is_created(&key) {
2309 return *id;
2310 }
2311 if body == NodeId::BOT {
2312 return NodeId::TS;
2313 }
2314 if body == NodeId::TS {
2315 return NodeId::BOT;
2316 }
2317
2318 if let Some(contains_body) = body.is_contains(self) {
2319 if contains_body.is_pred(self) {
2320 let pred = contains_body.pred_tset(self);
2321 let notpred = self.mk_pred_not(pred);
2322 let node = self.mk_star(notpred);
2323 return self.init_as(key, node);
2324 } else if contains_body == NodeId::END {
2325 return self.init_as(key, NodeId::BOT);
2326 }
2327 };
2328
2329 if self.get_kind(body) == Kind::Compl {
2330 return self.get_node(body).left;
2331 }
2332
2333 self.get_node_id(key)
2334 }
2335
2336 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2337 let nid = self.get_nulls_id(body);
2338 let nref = self.mb.nb.get_set_ref(nid).clone();
2339 let mut futures = NodeId::BOT;
2340 for n in nref.iter() {
2341 if !n.is_mask_nullable(mask) {
2342 continue;
2343 }
2344
2345 let eff = if n.rel == 0 {
2346 NodeId::EPS
2347 } else {
2348 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2349 };
2350 futures = self.mk_union(futures, eff)
2351 }
2352 futures
2353 }
2354
2355 fn strip_ts_max_len(&self, node: NodeId) -> Option<u32> {
2357 let mut cur = node;
2358 let mut total: u32 = 0;
2359 loop {
2360 if !cur.is_concat(self) {
2361 return None;
2362 }
2363 let r = cur.right(self);
2364 let (_, lmax) = self.get_min_max_length(cur.left(self));
2365 if lmax == u32::MAX {
2366 return None;
2367 }
2368 total = total.saturating_add(lmax);
2369 if r == NodeId::TS {
2370 return Some(total);
2371 }
2372 cur = r;
2373 }
2374 }
2375
2376 fn peel_head_pred(&self, node: NodeId) -> Option<(TSetId, NodeId)> {
2377 if node.is_pred(self) {
2378 Some((node.pred_tset(self), NodeId::EPS))
2379 } else if node.is_concat(self) && node.left(self).is_pred(self) {
2380 Some((node.left(self).pred_tset(self), node.right(self)))
2381 } else {
2382 None
2383 }
2384 }
2385
2386 fn strip_end_from_la_head(&mut self, node: NodeId) -> NodeId {
2388 let (head, rest) = if node.is_concat(self) {
2389 (node.left(self), node.right(self))
2390 } else {
2391 (node, NodeId::EPS)
2392 };
2393 if !head.is_kind(self, Kind::Union) {
2394 return node;
2395 }
2396 let l = head.left(self);
2397 if !l.is_end() {
2398 return node;
2399 }
2400 let r = head.right(self);
2401 self.mk_concat(r, rest)
2402 }
2403
2404 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2405 if cfg!(feature = "norewrite") {
2406 return None;
2407 }
2408
2409 if tail.is_lookbehind(self) {
2422 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2423 return self
2424 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2425 .ok();
2426 }
2427 if head.is_lookahead(self) {
2428 let la_tail = self.get_lookahead_tail(head);
2429 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2430 let la_body = self.get_lookahead_inner(head);
2431 let la_body = if new_la_tail.is_never_nullable(self) {
2433 self.strip_end_from_la_head(la_body)
2434 } else {
2435 la_body
2436 };
2437 if la_body == NodeId::BOT {
2438 return Some(NodeId::BOT);
2439 }
2440 if let (Some((p_l, body_rest)), Some((p_b, tail_rest))) = (
2441 self.peel_head_pred(la_body),
2442 self.peel_head_pred(new_la_tail),
2443 ) {
2444 let p = self.solver().and_id(p_l, p_b);
2445 let merged = self.mk_pred(p);
2446 if merged == NodeId::BOT {
2447 return Some(NodeId::BOT);
2448 }
2449 let new_la = if body_rest == NodeId::EPS {
2450 NodeId::EPS
2451 } else {
2452 self.mk_lookahead(body_rest, NodeId::MISSING, 0)
2453 };
2454 let after = self.mk_concat(new_la, tail_rest);
2455 return Some(self.mk_concat(merged, after));
2456 }
2457
2458 if la_body.is_concat(self)
2459 && la_body.right(self).is_end()
2460 && la_body.left(self).is_compl(self)
2461 {
2462 let not_p_ts = la_body.left(self); if let Some(p_max) = self.strip_ts_max_len(not_p_ts.left(self)) {
2464 let (tail_min, _) = self.get_min_max_length(new_la_tail);
2465 if p_max <= tail_min {
2466 return Some(self.mk_inter(not_p_ts, new_la_tail));
2467 }
2468 }
2469 }
2470
2471 if new_la_tail.is_center_nullable(self) {
2472 let la_new = self.mk_lookahead_internal(la_body, new_la_tail, u32::MAX);
2473 return Some(la_new);
2474 }
2475 let la_rel = self.get_lookahead_rel(head);
2476 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2477 let tail_rel = self.get_lookahead_rel(new_la_tail);
2478 tail_rel + la_rel
2479 } else {
2480 u32::MAX
2481 };
2482
2483 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2484 }
2485
2486 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2487 return Some(head);
2488 }
2489
2490 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2491 return Some(NodeId::TS);
2492 }
2493
2494 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2495 return Some(NodeId::TS);
2496 }
2497
2498 if head.is_inter(self) && tail == NodeId::TS {
2499 let mut alltopstar = true;
2500 head.iter_inter(
2501 self,
2502 &mut (|b, v| {
2503 alltopstar = b.ends_with_ts(v);
2504 }),
2505 );
2506
2507 if alltopstar {
2508 return Some(head);
2509 }
2510 }
2511
2512 if head.is_star(self) && head == tail {
2513 return Some(head);
2514 }
2515
2516 None
2517 }
2518
2519 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2520 use Kind::*;
2521
2522 if cfg!(feature = "norewrite") {
2523 return None;
2524 }
2525 if left == right {
2526 return Some(left);
2527 }
2528
2529 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2530 return Some(left);
2531 }
2532 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2533 return Some(right);
2534 }
2535
2536 if left.is_inter(self) && right.is_inter(self) {
2537 let mut lconj: Vec<NodeId> = Vec::new();
2538 left.any_inter_component(self, |v| {
2539 lconj.push(v);
2540 false
2541 });
2542 let lconj_initial = lconj.len();
2543 let mut common = NodeId::TS;
2544 let mut r_rest = NodeId::TS;
2545 let mut cur = right;
2546 loop {
2547 let (v, next) = if cur.kind(self) == Inter {
2548 (cur.left(self), Some(cur.right(self)))
2549 } else {
2550 (cur, None)
2551 };
2552 if let Some(pos) = lconj.iter().position(|&x| x == v) {
2553 lconj.swap_remove(pos);
2554 common = self.mk_inter(v, common);
2555 } else {
2556 r_rest = self.mk_inter(v, r_rest);
2557 }
2558 match next {
2559 Some(n) => cur = n,
2560 None => break,
2561 }
2562 }
2563 if lconj.len() < lconj_initial {
2564 let l_rest = lconj
2565 .iter()
2566 .fold(NodeId::TS, |acc, &v| self.mk_inter(v, acc));
2567 let inner_union = self.mk_union(l_rest, r_rest);
2568 return Some(self.mk_inter(common, inner_union));
2569 }
2570 }
2571
2572 if left.is_pred(self) && right.is_pred(self) {
2573 let l = left.pred_tset(self);
2574 let r = right.pred_tset(self);
2575 let solver = self.solver();
2576 let psi = solver.or_id(l, r);
2577 let rewrite = self.mk_pred(psi);
2578 return Some(rewrite);
2579 }
2580
2581 if left == NodeId::EPS && self.nullability(right) == Nullability::ALWAYS {
2582 let right_nulls_id = self.get_nulls_id(right);
2583 if self.mb.nb.array[right_nulls_id.0 as usize]
2584 .iter()
2585 .any(|n| n.rel == 0 && n.mask.has(Nullability::ALWAYS))
2586 {
2587 return Some(right);
2588 }
2589 }
2590
2591 if left.is_lookahead(self) && right.is_lookahead(self) {
2592 let lb = left.left(self);
2593 let lt = left.right(self);
2594 let lrel = left.extra(self);
2595
2596 let rb = right.left(self);
2597 let rt = right.right(self);
2598 let rrel = right.extra(self);
2599
2600 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2601 let unioned = self.mk_union(lb, rb);
2602 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2603 return Some(node);
2604 }
2605 }
2606
2607 if right.is_kind(self, Concat) {
2608 if left == NodeId::END
2609 && right.left(self) == NodeId::END
2610 && self.nullability(right).has(Nullability::END)
2611 {
2612 return Some(right);
2613 }
2614 if left == right.left(self) {
2616 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2617 let rw = self.mk_concat(left, rhs);
2618 return Some(rw);
2619 }
2620 if left.is_kind(self, Concat) {
2621 let head1 = left.left(self);
2622 let head2 = right.left(self);
2623
2624 if head1 == head2 {
2625 let t1 = left.right(self);
2626 let t2 = right.right(self);
2627 if head1 == NodeId::TS {
2629 if t2.has_concat_tail(self, t1) {
2630 return Some(left);
2631 }
2632 if t1.has_concat_tail(self, t2) {
2633 return Some(right);
2634 }
2635 }
2636 let un = self.mk_union(t1, t2);
2637 return Some(self.mk_concat(left.left(self), un));
2638 }
2639
2640 if false {
2644 let end1 = self.get_concat_end(left);
2645 let end2 = self.get_concat_end(right);
2646 if end1 == end2 {
2647 let contains_lookarounds =
2648 left.contains_lookaround(self) || right.contains_lookaround(self);
2649 let flags = left.flags_contains(self).or(right.flags_contains(self));
2650 if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2651 let rev1 = self.reverse(left).unwrap();
2652 let rev2 = self.reverse(right).unwrap();
2653
2654 let union_rev = self.mk_union(rev1, rev2);
2655 return Some(self.reverse(union_rev).unwrap());
2656 }
2657 }
2658 }
2659 }
2660 if left.is_pred(self) && left == right.left(self) {
2661 let un = self.mk_opt(right.right(self));
2662 let conc = self.mk_concat(left, un);
2663 return Some(conc);
2664 }
2665 }
2666
2667 if left == NodeId::EPS && right.is_plus(self) {
2668 let result = self.mk_star(right.left(self));
2669 return Some(result);
2670 }
2671
2672 if left.is_inter(self) && right.is_inter(self) {
2674 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2675 return Some(rw);
2676 }
2677 }
2678
2679 None
2680 }
2681
2682 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2683 let mut curr = node;
2684 while curr.is_inter(self) {
2685 out.push(self.get_left(curr));
2686 curr = self.get_right(curr);
2687 }
2688 out.push(curr);
2689 }
2690
2691 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2692 let mut curr = node;
2693 let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2694 if has_prefix {
2695 curr = self.get_right(curr);
2696 }
2697 let mut count = 0u32;
2698 let mut pred_set = None;
2699 while curr.is_concat(self) {
2700 let head = self.get_left(curr);
2701 if !head.is_pred(self) {
2702 return None;
2703 }
2704 let ps = head.pred_tset(self);
2705 match pred_set {
2706 None => pred_set = Some(ps),
2707 Some(existing) => {
2708 if existing != ps {
2709 return None;
2710 }
2711 }
2712 }
2713 curr = self.get_right(curr);
2714 count += 1;
2715 }
2716 if count == 0 || !curr.is_star(self) {
2717 return None;
2718 }
2719 Some((has_prefix, pred_set.unwrap(), curr, count))
2720 }
2721
2722 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2723 let mut j = 0;
2724 for &s in sub {
2725 while j < sup.len() && sup[j] < s {
2726 j += 1;
2727 }
2728 if j >= sup.len() || sup[j] != s {
2729 return false;
2730 }
2731 j += 1;
2732 }
2733 true
2734 }
2735
2736 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2737 if !left.is_inter(self) || !right.is_inter(self) {
2738 return None;
2739 }
2740
2741 let mut lc: Vec<NodeId> = Vec::new();
2742 let mut rc: Vec<NodeId> = Vec::new();
2743 self.collect_inter_components(left, &mut lc);
2744 self.collect_inter_components(right, &mut rc);
2745
2746 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2748 return Some(left);
2749 }
2750 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2752 return Some(right);
2753 }
2754
2755 if lc.len() == rc.len() {
2756 let mut diff_idx = None;
2757 for i in 0..lc.len() {
2758 if lc[i] != rc[i] {
2759 if diff_idx.is_some() {
2760 return None;
2761 }
2762 diff_idx = Some(i);
2763 }
2764 }
2765 if let Some(idx) = diff_idx {
2766 let a = lc[idx];
2767 let b = rc[idx];
2768 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2769 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2770 {
2771 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2772 return if ca < cb { Some(left) } else { Some(right) };
2773 }
2774 }
2775 }
2776 }
2777
2778 None
2779 }
2780
2781 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2782 if cfg!(feature = "norewrite") {
2783 return None;
2784 }
2785 if left == right {
2786 return Some(left);
2787 }
2788 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2789 return Some(right);
2790 }
2791 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2792 return Some(left);
2793 }
2794
2795 if left.is_pred(self) && right.is_pred(self) {
2796 let l = left.pred_tset(self);
2797 let r = right.pred_tset(self);
2798 let solver = self.solver();
2799 let psi = solver.and_id(l, r);
2800 let rewrite = self.mk_pred(psi);
2801 return Some(rewrite);
2802 }
2803
2804 for (a, b) in [(left, right), (right, left)] {
2805 if a.is_pred(self) && b.is_compl(self) {
2806 let cbody = b.left(self);
2807 if cbody.is_concat(self)
2808 && cbody.right(self) == NodeId::TS
2809 && cbody.left(self).is_pred(self)
2810 {
2811 let q = a.pred_tset(self);
2812 let p = cbody.left(self).pred_tset(self);
2813 let solver = self.solver();
2814 let notp = solver.not_id(p);
2815 let anded = solver.and_id(q, notp);
2816 return Some(self.mk_pred(anded));
2817 }
2818 }
2819 }
2820
2821 if left.is_concat(self) && right.is_concat(self) {
2822 if left.left(self) == right.left(self) {
2823 if left.left(self).is_pred(self) {
2824 let new_right = self.mk_inter(left.right(self), right.right(self));
2825 return Some(self.mk_concat(left.left(self), new_right));
2826 }
2827 }
2828 }
2829
2830 if right.is_compl(self) && right.left(self) == left {
2831 return Some(NodeId::BOT);
2832 }
2833
2834 if left.is_compl(self) && right.is_compl(self) {
2835 let bodies = self.mk_union(left.left(self), right.left(self));
2836 return Some(self.mk_compl(bodies));
2837 }
2838
2839 if left == NodeId::TOPPLUS {
2840 if right.is_pred_star(self).is_some() {
2841 let newloop = self.mk_plus(right.left(self));
2842 return Some(newloop);
2843 }
2844 if right.is_never_nullable(self) {
2845 return Some(right);
2846 }
2847 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2848 return Some(NodeId::BOT);
2849 }
2850 if right.is_kind(self, Kind::Concat) {}
2851 }
2852
2853 {
2854 let l_is_la = left.is_lookahead(self);
2855 let r_is_la = right.is_lookahead(self);
2856 let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2857 let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2858 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2859 let (la_node, other, concat_body) = if r_is_la {
2860 (right, left, NodeId::MISSING)
2861 } else if l_is_la {
2862 (left, right, NodeId::MISSING)
2863 } else if r_is_cla {
2864 (right.left(self), left, right.right(self))
2865 } else {
2866 (left.left(self), right, left.right(self))
2867 };
2868 let la_body = la_node.left(self);
2869 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2870 let inter_right = if concat_body.is_missing() {
2871 la_tail
2872 } else {
2873 self.mk_concat(la_tail, concat_body)
2874 };
2875 let new_body = self.mk_inter(other, inter_right);
2876 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2877 return Some(self.mk_concat(la, new_body));
2878 }
2879 }
2880
2881 if self.get_kind(right) == Kind::Compl {
2882 let compl_body = right.left(self);
2883 if left == compl_body {
2884 return Some(NodeId::BOT);
2885 }
2886 if compl_body.is_concat(self) {
2887 let compl_head = compl_body.left(self);
2888 if compl_body.right(self) == NodeId::TS && compl_head == left {
2889 return Some(NodeId::BOT);
2890 }
2891 }
2892 }
2893
2894 if let Some(pleft) = left.is_pred_star(self) {
2895 if let Some(pright) = right.is_pred_star(self) {
2896 let merged = self.mk_inter(pleft, pright);
2897 return Some(self.mk_star(merged));
2898 }
2899 }
2900
2901 {
2902 let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2903 let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2904 if l_is_clb || r_is_clb {
2905 let (lb, body) = if l_is_clb && r_is_clb {
2906 let lb1 = left.left(self);
2907 let lb2 = right.left(self);
2908 let inner = self.mk_inter(
2909 self.get_lookbehind_inner(lb1),
2910 self.get_lookbehind_inner(lb2),
2911 );
2912 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2913 let body = self.mk_inter(left.right(self), right.right(self));
2914 (lb, body)
2915 } else if l_is_clb {
2916 let lb = left.left(self);
2917 let body = self.mk_inter(left.right(self), right);
2918 (lb, body)
2919 } else {
2920 let lb = right.left(self);
2921 let body = self.mk_inter(left, right.right(self));
2922 (lb, body)
2923 };
2924 return Some(self.mk_concat(lb, body));
2925 }
2926 }
2927
2928 {
2929 let l_has_la = self.has_trailing_la(left);
2930 let r_has_la = self.has_trailing_la(right);
2931 if l_has_la || r_has_la {
2932 let (body, la) = if l_has_la && r_has_la {
2933 let (lbody, l_la) = self.strip_trailing_la(left);
2934 let (rbody, r_la) = self.strip_trailing_la(right);
2935 let inner = self.mk_inter(
2936 self.get_lookahead_inner(l_la),
2937 self.get_lookahead_inner(r_la),
2938 );
2939 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2940 let body = self.mk_inter(lbody, rbody);
2941 (body, la)
2942 } else if l_has_la {
2943 let (lbody, la) = self.strip_trailing_la(left);
2944 let body = self.mk_inter(lbody, right);
2945 (body, la)
2946 } else {
2947 let (rbody, la) = self.strip_trailing_la(right);
2948 let body = self.mk_inter(left, rbody);
2949 (body, la)
2950 };
2951 return Some(self.mk_concat(body, la));
2952 }
2953 }
2954
2955 None
2956 }
2957
2958 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2959 if cfg!(feature = "norewrite") {
2960 return None;
2961 }
2962 debug_assert!(self.get_kind(right_union) == Kind::Union);
2963
2964 let mut rewritten = None;
2965 right_union.iter_union_while(
2966 self,
2967 &mut (|b, v| {
2968 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2969 rewritten = Some((v, rw));
2970 false
2971 } else {
2972 true
2973 }
2974 }),
2975 );
2976
2977 if let Some(rw) = rewritten {
2978 let mut new_union = NodeId::BOT;
2979 right_union.iter_union(
2980 self,
2981 &mut (|b, v| {
2982 if v == rw.0 {
2983 new_union = b.mk_union(rw.1, new_union)
2984 } else {
2985 new_union = b.mk_union(v, new_union)
2986 }
2987 }),
2988 );
2989 return Some(new_union);
2990 };
2991
2992 None
2993 }
2994
2995 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2996 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2997 debug_assert!(tail != NodeId::MISSING);
2998 let key = NodeKey {
2999 kind: Kind::Concat,
3000 left: head,
3001 right: tail,
3002 extra: u32::MAX,
3003 };
3004 if let Some(id) = self.key_is_created(&key) {
3005 return *id;
3006 }
3007
3008 if head == NodeId::BOT || tail == NodeId::BOT {
3009 return NodeId::BOT;
3010 }
3011 if head == NodeId::EPS {
3012 return tail;
3013 }
3014 if tail == NodeId::EPS {
3015 return head;
3016 }
3017
3018 if head.is_kind(self, Kind::Concat) {
3020 let left = head.left(self);
3021 let newright = self.mk_concat(head.right(self), tail);
3022 let updated = self.mk_concat(left, newright);
3023 return self.init_as(key, updated);
3024 }
3025
3026 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
3027 return NodeId::BOT;
3028 }
3029
3030 if tail.is_concat(self) {
3031 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
3032 let upd = self.mk_concat(rw, tail.right(self));
3033 return self.init_as(key, upd);
3034 }
3035 }
3036
3037 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
3038 return self.init_as(key, new);
3039 }
3040
3041 match (self.get_kind(head), self.get_kind(tail)) {
3042 (Kind::Star, Kind::Concat) if head.is_star(self) => {
3044 let rl = tail.left(self);
3045 if head == rl {
3046 return self.init_as(key, tail);
3047 }
3048 }
3049 (_, Kind::Concat) => {
3051 let curr = head;
3052 let h2 = self.mk_concat(curr, tail.left(self));
3053 let tr = tail.right(self);
3054 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
3055 return self.init_as(key, new);
3056 }
3057 }
3058 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
3059 return self.init_as(key, tail);
3060 }
3061 _ => {}
3062 }
3063
3064 self.init(key)
3065 }
3066
3067 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
3068 let lb_body = {
3070 match self.starts_with_ts(lb_body) {
3071 true => lb_body,
3072 false => self.mk_concat(NodeId::TS, lb_body),
3073 }
3074 };
3075 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
3077 }
3078
3079 fn mk_lookbehind_internal(
3080 &mut self,
3081 lb_body: NodeId,
3082 lb_prev: NodeId,
3083 ) -> Result<NodeId, ResharpError> {
3084 debug_assert!(lb_body != NodeId::MISSING);
3085 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
3086 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
3087 return Ok(NodeId::BOT);
3088 }
3089 if lb_body == NodeId::TS {
3090 return Ok(if lb_prev == NodeId::MISSING {
3091 NodeId::EPS
3092 } else {
3093 lb_prev
3094 });
3095 }
3096 if lb_body == NodeId::EPS {
3097 match lb_prev {
3098 NodeId::MISSING => return Ok(NodeId::EPS),
3099 NodeId::EPS => return Ok(NodeId::EPS),
3100 _ => return Ok(lb_prev),
3101 }
3102 }
3103
3104 let key = NodeKey {
3105 kind: Kind::Lookbehind,
3106 left: lb_body,
3107 right: lb_prev,
3108 extra: u32::MAX,
3109 };
3110 match self.key_is_created(&key) {
3111 Some(id) => Ok(*id),
3112 None => {
3113 if lb_prev == NodeId::TS {
3114 return Ok(self.mk_concat(lb_prev, lb_body));
3115 }
3116
3117 Ok(self.init(key))
3118 }
3119 }
3120 }
3121
3122 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3123 let la_body = if la_tail.is_missing() && rel == 0 {
3124 self.flatten_la_body(la_body)
3125 } else {
3126 la_body
3127 };
3128 let la_body = {
3130 match self.ends_with_ts(la_body) {
3131 true => la_body,
3132 false => self.mk_concat(la_body, NodeId::TS),
3133 }
3134 };
3135 let rel = if NodeId::MISSING == la_tail {
3136 rel
3137 } else {
3138 match la_tail.is_center_nullable(self) {
3139 false => u32::MAX,
3140 true => rel,
3141 }
3142 };
3143
3144 self.mk_lookahead_internal(la_body, la_tail, rel)
3145 }
3146
3147 fn flatten_la_body(&mut self, node: NodeId) -> NodeId {
3148 if !self
3149 .get_meta_flags(node)
3150 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
3151 {
3152 return node;
3153 }
3154 match self.get_kind(node) {
3155 Kind::Lookahead
3156 if self.get_lookahead_tail(node).is_missing()
3157 && self.get_lookahead_rel(node) == 0 =>
3158 {
3159 let inner = self.get_lookahead_inner(node);
3160 let inner = self.strip_trailing_ts(inner);
3161 self.flatten_la_body(inner)
3162 }
3163 Kind::Union => {
3164 let l = self.flatten_la_body(node.left(self));
3165 let r = self.flatten_la_body(node.right(self));
3166 self.mk_union(l, r)
3167 }
3168 _ => node,
3169 }
3170 }
3171
3172 fn strip_trailing_ts(&self, node: NodeId) -> NodeId {
3173 if node.is_concat(self) && node.right(self) == NodeId::TS {
3174 node.left(self)
3175 } else {
3176 node
3177 }
3178 }
3179
3180 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3182 let key = NodeKey {
3183 kind: Kind::Lookahead,
3184 left: la_body,
3185 right: la_tail,
3186 extra: rel,
3187 };
3188 if let Some(id) = self.key_is_created(&key) {
3189 return *id;
3190 }
3191 if la_body == NodeId::TS {
3192 if rel == 0 {
3193 return la_tail.missing_to_eps();
3194 } else {
3195 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3196 }
3197 }
3198 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3199 return NodeId::BOT;
3200 }
3201 if la_tail.is_missing() && rel == u32::MAX {
3202 return NodeId::BOT;
3203 }
3204
3205 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
3206 return la_body;
3207 }
3208
3209 if la_tail == NodeId::TS {
3210 if rel == 0 || rel == u32::MAX {
3211 return self.mk_concat(la_body, NodeId::TS);
3212 } else if rel == u32::MAX {
3213 return self.mk_begins_with(la_body);
3214 }
3215 }
3216
3217 if rel == u32::MAX {
3218 if la_tail.is_missing() {
3219 return NodeId::BOT;
3220 }
3221
3222 if self.is_always_nullable(la_body) {
3223 return la_tail.missing_to_eps();
3224 }
3225
3226 if la_tail != NodeId::MISSING {
3227 match self.get_kind(la_tail) {
3228 _ => {
3229 if la_body.is_compl_plus_end(self) {
3230 let minlen = self.get_min_length_only(la_tail);
3231 if minlen >= 1 {
3232 return NodeId::BOT;
3233 }
3234 }
3235 }
3236 }
3237 }
3238 }
3239
3240 if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3241 let la_body2 = self.get_lookahead_inner(la_tail);
3242 let body1_ts = self.mk_concat(la_body, NodeId::TS);
3243 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3244 let new_la_body = self.mk_inter(body1_ts, body2_ts);
3245 let new_la_rel = self.get_lookahead_rel(la_tail);
3246 let new_la_tail = self.get_lookahead_tail(la_tail);
3247 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3248 }
3249
3250 if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3251 let la_body_right = la_body.right(self);
3252 if self.is_always_nullable(la_body_right) {
3253 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3254 }
3255 let bodyright = la_body.right(self);
3256 if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3257 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3258 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3259 }
3260 }
3261
3262 if la_tail != NodeId::MISSING {
3263 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3264 let lpred = la_body.left(self);
3265 if lpred.is_pred(self) {
3266 let l = lpred.pred_tset(self);
3267 let r = la_tail.pred_tset(self);
3268 let psi_and = self.solver().and_id(l, r);
3269 let rewrite = self.mk_pred(psi_and);
3270 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3271 let new_right =
3272 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3273 return self.mk_concat(rewrite, new_right);
3274 }
3275 }
3276 }
3277
3278 self.get_node_id(key)
3279 }
3280
3281 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3282 let has_match = (packed >> 16) > 0;
3283 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3284 return NodeId::BOT;
3285 }
3286 debug_assert!(
3287 body == NodeId::BOT || !self.is_infinite(body),
3288 "Counted body must have finite max length"
3289 );
3290 let chain = self.prune_counted_chain(body, chain);
3291 let key = NodeKey {
3292 kind: Kind::Counted,
3293 left: body,
3294 right: chain,
3295 extra: packed,
3296 };
3297 if let Some(id) = self.key_is_created(&key) {
3298 return *id;
3299 }
3300 self.get_node_id(key)
3301 }
3302
3303 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3304 if chain == NodeId::MISSING || body == NodeId::BOT {
3305 return chain;
3306 }
3307 if self.nullability(body) != Nullability::NEVER {
3308 return NodeId::MISSING;
3309 }
3310 let chain_body = chain.left(self);
3311 if chain_body == NodeId::BOT {
3312 return chain;
3313 }
3314 let not_begins = self.mk_not_begins_with(body);
3315 let inter = self.mk_inter(chain_body, not_begins);
3316 let is_empty = inter == NodeId::BOT;
3317 if is_empty {
3318 self.prune_counted_chain(body, chain.right(self))
3319 } else {
3320 chain
3321 }
3322 }
3323
3324 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3325 let neg_inner = self.mk_concat(body, NodeId::TS);
3326 let neg_part = self.mk_compl(neg_inner);
3327 let conc = self.mk_concat(neg_part, NodeId::END);
3328 self.mk_lookahead(conc, NodeId::MISSING, rel)
3329 }
3330
3331 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3332 match self.get_node(body).kind {
3333 Kind::Pred => {
3334 let psi = body.pred_tset(self);
3335 let negated = self.mk_pred_not(psi);
3336 let union = self.mk_union(NodeId::BEGIN, negated);
3337 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3338 }
3339 _ => {
3340 let uc = crate::unicode_classes::utf8_char(self);
3343 let neg = self.mk_compl(body);
3344 let negated = self.mk_inter(neg, uc);
3345 let union = self.mk_union(NodeId::BEGIN, negated);
3346 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3347 }
3348 }
3349 }
3350
3351 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3352 debug_assert!(left != NodeId::MISSING);
3353 debug_assert!(right != NodeId::MISSING);
3354 if left > right {
3355 return self.mk_union(right, left);
3356 }
3357 let key = NodeKey {
3358 kind: Kind::Union,
3359 left,
3360 right,
3361 extra: u32::MAX,
3362 };
3363 if let Some(id) = self.key_is_created(&key) {
3364 return *id;
3365 }
3366 if left == right {
3367 return self.init_as(key, left);
3368 }
3369 if left == NodeId::BOT {
3370 return self.init_as(key, right);
3371 }
3372 if right == NodeId::BOT {
3373 return self.init_as(key, left);
3374 }
3375 if right == NodeId::TS {
3376 return self.init_as(key, right);
3377 }
3378 if left == NodeId::TS {
3379 return self.init_as(key, left);
3380 }
3381 match (self.get_kind(left), self.get_kind(right)) {
3382 (Kind::Union, _) => {
3383 self.iter_unions_b(left, &mut |b, v| {
3384 b.temp_vec.push(v);
3385 });
3386 self.iter_unions_b(right, &mut |b, v| {
3387 b.temp_vec.push(v);
3388 });
3389 self.temp_vec.sort();
3390 let tree = self.temp_vec.clone();
3391 self.temp_vec.clear();
3392 let newnode = tree
3393 .iter()
3394 .rev()
3395 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3396 return self.init_as(key, newnode);
3397 }
3398 (_, Kind::Union) => {
3399 let rleft = right.left(self);
3400 if left > rleft {
3402 self.iter_unions_b(left, &mut |b, v| {
3403 b.temp_vec.push(v);
3404 });
3405 self.iter_unions_b(right, &mut |b, v| {
3406 b.temp_vec.push(v);
3407 });
3408 self.temp_vec.sort();
3409 let tree = self.temp_vec.clone();
3410 self.temp_vec.clear();
3411 let newnode = tree
3412 .iter()
3413 .rev()
3414 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3415 return self.init_as(key, newnode);
3416 } else {
3417 if let Some(rw) = self.attempt_rw_unions(left, right) {
3418 return self.init_as(key, rw);
3419 }
3420 }
3421 }
3422 _ => {}
3423 }
3424
3425 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3426 return self.init_as(key, rw);
3427 }
3428 self.init(key)
3429 }
3430
3431 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3432 debug_assert!(left_id != NodeId::MISSING);
3433 debug_assert!(right_id != NodeId::MISSING);
3434 if left_id == right_id {
3435 return left_id;
3436 }
3437 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3438 return NodeId::BOT;
3439 }
3440 if left_id == NodeId::TS {
3441 return right_id;
3442 }
3443 if right_id == NodeId::TS {
3444 return left_id;
3445 }
3446 if left_id > right_id {
3447 return self.mk_inter(right_id, left_id);
3448 }
3449 let key = NodeKey {
3450 kind: Kind::Inter,
3451 left: left_id,
3452 right: right_id,
3453 extra: u32::MAX,
3454 };
3455 if let Some(id) = self.key_is_created(&key) {
3456 return *id;
3457 }
3458
3459 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3460 return self.init_as(key, rw);
3461 }
3462
3463 self.init(key)
3464 }
3465
3466 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3467 let node = NodeKey {
3468 kind,
3469 left: NodeId::MISSING,
3470 right: NodeId::MISSING,
3471 extra: u32::MAX,
3472 };
3473 self.init(node)
3474 }
3475
3476 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3477 let star = self.mk_star(body_id);
3478 self.mk_concat(body_id, star)
3479 }
3480 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3481 let opt = self.mk_opt(body_id);
3482 let mut nodes1 = vec![];
3483 for _ in lower..upper {
3484 nodes1.push(opt);
3485 }
3486 for _ in 0..lower {
3487 nodes1.push(body_id);
3488 }
3489 self.mk_concats(nodes1.into_iter())
3490 }
3491 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3492 self.mk_union(NodeId::EPS, body_id)
3493 }
3494
3495 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3496 let key = NodeKey {
3497 kind: Kind::Star,
3498 left: body_id,
3499 right: NodeId::MISSING,
3500 extra: 0,
3501 };
3502 if let Some(id) = self.key_is_created(&key) {
3503 return *id;
3504 }
3505 if body_id.is_kind(self, Kind::Star) {
3507 return body_id;
3508 }
3509 self.get_node_id(key)
3510 }
3511
3512 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3515 match self.get_kind(node_id) {
3516 Kind::End => Nullability::EMPTYSTRING,
3517 Kind::Begin => Nullability::EMPTYSTRING,
3518 Kind::Pred => Nullability::NEVER,
3519 Kind::Star => Nullability::ALWAYS,
3520 Kind::Inter | Kind::Concat => {
3521 let lnull = self.nullability_emptystring(node_id.left(self));
3522 let rnull = self.nullability_emptystring(node_id.right(self));
3523 lnull.and(rnull) }
3525 Kind::Union => {
3526 let lnull = self.nullability_emptystring(node_id.left(self));
3527 let rnull = self.nullability_emptystring(node_id.right(self));
3528 lnull.or(rnull)
3529 }
3530 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3531 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3532 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3533 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3534 }
3535 }
3536
3537 #[inline(always)]
3538 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3539 self.get_meta(node_id)
3540 .flags
3541 .nullability()
3542 .has(Nullability::CENTER.or(Nullability::END))
3543 }
3544
3545 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3546 self.get_only_nullability(node_id)
3547 }
3548
3549 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3550 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3551 }
3552
3553 pub fn pp(&self, node_id: NodeId) -> String {
3554 let mut s = String::new();
3555 self.ppw(&mut s, node_id).unwrap();
3556 s
3557 }
3558
3559 pub fn factor_suffixes(&mut self, node: NodeId) -> Result<NodeId, ResharpError> {
3562 let mut memo = FxHashMap::default();
3563 self.factor_suffixes_rec(node, &mut memo)
3564 }
3565
3566 fn factor_suffixes_rec(
3567 &mut self,
3568 node: NodeId,
3569 memo: &mut FxHashMap<NodeId, NodeId>,
3570 ) -> Result<NodeId, ResharpError> {
3571 if let Some(&v) = memo.get(&node) {
3572 return Ok(v);
3573 }
3574 let result = match self.get_kind(node) {
3575 Kind::Pred | Kind::Begin | Kind::End => node,
3576 Kind::Star => {
3577 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3578 self.mk_star(inner)
3579 }
3580 Kind::Compl => {
3581 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3582 self.mk_compl(inner)
3583 }
3584 Kind::Concat => {
3585 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3586 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3587 self.mk_concat(l, r)
3588 }
3589 Kind::Inter => {
3590 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3591 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3592 self.mk_inter(l, r)
3593 }
3594 Kind::Union => {
3595 let mut comps: Vec<NodeId> = Vec::new();
3596 node.iter_union(self, &mut |_, c| comps.push(c));
3597 let mut factored: Vec<NodeId> = Vec::with_capacity(comps.len());
3598 let mut reversible = true;
3599 for c in &comps {
3600 let f = self.factor_suffixes_rec(*c, memo)?;
3601 if c.contains_lookaround(self)
3602 || c.flags_contains(self).has(MetaFlags::CONTAINS_ANCHORS)
3603 {
3604 reversible = false;
3605 }
3606 factored.push(f);
3607 }
3608 if !reversible || factored.len() < 2 {
3609 let mut acc = factored[0];
3610 for c in factored.into_iter().skip(1) {
3611 acc = self.mk_union(acc, c);
3612 }
3613 acc
3614 } else {
3615 let mut rev_comps: Vec<NodeId> = Vec::with_capacity(factored.len());
3616 for c in factored {
3617 rev_comps.push(self.reverse(c)?);
3618 }
3619 let mut acc = rev_comps[0];
3620 for c in rev_comps.into_iter().skip(1) {
3621 acc = self.mk_union(acc, c);
3622 }
3623 self.reverse(acc)?
3624 }
3625 }
3626 Kind::Lookahead => {
3627 let inner = self.get_lookahead_inner(node);
3628 let tail = self.get_lookahead_tail(node);
3629 let rel = self.get_lookahead_rel(node);
3630 let ni = self.factor_suffixes_rec(inner, memo)?;
3631 let nt = self.factor_suffixes_rec(tail, memo)?;
3632 self.mk_lookahead(ni, nt, rel)
3633 }
3634 Kind::Lookbehind => {
3635 let inner = self.get_lookbehind_inner(node);
3636 let prev = self.get_lookbehind_prev(node);
3637 let ni = self.factor_suffixes_rec(inner, memo)?;
3638 let np = if prev != NodeId::MISSING {
3639 self.factor_suffixes_rec(prev, memo)?
3640 } else {
3641 prev
3642 };
3643 self.mk_lookbehind(ni, np)
3644 }
3645 Kind::Counted => node,
3646 };
3647 memo.insert(node, result);
3648 Ok(result)
3649 }
3650
3651 #[allow(dead_code)]
3652 pub fn pp_nulls(&self, node_id: NodeId) -> String {
3653 let nu = self.get_nulls_id(node_id);
3654 let nr = self.mb.nb.get_set_ref(nu);
3655 let s1 = format!("{:?}", nr);
3656 s1
3657 }
3658
3659 #[allow(dead_code)]
3660 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3661 match self.get_tregex(term_id) {
3662 TRegex::Leaf(node_id) => {
3663 let mut s = String::new();
3664 self.ppw(&mut s, *node_id).unwrap();
3665 s
3666 }
3667 TRegex::ITE(cond, then_id, else_id) => {
3668 format!(
3669 "ITE({},{},{})",
3670 self.solver_ref().pp(*cond),
3671 self.ppt(*then_id),
3672 self.ppt(*else_id)
3673 )
3674 }
3675 }
3676 }
3677
3678 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3679 if cfg!(feature = "graphviz") {
3680 match node_id {
3681 NodeId::MISSING => return write!(s, "MISSING"),
3682 NodeId::BOT => return write!(s, "⊥"),
3683 NodeId::TS => return write!(s, "_*"),
3684 NodeId::TOP => return write!(s, "_"),
3685 NodeId::EPS => return write!(s, "ε"),
3686 _ => {}
3687 }
3688 }
3689
3690 match node_id {
3691 NodeId::MISSING => return write!(s, "MISSING"),
3692 NodeId::BOT => return write!(s, "⊥"),
3693 NodeId::TS => return write!(s, "_*"),
3694 NodeId::TOP => return write!(s, "_"),
3695 NodeId::EPS => return write!(s, ""),
3696 _ => {}
3697 }
3698
3699 match self.get_kind(node_id) {
3700 Kind::End => write!(s, r"\z"),
3701 Kind::Begin => write!(s, r"\A"),
3702 Kind::Pred => {
3703 let psi = node_id.pred_tset(self);
3704 if psi == TSetId::EMPTY {
3705 write!(s, r"⊥")
3706 } else if psi == TSetId::FULL {
3707 write!(s, r"_")
3708 } else {
3709 write!(s, "{}", self.solver_ref().pp(psi))
3710 }
3711 }
3712 Kind::Inter => {
3713 write!(s, "(")?;
3714 self.ppw(s, node_id.left(self))?;
3715 write!(s, "&")?;
3716 let mut curr = node_id.right(self);
3717 while curr.is_inter(self) {
3718 let n = curr.left(self);
3719 self.ppw(s, n)?;
3720 write!(s, "&")?;
3721 curr = curr.right(self);
3722 }
3723 self.ppw(s, curr)?;
3724 write!(s, ")")
3725 }
3726 Kind::Union => {
3727 let left = node_id.left(self);
3728 let right = node_id.right(self);
3729 write!(s, "(")?;
3730 self.ppw(s, left)?;
3731 write!(s, "|")?;
3732 let mut curr = right;
3733 while self.get_kind(curr) == Kind::Union {
3734 let n = curr.left(self);
3735 self.ppw(s, n)?;
3736 write!(s, "|")?;
3737 curr = curr.right(self);
3738 }
3739 self.ppw(s, curr)?;
3740 write!(s, ")")
3741 }
3742 Kind::Concat => {
3743 let left = node_id.left(self);
3744 let right = node_id.right(self);
3745 if right.is_star(self) && right.left(self) == left {
3746 self.ppw(s, left)?;
3747 write!(s, "+")?;
3748 return Ok(());
3749 }
3750 if right.is_concat(self) {
3751 let rl = right.left(self);
3752 if rl.is_star(self) && rl.left(self) == left {
3753 self.ppw(s, left)?;
3754 write!(s, "+")?;
3755 return self.ppw(s, right.right(self));
3756 }
3757 }
3758 if right.is_concat(self) && right.left(self) == left {
3759 let mut num = 1;
3760 let mut right = right;
3761 while right.is_concat(self) && right.left(self) == left {
3762 num += 1;
3763 right = right.right(self);
3764 }
3765 if let Some(inner) = left.is_opt_v(self) {
3767 let mut inner_count = 0;
3768 let mut right2 = right;
3769 while right2.is_concat(self) && right2.left(self) == inner {
3770 inner_count += 1;
3771 right2 = right2.right(self);
3772 }
3773 if right2 == inner {
3774 inner_count += 1;
3775 self.ppw(s, inner)?;
3776 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3777 }
3778 if inner_count > 0 {
3779 self.ppw(s, inner)?;
3780 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3781 return self.ppw(s, right2);
3782 }
3783 }
3784 self.ppw(s, left)?;
3785 if right == left {
3786 num += 1;
3787 return write!(s, "{{{}}}", num);
3788 }
3789 if num <= 3 && left.is_pred(self) {
3790 for _ in 1..num {
3791 self.ppw(s, left)?;
3792 }
3793 return self.ppw(s, right);
3794 } else {
3795 write!(s, "{{{}}}", num)?;
3796 return self.ppw(s, right);
3797 }
3798 }
3799 self.ppw(s, left)?;
3800 self.ppw(s, right)
3801 }
3802 Kind::Star => {
3803 let left = node_id.left(self);
3804 let leftkind = self.get_kind(left);
3805 match leftkind {
3806 Kind::Concat | Kind::Star | Kind::Compl => {
3807 write!(s, "(")?;
3808 self.ppw(s, left)?;
3809 write!(s, ")")?;
3810 }
3811 _ => {
3812 self.ppw(s, left)?;
3813 }
3814 };
3815 write!(s, "*")
3816 }
3817 Kind::Compl => {
3818 write!(s, "~(")?;
3819 self.ppw(s, node_id.left(self))?;
3820 write!(s, ")")
3821 }
3822 Kind::Lookbehind => {
3823 let lbleft = self.get_lookbehind_prev(node_id);
3824 let lbinner = self.get_lookbehind_inner(node_id);
3825 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3826 if lbleft != NodeId::MISSING {
3827 write!(s, "❮")?;
3828 self.ppw(s, lbleft)?;
3829 write!(s, "❯")?;
3830 }
3831
3832 write!(s, "(?<=")?;
3833 self.ppw(s, lbinner)?;
3834 write!(s, ")")
3835 }
3836 Kind::Lookahead => {
3837 let inner = self.get_lookahead_inner(node_id);
3838 write!(s, "(?=")?;
3839 self.ppw(s, inner)?;
3840 write!(s, ")")?;
3841 if self.get_lookahead_rel(node_id) != 0 {
3842 write!(s, "{{")?;
3843 let rel = self.get_lookahead_rel(node_id);
3844 if rel == u32::MAX {
3845 write!(s, "∅")?;
3846 } else {
3847 write!(s, "{}", rel)?;
3848 }
3849 write!(s, "}}")?;
3850 }
3851 if node_id.right(self) == NodeId::MISSING {
3852 Ok(())
3853 } else {
3854 write!(s, "❮")?;
3855 self.ppw(s, node_id.right(self))?;
3856 write!(s, "❯")
3857 }
3858 }
3859 Kind::Counted => {
3860 let body = node_id.left(self);
3861 let packed = self.get_extra(node_id);
3862 let step = packed & 0xFFFF;
3863 let best = packed >> 16;
3864 write!(s, "#(")?;
3865 self.ppw(s, body)?;
3866 write!(s, ")s{}b{}", step, best)
3867 }
3868 }
3869 }
3870
3871 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3872 self.mk_concat(node, NodeId::TS)
3873 }
3874
3875 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3876 let node_ts = self.mk_concat(node, NodeId::TS);
3877 self.mk_compl(node_ts)
3878 }
3879
3880 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3881 let notset = self.solver().not_id(set);
3882 self.mk_pred(notset)
3883 }
3884
3885 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3886 let set_id = self.solver().u8_to_set_id(char);
3887 self.mk_pred(set_id)
3888 }
3889
3890 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3891 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3892 self.mk_pred(rangeset)
3893 }
3894
3895 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3896 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3897 for &(lo, hi) in &ranges[1..] {
3898 let r = self.mk_range_u8(lo, hi);
3899 node = self.mk_union(node, r);
3900 }
3901 node
3902 }
3903
3904 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3905 let mut prefix = Vec::new();
3906 let mut curr = node;
3907 loop {
3908 if curr == NodeId::EPS {
3909 let full = !prefix.is_empty();
3910 return (prefix, full);
3911 }
3912 if curr == NodeId::BOT {
3913 break;
3914 }
3915 if curr.is_pred(self) {
3916 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3917 Some(byte) => {
3918 prefix.push(byte);
3919 return (prefix, true);
3920 }
3921 None => break, }
3923 }
3924 if !curr.is_concat(self) {
3925 break;
3926 }
3927 let left = curr.left(self);
3928 if !left.is_pred(self) {
3929 break;
3930 }
3931 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3932 Some(byte) => prefix.push(byte),
3933 None => break,
3934 }
3935 curr = curr.right(self);
3936 }
3937 (prefix, false)
3938 }
3939
3940 #[allow(dead_code)]
3941 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3942 let mut result = NodeId::EPS;
3943 for byte in raw_str.iter().rev() {
3944 let node = self.mk_u8(*byte);
3945 result = self.mk_concat(node, result);
3946 }
3947 result
3948 }
3949
3950 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3951 let mut result = NodeId::EPS;
3952 for byte in raw_str.bytes().rev() {
3953 let node = self.mk_u8(byte);
3954 result = self.mk_concat(node, result);
3955 }
3956 result
3957 }
3958
3959 pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3960 self.prune_rec::<true>(node_id, memo)
3961 }
3962
3963 pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3964 self.prune_rec::<false>(node_id, memo)
3965 }
3966
3967 pub fn simplify_fwd_initial(&mut self, node_id: NodeId) -> NodeId {
3968 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3969 match self.get_kind(node_id) {
3970 Kind::Concat => {
3971 let l = self.simplify_fwd_initial_rec(node_id.left(self), &mut memo);
3972 let r = self.simplify_fwd_initial_rec(node_id.right(self), &mut memo);
3973 if r.is_concat(self) {
3974 if r.left(self).is_ts() && !r.right(self).is_lookahead(self) {
3975 if l.is_begin_nullable(self) {
3976 return r;
3977 }
3978 }
3979 }
3980 }
3981 _ => {}
3982 }
3983
3984 self.simplify_fwd_initial_rec(node_id, &mut memo)
3985 }
3986
3987 fn simplify_fwd_initial_rec(
3988 &mut self,
3989 node_id: NodeId,
3990 memo: &mut FxHashMap<NodeId, NodeId>,
3991 ) -> NodeId {
3992 if let Some(&v) = memo.get(&node_id) {
3993 return v;
3994 }
3995
3996 let out = match self.get_kind(node_id) {
3997 Kind::Union => {
3998 let mut parts: Vec<NodeId> = Vec::new();
3999 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4000 for p in &mut parts {
4001 *p = self.simplify_fwd_initial_rec(*p, memo);
4002 }
4003 parts
4004 .iter()
4005 .rev()
4006 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4007 }
4008 Kind::Concat => {
4009 let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
4010 let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
4011 if l == NodeId::TS
4012 && r.is_lookahead(self)
4013 && self.get_lookahead_tail(r).is_missing()
4014 && self.get_lookahead_rel(r) == 0
4015 {
4016 let body = self.get_lookahead_inner(r);
4017 if self.is_nullable(body, Nullability::END) {
4018 return NodeId::TS;
4019 }
4020 }
4021
4022 if l != NodeId::TS {
4023 return self.mk_concat(l, r);
4024 }
4025 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4026 return rewritten;
4027 }
4028 let (head, tail) = if r.is_concat(self) {
4029 (r.left(self), r.right(self))
4030 } else {
4031 (r, NodeId::EPS)
4032 };
4033 if self.get_kind(head) != Kind::Union {
4034 return self.mk_concat(l, r);
4035 }
4036 if !head.left(self).is_begin() {
4037 return self.mk_concat(l, r);
4038 }
4039 let y = head.right(self);
4040 if !y.is_pred(self) {
4041 return self.mk_concat(l, r);
4042 }
4043 if tail.is_concat(self) {
4044 let tl = tail.left(self);
4045 let tr = tail.right(self);
4046 let y_tset = y.pred_tset(self);
4047 let covers_all = if tl == NodeId::TS {
4048 true
4049 } else if let Some(x_pred) = tl.is_pred_star(self) {
4050 let x_ts = x_pred.pred_tset(self);
4051 let combined = self.solver().or_id(x_ts, y_tset);
4052 self.solver().is_full_id(combined)
4053 } else {
4054 false
4055 };
4056 if covers_all {
4057 return self.mk_concat(NodeId::TS, tr);
4058 }
4059 }
4060 self.mk_concat(l, r)
4061 }
4062 _ => node_id,
4063 };
4064 memo.insert(node_id, out);
4065 out
4066 }
4067
4068 pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
4069 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4070 self.simplify_rev_initial_rec(node_id, &mut memo)
4071 }
4072
4073 fn simplify_rev_initial_rec(
4074 &mut self,
4075 node_id: NodeId,
4076 memo: &mut FxHashMap<NodeId, NodeId>,
4077 ) -> NodeId {
4078 if let Some(&v) = memo.get(&node_id) {
4079 return v;
4080 }
4081 let out = match self.get_kind(node_id) {
4082 Kind::Union => {
4083 let mut parts: Vec<NodeId> = Vec::new();
4084 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4085 for p in &mut parts {
4086 *p = self.simplify_rev_initial_rec(*p, memo);
4087 }
4088 parts
4089 .iter()
4090 .rev()
4091 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4092 }
4093 Kind::Concat => {
4094 let l = self.simplify_rev_initial_rec(node_id.left(self), memo);
4095 let r = self.simplify_rev_initial_rec(node_id.right(self), memo);
4096
4097 if l != NodeId::TS {
4098 return self.mk_concat(l, r);
4099 }
4100 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4101 return rewritten;
4102 }
4103 let (tail1, tail2) = if r.is_concat(self) {
4104 (r.left(self), r.right(self))
4105 } else {
4106 (r, NodeId::EPS)
4107 };
4108
4109 if tail1.is_begin() {
4110 return r;
4111 }
4112
4113 if self.get_kind(tail1) != Kind::Union {
4114 return self.mk_concat(l, r);
4115 }
4116 if !tail1.left(self).is_begin() {
4117 return self.mk_concat(l, r);
4118 }
4119 let y = tail1.right(self);
4120 if !y.is_pred(self) {
4121 return self.mk_concat(l, r);
4122 }
4123 if tail2.is_concat(self) {
4124 let tl = tail2.left(self);
4125 let tr = tail2.right(self);
4126
4127 let y_tset = y.pred_tset(self);
4128 let covers_all = if tl == NodeId::TS {
4129 true
4130 } else if let Some(x_pred) = tl.is_pred_star(self) {
4131 let x_ts = x_pred.pred_tset(self);
4132 let combined = self.solver().or_id(x_ts, y_tset);
4133 self.solver().is_full_id(combined)
4134 } else {
4135 false
4136 };
4137 if covers_all {
4138 return self.mk_concat(NodeId::TS, tr);
4139 }
4140 }
4141 self.mk_concat(l, r)
4142 }
4143 _ => node_id,
4144 };
4145 memo.insert(node_id, out);
4146 out
4147 }
4148
4149 fn try_begin_neg_pred_rewrite(&mut self, r: NodeId) -> Option<NodeId> {
4150 if !r.is_concat(self) {
4151 return None;
4152 }
4153 let begin = r.left(self);
4154 let mid_tail = r.right(self);
4155 if !begin.is_begin() {
4156 return None;
4157 }
4158 if !mid_tail.is_concat(self) {
4159 return None;
4160 }
4161 let neg = mid_tail.left(self);
4162 let tail = mid_tail.right(self);
4163 if self.get_kind(neg) != Kind::Compl {
4164 return None;
4165 }
4166 let inside = neg.left(self);
4167 if !inside.is_concat(self) {
4168 return None;
4169 }
4170 let ts_part = inside.left(self);
4171 let c_pred = inside.right(self);
4172 if ts_part != NodeId::TS || !c_pred.is_pred(self) {
4173 return None;
4174 }
4175 let c_tset = c_pred.pred_tset(self);
4176 let not_c = self.mk_pred_not(c_tset);
4177 let left_branch = self.mk_concat(NodeId::BEGIN, tail);
4178 let not_c_tail = self.mk_concat(not_c, tail);
4179 let right_branch = self.mk_concat(NodeId::TS, not_c_tail);
4180 Some(self.mk_union(left_branch, right_branch))
4181 }
4182
4183 fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
4184 if !n.is_concat(self) {
4185 return n;
4186 }
4187 let l = n.left(self);
4188 let r = n.right(self);
4189 if l == NodeId::TS && r == NodeId::END {
4190 return NodeId::TS;
4191 }
4192 let new_r = self.strip_la_body_end(r);
4193 if new_r == r {
4194 n
4195 } else {
4196 self.mk_concat(l, new_r)
4197 }
4198 }
4199
4200 fn prune_rec<const FWD: bool>(
4201 &mut self,
4202 node_id: NodeId,
4203 memo: &mut FxHashMap<NodeId, NodeId>,
4204 ) -> NodeId {
4205 assert!(node_id != NodeId::MISSING);
4206 if node_id == NodeId::MISSING {
4207 return node_id;
4208 }
4209 if let Some(&v) = memo.get(&node_id) {
4210 return v;
4211 }
4212 let (l, r) = (node_id.left(self), node_id.right(self));
4213 let out = match node_id.kind(self) {
4214 Kind::Union => {
4215 let l = self.prune_rec::<FWD>(l, memo);
4216 let r = self.prune_rec::<FWD>(r, memo);
4217 let mut parts: Vec<NodeId> = Vec::new();
4218 parts.push(l);
4219 self.iter_unions_b(r, &mut |_, v| parts.push(v));
4220 for p in &mut parts {
4221 *p = self.prune_rec::<FWD>(*p, memo);
4222 }
4223
4224 if FWD {
4225 let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
4227 for &p in &parts {
4228 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4229 let body = self.get_lookahead_inner(p);
4230 let rel = self.get_lookahead_rel(p);
4231 best.entry(body)
4232 .and_modify(|r| *r = (*r).min(rel))
4233 .or_insert(rel);
4234 }
4235 }
4236 parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
4237 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4238 let body = self.get_lookahead_inner(p);
4239 if self.get_lookahead_rel(p) != best[&body] {
4240 return acc;
4241 }
4242 }
4243 self.mk_union(p, acc)
4244 })
4245 } else {
4246 parts
4247 .iter()
4248 .rev()
4249 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4250 }
4251 }
4252 Kind::Concat => {
4253 if FWD
4254 && l.is_ts()
4255 && r.is_lookahead(self)
4256 && self.get_lookahead_tail(r).is_missing()
4257 && self.get_lookahead_rel(r) == 0
4258 {
4259 let body = self.get_lookahead_inner(r);
4260 if self.is_nullable(body, Nullability::END) {
4261 return NodeId::TS;
4262 }
4263 }
4264 let l = self.prune_rec::<FWD>(l, memo);
4265 let r = self.prune_rec::<FWD>(r, memo);
4266 self.mk_concat(l, r)
4267 }
4268 Kind::Inter => {
4269 let l = self.prune_rec::<FWD>(l, memo);
4270 let r = self.prune_rec::<FWD>(r, memo);
4271 self.mk_inter(l, r)
4272 }
4273 Kind::Compl => {
4274 let l = self.prune_rec::<FWD>(l, memo);
4275 self.mk_compl(l)
4276 }
4277 Kind::Lookahead => {
4278 let lrel = self.get_lookahead_rel(node_id);
4279 let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
4280 let body = self.prune_rec::<FWD>(body, memo);
4281
4282 let tail = if self.get_lookahead_tail(node_id).is_missing() {
4283 NodeId::MISSING
4284 } else {
4285 self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo)
4286 };
4287 self.mk_lookahead_internal(body, tail, lrel)
4288 }
4289 Kind::Begin => NodeId::BOT,
4290 Kind::Counted => {
4291 let body = self.prune_rec::<FWD>(l, memo);
4292 let chain = self.prune_rec::<FWD>(r, memo);
4293 self.mk_counted(body, chain, self.get_extra(node_id))
4294 }
4295 Kind::End | Kind::Pred => node_id,
4296 Kind::Star => {
4297 let l = self.prune_rec::<FWD>(l, memo);
4298 self.mk_star(l)
4299 }
4300 Kind::Lookbehind => {
4301 let l = self.prune_rec::<FWD>(l, memo);
4302 if r.is_missing() {
4303 l
4304 } else {
4305 node_id
4306 }
4307 }
4308 };
4309 memo.insert(node_id, out);
4310 out
4311 }
4312
4313 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4314 let mut sorted: Vec<NodeId> = nodes.collect();
4315 if sorted.len() <= 1 {
4316 return sorted.pop().unwrap_or(NodeId::BOT);
4317 }
4318 sorted.sort();
4319 sorted.dedup();
4320 sorted.retain(|&x| x != NodeId::BOT);
4321 if sorted.is_empty() {
4322 return NodeId::BOT;
4323 }
4324 if sorted.len() > 16 {
4325 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
4326 let mut non_concat: Vec<NodeId> = Vec::new();
4327 for &n in &sorted {
4328 if n.is_concat(self) {
4329 by_head.entry(self.get_left(n)).or_default().push(n);
4330 } else {
4331 non_concat.push(n);
4332 }
4333 }
4334 let mut absorbed: Vec<NodeId> = Vec::new();
4335 for &n in &non_concat {
4336 if by_head.contains_key(&n) {
4337 absorbed.push(n);
4338 }
4339 }
4340 if !absorbed.is_empty() {
4341 non_concat.retain(|n| !absorbed.contains(n));
4342 }
4343 if by_head.len() < sorted.len() {
4344 let mut groups: Vec<NodeId> = non_concat;
4345 for (head, tails) in by_head {
4346 let mut tail_nodes: Vec<NodeId> =
4347 tails.iter().map(|&n| self.get_right(n)).collect();
4348 if absorbed.contains(&head) {
4349 tail_nodes.push(NodeId::EPS);
4350 }
4351 let tail_union = self.mk_unions(tail_nodes.into_iter());
4352 let factored = self.mk_concat(head, tail_union);
4353 groups.push(factored);
4354 }
4355 groups.sort();
4356 groups.dedup();
4357 return self.mk_unions_balanced(&groups);
4358 }
4359 }
4360 self.mk_unions_balanced(&sorted)
4361 }
4362
4363 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
4364 match nodes.len() {
4365 0 => NodeId::BOT,
4366 1 => nodes[0],
4367 n => {
4368 let mid = n / 2;
4369 let left = self.mk_unions_balanced(&nodes[..mid]);
4370 let right = self.mk_unions_balanced(&nodes[mid..]);
4371 self.mk_union(left, right)
4372 }
4373 }
4374 }
4375
4376 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4377 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
4378 }
4379
4380 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4381 nodes
4382 .rev()
4383 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
4384 }
4385}
4386
4387impl RegexBuilder {
4388 pub fn iter_sat(
4389 &mut self,
4390 stack: &mut Vec<(TRegexId, TSetId)>,
4391 f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
4392 ) {
4393 debug_assert!(!stack.is_empty());
4394 loop {
4395 match stack.pop() {
4396 None => return,
4397 Some((curr, curr_set)) => match *self.get_tregex(curr) {
4398 TRegex::Leaf(n) => {
4399 let mut curr = n;
4400 while curr != NodeId::BOT {
4401 match self.get_kind(curr) {
4402 _ => {
4403 f(self, n, curr_set);
4404 curr = NodeId::BOT;
4405 }
4406 }
4407 }
4408 }
4409 TRegex::ITE(cnd, then_id, else_id) => {
4410 if else_id != TRegexId::BOT {
4411 let notcnd = self.solver().not_id(cnd);
4412 let interset1 = self.solver().and_id(curr_set, notcnd);
4413 stack.push((else_id, interset1));
4414 }
4415 let interset2 = self.solver().and_id(curr_set, cnd);
4416 stack.push((then_id, interset2));
4417 }
4418 },
4419 }
4420 }
4421 }
4422
4423 #[allow(dead_code)]
4424 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
4425 match self.get_tregex(term_id).clone() {
4426 TRegex::Leaf(node_id) => {
4427 if NodeId::BOT == node_id {
4428 vec![]
4429 } else {
4430 vec![node_id]
4431 }
4432 }
4433 TRegex::ITE(_, then_id, else_id) => {
4434 let mut then_nodes = self.extract_sat(then_id);
4435 let mut else_nodes = self.extract_sat(else_id);
4436 then_nodes.append(&mut else_nodes);
4437 then_nodes
4438 }
4439 }
4440 }
4441
4442 pub(crate) fn iter_unions_b(
4443 &mut self,
4444 curr: NodeId,
4445 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4446 ) {
4447 let mut curr = curr;
4448 while self.get_kind(curr) == Kind::Union {
4449 f(self, curr.left(self));
4450 curr = curr.right(self);
4451 }
4452 f(self, curr);
4453 }
4454
4455 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4456 if !self.contains_look(node_id) {
4457 return Some(node_id);
4458 }
4459 match self.get_kind(node_id) {
4460 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4461 Kind::Concat => {
4462 let left = node_id.left(self);
4463 let right = node_id.right(self);
4464 let elim_l = self.try_elim_lookarounds(left)?;
4465 let elim_r = self.try_elim_lookarounds(right)?;
4466 let rw = self.mk_concat(elim_l, elim_r);
4467 Some(rw)
4468 }
4469 Kind::Union => {
4470 let left = node_id.left(self);
4471 let right = node_id.right(self);
4472 let elim_l = self.try_elim_lookarounds(left)?;
4473 let elim_r = self.try_elim_lookarounds(right)?;
4474 let rw = self.mk_union(elim_l, elim_r);
4475 Some(rw)
4476 }
4477
4478 Kind::Star => {
4479 let body = node_id.left(self);
4480 let elim_l = self.try_elim_lookarounds(body)?;
4481 Some(self.mk_star(elim_l))
4482 }
4483 Kind::Compl => {
4484 let left = node_id.left(self);
4485 let elim_l = self.try_elim_lookarounds(left)?;
4486 Some(self.mk_compl(elim_l))
4487 }
4488 Kind::Lookahead => {
4489 let rel = self.get_lookahead_rel(node_id);
4490 if rel != 0 {
4491 return None;
4492 }
4493 let lbody = self.get_lookahead_inner(node_id);
4494 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4495 let elim_l = self.try_elim_lookarounds(lbody)?;
4496 let elim_r = self.try_elim_lookarounds(ltail)?;
4497 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4498 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4499 let rw = self.mk_inter(lbody_ts, ltail_ts);
4500 Some(rw)
4501 }
4502 Kind::Lookbehind => {
4503 let linner = self.get_lookbehind_inner(node_id);
4504 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4505 let elim_l = self.try_elim_lookarounds(linner)?;
4506 let elim_r = self.try_elim_lookarounds(lprev)?;
4507 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4508 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4509 let rw = self.mk_inter(lbody_ts, ltail_ts);
4510 Some(rw)
4511 }
4512 Kind::Inter => {
4513 let left = node_id.left(self);
4514 let right = node_id.right(self);
4515 let elim_l = self.try_elim_lookarounds(left)?;
4516 let elim_r = self.try_elim_lookarounds(right)?;
4517 let rw = self.mk_inter(elim_l, elim_r);
4518 Some(rw)
4519 }
4520 Kind::Counted => None,
4521 }
4522 }
4523
4524 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4526 if self.nullability(node) == Nullability::NEVER {
4527 node
4528 } else {
4529 self.mk_inter(NodeId::TOPPLUS, node)
4530 }
4531 }
4532
4533 pub fn iter_find_stack(
4534 &self,
4535 stack: &mut Vec<TRegexId>,
4536 mut f: impl FnMut(NodeId) -> bool,
4537 ) -> bool {
4538 loop {
4539 match stack.pop() {
4540 None => return false,
4541 Some(curr) => match self.get_tregex(curr) {
4542 TRegex::Leaf(n) => {
4543 let mut curr = *n;
4544 while curr != NodeId::BOT {
4545 match self.get_kind(curr) {
4546 Kind::Union => {
4547 if f(curr.left(self)) {
4548 return true;
4549 }
4550 curr = curr.right(self);
4551 }
4552 _ => {
4553 if f(*n) {
4554 return true;
4555 }
4556 curr = NodeId::BOT;
4557 }
4558 }
4559 }
4560 }
4561 TRegex::ITE(_, then_id, else_id) => {
4562 if *else_id != TRegexId::BOT {
4563 stack.push(*else_id);
4564 }
4565 stack.push(*then_id);
4566 }
4567 },
4568 }
4569 }
4570 }
4571
4572 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
4598 if node == NodeId::BOT {
4599 return Some(true);
4600 }
4601 if self.nullability(node) != Nullability::NEVER {
4602 return Some(false);
4603 }
4604 if let Some(cached) = self.cache_empty.get(&node) {
4605 if cached.is_checked() {
4606 return Some(cached.is_empty());
4607 }
4608 }
4609 let node = if !self.contains_look(node) {
4610 node
4611 } else {
4612 self.try_elim_lookarounds(node)?
4613 };
4614 let isempty_flag = self.is_empty_lang_internal(node);
4615
4616 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
4617 }
4618
4619 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, ResharpError> {
4620 if !self.get_meta_flags(initial_node).contains_inter() {
4622 return Ok(NodeFlags::ZERO);
4623 }
4624
4625 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4626 let mut worklist: VecDeque<NodeId> = VecDeque::new();
4627 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
4628 let mut stack = Vec::new();
4629 stack.push(begin_der);
4630 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
4631 visited.insert(node, initial_node);
4632 let nullability = self.nullability(node);
4633 if nullability != Nullability::NEVER {
4634 true
4635 } else {
4636 worklist.push_back(node);
4637 false
4638 }
4639 });
4640 if found_nullable_right_away {
4641 return Ok(NodeFlags::ZERO);
4642 }
4643
4644 worklist.push_back(initial_node);
4645 let isempty_flag: NodeFlags;
4646 let mut found_node = NodeId::BOT;
4647
4648 loop {
4649 match worklist.pop_front() {
4650 None => {
4651 isempty_flag = NodeFlags::IS_EMPTY;
4652 break;
4653 }
4654 Some(outer) => {
4655 if let Some(cached) = self.cache_empty.get(&outer) {
4656 if cached.is_checked() {
4657 if cached.is_empty() {
4658 continue;
4659 } else {
4660 return Ok(NodeFlags::ZERO);
4661 }
4662 }
4663 }
4664
4665 let derivative = self.der(outer, Nullability::CENTER)?;
4666
4667 stack.push(derivative);
4668
4669 let found_null = self.iter_find_stack(&mut stack, |node| {
4670 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
4671 found_node = node;
4672 if !self.get_meta_flags(node).contains_inter() {
4673 true
4674 } else {
4675 e.insert(outer);
4676 worklist.push_front(node);
4677 self.any_nonbegin_nullable(node)
4678 }
4679 } else {
4680 false
4681 }
4682 });
4683 if found_null {
4684 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
4685 isempty_flag = NodeFlags::ZERO;
4686 break;
4687 }
4688 }
4689 }
4690 }
4691
4692 self.cache_empty.insert(
4693 initial_node,
4694 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
4695 );
4696 Ok(isempty_flag)
4697 }
4698
4699 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
4701 if larger_lang == smaller_lang {
4702 return Some(true);
4703 }
4704
4705 if self
4707 .nullability(larger_lang)
4708 .not()
4709 .and(self.nullability(smaller_lang))
4710 != Nullability::NEVER
4711 {
4712 return Some(false);
4713 }
4714
4715 let (smaller_lang, larger_lang) =
4720 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
4721 let wrap = |b: &mut Self, n: NodeId| {
4722 let tmp = b.mk_concat(n, NodeId::TS);
4723 b.mk_concat(NodeId::TS, tmp)
4724 };
4725 (wrap(self, smaller_lang), wrap(self, larger_lang))
4726 } else {
4727 (smaller_lang, larger_lang)
4728 };
4729
4730 let nota = self.mk_compl(larger_lang);
4731 let diff = self.mk_inter(smaller_lang, nota);
4732 self.is_empty_lang(diff)
4733 }
4734}