1#![warn(dead_code)]
7
8pub mod unicode_classes;
9use rustc_hash::FxHashMap;
10use solver::{Solver, TSetId};
11use std::collections::{BTreeSet, VecDeque};
12use std::fmt::Debug;
13use std::fmt::Write;
14use std::hash::Hash;
15pub use unicode_classes::{neg_class, utf8_char, UnicodeClassCache};
16
17use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
18pub mod nulls;
19pub mod solver;
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum ResharpError {
23 AnchorLimit,
24 StateSpaceExplosion,
25 UnsupportedPattern,
26}
27
28impl std::fmt::Display for ResharpError {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 ResharpError::AnchorLimit => write!(f, "anchor limit exceeded"),
32 ResharpError::StateSpaceExplosion => {
33 write!(f, "too many states, likely infinite state space")
34 }
35 ResharpError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
36 }
37 }
38}
39
40impl std::error::Error for ResharpError {}
41
42mod id {
43 pub const MISSING: u32 = 0;
44 pub const BOT: u32 = 1;
45 pub const EPS: u32 = 2;
46 pub const TOP: u32 = 3;
47 pub const TOPSTAR: u32 = 4;
48 pub const TOPPLUS: u32 = 5;
49 pub const BEGIN: u32 = 6;
50 pub const END: u32 = 7;
51}
52
53#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
54pub(crate) struct MetadataId(u32);
55impl MetadataId {
56 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
57}
58
59#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
60pub struct NodeId(pub u32);
61impl NodeId {
62 pub const MISSING: NodeId = NodeId(id::MISSING);
63 pub const BOT: NodeId = NodeId(id::BOT);
64 pub const EPS: NodeId = NodeId(id::EPS);
65 pub const TOP: NodeId = NodeId(id::TOP);
66 pub const TS: NodeId = NodeId(id::TOPSTAR);
67 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
68 pub const BEGIN: NodeId = NodeId(id::BEGIN);
69 pub const END: NodeId = NodeId(id::END);
70
71 #[inline]
72 pub fn as_u32(self) -> u32 {
73 self.0
74 }
75
76 #[inline]
77 pub fn from_u32(v: u32) -> NodeId {
78 NodeId(v)
79 }
80}
81impl Debug for NodeId {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 let num = &self.0;
84 f.write_str(format!("{num}").as_str())
85 }
86}
87
88#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
89pub struct TRegexId(u32);
90impl TRegexId {
91 pub const MISSING: TRegexId = TRegexId(id::MISSING);
92 pub const EPS: TRegexId = TRegexId(id::EPS);
93 pub const BOT: TRegexId = TRegexId(id::BOT);
94 pub const TOP: TRegexId = TRegexId(id::TOP);
95 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
96}
97
98#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
99pub(crate) struct MetaFlags(u8);
100impl MetaFlags {
101 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
104 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 3);
105 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 4);
106 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 5);
107 pub(crate) const CONTAINS_LOOKBEHIND: MetaFlags = MetaFlags(1 << 6);
108 pub(crate) const CONTAINS_LOOKAHEAD: MetaFlags = MetaFlags(1 << 7);
109
110 #[inline]
111 pub(crate) fn nullability(self) -> Nullability {
112 Nullability(self.0 & Self::NULL_MASK)
113 }
114
115 #[inline]
116 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
117 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
118 }
119
120 #[inline]
121 pub(crate) fn has(self, flag: MetaFlags) -> bool {
122 self.0 & flag.0 != 0
123 }
124 #[inline]
125 const fn and(self, other: MetaFlags) -> MetaFlags {
126 MetaFlags(self.0 & other.0)
127 }
128 #[inline]
129 const fn or(self, other: MetaFlags) -> MetaFlags {
130 MetaFlags(self.0 | other.0)
131 }
132
133 pub(crate) fn contains_inter(self) -> bool {
134 self.has(MetaFlags::CONTAINS_INTER)
135 }
136
137 pub(crate) fn all_contains_flags(self) -> MetaFlags {
138 self.and(
139 MetaFlags::CONTAINS_ANCHORS
140 .or(MetaFlags::CONTAINS_INTER)
141 .or(MetaFlags::CONTAINS_LOOKBEHIND)
142 .or(MetaFlags::CONTAINS_LOOKAHEAD),
143 )
144 }
145}
146
147#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
148#[repr(u8)]
149pub enum Kind {
150 #[default]
151 Pred,
152 Star,
153 Begin,
154 End,
155 Concat,
156 Union,
157 Compl,
158 Lookbehind,
159 Lookahead,
160 Inter,
161 Counted,
162}
163
164#[derive(Eq, Hash, PartialEq, Clone)]
165struct Metadata {
166 flags: MetaFlags,
167 nulls: NullsId,
168}
169
170struct MetadataBuilder {
171 num_created: u32,
172 solver: Solver,
173 nb: NullsBuilder,
174 index: FxHashMap<Metadata, MetadataId>,
175 pub array: Vec<Metadata>,
176}
177
178mod helpers {
179 pub(crate) fn incr_rel(n1: u32) -> u32 {
180 match n1.overflowing_add(1) {
181 (_, true) => u32::MAX,
182 (res, false) => res,
183 }
184 }
185}
186
187impl MetadataBuilder {
188 fn new() -> MetadataBuilder {
189 Self {
190 index: FxHashMap::default(),
191 array: vec![Metadata {
192 flags: MetaFlags::ZERO,
193 nulls: NullsId::EMPTY,
194 }],
195 solver: Solver::new(),
196 num_created: 0,
197 nb: NullsBuilder::new(),
198 }
199 }
200
201 fn init(&mut self, inst: Metadata) -> MetadataId {
202 self.num_created += 1;
203 let new_id = MetadataId(self.num_created);
204 self.index.insert(inst.clone(), new_id);
205 self.array.push(inst);
206 new_id
207 }
208
209 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210 match self.index.get(&inst) {
211 Some(&id) => id,
212 None => self.init(inst),
213 }
214 }
215
216 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217 &self.array[inst.0 as usize]
218 }
219
220 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221 setflags.all_contains_flags()
222 }
223
224 fn flags_star(&self, body: MetadataId, body_id: NodeId) -> MetaFlags {
225 let left = &self.array[body.0 as usize].flags;
226 let contains = self.get_contains(*left);
227 let inf = if body_id == NodeId::BOT {
229 MetaFlags::ZERO
230 } else {
231 MetaFlags::INFINITE_LENGTH
232 };
233 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(inf))
234 }
235
236 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
237 let left = &self.array[left_id.0 as usize].flags;
238 let null = left.nullability().not().and(Nullability::ALWAYS);
239 let contains = self.get_contains(*left);
240 MetaFlags::with_nullability(null, contains.or(MetaFlags::INFINITE_LENGTH))
241 }
242
243 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
244 let left = &self.array[left_id.0 as usize].flags;
245 let right = &self.array[right_id.0 as usize].flags;
246 let null = left.nullability().and(right.nullability());
247 let contains = self.get_contains(left.or(*right));
248 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
249 MetaFlags::with_nullability(null, contains.or(len))
250 }
251
252 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
253 let left = &self.array[left_id.0 as usize].flags;
254 let right = &self.array[right_id.0 as usize].flags;
255 let null = left.nullability().and(right.nullability());
256 let contains = self
257 .get_contains(left.or(*right))
258 .or(MetaFlags::CONTAINS_INTER);
259 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
260 MetaFlags::with_nullability(null, contains.or(len))
261 }
262
263 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
264 let left = &self.array[left_id.0 as usize].flags;
265 let right = &self.array[right_id.0 as usize].flags;
266 let null = left.nullability().or(right.nullability());
267 let contains = self.get_contains(left.or(*right));
268 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
269 MetaFlags::with_nullability(null, contains.or(len))
270 }
271}
272
273#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
274pub struct NodeKey {
275 pub(crate) kind: Kind,
276 pub(crate) left: NodeId,
277 pub(crate) right: NodeId,
278 pub(crate) extra: u32,
279}
280
281#[derive(Eq, Hash, PartialEq, Clone, Debug)]
282pub enum TRegex<TSet> {
283 Leaf(NodeId),
284 ITE(TSet, TRegexId, TRegexId),
285}
286
287#[derive(Eq, Hash, PartialEq, Clone, Copy)]
288pub(crate) struct NodeFlags(u8);
289impl NodeFlags {
290 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
291 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
292 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
293
294 fn is_checked(self) -> bool {
295 self.0 >= NodeFlags::IS_CHECKED.0
296 }
297 fn is_empty(self) -> bool {
298 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
299 }
300}
301
302#[derive(Eq, Hash, PartialEq, Clone, Copy)]
303pub(crate) struct BuilderFlags(u8);
304impl BuilderFlags {
305 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
306 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
307}
308
309pub struct RegexBuilder {
310 mb: MetadataBuilder,
311 temp_vec: Vec<NodeId>,
312 num_created: u32,
313 index: FxHashMap<NodeKey, NodeId>,
314 array: Vec<NodeKey>,
315 metadata: Vec<MetadataId>,
316 reversed: Vec<NodeId>,
317 cache_empty: FxHashMap<NodeId, NodeFlags>,
318 tr_cache: FxHashMap<TRegex<TSetId>, TRegexId>,
319 tr_array: Vec<TRegex<TSetId>>,
320 tr_der_center: Vec<TRegexId>,
321 tr_der_begin: Vec<TRegexId>,
322 flags: BuilderFlags,
323 pub lookahead_context_max: u32,
325 mk_binary_memo: FxHashMap<(TRegexId, TRegexId), TRegexId>,
326 clean_cache: FxHashMap<(TSetId, TRegexId), TRegexId>,
327}
328
329impl NodeId {
330 fn is_missing(&self) -> bool {
331 *self == NodeId::MISSING
332 }
333 #[inline]
334 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
335 b.get_flags_contains(self)
336 }
337 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
338 if self == tail {
339 true
340 } else if self.is_kind(b, Kind::Concat) {
341 self.right(b).has_concat_tail(b, tail)
342 } else {
343 false
344 }
345 }
346 fn missing_to_eps(&self) -> NodeId {
347 if *self == NodeId::MISSING {
348 NodeId::EPS
349 } else {
350 *self
351 }
352 }
353
354 #[inline]
355 fn kind(self, b: &RegexBuilder) -> Kind {
356 b.get_kind(self)
357 }
358 #[inline]
359 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
360 b.get_kind(self) == k
361 }
362 #[inline]
363 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
364 b.nullability(self) == Nullability::NEVER
365 }
366 #[inline]
367 pub fn nullability(self, b: &RegexBuilder) -> Nullability {
368 b.nullability(self)
369 }
370
371 #[inline]
372 pub fn is_center_nullable(self, b: &RegexBuilder) -> bool {
373 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
374 }
375 #[inline]
376 pub fn is_begin_nullable(self, b: &RegexBuilder) -> bool {
377 b.nullability(self).has(Nullability::BEGIN)
378 }
379 #[inline]
380 pub fn left(self, b: &RegexBuilder) -> NodeId {
381 b.get_left(self)
382 }
383
384 #[inline]
385 pub fn right(self, b: &RegexBuilder) -> NodeId {
386 b.get_right(self)
387 }
388
389 #[inline]
390 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, ResharpError> {
391 b.der(self, mask)
392 }
393
394 #[inline]
395 fn extra(self, b: &RegexBuilder) -> u32 {
396 b.get_extra(self)
397 }
398
399 #[inline]
400 fn is_pred(self, b: &RegexBuilder) -> bool {
401 b.get_kind(self) == Kind::Pred
402 }
403
404 #[inline]
405 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
406 debug_assert!(self.is_pred(b));
407 TSetId(b.get_extra(self))
408 }
409 #[inline]
410 fn is_star(self, b: &RegexBuilder) -> bool {
411 if NodeId::EPS == self {
412 return false;
413 }
414 b.get_kind(self) == Kind::Star
415 }
416
417 pub fn contains_lookbehind(self, b: &RegexBuilder) -> bool {
418 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKBEHIND)
419 }
420
421 pub fn contains_lookahead(self, b: &RegexBuilder) -> bool {
422 b.get_meta_flags(self).has(MetaFlags::CONTAINS_LOOKAHEAD)
423 }
424
425 pub fn contains_lookaround(self, b: &RegexBuilder) -> bool {
426 b.get_meta_flags(self)
427 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
428 }
429
430 #[inline]
431 pub(crate) fn is_inter(self, b: &RegexBuilder) -> bool {
432 b.get_kind(self) == Kind::Inter
433 }
434
435 #[inline]
436 pub(crate) fn is_compl(self, b: &RegexBuilder) -> bool {
437 b.get_kind(self) == Kind::Compl
438 }
439
440 #[inline]
441 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
442 if self.is_concat(b) {
443 let r = self.right(b);
444 return r.is_star(b) && r.left(b) == self.left(b);
445 }
446 false
447 }
448
449 #[inline]
450 fn is_begin(self) -> bool {
451 self == NodeId::BEGIN
452 }
453
454 #[inline]
455 fn is_end(self) -> bool {
456 self == NodeId::END
457 }
458
459 #[inline]
460 fn is_union(self, b: &RegexBuilder) -> bool {
461 b.get_kind(self) == Kind::Union
462 }
463
464 #[inline]
465 fn is_concat(self, b: &RegexBuilder) -> bool {
466 b.get_kind(self) == Kind::Concat
467 }
468
469 #[inline]
470 fn is_lookahead(self, b: &RegexBuilder) -> bool {
471 b.get_kind(self) == Kind::Lookahead
472 }
473
474 #[inline]
475 fn is_lookbehind(self, b: &RegexBuilder) -> bool {
476 b.get_kind(self) == Kind::Lookbehind
477 }
478
479 #[inline]
480 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
481 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
482 Some(b.get_right(self))
483 } else {
484 None
485 }
486 }
487
488 #[inline]
489 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
490 if self.is_concat(b) {
491 let left = self.left(b);
492 let right = self.right(b);
493 if left.is_kind(b, Kind::Compl) && left.left(b) == NodeId::TOPPLUS {
494 return right == NodeId::END;
495 }
496 }
497 false
498 }
499
500 #[inline]
501 #[allow(dead_code)]
502 fn is_ts(self) -> bool {
503 NodeId::TS == self
504 }
505
506 #[inline]
507 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
508 if NodeId::EPS == self {
509 return None;
510 }
511 if self.is_star(b) && self.left(b).is_pred(b) {
512 Some(self.left(b))
513 } else {
514 None
515 }
516 }
517
518 #[inline]
519 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
520 if self.is_concat(b) && self.left(b) == NodeId::TS {
521 let middle = self.right(b);
522 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
523 return Some(middle.left(b));
524 }
525 }
526 None
527 }
528
529 #[inline]
530 pub(crate) fn iter_union(
531 self,
532 b: &mut RegexBuilder,
533 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
534 ) {
535 let mut curr: NodeId = self;
536 while curr.kind(b) == Kind::Union {
537 f(b, curr.left(b));
538 curr = curr.right(b);
539 }
540 f(b, curr);
541 }
542
543 #[inline]
544 pub(crate) fn iter_inter(
545 self,
546 b: &mut RegexBuilder,
547 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
548 ) {
549 let mut curr: NodeId = self;
550 while curr.kind(b) == Kind::Inter {
551 f(b, curr.left(b));
552 curr = curr.right(b);
553 }
554 f(b, curr);
555 }
556
557 #[inline]
558 pub(crate) fn iter_union_while(
559 self,
560 b: &mut RegexBuilder,
561 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
562 ) {
563 let mut curr: NodeId = self;
564 let mut continue_loop = true;
565 while continue_loop && curr.kind(b) == Kind::Union {
566 continue_loop = f(b, curr.left(b));
567 curr = curr.right(b);
568 }
569 if continue_loop {
570 f(b, curr);
571 }
572 }
573
574 #[inline]
575 pub(crate) fn any_inter_component(
576 self,
577 b: &RegexBuilder,
578 mut f: impl FnMut(NodeId) -> bool,
579 ) -> bool {
580 debug_assert!(self.kind(b) == Kind::Inter);
581 let mut cur = self;
582 while cur.kind(b) == Kind::Inter {
583 if f(cur.left(b)) {
584 return true;
585 }
586 cur = cur.right(b);
587 }
588 f(cur)
589 }
590
591 #[inline]
592 pub(crate) fn any_union_component(
593 self,
594 b: &RegexBuilder,
595 mut f: impl FnMut(NodeId) -> bool,
596 ) -> bool {
597 let mut cur = self;
598 while cur.kind(b) == Kind::Union {
599 if f(cur.left(b)) {
600 return true;
601 }
602 cur = cur.right(b);
603 }
604 f(cur)
605 }
606}
607
608impl Default for RegexBuilder {
609 fn default() -> Self {
610 Self::new()
611 }
612}
613
614impl RegexBuilder {
615 pub fn new() -> RegexBuilder {
616 let mut inst = Self {
617 mb: MetadataBuilder::new(),
618 array: Vec::new(),
619 index: FxHashMap::default(),
620 cache_empty: FxHashMap::default(),
621 tr_array: Vec::new(),
622 tr_cache: FxHashMap::default(),
623 flags: BuilderFlags::ZERO,
624 lookahead_context_max: 800,
625 num_created: 0,
626 metadata: Vec::new(),
627 reversed: Vec::new(),
628 tr_der_center: Vec::new(),
629 tr_der_begin: Vec::new(),
630 temp_vec: Vec::new(),
631 mk_binary_memo: FxHashMap::default(),
632 clean_cache: FxHashMap::default(),
633 };
634 inst.array.push(NodeKey::default());
635 inst.mk_pred(TSetId::EMPTY);
636 inst.mk_star(NodeId::BOT);
637 inst.mk_pred(TSetId::FULL);
638 inst.mk_star(NodeId::TOP);
639 let top_plus_id = inst.mk_plus(NodeId::TOP);
640 inst.mk_unset(Kind::Begin);
641 inst.mk_unset(Kind::End);
642 debug_assert!(top_plus_id == NodeId::TOPPLUS);
643
644 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
645 inst.mk_leaf(NodeId::BOT);
646 inst.mk_leaf(NodeId::EPS);
647 inst.mk_leaf(NodeId::TOP);
648 inst.mk_leaf(NodeId::TS);
649
650 inst.flags = BuilderFlags::SUBSUME;
651 inst
652 }
653
654 #[inline]
655 pub fn solver_ref(&self) -> &Solver {
656 &self.mb.solver
657 }
658
659 #[inline]
660 pub fn solver(&mut self) -> &mut Solver {
661 &mut self.mb.solver
662 }
663
664 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
665 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
666 self.tr_cache.insert(inst.clone(), new_id);
667 self.tr_array.push(inst);
668 new_id
669 }
670
671 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
672 match self.tr_cache.get(&inst) {
673 Some(&id) => id,
674 None => self.tr_init(inst),
675 }
676 }
677
678 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
679 &self.tr_array[inst.0 as usize]
680 }
681
682 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
683 let solv = self.solver();
684 !solv.is_sat_id(cond1, cond2)
685 }
686
687 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
688 let term = TRegex::<TSetId>::Leaf(node_id);
689 self.get_tregex_id(term)
690 }
691
692 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
693 let tmp_inst = TRegex::ITE(cond, _then, _else);
694 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
695 return *cached;
696 }
697 if _then == _else {
698 return _then;
699 }
700 if self.solver().is_full_id(cond) {
701 return _then;
702 }
703 if self.solver().is_empty_id(cond) {
704 return _else;
705 }
706 let _clean_then = match self.tr_array[_then.0 as usize] {
707 TRegex::Leaf(_) => _then,
708 _ => self.clean(cond, _then),
709 };
710 let notcond = self.solver().not_id(cond);
711 let _clean_else = match self.tr_array[_else.0 as usize] {
712 TRegex::Leaf(_) => _else,
713 _ => self.clean(notcond, _else),
714 };
715
716 if _clean_then == _clean_else {
717 return _clean_then;
718 }
719 match self.get_tregex(_clean_then) {
721 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
722 let _cond_then = *leftcond;
723 let new_then = *_inner_then;
724 let sand = self.solver().and_id(cond, _cond_then);
725 return self.mk_ite(sand, new_then, _clean_else);
726 }
727 _ => {}
728 }
729 match self.get_tregex(_clean_else) {
731 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
733 let e2clone = *_e2;
734 let new_cond = self.mb.solver.or_id(cond, *_c2);
735 return self.mk_ite(new_cond, _clean_then, e2clone);
736 }
737 _ => {}
738 }
739
740 if _clean_then == TRegexId::BOT {
741 let flip_cond = self.solver().not_id(cond);
742 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
743 return cleaned_id;
744 }
745
746 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
747 }
748
749 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
750 if let Some(&cached) = self.clean_cache.get(&(beta, tterm)) {
751 return cached;
752 }
753 let result = match *self.get_tregex(tterm) {
754 TRegex::Leaf(_) => tterm,
755 TRegex::ITE(alpha, _then_id, _else_id) => {
756 let notalpha = self.mb.solver.not_id(alpha);
757 if self.mb.solver.unsat_id(beta, alpha) {
758 self.clean(beta, _else_id)
760 } else if self.unsat(beta, notalpha) {
761 self.clean(beta, _then_id)
763 } else {
764 let tc = self.mb.solver.and_id(beta, alpha);
765 let ec = self.mb.solver.and_id(beta, notalpha);
766 let new_then = self.clean(tc, _then_id);
767 let new_else = self.clean(ec, _else_id);
768 self.mk_ite(alpha, new_then, new_else)
769 }
770 }
771 };
772 self.clean_cache.insert((beta, tterm), result);
773 result
774 }
775
776 fn mk_unary(
777 &mut self,
778 term: TRegexId,
779 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
780 ) -> TRegexId {
781 match self.tr_array[term.0 as usize] {
782 TRegex::Leaf(node_id) => {
783 let applied = apply(self, node_id);
784 self.mk_leaf(applied)
785 }
786 TRegex::ITE(c1, _then, _else) => {
787 let _then1 = self.mk_unary(_then, apply);
788 let _else1 = self.mk_unary(_else, apply);
789 self.mk_ite(c1, _then1, _else1)
790 }
791 }
792 }
793
794 fn mk_binary_result(
795 &mut self,
796 left: TRegexId,
797 right: TRegexId,
798 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, ResharpError>,
799 ) -> Result<TRegexId, ResharpError> {
800 match self.tr_array[left.0 as usize] {
801 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
802 TRegex::Leaf(right_node_id) => {
803 let applied = apply(self, left_node_id, right_node_id)?;
804 Ok(self.mk_leaf(applied))
805 }
806 TRegex::ITE(c2, _then, _else) => {
807 let then2 = self.mk_binary_result(left, _then, apply)?;
808 let else2 = self.mk_binary_result(left, _else, apply)?;
809 Ok(self.mk_ite(c2, then2, else2))
810 }
811 },
812 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
813 TRegex::Leaf(_) => {
814 let then2 = self.mk_binary_result(_then1, right, apply)?;
815 let else2 = self.mk_binary_result(_else1, right, apply)?;
816 Ok(self.mk_ite(c1, then2, else2))
817 }
818 TRegex::ITE(c2, _then2, _else2) => {
819 if c1 == c2 {
820 let _then = self.mk_binary_result(_then1, _then2, apply)?;
821 let _else = self.mk_binary_result(_else1, _else2, apply)?;
822 return Ok(self.mk_ite(c1, _then, _else));
823 }
824 if c1.0 > c2.0 {
825 let _then = self.mk_binary_result(_then1, right, apply)?;
826 let _else = self.mk_binary_result(_else1, right, apply)?;
827 Ok(self.mk_ite(c1, _then, _else))
828 } else {
829 let _then = self.mk_binary_result(left, _then2, apply)?;
830 let _else = self.mk_binary_result(left, _else2, apply)?;
831 Ok(self.mk_ite(c2, _then, _else))
832 }
833 }
834 },
835 }
836 }
837
838 fn mk_binary(
839 &mut self,
840 left: TRegexId,
841 right: TRegexId,
842 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
843 ) -> TRegexId {
844 self.mk_binary_memo.clear();
845 self.mk_binary_inner(left, right, apply)
846 }
847
848 fn mk_binary_inner(
849 &mut self,
850 left: TRegexId,
851 right: TRegexId,
852 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
853 ) -> TRegexId {
854 if left == right {
855 return self.mk_unary(left, &mut |b, n| apply(b, n, n));
856 }
857 if let Some(&cached) = self.mk_binary_memo.get(&(left, right)) {
858 return cached;
859 }
860 let result = match self.tr_array[left.0 as usize] {
861 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
862 TRegex::Leaf(right_node_id) => {
863 let applied = apply(self, left_node_id, right_node_id);
864 self.mk_leaf(applied)
865 }
866 TRegex::ITE(c2, _then, _else) => {
867 let then2 = self.mk_binary_inner(left, _then, apply);
868 let else2 = self.mk_binary_inner(left, _else, apply);
869 self.mk_ite(c2, then2, else2)
870 }
871 },
872 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
873 TRegex::Leaf(_) => {
874 let then2 = self.mk_binary_inner(_then1, right, apply);
875 let else2 = self.mk_binary_inner(_else1, right, apply);
876 self.mk_ite(c1, then2, else2)
877 }
878 TRegex::ITE(c2, _then2, _else2) => {
879 if c1 == c2 {
880 let _then = self.mk_binary_inner(_then1, _then2, apply);
881 let _else = self.mk_binary_inner(_else1, _else2, apply);
882 self.mk_ite(c1, _then, _else)
883 } else if c1.0 > c2.0 {
884 let _then = self.mk_binary_inner(_then1, right, apply);
885 let _else = self.mk_binary_inner(_else1, right, apply);
886 self.mk_ite(c1, _then, _else)
887 } else {
888 let _then = self.mk_binary_inner(left, _then2, apply);
889 let _else = self.mk_binary_inner(left, _else2, apply);
890 self.mk_ite(c2, _then, _else)
891 }
892 }
893 },
894 };
895 self.mk_binary_memo.insert((left, right), result);
896 result
897 }
898
899 pub fn get_nulls(
900 &mut self,
901 pending_rel: u32,
902 mask: Nullability,
903 acc: &mut BTreeSet<NullState>,
904 node_id: NodeId,
905 ) {
906 debug_assert!(node_id != NodeId::MISSING);
907 if !self.is_nullable(node_id, mask) {
908 return;
909 }
910 match self.get_kind(node_id) {
911 Kind::Pred => {}
912 Kind::End => {
913 if mask.has(Nullability::END) {
914 acc.insert(NullState::new(mask.and(Nullability::END), pending_rel));
915 }
916 }
917 Kind::Begin => {
918 if mask.has(Nullability::BEGIN) {
919 acc.insert(NullState::new(mask.and(Nullability::BEGIN), pending_rel));
920 }
921 }
922 Kind::Concat => {
923 let new_mask = self.nullability(node_id).and(mask);
924 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
925 if self.is_nullable(node_id.left(self), mask) {
926 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
927 }
928 }
929 Kind::Union => {
930 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
931 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
932 }
933 Kind::Inter => {
934 let new_mask = self.nullability(node_id).and(mask);
935 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
936 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
937 }
938 Kind::Star => {
939 acc.insert(NullState::new(mask, pending_rel));
940 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
941 }
942 Kind::Compl => {
943 if !self.is_nullable(node_id.left(self), mask) {
944 acc.insert(NullState::new(mask, 0));
945 }
946 }
947 Kind::Lookbehind => {
948 let new_mask = self.nullability(node_id).and(mask);
949 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
950 if node_id.right(self) != NodeId::MISSING {
951 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
952 }
953 }
954 Kind::Lookahead => {
955 let la_inner = self.get_lookahead_inner(node_id);
956 if self.is_nullable(la_inner, mask) {
957 let rel = self.get_lookahead_rel(node_id);
958 if rel != u32::MAX {
959 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
960 }
961 let la_tail = self.get_lookahead_tail(node_id);
963 if la_tail != NodeId::MISSING {
964 self.get_nulls(pending_rel, mask, acc, la_tail);
965 }
966 }
967 }
968 Kind::Counted => {
969 let packed = self.get_extra(node_id);
970 let best = packed >> 16;
971 if best > 0 {
972 acc.insert(NullState::new(mask, pending_rel + best));
973 }
974 }
975 }
976 }
977
978 pub fn contains_look(&mut self, node_id: NodeId) -> bool {
979 self.get_meta_flags(node_id)
980 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
981 }
982
983 pub fn contains_anchors(&self, node_id: NodeId) -> bool {
984 self.get_meta_flags(node_id)
985 .has(MetaFlags::CONTAINS_ANCHORS)
986 }
987
988 pub fn is_infinite(&self, node_id: NodeId) -> bool {
989 self.get_meta_flags(node_id).has(MetaFlags::INFINITE_LENGTH)
990 }
991
992 pub fn max_lookahead_body_len(&self, node_id: NodeId) -> u32 {
998 let mut visited: FxHashMap<NodeId, ()> = FxHashMap::default();
999 let mut stack = vec![node_id];
1000 let mut best: u32 = 0;
1001 while let Some(n) = stack.pop() {
1002 if n == NodeId::MISSING || visited.insert(n, ()).is_some() {
1003 continue;
1004 }
1005 let kind = self.get_kind(n);
1006 if kind == Kind::Lookahead {
1007 let mut body = self.get_lookahead_inner(n);
1008 if body.is_concat(self) && body.right(self) == NodeId::TS {
1011 body = body.left(self);
1012 }
1013 let (_, mx) = self.get_min_max_length(body);
1014 if mx > best {
1015 best = mx;
1016 }
1017 }
1018 match kind {
1019 Kind::Pred | Kind::Begin | Kind::End => {}
1020 Kind::Star | Kind::Compl => stack.push(n.left(self)),
1021 _ => {
1022 stack.push(n.left(self));
1023 stack.push(n.right(self));
1024 }
1025 }
1026 }
1027 best
1028 }
1029
1030 pub fn get_min_max_length(&self, node_id: NodeId) -> (u32, u32) {
1031 if self.is_infinite(node_id) {
1032 if node_id.is_inter(self) {
1033 self.get_bounded_length(node_id)
1034 } else {
1035 (self.get_min_length_only(node_id), u32::MAX)
1036 }
1037 } else {
1038 self.get_bounded_length(node_id)
1039 }
1040 }
1041
1042 fn get_bounded_length(&self, node_id: NodeId) -> (u32, u32) {
1043 if node_id == NodeId::EPS {
1044 return (0, 0);
1045 }
1046 match self.get_kind(node_id) {
1047 Kind::End | Kind::Begin => (0, 0),
1048 Kind::Pred => (1, 1),
1049 Kind::Concat => {
1050 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
1051 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1052 (lmin + rmin, lmax.saturating_add(rmax))
1053 }
1054 Kind::Union => {
1055 let (lmin, lmax) = self.get_bounded_length(node_id.left(self));
1056 let (rmin, rmax) = self.get_bounded_length(node_id.right(self));
1057 (lmin.min(rmin), lmax.max(rmax))
1058 }
1059 Kind::Inter => {
1060 let (lmin, lmax) = self.get_min_max_length(node_id.left(self));
1061 let (rmin, rmax) = self.get_min_max_length(node_id.right(self));
1062 (lmin.max(rmin), lmax.min(rmax))
1063 }
1064 Kind::Lookahead => {
1065 let body = node_id.left(self);
1066 if self.is_infinite(body) {
1067 return (0, u32::MAX);
1068 }
1069 let right = node_id.right(self);
1070 if right.is_missing() {
1071 (0, 0)
1072 } else {
1073 self.get_min_max_length(right)
1074 }
1075 }
1076 Kind::Counted => (0, 0),
1077 Kind::Star | Kind::Lookbehind | Kind::Compl => (0, u32::MAX),
1078 }
1079 }
1080
1081 pub fn get_fixed_length(&self, node_id: NodeId) -> Option<u32> {
1082 match self.get_kind(node_id) {
1083 Kind::End | Kind::Begin => Some(0),
1084 Kind::Pred => Some(1),
1085 Kind::Concat => {
1086 let l = self.get_fixed_length(node_id.left(self))?;
1087 let r = self.get_fixed_length(node_id.right(self))?;
1088 Some(l + r)
1089 }
1090 Kind::Union => {
1091 let l = self.get_fixed_length(node_id.left(self))?;
1092 let r = self.get_fixed_length(node_id.right(self))?;
1093 if l == r {
1094 Some(l)
1095 } else {
1096 None
1097 }
1098 }
1099 Kind::Inter => {
1100 let l = self.get_fixed_length(node_id.left(self));
1101 let r = self.get_fixed_length(node_id.right(self));
1102 match (l, r) {
1103 (Some(a), Some(b)) if a == b => Some(a),
1104 (Some(_), Some(_)) => None,
1105 (Some(a), None) | (None, Some(a)) => Some(a),
1106 (None, None) => None,
1107 }
1108 }
1109 Kind::Lookahead => {
1110 let right = node_id.right(self);
1111 if right.is_missing() {
1112 Some(0)
1113 } else {
1114 self.get_fixed_length(right)
1115 }
1116 }
1117 Kind::Counted => Some(0),
1118 Kind::Star | Kind::Lookbehind | Kind::Compl => None,
1119 }
1120 }
1121
1122 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
1123 match self.get_kind(node_id) {
1124 Kind::End | Kind::Begin => 0,
1125 Kind::Pred => 1,
1126 Kind::Concat => {
1127 self.get_min_length_only(node_id.left(self))
1128 + self.get_min_length_only(node_id.right(self))
1129 }
1130 Kind::Union => self
1131 .get_min_length_only(node_id.left(self))
1132 .min(self.get_min_length_only(node_id.right(self))),
1133 Kind::Inter => self
1134 .get_min_length_only(node_id.left(self))
1135 .max(self.get_min_length_only(node_id.right(self))),
1136 Kind::Star | Kind::Lookbehind | Kind::Lookahead | Kind::Counted => 0,
1137 Kind::Compl => {
1138 if self.nullability(node_id.left(self)) == Nullability::NEVER {
1139 0
1140 } else {
1141 1
1142 }
1143 }
1144 }
1145 }
1146
1147 pub fn starts_with_ts(&self, node_id: NodeId) -> bool {
1148 if node_id == NodeId::TS {
1149 return true;
1150 }
1151 match self.get_kind(node_id) {
1152 Kind::Inter => {
1153 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1154 }
1155 Kind::Union => {
1156 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
1157 }
1158 Kind::Concat => self.starts_with_ts(node_id.left(self)),
1159 _ => false,
1160 }
1161 }
1162
1163 #[inline]
1164 pub fn ends_with_ts(&self, node_id: NodeId) -> bool {
1165 if node_id.is_concat(self) {
1166 return self.ends_with_ts(node_id.right(self));
1167 }
1168 if node_id.is_lookahead(self) {
1169 let tail = self.get_lookahead_tail(node_id);
1170 if !tail.is_missing() {
1171 return self.ends_with_ts(tail);
1172 }
1173 }
1174 node_id == NodeId::TS
1175 }
1176
1177 pub fn ends_with_ts_any_branch(&self, node_id: NodeId) -> bool {
1178 if node_id == NodeId::TS {
1179 return true;
1180 }
1181 match self.get_kind(node_id) {
1182 Kind::Concat => self.ends_with_ts_any_branch(node_id.right(self)),
1183 Kind::Union => {
1184 self.ends_with_ts_any_branch(node_id.left(self))
1185 || self.ends_with_ts_any_branch(node_id.right(self))
1186 }
1187 Kind::Lookahead => {
1188 let tail = self.get_lookahead_tail(node_id);
1189 !tail.is_missing() && self.ends_with_ts_any_branch(tail)
1190 }
1191 _ => false,
1192 }
1193 }
1194
1195 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
1196 debug_assert!(node_id != NodeId::MISSING);
1197 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
1198 }
1199
1200 pub(crate) fn cache_der(
1201 &mut self,
1202 node_id: NodeId,
1203 result: TRegexId,
1204 mask: Nullability,
1205 ) -> TRegexId {
1206 if mask == Nullability::CENTER {
1207 self.tr_der_center[node_id.0 as usize] = result
1208 } else {
1209 self.tr_der_begin[node_id.0 as usize] = result
1210 };
1211 result
1212 }
1213
1214 pub(crate) fn try_cached_der(
1215 &mut self,
1216 node_id: NodeId,
1217 mask: Nullability,
1218 ) -> Option<TRegexId> {
1219 let cache = if mask == Nullability::CENTER {
1220 &mut self.tr_der_center
1221 } else {
1222 &mut self.tr_der_begin
1223 };
1224 match cache.get(node_id.0 as usize) {
1225 Some(&TRegexId::MISSING) => {}
1226 Some(&result) => {
1227 return Some(result);
1228 }
1229 None => {
1230 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
1231 }
1232 }
1233 None
1234 }
1235
1236 pub fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
1237 let mut term = self.get_tregex(der);
1238 loop {
1239 match *term {
1240 TRegex::Leaf(node_id) => return node_id,
1241 TRegex::ITE(cond, _then, _else) => {
1242 if self.solver().is_sat_id(set, cond) {
1243 term = self.get_tregex(_then);
1244 } else {
1245 term = self.get_tregex(_else);
1246 }
1247 }
1248 }
1249 }
1250 }
1251
1252 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, ResharpError> {
1253 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
1254 debug_assert!(
1255 node_id != NodeId::MISSING,
1256 "attempting to derive missing node"
1257 );
1258 if let Some(result) = self.try_cached_der(node_id, mask) {
1259 return Ok(result);
1260 }
1261
1262 let result = match node_id.kind(self) {
1263 Kind::Compl => {
1264 let leftd = node_id.left(self).der(self, mask)?;
1265 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
1266 }
1267 Kind::Inter => {
1268 let leftd = node_id.left(self).der(self, mask)?;
1269 let rightd = node_id.right(self).der(self, mask)?;
1270 {
1271 let this = &mut *self;
1272 this.mk_binary(
1273 leftd,
1274 rightd,
1275 &mut (|b, left, right| b.mk_inter(left, right)),
1276 )
1277 }
1278 }
1279 Kind::Union => {
1280 let leftd = self.der(node_id.left(self), mask)?;
1281 let rightd = self.der(node_id.right(self), mask)?;
1282 {
1283 let this = &mut *self;
1284 this.mk_binary(
1285 leftd,
1286 rightd,
1287 &mut (|b, left, right| b.mk_union(left, right)),
1288 )
1289 }
1290 }
1291 Kind::Concat => {
1292 let head = node_id.left(self);
1293 let tail = node_id.right(self);
1294 let tail_leaf = self.mk_leaf(tail);
1295 let head_der = self.der(head, mask)?;
1296
1297 if self.is_nullable(head, mask) {
1298 let rightd = self.der(tail, mask)?;
1299 let ldr = {
1300 let this = &mut *self;
1301 this.mk_binary(
1302 head_der,
1303 tail_leaf,
1304 &mut (|b, left, right| b.mk_concat(left, right)),
1305 )
1306 };
1307 {
1308 let this = &mut *self;
1309 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1310 }
1311 } else {
1312 let this = &mut *self;
1313 this.mk_binary(
1314 head_der,
1315 tail_leaf,
1316 &mut (|b, left, right| b.mk_concat(left, right)),
1317 )
1318 }
1319 }
1320 Kind::Star => {
1321 if node_id == NodeId::EPS {
1322 TRegexId::BOT
1323 } else {
1324 let left = node_id.left(self);
1325 let r_decr_leaf = self.mk_leaf(node_id);
1326 let r_der = self.der(left, mask)?;
1327 let this = &mut *self;
1328 this.mk_binary(
1329 r_der,
1330 r_decr_leaf,
1331 &mut (|b, left, right| b.mk_concat(left, right)),
1332 )
1333 }
1334 }
1335 Kind::Lookbehind => {
1336 let lb_prev_der = {
1337 let lb_prev = self.get_lookbehind_prev(node_id);
1338 if lb_prev == NodeId::MISSING {
1339 TRegexId::MISSING
1340 } else {
1341 self.der(lb_prev, mask)?
1342 }
1343 };
1344 let lb_inner = self.get_lookbehind_inner(node_id);
1345 let lb_inner_der = self.der(lb_inner, mask)?;
1346 {
1347 let this = &mut *self;
1348 this.mk_binary_result(
1349 lb_inner_der,
1350 lb_prev_der,
1351 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1352 )?
1353 }
1354 }
1355 Kind::Lookahead => {
1356 let la_tail = self.get_lookahead_tail(node_id);
1357 let la_body = node_id.left(self);
1358 let rel = self.get_lookahead_rel(node_id);
1359
1360 if self.is_nullable(la_body, mask) {
1361 let right = node_id.right(self).missing_to_eps();
1363 let rder = self.der(right, mask).clone();
1364 return rder;
1365 }
1366
1367 if rel == u32::MAX {
1368 let la_body_der = self.der(la_body, mask)?;
1369 if la_tail.is_kind(self, Kind::Pred) {
1370 let transitioned =
1371 self.transition_term(la_body_der, la_tail.pred_tset(self));
1372 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1373 let concated = self.mk_concat(la_tail, new_la);
1374 return self.der(concated, mask);
1375 }
1376 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1377 let left = la_tail.left(self);
1378 let tset = left.pred_tset(self);
1379 let transitioned = self.transition_term(la_body_der, tset);
1380 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1381 let tail_right = la_tail.right(self);
1382 let concated = self.mk_concat(new_la, tail_right);
1383 let concated = self.mk_concat(left, concated);
1384 return self.der(concated, mask);
1385 }
1386 }
1387
1388 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1389 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1390 let concated = self.mk_concat(la_body, nulls_mask);
1391 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1392 let non_nulled = self.mk_non_nullable_safe(la_tail);
1393 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1394 let new_union = self.mk_union(concated_look, new_look);
1395 return self.der(new_union, mask);
1396 }
1397
1398 let la_tail_der = if la_tail == NodeId::MISSING {
1399 TRegexId::MISSING
1400 } else {
1401 if self.is_nullable(la_tail, mask) {
1402 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1403 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1404 let la_union = self.mk_union(la_tail, nulls_la);
1405 self.der(la_union, mask)?
1406 } else {
1407 self.der(la_tail, mask)?
1408 }
1409 };
1410
1411 let la_body_der = self.der(la_body, mask)?;
1412
1413 if rel != u32::MAX && rel > self.lookahead_context_max {
1414 return Err(ResharpError::AnchorLimit);
1415 }
1416
1417 let la = {
1418 let this = &mut *self;
1419 let rel = helpers::incr_rel(rel);
1420 this.mk_binary(
1421 la_body_der,
1422 la_tail_der,
1423 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1424 )
1425 };
1426
1427 if rel != u32::MAX
1428 && la_tail_der != TRegexId::MISSING
1429 && self.is_nullable(la_tail, mask)
1430 {
1431 let look_only = {
1432 let this = &mut *self;
1433 let right = TRegexId::MISSING;
1434 let rel = helpers::incr_rel(rel);
1435 this.mk_binary(
1436 la_body_der,
1437 right,
1438 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1439 )
1440 };
1441 {
1442 let this = &mut *self;
1443 this.mk_binary(
1444 look_only,
1445 la,
1446 &mut (|b, left, right| b.mk_union(left, right)),
1447 )
1448 }
1449 } else {
1450 la
1451 }
1452 }
1453 Kind::Counted => {
1454 let body = node_id.left(self);
1455 let chain = node_id.right(self);
1456 let packed = self.get_extra(node_id);
1457 let step = (packed & 0xFFFF) as u16;
1458 let best = (packed >> 16) as u16;
1459
1460 let mid_best = if self.is_nullable(body, mask) && step >= best {
1461 step
1462 } else {
1463 best
1464 };
1465
1466 let body_der = self.der(body, mask)?;
1467 let new_step = step.saturating_add(1);
1468 self.mk_unary(body_der, &mut |b, new_body| {
1469 let final_best = if b.is_nullable(new_body, mask) && new_step >= mid_best {
1470 new_step
1471 } else {
1472 mid_best
1473 };
1474 let packed = (final_best as u32) << 16 | new_step as u32;
1475 b.mk_counted(new_body, chain, packed)
1476 })
1477 }
1478 Kind::Begin | Kind::End => TRegexId::BOT,
1479 Kind::Pred => {
1480 let psi = node_id.pred_tset(self);
1481 if psi == TSetId::EMPTY {
1482 TRegexId::BOT
1483 } else {
1484 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1485 }
1486 }
1487 };
1488
1489 self.cache_der(node_id, result, mask);
1493 Ok(result)
1494 }
1495
1496 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1497 debug_assert!(meta_id != MetadataId::MISSING);
1498 match self.metadata.get_mut(node_id.0 as usize) {
1499 Some(v) => *v = meta_id,
1500 None => {
1501 self.metadata
1502 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1503 self.metadata[node_id.0 as usize] = meta_id;
1504 }
1505 }
1506 }
1507
1508 fn cache_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) -> NodeId {
1509 debug_assert!(reversed_id != NodeId::MISSING);
1510 match self.reversed.get_mut(node_id.0 as usize) {
1511 Some(v) => *v = reversed_id,
1512 None => {
1513 self.reversed
1514 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1515 self.reversed[node_id.0 as usize] = reversed_id;
1516 }
1517 }
1518 return reversed_id;
1519 }
1520
1521 fn init(&mut self, inst: NodeKey) -> NodeId {
1522 self.num_created += 1;
1523 let node_id = NodeId(self.num_created);
1524 self.index.insert(inst.clone(), node_id);
1525 match inst.kind {
1526 Kind::Pred => {
1527 let meta_id = self.mb.get_meta_id(Metadata {
1528 flags: MetaFlags::ZERO,
1529 nulls: NullsId::EMPTY,
1530 });
1531 self.init_metadata(node_id, meta_id);
1532 }
1533 Kind::Begin => {
1534 let meta_id = self.mb.get_meta_id(Metadata {
1535 flags: MetaFlags::with_nullability(
1536 Nullability::BEGIN,
1537 MetaFlags::CONTAINS_ANCHORS,
1538 ),
1539 nulls: NullsId::BEGIN0,
1540 });
1541 self.init_metadata(node_id, meta_id);
1542 }
1543 Kind::End => {
1544 let meta_id = self.mb.get_meta_id(Metadata {
1545 flags: MetaFlags::with_nullability(
1546 Nullability::END,
1547 MetaFlags::CONTAINS_ANCHORS,
1548 ),
1549 nulls: NullsId::END0,
1550 });
1551 self.init_metadata(node_id, meta_id);
1552 }
1553 Kind::Inter => {
1554 let m1 = self.get_node_meta_id(inst.left);
1555 let m2 = self.get_node_meta_id(inst.right);
1556 let meta_id = {
1557 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1558 let mask_l = inst.left.nullability(self);
1559 let mask_r = inst.right.nullability(self);
1560 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1561 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1562 nulls = self.mb.nb.and_mask(nulls, mask_l);
1563 nulls = self.mb.nb.and_mask(nulls, mask_r);
1564 let new_meta = Metadata {
1565 flags: self.mb.flags_inter(m1, m2),
1566 nulls,
1567 };
1568 self.mb.get_meta_id(new_meta)
1569 };
1570 self.init_metadata(node_id, meta_id);
1571 }
1572 Kind::Union => {
1573 let m1 = self.get_node_meta_id(inst.left);
1574 let m2 = self.get_node_meta_id(inst.right);
1575 let meta_id = {
1576 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1577 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1578 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1579 let new_meta = Metadata {
1580 flags: self.mb.flags_union(m1, m2),
1581 nulls,
1582 };
1583 self.mb.get_meta_id(new_meta)
1584 };
1585 self.init_metadata(node_id, meta_id);
1586 }
1587 Kind::Concat => {
1588 let flags = self.mb.flags_concat(
1589 self.get_node_meta_id(inst.left),
1590 self.get_node_meta_id(inst.right),
1591 );
1592
1593 let right_nullability = inst.right.nullability(self);
1594 let left_nullability = inst.left.nullability(self);
1595 let nulls_left = self.get_nulls_id(inst.left);
1596 let nulls_right = self.get_nulls_id(inst.right);
1597 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1598 let mask = right_nullability.and(left_nullability);
1599 nulls = self.mb.nb.and_mask(nulls, mask);
1600
1601 let new_id = self.mb.get_meta_id(Metadata { flags, nulls });
1602 self.init_metadata(node_id, new_id);
1603 }
1604 Kind::Star => {
1605 let left_nulls = self.get_nulls_id(inst.left);
1606 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1607 let meta_id = self.mb.get_meta_id(Metadata {
1608 flags: self
1609 .mb
1610 .flags_star(self.get_node_meta_id(inst.left), inst.left),
1611 nulls,
1612 });
1613 self.init_metadata(node_id, meta_id);
1614 }
1615 Kind::Compl => {
1616 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1617 let meta_id = self.mb.get_meta_id(Metadata {
1618 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1619 nulls,
1620 });
1621 self.init_metadata(node_id, meta_id);
1622 }
1623 Kind::Lookbehind => {
1624 let mut left_nullability = self.nullability(inst.left);
1625 let mut contains_flags = self.get_flags_contains(inst.left);
1626 let nulls = if inst.right.is_missing() {
1627 self.get_nulls_id(inst.left)
1628 } else {
1629 let right_nullability = self.nullability(inst.right);
1630 let nulls_left = self.get_nulls_id_w_mask(inst.right, right_nullability);
1631 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1632 left_nullability = left_nullability.and(right_nullability);
1633 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1634 self.mb.nb.and_id(nulls_left, nulls_right)
1635 };
1636
1637 let meta_id = self.mb.get_meta_id(Metadata {
1638 flags: MetaFlags::with_nullability(
1639 left_nullability,
1640 contains_flags.or(MetaFlags::CONTAINS_LOOKBEHIND),
1641 ),
1642 nulls,
1643 });
1644 self.init_metadata(node_id, meta_id);
1645 }
1646 Kind::Lookahead => {
1647 let mut nulls = self.get_nulls_id(inst.left);
1648 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1650 let left_nullability = inst.left.nullability(self);
1651 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1652
1653 nulls = self.mb.nb.or_id(nulls, nulls_right);
1654
1655 let la_inner = inst.left;
1656 let la_tail = inst.right;
1657 let null = self
1658 .get_meta_flags(la_inner)
1659 .nullability()
1660 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1661 let contains_flags = self
1662 .get_flags_contains(la_inner)
1663 .or(self.get_flags_contains(la_tail));
1664
1665 let meta_id = self.mb.get_meta_id(Metadata {
1666 flags: MetaFlags::with_nullability(
1667 null,
1668 contains_flags.or(MetaFlags::CONTAINS_LOOKAHEAD),
1669 ),
1670 nulls,
1671 });
1672 self.init_metadata(node_id, meta_id);
1673 }
1674 Kind::Counted => {
1675 let best = inst.extra >> 16;
1676 let (null, nulls) = if best > 0 {
1677 let mut ns = BTreeSet::new();
1678 ns.insert(NullState::new(Nullability::CENTER, best));
1679 (Nullability::CENTER, self.mb.nb.get_id(ns))
1680 } else {
1681 (Nullability::NEVER, NullsId::EMPTY)
1682 };
1683 let meta_id = self.mb.get_meta_id(Metadata {
1684 flags: MetaFlags::with_nullability(null, MetaFlags::ZERO),
1685 nulls,
1686 });
1687 self.init_metadata(node_id, meta_id);
1688 }
1689 }
1690
1691 self.array.push(inst);
1692
1693 if let Some(rw) = self.post_init_simplify(node_id) {
1694 self.override_as(node_id, rw)
1695 } else {
1696 node_id
1697 }
1698 }
1699
1700 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1701 match self.get_kind(node_id) {
1702 Kind::Concat => {
1703 let lhs = node_id.left(self);
1704 let rhs = node_id.right(self);
1705 if lhs.is_pred_star(self).is_some() {
1706 if let Some(opttail) = rhs.is_opt_v(self) {
1707 if let Some(true) = self.subsumes(lhs, opttail) {
1708 return Some(lhs);
1709 }
1710 }
1711 }
1712 }
1713 Kind::Union => {
1714 let lhs = node_id.left(self);
1715 let rhs = node_id.right(self);
1716 let mut subsumed = false;
1717 rhs.iter_union_while(self, &mut |b, branch| {
1718 if b.nullable_subsumes(branch, lhs) {
1719 subsumed = true;
1720 }
1721 !subsumed
1722 });
1723 if subsumed {
1724 return Some(rhs);
1725 }
1726 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1727 return Some(rhs);
1728 }
1729 }
1730 _ => {}
1731 }
1732
1733 None
1734 }
1735
1736 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1738 if self.get_kind(lhs) != Kind::Union {
1739 return false; }
1741 let mut rhs_branches = Vec::new();
1742 let mut curr = rhs;
1743 while self.get_kind(curr) == Kind::Union {
1744 rhs_branches.push(self.get_left(curr));
1745 curr = self.get_right(curr);
1746 }
1747 rhs_branches.push(curr);
1748
1749 curr = lhs;
1750 while self.get_kind(curr) == Kind::Union {
1751 if !rhs_branches.contains(&self.get_left(curr)) {
1752 return false;
1753 }
1754 curr = self.get_right(curr);
1755 }
1756 rhs_branches.contains(&curr)
1757 }
1758
1759 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1761 if node == target {
1762 return true;
1763 }
1764 match self.get_kind(node) {
1765 Kind::Union => {
1766 self.nullable_subsumes(self.get_left(node), target)
1767 || self.nullable_subsumes(self.get_right(node), target)
1768 }
1769 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1770 self.nullable_subsumes(self.get_right(node), target)
1771 }
1772 _ => false,
1773 }
1774 }
1775
1776 pub fn num_nodes(&self) -> u32 {
1777 self.num_created
1778 }
1779
1780 pub fn tree_size(&self, root: NodeId, limit: usize) -> usize {
1781 let mut seen: FxHashMap<NodeId, ()> = FxHashMap::default();
1782 let mut stack: Vec<NodeId> = vec![root];
1783 while let Some(n) = stack.pop() {
1784 if n == NodeId::MISSING
1785 || n == NodeId::BOT
1786 || n == NodeId::EPS
1787 || n == NodeId::TS
1788 || n == NodeId::BEGIN
1789 || n == NodeId::END
1790 {
1791 continue;
1792 }
1793 if seen.insert(n, ()).is_some() {
1794 continue;
1795 }
1796 if seen.len() >= limit {
1797 return limit;
1798 }
1799 stack.push(self.get_left(n));
1800 stack.push(self.get_right(n));
1801 }
1802 seen.len()
1803 }
1804 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1805 match self.index.get(&inst) {
1806 Some(&id) => id,
1807 None => self.init(inst),
1808 }
1809 }
1810 #[inline]
1811 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1812 self.index.get(inst)
1813 }
1814
1815 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1816 self.index.insert(key, subsumed);
1817 subsumed
1818 }
1819
1820 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1821 let key = &self.array[key.0 as usize];
1822 let entry = self.index.get_mut(key).unwrap();
1823 *entry = subsumed;
1824 subsumed
1825 }
1826
1827 #[inline]
1828 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1829 self.array[node_id.0 as usize].left
1830 }
1831
1832 #[inline]
1833 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1834 self.array[node_id.0 as usize].right
1835 }
1836
1837 #[inline]
1838 pub fn get_extra(&self, node_id: NodeId) -> u32 {
1839 self.array[node_id.0 as usize].extra
1840 }
1841
1842 #[inline]
1843 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1844 debug_assert!(node_id.is_concat(self));
1845 let mut curr = node_id;
1846 while curr.is_concat(self) {
1847 curr = curr.right(self);
1848 }
1849 curr
1850 }
1851
1852 fn has_trailing_la(&self, node: NodeId) -> bool {
1853 let end = match self.get_kind(node) {
1854 Kind::Concat => self.get_concat_end(node),
1855 Kind::Lookahead => node,
1856 _ => return false,
1857 };
1858 end.is_lookahead(self) && end.right(self).is_missing()
1859 }
1860
1861 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1862 if node.is_lookahead(self) {
1863 return (NodeId::EPS, node);
1864 }
1865 debug_assert!(node.is_concat(self));
1866 let right = node.right(self);
1867 if !right.is_concat(self) {
1868 return (node.left(self), right);
1869 }
1870 let (stripped, la) = self.strip_trailing_la(right);
1871 (self.mk_concat(node.left(self), stripped), la)
1872 }
1873 #[inline]
1874 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1875 debug_assert!(matches!(
1876 self.get_kind(lookahead_node_id),
1877 Kind::Lookahead | Kind::Counted
1878 ));
1879 lookahead_node_id.left(self)
1880 }
1881 #[inline]
1882 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1883 debug_assert!(lookahead_node_id.is_lookahead(self));
1884 lookahead_node_id.right(self)
1885 }
1886 #[inline]
1887 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1888 debug_assert!(
1889 matches!(
1890 self.get_kind(lookahead_node_id),
1891 Kind::Lookahead | Kind::Counted
1892 ),
1893 "not lookahead/counted: {:?}",
1894 self.pp(lookahead_node_id)
1895 );
1896 self.get_extra(lookahead_node_id)
1897 }
1898 #[inline]
1899 pub fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1900 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1901 lookbehind_node_id.left(self)
1902 }
1903 #[inline]
1904 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1905 debug_assert!(lookbehind_node_id.is_lookbehind(self));
1906 lookbehind_node_id.right(self)
1907 }
1908
1909 #[inline]
1910 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1911 debug_assert!(
1912 self.array.len() > node_id.0 as usize,
1913 "array len: {:?}",
1914 node_id
1915 );
1916 self.array[node_id.0 as usize].kind
1917 }
1918
1919 #[inline]
1920 pub fn get_node(&self, node_id: NodeId) -> &NodeKey {
1921 &self.array[node_id.0 as usize]
1922 }
1923
1924 #[inline]
1925 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1926 self.metadata[node_id.0 as usize]
1927 }
1928
1929 #[inline]
1930 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1931 debug_assert!(node_id.0 != u32::MAX);
1932 let meta_id = self.get_node_meta_id(node_id);
1933 debug_assert!(meta_id != MetadataId::MISSING);
1934 &self.mb.array[meta_id.0 as usize]
1935 }
1936
1937 #[inline]
1938 pub fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1939 if node_id == NodeId::MISSING {
1940 NullsId::EMPTY
1941 } else {
1942 self.get_meta(node_id).nulls
1943 }
1944 }
1945
1946 pub fn nulls_as_vecs(&self) -> Vec<Vec<NullState>> {
1947 self.mb
1948 .nb
1949 .array
1950 .iter()
1951 .map(|set| set.iter().cloned().collect())
1952 .collect()
1953 }
1954
1955 pub fn nulls_count(&self) -> usize {
1956 self.mb.nb.array.len()
1957 }
1958
1959 pub fn nulls_entry_vec(&self, id: u32) -> Vec<NullState> {
1960 self.mb.nb.array[id as usize].iter().cloned().collect()
1961 }
1962
1963 pub fn center_nulls_id(&mut self, nid: NullsId) -> NullsId {
1964 self.mb.nb.and_mask(nid, Nullability::CENTER)
1965 }
1966
1967 #[inline]
1968 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1969 if node_id == NodeId::MISSING {
1970 NullsId::EMPTY
1971 } else {
1972 let nulls = self.get_meta(node_id).nulls;
1973 self.mb.nb.and_mask(nulls, mask)
1974 }
1975 }
1976
1977 #[inline]
1978 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1979 let meta_id = self.get_node_meta_id(node_id);
1980 let meta = &self.mb.array[meta_id.0 as usize];
1981 meta.flags
1982 }
1983
1984 #[inline]
1985 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1986 self.get_meta(node_id).flags.nullability()
1987 }
1988
1989 #[inline]
1990 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1991 let meta_id = self.get_node_meta_id(node_id);
1992 let meta = &self.mb.array[meta_id.0 as usize];
1993 meta.flags.all_contains_flags()
1994 }
1995
1996 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
1997 if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
1998 return self.strip_lb(node_id.right(self));
1999 }
2000 self.strip_lb_inner(node_id)
2001 }
2002
2003 fn strip_lb_inner(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2004 if !self.contains_look(node_id) {
2005 return Ok(node_id);
2006 }
2007 if node_id.is_concat(self) && node_id.left(self).is_lookbehind(self) {
2008 let lb = node_id.left(self);
2009 let prev = self.get_lookbehind_prev(lb);
2010 let tail = self.strip_lb_inner(node_id.right(self))?;
2011 if prev != NodeId::MISSING {
2012 let stripped_prev = self.strip_lb_inner(prev)?;
2013 return Ok(self.mk_concat(stripped_prev, tail));
2014 }
2015 return Ok(tail);
2016 }
2017 if node_id.is_inter(self) {
2018 let left = self.strip_lb_inner(node_id.left(self))?;
2019 let right = self.strip_lb_inner(node_id.right(self))?;
2020 return Ok(self.mk_inter(left, right));
2021 }
2022 if self.get_kind(node_id) == Kind::Union {
2023 let left = self.strip_lb_inner(node_id.left(self))?;
2024 let right = self.strip_lb_inner(node_id.right(self))?;
2025 return Ok(self.mk_union(left, right));
2026 }
2027 match self.get_kind(node_id) {
2028 Kind::Lookbehind => {
2029 let prev = self.get_lookbehind_prev(node_id);
2030 if prev != NodeId::MISSING {
2031 self.strip_lb_inner(prev)
2032 } else {
2033 Ok(NodeId::EPS)
2034 }
2035 }
2036 Kind::Lookahead if self.get_lookahead_tail(node_id).is_missing() => {
2037 Err(ResharpError::UnsupportedPattern)
2038 }
2039 _ => Ok(node_id),
2040 }
2041 }
2042
2043 pub fn nonbegins(&mut self, node_id: NodeId) -> NodeId {
2045 if !self.contains_anchors(node_id) {
2046 return node_id;
2047 }
2048 match self.get_kind(node_id) {
2049 Kind::Begin => NodeId::BOT,
2050 Kind::Concat => {
2051 let left = self.nonbegins(node_id.left(self));
2052 if left == NodeId::BOT {
2053 return NodeId::BOT;
2054 }
2055 self.mk_concat(left, node_id.right(self))
2056 }
2057 Kind::Union => {
2058 let left = self.nonbegins(node_id.left(self));
2059 let right = self.nonbegins(node_id.right(self));
2060 self.mk_union(left, right)
2061 }
2062 _ => node_id,
2063 }
2064 }
2065
2066 pub fn strip_prefix_safe(&mut self, node_id: NodeId) -> NodeId {
2067 match self.get_kind(node_id) {
2068 Kind::Concat => {
2069 let head = node_id.left(self);
2070 match self.get_kind(head) {
2071 _ if self.any_nonbegin_nullable(head) => {
2072 self.strip_prefix_safe(node_id.right(self))
2073 }
2074 _ => node_id,
2075 }
2076 }
2077 _ => node_id,
2078 }
2079 }
2080 pub fn prune_begin(&mut self, node_id: NodeId) -> NodeId {
2081 match self.get_kind(node_id) {
2082 Kind::Begin => NodeId::BOT,
2083 Kind::Concat => {
2084 let head = self.prune_begin(node_id.left(self));
2085 let tail = self.prune_begin(node_id.right(self));
2086 self.mk_concat(head, tail)
2087 }
2088 Kind::Lookbehind => {
2089 if !node_id.right(self).is_missing() {
2090 return node_id;
2091 }
2092 let head = self.prune_begin(node_id.left(self));
2093 head
2094 }
2095 Kind::Union => {
2096 let left = self.prune_begin(node_id.left(self));
2097 let right = self.prune_begin(node_id.right(self));
2098 self.mk_union(left, right)
2099 }
2100 _ => node_id,
2101 }
2102 }
2103 pub fn prune_begin_eps(&mut self, node_id: NodeId) -> NodeId {
2104 match self.get_kind(node_id) {
2105 Kind::Begin => NodeId::EPS,
2106 Kind::Concat => {
2107 let head = self.prune_begin_eps(node_id.left(self));
2108 let tail = self.prune_begin_eps(node_id.right(self));
2109 self.mk_concat(head, tail)
2110 }
2111 Kind::Lookbehind => {
2112 if !node_id.right(self).is_missing() {
2113 return node_id;
2114 }
2115 let head = self.prune_begin_eps(node_id.left(self));
2116 head
2117 }
2118 Kind::Union => {
2119 let left = self.prune_begin_eps(node_id.left(self));
2120 let right = self.prune_begin_eps(node_id.right(self));
2121 self.mk_union(left, right)
2122 }
2123 _ => node_id,
2124 }
2125 }
2126
2127 pub fn normalize_rev(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2128 if !self.contains_look(node_id) && !self.contains_anchors(node_id) {
2129 return Ok(node_id);
2130 }
2131 let result = match self.get_kind(node_id) {
2132 Kind::Concat => {
2133 let left = self.normalize_rev(node_id.left(self))?;
2134 let right = self.normalize_rev(node_id.right(self))?;
2135 self.mk_concat(left, right)
2136 }
2137 Kind::Inter => {
2138 let left = self.normalize_rev(node_id.left(self))?;
2139 let right = self.normalize_rev(node_id.right(self))?;
2140 self.mk_inter(left, right)
2141 }
2142 Kind::Union => {
2143 let left = self.normalize_rev(node_id.left(self))?;
2144 let right = self.normalize_rev(node_id.right(self))?;
2145 self.mk_union(left, right)
2146 }
2147 Kind::Lookbehind => {
2148 let left = self.normalize_rev(node_id.left(self))?;
2149 let right = self.normalize_rev(node_id.right(self).missing_to_eps())?;
2150 let lbody_ts = self.mk_concat(NodeId::TS, left);
2151 let ltail_ts = self.mk_concat(NodeId::TS, right);
2152 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
2153 as_inter
2154 }
2155 Kind::Lookahead if !self.get_lookahead_tail(node_id).is_missing() => {
2156 return Err(ResharpError::UnsupportedPattern)
2157 }
2158 _ => node_id,
2159 };
2160 Ok(result)
2161 }
2162
2163 pub fn collect_der_targets(
2164 &mut self,
2165 der: TRegexId,
2166 path_set: TSetId,
2167 out: &mut Vec<(NodeId, TSetId)>,
2168 ) {
2169 match *self.get_tregex(der) {
2170 TRegex::Leaf(target) => {
2171 if let Some(entry) = out.iter_mut().find(|(t, _)| *t == target) {
2172 entry.1 = self.solver().or_id(entry.1, path_set);
2173 } else {
2174 out.push((target, path_set));
2175 }
2176 }
2177 TRegex::ITE(cond, then_b, else_b) => {
2178 let then_path = self.solver().and_id(path_set, cond);
2179 self.collect_der_targets(then_b, then_path, out);
2180 let not_cond = self.solver().not_id(cond);
2181 let else_path = self.solver().and_id(path_set, not_cond);
2182 self.collect_der_targets(else_b, else_path, out);
2183 }
2184 }
2185 }
2186
2187 pub fn prefix_ts(&mut self, node_id: NodeId) -> NodeId {
2188 let with_ts = if node_id.is_concat(self) && node_id.left(self) == NodeId::BEGIN {
2189 node_id
2190 } else {
2191 self.mk_concat(NodeId::TS, node_id)
2192 };
2193 with_ts
2194 }
2195
2196 pub fn ts_rev_start(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2197 let rev = self.reverse(node_id)?;
2198 let rev = self.normalize_rev(rev)?;
2199 let with_ts = self.prefix_ts(rev);
2200 Ok(self.simplify_rev_initial(with_ts))
2201 }
2202
2203 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, ResharpError> {
2204 debug_assert!(node_id != NodeId::MISSING);
2205 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
2206 if *rev != NodeId::MISSING {
2207 return Ok(*rev);
2208 }
2209 }
2210 let rw = match self.get_kind(node_id) {
2211 Kind::End => NodeId::BEGIN,
2212 Kind::Begin => NodeId::END,
2213 Kind::Pred => node_id,
2214 Kind::Concat => {
2215 let left = self.reverse(node_id.left(self))?;
2216 let right = self.reverse(node_id.right(self))?;
2217 self.mk_concat(right, left)
2218 }
2219 Kind::Union => {
2220 let left = self.reverse(node_id.left(self))?;
2221 let right = self.reverse(node_id.right(self))?;
2222 self.mk_union(left, right)
2223 }
2224 Kind::Inter => {
2225 let left = self.reverse(node_id.left(self))?;
2226 let right = self.reverse(node_id.right(self))?;
2227 self.mk_inter(left, right)
2228 }
2229 Kind::Star => {
2230 let body = self.reverse(node_id.left(self))?;
2231 self.mk_star(body)
2232 }
2233 Kind::Compl => {
2234 if self.contains_look(node_id.left(self)) {
2235 return Err(ResharpError::UnsupportedPattern);
2236 }
2237 let body = self.reverse(node_id.left(self))?;
2238 self.mk_compl(body)
2239 }
2240 Kind::Lookbehind => {
2241 let prev = self.get_lookbehind_prev(node_id);
2242 let inner_id = self.get_lookbehind_inner(node_id);
2243 let rev_inner = self.reverse(inner_id)?;
2244 let rev_prev = if prev != NodeId::MISSING {
2245 self.reverse(prev)?
2246 } else {
2247 NodeId::MISSING
2248 };
2249 self.mk_lookahead(rev_inner, rev_prev, 0)
2250 }
2251 Kind::Lookahead => {
2252 let rel = self.get_lookahead_rel(node_id);
2253 if rel == u32::MAX {
2255 let lbody = self.get_lookahead_inner(node_id);
2256 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
2257 let rbody = self.reverse(lbody);
2258 let rtail = self.reverse(ltail);
2259 let rev = self.mk_lookbehind(rbody?, rtail?);
2260 return Ok(self.cache_reversed(node_id, rev));
2261 }
2262 if rel != 0 {
2263 return Err(ResharpError::UnsupportedPattern);
2264 }
2265 let tail_node = self.get_lookahead_tail(node_id);
2266 let rev_tail = if tail_node != NodeId::MISSING {
2267 self.reverse(tail_node)?
2268 } else {
2269 NodeId::MISSING
2270 };
2271 let inner_id = self.get_lookahead_inner(node_id);
2272 let rev_inner = self.reverse(inner_id)?;
2273 self.mk_lookbehind(rev_inner, rev_tail)
2274 }
2275 Kind::Counted => {
2276 return Err(ResharpError::UnsupportedPattern);
2277 }
2278 };
2279 self.cache_reversed(node_id, rw);
2280 Ok(rw)
2281 }
2282
2283 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
2284 let node = NodeKey {
2285 kind: Kind::Pred,
2286 left: NodeId::MISSING,
2287 right: NodeId::MISSING,
2288 extra: pred.0,
2289 };
2290 self.get_node_id(node)
2291 }
2292
2293 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
2294 let key = NodeKey {
2295 kind: Kind::Compl,
2296 left: body,
2297 right: NodeId::MISSING,
2298 extra: u32::MAX,
2299 };
2300 if let Some(id) = self.key_is_created(&key) {
2301 return *id;
2302 }
2303 if body == NodeId::BOT {
2304 return NodeId::TS;
2305 }
2306 if body == NodeId::TS {
2307 return NodeId::BOT;
2308 }
2309
2310 if let Some(contains_body) = body.is_contains(self) {
2311 if contains_body.is_pred(self) {
2312 let pred = contains_body.pred_tset(self);
2313 let notpred = self.mk_pred_not(pred);
2314 let node = self.mk_star(notpred);
2315 return self.init_as(key, node);
2316 } else if contains_body == NodeId::END {
2317 return self.init_as(key, NodeId::BOT);
2318 }
2319 };
2320
2321 if self.get_kind(body) == Kind::Compl {
2322 return self.get_node(body).left;
2323 }
2324
2325 self.get_node_id(key)
2326 }
2327
2328 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
2329 let nid = self.get_nulls_id(body);
2330 let nref = self.mb.nb.get_set_ref(nid).clone();
2331 let mut futures = NodeId::BOT;
2332 for n in nref.iter() {
2333 if !n.is_mask_nullable(mask) {
2334 continue;
2335 }
2336
2337 let eff = if n.rel == 0 {
2338 NodeId::EPS
2339 } else {
2340 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
2341 };
2342 futures = self.mk_union(futures, eff)
2343 }
2344 futures
2345 }
2346
2347 fn strip_ts_max_len(&self, node: NodeId) -> Option<u32> {
2349 let mut cur = node;
2350 let mut total: u32 = 0;
2351 loop {
2352 if !cur.is_concat(self) {
2353 return None;
2354 }
2355 let r = cur.right(self);
2356 let (_, lmax) = self.get_min_max_length(cur.left(self));
2357 if lmax == u32::MAX {
2358 return None;
2359 }
2360 total = total.saturating_add(lmax);
2361 if r == NodeId::TS {
2362 return Some(total);
2363 }
2364 cur = r;
2365 }
2366 }
2367
2368 fn peel_head_pred(&self, node: NodeId) -> Option<(TSetId, NodeId)> {
2369 if node.is_pred(self) {
2370 Some((node.pred_tset(self), NodeId::EPS))
2371 } else if node.is_concat(self) && node.left(self).is_pred(self) {
2372 Some((node.left(self).pred_tset(self), node.right(self)))
2373 } else {
2374 None
2375 }
2376 }
2377
2378 fn strip_end_from_la_head(&mut self, node: NodeId) -> NodeId {
2380 let (head, rest) = if node.is_concat(self) {
2381 (node.left(self), node.right(self))
2382 } else {
2383 (node, NodeId::EPS)
2384 };
2385 if !head.is_kind(self, Kind::Union) {
2386 return node;
2387 }
2388 let l = head.left(self);
2389 if !l.is_end() {
2390 return node;
2391 }
2392 let r = head.right(self);
2393 self.mk_concat(r, rest)
2394 }
2395
2396 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
2397 if cfg!(feature = "norewrite") {
2398 return None;
2399 }
2400
2401 if tail.is_lookbehind(self) {
2414 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
2415 return self
2416 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
2417 .ok();
2418 }
2419 if head.is_lookahead(self) {
2420 let la_tail = self.get_lookahead_tail(head);
2421 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
2422 let la_body = self.get_lookahead_inner(head);
2423 let la_body = if new_la_tail.is_never_nullable(self) {
2425 self.strip_end_from_la_head(la_body)
2426 } else {
2427 la_body
2428 };
2429 if la_body == NodeId::BOT {
2430 return Some(NodeId::BOT);
2431 }
2432 if let (Some((p_l, body_rest)), Some((p_b, tail_rest))) = (
2433 self.peel_head_pred(la_body),
2434 self.peel_head_pred(new_la_tail),
2435 ) {
2436 let p = self.solver().and_id(p_l, p_b);
2437 let merged = self.mk_pred(p);
2438 if merged == NodeId::BOT {
2439 return Some(NodeId::BOT);
2440 }
2441 let new_la = if body_rest == NodeId::EPS {
2442 NodeId::EPS
2443 } else {
2444 self.mk_lookahead(body_rest, NodeId::MISSING, 0)
2445 };
2446 let after = self.mk_concat(new_la, tail_rest);
2447 return Some(self.mk_concat(merged, after));
2448 }
2449
2450 if la_body.is_concat(self)
2451 && la_body.right(self).is_end()
2452 && la_body.left(self).is_compl(self)
2453 {
2454 let not_p_ts = la_body.left(self); if let Some(p_max) = self.strip_ts_max_len(not_p_ts.left(self)) {
2456 let (tail_min, _) = self.get_min_max_length(new_la_tail);
2457 if p_max <= tail_min {
2458 return Some(self.mk_inter(not_p_ts, new_la_tail));
2459 }
2460 }
2461 }
2462
2463 if new_la_tail.is_center_nullable(self) {
2464 let la_new = self.mk_lookahead_internal(la_body, new_la_tail, u32::MAX);
2465 return Some(la_new);
2466 }
2467 let la_rel = self.get_lookahead_rel(head);
2468 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
2469 let tail_rel = self.get_lookahead_rel(new_la_tail);
2470 tail_rel + la_rel
2471 } else {
2472 u32::MAX
2473 };
2474
2475 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
2476 }
2477
2478 if head.is_kind(self, Kind::End) && tail == NodeId::TS {
2479 return Some(head);
2480 }
2481
2482 if head == NodeId::TS && self.nullability(tail) == Nullability::ALWAYS {
2483 return Some(NodeId::TS);
2484 }
2485
2486 if tail == NodeId::TS && self.nullability(head) == Nullability::ALWAYS {
2487 return Some(NodeId::TS);
2488 }
2489
2490 if head.is_inter(self) && tail == NodeId::TS {
2491 let mut alltopstar = true;
2492 head.iter_inter(
2493 self,
2494 &mut (|b, v| {
2495 alltopstar = b.ends_with_ts(v);
2496 }),
2497 );
2498
2499 if alltopstar {
2500 return Some(head);
2501 }
2502 }
2503
2504 if head.is_star(self) && head == tail {
2505 return Some(head);
2506 }
2507
2508 None
2509 }
2510
2511 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2512 use Kind::*;
2513
2514 if cfg!(feature = "norewrite") {
2515 return None;
2516 }
2517 if left == right {
2518 return Some(left);
2519 }
2520
2521 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2522 return Some(left);
2523 }
2524 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2525 return Some(right);
2526 }
2527
2528 if left.is_inter(self) && right.is_inter(self) {
2529 let mut lconj: Vec<NodeId> = Vec::new();
2530 left.any_inter_component(self, |v| {
2531 lconj.push(v);
2532 false
2533 });
2534 let lconj_initial = lconj.len();
2535 let mut common = NodeId::TS;
2536 let mut r_rest = NodeId::TS;
2537 let mut cur = right;
2538 loop {
2539 let (v, next) = if cur.kind(self) == Inter {
2540 (cur.left(self), Some(cur.right(self)))
2541 } else {
2542 (cur, None)
2543 };
2544 if let Some(pos) = lconj.iter().position(|&x| x == v) {
2545 lconj.swap_remove(pos);
2546 common = self.mk_inter(v, common);
2547 } else {
2548 r_rest = self.mk_inter(v, r_rest);
2549 }
2550 match next {
2551 Some(n) => cur = n,
2552 None => break,
2553 }
2554 }
2555 if lconj.len() < lconj_initial {
2556 let l_rest = lconj
2557 .iter()
2558 .fold(NodeId::TS, |acc, &v| self.mk_inter(v, acc));
2559 let inner_union = self.mk_union(l_rest, r_rest);
2560 return Some(self.mk_inter(common, inner_union));
2561 }
2562 }
2563
2564 if left.is_pred(self) && right.is_pred(self) {
2565 let l = left.pred_tset(self);
2566 let r = right.pred_tset(self);
2567 let solver = self.solver();
2568 let psi = solver.or_id(l, r);
2569 let rewrite = self.mk_pred(psi);
2570 return Some(rewrite);
2571 }
2572
2573 if left == NodeId::EPS
2574 && self.get_extra(left) == 0
2575 && self.nullability(right) == Nullability::ALWAYS
2576 {
2577 return Some(right);
2578 }
2579
2580 if left.is_lookahead(self) && right.is_lookahead(self) {
2581 let lb = left.left(self);
2582 let lt = left.right(self);
2583 let lrel = left.extra(self);
2584
2585 let rb = right.left(self);
2586 let rt = right.right(self);
2587 let rrel = right.extra(self);
2588
2589 if lrel == rrel && lt.is_missing() && rt.is_missing() {
2590 let unioned = self.mk_union(lb, rb);
2591 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
2592 return Some(node);
2593 }
2594 }
2595
2596 if right.is_kind(self, Concat) {
2597 if left == NodeId::END
2598 && right.left(self) == NodeId::END
2599 && self.nullability(right).has(Nullability::END)
2600 {
2601 return Some(right);
2602 }
2603 if left == right.left(self) {
2605 let rhs = self.mk_union(NodeId::EPS, right.right(self));
2606 let rw = self.mk_concat(left, rhs);
2607 return Some(rw);
2608 }
2609 if left.is_kind(self, Concat) {
2610 let head1 = left.left(self);
2611 let head2 = right.left(self);
2612
2613 if head1 == head2 {
2614 let t1 = left.right(self);
2615 let t2 = right.right(self);
2616 if head1 == NodeId::TS {
2618 if t2.has_concat_tail(self, t1) {
2619 return Some(left);
2620 }
2621 if t1.has_concat_tail(self, t2) {
2622 return Some(right);
2623 }
2624 }
2625 let un = self.mk_union(t1, t2);
2626 return Some(self.mk_concat(left.left(self), un));
2627 }
2628
2629 if false {
2633 let end1 = self.get_concat_end(left);
2634 let end2 = self.get_concat_end(right);
2635 if end1 == end2 {
2636 let contains_lookarounds =
2637 left.contains_lookaround(self) || right.contains_lookaround(self);
2638 let flags = left.flags_contains(self).or(right.flags_contains(self));
2639 if !contains_lookarounds && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
2640 let rev1 = self.reverse(left).unwrap();
2641 let rev2 = self.reverse(right).unwrap();
2642
2643 let union_rev = self.mk_union(rev1, rev2);
2644 return Some(self.reverse(union_rev).unwrap());
2645 }
2646 }
2647 }
2648 }
2649 if left.is_pred(self) && left == right.left(self) {
2650 let un = self.mk_opt(right.right(self));
2651 let conc = self.mk_concat(left, un);
2652 return Some(conc);
2653 }
2654 }
2655
2656 if left == NodeId::EPS && right.is_plus(self) {
2657 let result = self.mk_star(right.left(self));
2658 return Some(result);
2659 }
2660
2661 if left.is_inter(self) && right.is_inter(self) {
2663 if let Some(rw) = self.try_subsume_inter_union(left, right) {
2664 return Some(rw);
2665 }
2666 }
2667
2668 None
2669 }
2670
2671 fn collect_inter_components(&self, node: NodeId, out: &mut Vec<NodeId>) {
2672 let mut curr = node;
2673 while curr.is_inter(self) {
2674 out.push(self.get_left(curr));
2675 curr = self.get_right(curr);
2676 }
2677 out.push(curr);
2678 }
2679
2680 fn as_pred_chain_star(&self, node: NodeId) -> Option<(bool, TSetId, NodeId, u32)> {
2681 let mut curr = node;
2682 let has_prefix = curr.is_concat(self) && self.get_left(curr) == NodeId::TS;
2683 if has_prefix {
2684 curr = self.get_right(curr);
2685 }
2686 let mut count = 0u32;
2687 let mut pred_set = None;
2688 while curr.is_concat(self) {
2689 let head = self.get_left(curr);
2690 if !head.is_pred(self) {
2691 return None;
2692 }
2693 let ps = head.pred_tset(self);
2694 match pred_set {
2695 None => pred_set = Some(ps),
2696 Some(existing) => {
2697 if existing != ps {
2698 return None;
2699 }
2700 }
2701 }
2702 curr = self.get_right(curr);
2703 count += 1;
2704 }
2705 if count == 0 || !curr.is_star(self) {
2706 return None;
2707 }
2708 Some((has_prefix, pred_set.unwrap(), curr, count))
2709 }
2710
2711 fn is_sorted_subset(sub: &[NodeId], sup: &[NodeId]) -> bool {
2712 let mut j = 0;
2713 for &s in sub {
2714 while j < sup.len() && sup[j] < s {
2715 j += 1;
2716 }
2717 if j >= sup.len() || sup[j] != s {
2718 return false;
2719 }
2720 j += 1;
2721 }
2722 true
2723 }
2724
2725 fn try_subsume_inter_union(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2726 if !left.is_inter(self) || !right.is_inter(self) {
2727 return None;
2728 }
2729
2730 let mut lc: Vec<NodeId> = Vec::new();
2731 let mut rc: Vec<NodeId> = Vec::new();
2732 self.collect_inter_components(left, &mut lc);
2733 self.collect_inter_components(right, &mut rc);
2734
2735 if lc.len() <= rc.len() && Self::is_sorted_subset(&lc, &rc) {
2737 return Some(left);
2738 }
2739 if rc.len() <= lc.len() && Self::is_sorted_subset(&rc, &lc) {
2741 return Some(right);
2742 }
2743
2744 if lc.len() == rc.len() {
2745 let mut diff_idx = None;
2746 for i in 0..lc.len() {
2747 if lc[i] != rc[i] {
2748 if diff_idx.is_some() {
2749 return None;
2750 }
2751 diff_idx = Some(i);
2752 }
2753 }
2754 if let Some(idx) = diff_idx {
2755 let a = lc[idx];
2756 let b = rc[idx];
2757 if let (Some((pfa, pa, sa, ca)), Some((pfb, pb, sb, cb))) =
2758 (self.as_pred_chain_star(a), self.as_pred_chain_star(b))
2759 {
2760 if pfa == pfb && pa == pb && sa == sb && ca != cb {
2761 return if ca < cb { Some(left) } else { Some(right) };
2762 }
2763 }
2764 }
2765 }
2766
2767 None
2768 }
2769
2770 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2771 if cfg!(feature = "norewrite") {
2772 return None;
2773 }
2774 if left == right {
2775 return Some(left);
2776 }
2777 if right.is_inter(self) && right.any_inter_component(self, |v| v == left) {
2778 return Some(right);
2779 }
2780 if right.is_union(self) && right.any_union_component(self, |v| v == left) {
2781 return Some(left);
2782 }
2783
2784 if left.is_pred(self) && right.is_pred(self) {
2785 let l = left.pred_tset(self);
2786 let r = right.pred_tset(self);
2787 let solver = self.solver();
2788 let psi = solver.and_id(l, r);
2789 let rewrite = self.mk_pred(psi);
2790 return Some(rewrite);
2791 }
2792
2793 for (a, b) in [(left, right), (right, left)] {
2794 if a.is_pred(self) && b.is_compl(self) {
2795 let cbody = b.left(self);
2796 if cbody.is_concat(self)
2797 && cbody.right(self) == NodeId::TS
2798 && cbody.left(self).is_pred(self)
2799 {
2800 let q = a.pred_tset(self);
2801 let p = cbody.left(self).pred_tset(self);
2802 let solver = self.solver();
2803 let notp = solver.not_id(p);
2804 let anded = solver.and_id(q, notp);
2805 return Some(self.mk_pred(anded));
2806 }
2807 }
2808 }
2809
2810 if left.is_concat(self) && right.is_concat(self) {
2811 if left.left(self) == right.left(self) {
2812 if left.left(self).is_pred(self) {
2813 let new_right = self.mk_inter(left.right(self), right.right(self));
2814 return Some(self.mk_concat(left.left(self), new_right));
2815 }
2816 }
2817 }
2818
2819 if right.is_compl(self) && right.left(self) == left {
2820 return Some(NodeId::BOT);
2821 }
2822
2823 if left.is_compl(self) && right.is_compl(self) {
2824 let bodies = self.mk_union(left.left(self), right.left(self));
2825 return Some(self.mk_compl(bodies));
2826 }
2827
2828 if left == NodeId::TOPPLUS {
2829 if right.is_pred_star(self).is_some() {
2830 let newloop = self.mk_plus(right.left(self));
2831 return Some(newloop);
2832 }
2833 if right.is_never_nullable(self) {
2834 return Some(right);
2835 }
2836 if right.is_kind(self, Kind::Lookahead) && self.get_lookahead_tail(right).is_missing() {
2837 return Some(NodeId::BOT);
2838 }
2839 if right.is_kind(self, Kind::Concat) {}
2840 }
2841
2842 {
2843 let l_is_la = left.is_lookahead(self);
2844 let r_is_la = right.is_lookahead(self);
2845 let l_is_cla = !l_is_la && left.is_concat(self) && left.left(self).is_lookahead(self);
2846 let r_is_cla = !r_is_la && right.is_concat(self) && right.left(self).is_lookahead(self);
2847 if l_is_la || r_is_la || l_is_cla || r_is_cla {
2848 let (la_node, other, concat_body) = if r_is_la {
2849 (right, left, NodeId::MISSING)
2850 } else if l_is_la {
2851 (left, right, NodeId::MISSING)
2852 } else if r_is_cla {
2853 (right.left(self), left, right.right(self))
2854 } else {
2855 (left.left(self), right, left.right(self))
2856 };
2857 let la_body = la_node.left(self);
2858 let la_tail = self.get_lookahead_tail(la_node).missing_to_eps();
2859 let inter_right = if concat_body.is_missing() {
2860 la_tail
2861 } else {
2862 self.mk_concat(la_tail, concat_body)
2863 };
2864 let new_body = self.mk_inter(other, inter_right);
2865 let la = self.mk_lookahead_internal(la_body, NodeId::MISSING, 0);
2866 return Some(self.mk_concat(la, new_body));
2867 }
2868 }
2869
2870 if self.get_kind(right) == Kind::Compl {
2871 let compl_body = right.left(self);
2872 if left == compl_body {
2873 return Some(NodeId::BOT);
2874 }
2875 if compl_body.is_concat(self) {
2876 let compl_head = compl_body.left(self);
2877 if compl_body.right(self) == NodeId::TS && compl_head == left {
2878 return Some(NodeId::BOT);
2879 }
2880 }
2881 }
2882
2883 if let Some(pleft) = left.is_pred_star(self) {
2884 if let Some(pright) = right.is_pred_star(self) {
2885 let merged = self.mk_inter(pleft, pright);
2886 return Some(self.mk_star(merged));
2887 }
2888 }
2889
2890 {
2891 let l_is_clb = left.is_concat(self) && left.left(self).is_lookbehind(self);
2892 let r_is_clb = right.is_concat(self) && right.left(self).is_lookbehind(self);
2893 if l_is_clb || r_is_clb {
2894 let (lb, body) = if l_is_clb && r_is_clb {
2895 let lb1 = left.left(self);
2896 let lb2 = right.left(self);
2897 let inner = self.mk_inter(
2898 self.get_lookbehind_inner(lb1),
2899 self.get_lookbehind_inner(lb2),
2900 );
2901 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2902 let body = self.mk_inter(left.right(self), right.right(self));
2903 (lb, body)
2904 } else if l_is_clb {
2905 let lb = left.left(self);
2906 let body = self.mk_inter(left.right(self), right);
2907 (lb, body)
2908 } else {
2909 let lb = right.left(self);
2910 let body = self.mk_inter(left, right.right(self));
2911 (lb, body)
2912 };
2913 return Some(self.mk_concat(lb, body));
2914 }
2915 }
2916
2917 {
2918 let l_has_la = self.has_trailing_la(left);
2919 let r_has_la = self.has_trailing_la(right);
2920 if l_has_la || r_has_la {
2921 let (body, la) = if l_has_la && r_has_la {
2922 let (lbody, l_la) = self.strip_trailing_la(left);
2923 let (rbody, r_la) = self.strip_trailing_la(right);
2924 let inner = self.mk_inter(
2925 self.get_lookahead_inner(l_la),
2926 self.get_lookahead_inner(r_la),
2927 );
2928 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2929 let body = self.mk_inter(lbody, rbody);
2930 (body, la)
2931 } else if l_has_la {
2932 let (lbody, la) = self.strip_trailing_la(left);
2933 let body = self.mk_inter(lbody, right);
2934 (body, la)
2935 } else {
2936 let (rbody, la) = self.strip_trailing_la(right);
2937 let body = self.mk_inter(left, rbody);
2938 (body, la)
2939 };
2940 return Some(self.mk_concat(body, la));
2941 }
2942 }
2943
2944 None
2945 }
2946
2947 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2948 if cfg!(feature = "norewrite") {
2949 return None;
2950 }
2951 debug_assert!(self.get_kind(right_union) == Kind::Union);
2952
2953 let mut rewritten = None;
2954 right_union.iter_union_while(
2955 self,
2956 &mut (|b, v| {
2957 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2958 rewritten = Some((v, rw));
2959 false
2960 } else {
2961 true
2962 }
2963 }),
2964 );
2965
2966 if let Some(rw) = rewritten {
2967 let mut new_union = NodeId::BOT;
2968 right_union.iter_union(
2969 self,
2970 &mut (|b, v| {
2971 if v == rw.0 {
2972 new_union = b.mk_union(rw.1, new_union)
2973 } else {
2974 new_union = b.mk_union(v, new_union)
2975 }
2976 }),
2977 );
2978 return Some(new_union);
2979 };
2980
2981 None
2982 }
2983
2984 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2985 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2986 debug_assert!(tail != NodeId::MISSING);
2987 let key = NodeKey {
2988 kind: Kind::Concat,
2989 left: head,
2990 right: tail,
2991 extra: u32::MAX,
2992 };
2993 if let Some(id) = self.key_is_created(&key) {
2994 return *id;
2995 }
2996
2997 if head == NodeId::BOT || tail == NodeId::BOT {
2998 return NodeId::BOT;
2999 }
3000 if head == NodeId::EPS {
3001 return tail;
3002 }
3003 if tail == NodeId::EPS {
3004 return head;
3005 }
3006
3007 if head.is_kind(self, Kind::Concat) {
3009 let left = head.left(self);
3010 let newright = self.mk_concat(head.right(self), tail);
3011 let updated = self.mk_concat(left, newright);
3012 return self.init_as(key, updated);
3013 }
3014
3015 if self.get_kind(head) == Kind::End && !self.is_nullable(tail, Nullability::END) {
3016 return NodeId::BOT;
3017 }
3018
3019 if tail.is_concat(self) {
3020 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
3021 let upd = self.mk_concat(rw, tail.right(self));
3022 return self.init_as(key, upd);
3023 }
3024 }
3025
3026 if let Some(new) = self.attempt_rw_concat_2(head, tail) {
3027 return self.init_as(key, new);
3028 }
3029
3030 match (self.get_kind(head), self.get_kind(tail)) {
3031 (Kind::Star, Kind::Concat) if head.is_star(self) => {
3033 let rl = tail.left(self);
3034 if head == rl {
3035 return self.init_as(key, tail);
3036 }
3037 }
3038 (_, Kind::Concat) => {
3040 let curr = head;
3041 let h2 = self.mk_concat(curr, tail.left(self));
3042 let tr = tail.right(self);
3043 if let Some(new) = self.attempt_rw_concat_2(h2, tr) {
3044 return self.init_as(key, new);
3045 }
3046 }
3047 _ if head == NodeId::TS && self.starts_with_ts(tail) => {
3048 return self.init_as(key, tail);
3049 }
3050 _ => {}
3051 }
3052
3053 self.init(key)
3054 }
3055
3056 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
3057 let lb_body = {
3059 match self.starts_with_ts(lb_body) {
3060 true => lb_body,
3061 false => self.mk_concat(NodeId::TS, lb_body),
3062 }
3063 };
3064 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
3066 }
3067
3068 fn mk_lookbehind_internal(
3069 &mut self,
3070 lb_body: NodeId,
3071 lb_prev: NodeId,
3072 ) -> Result<NodeId, ResharpError> {
3073 debug_assert!(lb_body != NodeId::MISSING);
3074 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
3075 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
3076 return Ok(NodeId::BOT);
3077 }
3078 if lb_body == NodeId::TS {
3079 return Ok(lb_prev);
3080 }
3081 if lb_body == NodeId::EPS {
3082 match lb_prev {
3083 NodeId::MISSING => return Ok(NodeId::EPS),
3084 NodeId::EPS => return Ok(NodeId::EPS),
3085 _ => return Ok(lb_prev),
3086 }
3087 }
3088
3089 let key = NodeKey {
3090 kind: Kind::Lookbehind,
3091 left: lb_body,
3092 right: lb_prev,
3093 extra: u32::MAX,
3094 };
3095 match self.key_is_created(&key) {
3096 Some(id) => Ok(*id),
3097 None => {
3098 if lb_prev == NodeId::TS {
3099 return Ok(self.mk_concat(lb_prev, lb_body));
3100 }
3101
3102 Ok(self.init(key))
3103 }
3104 }
3105 }
3106
3107 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3108 let la_body = if la_tail.is_missing() && rel == 0 {
3109 self.flatten_la_body(la_body)
3110 } else {
3111 la_body
3112 };
3113 let la_body = {
3115 match self.ends_with_ts(la_body) {
3116 true => la_body,
3117 false => self.mk_concat(la_body, NodeId::TS),
3118 }
3119 };
3120 let rel = if NodeId::MISSING == la_tail {
3121 rel
3122 } else {
3123 match la_tail.is_center_nullable(self) {
3124 false => u32::MAX,
3125 true => rel,
3126 }
3127 };
3128
3129 self.mk_lookahead_internal(la_body, la_tail, rel)
3130 }
3131
3132 fn flatten_la_body(&mut self, node: NodeId) -> NodeId {
3133 if !self
3134 .get_meta_flags(node)
3135 .has(MetaFlags::CONTAINS_LOOKBEHIND.or(MetaFlags::CONTAINS_LOOKAHEAD))
3136 {
3137 return node;
3138 }
3139 match self.get_kind(node) {
3140 Kind::Lookahead
3141 if self.get_lookahead_tail(node).is_missing()
3142 && self.get_lookahead_rel(node) == 0 =>
3143 {
3144 let inner = self.get_lookahead_inner(node);
3145 let inner = self.strip_trailing_ts(inner);
3146 self.flatten_la_body(inner)
3147 }
3148 Kind::Union => {
3149 let l = self.flatten_la_body(node.left(self));
3150 let r = self.flatten_la_body(node.right(self));
3151 self.mk_union(l, r)
3152 }
3153 _ => node,
3154 }
3155 }
3156
3157 fn strip_trailing_ts(&self, node: NodeId) -> NodeId {
3158 if node.is_concat(self) && node.right(self) == NodeId::TS {
3159 node.left(self)
3160 } else {
3161 node
3162 }
3163 }
3164
3165 pub fn mk_lookahead_internal(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
3167 let key = NodeKey {
3168 kind: Kind::Lookahead,
3169 left: la_body,
3170 right: la_tail,
3171 extra: rel,
3172 };
3173 if let Some(id) = self.key_is_created(&key) {
3174 return *id;
3175 }
3176 if la_body == NodeId::TS {
3177 if rel == 0 {
3178 return la_tail.missing_to_eps();
3179 } else {
3180 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
3181 }
3182 }
3183 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
3184 return NodeId::BOT;
3185 }
3186 if la_tail.is_missing() && rel == u32::MAX {
3187 return NodeId::BOT;
3188 }
3189
3190 if la_body == NodeId::EPS && la_tail.is_missing() && rel == 0 {
3191 return la_body;
3192 }
3193
3194 if la_tail == NodeId::TS {
3195 if rel == 0 || rel == u32::MAX {
3196 return self.mk_concat(la_body, NodeId::TS);
3197 } else if rel == u32::MAX {
3198 return self.mk_begins_with(la_body);
3199 }
3200 }
3201
3202 if rel == u32::MAX {
3203 if la_tail.is_missing() {
3204 return NodeId::BOT;
3205 }
3206
3207 if self.is_always_nullable(la_body) {
3208 return la_tail.missing_to_eps();
3209 }
3210
3211 if la_tail != NodeId::MISSING {
3212 match self.get_kind(la_tail) {
3213 _ => {
3214 if la_body.is_compl_plus_end(self) {
3215 let minlen = self.get_min_length_only(la_tail);
3216 if minlen >= 1 {
3217 return NodeId::BOT;
3218 }
3219 }
3220 }
3221 }
3222 }
3223 }
3224
3225 if la_tail != NodeId::MISSING && la_tail.is_lookahead(self) {
3226 let la_body2 = self.get_lookahead_inner(la_tail);
3227 let body1_ts = self.mk_concat(la_body, NodeId::TS);
3228 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
3229 let new_la_body = self.mk_inter(body1_ts, body2_ts);
3230 let new_la_rel = self.get_lookahead_rel(la_tail);
3231 let new_la_tail = self.get_lookahead_tail(la_tail);
3232 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
3233 }
3234
3235 if la_body.is_concat(self) && la_body.left(self) == NodeId::TS {
3236 let la_body_right = la_body.right(self);
3237 if self.is_always_nullable(la_body_right) {
3238 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
3239 }
3240 let bodyright = la_body.right(self);
3241 if bodyright.is_concat(self) && bodyright.left(self) == NodeId::END {
3242 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
3243 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
3244 }
3245 }
3246
3247 if la_tail != NodeId::MISSING {
3248 if let (Kind::Concat, Kind::Pred) = (self.get_kind(la_body), self.get_kind(la_tail)) {
3249 let lpred = la_body.left(self);
3250 if lpred.is_pred(self) {
3251 let l = lpred.pred_tset(self);
3252 let r = la_tail.pred_tset(self);
3253 let psi_and = self.solver().and_id(l, r);
3254 let rewrite = self.mk_pred(psi_and);
3255 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
3256 let new_right =
3257 self.mk_lookahead_internal(la_body.right(self), NodeId::MISSING, new_rel);
3258 return self.mk_concat(rewrite, new_right);
3259 }
3260 }
3261 }
3262
3263 self.get_node_id(key)
3264 }
3265
3266 pub fn mk_counted(&mut self, body: NodeId, chain: NodeId, packed: u32) -> NodeId {
3267 let has_match = (packed >> 16) > 0;
3268 if body == NodeId::BOT && chain == NodeId::MISSING && !has_match {
3269 return NodeId::BOT;
3270 }
3271 debug_assert!(
3272 body == NodeId::BOT || !self.is_infinite(body),
3273 "Counted body must have finite max length"
3274 );
3275 let chain = self.prune_counted_chain(body, chain);
3276 let key = NodeKey {
3277 kind: Kind::Counted,
3278 left: body,
3279 right: chain,
3280 extra: packed,
3281 };
3282 if let Some(id) = self.key_is_created(&key) {
3283 return *id;
3284 }
3285 self.get_node_id(key)
3286 }
3287
3288 fn prune_counted_chain(&mut self, body: NodeId, chain: NodeId) -> NodeId {
3289 if chain == NodeId::MISSING || body == NodeId::BOT {
3290 return chain;
3291 }
3292 if self.nullability(body) != Nullability::NEVER {
3293 return NodeId::MISSING;
3294 }
3295 let chain_body = chain.left(self);
3296 if chain_body == NodeId::BOT {
3297 return chain;
3298 }
3299 let not_begins = self.mk_not_begins_with(body);
3300 let inter = self.mk_inter(chain_body, not_begins);
3301 let is_empty = inter == NodeId::BOT;
3302 if is_empty {
3303 self.prune_counted_chain(body, chain.right(self))
3304 } else {
3305 chain
3306 }
3307 }
3308
3309 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
3310 let neg_inner = self.mk_concat(body, NodeId::TS);
3311 let neg_part = self.mk_compl(neg_inner);
3312 let conc = self.mk_concat(neg_part, NodeId::END);
3313 self.mk_lookahead(conc, NodeId::MISSING, rel)
3314 }
3315
3316 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
3317 match self.get_node(body).kind {
3318 Kind::Pred => {
3319 let psi = body.pred_tset(self);
3320 let negated = self.mk_pred_not(psi);
3321 let union = self.mk_union(NodeId::BEGIN, negated);
3322 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3324 }
3325 _ => {
3326 let uc = crate::unicode_classes::utf8_char(self);
3329 let neg = self.mk_compl(body);
3330 let negated = self.mk_inter(neg, uc);
3331 let union = self.mk_union(NodeId::BEGIN, negated);
3332 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
3334 }
3335 }
3336 }
3337
3338 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
3339 debug_assert!(left != NodeId::MISSING);
3340 debug_assert!(right != NodeId::MISSING);
3341 if left > right {
3342 return self.mk_union(right, left);
3343 }
3344 let key = NodeKey {
3345 kind: Kind::Union,
3346 left,
3347 right,
3348 extra: u32::MAX,
3349 };
3350 if let Some(id) = self.key_is_created(&key) {
3351 return *id;
3352 }
3353 if left == right {
3354 return self.init_as(key, left);
3355 }
3356 if left == NodeId::BOT {
3357 return self.init_as(key, right);
3358 }
3359 if right == NodeId::BOT {
3360 return self.init_as(key, left);
3361 }
3362 if right == NodeId::TS {
3363 return self.init_as(key, right);
3364 }
3365 if left == NodeId::TS {
3366 return self.init_as(key, left);
3367 }
3368 match (self.get_kind(left), self.get_kind(right)) {
3369 (Kind::Union, _) => {
3370 self.iter_unions_b(left, &mut |b, v| {
3371 b.temp_vec.push(v);
3372 });
3373 self.iter_unions_b(right, &mut |b, v| {
3374 b.temp_vec.push(v);
3375 });
3376 self.temp_vec.sort();
3377 let tree = self.temp_vec.clone();
3378 self.temp_vec.clear();
3379 let newnode = tree
3380 .iter()
3381 .rev()
3382 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3383 return self.init_as(key, newnode);
3384 }
3385 (_, Kind::Union) => {
3386 let rleft = right.left(self);
3387 if left > rleft {
3389 self.iter_unions_b(left, &mut |b, v| {
3390 b.temp_vec.push(v);
3391 });
3392 self.iter_unions_b(right, &mut |b, v| {
3393 b.temp_vec.push(v);
3394 });
3395 self.temp_vec.sort();
3396 let tree = self.temp_vec.clone();
3397 self.temp_vec.clear();
3398 let newnode = tree
3399 .iter()
3400 .rev()
3401 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
3402 return self.init_as(key, newnode);
3403 } else {
3404 if let Some(rw) = self.attempt_rw_unions(left, right) {
3405 return self.init_as(key, rw);
3406 }
3407 }
3408 }
3409 _ => {}
3410 }
3411
3412 if let Some(rw) = self.attempt_rw_union_2(left, right) {
3413 return self.init_as(key, rw);
3414 }
3415 self.init(key)
3416 }
3417
3418 pub fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
3419 debug_assert!(left_id != NodeId::MISSING);
3420 debug_assert!(right_id != NodeId::MISSING);
3421 if left_id == right_id {
3422 return left_id;
3423 }
3424 if left_id == NodeId::BOT || right_id == NodeId::BOT {
3425 return NodeId::BOT;
3426 }
3427 if left_id == NodeId::TS {
3428 return right_id;
3429 }
3430 if right_id == NodeId::TS {
3431 return left_id;
3432 }
3433 if left_id > right_id {
3434 return self.mk_inter(right_id, left_id);
3435 }
3436 let key = NodeKey {
3437 kind: Kind::Inter,
3438 left: left_id,
3439 right: right_id,
3440 extra: u32::MAX,
3441 };
3442 if let Some(id) = self.key_is_created(&key) {
3443 return *id;
3444 }
3445
3446 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
3447 return self.init_as(key, rw);
3448 }
3449
3450 self.init(key)
3451 }
3452
3453 fn mk_unset(&mut self, kind: Kind) -> NodeId {
3454 let node = NodeKey {
3455 kind,
3456 left: NodeId::MISSING,
3457 right: NodeId::MISSING,
3458 extra: u32::MAX,
3459 };
3460 self.init(node)
3461 }
3462
3463 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
3464 let star = self.mk_star(body_id);
3465 self.mk_concat(body_id, star)
3466 }
3467 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
3468 let opt = self.mk_opt(body_id);
3469 let mut nodes1 = vec![];
3470 for _ in lower..upper {
3471 nodes1.push(opt);
3472 }
3473 for _ in 0..lower {
3474 nodes1.push(body_id);
3475 }
3476 self.mk_concats(nodes1.into_iter())
3477 }
3478 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
3479 self.mk_union(NodeId::EPS, body_id)
3480 }
3481
3482 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
3483 let key = NodeKey {
3484 kind: Kind::Star,
3485 left: body_id,
3486 right: NodeId::MISSING,
3487 extra: 0,
3488 };
3489 if let Some(id) = self.key_is_created(&key) {
3490 return *id;
3491 }
3492 if body_id.is_kind(self, Kind::Star) {
3494 return body_id;
3495 }
3496 self.get_node_id(key)
3497 }
3498
3499 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
3502 match self.get_kind(node_id) {
3503 Kind::End => Nullability::EMPTYSTRING,
3504 Kind::Begin => Nullability::EMPTYSTRING,
3505 Kind::Pred => Nullability::NEVER,
3506 Kind::Star => Nullability::ALWAYS,
3507 Kind::Inter | Kind::Concat => {
3508 let lnull = self.nullability_emptystring(node_id.left(self));
3509 let rnull = self.nullability_emptystring(node_id.right(self));
3510 lnull.and(rnull) }
3512 Kind::Union => {
3513 let lnull = self.nullability_emptystring(node_id.left(self));
3514 let rnull = self.nullability_emptystring(node_id.right(self));
3515 lnull.or(rnull)
3516 }
3517 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
3518 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
3519 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
3520 Kind::Counted => self.nullability_emptystring(node_id.left(self)),
3521 }
3522 }
3523
3524 #[inline(always)]
3525 pub fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
3526 self.get_meta(node_id)
3527 .flags
3528 .nullability()
3529 .has(Nullability::CENTER.or(Nullability::END))
3530 }
3531
3532 pub fn nullability(&self, node_id: NodeId) -> Nullability {
3533 self.get_only_nullability(node_id)
3534 }
3535
3536 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
3537 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
3538 }
3539
3540 pub fn pp(&self, node_id: NodeId) -> String {
3541 let mut s = String::new();
3542 self.ppw(&mut s, node_id).unwrap();
3543 s
3544 }
3545
3546 pub fn factor_suffixes(&mut self, node: NodeId) -> Result<NodeId, ResharpError> {
3549 let mut memo = FxHashMap::default();
3550 self.factor_suffixes_rec(node, &mut memo)
3551 }
3552
3553 fn factor_suffixes_rec(
3554 &mut self,
3555 node: NodeId,
3556 memo: &mut FxHashMap<NodeId, NodeId>,
3557 ) -> Result<NodeId, ResharpError> {
3558 if let Some(&v) = memo.get(&node) {
3559 return Ok(v);
3560 }
3561 let result = match self.get_kind(node) {
3562 Kind::Pred | Kind::Begin | Kind::End => node,
3563 Kind::Star => {
3564 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3565 self.mk_star(inner)
3566 }
3567 Kind::Compl => {
3568 let inner = self.factor_suffixes_rec(node.left(self), memo)?;
3569 self.mk_compl(inner)
3570 }
3571 Kind::Concat => {
3572 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3573 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3574 self.mk_concat(l, r)
3575 }
3576 Kind::Inter => {
3577 let l = self.factor_suffixes_rec(node.left(self), memo)?;
3578 let r = self.factor_suffixes_rec(node.right(self), memo)?;
3579 self.mk_inter(l, r)
3580 }
3581 Kind::Union => {
3582 let mut comps: Vec<NodeId> = Vec::new();
3583 node.iter_union(self, &mut |_, c| comps.push(c));
3584 let mut factored: Vec<NodeId> = Vec::with_capacity(comps.len());
3585 let mut reversible = true;
3586 for c in &comps {
3587 let f = self.factor_suffixes_rec(*c, memo)?;
3588 if c.contains_lookaround(self)
3589 || c.flags_contains(self).has(MetaFlags::CONTAINS_ANCHORS)
3590 {
3591 reversible = false;
3592 }
3593 factored.push(f);
3594 }
3595 if !reversible || factored.len() < 2 {
3596 let mut acc = factored[0];
3597 for c in factored.into_iter().skip(1) {
3598 acc = self.mk_union(acc, c);
3599 }
3600 acc
3601 } else {
3602 let mut rev_comps: Vec<NodeId> = Vec::with_capacity(factored.len());
3603 for c in factored {
3604 rev_comps.push(self.reverse(c)?);
3605 }
3606 let mut acc = rev_comps[0];
3607 for c in rev_comps.into_iter().skip(1) {
3608 acc = self.mk_union(acc, c);
3609 }
3610 self.reverse(acc)?
3611 }
3612 }
3613 Kind::Lookahead => {
3614 let inner = self.get_lookahead_inner(node);
3615 let tail = self.get_lookahead_tail(node);
3616 let rel = self.get_lookahead_rel(node);
3617 let ni = self.factor_suffixes_rec(inner, memo)?;
3618 let nt = self.factor_suffixes_rec(tail, memo)?;
3619 self.mk_lookahead(ni, nt, rel)
3620 }
3621 Kind::Lookbehind => {
3622 let inner = self.get_lookbehind_inner(node);
3623 let prev = self.get_lookbehind_prev(node);
3624 let ni = self.factor_suffixes_rec(inner, memo)?;
3625 let np = if prev != NodeId::MISSING {
3626 self.factor_suffixes_rec(prev, memo)?
3627 } else {
3628 prev
3629 };
3630 self.mk_lookbehind(ni, np)
3631 }
3632 Kind::Counted => node,
3633 };
3634 memo.insert(node, result);
3635 Ok(result)
3636 }
3637
3638 #[allow(dead_code)]
3639 pub fn pp_nulls(&self, node_id: NodeId) -> String {
3640 let nu = self.get_nulls_id(node_id);
3641 let nr = self.mb.nb.get_set_ref(nu);
3642 let s1 = format!("{:?}", nr);
3643 s1
3644 }
3645
3646 #[allow(dead_code)]
3647 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
3648 match self.get_tregex(term_id) {
3649 TRegex::Leaf(node_id) => {
3650 let mut s = String::new();
3651 self.ppw(&mut s, *node_id).unwrap();
3652 s
3653 }
3654 TRegex::ITE(cond, then_id, else_id) => {
3655 format!(
3656 "ITE({},{},{})",
3657 self.solver_ref().pp(*cond),
3658 self.ppt(*then_id),
3659 self.ppt(*else_id)
3660 )
3661 }
3662 }
3663 }
3664
3665 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
3666 if cfg!(feature = "graphviz") {
3667 match node_id {
3668 NodeId::MISSING => return write!(s, "MISSING"),
3669 NodeId::BOT => return write!(s, "⊥"),
3670 NodeId::TS => return write!(s, "_*"),
3671 NodeId::TOP => return write!(s, "_"),
3672 NodeId::EPS => return write!(s, "ε"),
3673 _ => {}
3674 }
3675 }
3676
3677 match node_id {
3678 NodeId::MISSING => return write!(s, "MISSING"),
3679 NodeId::BOT => return write!(s, "⊥"),
3680 NodeId::TS => return write!(s, "_*"),
3681 NodeId::TOP => return write!(s, "_"),
3682 NodeId::EPS => return write!(s, ""),
3683 _ => {}
3684 }
3685
3686 match self.get_kind(node_id) {
3687 Kind::End => write!(s, r"\z"),
3688 Kind::Begin => write!(s, r"\A"),
3689 Kind::Pred => {
3690 let psi = node_id.pred_tset(self);
3691 if psi == TSetId::EMPTY {
3692 write!(s, r"⊥")
3693 } else if psi == TSetId::FULL {
3694 write!(s, r"_")
3695 } else {
3696 write!(s, "{}", self.solver_ref().pp(psi))
3697 }
3698 }
3699 Kind::Inter => {
3700 write!(s, "(")?;
3701 self.ppw(s, node_id.left(self))?;
3702 write!(s, "&")?;
3703 let mut curr = node_id.right(self);
3704 while curr.is_inter(self) {
3705 let n = curr.left(self);
3706 self.ppw(s, n)?;
3707 write!(s, "&")?;
3708 curr = curr.right(self);
3709 }
3710 self.ppw(s, curr)?;
3711 write!(s, ")")
3712 }
3713 Kind::Union => {
3714 let left = node_id.left(self);
3715 let right = node_id.right(self);
3716 write!(s, "(")?;
3717 self.ppw(s, left)?;
3718 write!(s, "|")?;
3719 let mut curr = right;
3720 while self.get_kind(curr) == Kind::Union {
3721 let n = curr.left(self);
3722 self.ppw(s, n)?;
3723 write!(s, "|")?;
3724 curr = curr.right(self);
3725 }
3726 self.ppw(s, curr)?;
3727 write!(s, ")")
3728 }
3729 Kind::Concat => {
3730 let left = node_id.left(self);
3731 let right = node_id.right(self);
3732 if right.is_star(self) && right.left(self) == left {
3733 self.ppw(s, left)?;
3734 write!(s, "+")?;
3735 return Ok(());
3736 }
3737 if right.is_concat(self) {
3738 let rl = right.left(self);
3739 if rl.is_star(self) && rl.left(self) == left {
3740 self.ppw(s, left)?;
3741 write!(s, "+")?;
3742 return self.ppw(s, right.right(self));
3743 }
3744 }
3745 if right.is_concat(self) && right.left(self) == left {
3746 let mut num = 1;
3747 let mut right = right;
3748 while right.is_concat(self) && right.left(self) == left {
3749 num += 1;
3750 right = right.right(self);
3751 }
3752 if let Some(inner) = left.is_opt_v(self) {
3754 let mut inner_count = 0;
3755 let mut right2 = right;
3756 while right2.is_concat(self) && right2.left(self) == inner {
3757 inner_count += 1;
3758 right2 = right2.right(self);
3759 }
3760 if right2 == inner {
3761 inner_count += 1;
3762 self.ppw(s, inner)?;
3763 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
3764 }
3765 if inner_count > 0 {
3766 self.ppw(s, inner)?;
3767 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
3768 return self.ppw(s, right2);
3769 }
3770 }
3771 self.ppw(s, left)?;
3772 if right == left {
3773 num += 1;
3774 return write!(s, "{{{}}}", num);
3775 }
3776 if num <= 3 && left.is_pred(self) {
3777 for _ in 1..num {
3778 self.ppw(s, left)?;
3779 }
3780 return self.ppw(s, right);
3781 } else {
3782 write!(s, "{{{}}}", num)?;
3783 return self.ppw(s, right);
3784 }
3785 }
3786 self.ppw(s, left)?;
3787 self.ppw(s, right)
3788 }
3789 Kind::Star => {
3790 let left = node_id.left(self);
3791 let leftkind = self.get_kind(left);
3792 match leftkind {
3793 Kind::Concat | Kind::Star | Kind::Compl => {
3794 write!(s, "(")?;
3795 self.ppw(s, left)?;
3796 write!(s, ")")?;
3797 }
3798 _ => {
3799 self.ppw(s, left)?;
3800 }
3801 };
3802 write!(s, "*")
3803 }
3804 Kind::Compl => {
3805 write!(s, "~(")?;
3806 self.ppw(s, node_id.left(self))?;
3807 write!(s, ")")
3808 }
3809 Kind::Lookbehind => {
3810 let lbleft = self.get_lookbehind_prev(node_id);
3811 let lbinner = self.get_lookbehind_inner(node_id);
3812 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
3813 if lbleft != NodeId::MISSING {
3814 write!(s, "❮")?;
3815 self.ppw(s, lbleft)?;
3816 write!(s, "❯")?;
3817 }
3818
3819 write!(s, "(?<=")?;
3820 self.ppw(s, lbinner)?;
3821 write!(s, ")")
3822 }
3823 Kind::Lookahead => {
3824 let inner = self.get_lookahead_inner(node_id);
3825 write!(s, "(?=")?;
3826 self.ppw(s, inner)?;
3827 write!(s, ")")?;
3828 if self.get_lookahead_rel(node_id) != 0 {
3829 write!(s, "{{")?;
3830 let rel = self.get_lookahead_rel(node_id);
3831 if rel == u32::MAX {
3832 write!(s, "∅")?;
3833 } else {
3834 write!(s, "{}", rel)?;
3835 }
3836 write!(s, "}}")?;
3837 }
3838 if node_id.right(self) == NodeId::MISSING {
3839 Ok(())
3840 } else {
3841 write!(s, "❮")?;
3842 self.ppw(s, node_id.right(self))?;
3843 write!(s, "❯")
3844 }
3845 }
3846 Kind::Counted => {
3847 let body = node_id.left(self);
3848 let packed = self.get_extra(node_id);
3849 let step = packed & 0xFFFF;
3850 let best = packed >> 16;
3851 write!(s, "#(")?;
3852 self.ppw(s, body)?;
3853 write!(s, ")s{}b{}", step, best)
3854 }
3855 }
3856 }
3857
3858 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
3859 self.mk_concat(node, NodeId::TS)
3860 }
3861
3862 pub fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
3863 let node_ts = self.mk_concat(node, NodeId::TS);
3864 self.mk_compl(node_ts)
3865 }
3866
3867 pub fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
3868 let notset = self.solver().not_id(set);
3869 self.mk_pred(notset)
3870 }
3871
3872 pub fn mk_u8(&mut self, char: u8) -> NodeId {
3873 let set_id = self.solver().u8_to_set_id(char);
3874 self.mk_pred(set_id)
3875 }
3876
3877 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
3878 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
3879 self.mk_pred(rangeset)
3880 }
3881
3882 pub fn mk_ranges_u8(&mut self, ranges: &[(u8, u8)]) -> NodeId {
3883 let mut node = self.mk_range_u8(ranges[0].0, ranges[0].1);
3884 for &(lo, hi) in &ranges[1..] {
3885 let r = self.mk_range_u8(lo, hi);
3886 node = self.mk_union(node, r);
3887 }
3888 node
3889 }
3890
3891 pub fn extract_literal_prefix(&self, node: NodeId) -> (Vec<u8>, bool) {
3892 let mut prefix = Vec::new();
3893 let mut curr = node;
3894 loop {
3895 if curr == NodeId::EPS {
3896 let full = !prefix.is_empty();
3897 return (prefix, full);
3898 }
3899 if curr == NodeId::BOT {
3900 break;
3901 }
3902 if curr.is_pred(self) {
3903 match self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
3904 Some(byte) => {
3905 prefix.push(byte);
3906 return (prefix, true);
3907 }
3908 None => break, }
3910 }
3911 if !curr.is_concat(self) {
3912 break;
3913 }
3914 let left = curr.left(self);
3915 if !left.is_pred(self) {
3916 break;
3917 }
3918 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3919 Some(byte) => prefix.push(byte),
3920 None => break,
3921 }
3922 curr = curr.right(self);
3923 }
3924 (prefix, false)
3925 }
3926
3927 #[allow(dead_code)]
3928 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3929 let mut result = NodeId::EPS;
3930 for byte in raw_str.iter().rev() {
3931 let node = self.mk_u8(*byte);
3932 result = self.mk_concat(node, result);
3933 }
3934 result
3935 }
3936
3937 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3938 let mut result = NodeId::EPS;
3939 for byte in raw_str.bytes().rev() {
3940 let node = self.mk_u8(byte);
3941 result = self.mk_concat(node, result);
3942 }
3943 result
3944 }
3945
3946 pub fn prune_fwd(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3947 self.prune_rec::<true>(node_id, memo)
3948 }
3949
3950 pub fn prune_rev(&mut self, node_id: NodeId, memo: &mut FxHashMap<NodeId, NodeId>) -> NodeId {
3951 self.prune_rec::<false>(node_id, memo)
3952 }
3953
3954 pub fn simplify_fwd_initial(&mut self, node_id: NodeId) -> NodeId {
3955 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
3956 match self.get_kind(node_id) {
3957 Kind::Concat => {
3958 let l = self.simplify_fwd_initial_rec(node_id.left(self), &mut memo);
3959 let r = self.simplify_fwd_initial_rec(node_id.right(self), &mut memo);
3960 if r.is_concat(self) {
3961 if r.left(self).is_ts() && !r.right(self).is_lookahead(self) {
3962 if l.is_begin_nullable(self) {
3963 return r;
3964 }
3965 }
3966 }
3967 }
3968 _ => {}
3969 }
3970
3971 self.simplify_fwd_initial_rec(node_id, &mut memo)
3972 }
3973
3974 fn simplify_fwd_initial_rec(
3975 &mut self,
3976 node_id: NodeId,
3977 memo: &mut FxHashMap<NodeId, NodeId>,
3978 ) -> NodeId {
3979 if let Some(&v) = memo.get(&node_id) {
3980 return v;
3981 }
3982
3983 let out = match self.get_kind(node_id) {
3984 Kind::Union => {
3985 let mut parts: Vec<NodeId> = Vec::new();
3986 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
3987 for p in &mut parts {
3988 *p = self.simplify_fwd_initial_rec(*p, memo);
3989 }
3990 parts
3991 .iter()
3992 .rev()
3993 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
3994 }
3995 Kind::Concat => {
3996 let l = self.simplify_fwd_initial_rec(node_id.left(self), memo);
3997 let r = self.simplify_fwd_initial_rec(node_id.right(self), memo);
3998 if l == NodeId::TS
3999 && r.is_lookahead(self)
4000 && self.get_lookahead_tail(r).is_missing()
4001 && self.get_lookahead_rel(r) == 0
4002 {
4003 let body = self.get_lookahead_inner(r);
4004 if self.is_nullable(body, Nullability::END) {
4005 return NodeId::TS;
4006 }
4007 }
4008
4009 if l != NodeId::TS {
4010 return self.mk_concat(l, r);
4011 }
4012 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4013 return rewritten;
4014 }
4015 let (head, tail) = if r.is_concat(self) {
4016 (r.left(self), r.right(self))
4017 } else {
4018 (r, NodeId::EPS)
4019 };
4020 if self.get_kind(head) != Kind::Union {
4021 return self.mk_concat(l, r);
4022 }
4023 if !head.left(self).is_begin() {
4024 return self.mk_concat(l, r);
4025 }
4026 let y = head.right(self);
4027 if !y.is_pred(self) {
4028 return self.mk_concat(l, r);
4029 }
4030 if tail.is_concat(self) {
4031 let tl = tail.left(self);
4032 let tr = tail.right(self);
4033 let y_tset = y.pred_tset(self);
4034 let covers_all = if tl == NodeId::TS {
4035 true
4036 } else if let Some(x_pred) = tl.is_pred_star(self) {
4037 let x_ts = x_pred.pred_tset(self);
4038 let combined = self.solver().or_id(x_ts, y_tset);
4039 self.solver().is_full_id(combined)
4040 } else {
4041 false
4042 };
4043 if covers_all {
4044 return self.mk_concat(NodeId::TS, tr);
4045 }
4046 }
4047 self.mk_concat(l, r)
4048 }
4049 _ => node_id,
4050 };
4051 memo.insert(node_id, out);
4052 out
4053 }
4054
4055 pub fn simplify_rev_initial(&mut self, node_id: NodeId) -> NodeId {
4056 let mut memo: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4057 self.simplify_rev_initial_rec(node_id, &mut memo)
4058 }
4059
4060 fn simplify_rev_initial_rec(
4061 &mut self,
4062 node_id: NodeId,
4063 memo: &mut FxHashMap<NodeId, NodeId>,
4064 ) -> NodeId {
4065 if let Some(&v) = memo.get(&node_id) {
4066 return v;
4067 }
4068 let out = match self.get_kind(node_id) {
4069 Kind::Union => {
4070 let mut parts: Vec<NodeId> = Vec::new();
4071 self.iter_unions_b(node_id, &mut |_, v| parts.push(v));
4072 for p in &mut parts {
4073 *p = self.simplify_rev_initial_rec(*p, memo);
4074 }
4075 parts
4076 .iter()
4077 .rev()
4078 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4079 }
4080 Kind::Concat => {
4081 let l = self.simplify_rev_initial_rec(node_id.left(self), memo);
4082 let r = self.simplify_rev_initial_rec(node_id.right(self), memo);
4083
4084 if l != NodeId::TS {
4085 return self.mk_concat(l, r);
4086 }
4087 if let Some(rewritten) = self.try_begin_neg_pred_rewrite(r) {
4088 return rewritten;
4089 }
4090 let (tail1, tail2) = if r.is_concat(self) {
4091 (r.left(self), r.right(self))
4092 } else {
4093 (r, NodeId::EPS)
4094 };
4095
4096 if tail1.is_begin() {
4097 return r;
4098 }
4099
4100 if self.get_kind(tail1) != Kind::Union {
4101 return self.mk_concat(l, r);
4102 }
4103 if !tail1.left(self).is_begin() {
4104 return self.mk_concat(l, r);
4105 }
4106 let y = tail1.right(self);
4107 if !y.is_pred(self) {
4108 return self.mk_concat(l, r);
4109 }
4110 if tail2.is_concat(self) {
4111 let tl = tail2.left(self);
4112 let tr = tail2.right(self);
4113
4114 let y_tset = y.pred_tset(self);
4115 let covers_all = if tl == NodeId::TS {
4116 true
4117 } else if let Some(x_pred) = tl.is_pred_star(self) {
4118 let x_ts = x_pred.pred_tset(self);
4119 let combined = self.solver().or_id(x_ts, y_tset);
4120 self.solver().is_full_id(combined)
4121 } else {
4122 false
4123 };
4124 if covers_all {
4125 return self.mk_concat(NodeId::TS, tr);
4126 }
4127 }
4128 self.mk_concat(l, r)
4129 }
4130 _ => node_id,
4131 };
4132 memo.insert(node_id, out);
4133 out
4134 }
4135
4136 fn try_begin_neg_pred_rewrite(&mut self, r: NodeId) -> Option<NodeId> {
4137 if !r.is_concat(self) {
4138 return None;
4139 }
4140 let begin = r.left(self);
4141 let mid_tail = r.right(self);
4142 if !begin.is_begin() {
4143 return None;
4144 }
4145 if !mid_tail.is_concat(self) {
4146 return None;
4147 }
4148 let neg = mid_tail.left(self);
4149 let tail = mid_tail.right(self);
4150 if self.get_kind(neg) != Kind::Compl {
4151 return None;
4152 }
4153 let inside = neg.left(self);
4154 if !inside.is_concat(self) {
4155 return None;
4156 }
4157 let ts_part = inside.left(self);
4158 let c_pred = inside.right(self);
4159 if ts_part != NodeId::TS || !c_pred.is_pred(self) {
4160 return None;
4161 }
4162 let c_tset = c_pred.pred_tset(self);
4163 let not_c = self.mk_pred_not(c_tset);
4164 let left_branch = self.mk_concat(NodeId::BEGIN, tail);
4165 let not_c_tail = self.mk_concat(not_c, tail);
4166 let right_branch = self.mk_concat(NodeId::TS, not_c_tail);
4167 Some(self.mk_union(left_branch, right_branch))
4168 }
4169
4170 fn strip_la_body_end(&mut self, n: NodeId) -> NodeId {
4171 if !n.is_concat(self) {
4172 return n;
4173 }
4174 let l = n.left(self);
4175 let r = n.right(self);
4176 if l == NodeId::TS && r == NodeId::END {
4177 return NodeId::TS;
4178 }
4179 let new_r = self.strip_la_body_end(r);
4180 if new_r == r {
4181 n
4182 } else {
4183 self.mk_concat(l, new_r)
4184 }
4185 }
4186
4187 fn prune_rec<const FWD: bool>(
4188 &mut self,
4189 node_id: NodeId,
4190 memo: &mut FxHashMap<NodeId, NodeId>,
4191 ) -> NodeId {
4192 assert!(node_id != NodeId::MISSING);
4193 if node_id == NodeId::MISSING {
4194 return node_id;
4195 }
4196 if let Some(&v) = memo.get(&node_id) {
4197 return v;
4198 }
4199 let (l, r) = (node_id.left(self), node_id.right(self));
4200 let out = match node_id.kind(self) {
4201 Kind::Union => {
4202 let l = self.prune_rec::<FWD>(l, memo);
4203 let r = self.prune_rec::<FWD>(r, memo);
4204 let mut parts: Vec<NodeId> = Vec::new();
4205 parts.push(l);
4206 self.iter_unions_b(r, &mut |_, v| parts.push(v));
4207 for p in &mut parts {
4208 *p = self.prune_rec::<FWD>(*p, memo);
4209 }
4210
4211 if FWD {
4212 let mut best: FxHashMap<NodeId, u32> = FxHashMap::default();
4214 for &p in &parts {
4215 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4216 let body = self.get_lookahead_inner(p);
4217 let rel = self.get_lookahead_rel(p);
4218 best.entry(body)
4219 .and_modify(|r| *r = (*r).min(rel))
4220 .or_insert(rel);
4221 }
4222 }
4223 parts.iter().rev().fold(NodeId::BOT, |acc, &p| {
4224 if p.is_lookahead(self) && self.get_lookahead_tail(p) == NodeId::MISSING {
4225 let body = self.get_lookahead_inner(p);
4226 if self.get_lookahead_rel(p) != best[&body] {
4227 return acc;
4228 }
4229 }
4230 self.mk_union(p, acc)
4231 })
4232 } else {
4233 parts
4234 .iter()
4235 .rev()
4236 .fold(NodeId::BOT, |acc, &p| self.mk_union(p, acc))
4237 }
4238 }
4239 Kind::Concat => {
4240 if FWD
4241 && l.is_ts()
4242 && r.is_lookahead(self)
4243 && self.get_lookahead_tail(r).is_missing()
4244 && self.get_lookahead_rel(r) == 0
4245 {
4246 let body = self.get_lookahead_inner(r);
4247 if self.is_nullable(body, Nullability::END) {
4248 return NodeId::TS;
4249 }
4250 }
4251 let l = self.prune_rec::<FWD>(l, memo);
4252 let r = self.prune_rec::<FWD>(r, memo);
4253 self.mk_concat(l, r)
4254 }
4255 Kind::Inter => {
4256 let l = self.prune_rec::<FWD>(l, memo);
4257 let r = self.prune_rec::<FWD>(r, memo);
4258 self.mk_inter(l, r)
4259 }
4260 Kind::Compl => {
4261 let l = self.prune_rec::<FWD>(l, memo);
4262 self.mk_compl(l)
4263 }
4264 Kind::Lookahead => {
4265 let lrel = self.get_lookahead_rel(node_id);
4266 let body = self.strip_la_body_end(self.get_lookahead_inner(node_id));
4267 let body = self.prune_rec::<FWD>(body, memo);
4268
4269 let tail = if self.get_lookahead_tail(node_id).is_missing() {
4270 NodeId::MISSING
4271 } else {
4272 self.prune_rec::<FWD>(self.get_lookahead_tail(node_id), memo)
4273 };
4274 self.mk_lookahead_internal(body, tail, lrel)
4275 }
4276 Kind::Begin => NodeId::BOT,
4277 Kind::Counted => {
4278 let body = self.prune_rec::<FWD>(l, memo);
4279 let chain = self.prune_rec::<FWD>(r, memo);
4280 self.mk_counted(body, chain, self.get_extra(node_id))
4281 }
4282 Kind::End | Kind::Pred => node_id,
4283 Kind::Star => {
4284 let l = self.prune_rec::<FWD>(l, memo);
4285 self.mk_star(l)
4286 }
4287 Kind::Lookbehind => {
4288 let l = self.prune_rec::<FWD>(l, memo);
4289 if r.is_missing() {
4290 l
4291 } else {
4292 node_id
4293 }
4294 }
4295 };
4296 memo.insert(node_id, out);
4297 out
4298 }
4299
4300 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4301 let mut sorted: Vec<NodeId> = nodes.collect();
4302 if sorted.len() <= 1 {
4303 return sorted.pop().unwrap_or(NodeId::BOT);
4304 }
4305 sorted.sort();
4306 sorted.dedup();
4307 sorted.retain(|&x| x != NodeId::BOT);
4308 if sorted.is_empty() {
4309 return NodeId::BOT;
4310 }
4311 if sorted.len() > 16 {
4312 let mut by_head: FxHashMap<NodeId, Vec<NodeId>> = FxHashMap::default();
4313 let mut non_concat: Vec<NodeId> = Vec::new();
4314 for &n in &sorted {
4315 if n.is_concat(self) {
4316 by_head.entry(self.get_left(n)).or_default().push(n);
4317 } else {
4318 non_concat.push(n);
4319 }
4320 }
4321 let mut absorbed: Vec<NodeId> = Vec::new();
4322 for &n in &non_concat {
4323 if by_head.contains_key(&n) {
4324 absorbed.push(n);
4325 }
4326 }
4327 if !absorbed.is_empty() {
4328 non_concat.retain(|n| !absorbed.contains(n));
4329 }
4330 if by_head.len() < sorted.len() {
4331 let mut groups: Vec<NodeId> = non_concat;
4332 for (head, tails) in by_head {
4333 let mut tail_nodes: Vec<NodeId> =
4334 tails.iter().map(|&n| self.get_right(n)).collect();
4335 if absorbed.contains(&head) {
4336 tail_nodes.push(NodeId::EPS);
4337 }
4338 let tail_union = self.mk_unions(tail_nodes.into_iter());
4339 let factored = self.mk_concat(head, tail_union);
4340 groups.push(factored);
4341 }
4342 groups.sort();
4343 groups.dedup();
4344 return self.mk_unions_balanced(&groups);
4345 }
4346 }
4347 self.mk_unions_balanced(&sorted)
4348 }
4349
4350 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
4351 match nodes.len() {
4352 0 => NodeId::BOT,
4353 1 => nodes[0],
4354 n => {
4355 let mid = n / 2;
4356 let left = self.mk_unions_balanced(&nodes[..mid]);
4357 let right = self.mk_unions_balanced(&nodes[mid..]);
4358 self.mk_union(left, right)
4359 }
4360 }
4361 }
4362
4363 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4364 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
4365 }
4366
4367 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
4368 nodes
4369 .rev()
4370 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
4371 }
4372}
4373
4374impl RegexBuilder {
4375 pub fn iter_sat(
4376 &mut self,
4377 stack: &mut Vec<(TRegexId, TSetId)>,
4378 f: &mut impl FnMut(&mut RegexBuilder, NodeId, TSetId),
4379 ) {
4380 debug_assert!(!stack.is_empty());
4381 loop {
4382 match stack.pop() {
4383 None => return,
4384 Some((curr, curr_set)) => match *self.get_tregex(curr) {
4385 TRegex::Leaf(n) => {
4386 let mut curr = n;
4387 while curr != NodeId::BOT {
4388 match self.get_kind(curr) {
4389 _ => {
4390 f(self, n, curr_set);
4391 curr = NodeId::BOT;
4392 }
4393 }
4394 }
4395 }
4396 TRegex::ITE(cnd, then_id, else_id) => {
4397 if else_id != TRegexId::BOT {
4398 let notcnd = self.solver().not_id(cnd);
4399 let interset1 = self.solver().and_id(curr_set, notcnd);
4400 stack.push((else_id, interset1));
4401 }
4402 let interset2 = self.solver().and_id(curr_set, cnd);
4403 stack.push((then_id, interset2));
4404 }
4405 },
4406 }
4407 }
4408 }
4409
4410 #[allow(dead_code)]
4411 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
4412 match self.get_tregex(term_id).clone() {
4413 TRegex::Leaf(node_id) => {
4414 if NodeId::BOT == node_id {
4415 vec![]
4416 } else {
4417 vec![node_id]
4418 }
4419 }
4420 TRegex::ITE(_, then_id, else_id) => {
4421 let mut then_nodes = self.extract_sat(then_id);
4422 let mut else_nodes = self.extract_sat(else_id);
4423 then_nodes.append(&mut else_nodes);
4424 then_nodes
4425 }
4426 }
4427 }
4428
4429 pub(crate) fn iter_unions_b(
4430 &mut self,
4431 curr: NodeId,
4432 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
4433 ) {
4434 let mut curr = curr;
4435 while self.get_kind(curr) == Kind::Union {
4436 f(self, curr.left(self));
4437 curr = curr.right(self);
4438 }
4439 f(self, curr);
4440 }
4441
4442 pub fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
4443 if !self.contains_look(node_id) {
4444 return Some(node_id);
4445 }
4446 match self.get_kind(node_id) {
4447 Kind::Pred | Kind::Begin | Kind::End => Some(node_id),
4448 Kind::Concat => {
4449 let left = node_id.left(self);
4450 let right = node_id.right(self);
4451 let elim_l = self.try_elim_lookarounds(left)?;
4452 let elim_r = self.try_elim_lookarounds(right)?;
4453 let rw = self.mk_concat(elim_l, elim_r);
4454 Some(rw)
4455 }
4456 Kind::Union => {
4457 let left = node_id.left(self);
4458 let right = node_id.right(self);
4459 let elim_l = self.try_elim_lookarounds(left)?;
4460 let elim_r = self.try_elim_lookarounds(right)?;
4461 let rw = self.mk_union(elim_l, elim_r);
4462 Some(rw)
4463 }
4464
4465 Kind::Star => {
4466 let body = node_id.left(self);
4467 let elim_l = self.try_elim_lookarounds(body)?;
4468 Some(self.mk_star(elim_l))
4469 }
4470 Kind::Compl => {
4471 let left = node_id.left(self);
4472 let elim_l = self.try_elim_lookarounds(left)?;
4473 Some(self.mk_compl(elim_l))
4474 }
4475 Kind::Lookahead => {
4476 let rel = self.get_lookahead_rel(node_id);
4477 if rel != 0 {
4478 return None;
4479 }
4480 let lbody = self.get_lookahead_inner(node_id);
4481 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
4482 let elim_l = self.try_elim_lookarounds(lbody)?;
4483 let elim_r = self.try_elim_lookarounds(ltail)?;
4484 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
4485 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
4486 let rw = self.mk_inter(lbody_ts, ltail_ts);
4487 Some(rw)
4488 }
4489 Kind::Lookbehind => {
4490 let linner = self.get_lookbehind_inner(node_id);
4491 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
4492 let elim_l = self.try_elim_lookarounds(linner)?;
4493 let elim_r = self.try_elim_lookarounds(lprev)?;
4494 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
4495 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
4496 let rw = self.mk_inter(lbody_ts, ltail_ts);
4497 Some(rw)
4498 }
4499 Kind::Inter => {
4500 let left = node_id.left(self);
4501 let right = node_id.right(self);
4502 let elim_l = self.try_elim_lookarounds(left)?;
4503 let elim_r = self.try_elim_lookarounds(right)?;
4504 let rw = self.mk_inter(elim_l, elim_r);
4505 Some(rw)
4506 }
4507 Kind::Counted => None,
4508 }
4509 }
4510
4511 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
4513 if self.nullability(node) == Nullability::NEVER {
4514 node
4515 } else {
4516 self.mk_inter(NodeId::TOPPLUS, node)
4517 }
4518 }
4519
4520 pub fn iter_find_stack(
4521 &self,
4522 stack: &mut Vec<TRegexId>,
4523 mut f: impl FnMut(NodeId) -> bool,
4524 ) -> bool {
4525 loop {
4526 match stack.pop() {
4527 None => return false,
4528 Some(curr) => match self.get_tregex(curr) {
4529 TRegex::Leaf(n) => {
4530 let mut curr = *n;
4531 while curr != NodeId::BOT {
4532 match self.get_kind(curr) {
4533 Kind::Union => {
4534 if f(curr.left(self)) {
4535 return true;
4536 }
4537 curr = curr.right(self);
4538 }
4539 _ => {
4540 if f(*n) {
4541 return true;
4542 }
4543 curr = NodeId::BOT;
4544 }
4545 }
4546 }
4547 }
4548 TRegex::ITE(_, then_id, else_id) => {
4549 if *else_id != TRegexId::BOT {
4550 stack.push(*else_id);
4551 }
4552 stack.push(*then_id);
4553 }
4554 },
4555 }
4556 }
4557 }
4558
4559 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
4585 if node == NodeId::BOT {
4586 return Some(true);
4587 }
4588 if self.nullability(node) != Nullability::NEVER {
4589 return Some(false);
4590 }
4591 if let Some(cached) = self.cache_empty.get(&node) {
4592 if cached.is_checked() {
4593 return Some(cached.is_empty());
4594 }
4595 }
4596 let node = if !self.contains_look(node) {
4597 node
4598 } else {
4599 self.try_elim_lookarounds(node)?
4600 };
4601 let isempty_flag = self.is_empty_lang_internal(node);
4602
4603 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
4604 }
4605
4606 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, ResharpError> {
4607 if !self.get_meta_flags(initial_node).contains_inter() {
4609 return Ok(NodeFlags::ZERO);
4610 }
4611
4612 let mut visited: FxHashMap<NodeId, NodeId> = FxHashMap::default();
4613 let mut worklist: VecDeque<NodeId> = VecDeque::new();
4614 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
4615 let mut stack = Vec::new();
4616 stack.push(begin_der);
4617 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
4618 visited.insert(node, initial_node);
4619 let nullability = self.nullability(node);
4620 if nullability != Nullability::NEVER {
4621 true
4622 } else {
4623 worklist.push_back(node);
4624 false
4625 }
4626 });
4627 if found_nullable_right_away {
4628 return Ok(NodeFlags::ZERO);
4629 }
4630
4631 worklist.push_back(initial_node);
4632 let isempty_flag: NodeFlags;
4633 let mut found_node = NodeId::BOT;
4634
4635 loop {
4636 match worklist.pop_front() {
4637 None => {
4638 isempty_flag = NodeFlags::IS_EMPTY;
4639 break;
4640 }
4641 Some(outer) => {
4642 if let Some(cached) = self.cache_empty.get(&outer) {
4643 if cached.is_checked() {
4644 if cached.is_empty() {
4645 continue;
4646 } else {
4647 return Ok(NodeFlags::ZERO);
4648 }
4649 }
4650 }
4651
4652 let derivative = self.der(outer, Nullability::CENTER)?;
4653
4654 stack.push(derivative);
4655
4656 let found_null = self.iter_find_stack(&mut stack, |node| {
4657 if let std::collections::hash_map::Entry::Vacant(e) = visited.entry(node) {
4658 found_node = node;
4659 if !self.get_meta_flags(node).contains_inter() {
4660 true
4661 } else {
4662 e.insert(outer);
4663 worklist.push_front(node);
4664 self.any_nonbegin_nullable(node)
4665 }
4666 } else {
4667 false
4668 }
4669 });
4670 if found_null {
4671 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
4672 isempty_flag = NodeFlags::ZERO;
4673 break;
4674 }
4675 }
4676 }
4677 }
4678
4679 self.cache_empty.insert(
4680 initial_node,
4681 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
4682 );
4683 Ok(isempty_flag)
4684 }
4685
4686 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
4688 if larger_lang == smaller_lang {
4689 return Some(true);
4690 }
4691
4692 if self
4694 .nullability(larger_lang)
4695 .not()
4696 .and(self.nullability(smaller_lang))
4697 != Nullability::NEVER
4698 {
4699 return Some(false);
4700 }
4701
4702 let (smaller_lang, larger_lang) =
4707 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
4708 let wrap = |b: &mut Self, n: NodeId| {
4709 let tmp = b.mk_concat(n, NodeId::TS);
4710 b.mk_concat(NodeId::TS, tmp)
4711 };
4712 (wrap(self, smaller_lang), wrap(self, larger_lang))
4713 } else {
4714 (smaller_lang, larger_lang)
4715 };
4716
4717 let nota = self.mk_compl(larger_lang);
4718 let diff = self.mk_inter(smaller_lang, nota);
4719 self.is_empty_lang(diff)
4720 }
4721}