1#![warn(dead_code)]
7
8use solver::{Solver, TSetId};
9use std::collections::{BTreeSet, HashMap, VecDeque};
10use std::fmt::Debug;
11use std::fmt::Write;
12use std::hash::Hash;
13
14use crate::nulls::{NullState, Nullability, NullsBuilder, NullsId};
15pub mod nulls;
16pub mod solver;
17
18#[derive(Debug, Clone, PartialEq, Eq)]
19pub enum AlgebraError {
20 AnchorLimit,
21 StateSpaceExplosion,
22 UnsupportedPattern,
23}
24
25impl std::fmt::Display for AlgebraError {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 match self {
28 AlgebraError::AnchorLimit => write!(f, "anchor limit exceeded"),
29 AlgebraError::StateSpaceExplosion => {
30 write!(f, "too many states, likely infinite state space")
31 }
32 AlgebraError::UnsupportedPattern => write!(f, "unsupported lookaround pattern"),
33 }
34 }
35}
36
37impl std::error::Error for AlgebraError {}
38
39mod id {
40 pub const MISSING: u32 = 0;
41 pub const BOT: u32 = 1;
42 pub const EPS: u32 = 2;
43 pub const TOP: u32 = 3;
44 pub const TOPSTAR: u32 = 4;
45 pub const TOPPLUS: u32 = 5;
46 pub const BEGIN: u32 = 6;
47 pub const END: u32 = 7;
48}
49
50#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug)]
51pub(crate) struct MetadataId(u32);
52impl MetadataId {
53 pub(crate) const MISSING: MetadataId = MetadataId(id::MISSING);
54}
55
56#[derive(Clone, Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Default)]
57pub struct NodeId(pub u32);
58impl NodeId {
59 pub const MISSING: NodeId = NodeId(id::MISSING);
60 pub const BOT: NodeId = NodeId(id::BOT);
61 pub const EPS: NodeId = NodeId(id::EPS);
62 pub const TOP: NodeId = NodeId(id::TOP);
63 pub const TS: NodeId = NodeId(id::TOPSTAR);
64 pub const TOPPLUS: NodeId = NodeId(id::TOPPLUS);
65 pub const BEGIN: NodeId = NodeId(id::BEGIN);
66 pub const END: NodeId = NodeId(id::END);
67
68 #[inline]
69 pub fn as_u32(self) -> u32 {
70 self.0
71 }
72
73 #[inline]
74 pub fn from_u32(v: u32) -> NodeId {
75 NodeId(v)
76 }
77}
78impl Debug for NodeId {
79 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
80 let num = &self.0;
81 f.write_str(format!("{num}").as_str())
82 }
83}
84
85#[derive(Clone, Copy, PartialEq, Hash, Eq, Debug, PartialOrd, Ord)]
86pub struct TRegexId(u32);
87impl TRegexId {
88 pub const MISSING: TRegexId = TRegexId(id::MISSING);
89 pub const EPS: TRegexId = TRegexId(id::EPS);
90 pub const BOT: TRegexId = TRegexId(id::BOT);
91 pub const TOP: TRegexId = TRegexId(id::TOP);
92 pub const TOPSTAR: TRegexId = TRegexId(id::TOPSTAR);
93}
94
95#[derive(Eq, Hash, PartialEq, Clone, Copy, Debug)]
96pub(crate) struct MetaFlags(u8);
97impl MetaFlags {
98 const NULL_MASK: u8 = 0b111; pub(crate) const ZERO: MetaFlags = MetaFlags(0);
101 pub(crate) const CONTAINS_LOOKAROUND: MetaFlags = MetaFlags(1 << 3);
102 pub(crate) const INFINITE_LENGTH: MetaFlags = MetaFlags(1 << 4);
103 pub(crate) const CONTAINS_COMPL: MetaFlags = MetaFlags(1 << 5);
104 pub(crate) const CONTAINS_INTER: MetaFlags = MetaFlags(1 << 6);
105 pub(crate) const CONTAINS_ANCHORS: MetaFlags = MetaFlags(1 << 7);
106
107 #[inline]
108 pub(crate) fn nullability(self) -> Nullability {
109 Nullability(self.0 as u8 & Self::NULL_MASK as u8)
110 }
111
112 #[inline]
113 pub(crate) const fn with_nullability(n: Nullability, flags: MetaFlags) -> MetaFlags {
114 MetaFlags((flags.0 & !Self::NULL_MASK) | n.0)
115 }
116
117 #[inline]
118 pub(crate) fn has(self, flag: MetaFlags) -> bool {
119 self.0 & flag.0 != 0
120 }
121 #[inline]
122 const fn and(self, other: MetaFlags) -> MetaFlags {
123 MetaFlags(self.0 & other.0)
124 }
125 #[inline]
126 const fn or(self, other: MetaFlags) -> MetaFlags {
127 MetaFlags(self.0 | other.0)
128 }
129
130 pub(crate) fn contains_lookaround(self) -> bool {
131 self.has(MetaFlags::CONTAINS_LOOKAROUND)
132 }
133 pub(crate) fn contains_inter(self) -> bool {
134 self.has(MetaFlags::CONTAINS_INTER)
135 }
136
137 pub(crate) fn all_contains_flags(self) -> MetaFlags {
138 self.and(
139 MetaFlags::CONTAINS_LOOKAROUND
140 .or(MetaFlags::CONTAINS_ANCHORS)
141 .or(MetaFlags::CONTAINS_INTER),
142 )
143 }
144}
145
146#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
147#[repr(u8)]
148pub enum Kind {
149 #[default]
150 Pred,
151 Star,
152 Begin,
153 End,
154 Concat,
155 Union,
156 Compl,
157 Lookbehind,
158 Lookahead,
159 Inter,
160}
161
162#[derive(Eq, Hash, PartialEq, Clone)]
163struct Metadata {
164 flags: MetaFlags,
165 nulls: NullsId,
166}
167
168struct MetadataBuilder {
169 num_created: u32,
170 solver: Solver,
171 nb: NullsBuilder,
172 index: HashMap<Metadata, MetadataId>,
173 pub array: Vec<Metadata>,
174}
175
176mod helpers {
177 pub(crate) fn incr_rel(n1: u32) -> u32 {
178 match n1.overflowing_add(1) {
179 (_, true) => u32::MAX,
180 (res, false) => res,
181 }
182 }
183}
184
185impl MetadataBuilder {
186 fn new() -> MetadataBuilder {
187 let mut arr = Vec::new();
188 arr.push(Metadata {
189 flags: MetaFlags::ZERO,
190 nulls: NullsId::EMPTY,
191 });
192 Self {
193 index: HashMap::new(),
194 array: arr,
195 solver: Solver::new(),
196 num_created: 0,
197 nb: NullsBuilder::new(),
198 }
199 }
200
201 fn init(&mut self, inst: Metadata) -> MetadataId {
202 self.num_created += 1;
203 let new_id = MetadataId(self.num_created);
204 self.index.insert(inst.clone(), new_id);
205 self.array.push(inst);
206 new_id
207 }
208
209 fn get_meta_id(&mut self, inst: Metadata) -> MetadataId {
210 match self.index.get(&inst) {
211 Some(&id) => id,
212 None => self.init(inst),
213 }
214 }
215
216 fn get_meta_ref(&mut self, inst: MetadataId) -> &Metadata {
217 &self.array[inst.0 as usize]
218 }
219
220 fn get_contains(&self, setflags: MetaFlags) -> MetaFlags {
221 setflags.all_contains_flags()
222 }
223
224 fn flags_star(&self, body: MetadataId) -> MetaFlags {
225 let left = &self.array[body.0 as usize].flags;
226 let contains = left.and(MetaFlags::CONTAINS_LOOKAROUND.or(MetaFlags::CONTAINS_INTER));
227 MetaFlags::with_nullability(Nullability::ALWAYS, contains.or(MetaFlags::INFINITE_LENGTH))
228 }
229
230 fn flags_compl(&self, left_id: MetadataId) -> MetaFlags {
231 let left = &self.array[left_id.0 as usize].flags;
232 let null = left.nullability().not().and(Nullability::ALWAYS);
233 let contains = self.get_contains(*left);
234 MetaFlags::with_nullability(
235 null,
236 contains
237 .or(MetaFlags::INFINITE_LENGTH)
238 .or(MetaFlags::CONTAINS_COMPL),
239 )
240 }
241
242 fn flags_concat(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
243 let left = &self.array[left_id.0 as usize].flags;
244 let right = &self.array[right_id.0 as usize].flags;
245 let null = left.nullability().and(right.nullability());
246 let contains = self.get_contains(left.or(*right));
247 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
248 MetaFlags::with_nullability(null, contains.or(len))
249 }
250
251 fn flags_inter(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
252 let left = &self.array[left_id.0 as usize].flags;
253 let right = &self.array[right_id.0 as usize].flags;
254 let null = left.nullability().and(right.nullability());
255 let contains = self
256 .get_contains(left.or(*right))
257 .or(MetaFlags::CONTAINS_INTER);
258 let len = (left.and(*right)).and(MetaFlags::INFINITE_LENGTH);
259 MetaFlags::with_nullability(null, contains.or(len))
260 }
261
262 fn flags_union(&self, left_id: MetadataId, right_id: MetadataId) -> MetaFlags {
263 let left = &self.array[left_id.0 as usize].flags;
264 let right = &self.array[right_id.0 as usize].flags;
265 let null = left.nullability().or(right.nullability());
266 let contains = self.get_contains(left.or(*right));
267 let len = (left.or(*right)).and(MetaFlags::INFINITE_LENGTH);
268 MetaFlags::with_nullability(null, contains.or(len))
269 }
270}
271
272#[derive(Eq, Hash, PartialEq, Clone, Debug, Default)]
273pub(crate) struct NodeKey {
274 pub(crate) kind: Kind,
275 pub(crate) left: NodeId,
276 pub(crate) right: NodeId,
277 pub(crate) extra: u32,
278}
279
280#[derive(Eq, Hash, PartialEq, Clone, Debug)]
281pub enum TRegex<TSet> {
282 Leaf(NodeId),
283 ITE(TSet, TRegexId, TRegexId),
284}
285
286#[derive(Eq, Hash, PartialEq, Clone, Copy)]
287pub(crate) struct NodeFlags(u8);
288impl NodeFlags {
289 pub(crate) const ZERO: NodeFlags = NodeFlags(0);
290 pub(crate) const IS_CHECKED: NodeFlags = NodeFlags(1);
291 pub(crate) const IS_EMPTY: NodeFlags = NodeFlags(1 << 1);
292
293 fn is_checked(self) -> bool {
294 self.0 >= NodeFlags::IS_CHECKED.0
295 }
296 fn is_empty(self) -> bool {
297 self.0 & NodeFlags::IS_EMPTY.0 == NodeFlags::IS_EMPTY.0
298 }
299}
300
301#[derive(Eq, Hash, PartialEq, Clone, Copy)]
302pub(crate) struct BuilderFlags(u8);
303impl BuilderFlags {
304 pub(crate) const ZERO: BuilderFlags = BuilderFlags(0);
305 pub(crate) const SUBSUME: BuilderFlags = BuilderFlags(1);
306}
307
308pub struct RegexBuilder {
309 mb: MetadataBuilder,
310 temp_vec: Vec<NodeId>,
311 num_created: u32,
312 index: HashMap<NodeKey, NodeId>,
313 array: Vec<NodeKey>,
314 metadata: Vec<MetadataId>,
315 reversed: Vec<NodeId>,
316 cache_empty: HashMap<NodeId, NodeFlags>,
317 tr_cache: HashMap<TRegex<TSetId>, TRegexId>,
318 tr_array: Vec<TRegex<TSetId>>,
319 tr_der_center: Vec<TRegexId>,
320 tr_der_begin: Vec<TRegexId>,
321 flags: BuilderFlags,
322 pub lookahead_context_max: u32,
324}
325
326macro_rules! iter_inter {
327 ($compiler:ident, $start:ident, $expression:expr) => {
328 let mut curr = $start;
329 while $compiler.get_kind(curr) == Kind::Inter {
330 let left = $compiler.get_left(curr);
331 $expression(left);
332 curr = $compiler.get_right(curr);
333 }
334 $expression(curr);
335 };
336}
337
338impl NodeId {
339 fn is_missing(&self) -> bool {
340 *self == NodeId::MISSING
341 }
342 #[inline]
343 fn flags_contains(self, b: &RegexBuilder) -> MetaFlags {
344 b.get_flags_contains(self)
345 }
346 pub(crate) fn has_concat_tail(self, b: &RegexBuilder, tail: NodeId) -> bool {
347 if self == tail {
348 true
349 } else if self.is_kind(b, Kind::Concat) {
350 self.right(b).has_concat_tail(b, tail)
351 } else {
352 false
353 }
354 }
355 fn missing_to_eps(&self) -> NodeId {
356 if *self == NodeId::MISSING {
357 NodeId::EPS
358 } else {
359 *self
360 }
361 }
362
363 #[inline]
364 fn kind(self, b: &RegexBuilder) -> Kind {
365 b.get_kind(self)
366 }
367 #[inline]
368 fn is_kind(self, b: &RegexBuilder, k: Kind) -> bool {
369 b.get_kind(self) == k
370 }
371 #[inline]
372 fn is_never_nullable(self, b: &RegexBuilder) -> bool {
373 b.nullability(self) == Nullability::NEVER
374 }
375 #[inline]
376 fn nullability(self, b: &RegexBuilder) -> Nullability {
377 b.nullability(self)
378 }
379
380 #[inline]
381 fn is_center_nullable(self, b: &RegexBuilder) -> bool {
382 b.nullability(self).and(Nullability::CENTER) != Nullability::NEVER
383 }
384 #[inline]
385 pub fn left(self, b: &RegexBuilder) -> NodeId {
386 b.get_left(self)
387 }
388
389 #[inline]
390 pub fn right(self, b: &RegexBuilder) -> NodeId {
391 b.get_right(self)
392 }
393
394 #[inline]
395 fn der(self, b: &mut RegexBuilder, mask: Nullability) -> Result<TRegexId, AlgebraError> {
396 b.der(self, mask)
397 }
398
399 #[inline]
400 fn extra(self, b: &RegexBuilder) -> u32 {
401 b.get_extra(self)
402 }
403
404 #[inline]
405 fn is_pred(self, b: &RegexBuilder) -> bool {
406 b.get_kind(self) == Kind::Pred
407 }
408 #[inline]
409 fn is_lookahead(self, b: &RegexBuilder) -> bool {
410 b.get_kind(self) == Kind::Lookahead
411 }
412 #[inline]
413 pub fn pred_tset(self, b: &RegexBuilder) -> TSetId {
414 debug_assert!(self.is_pred(b));
415 TSetId(b.get_extra(self))
416 }
417 #[inline]
418 fn is_star(self, b: &RegexBuilder) -> bool {
419 if NodeId::EPS == self {
420 return false;
421 }
422 b.get_kind(self) == Kind::Star
423 }
424
425 #[inline]
426 pub(crate) fn is_plus(self, b: &RegexBuilder) -> bool {
427 if self.is_concat(b) {
428 let r = self.right(b);
429 return r.is_star(b) && r.left(b) == self.left(b);
430 }
431 false
432 }
433
434 #[inline]
435 fn is_concat(self, b: &RegexBuilder) -> bool {
436 b.get_kind(self) == Kind::Concat
437 }
438
439 #[inline]
440 fn is_opt_v(self, b: &RegexBuilder) -> Option<NodeId> {
441 if b.get_kind(self) == Kind::Union && b.get_left(self) == NodeId::EPS {
442 Some(b.get_right(self))
443 } else {
444 None
445 }
446 }
447
448 #[inline]
449 fn is_compl_plus_end(self, b: &RegexBuilder) -> bool {
450 if b.get_kind(self) == Kind::Concat {
451 let left = self.left(b);
452 let right = self.right(b);
453 if left.is_kind(b, Kind::Compl) {
454 if left.left(b) == NodeId::TOPPLUS {
455 return right == NodeId::END;
456 }
457 }
458 }
459 false
460 }
461
462 #[inline]
463 fn is_pred_star(self, b: &RegexBuilder) -> Option<NodeId> {
464 if NodeId::EPS == self {
465 return None;
466 }
467 if self.is_star(b) && self.left(b).is_pred(b) {
468 Some(self.left(b))
469 } else {
470 None
471 }
472 }
473
474 #[inline]
475 fn is_contains(self, b: &RegexBuilder) -> Option<NodeId> {
476 if b.get_kind(self) == Kind::Concat && self.left(b) == NodeId::TS {
477 let middle = self.right(b);
478 if middle.kind(b) == Kind::Concat && middle.right(b) == NodeId::TS {
479 return Some(middle.left(b));
480 }
481 }
482 None
483 }
484
485 #[inline]
486 pub(crate) fn iter_union(
487 self,
488 b: &mut RegexBuilder,
489 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
490 ) {
491 let mut curr: NodeId = self;
492 while curr.kind(b) == Kind::Union {
493 f(b, curr.left(b));
494 curr = curr.right(b);
495 }
496 f(b, curr);
497 }
498
499 #[inline]
500 pub(crate) fn iter_union_while(
501 self,
502 b: &mut RegexBuilder,
503 f: &mut impl FnMut(&mut RegexBuilder, NodeId) -> bool,
504 ) {
505 let mut curr: NodeId = self;
506 let mut continue_loop = true;
507 while continue_loop && curr.kind(b) == Kind::Union {
508 continue_loop = f(b, curr.left(b));
509 curr = curr.right(b);
510 }
511 if continue_loop {
512 f(b, curr);
513 }
514 }
515}
516
517impl RegexBuilder {
518 pub fn new() -> RegexBuilder {
519 let mut inst = Self {
520 mb: MetadataBuilder::new(),
521 array: Vec::new(),
522 index: HashMap::new(),
523 cache_empty: HashMap::new(),
524 tr_array: Vec::new(),
525 tr_cache: HashMap::new(),
526 flags: BuilderFlags::ZERO,
527 lookahead_context_max: 800,
528 num_created: 0,
529 metadata: Vec::new(),
530 reversed: Vec::new(),
531 tr_der_center: Vec::new(),
532 tr_der_begin: Vec::new(),
533 temp_vec: Vec::new(),
534 };
535 inst.array.push(NodeKey::default());
536 inst.mk_pred(TSetId::EMPTY);
537 inst.mk_star(NodeId::BOT);
538 inst.mk_pred(TSetId::FULL);
539 inst.mk_star(NodeId::TOP);
540 let top_plus_id = inst.mk_plus(NodeId::TOP);
541 inst.mk_unset(Kind::Begin);
542 inst.mk_unset(Kind::End);
543 debug_assert!(top_plus_id == NodeId::TOPPLUS);
544
545 inst.tr_array.push(TRegex::Leaf(NodeId::MISSING));
546 inst.mk_leaf(NodeId::BOT);
547 inst.mk_leaf(NodeId::EPS);
548 inst.mk_leaf(NodeId::TOP);
549 inst.mk_leaf(NodeId::TS);
550
551 inst.flags = BuilderFlags::SUBSUME;
552 inst
553 }
554
555 #[inline]
556 pub fn solver_ref(&self) -> &Solver {
557 &self.mb.solver
558 }
559
560 #[inline]
561 pub fn solver(&mut self) -> &mut Solver {
562 &mut self.mb.solver
563 }
564
565 fn tr_init(&mut self, inst: TRegex<TSetId>) -> TRegexId {
566 let new_id = TRegexId(self.tr_cache.len() as u32 + 1);
567 self.tr_cache.insert(inst.clone(), new_id);
568 self.tr_array.push(inst);
569 new_id
570 }
571
572 fn get_tregex_id(&mut self, inst: TRegex<TSetId>) -> TRegexId {
573 match self.tr_cache.get(&inst) {
574 Some(&id) => id,
575 None => self.tr_init(inst),
576 }
577 }
578
579 pub fn get_tregex(&self, inst: TRegexId) -> &TRegex<TSetId> {
580 &self.tr_array[inst.0 as usize]
581 }
582
583 fn unsat(&mut self, cond1: TSetId, cond2: TSetId) -> bool {
584 let solv = self.solver();
585 !solv.is_sat_id(cond1, cond2)
586 }
587
588 pub(crate) fn mk_leaf(&mut self, node_id: NodeId) -> TRegexId {
589 let term = TRegex::<TSetId>::Leaf(node_id);
590 self.get_tregex_id(term)
591 }
592
593 fn mk_ite(&mut self, cond: TSetId, _then: TRegexId, _else: TRegexId) -> TRegexId {
594 let tmp_inst = TRegex::ITE(cond, _then, _else);
595 if let Some(cached) = self.tr_cache.get(&tmp_inst) {
596 return *cached;
597 }
598 if _then == _else {
599 return _then;
600 }
601 if self.solver().is_full_id(cond) {
602 return _then;
603 }
604 if self.solver().is_empty_id(cond) {
605 return _else;
606 }
607 let _clean_then = self.clean(cond, _then);
608 let notcond = self.solver().not_id(cond);
609 let _clean_else = self.clean(notcond, _else);
610
611 if _clean_then == _clean_else {
612 return _clean_then;
613 }
614 match self.get_tregex(_clean_then) {
616 TRegex::ITE(leftcond, _inner_then, leftg) if *leftg == _clean_else => {
617 let _cond_then = leftcond.clone();
618 let new_then = _inner_then.clone();
619 let sand = self.solver().and_id(cond, _cond_then);
620 return self.mk_ite(sand, new_then, _clean_else);
621 }
622 _ => {}
623 }
624 match self.get_tregex(_clean_else) {
626 TRegex::ITE(_c2, _t2, _e2) if *_t2 == _clean_then => {
628 let e2clone = _e2.clone();
629 let new_cond = self.mb.solver.or_id(cond, _c2.clone());
630 return self.mk_ite(new_cond, _clean_then, e2clone);
631 }
632 _ => {}
633 }
634
635 if _clean_then == TRegexId::BOT {
636 let flip_cond = self.solver().not_id(cond);
637 let cleaned_id = self.get_tregex_id(TRegex::ITE(flip_cond, _clean_else, _clean_then));
638 return cleaned_id;
639 }
640
641 self.get_tregex_id(TRegex::ITE(cond, _clean_then, _clean_else))
642 }
643
644 fn clean(&mut self, beta: TSetId, tterm: TRegexId) -> TRegexId {
645 match *self.get_tregex(tterm) {
646 TRegex::Leaf(_) => return tterm,
647 TRegex::ITE(alpha, _then_id, _else_id) => {
648 let notalpha = self.mb.solver.not_id(alpha);
649 if self.mb.solver.unsat_id(beta, alpha) {
650 let ec = self.mb.solver.and_id(beta, notalpha);
651 let new_else = self.clean(ec, _else_id);
652 return new_else;
653 }
654 if self.unsat(beta, notalpha) {
655 let tc = self.mb.solver.and_id(beta, alpha);
656 let new_then = self.clean(tc, _then_id);
657 return new_then;
658 }
659
660 let ei = _else_id;
661 let tc = self.mb.solver.and_id(beta, alpha);
662 let ec = self.mb.solver.and_id(beta, notalpha);
663 let new_then = self.clean(tc, _then_id);
664 let new_else = self.clean(ec, ei);
665 return self.mk_ite(tc, new_then, new_else);
666 }
667 }
668 }
669
670 fn mk_unary(
671 &mut self,
672 term: TRegexId,
673 apply: &mut impl FnMut(&mut RegexBuilder, NodeId) -> NodeId,
674 ) -> TRegexId {
675 match self.tr_array[term.0 as usize] {
676 TRegex::Leaf(node_id) => {
677 let applied = apply(self, node_id);
678 self.mk_leaf(applied)
679 }
680 TRegex::ITE(c1, _then, _else) => {
681 let _then1 = self.mk_unary(_then, apply);
682 let _else1 = self.mk_unary(_else, apply);
683 self.mk_ite(c1, _then1, _else1)
684 }
685 }
686 }
687
688 fn mk_binary_result(
689 &mut self,
690 left: TRegexId,
691 right: TRegexId,
692 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> Result<NodeId, AlgebraError>,
693 ) -> Result<TRegexId, AlgebraError> {
694 match self.tr_array[left.0 as usize] {
695 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
696 TRegex::Leaf(right_node_id) => {
697 let applied = apply(self, left_node_id, right_node_id)?;
698 Ok(self.mk_leaf(applied))
699 }
700 TRegex::ITE(c2, _then, _else) => {
701 let then2 = self.mk_binary_result(left, _then, apply)?;
702 let else2 = self.mk_binary_result(left, _else, apply)?;
703 Ok(self.mk_ite(c2, then2, else2))
704 }
705 },
706 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
707 TRegex::Leaf(_) => {
708 let then2 = self.mk_binary_result(_then1, right, apply)?;
709 let else2 = self.mk_binary_result(_else1, right, apply)?;
710 Ok(self.mk_ite(c1, then2, else2))
711 }
712 TRegex::ITE(c2, _then2, _else2) => {
713 if c1 == c2 {
714 let _then = self.mk_binary_result(_then1, _then2, apply)?;
715 let _else = self.mk_binary_result(_else1, _else2, apply)?;
716 return Ok(self.mk_ite(c1, _then, _else));
717 }
718 if c1.0 > c2.0 {
719 let _then = self.mk_binary_result(_then1, right, apply)?;
720 let _else = self.mk_binary_result(_else1, right, apply)?;
721 return Ok(self.mk_ite(c1, _then, _else));
722 } else {
723 let _then = self.mk_binary_result(left, _then2, apply)?;
724 let _else = self.mk_binary_result(left, _else2, apply)?;
725 return Ok(self.mk_ite(c2, _then, _else));
726 }
727 }
728 },
729 }
730 }
731
732 fn mk_binary(
733 &mut self,
734 left: TRegexId,
735 right: TRegexId,
736 apply: &mut impl FnMut(&mut RegexBuilder, NodeId, NodeId) -> NodeId,
737 ) -> TRegexId {
738 match self.tr_array[left.0 as usize] {
739 TRegex::Leaf(left_node_id) => match self.tr_array[right.0 as usize] {
740 TRegex::Leaf(right_node_id) => {
741 let applied = apply(self, left_node_id, right_node_id);
742 self.mk_leaf(applied)
743 }
744 TRegex::ITE(c2, _then, _else) => {
745 let then2 = self.mk_binary(left, _then, apply);
746 let else2 = self.mk_binary(left, _else, apply);
747 self.mk_ite(c2, then2, else2)
748 }
749 },
750 TRegex::ITE(c1, _then1, _else1) => match self.tr_array[right.0 as usize] {
751 TRegex::Leaf(_) => {
752 let then2 = self.mk_binary(_then1, right, apply);
753 let else2 = self.mk_binary(_else1, right, apply);
754 self.mk_ite(c1, then2, else2)
755 }
756 TRegex::ITE(c2, _then2, _else2) => {
757 if c1 == c2 {
758 let _then = self.mk_binary(_then1, _then2, apply);
759 let _else = self.mk_binary(_else1, _else2, apply);
760 return self.mk_ite(c1, _then, _else);
761 }
762 if c1.0 > c2.0 {
763 let _then = self.mk_binary(_then1, right, apply);
764 let _else = self.mk_binary(_else1, right, apply);
765 return self.mk_ite(c1, _then, _else);
766 } else {
767 let _then = self.mk_binary(left, _then2, apply);
768 let _else = self.mk_binary(left, _else2, apply);
769 return self.mk_ite(c2, _then, _else);
770 }
771 }
772 },
773 }
774 }
775
776 pub fn get_nulls(
777 &mut self,
778 pending_rel: u32,
779 mask: Nullability,
780 acc: &mut BTreeSet<NullState>,
781 node_id: NodeId,
782 ) {
783 debug_assert!(node_id != NodeId::MISSING);
784 if !self.is_nullable(node_id, mask) {
785 return;
786 }
787 match self.get_kind(node_id) {
788 Kind::Pred => {}
789 Kind::End => {
790 if mask.has(Nullability::END) {
791 acc.insert(NullState::new(mask, pending_rel));
792 }
793 }
794 Kind::Begin => {
795 if mask.has(Nullability::BEGIN) {
796 acc.insert(NullState::new(mask, pending_rel));
797 }
798 }
799 Kind::Concat => {
800 let new_mask = self.nullability(node_id).and(mask);
801 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
802 if self.is_nullable(node_id.left(self), mask) {
803 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
804 }
805 }
806 Kind::Union => {
807 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
808 self.get_nulls(pending_rel, mask, acc, node_id.right(self));
809 }
810 Kind::Inter => {
811 let new_mask = self.nullability(node_id).and(mask);
812 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
813 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
814 }
815 Kind::Star => {
816 acc.insert(NullState::new(mask, pending_rel));
817 self.get_nulls(pending_rel, mask, acc, node_id.left(self));
818 }
819 Kind::Compl => {
820 if !self.is_nullable(node_id.left(self), mask) {
821 acc.insert(NullState::new(mask, 0));
822 }
823 }
824 Kind::Lookbehind => {
825 let new_mask = self.nullability(node_id).and(mask);
826 self.get_nulls(pending_rel, new_mask, acc, node_id.left(self));
827 if node_id.right(self) != NodeId::MISSING {
828 self.get_nulls(pending_rel, new_mask, acc, node_id.right(self));
829 }
830 }
831 Kind::Lookahead => {
832 let la_inner = self.get_lookahead_inner(node_id);
833 if self.is_nullable(la_inner, mask) {
834 let rel = self.get_lookahead_rel(node_id);
835 if rel != u32::MAX {
836 self.get_nulls(pending_rel + rel, mask, acc, la_inner);
837 }
838 }
839 let la_tail = self.get_lookahead_tail(node_id);
840 if la_tail != NodeId::MISSING {
841 self.get_nulls(pending_rel, mask, acc, la_tail);
842 }
843 }
844 }
845 }
846
847 pub(crate) fn contains_look(&mut self, node_id: NodeId) -> bool {
848 self.get_meta_flags(node_id)
849 .has(MetaFlags::CONTAINS_LOOKAROUND)
850 }
851
852 fn get_min_length_only(&self, node_id: NodeId) -> u32 {
853 match self.get_kind(node_id) {
854 Kind::End | Kind::Begin => 0,
855 Kind::Pred => 1,
856 Kind::Concat => {
857 self.get_min_length_only(node_id.left(self))
858 + self.get_min_length_only(node_id.right(self))
859 }
860 Kind::Union => self
861 .get_min_length_only(node_id.left(self))
862 .min(self.get_min_length_only(node_id.right(self))),
863 Kind::Inter => self
864 .get_min_length_only(node_id.left(self))
865 .max(self.get_min_length_only(node_id.right(self))),
866 Kind::Star | Kind::Lookbehind | Kind::Lookahead => 0,
867 Kind::Compl => {
868 if self.nullability(node_id.left(self)) == Nullability::NEVER {
869 0
870 } else {
871 1
872 }
873 }
874 }
875 }
876
877 fn starts_with_ts(&self, node_id: NodeId) -> bool {
878 if node_id == NodeId::TS {
879 return true;
880 }
881 match self.get_kind(node_id) {
882 Kind::Inter => {
883 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
884 }
885 Kind::Union => {
886 self.starts_with_ts(node_id.left(self)) && self.starts_with_ts(node_id.right(self))
887 }
888 Kind::Concat => self.starts_with_ts(node_id.left(self)),
889 _ => false,
890 }
891 }
892
893 #[inline]
894 pub(crate) fn ends_with_ts(&self, node_id: NodeId) -> bool {
895 if self.get_kind(node_id) == Kind::Concat {
896 self.ends_with_ts(node_id.right(self))
897 } else {
898 node_id == NodeId::TS
899 }
900 }
901
902 pub(crate) fn is_nullable(&mut self, node_id: NodeId, mask: Nullability) -> bool {
903 debug_assert!(node_id != NodeId::MISSING);
904 self.nullability(node_id).0 & mask.0 != Nullability::NEVER.0
905 }
906
907 pub(crate) fn cache_der(
908 &mut self,
909 node_id: NodeId,
910 result: TRegexId,
911 mask: Nullability,
912 ) -> TRegexId {
913 if mask == Nullability::CENTER {
914 self.tr_der_center[node_id.0 as usize] = result
915 } else {
916 self.tr_der_begin[node_id.0 as usize] = result
917 };
918 result
919 }
920
921 pub(crate) fn try_cached_der(
922 &mut self,
923 node_id: NodeId,
924 mask: Nullability,
925 ) -> Option<TRegexId> {
926 let cache = if mask == Nullability::CENTER {
927 &mut self.tr_der_center
928 } else {
929 &mut self.tr_der_begin
930 };
931 match cache.get(node_id.0 as usize) {
932 Some(&TRegexId::MISSING) => {}
933 Some(&result) => {
934 return Some(result);
935 }
936 None => {
937 cache.resize(node_id.0 as usize + 1, TRegexId::MISSING);
938 }
939 }
940 return None;
941 }
942
943 pub(crate) fn transition_term(&mut self, der: TRegexId, set: TSetId) -> NodeId {
944 let mut term = self.get_tregex(der);
945 loop {
946 match *term {
947 TRegex::Leaf(node_id) => return node_id,
948 TRegex::ITE(cond, _then, _else) => {
949 if self.solver().is_sat_id(set, cond) {
950 term = self.get_tregex(_then);
951 } else {
952 term = self.get_tregex(_else);
953 }
954 }
955 }
956 }
957 }
958
959 pub fn der(&mut self, node_id: NodeId, mask: Nullability) -> Result<TRegexId, AlgebraError> {
960 debug_assert!(mask != Nullability::ALWAYS, "attempting to derive w always");
961 debug_assert!(
962 node_id != NodeId::MISSING,
963 "attempting to derive missing node"
964 );
965 if let Some(result) = self.try_cached_der(node_id, mask) {
966 return Ok(result);
967 }
968
969 let result = match node_id.kind(self) {
970 Kind::Compl => {
971 let leftd = node_id.left(self).der(self, mask)?;
972 self.mk_unary(leftd, &mut (|b, v| b.mk_compl(v)))
973 }
974 Kind::Inter => {
975 let leftd = node_id.left(self).der(self, mask)?;
976 let rightd = node_id.right(self).der(self, mask)?;
977 {
978 let this = &mut *self;
979 this.mk_binary(
980 leftd,
981 rightd,
982 &mut (|b, left, right| b.mk_inter(left, right)),
983 )
984 }
985 }
986 Kind::Union => {
987 let leftd = self.der(node_id.left(self), mask)?;
988 let rightd = self.der(node_id.right(self), mask)?;
989 {
990 let this = &mut *self;
991 this.mk_binary(
992 leftd,
993 rightd,
994 &mut (|b, left, right| b.mk_union(left, right)),
995 )
996 }
997 }
998 Kind::Concat => {
999 let head = node_id.left(self);
1000 let tail = node_id.right(self);
1001 let tail_leaf = self.mk_leaf(tail);
1002 let head_der = self.der(head, mask)?;
1003
1004 if self.is_nullable(head, mask) {
1005 let rightd = self.der(tail, mask)?;
1006 let ldr = {
1007 let this = &mut *self;
1008 this.mk_binary(
1009 head_der,
1010 tail_leaf,
1011 &mut (|b, left, right| b.mk_concat(left, right)),
1012 )
1013 };
1014 {
1015 let this = &mut *self;
1016 this.mk_binary(ldr, rightd, &mut (|b, left, right| b.mk_union(left, right)))
1017 }
1018 } else {
1019 let ldr = {
1020 let this = &mut *self;
1021 this.mk_binary(
1022 head_der,
1023 tail_leaf,
1024 &mut (|b, left, right| b.mk_concat(left, right)),
1025 )
1026 };
1027 ldr
1028 }
1029 }
1030 Kind::Star => {
1031 if node_id == NodeId::EPS {
1032 TRegexId::BOT
1033 } else {
1034 let left = node_id.left(self);
1035 let r_decr_leaf = self.mk_leaf(node_id);
1036 let r_der = self.der(left, mask)?;
1037 let result = {
1038 let this = &mut *self;
1039 this.mk_binary(
1040 r_der,
1041 r_decr_leaf,
1042 &mut (|b, left, right| b.mk_concat(left, right)),
1043 )
1044 };
1045 result
1046 }
1047 }
1048 Kind::Lookbehind => {
1049 let lb_prev_der = {
1050 let lb_prev = self.get_lookbehind_prev(node_id);
1051 if lb_prev == NodeId::MISSING {
1052 TRegexId::MISSING
1053 } else {
1054 self.der(lb_prev, mask)?
1055 }
1056 };
1057 let lb_inner = self.get_lookbehind_inner(node_id);
1058 let lb_inner_der = self.der(lb_inner, mask)?;
1059 {
1060 let this = &mut *self;
1061 this.mk_binary_result(
1062 lb_inner_der,
1063 lb_prev_der,
1064 &mut (|b, left, right| b.mk_lookbehind_internal(left, right)),
1065 )?
1066 }
1067 }
1068 Kind::Lookahead => {
1069 let la_tail = self.get_lookahead_tail(node_id);
1070 let la_body = node_id.left(self);
1071 let rel = self.get_lookahead_rel(node_id);
1072
1073 if self.is_nullable(la_body, mask) {
1074 let right = node_id.right(self).missing_to_eps();
1076 let rder = self.der(right, mask).clone();
1077 return rder;
1078 }
1079
1080 if rel == u32::MAX {
1081 let la_body_der = self.der(la_body, mask)?;
1082 if la_tail.is_kind(self, Kind::Pred) {
1083 let transitioned =
1084 self.transition_term(la_body_der, la_tail.pred_tset(self));
1085 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1086 let concated = self.mk_concat(la_tail, new_la);
1087 return self.der(concated, mask);
1088 }
1089 if la_tail.is_kind(self, Kind::Concat) && la_tail.left(self).is_pred(self) {
1090 let left = la_tail.left(self);
1091 let tset = left.pred_tset(self);
1092 let transitioned = self.transition_term(la_body_der, tset);
1093 let new_la = self.mk_lookahead_internal(transitioned, NodeId::MISSING, 0);
1094 let tail_right = la_tail.right(self);
1095 let concated = self.mk_concat(new_la, tail_right);
1096 let concated = self.mk_concat(left, concated);
1097 return self.der(concated, mask);
1098 }
1099 }
1100
1101 if la_tail != NodeId::MISSING && self.is_nullable(la_tail, mask) {
1102 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1103 let concated = self.mk_concat(la_body, nulls_mask);
1104 let concated_look = self.mk_lookahead_internal(concated, NodeId::MISSING, 0);
1105 let non_nulled = self.mk_non_nullable_safe(la_tail);
1106 let new_look = self.mk_lookahead_internal(la_body, non_nulled, rel);
1107 let new_union = self.mk_union(concated_look, new_look);
1108 return self.der(new_union, mask);
1109 }
1110
1111 let la_tail_der = if la_tail == NodeId::MISSING {
1112 TRegexId::MISSING
1113 } else {
1114 if self.is_nullable(la_tail, mask) {
1115 let nulls_mask = self.extract_nulls_mask(la_tail, mask);
1116 let nulls_la = self.mk_lookahead_internal(nulls_mask, NodeId::MISSING, 0);
1117 let la_union = self.mk_union(la_tail, nulls_la);
1118 self.der(la_union, mask)?
1119 } else {
1120 self.der(la_tail, mask)?
1121 }
1122 };
1123
1124 let la_body_der = self.der(la_body, mask)?;
1125
1126 if rel != u32::MAX && rel > self.lookahead_context_max {
1127 return Err(AlgebraError::AnchorLimit);
1128 }
1129
1130 let la = {
1131 let this = &mut *self;
1132 let rel = helpers::incr_rel(rel);
1133 this.mk_binary(
1134 la_body_der,
1135 la_tail_der,
1136 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1137 )
1138 };
1139
1140 if rel != u32::MAX
1141 && la_tail_der != TRegexId::MISSING
1142 && self.is_nullable(la_tail, mask)
1143 {
1144 let look_only = {
1145 let this = &mut *self;
1146 let right = TRegexId::MISSING;
1147 let rel = helpers::incr_rel(rel);
1148 this.mk_binary(
1149 la_body_der,
1150 right,
1151 &mut (|b, left, right| b.mk_lookahead_internal(left, right, rel)),
1152 )
1153 };
1154 {
1155 let this = &mut *self;
1156 this.mk_binary(
1157 look_only,
1158 la,
1159 &mut (|b, left, right| b.mk_union(left, right)),
1160 )
1161 }
1162 } else {
1163 la
1164 }
1165 }
1166 Kind::Begin | Kind::End => TRegexId::BOT,
1167 Kind::Pred => {
1168 let psi = node_id.pred_tset(&self);
1169 if psi == TSetId::EMPTY {
1170 TRegexId::BOT
1171 } else {
1172 self.mk_ite(psi, TRegexId::EPS, TRegexId::BOT)
1173 }
1174 }
1175 };
1176
1177 self.cache_der(node_id, result, mask);
1181 Ok(result)
1182 }
1183
1184 fn init_metadata(&mut self, node_id: NodeId, meta_id: MetadataId) {
1185 debug_assert!(meta_id != MetadataId::MISSING);
1186 match self.metadata.get_mut(node_id.0 as usize) {
1187 Some(v) => *v = meta_id,
1188 None => {
1189 self.metadata
1190 .resize(node_id.0 as usize + 1, MetadataId::MISSING);
1191 self.metadata[node_id.0 as usize] = meta_id;
1192 }
1193 }
1194 }
1195
1196 fn init_reversed(&mut self, node_id: NodeId, reversed_id: NodeId) {
1197 debug_assert!(reversed_id != NodeId::MISSING);
1198 match self.reversed.get_mut(node_id.0 as usize) {
1199 Some(v) => *v = reversed_id,
1200 None => {
1201 self.reversed
1202 .resize(node_id.0 as usize + 1, NodeId::MISSING);
1203 self.reversed[node_id.0 as usize] = reversed_id;
1204 }
1205 }
1206 }
1207
1208 fn init(&mut self, inst: NodeKey) -> NodeId {
1209 self.num_created += 1;
1210 let node_id = NodeId(self.num_created);
1211 self.index.insert(inst.clone(), node_id);
1212 match inst.kind {
1213 Kind::Pred => {
1214 let meta_id = self.mb.get_meta_id(Metadata {
1215 flags: MetaFlags::ZERO,
1216 nulls: NullsId::EMPTY,
1217 });
1218 self.init_metadata(node_id, meta_id);
1219 }
1220 Kind::Begin => {
1221 let meta_id = self.mb.get_meta_id(Metadata {
1222 flags: MetaFlags::with_nullability(Nullability::BEGIN, MetaFlags::ZERO),
1223 nulls: NullsId::BEGIN0,
1224 });
1225 self.init_metadata(node_id, meta_id);
1226 }
1227 Kind::End => {
1228 let meta_id = self.mb.get_meta_id(Metadata {
1229 flags: MetaFlags::with_nullability(Nullability::END, MetaFlags::ZERO),
1230 nulls: NullsId::END0,
1231 });
1232 self.init_metadata(node_id, meta_id);
1233 }
1234 Kind::Inter => {
1235 let m1 = self.get_node_meta_id(inst.left);
1236 let m2 = self.get_node_meta_id(inst.right);
1237 let meta_id = {
1238 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1239 let mask_l = inst.left.nullability(self);
1240 let mask_r = inst.right.nullability(self);
1241 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1242 let mut nulls = self.mb.nb.and_id(left_nulls, right_nulls);
1243 nulls = self.mb.nb.and_mask(nulls, mask_l);
1244 nulls = self.mb.nb.and_mask(nulls, mask_r);
1245 let new_meta = Metadata {
1246 flags: self.mb.flags_inter(m1, m2),
1247 nulls,
1248 };
1249 let new_id = self.mb.get_meta_id(new_meta);
1250
1251 new_id
1252 };
1253 self.init_metadata(node_id, meta_id);
1254 }
1255 Kind::Union => {
1256 let m1 = self.get_node_meta_id(inst.left);
1257 let m2 = self.get_node_meta_id(inst.right);
1258 let meta_id = {
1259 let left_nulls = self.mb.get_meta_ref(m1).nulls;
1260 let right_nulls = self.mb.get_meta_ref(m2).nulls;
1261 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1262 let new_meta = Metadata {
1263 flags: self.mb.flags_union(m1, m2),
1264 nulls,
1265 };
1266 let new_id = self.mb.get_meta_id(new_meta);
1267 new_id
1268 };
1269 self.init_metadata(node_id, meta_id);
1270 }
1271 Kind::Concat => {
1272 let flags = self.mb.flags_concat(
1273 self.get_node_meta_id(inst.left),
1274 self.get_node_meta_id(inst.right),
1275 );
1276
1277 let right_nullability = inst.right.nullability(self);
1278 let left_nullability = inst.left.nullability(self);
1279 let nulls_left = self.get_nulls_id(inst.left);
1280 let nulls_right = self.get_nulls_id(inst.right);
1281 let mut nulls = self.mb.nb.or_id(nulls_left, nulls_right);
1282 let mask = right_nullability.and(left_nullability);
1283 nulls = self.mb.nb.and_mask(nulls, mask);
1284
1285 let new_id = self.mb.get_meta_id(Metadata {
1286 flags,
1287 nulls: nulls,
1288 });
1289 self.init_metadata(node_id, new_id);
1290 }
1291 Kind::Star => {
1292 let left_nulls = self.get_nulls_id(inst.left);
1293 let nulls = self.mb.nb.or_id(left_nulls, NullsId::ALWAYS0);
1294 let meta_id = self.mb.get_meta_id(Metadata {
1295 flags: self.mb.flags_star(self.get_node_meta_id(inst.left)),
1296 nulls: nulls,
1297 });
1298 self.init_metadata(node_id, meta_id);
1299 }
1300 Kind::Compl => {
1301 let nulls = self.mb.nb.not_id(self.get_nulls_id(inst.left));
1302 let meta_id = self.mb.get_meta_id(Metadata {
1303 flags: self.mb.flags_compl(self.get_node_meta_id(inst.left)),
1304 nulls: nulls,
1305 });
1306 self.init_metadata(node_id, meta_id);
1307 }
1308 Kind::Lookbehind => {
1309 let mut null = self.get_meta_flags(inst.left).nullability();
1310 let mut contains_flags = self.get_flags_contains(inst.left);
1311 if !inst.right.is_missing() {
1312 null = null.and(self.get_meta_flags(inst.right).nullability());
1313 contains_flags = contains_flags.or(self.get_flags_contains(inst.right));
1314 }
1315
1316 let left_nulls = self.get_nulls_id(inst.left);
1317 let right_nulls = self.get_nulls_id(inst.right);
1318 let nulls = self.mb.nb.or_id(left_nulls, right_nulls);
1319 let meta_id = self.mb.get_meta_id(Metadata {
1320 flags: MetaFlags::with_nullability(
1321 null,
1322 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1323 ),
1324 nulls: nulls,
1325 });
1326 self.init_metadata(node_id, meta_id);
1327 }
1328 Kind::Lookahead => {
1329 let mut nulls = self.get_nulls_id(inst.left);
1330 let left_nullability = inst.left.nullability(self);
1331 let nulls_right = self.get_nulls_id_w_mask(inst.right, left_nullability);
1332 nulls = self.mb.nb.or_id(nulls, nulls_right);
1333 nulls = self.mb.nb.add_rel(nulls, inst.extra);
1334
1335 let la_inner = inst.left;
1336 let la_tail = inst.right;
1337 let null = self
1338 .get_meta_flags(la_inner)
1339 .nullability()
1340 .and(self.get_meta_flags(la_tail.missing_to_eps()).nullability());
1341 let contains_flags = self
1342 .get_flags_contains(la_inner)
1343 .or(self.get_flags_contains(la_tail));
1344
1345 let meta_id = self.mb.get_meta_id(Metadata {
1346 flags: MetaFlags::with_nullability(
1347 null,
1348 contains_flags.or(MetaFlags::CONTAINS_LOOKAROUND),
1349 ),
1350 nulls: nulls,
1351 });
1352 self.init_metadata(node_id, meta_id);
1353 }
1354 }
1355
1356 self.array.push(inst);
1357
1358 if let Some(rw) = self.post_init_simplify(node_id) {
1359 self.override_as(node_id, rw)
1360 } else {
1361 node_id
1362 }
1363 }
1364
1365 fn post_init_simplify(&mut self, node_id: NodeId) -> Option<NodeId> {
1366 match self.get_kind(node_id) {
1367 Kind::Concat => {
1368 let lhs = node_id.left(self);
1369 let rhs = node_id.right(self);
1370 if let Some(_) = lhs.is_pred_star(self) {
1371 if let Some(opttail) = rhs.is_opt_v(self) {
1373 if let Some(true) = self.subsumes(lhs, opttail) {
1374 return Some(lhs);
1376 }
1377 }
1378 }
1379 }
1380 Kind::Union => {
1381 let lhs = node_id.left(self);
1383 let rhs = node_id.right(self);
1384 let mut subsumed = false;
1385 rhs.iter_union_while(self, &mut |b, branch| {
1386 if b.nullable_subsumes(branch, lhs) {
1387 subsumed = true;
1388 }
1389 !subsumed
1390 });
1391 if subsumed {
1392 return Some(rhs);
1393 }
1394 if lhs != rhs && self.union_branches_subset(lhs, rhs) {
1396 return Some(rhs);
1397 }
1398 }
1399 _ => {}
1400 }
1401
1402 None
1403 }
1404
1405 fn union_branches_subset(&self, lhs: NodeId, rhs: NodeId) -> bool {
1407 if self.get_kind(lhs) != Kind::Union {
1409 return false; }
1411 let mut rhs_branches = Vec::new();
1413 let mut curr = rhs;
1414 while self.get_kind(curr) == Kind::Union {
1415 rhs_branches.push(self.get_left(curr));
1416 curr = self.get_right(curr);
1417 }
1418 rhs_branches.push(curr);
1419
1420 curr = lhs;
1422 while self.get_kind(curr) == Kind::Union {
1423 if !rhs_branches.contains(&self.get_left(curr)) {
1424 return false;
1425 }
1426 curr = self.get_right(curr);
1427 }
1428 rhs_branches.contains(&curr)
1429 }
1430
1431 fn nullable_subsumes(&self, node: NodeId, target: NodeId) -> bool {
1433 if node == target {
1434 return true;
1435 }
1436 match self.get_kind(node) {
1437 Kind::Union => {
1438 self.nullable_subsumes(self.get_left(node), target)
1439 || self.nullable_subsumes(self.get_right(node), target)
1440 }
1441 Kind::Concat if self.is_always_nullable(self.get_left(node)) => {
1442 self.nullable_subsumes(self.get_right(node), target)
1443 }
1444 _ => false,
1445 }
1446 }
1447
1448 pub fn num_nodes(&self) -> u32 {
1449 self.num_created
1450 }
1451 fn get_node_id(&mut self, inst: NodeKey) -> NodeId {
1452 match self.index.get(&inst) {
1453 Some(&id) => id,
1454 None => self.init(inst),
1455 }
1456 }
1457 #[inline]
1458 fn key_is_created(&self, inst: &NodeKey) -> Option<&NodeId> {
1459 self.index.get(inst)
1460 }
1461
1462 fn init_as(&mut self, key: NodeKey, subsumed: NodeId) -> NodeId {
1463 self.index.insert(key, subsumed);
1464 subsumed
1465 }
1466
1467 pub(crate) fn override_as(&mut self, key: NodeId, subsumed: NodeId) -> NodeId {
1468 let key = &self.array[key.0 as usize];
1469 let entry = self.index.get_mut(key).unwrap();
1470 *entry = subsumed;
1471 subsumed
1472 }
1473
1474 #[inline]
1475 pub(crate) fn get_left(&self, node_id: NodeId) -> NodeId {
1476 self.array[node_id.0 as usize].left
1477 }
1478
1479 #[inline]
1480 pub(crate) fn get_right(&self, node_id: NodeId) -> NodeId {
1481 self.array[node_id.0 as usize].right
1482 }
1483
1484 #[inline]
1485 pub(crate) fn get_extra(&self, node_id: NodeId) -> u32 {
1486 self.array[node_id.0 as usize].extra
1487 }
1488
1489 #[inline]
1490 pub(crate) fn get_concat_end(&self, node_id: NodeId) -> NodeId {
1491 debug_assert!(self.get_kind(node_id) == Kind::Concat);
1492 let mut curr = node_id;
1493 while self.get_kind(curr) == Kind::Concat {
1494 curr = curr.right(self);
1495 }
1496 curr
1497 }
1498
1499 fn has_trailing_la(&self, node: NodeId) -> bool {
1501 let end = match self.get_kind(node) {
1502 Kind::Concat => self.get_concat_end(node),
1503 Kind::Lookahead => node,
1504 _ => return false,
1505 };
1506 self.get_kind(end) == Kind::Lookahead && end.right(self).is_missing()
1507 }
1508
1509 fn strip_trailing_la(&mut self, node: NodeId) -> (NodeId, NodeId) {
1511 if self.get_kind(node) == Kind::Lookahead {
1512 return (NodeId::EPS, node);
1513 }
1514 debug_assert!(self.get_kind(node) == Kind::Concat);
1515 let right = node.right(self);
1516 if self.get_kind(right) != Kind::Concat {
1517 return (node.left(self), right);
1519 }
1520 let (stripped, la) = self.strip_trailing_la(right);
1521 (self.mk_concat(node.left(self), stripped), la)
1522 }
1523 #[inline]
1524 pub(crate) fn get_lookahead_inner(&self, lookahead_node_id: NodeId) -> NodeId {
1525 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1526 lookahead_node_id.left(self)
1527 }
1528 #[inline]
1529 pub(crate) fn get_lookahead_tail(&self, lookahead_node_id: NodeId) -> NodeId {
1530 debug_assert!(self.get_kind(lookahead_node_id) == Kind::Lookahead);
1531 lookahead_node_id.right(self)
1532 }
1533 #[inline]
1534 pub(crate) fn get_lookahead_rel(&self, lookahead_node_id: NodeId) -> u32 {
1535 debug_assert!(
1536 self.get_kind(lookahead_node_id) == Kind::Lookahead,
1537 "not lookahead: {:?}",
1538 self.pp(lookahead_node_id)
1539 );
1540 self.get_extra(lookahead_node_id)
1541 }
1542 #[inline]
1543 pub(crate) fn get_lookbehind_inner(&self, lookbehind_node_id: NodeId) -> NodeId {
1544 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1545 lookbehind_node_id.left(self)
1546 }
1547 #[inline]
1548 pub(crate) fn get_lookbehind_prev(&self, lookbehind_node_id: NodeId) -> NodeId {
1549 debug_assert!(self.get_kind(lookbehind_node_id) == Kind::Lookbehind);
1550 lookbehind_node_id.right(self)
1551 }
1552
1553 #[inline]
1554 pub fn get_kind(&self, node_id: NodeId) -> Kind {
1555 debug_assert!(
1556 self.array.len() > node_id.0 as usize,
1557 "array len: {:?}",
1558 node_id
1559 );
1560 self.array[node_id.0 as usize].kind
1561 }
1562
1563 #[inline]
1564 pub(crate) fn get_node(&self, node_id: NodeId) -> &NodeKey {
1565 &self.array[node_id.0 as usize]
1566 }
1567
1568 #[inline]
1569 fn get_node_meta_id(&self, node_id: NodeId) -> MetadataId {
1570 self.metadata[node_id.0 as usize]
1571 }
1572
1573 #[inline]
1574 fn get_meta(&self, node_id: NodeId) -> &Metadata {
1575 debug_assert!(node_id.0 != u32::MAX);
1576 let meta_id = self.get_node_meta_id(node_id);
1577 debug_assert!(meta_id != MetadataId::MISSING);
1578 &self.mb.array[meta_id.0 as usize]
1579 }
1580
1581 #[inline]
1582 pub(crate) fn get_nulls_id(&self, node_id: NodeId) -> NullsId {
1583 if node_id == NodeId::MISSING {
1584 NullsId::EMPTY
1585 } else {
1586 self.get_meta(node_id).nulls
1587 }
1588 }
1589
1590 #[inline]
1591 pub(crate) fn get_nulls_id_w_mask(&mut self, node_id: NodeId, mask: Nullability) -> NullsId {
1592 if node_id == NodeId::MISSING {
1593 NullsId::EMPTY
1594 } else {
1595 let nulls = self.get_meta(node_id).nulls;
1596 self.mb.nb.and_mask(nulls, mask)
1597 }
1598 }
1599
1600 #[inline]
1601 pub(crate) fn get_meta_flags(&self, node_id: NodeId) -> MetaFlags {
1602 let meta_id = self.get_node_meta_id(node_id);
1603 let meta = &self.mb.array[meta_id.0 as usize];
1604 meta.flags
1605 }
1606
1607 #[inline]
1608 pub(crate) fn get_only_nullability(&self, node_id: NodeId) -> Nullability {
1609 self.get_meta(node_id).flags.nullability()
1610 }
1611
1612 #[inline]
1613 pub(crate) fn get_flags_contains(&self, node_id: NodeId) -> MetaFlags {
1614 let meta_id = self.get_node_meta_id(node_id);
1615 let meta = &self.mb.array[meta_id.0 as usize];
1616 meta.flags.all_contains_flags()
1617 }
1618
1619 pub fn strip_lb(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1620 if !self.contains_look(node_id) {
1621 return Ok(node_id);
1622 }
1623 if self.get_kind(node_id) == Kind::Concat
1624 && self.get_kind(node_id.left(self)) == Kind::Lookbehind
1625 {
1626 return self.strip_lb(node_id.right(self));
1627 }
1628 match self.get_kind(node_id) {
1629 Kind::Lookbehind | Kind::Lookahead => Err(AlgebraError::UnsupportedPattern),
1630 _ => Ok(node_id),
1631 }
1632 }
1633
1634 pub fn reverse(&mut self, node_id: NodeId) -> Result<NodeId, AlgebraError> {
1635 debug_assert!(node_id != NodeId::MISSING);
1636 if let Some(rev) = self.reversed.get(node_id.0 as usize) {
1637 if *rev != NodeId::MISSING {
1638 return Ok(*rev);
1639 }
1640 }
1641 let rw = match self.get_kind(node_id) {
1642 Kind::End => NodeId::BEGIN,
1643 Kind::Begin => NodeId::END,
1644 Kind::Pred => node_id,
1645 Kind::Concat => {
1646 let left = self.reverse(node_id.left(self))?;
1647 let right = self.reverse(node_id.right(self))?;
1648 self.mk_concat(right, left)
1649 }
1650 Kind::Union => {
1651 let left = self.reverse(node_id.left(self))?;
1652 let right = self.reverse(node_id.right(self))?;
1653 self.mk_union(left, right)
1654 }
1655 Kind::Inter => {
1656 let left = self.reverse(node_id.left(self))?;
1657 let right = self.reverse(node_id.right(self))?;
1658 self.mk_inter(left, right)
1659 }
1660 Kind::Star => {
1661 let body = self.reverse(node_id.left(self))?;
1662 self.mk_star(body)
1663 }
1664 Kind::Compl => {
1665 let body = self.reverse(node_id.left(self))?;
1666 self.mk_compl(body)
1667 }
1668 Kind::Lookbehind => {
1669 let prev = self.get_lookbehind_prev(node_id);
1670 let tail = if prev != NodeId::MISSING {
1671 self.reverse(prev)?
1672 } else {
1673 NodeId::MISSING
1674 };
1675 let inner_id = self.get_lookbehind_inner(node_id);
1676 let inner = self.reverse(inner_id)?;
1677 self.mk_lookahead(inner, tail, 0)
1678 }
1679 Kind::Lookahead => {
1680 let rel = self.get_lookahead_rel(node_id);
1681 if rel == u32::MAX {
1682 let lbody = self.get_lookahead_inner(node_id);
1684 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
1685 let lbody_ts = self.mk_concat(lbody, NodeId::TS);
1686 let ltail_ts = self.mk_concat(ltail, NodeId::TS);
1687 let as_inter = self.mk_inter(lbody_ts, ltail_ts);
1688 let rev = self.reverse(as_inter)?;
1689 self.init_reversed(node_id, rev);
1690 return Ok(rev);
1691 }
1692 if rel != 0 {
1693 return Err(AlgebraError::UnsupportedPattern);
1694 }
1695 let tail_node = self.get_lookahead_tail(node_id);
1696 let prev = if tail_node != NodeId::MISSING {
1697 self.reverse(tail_node)?
1698 } else {
1699 NodeId::MISSING
1700 };
1701 let inner_id = self.get_lookahead_inner(node_id);
1702 let inner = self.reverse(inner_id)?;
1703 self.mk_lookbehind_internal(inner, prev)?
1704 }
1705 };
1706 self.init_reversed(node_id, rw);
1707 Ok(rw)
1708 }
1709
1710 pub(crate) fn mk_pred(&mut self, pred: TSetId) -> NodeId {
1711 let node = NodeKey {
1712 kind: Kind::Pred,
1713 left: NodeId::MISSING,
1714 right: NodeId::MISSING,
1715 extra: pred.0,
1716 };
1717 self.get_node_id(node)
1718 }
1719
1720 pub fn mk_compl(&mut self, body: NodeId) -> NodeId {
1721 let key = NodeKey {
1722 kind: Kind::Compl,
1723 left: body,
1724 right: NodeId::MISSING,
1725 extra: u32::MAX,
1726 };
1727 if let Some(id) = self.key_is_created(&key) {
1728 return *id;
1729 }
1730 if body == NodeId::BOT {
1731 return NodeId::TS;
1732 }
1733 if body == NodeId::TS {
1734 return NodeId::BOT;
1735 }
1736
1737 if let Some(contains_body) = body.is_contains(&self) {
1738 if contains_body.is_pred(&self) {
1739 let pred = contains_body.pred_tset(&self);
1740 let notpred = self.mk_pred_not(pred);
1741 let node = self.mk_star(notpred);
1742 return self.init_as(key, node);
1743 } else if contains_body == NodeId::END {
1744 return self.init_as(key, NodeId::BOT);
1745 } else {
1746 }
1747 };
1748
1749 if self.get_kind(body) == Kind::Compl {
1750 return self.get_node(body).left;
1751 }
1752
1753 let node_id = self.get_node_id(key);
1754 node_id
1755 }
1756
1757 pub(crate) fn extract_nulls_mask(&mut self, body: NodeId, mask: Nullability) -> NodeId {
1758 let nid = self.get_nulls_id(body);
1759 let nref = self.mb.nb.get_set_ref(nid).clone();
1760 let mut futures = NodeId::BOT;
1761 for n in nref.iter() {
1762 if !n.is_mask_nullable(mask) {
1763 continue;
1764 }
1765
1766 let eff = if n.rel == 0 {
1767 NodeId::EPS
1768 } else {
1769 self.mk_lookahead_internal(NodeId::EPS, NodeId::MISSING, n.rel)
1770 };
1771 futures = self.mk_union(futures, eff)
1772 }
1773 futures
1774 }
1775
1776 fn attempt_rw_concat_2(&mut self, head: NodeId, tail: NodeId) -> Option<NodeId> {
1778 if cfg!(feature = "norewrite") {
1779 return None;
1780 }
1781
1782 if self.get_kind(tail) == Kind::Lookbehind {
1784 let lbleft = self.mk_concat(head, self.get_lookbehind_prev(tail).missing_to_eps());
1785 return match self
1786 .mk_lookbehind_internal(self.get_lookbehind_inner(tail).missing_to_eps(), lbleft)
1787 {
1788 Ok(node) => Some(node),
1789 Err(_) => None,
1790 };
1791 }
1792 if self.get_kind(head) == Kind::Lookahead {
1794 let la_body = self.get_lookahead_inner(head);
1795 let la_tail = self.get_lookahead_tail(head);
1796 let la_rel = self.get_lookahead_rel(head);
1797 let new_la_tail = self.mk_concat(la_tail.missing_to_eps(), tail);
1798 let la_rel = if new_la_tail.is_kind(self, Kind::Lookahead) {
1799 let tail_rel = self.get_lookahead_rel(new_la_tail);
1800 tail_rel + la_rel
1801 } else if new_la_tail.is_center_nullable(self) {
1802 la_rel
1803 } else {
1804 u32::MAX
1805 };
1806
1807 return Some(self.mk_lookahead_internal(la_body, new_la_tail, la_rel));
1808 }
1809
1810 if head.is_kind(self, Kind::End) {
1811 if tail == NodeId::TS {
1812 return Some(head);
1813 }
1814 }
1815 if head == NodeId::TS {
1816 if tail == NodeId::END {
1817 return Some(head);
1818 }
1819 }
1820
1821 if head == NodeId::TS {
1822 if self.nullability(tail) == Nullability::ALWAYS {
1823 return Some(NodeId::TS);
1824 }
1825 }
1826
1827 if tail == NodeId::TS {
1828 if self.nullability(head) == Nullability::ALWAYS {
1829 return Some(NodeId::TS);
1830 }
1831 }
1832
1833 if self.get_kind(tail) == Kind::Union {
1834 if head == NodeId::TS {
1836 let mut should_distribute_top = false;
1837 self.iter_unions(tail, |v| {
1838 if v == NodeId::BEGIN || self.starts_with_ts(v) {
1839 should_distribute_top = true;
1840 }
1841 });
1842 if should_distribute_top {
1843 let mut new_union = NodeId::BOT;
1844 let mut curr = tail;
1845 while self.get_kind(curr) == Kind::Union {
1846 let new_node = self.mk_concat(NodeId::TS, curr.left(self));
1847 new_union = self.mk_union(new_node, new_union);
1848 curr = curr.right(self);
1849 }
1850 let new_node = self.mk_concat(NodeId::TS, curr);
1851 new_union = self.mk_union(new_union, new_node);
1852 return Some(new_union);
1853 }
1854 }
1855 }
1856
1857 if self.get_kind(head) == Kind::Union {
1858 if head == NodeId::TS {
1860 let mut should_distribute_top = false;
1861 self.iter_unions(head, |v| {
1862 if v == NodeId::END || self.starts_with_ts(v) {
1863 should_distribute_top = true;
1864 }
1865 });
1866 if should_distribute_top {
1867 let mut new_union = NodeId::BOT;
1868 let mut curr = head;
1869 while self.get_kind(curr) == Kind::Union {
1870 let new_node = self.mk_concat(curr.left(self), NodeId::TS);
1871 new_union = self.mk_union(new_node, new_union);
1872 curr = curr.right(self);
1873 }
1874 let new_node = self.mk_concat(curr, NodeId::TS);
1875 new_union = self.mk_union(new_union, new_node);
1876 return Some(new_union);
1877 }
1878 }
1879 }
1880
1881 if self.get_kind(head) == Kind::Inter {
1882 if tail == NodeId::TS {
1883 let mut alltopstar = true;
1884 iter_inter!(self, head, |v| {
1885 alltopstar = self.ends_with_ts(v);
1886 });
1887 if alltopstar {
1888 return Some(head);
1889 }
1890 }
1891 }
1892
1893 if head.is_star(self) && head == tail {
1894 return Some(head);
1895 }
1896
1897 None
1898 }
1899
1900 fn attempt_rw_union_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
1902 use Kind::*;
1903
1904 if cfg!(feature = "norewrite") {
1905 return None;
1906 }
1907 if left == right {
1908 return Some(left);
1909 }
1910
1911 if right.is_kind(self, Kind::Union) && left == right.left(self) {
1912 return Some(right);
1913 }
1914
1915 if self.get_kind(left) == Kind::Pred {
1916 if self.get_kind(right) == Kind::Pred {
1917 let l = left.pred_tset(&self);
1918 let r = right.pred_tset(&self);
1919 let solver = self.solver();
1920 let psi = solver.or_id(l, r);
1921 let rewrite = self.mk_pred(psi);
1922 return Some(rewrite);
1923 }
1924 }
1925
1926 if left == NodeId::EPS {
1927 if self.get_extra(left) == 0 {
1928 if self.nullability(right) == Nullability::ALWAYS {
1929 return Some(right);
1930 }
1931 }
1932 }
1933
1934 if self.get_kind(left) == Kind::Lookahead {
1935 if self.get_kind(right) == Kind::Lookahead {
1936 let lb = left.left(self);
1937 let lt = left.right(self);
1938 let lrel = left.extra(self);
1939
1940 let rb = right.left(self);
1941 let rt = right.right(self);
1942 let rrel = right.extra(self);
1943
1944 if lrel == rrel {
1945 if lb == rb {}
1946 if lt.is_missing() {
1947 if rt.is_missing() {
1948 let unioned = self.mk_union(lb, rb);
1949 let node = self.mk_lookahead_internal(unioned, NodeId::MISSING, lrel);
1950 return Some(node);
1951 }
1952 }
1953 }
1954 }
1955 }
1956
1957 if right.is_kind(self, Concat) {
1958 if left == NodeId::END {
1959 if right.left(self) == NodeId::END {
1960 if self.nullability(right).has(Nullability::END) {
1961 return Some(right);
1962 }
1963 }
1964 }
1965 if left == right.left(self) {
1967 let rhs = self.mk_union(NodeId::EPS, right.right(self));
1968 let rw = self.mk_concat(left, rhs);
1969 return Some(rw);
1970 }
1971 if left.is_kind(self, Concat) {
1972 let head1 = left.left(self);
1973 let head2 = right.left(self);
1974
1975 if head1 == head2 {
1976 let t1 = left.right(self);
1977 let t2 = right.right(self);
1978 if head1 == NodeId::TS {
1980 if t2.has_concat_tail(self, t1) {
1981 return Some(left);
1982 }
1983 if t1.has_concat_tail(self, t2) {
1984 return Some(right);
1985 }
1986 }
1987 let un = self.mk_union(t1, t2);
1988 return Some(self.mk_concat(left.left(self), un));
1989 }
1990
1991 if false {
1994 let end1 = self.get_concat_end(left);
1995 let end2 = self.get_concat_end(right);
1996 if end1 == end2 {
1997 let flags = left.flags_contains(self).or(right.flags_contains(self));
1998 if !flags.contains_lookaround() && !flags.has(MetaFlags::CONTAINS_ANCHORS) {
1999 let rev1 = self.reverse(left).unwrap();
2000 let rev2 = self.reverse(right).unwrap();
2001
2002 let union_rev = self.mk_union(rev1, rev2);
2003 return Some(self.reverse(union_rev).unwrap());
2004 }
2005 }
2006 }
2007 }
2008 if self.get_kind(left) == Kind::Pred {
2009 if left == right.left(self) {
2010 let un = self.mk_opt(right.right(self));
2011 let conc = self.mk_concat(left, un);
2012 return Some(conc);
2013 }
2014 }
2015 }
2016
2017 if left == NodeId::EPS {
2018 if right.is_plus(self) {
2019 let result = self.mk_star(right.left(self));
2020 return Some(result);
2021 }
2022 }
2023
2024 None
2025 }
2026
2027 fn attempt_rw_inter_2(&mut self, left: NodeId, right: NodeId) -> Option<NodeId> {
2028 if cfg!(feature = "norewrite") {
2029 return None;
2030 }
2031 if left == right {
2032 return Some(left);
2033 }
2034
2035 if self.get_kind(right) == Kind::Union {
2036 let mut result = NodeId::BOT;
2037 self.iter_unions_b(
2038 right,
2039 &mut (|b, v| {
2040 let new_inter = b.mk_inter(v, left);
2041 result = b.mk_union(result, new_inter);
2042 }),
2043 );
2044 return Some(result);
2045 }
2046 if self.get_kind(left) == Kind::Union {
2047 let mut result = NodeId::BOT;
2048 self.iter_unions_b(
2049 left,
2050 &mut (|b, v| {
2051 let new_inter = b.mk_inter(v, right);
2052 result = b.mk_union(result, new_inter);
2053 }),
2054 );
2055 return Some(result);
2056 }
2057
2058 if self.get_kind(right) == Kind::Compl && right.left(self) == left {
2059 return Some(NodeId::BOT);
2060 }
2061
2062 if left.kind(&self) == Kind::Compl {
2063 if right.kind(&self) == Kind::Compl {
2064 let bodies = self.mk_union(left.left(self), right.left(self));
2065 return Some(self.mk_compl(bodies));
2066 }
2067 }
2068
2069 if left == NodeId::TOPPLUS {
2070 if let Some(_) = right.is_pred_star(self) {
2071 let newloop = self.mk_plus(right.left(self));
2072 return Some(newloop);
2073 }
2074 if right.is_never_nullable(&self) {
2075 return Some(right);
2076 }
2077 if right.is_kind(self, Kind::Lookahead) {
2078 if self.get_lookahead_tail(right).is_missing() {
2079 return Some(NodeId::BOT);
2080 }
2081 }
2082 if right.is_kind(self, Kind::Concat) {}
2083 }
2084
2085 if right.is_lookahead(&self) {
2086 if left.is_never_nullable(&self) {
2087 return Some(NodeId::BOT);
2088 }
2089 }
2090
2091 if self.get_kind(right) == Kind::Compl {
2092 let compl_body = right.left(self);
2093 if left == compl_body {
2094 return Some(NodeId::BOT);
2095 }
2096 if self.get_kind(compl_body) == Kind::Concat {
2097 let compl_head = compl_body.left(self);
2098 if compl_body.right(self) == NodeId::TS {
2100 if compl_head == left {
2101 return Some(NodeId::BOT);
2102 }
2103 }
2104 }
2105 }
2106
2107 if let Some(pleft) = left.is_pred_star(&self) {
2108 if let Some(pright) = right.is_pred_star(&self) {
2109 let merged = self.mk_inter(pleft, pright);
2110 return Some(self.mk_star(merged));
2111 }
2112 }
2113
2114 {
2118 let l_is_clb = self.get_kind(left) == Kind::Concat
2119 && self.get_kind(left.left(self)) == Kind::Lookbehind;
2120 let r_is_clb = self.get_kind(right) == Kind::Concat
2121 && self.get_kind(right.left(self)) == Kind::Lookbehind;
2122 if l_is_clb || r_is_clb {
2123 let (lb, body) = if l_is_clb && r_is_clb {
2124 let lb1 = left.left(self);
2125 let lb2 = right.left(self);
2126 let inner = self.mk_inter(
2127 self.get_lookbehind_inner(lb1),
2128 self.get_lookbehind_inner(lb2),
2129 );
2130 let lb = self.mk_lookbehind_internal(inner, NodeId::MISSING).unwrap();
2132 let body = self.mk_inter(left.right(self), right.right(self));
2133 (lb, body)
2134 } else if l_is_clb {
2135 let lb = left.left(self);
2136 let body = self.mk_inter(left.right(self), right);
2137 (lb, body)
2138 } else {
2139 let lb = right.left(self);
2140 let body = self.mk_inter(left, right.right(self));
2141 (lb, body)
2142 };
2143 return Some(self.mk_concat(lb, body));
2144 }
2145 }
2146
2147 {
2151 let l_has_la = self.has_trailing_la(left);
2152 let r_has_la = self.has_trailing_la(right);
2153 if l_has_la || r_has_la {
2154 let (body, la) = if l_has_la && r_has_la {
2155 let (lbody, l_la) = self.strip_trailing_la(left);
2156 let (rbody, r_la) = self.strip_trailing_la(right);
2157 let inner = self.mk_inter(
2158 self.get_lookahead_inner(l_la),
2159 self.get_lookahead_inner(r_la),
2160 );
2161 let la = self.mk_lookahead_internal(inner, NodeId::MISSING, 0);
2162 let body = self.mk_inter(lbody, rbody);
2163 (body, la)
2164 } else if l_has_la {
2165 let (lbody, la) = self.strip_trailing_la(left);
2166 let body = self.mk_inter(lbody, right);
2167 (body, la)
2168 } else {
2169 let (rbody, la) = self.strip_trailing_la(right);
2170 let body = self.mk_inter(left, rbody);
2171 (body, la)
2172 };
2173 return Some(self.mk_concat(body, la));
2174 }
2175 }
2176
2177 return None;
2178 }
2179
2180 fn attempt_rw_unions(&mut self, left: NodeId, right_union: NodeId) -> Option<NodeId> {
2181 if cfg!(feature = "norewrite") {
2182 return None;
2183 }
2184 debug_assert!(self.get_kind(right_union) == Kind::Union);
2185
2186 let mut rewritten = None;
2187 right_union.iter_union_while(
2188 self,
2189 &mut (|b, v| {
2190 if let Some(rw) = b.attempt_rw_union_2(left, v) {
2191 rewritten = Some((v, rw));
2192 false
2193 } else {
2194 true
2195 }
2196 }),
2197 );
2198
2199 if let Some(rw) = rewritten {
2200 let mut new_union = NodeId::BOT;
2201 right_union.iter_union(
2202 self,
2203 &mut (|b, v| {
2204 if v == rw.0 {
2205 new_union = b.mk_union(rw.1, new_union)
2206 } else {
2207 new_union = b.mk_union(v, new_union)
2208 }
2209 }),
2210 );
2211 return Some(new_union);
2212 };
2213
2214 return None;
2215 }
2216
2217 pub fn mk_concat(&mut self, head: NodeId, tail: NodeId) -> NodeId {
2218 debug_assert!(head != NodeId::MISSING, "missing to {}", self.pp(tail));
2219 debug_assert!(tail != NodeId::MISSING);
2220 let key = NodeKey {
2221 kind: Kind::Concat,
2222 left: head,
2223 right: tail,
2224 extra: u32::MAX,
2225 };
2226 if let Some(id) = self.key_is_created(&key) {
2227 return *id;
2228 }
2229
2230 if head == NodeId::BOT || tail == NodeId::BOT {
2231 return NodeId::BOT;
2232 }
2233 if head == NodeId::EPS {
2234 return tail;
2235 }
2236 if tail == NodeId::EPS {
2237 return head;
2238 }
2239
2240 match tail {
2241 NodeId::BEGIN => {
2243 if !self.is_nullable(head, Nullability::BEGIN) {
2244 return NodeId::BOT;
2245 } else {
2246 return NodeId::BEGIN;
2247 }
2248 }
2249 _ => {}
2250 }
2251
2252 if head.is_kind(self, Kind::Concat) {
2254 let left = head.left(self);
2255 let newright = self.mk_concat(head.right(self), tail);
2256 let updated = self.mk_concat(left, newright);
2257 return self.init_as(key, updated);
2258 }
2259
2260 if head == NodeId::TS && tail == NodeId::END {
2261 return NodeId::TS;
2262 }
2263
2264 if self.get_kind(head) == Kind::End {
2265 if !self.is_nullable(tail, Nullability::END) {
2266 return NodeId::BOT;
2267 }
2268 }
2269
2270 if self.get_kind(tail) == Kind::Concat {
2271 if let Some(rw) = self.attempt_rw_concat_2(head, tail.left(self)) {
2272 let upd = self.mk_concat(rw, tail.right(self));
2273 return self.init_as(key, upd);
2274 }
2275 }
2276
2277 match self.attempt_rw_concat_2(head, tail) {
2278 Some(new) => return self.init_as(key, new),
2279 None => {}
2280 }
2281
2282 match (self.get_kind(head), self.get_kind(tail)) {
2283 (Kind::Star, Kind::Concat) if head.is_star(&self) => {
2285 let rl = tail.left(self);
2286 if head == rl {
2287 return self.init_as(key, tail);
2288 }
2289 }
2290 (_, Kind::Concat) => {
2292 let curr = head;
2293 let h2 = self.mk_concat(curr, tail.left(self));
2294 let tr = tail.right(self);
2295 match self.attempt_rw_concat_2(h2, tr) {
2296 Some(new) => {
2297 return self.init_as(key, new);
2298 }
2299 None => {}
2300 }
2301 }
2302 _ if head == NodeId::TS => {
2303 if self.starts_with_ts(tail) {
2304 return self.init_as(key, tail);
2305 }
2306 }
2307 _ => {}
2308 }
2309
2310 self.init(key)
2311 }
2312
2313 pub fn mk_lookbehind(&mut self, lb_body: NodeId, lb_prev: NodeId) -> NodeId {
2314 let lb_body = {
2316 match self.starts_with_ts(lb_body) {
2317 true => lb_body,
2318 false => self.mk_concat(NodeId::TS, lb_body),
2319 }
2320 };
2321 self.mk_lookbehind_internal(lb_body, lb_prev).unwrap()
2323 }
2324
2325 fn mk_lookbehind_internal(
2326 &mut self,
2327 lb_body: NodeId,
2328 lb_prev: NodeId,
2329 ) -> Result<NodeId, AlgebraError> {
2330 debug_assert!(lb_body != NodeId::MISSING);
2331 debug_assert!(lb_prev.0 != u32::MAX, "pattern_left missing");
2332 if lb_body == NodeId::BOT || lb_prev == NodeId::BOT {
2333 return Ok(NodeId::BOT);
2334 }
2335 if lb_body == NodeId::TS {
2336 return Ok(lb_prev);
2337 }
2338 if lb_body == NodeId::EPS {
2339 match lb_prev {
2340 NodeId::MISSING => return Ok(NodeId::EPS),
2341 NodeId::EPS => return Ok(NodeId::EPS),
2342 _ => return Err(AlgebraError::UnsupportedPattern),
2343 }
2344 }
2345
2346 let key = NodeKey {
2347 kind: Kind::Lookbehind,
2348 left: lb_body,
2349 right: lb_prev,
2350 extra: u32::MAX,
2351 };
2352 match self.key_is_created(&key) {
2353 Some(id) => return Ok(*id),
2354 None => {
2355 if lb_prev == NodeId::TS {
2356 return Ok(self.mk_concat(lb_prev, lb_body));
2357 }
2358
2359 Ok(self.init(key))
2360 }
2361 }
2362 }
2363
2364 pub fn mk_lookahead(&mut self, la_body: NodeId, la_tail: NodeId, rel: u32) -> NodeId {
2365 let la_body = {
2367 match self.ends_with_ts(la_body) {
2368 true => la_body,
2369 false => self.mk_concat(la_body, NodeId::TS),
2370 }
2371 };
2372 let rel = if NodeId::MISSING == la_tail {
2373 rel
2374 } else {
2375 match la_tail.is_center_nullable(self) {
2376 false => u32::MAX,
2377 true => rel,
2378 }
2379 };
2380
2381 self.mk_lookahead_internal(la_body, la_tail, rel)
2382 }
2383
2384 pub(crate) fn mk_lookahead_internal(
2386 &mut self,
2387 la_body: NodeId,
2388 la_tail: NodeId,
2389 rel: u32,
2390 ) -> NodeId {
2391 let key = NodeKey {
2392 kind: Kind::Lookahead,
2393 left: la_body,
2394 right: la_tail,
2395 extra: rel,
2396 };
2397 if let Some(id) = self.key_is_created(&key) {
2398 return *id;
2399 }
2400 if la_body == NodeId::TS {
2401 if rel == 0 {
2402 return la_tail.missing_to_eps();
2403 } else {
2404 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2405 }
2406 }
2407 if la_body == NodeId::BOT || la_tail == NodeId::BOT {
2408 return NodeId::BOT;
2409 }
2410 if la_tail.is_missing() && rel == u32::MAX {
2411 return NodeId::BOT;
2412 }
2413
2414 if la_body == NodeId::EPS && la_tail.is_missing() {
2415 if rel == 0 {
2416 return la_body;
2417 }
2418 }
2419
2420 if la_tail == NodeId::TS {
2421 if rel == 0 || rel == u32::MAX {
2422 return self.mk_concat(la_body, NodeId::TS);
2423 } else if rel == u32::MAX {
2424 return self.mk_begins_with(la_body);
2425 }
2426 }
2427
2428 if rel == u32::MAX {
2429 if la_tail.is_missing() {
2430 return NodeId::BOT;
2431 }
2432
2433 if self.is_always_nullable(la_body) {
2434 return la_tail.missing_to_eps();
2435 }
2436
2437 if la_tail != NodeId::MISSING {
2438 match self.get_kind(la_tail) {
2439 _ => {
2440 if la_body.is_compl_plus_end(&self) {
2441 let minlen = self.get_min_length_only(la_tail);
2442 if minlen >= 1 {
2443 return NodeId::BOT;
2444 }
2445 }
2446 }
2447 }
2448 }
2449 }
2450
2451 if la_tail != NodeId::MISSING && self.get_kind(la_tail) == Kind::Lookahead {
2453 let la_body2 = self.get_lookahead_inner(la_tail);
2454 let body1_ts = self.mk_concat(la_body, NodeId::TS);
2455 let body2_ts = self.mk_concat(la_body2, NodeId::TS);
2456 let new_la_body = self.mk_inter(body1_ts, body2_ts);
2457 let new_la_rel = self.get_lookahead_rel(la_tail);
2458 let new_la_tail = self.get_lookahead_tail(la_tail);
2459 return self.mk_lookahead_internal(new_la_body, new_la_tail, new_la_rel);
2460 }
2461
2462 if self.get_kind(la_body) == Kind::Concat && la_body.left(self) == NodeId::TS {
2464 let la_body_right = la_body.right(self);
2465 if self.is_always_nullable(la_body_right) {
2466 return self.mk_lookahead_internal(la_body_right, la_tail, rel);
2467 }
2468 if la_body.right(self) == NodeId::END {
2469 return self.mk_lookahead_internal(NodeId::EPS, la_tail, rel);
2470 }
2471 let bodyright = la_body.right(self);
2472 if self.get_kind(bodyright) == Kind::Concat && bodyright.left(self) == NodeId::END {
2473 let strippedanchor = self.mk_concat(NodeId::TS, bodyright.right(self));
2474 return self.mk_lookahead_internal(strippedanchor, la_tail, rel);
2475 }
2476 }
2477
2478 if la_tail != NodeId::MISSING {
2479 match (self.get_kind(la_body), self.get_kind(la_tail)) {
2480 (Kind::Concat, Kind::Pred) => {
2481 let lpred = la_body.left(self);
2482 if self.get_kind(lpred) == Kind::Pred {
2483 let l = lpred.pred_tset(&self);
2484 let r = la_tail.pred_tset(&self);
2485 let psi_and = self.solver().and_id(l, r);
2486 let rewrite = self.mk_pred(psi_and);
2487 let new_rel = if rel == usize::MAX as u32 { 0 } else { rel + 1 };
2488 let new_right = self.mk_lookahead_internal(
2489 la_body.right(self),
2490 NodeId::MISSING,
2491 new_rel,
2492 );
2493 return self.mk_concat(rewrite, new_right);
2494 }
2495 }
2496 _ => {}
2497 }
2498 }
2499
2500 self.get_node_id(key)
2501 }
2502
2503 pub fn mk_neg_lookahead(&mut self, body: NodeId, rel: u32) -> NodeId {
2504 match self.get_node(body).kind {
2505 Kind::Pred => {
2506 let psi = body.pred_tset(&self);
2507 let negated = self.mk_pred_not(psi);
2508 let union = self.mk_union(NodeId::END, negated);
2509 let la = self.mk_lookahead(union, NodeId::MISSING, rel);
2510 la
2511 }
2512 _ => {
2513 let neg_inner = self.mk_concat(body, NodeId::TS);
2514 let neg_part = self.mk_compl(neg_inner);
2515 let conc = self.mk_concat(neg_part, NodeId::END);
2516 let look = self.mk_lookahead(conc, NodeId::MISSING, rel);
2517 look
2518 }
2519 }
2520 }
2521
2522 pub fn mk_neg_lookbehind(&mut self, body: NodeId) -> NodeId {
2523 match self.get_node(body).kind {
2524 Kind::Pred => {
2525 let psi = body.pred_tset(&self);
2526 let negated = self.mk_pred_not(psi);
2527 let union = self.mk_union(NodeId::BEGIN, negated);
2528 self.mk_lookbehind_internal(union, NodeId::MISSING).unwrap()
2530 }
2531 _ => {
2532 let neg_inner = self.mk_concat(NodeId::TS, body);
2533 let neg_part = self.mk_compl(neg_inner);
2534 let conc = self.mk_concat(NodeId::BEGIN, neg_part);
2535 self.mk_lookbehind_internal(conc, NodeId::MISSING).unwrap()
2537 }
2538 }
2539 }
2540
2541 pub fn mk_union(&mut self, left: NodeId, right: NodeId) -> NodeId {
2542 debug_assert!(left != NodeId::MISSING);
2543 debug_assert!(right != NodeId::MISSING);
2544 if left > right {
2545 return self.mk_union(right, left);
2546 }
2547 let key = NodeKey {
2548 kind: Kind::Union,
2549 left,
2550 right,
2551 extra: u32::MAX,
2552 };
2553 if let Some(id) = self.key_is_created(&key) {
2554 return *id;
2555 }
2556
2557 if left == right {
2558 return left;
2559 }
2560 if left == NodeId::BOT {
2561 return right;
2562 }
2563 if right == NodeId::BOT {
2564 return left;
2565 }
2566 if right == NodeId::TS {
2567 return right;
2568 }
2569 if left == NodeId::TS {
2570 return left;
2571 }
2572
2573 match (self.get_kind(left), self.get_kind(right)) {
2574 (Kind::Union, _) => {
2576 self.iter_unions_b(left, &mut |b, v| {
2577 b.temp_vec.push(v);
2578 });
2579 self.iter_unions_b(right, &mut |b, v| {
2580 b.temp_vec.push(v);
2581 });
2582 self.temp_vec.sort();
2583 let tree = self.temp_vec.clone();
2584 self.temp_vec.clear();
2585 let newnode = tree
2586 .iter()
2587 .rev()
2588 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2589 return self.init_as(key, newnode);
2590 }
2591 (_, Kind::Union) => {
2592 let rleft = right.left(&self);
2593 if left > rleft {
2595 self.iter_unions_b(left, &mut |b, v| {
2596 b.temp_vec.push(v);
2597 });
2598 self.iter_unions_b(right, &mut |b, v| {
2599 b.temp_vec.push(v);
2600 });
2601 self.temp_vec.sort();
2602 let tree = self.temp_vec.clone();
2603 self.temp_vec.clear();
2604 let newnode = tree
2605 .iter()
2606 .rev()
2607 .fold(NodeId::BOT, |acc, x| self.mk_union(*x, acc));
2608 return self.init_as(key, newnode);
2609 } else {
2610 if let Some(rw) = self.attempt_rw_unions(left, right) {
2611 return self.init_as(key, rw);
2612 }
2613 }
2614 }
2615 _ => {}
2616 }
2617
2618 if let Some(rw) = self.attempt_rw_union_2(left, right) {
2619 return self.init_as(key, rw);
2620 }
2621 self.init(key)
2622 }
2623
2624 pub(crate) fn mk_inter(&mut self, left_id: NodeId, right_id: NodeId) -> NodeId {
2625 debug_assert!(left_id != NodeId::MISSING);
2626 debug_assert!(right_id != NodeId::MISSING);
2627 if left_id == right_id {
2628 return left_id;
2629 }
2630 if left_id == NodeId::BOT || right_id == NodeId::BOT {
2631 return NodeId::BOT;
2632 }
2633 if left_id == NodeId::TS {
2634 return right_id;
2635 }
2636 if right_id == NodeId::TS {
2637 return left_id;
2638 }
2639 if left_id > right_id {
2640 return self.mk_inter(right_id, left_id);
2641 }
2642 let key = NodeKey {
2643 kind: Kind::Inter,
2644 left: left_id,
2645 right: right_id,
2646 extra: u32::MAX,
2647 };
2648 if let Some(id) = self.key_is_created(&key) {
2649 return *id;
2650 }
2651
2652 if let Some(rw) = self.attempt_rw_inter_2(left_id, right_id) {
2653 return self.init_as(key, rw);
2654 }
2655
2656 self.init(key)
2657 }
2658
2659 fn mk_unset(&mut self, kind: Kind) -> NodeId {
2660 let node = NodeKey {
2661 kind,
2662 left: NodeId::MISSING,
2663 right: NodeId::MISSING,
2664 extra: u32::MAX,
2665 };
2666 self.init(node)
2667 }
2668
2669 pub fn mk_plus(&mut self, body_id: NodeId) -> NodeId {
2670 let star = self.mk_star(body_id);
2671 self.mk_concat(body_id, star)
2672 }
2673 pub fn mk_repeat(&mut self, body_id: NodeId, lower: u32, upper: u32) -> NodeId {
2674 let opt = self.mk_opt(body_id);
2675 let mut nodes1 = vec![];
2676 for _ in lower..upper {
2677 nodes1.push(opt);
2678 }
2679 for _ in 0..lower {
2680 nodes1.push(body_id);
2681 }
2682 self.mk_concats(nodes1.into_iter())
2683 }
2684 pub fn mk_opt(&mut self, body_id: NodeId) -> NodeId {
2685 self.mk_union(NodeId::EPS, body_id)
2686 }
2687
2688 pub fn mk_star(&mut self, body_id: NodeId) -> NodeId {
2689 let key = NodeKey {
2690 kind: Kind::Star,
2691 left: body_id,
2692 right: NodeId::MISSING,
2693 extra: 0,
2694 };
2695 if let Some(id) = self.key_is_created(&key) {
2696 return *id;
2697 }
2698 if body_id.is_kind(self, Kind::Star) {
2700 return body_id;
2701 }
2702 self.get_node_id(key)
2703 }
2704
2705 pub fn nullability_emptystring(&self, node_id: NodeId) -> Nullability {
2708 match self.get_kind(node_id) {
2709 Kind::End => Nullability::EMPTYSTRING,
2710 Kind::Begin => Nullability::EMPTYSTRING,
2711 Kind::Pred => Nullability::NEVER,
2712 Kind::Star => Nullability::ALWAYS,
2713 Kind::Inter | Kind::Concat => {
2714 let lnull = self.nullability_emptystring(node_id.left(self));
2715 let rnull = self.nullability_emptystring(node_id.right(self));
2716 lnull.and(rnull) }
2718 Kind::Union => {
2719 let lnull = self.nullability_emptystring(node_id.left(self));
2720 let rnull = self.nullability_emptystring(node_id.right(self));
2721 lnull.or(rnull)
2722 }
2723 Kind::Compl => self.nullability_emptystring(node_id.left(self)).not(),
2724 Kind::Lookbehind => self.nullability_emptystring(node_id.left(self)),
2725 Kind::Lookahead => self.nullability_emptystring(node_id.left(self)),
2726 }
2727 }
2728
2729 #[inline(always)]
2730 pub(crate) fn any_nonbegin_nullable(&self, node_id: NodeId) -> bool {
2731 self.get_meta(node_id)
2732 .flags
2733 .nullability()
2734 .has(Nullability::CENTER.or(Nullability::END))
2735 }
2736
2737 pub fn nullability(&self, node_id: NodeId) -> Nullability {
2738 self.get_only_nullability(node_id)
2739 }
2740
2741 pub(crate) fn is_always_nullable(&self, node_id: NodeId) -> bool {
2742 self.get_only_nullability(node_id).and(Nullability::ALWAYS) == Nullability::ALWAYS
2743 }
2744
2745 pub fn pp(&self, node_id: NodeId) -> String {
2747 let mut s = String::new();
2748 self.ppw(&mut s, node_id).unwrap();
2749 s
2750 }
2751
2752 #[allow(dead_code)]
2753 pub(crate) fn pp_nulls(&self, node_id: NodeId) -> String {
2754 let nu = self.get_nulls_id(node_id);
2755 let nr = self.mb.nb.get_set_ref(nu);
2756 let s1 = format!("{:?}", nr);
2757 s1
2758 }
2759
2760 #[allow(dead_code)]
2761 pub(crate) fn ppt(&self, term_id: TRegexId) -> String {
2762 match self.get_tregex(term_id) {
2763 TRegex::Leaf(node_id) => {
2764 let mut s = String::new();
2765 self.ppw(&mut s, *node_id).unwrap();
2766 s
2767 }
2768 TRegex::ITE(cond, then_id, else_id) => {
2769 format!(
2770 "ITE({},{},{})",
2771 self.solver_ref().pp(*cond),
2772 self.ppt(*then_id),
2773 self.ppt(*else_id)
2774 )
2775 }
2776 }
2777 }
2778
2779 fn ppw(&self, s: &mut String, node_id: NodeId) -> Result<(), std::fmt::Error> {
2780 if cfg!(feature = "graphviz") {
2781 match node_id {
2782 NodeId::MISSING => return write!(s, "MISSING"),
2783 NodeId::BOT => return write!(s, "⊥"),
2784 NodeId::TS => return write!(s, "_*"),
2785 NodeId::TOP => return write!(s, "_"),
2786 NodeId::EPS => return write!(s, "ε"),
2787 _ => {}
2788 }
2789 }
2790
2791 match node_id {
2792 NodeId::MISSING => return write!(s, "MISSING"),
2793 NodeId::BOT => return write!(s, "⊥"),
2794 NodeId::TS => return write!(s, "_*"),
2795 NodeId::TOP => return write!(s, "_"),
2796 NodeId::EPS => return write!(s, ""),
2797 _ => {}
2798 }
2799
2800 match self.get_kind(node_id) {
2801 Kind::End => return write!(s, r"\z"),
2802 Kind::Begin => return write!(s, r"\A"),
2803 Kind::Pred => {
2804 let psi = node_id.pred_tset(self);
2805 if psi == TSetId::EMPTY {
2806 return write!(s, r"⊥");
2807 } else if psi == TSetId::FULL {
2808 return write!(s, r"_");
2809 } else {
2810 return write!(s, "{}", self.solver_ref().pp(psi));
2811 }
2812 }
2813 Kind::Inter => {
2814 write!(s, "(")?;
2815 self.ppw(s, node_id.left(self))?;
2816 write!(s, "&")?;
2817 let mut curr = node_id.right(self);
2818 while self.get_kind(curr) == Kind::Inter {
2819 let n = curr.left(self);
2820 self.ppw(s, n)?;
2821 write!(s, "&")?;
2822 curr = curr.right(self);
2823 }
2824 self.ppw(s, curr)?;
2825 write!(s, ")")
2826 }
2827 Kind::Union => {
2828 let left = node_id.left(self);
2829 let right = node_id.right(self);
2830 write!(s, "(")?;
2831 self.ppw(s, left)?;
2832 write!(s, "|")?;
2833 let mut curr = right;
2834 while self.get_kind(curr) == Kind::Union {
2835 let n = curr.left(self);
2836 self.ppw(s, n)?;
2837 write!(s, "|")?;
2838 curr = curr.right(self);
2839 }
2840 self.ppw(s, curr)?;
2841 write!(s, ")")
2842 }
2843 Kind::Concat => {
2844 let left = node_id.left(self);
2845 let right = node_id.right(self);
2846 if right.is_star(self) && right.left(self) == left {
2847 self.ppw(s, left)?;
2848 write!(s, "+")?;
2849 return Ok(());
2850 }
2851 if right.is_concat(self) {
2852 let rl = right.left(self);
2853 if rl.is_star(self) && rl.left(self) == left {
2854 self.ppw(s, left)?;
2855 write!(s, "+")?;
2856 return self.ppw(s, right.right(self));
2857 }
2858 }
2859 if right.is_concat(self) && right.left(self) == left {
2860 let mut num = 1;
2861 let mut right = right;
2862 while right.is_concat(self) && right.left(self) == left {
2863 num = num + 1;
2864 right = right.right(self);
2865 }
2866 if let Some(inner) = left.is_opt_v(self) {
2868 let mut inner_count = 0;
2869 let mut right2 = right;
2870 while right2.is_concat(self) && right2.left(self) == inner {
2871 inner_count += 1;
2872 right2 = right2.right(self);
2873 }
2874 if right2 == inner {
2875 inner_count += 1;
2876 self.ppw(s, inner)?;
2877 return write!(s, "{{{},{}}}", inner_count, inner_count + num);
2878 }
2879 if inner_count > 0 {
2880 self.ppw(s, inner)?;
2881 write!(s, "{{{},{}}}", inner_count, inner_count + num)?;
2882 return self.ppw(s, right2);
2883 }
2884 }
2885 self.ppw(s, left)?;
2886 if right == left {
2887 num = num + 1;
2888 return write!(s, "{{{}}}", num);
2889 }
2890 if num <= 3 && left.is_pred(self) {
2891 for _ in 1..num {
2892 self.ppw(s, left)?;
2893 }
2894 return self.ppw(s, right);
2895 } else {
2896 write!(s, "{{{}}}", num)?;
2897 return self.ppw(s, right);
2898 }
2899 }
2900 self.ppw(s, left)?;
2901 self.ppw(s, right)
2902 }
2903 Kind::Star => {
2904 let left = node_id.left(self);
2905 let leftkind = self.get_kind(left);
2906 match leftkind {
2907 Kind::Concat | Kind::Star | Kind::Compl => {
2908 write!(s, "(")?;
2909 self.ppw(s, left)?;
2910 write!(s, ")")?;
2911 }
2912 _ => {
2913 self.ppw(s, left)?;
2914 }
2915 };
2916 write!(s, "*")
2917 }
2918 Kind::Compl => {
2919 write!(s, "~(")?;
2920 self.ppw(s, node_id.left(self))?;
2921 write!(s, ")")
2922 }
2923 Kind::Lookbehind => {
2924 let lbleft = self.get_lookbehind_prev(node_id);
2925 let lbinner = self.get_lookbehind_inner(node_id);
2926 debug_assert!(lbleft.0 != u32::MAX, "lookbehind right is not u32::MAX");
2927 if lbleft != NodeId::MISSING {
2928 write!(s, "「")?;
2929 self.ppw(s, lbleft)?;
2930 write!(s, "」")?;
2931 }
2932
2933 write!(s, "(?<=")?;
2934 self.ppw(s, lbinner)?;
2935 write!(s, ")")
2936 }
2937 Kind::Lookahead => {
2938 let inner = self.get_lookahead_inner(node_id);
2939 write!(s, "(?=")?;
2940 self.ppw(s, inner)?;
2941 write!(s, ")")?;
2942 if self.get_lookahead_rel(node_id) != 0 {
2943 write!(s, "{{")?;
2944 let rel = self.get_lookahead_rel(node_id);
2945 if rel == u32::MAX {
2946 write!(s, "∅")?;
2947 } else {
2948 write!(s, "{}", rel)?;
2949 }
2950 write!(s, "}}")?;
2951 }
2952 if node_id.right(self) == NodeId::MISSING {
2953 Ok(())
2954 } else {
2955 write!(s, "『")?;
2956 self.ppw(s, node_id.right(self))?;
2957 write!(s, "』")
2958 }
2959 }
2960 }
2961 }
2962
2963 pub(crate) fn mk_begins_with(&mut self, node: NodeId) -> NodeId {
2964 self.mk_concat(node, NodeId::TS)
2965 }
2966
2967 #[allow(dead_code)]
2968 pub(crate) fn mk_not_begins_with(&mut self, node: NodeId) -> NodeId {
2969 let node_ts = self.mk_concat(node, NodeId::TS);
2970 self.mk_compl(node_ts)
2971 }
2972
2973 pub(crate) fn mk_pred_not(&mut self, set: TSetId) -> NodeId {
2974 let notset = self.solver().not_id(set);
2975 self.mk_pred(notset)
2976 }
2977
2978 pub fn mk_u8(&mut self, char: u8) -> NodeId {
2979 let set_id = self.solver().u8_to_set_id(char as u8);
2980 self.mk_pred(set_id)
2981 }
2982
2983 pub fn mk_range_u8(&mut self, start: u8, end_inclusive: u8) -> NodeId {
2984 let rangeset = self.solver().range_to_set_id(start, end_inclusive);
2985 self.mk_pred(rangeset)
2986 }
2987
2988 pub fn extract_literal_prefix(&self, node: NodeId) -> Vec<u8> {
2991 let mut prefix = Vec::new();
2992 let mut curr = node;
2993 loop {
2994 if curr == NodeId::EPS || curr == NodeId::BOT {
2995 break;
2996 }
2997 if self.get_kind(curr) == Kind::Pred {
2998 if let Some(byte) = self.solver_ref().single_byte(TSetId(self.get_extra(curr))) {
2999 prefix.push(byte);
3000 }
3001 break;
3002 }
3003 if self.get_kind(curr) != Kind::Concat {
3004 break;
3005 }
3006 let left = curr.left(self);
3007 if self.get_kind(left) != Kind::Pred {
3008 break;
3009 }
3010 match self.solver_ref().single_byte(TSetId(self.get_extra(left))) {
3011 Some(byte) => prefix.push(byte),
3012 None => break,
3013 }
3014 curr = curr.right(self);
3015 }
3016 prefix
3017 }
3018
3019 #[allow(dead_code)]
3020 pub(crate) fn mk_bytestring(&mut self, raw_str: &[u8]) -> NodeId {
3021 let mut result = NodeId::EPS;
3022 for byte in raw_str.into_iter().rev() {
3023 let node = self.mk_u8(*byte);
3024 result = self.mk_concat(node, result);
3025 }
3026 result
3027 }
3028
3029 pub fn mk_string(&mut self, raw_str: &str) -> NodeId {
3030 let mut result = NodeId::EPS;
3031 for byte in raw_str.bytes().rev() {
3032 let node = self.mk_u8(byte);
3033 result = self.mk_concat(node, result);
3034 }
3035 result
3036 }
3037
3038 pub fn mk_unions(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3039 let mut sorted: Vec<NodeId> = nodes.collect();
3040 if sorted.len() <= 1 {
3041 return sorted.pop().unwrap_or(NodeId::BOT);
3042 }
3043 sorted.sort();
3044 sorted.dedup();
3045 sorted.retain(|&x| x != NodeId::BOT);
3046 if sorted.is_empty() {
3047 return NodeId::BOT;
3048 }
3049 if sorted.len() > 16 {
3050 let mut by_head: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
3051 let mut non_concat: Vec<NodeId> = Vec::new();
3052 for &n in &sorted {
3053 if self.get_kind(n) == Kind::Concat {
3054 by_head.entry(self.get_left(n)).or_default().push(n);
3055 } else {
3056 non_concat.push(n);
3057 }
3058 }
3059 if by_head.len() < sorted.len() {
3060 let mut groups: Vec<NodeId> = non_concat;
3061 for (head, tails) in by_head {
3062 if tails.len() == 1 {
3063 groups.push(tails[0]);
3064 } else {
3065 let tail_nodes: Vec<NodeId> =
3066 tails.iter().map(|&n| self.get_right(n)).collect();
3067 let tail_union = self.mk_unions(tail_nodes.into_iter());
3068 let factored = self.mk_concat(head, tail_union);
3069 groups.push(factored);
3070 }
3071 }
3072 groups.sort();
3073 groups.dedup();
3074 return self.mk_unions(groups.into_iter());
3075 }
3076 }
3077 self.mk_unions_balanced(&sorted)
3078 }
3079
3080 fn mk_unions_balanced(&mut self, nodes: &[NodeId]) -> NodeId {
3081 match nodes.len() {
3082 0 => NodeId::BOT,
3083 1 => nodes[0],
3084 n => {
3085 let mid = n / 2;
3086 let left = self.mk_unions_balanced(&nodes[..mid]);
3087 let right = self.mk_unions_balanced(&nodes[mid..]);
3088 self.mk_union(left, right)
3089 }
3090 }
3091 }
3092
3093 pub fn mk_inters(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3094 nodes.rev().fold(NodeId::TS, |acc, v| self.mk_inter(acc, v))
3095 }
3096
3097 pub fn mk_concats(&mut self, nodes: impl DoubleEndedIterator<Item = NodeId>) -> NodeId {
3098 nodes
3099 .rev()
3100 .fold(NodeId::EPS, |acc, x| self.mk_concat(x, acc))
3101 }
3102}
3103
3104impl RegexBuilder {
3105 #[allow(dead_code)]
3106 pub(crate) fn extract_sat(&self, term_id: TRegexId) -> Vec<NodeId> {
3107 match self.get_tregex(term_id).clone() {
3108 TRegex::Leaf(node_id) => {
3109 if NodeId::BOT == node_id {
3110 vec![]
3111 } else {
3112 vec![node_id]
3113 }
3114 }
3115 TRegex::ITE(_, then_id, else_id) => {
3116 let mut then_nodes = self.extract_sat(then_id);
3117 let mut else_nodes = self.extract_sat(else_id);
3118 then_nodes.append(&mut else_nodes);
3119 then_nodes
3120 }
3121 }
3122 }
3123
3124 pub(crate) fn iter_unions(&self, start: NodeId, mut f: impl FnMut(NodeId)) {
3125 debug_assert!(self.get_kind(start) == Kind::Union);
3126 let mut curr = start;
3127 while self.get_kind(curr) == Kind::Union {
3128 f(curr.left(self));
3129 curr = curr.right(self);
3130 }
3131 f(curr);
3132 }
3133
3134 pub(crate) fn iter_unions_b(
3135 &mut self,
3136 curr: NodeId,
3137 f: &mut impl FnMut(&mut RegexBuilder, NodeId),
3138 ) {
3139 let mut curr = curr;
3140 while self.get_kind(curr) == Kind::Union {
3141 f(self, curr.left(self));
3142 curr = curr.right(self);
3143 }
3144 f(self, curr);
3145 }
3146
3147 pub(crate) fn try_elim_lookarounds(&mut self, node_id: NodeId) -> Option<NodeId> {
3148 if !self.contains_look(node_id) {
3149 return Some(node_id);
3150 }
3151 match self.get_kind(node_id) {
3152 Kind::Pred | Kind::Begin | Kind::End => return Some(node_id),
3153 Kind::Concat => {
3154 let left = node_id.left(self);
3155 let right = node_id.right(self);
3156 let elim_l = self.try_elim_lookarounds(left)?;
3157 let elim_r = self.try_elim_lookarounds(right)?;
3158 let rw = self.mk_concat(elim_l, elim_r);
3159 Some(rw)
3160 }
3161 Kind::Union => {
3162 let left = node_id.left(self);
3163 let right = node_id.right(self);
3164 let elim_l = self.try_elim_lookarounds(left)?;
3165 let elim_r = self.try_elim_lookarounds(right)?;
3166 let rw = self.mk_union(elim_l, elim_r);
3167 Some(rw)
3168 }
3169
3170 Kind::Star => {
3171 let body = node_id.left(self);
3172 let elim_l = self.try_elim_lookarounds(body)?;
3173 Some(self.mk_star(elim_l))
3174 }
3175 Kind::Compl => {
3176 let left = node_id.left(self);
3177 let elim_l = self.try_elim_lookarounds(left)?;
3178 Some(self.mk_compl(elim_l))
3179 }
3180 Kind::Lookahead => {
3181 let rel = self.get_lookahead_rel(node_id);
3182 if rel != 0 {
3183 return None;
3184 }
3185 let lbody = self.get_lookahead_inner(node_id);
3186 let ltail = self.get_lookahead_tail(node_id).missing_to_eps();
3187 let elim_l = self.try_elim_lookarounds(lbody)?;
3188 let elim_r = self.try_elim_lookarounds(ltail)?;
3189 let lbody_ts = self.mk_concat(elim_l, NodeId::TS);
3190 let ltail_ts = self.mk_concat(elim_r, NodeId::TS);
3191 let rw = self.mk_inter(lbody_ts, ltail_ts);
3192 Some(rw)
3193 }
3194 Kind::Lookbehind => {
3195 let linner = self.get_lookbehind_inner(node_id);
3196 let lprev = self.get_lookbehind_prev(node_id).missing_to_eps();
3197 let elim_l = self.try_elim_lookarounds(linner)?;
3198 let elim_r = self.try_elim_lookarounds(lprev)?;
3199 let lbody_ts = self.mk_concat(NodeId::TS, elim_l);
3200 let ltail_ts = self.mk_concat(NodeId::TS, elim_r);
3201 let rw = self.mk_inter(lbody_ts, ltail_ts);
3202 Some(rw)
3203 }
3204 Kind::Inter => {
3205 let left = node_id.left(self);
3206 let right = node_id.right(self);
3207 let elim_l = self.try_elim_lookarounds(left)?;
3208 let elim_r = self.try_elim_lookarounds(right)?;
3209 let rw = self.mk_inter(elim_l, elim_r);
3210 Some(rw)
3211 }
3212 }
3213 }
3214
3215 pub(crate) fn mk_non_nullable_safe(&mut self, node: NodeId) -> NodeId {
3217 if self.nullability(node) == Nullability::NEVER {
3218 node
3219 } else {
3220 self.mk_inter(NodeId::TOPPLUS, node)
3221 }
3222 }
3223
3224 fn iter_find_stack(
3225 &self,
3226 stack: &mut Vec<TRegexId>,
3227 mut f: impl FnMut(NodeId) -> bool,
3228 ) -> bool {
3229 loop {
3230 match stack.pop() {
3231 None => return false,
3232 Some(curr) => match self.get_tregex(curr) {
3233 TRegex::Leaf(n) => {
3234 let mut curr = *n;
3235 while curr != NodeId::BOT {
3236 match self.get_kind(curr) {
3237 Kind::Union => {
3238 if f(curr.left(self)) {
3239 return true;
3240 }
3241 curr = curr.right(self);
3242 }
3243 _ => {
3244 if f(*n) {
3245 return true;
3246 }
3247 curr = NodeId::BOT;
3248 }
3249 }
3250 }
3251 }
3252 TRegex::ITE(_, then_id, else_id) => {
3253 if *else_id != TRegexId::BOT {
3254 stack.push(*else_id);
3255 }
3256 stack.push(*then_id);
3257 }
3258 },
3259 }
3260 }
3261 }
3262
3263 pub(crate) fn is_empty_lang(&mut self, node: NodeId) -> Option<bool> {
3264 if node == NodeId::BOT {
3265 return Some(true);
3266 }
3267 if self.nullability(node) != Nullability::NEVER {
3268 return Some(false);
3269 }
3270 if let Some(cached) = self.cache_empty.get(&node) {
3271 if cached.is_checked() {
3272 return Some(cached.is_empty());
3273 }
3274 }
3275 let node = if !self.contains_look(node) {
3276 node
3277 } else {
3278 let elim = self.try_elim_lookarounds(node)?;
3279 elim
3280 };
3281 let isempty_flag = self.is_empty_lang_internal(node);
3282
3283 Some(isempty_flag == Ok(NodeFlags::IS_EMPTY))
3284 }
3285
3286 fn is_empty_lang_internal(&mut self, initial_node: NodeId) -> Result<NodeFlags, AlgebraError> {
3287 if !self.get_meta_flags(initial_node).contains_inter() {
3289 return Ok(NodeFlags::ZERO);
3290 }
3291
3292 let mut visited: HashMap<NodeId, NodeId> = HashMap::new();
3293 let mut todo: VecDeque<NodeId> = VecDeque::new();
3294 let begin_der = self.der(initial_node, Nullability::BEGIN)?;
3295 let mut stack = Vec::new();
3296 stack.push(begin_der);
3297 let found_nullable_right_away = self.iter_find_stack(&mut stack, |node| {
3298 visited.insert(node, initial_node);
3299 let nullability = self.nullability(node);
3300 if nullability != Nullability::NEVER {
3301 true
3302 } else {
3303 todo.push_back(node);
3304 false
3305 }
3306 });
3307 if found_nullable_right_away {
3308 return Ok(NodeFlags::ZERO);
3309 }
3310
3311 todo.push_back(initial_node);
3312 let isempty_flag: NodeFlags;
3313 let mut found_node = NodeId::BOT;
3314
3315 loop {
3316 match todo.pop_front() {
3317 None => {
3318 isempty_flag = NodeFlags::IS_EMPTY;
3319 break;
3320 }
3321 Some(outer) => {
3322 if let Some(cached) = self.cache_empty.get(&outer) {
3323 if cached.is_checked() {
3324 if cached.is_empty() {
3325 continue;
3326 } else {
3327 return Ok(NodeFlags::ZERO);
3328 }
3329 }
3330 }
3331
3332 let derivative = self.der(outer, Nullability::CENTER)?;
3333
3334 stack.push(derivative);
3335
3336 let found_null = self.iter_find_stack(&mut stack, |node| {
3337 if visited.contains_key(&node) {
3338 false
3339 } else {
3340 found_node = node;
3341 if !self.get_meta_flags(node).contains_inter() {
3342 return true;
3343 } else {
3344 visited.insert(node, outer);
3345 todo.push_front(node);
3346 self.any_nonbegin_nullable(node)
3347 }
3348 }
3349 });
3350 if found_null {
3351 self.cache_empty.insert(outer, NodeFlags::IS_CHECKED);
3352 isempty_flag = NodeFlags::ZERO;
3353 break;
3354 }
3355 }
3356 }
3357 }
3358
3359 self.cache_empty.insert(
3360 initial_node,
3361 NodeFlags(isempty_flag.0 | NodeFlags::IS_CHECKED.0),
3362 );
3363 Ok(isempty_flag)
3364 }
3365
3366 pub fn subsumes(&mut self, larger_lang: NodeId, smaller_lang: NodeId) -> Option<bool> {
3368 if larger_lang == smaller_lang {
3369 return Some(true);
3370 }
3371
3372 if self
3374 .nullability(larger_lang)
3375 .not()
3376 .and(self.nullability(smaller_lang))
3377 != Nullability::NEVER
3378 {
3379 return Some(false);
3380 }
3381
3382 let (smaller_lang, larger_lang) =
3387 if self.contains_look(smaller_lang) || self.contains_look(larger_lang) {
3388 let wrap = |b: &mut Self, n: NodeId| {
3389 let tmp = b.mk_concat(n, NodeId::TS);
3390 b.mk_concat(NodeId::TS, tmp)
3391 };
3392 (wrap(self, smaller_lang), wrap(self, larger_lang))
3393 } else {
3394 (smaller_lang, larger_lang)
3395 };
3396
3397 let nota = self.mk_compl(larger_lang);
3398 let diff = self.mk_inter(smaller_lang, nota);
3399 self.is_empty_lang(diff)
3400 }
3401}