smt_str/re/
build.rs

1//! The builder for regular expressions.
2
3use smallvec::smallvec;
4
5use crate::SmtChar;
6
7use super::*;
8use std::collections::{BTreeSet, HashMap};
9
10/// A builder for constructing regular expressions in the SMT-LIB string theory.
11///
12/// This is the only way to create [`Regex`] instances.
13/// The `ReBuilder` ensures structural sharing by deduplicates identical sub-expressions and  assigns globally unique identifiers to each node.
14/// An instance of `ReBuilder` is obtained by calling [`ReBuilder::default()`] or [`ReBuilder::non_optimizing()`].
15///
16/// Internally, the builder maintains a registry of expressions:
17/// if the same regular expression is constructed multiple times, it will return
18/// a shared, reference-counted pointer to the same [`ReNode`] instance.
19/// The builder automatically performs garbage collection on unused expressions to keep memory usage low.
20///
21/// For each SMT-LIB regex operation (e.g., `re.*`, `re.union`, `re.++`, `re.comp`, etc.),
22/// the builder provides a corresponding method to create the appropriate [`Regex`] node.
23///
24/// # Optimization
25/// By default, the builder performs lightweight optimizations during construction.
26/// For instance, it may simplify expressions like `(re.++ ε r)` to just `r`.
27/// This behavior can be disabled by constructing a builder using [ReBuilder::non_optimizing()],
28/// which suppresses all such optimizations.
29///
30/// # Using multiple builders
31/// Mixing regular expressions from different builders results in logical errors.
32/// It is recommended to use a single builder instance to construct all regular expressions in a program.
33///
34/// # Example
35/// ```
36/// use smt_str::re::*;
37/// use smallvec::smallvec;
38///
39/// let mut builder1 = ReBuilder::default();
40/// let mut builder2 = ReBuilder::default();
41///
42/// // Construct a regex using the first builder
43/// let r1 = builder1.to_re("a".into());
44///
45/// // Construct a regex using the second builder
46/// let r2 = builder2.to_re("b".into());
47///
48/// // The regexes are structurally different but the following assertion will hold
49/// assert_eq!(r1, r2);
50///
51/// // Construct compound regexes using regexes from different builder will also result in unexpected behavior
52/// let r3 = builder1.union(smallvec![r1.clone(), r2.clone()]);
53///
54/// // The following assertion will hold, although we wanted r3 to accept "a" and "b"
55/// assert!(!r3.accepts(&"a".into()), "{}", r3);
56/// ```
57#[derive(Debug)]
58pub struct ReBuilder {
59    registry: Registry,
60    optimize: bool,
61
62    /* base expressions */
63    re_none: Regex,
64    re_all: Regex,
65    re_allchar: Regex,
66    re_epsilon: Regex,
67}
68
69impl Default for ReBuilder {
70    /// Creates a new `RegexBuilder` with an empty internal registry.
71    /// By default, on-the-fly optimization is enabled.
72    fn default() -> Self {
73        let mut registry = Registry::new();
74        let re_none = registry.intern(ReOp::None);
75        let re_all = registry.intern(ReOp::All);
76        let re_allchar = registry.intern(ReOp::Any);
77        let re_epsilon = registry.intern(ReOp::Literal(SmtString::empty()));
78
79        Self {
80            registry,
81            optimize: true,
82            re_none,
83            re_all,
84            re_allchar,
85            re_epsilon,
86        }
87    }
88}
89
90impl ReBuilder {
91    /// Creates a new `RegexBuilder` with on-the-fly optimization disabled.
92    pub fn non_optimizing() -> Self {
93        Self {
94            optimize: false,
95            ..Default::default()
96        }
97    }
98
99    fn intern(&mut self, regex: ReOp) -> Regex {
100        self.registry.intern(regex)
101    }
102
103    /// Checks if the builder manages the given regex.
104    /// Returns true if the regex was constructed by this builder.
105    /// Returns false otherwise.
106    ///
107    /// # Example
108    /// ```
109    /// use smt_str::re::*;
110    /// use smallvec::smallvec;
111    ///
112    /// let mut builder1 = ReBuilder::default();
113    /// let mut builder2 = ReBuilder::default();
114    ///
115    /// // Construct a regex using the first builder
116    /// let r1 = builder1.to_re("a".into());
117    ///
118    /// // Construct a regex using the second builder
119    /// let r2 = builder2.to_re("b".into());
120    ///
121    ///
122    /// assert!(builder1.manages(&r1));
123    /// assert!(!builder1.manages(&r2));
124    /// assert!(builder2.manages(&r2));
125    /// assert!(!builder2.manages(&r1));
126    /// ```
127    pub fn manages(&self, regex: &Regex) -> bool {
128        // We need to check if the registry has the regex stored with the same id.
129        // After that we need to check that that the stored regex is pointer equal to the given regex
130        // Only checking the id is not enough as the id is only unique to the builder instance
131        // We could also recurse the structure and check if all children are managed by this builder.
132        match self.registry.registry.get(regex.op()) {
133            Some(r) => Rc::ptr_eq(r, regex),
134            None => false,
135        }
136    }
137
138    /// Re-create a structurally identical regex as the given one using this builder.
139    ///
140    /// /// # Example
141    /// ```
142    /// use smt_str::re::*;
143    /// use smallvec::smallvec;
144    ///
145    /// let mut builder1 = ReBuilder::default();
146    /// let mut builder2 = ReBuilder::default();
147    ///
148    ///
149    /// // Construct a regex using the second builder
150    /// let r = builder2.to_re("b".into());
151    ///
152    /// assert!(!builder1.manages(&r));
153    ///
154    /// let rr = builder1.regex(&r);
155    /// assert!(builder1.manages(&rr));
156    /// ```
157    pub fn regex(&mut self, regex: &Regex) -> Regex {
158        match regex.op() {
159            ReOp::Literal(w) => self.to_re(w.clone()),
160            ReOp::None => self.none(),
161            ReOp::All => self.all(),
162            ReOp::Any => self.allchar(),
163            ReOp::Concat(rs) => {
164                let rs = rs.iter().map(|r| self.regex(r)).collect();
165                self.concat(rs)
166            }
167            ReOp::Union(rs) => {
168                let rs = rs.iter().map(|r| self.regex(r)).collect();
169                self.union(rs)
170            }
171            ReOp::Inter(rs) => {
172                let rs = rs.iter().map(|r| self.regex(r)).collect();
173                self.inter(rs)
174            }
175            ReOp::Star(r) => {
176                let r = self.regex(r);
177                self.star(r)
178            }
179            ReOp::Plus(r) => {
180                let r = self.regex(r);
181                self.plus(r)
182            }
183            ReOp::Opt(r) => {
184                let r = self.regex(r);
185                self.opt(r)
186            }
187            ReOp::Range(r) => self.range_from_to(r.start(), r.end()),
188            ReOp::Comp(r) => {
189                let r = self.regex(r);
190                self.comp(r)
191            }
192            ReOp::Diff(r1, r2) => {
193                let r1 = self.regex(r1);
194                let r2 = self.regex(r2);
195                self.diff(r1, r2)
196            }
197            ReOp::Pow(r, e) => {
198                let r = self.regex(r);
199                self.pow(r, *e)
200            }
201            ReOp::Loop(r, l, u) => {
202                let r = self.regex(r);
203                self.loop_(r, *l, *u)
204            }
205        }
206    }
207
208    /// Constructor for `str.to_re`.
209    /// Returns a regular expression of a constant word.
210    ///
211    /// # Example
212    /// ```
213    /// use smt_str::re::*;
214    /// use smt_str::SmtString;
215    ///
216    /// let mut builder = ReBuilder::default();
217    /// let string = SmtString::from("abc");
218    /// let r = builder.to_re(string.clone());
219    /// assert!(r.accepts(&string));
220    /// ```
221    ///
222    pub fn to_re(&mut self, w: SmtString) -> Regex {
223        self.intern(ReOp::Literal(w))
224    }
225
226    /// Constructs a regular expression denoting the empty word.
227    /// This is exactly the regular expression `(str.to_re "")`.
228    ///
229    /// # Example
230    /// ```
231    /// use smt_str::re::*;
232    ///
233    /// let mut builder = ReBuilder::default();
234    /// let r = builder.epsilon();
235    /// assert!(r.accepts(&"".into()));
236    /// ```
237    pub fn epsilon(&self) -> Regex {
238        self.re_epsilon.clone()
239    }
240
241    /// Constructor for `re.none`.
242    /// Returns a regular expression denoting the empty set.
243    ///
244    /// # Example
245    /// ```
246    /// use smt_str::re::*;
247    ///
248    ///
249    /// let mut builder = ReBuilder::default();
250    /// let r = builder.none();
251    /// assert!(!r.accepts(&"a".into()));
252    /// assert!(!r.accepts(&"".into()));
253    /// assert_eq!(r.none(), Some(true));
254    /// ```
255    pub fn none(&self) -> Regex {
256        self.re_none.clone()
257    }
258
259    /// Constructor for `re.all`.
260    /// Returns a regular expression denoting the set of all strings.
261    ///
262    /// # Example
263    /// ```
264    /// use smt_str::re::*;
265    ///
266    /// let mut builder = ReBuilder::default();
267    /// let r = builder.all();
268    /// assert!(r.accepts(&"a".into()));
269    /// assert!(r.accepts(&"".into()));
270    /// assert_eq!(r.universal(), Some(true));
271    /// ```
272    pub fn all(&self) -> Regex {
273        self.re_all.clone()
274    }
275
276    /// Constructor for `re.allchar`.
277    /// Returns a regular expression accepting any character in the SMT-LIB alphabet (0 - 0x2FFFF).
278    ///
279    /// # Example
280    /// ```
281    /// use smt_str::re::*;
282    ///
283    /// let mut builder = ReBuilder::default();
284    /// let r = builder.allchar();
285    /// assert!(r.accepts(&"a".into()));
286    /// assert!(r.accepts(&"🦀".into()));
287    /// assert!(!r.accepts(&"".into()));
288    /// ```
289    pub fn allchar(&self) -> Regex {
290        self.re_allchar.clone()
291    }
292
293    /// Constructor for `re.range`.
294    /// Returns a regular expression denoting the set of characters in the given range.
295    ///
296    /// # Example
297    /// ```
298    /// use smt_str::re::*;
299    /// use smt_str::alphabet::CharRange;
300    ///
301    /// let mut builder = ReBuilder::default();
302    /// let r = builder.range(CharRange::new('a', 'z'));
303    /// assert!(r.accepts(&"a".into()));
304    /// assert!(r.accepts(&"c".into()));
305    /// assert!(r.accepts(&"a".into()));
306    /// assert!(r.accepts(&"b".into()));
307    /// assert!(!r.accepts(&"A".into()));
308    /// ```
309    pub fn range(&mut self, r: CharRange) -> Regex {
310        if self.optimize {
311            self.range_opt(r)
312        } else {
313            self.intern(ReOp::Range(r))
314        }
315    }
316
317    /// Wrapper for [`ReBuilder::range`] that creates a range from the given characters.
318    pub fn range_from_to(&mut self, l: impl Into<SmtChar>, u: impl Into<SmtChar>) -> Regex {
319        self.range(CharRange::new(l.into(), u.into()))
320    }
321
322    fn range_opt(&mut self, r: CharRange) -> Regex {
323        if r.is_empty() {
324            self.none()
325        } else if r.is_full() {
326            self.allchar()
327        } else if let Some(c) = r.is_singleton() {
328            self.to_re(c.into())
329        } else {
330            self.intern(ReOp::Range(r))
331        }
332    }
333
334    /// Constructor for `re.++`.
335    /// Constructs a regular expression denoting the concatenation of the given regular expressions.
336    ///
337    /// # Example
338    /// ```
339    /// use smt_str::re::*;
340    /// use smallvec::smallvec;
341    ///
342    /// let mut builder = ReBuilder::default();
343    /// let r1 = builder.to_re("abc".into());
344    /// let r2 = builder.to_re("def".into());
345    /// let r = builder.concat(smallvec![r1, r2]);
346    /// assert!(r.accepts(&"abcdef".into()));
347    /// assert!(!r.accepts(&"abc".into()));
348    /// ```
349    pub fn concat(&mut self, rs: SmallVec<[Regex; 2]>) -> Regex {
350        if self.optimize {
351            self.concat_opt(rs)
352        } else {
353            self.intern(ReOp::Concat(rs))
354        }
355    }
356
357    fn concat_opt(&mut self, rs: SmallVec<[Regex; 2]>) -> Regex {
358        // Check if any of the children is the empty set, then the concatenation is the empty set
359        if rs.iter().any(|i| i.none() == Some(true)) {
360            return self.none();
361        }
362
363        // Filter out empty terms as they are the identity element of concatenation
364        // Flatten nested concatenations
365        let rs = rs
366            .into_iter()
367            .filter(|i| i.epsilon() != Some(true))
368            .flat_map(|i| {
369                if let ReOp::Concat(rs) = &i.op() {
370                    rs.clone()
371                } else {
372                    smallvec![i]
373                }
374            })
375            .collect_vec();
376
377        // Collapse adjacent words
378        // TODO: Do this in one iteration
379        let mut collapsed: SmallVec<[Regex; 2]> = SmallVec::with_capacity(rs.len());
380        let mut last_word: Option<SmtString> = None;
381        for r in rs.into_iter() {
382            if let ReOp::Literal(w) = &r.op() {
383                if let Some(last) = last_word {
384                    last_word = Some(last.concat(w));
385                } else {
386                    last_word = Some(w.clone());
387                }
388            } else {
389                if let Some(last) = last_word {
390                    collapsed.push(self.to_re(last));
391                    last_word = None;
392                }
393                collapsed.push(r);
394            }
395        }
396        if let Some(last) = last_word {
397            collapsed.push(self.to_re(last));
398        }
399
400        if collapsed.is_empty() {
401            self.epsilon()
402        } else if collapsed.len() == 1 {
403            collapsed[0].clone()
404        } else {
405            self.intern(ReOp::Concat(collapsed))
406        }
407    }
408
409    /// Constructor for `re.union`.
410    /// Returns a regular expression denoting the union of the given regular expressions.
411    ///
412    /// # Example
413    /// ```
414    /// use smt_str::re::*;
415    /// use smallvec::smallvec;
416    ///
417    /// let mut builder = ReBuilder::default();
418    /// let r1 = builder.to_re("ac".into());
419    /// let r2 = builder.to_re("ab".into());
420    /// let r = builder.union(smallvec![r1, r2]);
421    /// assert!(r.accepts(&"ac".into()));
422    /// assert!(r.accepts(&"ab".into()));
423    /// assert!(!r.accepts(&"cb".into()));
424    /// ```
425    pub fn union(&mut self, rs: SmallVec<[Regex; 2]>) -> Regex {
426        if self.optimize {
427            self.union_opt(rs)
428        } else {
429            self.intern(ReOp::Union(rs))
430        }
431    }
432
433    fn union_opt(&mut self, rs: SmallVec<[Regex; 2]>) -> Regex {
434        // Filter out empty terms as they are the identity element of union
435
436        // Filter out empty terms as they are the identity element of union
437        // Collect the remaining terms into a set to deduplicate and make the order deterministic
438        #[allow(clippy::mutable_key_type)]
439        let rs: BTreeSet<Regex> = rs
440            .into_iter()
441            .filter(|i| i.none() != Some(true))
442            .flat_map(|i| {
443                if let ReOp::Union(rs) = &i.op() {
444                    rs.clone()
445                } else {
446                    smallvec![i]
447                }
448            })
449            .collect();
450
451        let rs: SmallVec<[Rc<ReNode>; 2]> = rs.into_iter().collect();
452
453        if rs.is_empty() {
454            self.none()
455        } else if rs.len() == 1 {
456            rs[0].clone()
457        } else {
458            self.intern(ReOp::Union(rs))
459        }
460    }
461
462    /// Constructor for `re.inter`.
463    /// Returns a regular expression denoting the intersection of the given regular expressions.
464    ///
465    /// # Example
466    /// ```
467    /// use smt_str::re::*;
468    /// use smallvec::smallvec;
469    ///
470    /// let mut builder = ReBuilder::default();
471    /// let r1 = builder.range_from_to('a', 'z');
472    /// let r2 = builder.range_from_to('o', 'x');
473    /// let r = builder.inter(smallvec![r1, r2]);
474    /// assert!(r.accepts(&"o".into()));
475    /// assert!(r.accepts(&"q".into()));
476    /// assert!(r.accepts(&"x".into()));
477    /// assert!(!r.accepts(&"z".into()));
478    /// assert!(!r.accepts(&"1".into()));
479    /// ```
480    pub fn inter(&mut self, rs: SmallVec<[Regex; 2]>) -> Regex {
481        if self.optimize {
482            self.inter_opt(rs)
483        } else {
484            self.intern(ReOp::Inter(rs))
485        }
486    }
487
488    fn inter_opt(&mut self, rs: SmallVec<[Regex; 2]>) -> Regex {
489        // Check if any of the children is the empty set, then the intersection is the empty set
490        if rs.iter().any(|i| i.none() == Some(true)) {
491            return self.none();
492        }
493
494        // Filter out re.all terms as they are the identity element of intersection
495        // As as side effect, this also flattens nested intersections, and sorts them to make the order deterministic
496        #[allow(clippy::mutable_key_type)]
497        let rs: BTreeSet<_> = rs
498            .into_iter()
499            .filter(|i| i.universal() != Some(true))
500            .flat_map(|i| {
501                if let ReOp::Inter(rs) = &i.op() {
502                    rs.clone()
503                } else {
504                    smallvec![i]
505                }
506            })
507            .collect();
508
509        let rs: SmallVec<[Rc<ReNode>; 2]> = rs.into_iter().collect();
510        if rs.is_empty() {
511            self.all()
512        } else if rs.len() == 1 {
513            rs[0].clone()
514        } else {
515            self.intern(ReOp::Inter(rs))
516        }
517    }
518
519    /// Constructor for `re.*`.
520    /// Returns a regular expression denoting the Kleene star of the given regular expression.
521    ///
522    /// # Example
523    /// ```
524    /// use smt_str::re::*;
525    ///
526    /// let mut builder = ReBuilder::default();
527    /// let r = builder.to_re("a".into());
528    /// let s = builder.star(r);
529    /// assert!(s.accepts(&"".into()));
530    /// assert!(s.accepts(&"a".into()));
531    /// assert!(s.accepts(&"aaaa".into()));
532    /// assert!(s.accepts(&"aaaaaaaa".into()));
533    /// assert!(!s.accepts(&"b".into()));
534    /// ```
535    pub fn star(&mut self, r: Regex) -> Regex {
536        if self.optimize {
537            self.star_opt(r)
538        } else {
539            self.intern(ReOp::Star(r))
540        }
541    }
542
543    fn star_opt(&mut self, r: Regex) -> Regex {
544        if r.none().unwrap_or(false) || r.epsilon().unwrap_or(false) {
545            return self.epsilon();
546        }
547
548        // If r is e+ or e* or e? then return e
549        // Otherwise, return r
550        fn stip_closures(r: Regex) -> Regex {
551            match r.op() {
552                ReOp::Star(inner) | ReOp::Plus(inner) | ReOp::Opt(inner) => inner.clone(),
553                _ => r,
554            }
555        }
556
557        match r.op() {
558            ReOp::Union(rs) => {
559                // Strip closures from each branch
560                let rs = rs.iter().map(|r| stip_closures(r.clone())).collect();
561                let u = self.union(rs);
562                self.intern(ReOp::Star(u))
563            }
564            ReOp::Star(_) => {
565                // Flatten nested stars
566                r.clone()
567            }
568            ReOp::Plus(rr) | ReOp::Opt(rr) => {
569                // Flatten (R+)* = R*
570                // Flatten (R?)* = R*
571                self.star(rr.clone())
572            }
573            _ => self.intern(ReOp::Star(r)),
574        }
575    }
576
577    /// Constructor for `re.+`.
578    /// Returns a regular expression denoting the positive closure of the given regular expression.
579    ///
580    /// # Example
581    /// ```
582    /// use smt_str::re::*;
583    ///
584    /// let mut builder = ReBuilder::default();
585    /// let r = builder.to_re("a".into());
586    /// let p = builder.plus(r);
587    /// assert!(!p.accepts(&"".into()));
588    /// assert!(p.accepts(&"a".into()));
589    /// assert!(p.accepts(&"aaaa".into()));
590    /// assert!(p.accepts(&"aaaaaaaa".into()));
591    /// assert!(!p.accepts(&"b".into()));
592    /// ```
593    pub fn plus(&mut self, r: Regex) -> Regex {
594        if self.optimize {
595            self.plus_opt(r)
596        } else {
597            self.intern(ReOp::Plus(r))
598        }
599    }
600
601    fn plus_opt(&mut self, r: Regex) -> Regex {
602        // (∅)+ = ∅
603        if r.none().unwrap_or(false) {
604            return self.none();
605        }
606
607        // If r is e+ return e
608        // Otherwise, return r
609        fn stip_plus(r: Regex) -> Regex {
610            match r.op() {
611                ReOp::Plus(inner) => inner.clone(),
612                _ => r,
613            }
614        }
615
616        // ε+ = ε
617        if r.epsilon().unwrap_or(false) {
618            return self.epsilon();
619        }
620
621        if r.nullable() {
622            // (R)+ = R* with R nullable
623            if let ReOp::Star(inner) = r.op() {
624                return self.star(inner.clone());
625            }
626        }
627
628        match r.op() {
629            // (R+)+ → R+
630            ReOp::Plus(_) => r.clone(),
631
632            // (R*)+ → R*
633            ReOp::Star(inner) => self.star(inner.clone()),
634
635            // (⋃ R_i+)+ → (⋃ R_i)+ → ⋃ R_i+
636            ReOp::Union(rs) => {
637                let rs = rs.iter().map(|r| stip_plus(r.clone())).collect();
638                let u = self.union(rs);
639                self.intern(ReOp::Plus(u))
640            }
641
642            _ => self.intern(ReOp::Plus(r)),
643        }
644    }
645
646    /// Constructor for `re.comp`.
647    /// Returns a regular expression denoting the complement of the given regular expression.
648    ///
649    /// # Example
650    /// ```
651    /// use smt_str::re::*;
652    ///
653    /// let mut builder = ReBuilder::default();
654    /// let r = builder.to_re("a".into());
655    /// let c = builder.comp(r);
656    /// assert!(!c.accepts(&"a".into()));
657    /// assert!(c.accepts(&"b".into()));
658    /// assert!(c.accepts(&"".into()));
659    /// assert!(c.accepts(&"aa".into()));
660    /// ```
661    pub fn comp(&mut self, r: Regex) -> Regex {
662        if self.optimize {
663            self.comp_opt(r)
664        } else {
665            self.intern(ReOp::Comp(r))
666        }
667    }
668
669    fn comp_opt(&mut self, r: Regex) -> Regex {
670        if r.none().unwrap_or(false) {
671            self.all()
672        } else if r.universal().unwrap_or(false) {
673            self.none()
674        } else {
675            self.intern(ReOp::Comp(r))
676        }
677    }
678
679    /// Constructor for `re.diff`.
680    /// Returns a regular expression denoting the difference of the given regular expressions.
681    ///
682    /// # Example
683    /// ```
684    /// use smt_str::re::*;
685    ///
686    /// let mut builder = ReBuilder::default();
687    /// let a = builder.to_re("a".into());
688    /// let s = builder.star(a.clone());
689    /// let eps = builder.epsilon();
690    /// let r = builder.diff(s, eps);
691    ///
692    /// assert!(r.accepts(&"a".into()));
693    /// assert!(r.accepts(&"aaaa".into()));
694    /// assert!(!r.accepts(&"".into()));
695    /// assert!(!r.accepts(&"b".into()));
696    /// ```
697    pub fn diff(&mut self, r1: Regex, r2: Regex) -> Regex {
698        if self.optimize {
699            self.diff_opt(r1, r2)
700        } else {
701            self.intern(ReOp::Diff(r1, r2))
702        }
703    }
704
705    fn diff_opt(&mut self, r1: Regex, r2: Regex) -> Regex {
706        if r1.none().unwrap_or(false) || r2.universal().unwrap_or(false) {
707            self.none()
708        } else if r2.none().unwrap_or(false) {
709            r1.clone()
710        } else {
711            match (r1.op(), r2.op()) {
712                (ReOp::Opt(r), _) if r2.epsilon().unwrap_or(false) => r.clone(),
713                (ReOp::Star(r), _) if r2.epsilon().unwrap_or(false) => self.plus(r.clone()),
714                _ => self.intern(ReOp::Diff(r1, r2)),
715            }
716        }
717    }
718
719    /// Constructor for `re.opt`.
720    /// Returns a regular expression denoting the optional version of the given regular expression.
721    ///
722    /// # Example
723    /// ```
724    /// use smt_str::re::*;
725    ///
726    /// let mut builder = ReBuilder::default();
727    /// let r = builder.to_re("a".into());
728    /// let o = builder.opt(r);
729    /// assert!(o.accepts(&"a".into()));
730    /// assert!(o.accepts(&"".into()));
731    /// assert!(!o.accepts(&"b".into()));
732    /// ```
733    pub fn opt(&mut self, r: Regex) -> Regex {
734        if self.optimize {
735            self.opt_opt(r)
736        } else {
737            self.intern(ReOp::Opt(r))
738        }
739    }
740
741    fn opt_opt(&mut self, r: Regex) -> Regex {
742        if r.none().unwrap_or(false) || r.epsilon().unwrap_or(false) {
743            self.epsilon()
744        } else {
745            self.intern(ReOp::Opt(r))
746        }
747    }
748
749    /// Constructor for `re.pow`.
750    /// Returns a regular expression denoting the `n`th power of the given regular expression.
751    ///
752    /// # Example
753    /// ```
754    /// use smt_str::re::*;
755    ///
756    /// let mut builder = ReBuilder::default();
757    /// let r = builder.to_re("a".into());
758    /// let p = builder.pow(r, 3);
759    /// assert!(p.accepts(&"aaa".into()));
760    /// assert!(!p.accepts(&"aaaa".into()));
761    /// assert!(!p.accepts(&"aa".into()));
762    /// ```
763    pub fn pow(&mut self, r: Regex, n: u32) -> Regex {
764        if self.optimize {
765            self.pow_opt(r, n)
766        } else {
767            self.intern(ReOp::Pow(r, n))
768        }
769    }
770
771    fn pow_opt(&mut self, r: Regex, n: u32) -> Regex {
772        if n == 0 {
773            self.epsilon()
774        } else if n == 1 {
775            r.clone()
776        } else {
777            self.intern(ReOp::Pow(r, n))
778        }
779    }
780
781    /// Constructor for `re.loop`.
782    /// Returns a regular expression denoting the loop of the given regular expression.
783    /// That is, the regular expression accepts any number of repetitions of the given regular expression between `l` and `u` times (inclusive).
784    ///
785    /// # Example
786    /// ```
787    /// use smt_str::re::*;
788    ///
789    /// let mut builder = ReBuilder::default();
790    /// let r = builder.to_re("a".into());
791    /// let l = 2;
792    /// let u = 4;
793    /// let looped = builder.loop_(r, l, u);
794    /// assert!(looped.accepts(&"aa".into()));
795    /// assert!(looped.accepts(&"aaa".into()));
796    /// assert!(looped.accepts(&"aaaa".into()));
797    /// assert!(!looped.accepts(&"aaaaa".into()));
798    /// assert!(!looped.accepts(&"a".into()));
799    /// ```
800    pub fn loop_(&mut self, r: Regex, l: u32, u: u32) -> Regex {
801        if self.optimize {
802            self.loop_opt(r, l, u)
803        } else {
804            self.intern(ReOp::Loop(r, l, u))
805        }
806    }
807
808    fn loop_opt(&mut self, r: Regex, l: u32, u: u32) -> Regex {
809        if l > u {
810            self.none()
811        } else if u == 0 {
812            self.epsilon()
813        } else if l == u {
814            self.pow(r, u)
815        } else {
816            self.intern(ReOp::Loop(r, l, u))
817        }
818    }
819
820    /// Unrolls a loop of the given regular expression.
821    /// The loop is unrolled as a concatenation of the given regular expression repeated `l` times followed by the optional regular expression repeated `u-l` times.
822    #[allow(dead_code)]
823    fn unroll_loop(&mut self, r: Regex, l: u32, u: u32) -> Regex {
824        let mut concats: SmallVec<[Rc<ReNode>; 2]> = SmallVec::with_capacity(u as usize);
825        let opt = self.opt(r.clone());
826        for i in 0..u {
827            if i < l {
828                concats.push(r.clone());
829            } else {
830                concats.push(opt.clone());
831            }
832        }
833        self.concat(concats)
834    }
835
836    /// Unrolls a power of the given regular expression.
837    /// The power is unrolled as a concatenation of the given regular expression repeated `n` times.
838    #[allow(dead_code)]
839    fn unroll_pow(&mut self, r: Regex, n: u32) -> Regex {
840        let mut concats = SmallVec::with_capacity(n as usize);
841        for _ in 0..n {
842            concats.push(r.clone());
843        }
844        self.concat(concats)
845    }
846
847    /* Aux methods */
848
849    /// Constructs a regular expression denoting the reverse of the given regular expression.
850    /// If a word w is accepted by the given regular expression, then the reverse of w is accepted by the returned regular expression.
851    pub fn reversed(&mut self, r: &Regex) -> Regex {
852        match r.op() {
853            ReOp::Literal(word) => self.to_re(word.reversed()),
854            ReOp::None | ReOp::All | ReOp::Any | ReOp::Range(_) => r.clone(),
855            ReOp::Concat(rs) => {
856                let rs = rs.iter().rev().map(|r| self.reversed(r)).collect();
857                self.concat(rs)
858            }
859            ReOp::Union(rs) => {
860                let rs = rs.iter().map(|r| self.reversed(r)).collect();
861                self.union(rs)
862            }
863            ReOp::Inter(rs) => {
864                let rs = rs.iter().map(|r| self.reversed(r)).collect();
865                self.inter(rs)
866            }
867            ReOp::Star(r) => {
868                let rev = self.reversed(r);
869                self.star(rev)
870            }
871            ReOp::Plus(r) => {
872                let rev = self.reversed(r);
873                self.plus(rev)
874            }
875            ReOp::Opt(r) => {
876                let rev = self.reversed(r);
877                self.opt(rev)
878            }
879            ReOp::Comp(r) => {
880                let rev = self.reversed(r);
881                self.comp(rev)
882            }
883            ReOp::Diff(r, r1) => {
884                let rev = self.reversed(r);
885                let rev1 = self.reversed(r1);
886                self.diff(rev, rev1)
887            }
888            ReOp::Pow(r, e) => {
889                let rev = self.reversed(r);
890                self.pow(rev, *e)
891            }
892            ReOp::Loop(r, l, u) => {
893                let rev = self.reversed(r);
894                self.loop_(rev, *l, *u)
895            }
896        }
897    }
898}
899
900/// Ensures that identical regex patterns are reused and provides construction methods for each regex op(), optimizing memory usage and construction efficiency.
901#[derive(Debug)]
902struct Registry {
903    /// Stores the unique instances of `Regex`.
904    /// The key is the regex pattern itself, and the value is a shared reference-counted handle to the pattern.
905    registry: HashMap<ReOp, Regex>,
906    /// The id to assign to the next regex.
907    next_id: usize,
908}
909
910impl Registry {
911    /// Creates a new `Interner` with an empty internal registry.
912    /// It starts without any regex patterns and will only populate its internal map when `intern` is called.
913    fn new() -> Self {
914        Registry {
915            registry: HashMap::new(),
916            next_id: 0,
917        }
918    }
919
920    /// Interns a regex pattern, ensuring each unique regex is stored and reused.
921    /// This method serves as the primary access point for adding new regex patterns to the builder.
922    ///
923    /// # Arguments
924    /// * `regex` - The `Regex` pattern to intern.
925    ///
926    /// # Returns
927    /// A [RegexRef] pointing to the stored or newly created regex instance.
928    fn intern(&mut self, op: ReOp) -> Regex {
929        if let Some(existing) = self.registry.get(&op) {
930            existing.clone()
931        } else {
932            let regex = ReNode::new(self.next_id, op.clone());
933            self.next_id += 1;
934            let re = Rc::new(regex.clone());
935            self.registry.insert(op, re.clone());
936            re
937        }
938    }
939}
940
941#[cfg(test)]
942mod test {
943    use test::build::ReBuilder;
944
945    use super::*;
946
947    #[test]
948    fn intern_word() {
949        let mut builder = ReBuilder::default();
950        let w1 = builder.to_re("abc".into());
951        let w2 = builder.to_re("abc".into());
952        assert!(Rc::ptr_eq(&w1, &w2));
953    }
954
955    #[test]
956    fn intern_epsilon() {
957        let builder = ReBuilder::default();
958        let e1 = builder.epsilon();
959        let e2 = builder.epsilon();
960        assert!(Rc::ptr_eq(&e1, &e2));
961    }
962
963    #[test]
964    fn intern_none() {
965        let builder = ReBuilder::default();
966        let n1 = builder.none();
967        let n2 = builder.none();
968        assert!(Rc::ptr_eq(&n1, &n2));
969    }
970
971    #[test]
972    fn intern_all() {
973        let builder = ReBuilder::default();
974        let a1 = builder.all();
975        let a2 = builder.all();
976        assert!(Rc::ptr_eq(&a1, &a2));
977    }
978
979    #[test]
980    fn intern_all_char() {
981        let builder = ReBuilder::default();
982        let ac1 = builder.allchar();
983        let ac2 = builder.allchar();
984        assert!(Rc::ptr_eq(&ac1, &ac2));
985    }
986
987    #[test]
988    fn intern_range() {
989        let mut builder = ReBuilder::default();
990        let r1 = builder.range_from_to('a', 'z');
991        let r2 = builder.range_from_to('a', 'z');
992        assert!(Rc::ptr_eq(&r1, &r2));
993    }
994
995    #[test]
996    fn intern_range_full() {
997        let mut builder = ReBuilder::default();
998        let r1 = builder.range_from_to('\0', SmtChar::MAX);
999        let r2 = builder.allchar();
1000        assert!(Rc::ptr_eq(&r1, &r2));
1001    }
1002
1003    #[test]
1004    fn intern_concat() {
1005        let mut builder = ReBuilder::default();
1006        let w1 = builder.to_re("abc".into());
1007        let w2 = builder.to_re("def".into());
1008        let c1 = builder.concat(smallvec![w1.clone(), w2.clone()]);
1009        let c2 = builder.concat(smallvec![w1.clone(), w2.clone()]);
1010        assert!(Rc::ptr_eq(&c1, &c2));
1011    }
1012
1013    #[test]
1014    fn intern_union() {
1015        let mut builder = ReBuilder::default();
1016        let w1 = builder.to_re("abc".into());
1017        let w2 = builder.to_re("def".into());
1018        let u1 = builder.union(smallvec![w1.clone(), w2.clone()]);
1019        let u2 = builder.union(smallvec![w1.clone(), w2.clone()]);
1020        assert!(Rc::ptr_eq(&u1, &u2));
1021    }
1022
1023    #[test]
1024    fn intern_inter() {
1025        let mut builder = ReBuilder::default();
1026        let w1 = builder.to_re("abc".into());
1027        let w2 = builder.to_re("def".into());
1028        let args = smallvec![w1.clone(), w2.clone()];
1029        let i1 = builder.inter(args.clone());
1030        let i2 = builder.inter(args.clone());
1031        assert!(Rc::ptr_eq(&i1, &i2));
1032    }
1033
1034    #[test]
1035    fn intern_star() {
1036        let mut builder = ReBuilder::default();
1037        let w1 = builder.to_re("abc".into());
1038        let s1 = builder.star(w1.clone());
1039        let s2 = builder.star(w1.clone());
1040        assert!(Rc::ptr_eq(&s1, &s2));
1041    }
1042
1043    #[test]
1044    fn intern_plus() {
1045        let mut builder = ReBuilder::default();
1046        let w1 = builder.to_re("abc".into());
1047        let p1 = builder.plus(w1.clone());
1048        let p2 = builder.plus(w1.clone());
1049        assert!(Rc::ptr_eq(&p1, &p2));
1050    }
1051
1052    #[test]
1053    fn intern_opt() {
1054        let mut builder = ReBuilder::default();
1055        let w1 = builder.to_re("abc".into());
1056        let o1 = builder.opt(w1.clone());
1057        let o2 = builder.opt(w1.clone());
1058        assert!(Rc::ptr_eq(&o1, &o2));
1059    }
1060
1061    #[test]
1062    fn intern_comp() {
1063        let mut builder = ReBuilder::default();
1064        let w1 = builder.to_re("abc".into());
1065        let c1 = builder.comp(w1.clone());
1066        let c2 = builder.comp(w1.clone());
1067        assert!(Rc::ptr_eq(&c1, &c2));
1068    }
1069
1070    #[test]
1071    fn builder_equal() {
1072        let mut builder = ReBuilder::default();
1073        let w1 = builder.to_re("abc".into());
1074        let w2 = builder.to_re("abc".into());
1075        assert_eq!(w1, w2);
1076        assert!(Rc::ptr_eq(&w1, &w2));
1077
1078        let mut builder2 = ReBuilder::default();
1079        let w3 = builder2.to_re("abc".into());
1080
1081        assert_eq!(w1, w3);
1082        assert!(!Rc::ptr_eq(&w1, &w3));
1083    }
1084
1085    #[test]
1086    fn universal_concat() {
1087        let mut builder = ReBuilder::default();
1088        let a = builder.all();
1089        let c = builder.concat(smallvec![a.clone(), a.clone()]);
1090        assert_eq!(c.universal(), Some(true));
1091    }
1092
1093    #[test]
1094    fn non_universal_concat() {
1095        let mut builder = ReBuilder::default();
1096        let a = builder.all();
1097        let b = builder.allchar();
1098        let c = builder.concat(smallvec![a.clone(), b.clone()]);
1099        assert_eq!(c.universal(), Some(false));
1100    }
1101
1102    #[test]
1103    fn universal_union() {
1104        let mut builder = ReBuilder::default();
1105        let all = builder.all();
1106        let none = builder.none();
1107        let c = builder.union(smallvec![all, none]);
1108        assert_eq!(c.universal(), Some(true));
1109    }
1110
1111    #[test]
1112    fn non_universal_union() {
1113        let mut builder = ReBuilder::default();
1114        let b = builder.allchar();
1115        let c = builder.concat(smallvec![b.clone(), b.clone()]);
1116        assert_eq!(c.universal(), Some(false));
1117    }
1118
1119    #[test]
1120    fn test_reversed_constant() {
1121        let mut rb = ReBuilder::default();
1122        let regex = rb.to_re("abc".into());
1123        let reversed = rb.reversed(&regex);
1124        assert_eq!(reversed.to_string(), rb.to_re("cba".into()).to_string());
1125    }
1126
1127    #[test]
1128    fn test_reversed_concat() {
1129        let mut rb = ReBuilder::default();
1130        let regex1 = rb.to_re("abc".into());
1131        let regex2 = rb.to_re("def".into());
1132        let concat = rb.concat(smallvec![regex1.clone(), regex2.clone()]);
1133        let reversed = rb.reversed(&concat);
1134        let args = smallvec![rb.reversed(&regex2), rb.reversed(&regex1)];
1135        let expected = rb.concat(args);
1136        assert_eq!(reversed.to_string(), expected.to_string());
1137    }
1138
1139    #[test]
1140    fn test_reversed_concat_2() {
1141        let mut rb = ReBuilder::default();
1142        let a = rb.to_re("a".into());
1143        let astar = rb.star(a);
1144
1145        let b = rb.to_re("b".into());
1146        let bstar = rb.star(b);
1147
1148        // a*b*
1149        let concat = rb.concat(smallvec![astar.clone(), bstar.clone()]);
1150        // Should be b*a*
1151        let reversed = rb.reversed(&concat);
1152
1153        // b*a*
1154        let args = smallvec![bstar, astar];
1155        let expected = rb.concat(args);
1156        assert_eq!(
1157            reversed, expected,
1158            "Expected: {}, Got: {}",
1159            expected, reversed
1160        );
1161    }
1162
1163    #[test]
1164    fn test_reversed_union() {
1165        let mut rb = ReBuilder::default();
1166        let regex1 = rb.to_re("abc".into());
1167        let regex2 = rb.to_re("xyz".into());
1168        let union = rb.union(smallvec![regex1.clone(), regex2.clone()]);
1169        let reversed = rb.reversed(&union);
1170        let args = smallvec![rb.reversed(&regex1), rb.reversed(&regex2)];
1171        let expected = rb.union(args);
1172        assert_eq!(reversed.to_string(), expected.to_string());
1173    }
1174
1175    #[test]
1176    fn test_reversed_star() {
1177        let mut rb = ReBuilder::default();
1178        let regex = rb.to_re("abc".into());
1179        let star = rb.star(regex.clone());
1180        let reversed = rb.reversed(&star);
1181        let revd = rb.reversed(&regex);
1182        let expected = rb.star(revd);
1183        assert_eq!(reversed.to_string(), expected.to_string());
1184    }
1185
1186    #[test]
1187    fn test_reversed_plus() {
1188        let mut rb = ReBuilder::default();
1189        let regex = rb.to_re("abc".into());
1190        let plus = rb.plus(regex.clone());
1191        let reversed = rb.reversed(&plus);
1192
1193        let rev = rb.reversed(&regex);
1194        let expected = rb.plus(rev);
1195        assert_eq!(reversed.to_string(), expected.to_string());
1196    }
1197
1198    #[test]
1199    fn test_reversed_opt() {
1200        let mut rb = ReBuilder::default();
1201        let regex = rb.to_re("abc".into());
1202        let opt = rb.opt(regex.clone());
1203        let reversed = rb.reversed(&opt);
1204
1205        let rev = rb.reversed(&regex);
1206        let expected = rb.opt(rev);
1207        assert_eq!(reversed.to_string(), expected.to_string());
1208    }
1209
1210    #[test]
1211    fn test_reversed_inter() {
1212        let mut rb = ReBuilder::default();
1213        let regex1 = rb.to_re("abc".into());
1214        let regex2 = rb.to_re("xyz".into());
1215        let inter = rb.inter(smallvec![regex1.clone(), regex2.clone()]);
1216        let reversed = rb.reversed(&inter);
1217
1218        let args = smallvec![rb.reversed(&regex1), rb.reversed(&regex2)];
1219        let expected = rb.inter(args);
1220        assert_eq!(reversed.to_string(), expected.to_string());
1221    }
1222
1223    #[test]
1224    fn test_reversed_diff() {
1225        let mut rb = ReBuilder::default();
1226        let regex1 = rb.to_re("abc".into());
1227        let regex2 = rb.to_re("xyz".into());
1228        let diff = rb.diff(regex1.clone(), regex2.clone());
1229        let reversed = rb.reversed(&diff);
1230
1231        let l = rb.reversed(&regex1);
1232        let r = rb.reversed(&regex2);
1233        let expected = rb.diff(l, r);
1234        assert_eq!(reversed.to_string(), expected.to_string());
1235    }
1236
1237    #[test]
1238    fn test_reversed_comp() {
1239        let mut rb = ReBuilder::default();
1240        let regex = rb.to_re("abc".into());
1241        let comp = rb.comp(regex.clone());
1242        let reversed = rb.reversed(&comp);
1243
1244        let rev = rb.reversed(&regex);
1245        let expected = rb.comp(rev);
1246        assert_eq!(reversed.to_string(), expected.to_string());
1247    }
1248
1249    #[test]
1250    fn test_reversed_pow() {
1251        let mut rb = ReBuilder::default();
1252        let regex = rb.to_re("abc".into());
1253        let pow = rb.pow(regex.clone(), 3);
1254        let reversed = rb.reversed(&pow);
1255
1256        let rev = rb.reversed(&regex);
1257        let expected = rb.pow(rev, 3);
1258        assert_eq!(reversed.to_string(), expected.to_string());
1259    }
1260
1261    #[test]
1262    fn test_reversed_loop() {
1263        let mut rb = ReBuilder::default();
1264        let regex = rb.to_re("abc".into());
1265        let looped = rb.loop_(regex.clone(), 2, 5);
1266        let reversed = rb.reversed(&looped);
1267
1268        let rev = rb.reversed(&regex);
1269        let expected = rb.loop_(rev, 2, 5);
1270        assert_eq!(reversed.to_string(), expected.to_string());
1271    }
1272
1273    #[test]
1274    fn test_unroll_loop_zero() {
1275        let mut rb = ReBuilder::default();
1276        let regex = rb.to_re("abc".into());
1277        let unrolled = rb.unroll_loop(regex.clone(), 0, 0);
1278        assert_eq!(unrolled.to_string(), rb.epsilon().to_string());
1279    }
1280
1281    #[test]
1282    fn test_unroll_loop_single() {
1283        let mut rb = ReBuilder::default();
1284        let regex = rb.to_re("abc".into());
1285        let unrolled = rb.unroll_loop(regex.clone(), 1, 1);
1286        assert_eq!(unrolled.to_string(), regex.to_string());
1287    }
1288
1289    #[test]
1290    fn test_unroll_loop_multiple() {
1291        let mut rb = ReBuilder::default();
1292        let regex = rb.to_re("abc".into());
1293        let unrolled = rb.unroll_loop(regex.clone(), 2, 4);
1294        let opt = rb.opt(regex.clone());
1295
1296        let expected = rb.concat(smallvec![
1297            regex.clone(),
1298            regex.clone(),
1299            opt.clone(),
1300            opt.clone()
1301        ]);
1302        assert_eq!(unrolled.to_string(), expected.to_string());
1303    }
1304
1305    #[test]
1306    fn test_unroll_loop_full() {
1307        let mut rb = ReBuilder::default();
1308        let regex = rb.to_re("abc".into());
1309        let unrolled = rb.unroll_loop(regex.clone(), 3, 3);
1310        let expected = rb.concat(smallvec![regex.clone(), regex.clone(), regex.clone()]);
1311        assert_eq!(unrolled.to_string(), expected.to_string());
1312    }
1313
1314    #[test]
1315    fn test_unroll_loop_opt() {
1316        let mut rb = ReBuilder::default();
1317        let regex = rb.to_re("abc".into());
1318        let unrolled = rb.unroll_loop(regex.clone(), 0, 2);
1319        let opt = rb.opt(regex.clone());
1320        let expected = rb.concat(smallvec![opt.clone(), opt.clone()]);
1321        assert_eq!(unrolled.to_string(), expected.to_string());
1322    }
1323
1324    #[test]
1325    fn test_unroll_pow_zero() {
1326        let mut builder = ReBuilder::default();
1327        let r = builder.to_re("a".into());
1328        let result = builder.unroll_pow(r, 0);
1329        assert_eq!(builder.epsilon(), result);
1330    }
1331
1332    #[test]
1333    fn test_unroll_pow_one() {
1334        let mut builder = ReBuilder::default();
1335        let r = builder.to_re("a".into());
1336        let result = builder.unroll_pow(r.clone(), 1);
1337        assert_eq!(r, result);
1338    }
1339
1340    #[test]
1341    fn test_unroll_pow_multiple() {
1342        let mut builder = ReBuilder::default();
1343        let r = builder.to_re("a".into());
1344        let result = builder.unroll_pow(r.clone(), 3);
1345        let expected = builder.concat(smallvec![r.clone(), r.clone(), r.clone()]);
1346        assert_eq!(expected, result);
1347    }
1348}