brzozowski_regex/builder/
similarity.rs1use std::cmp::Ordering;
18use std::marker::PhantomData;
19
20use itertools::Itertools;
21
22use crate::builder::Builder;
23use crate::builder::Regex;
24use crate::Alphabet;
25
26#[derive(Clone, Debug, Hash, PartialEq, Eq)]
29pub struct ApproximatelySimilarCanonical<S: Alphabet> {
30 _phantom: PhantomData<S>,
31}
32
33impl<S: Alphabet> Builder for ApproximatelySimilarCanonical<S> {
34 type Symbol = S;
35
36 #[inline]
37 fn empty_set() -> Regex<Self> {
38 Regex::EmptySet
39 }
40
41 #[inline]
42 fn empty_string() -> Regex<Self> {
43 Regex::EmptyString
44 }
45
46 #[inline]
47 fn symbol(value: Self::Symbol) -> Regex<Self> {
48 Regex::Symbol(value)
49 }
50
51 fn closure(inner: Regex<Self>) -> Regex<Self> {
52 match inner {
53 Regex::EmptySet => Self::empty_string(),
55 Regex::EmptyString => Self::empty_string(),
57 Regex::Closure(inner) => Regex::Closure(inner),
59 inner => Regex::Closure(inner.into()),
61 }
62 }
63
64 fn concat(left: Regex<Self>, right: Regex<Self>) -> Regex<Self> {
65 match (left, right) {
66 (Regex::EmptySet, _) | (_, Regex::EmptySet) => Self::empty_set(),
68 (Regex::EmptyString, inner) | (inner, Regex::EmptyString) => inner,
70 (left, right) => right
73 .into_reverse_concat_iter()
74 .chain(left.into_reverse_concat_iter())
75 .collect_vec()
76 .into_iter()
77 .rev()
78 .reduce(|l, r| Regex::Concat(l.into(), r.into()))
79 .expect("at least two items"),
80 }
81 }
82
83 fn or(left: Regex<Self>, right: Regex<Self>) -> Regex<Self> {
84 match (left, right) {
85 (Regex::EmptySet, inner) | (inner, Regex::EmptySet) => inner,
87 (any, _) | (_, any) if any.is_empty_set_complement() => {
89 Self::complement(Self::empty_set())
90 }
91 (left, right) => right
96 .into_reverse_or_iter()
97 .chain(left.into_reverse_or_iter())
98 .sorted_by(cmp)
99 .dedup()
100 .reduce(|l, r| Regex::Or(l.into(), r.into()))
101 .expect("at least two items"),
102 }
103 }
104
105 fn and(left: Regex<Self>, right: Regex<Self>) -> Regex<Self> {
106 match (left, right) {
107 (Regex::EmptySet, _) | (_, Regex::EmptySet) => Self::empty_set(),
109 (any, inner) | (inner, any) if any.is_empty_set_complement() => inner,
111 (left, right) => right
116 .into_reverse_and_iter()
117 .chain(left.into_reverse_and_iter())
118 .sorted_by(cmp)
119 .dedup()
120 .reduce(|l, r| Regex::And(l.into(), r.into()))
121 .expect("at least two items"),
122 }
123 }
124
125 fn complement(inner: Regex<Self>) -> Regex<Self> {
126 match inner {
127 Regex::Complement(inner) => *inner,
129 inner => Regex::Complement(inner.into()),
131 }
132 }
133}
134
135impl<S: Alphabet> Regex<ApproximatelySimilarCanonical<S>> {
136 fn into_reverse_concat_iter(self) -> impl Iterator<Item = Self> {
138 ReverseIter(Some(self), |r| {
139 if let Regex::Concat(next, value) = r {
140 (*value, Some(*next))
141 } else {
142 (r, None)
143 }
144 })
145 }
146
147 fn into_reverse_or_iter(self) -> impl Iterator<Item = Self> {
149 ReverseIter(Some(self), |r| {
150 if let Regex::Or(next, value) = r {
151 (*value, Some(*next))
152 } else {
153 (r, None)
154 }
155 })
156 }
157
158 fn into_reverse_and_iter(self) -> impl Iterator<Item = Self> {
160 ReverseIter(Some(self), |r| {
161 if let Regex::And(next, value) = r {
162 (*value, Some(*next))
163 } else {
164 (r, None)
165 }
166 })
167 }
168
169 fn is_empty_set_complement(&self) -> bool {
171 if let Regex::Complement(inner) = self {
172 matches!(inner.as_ref(), Regex::EmptySet)
173 } else {
174 false
175 }
176 }
177}
178
179struct ReverseIter<S, F>(Option<Regex<ApproximatelySimilarCanonical<S>>>, F)
180where
181 S: Alphabet,
182 F: Fn(
183 Regex<ApproximatelySimilarCanonical<S>>,
184 ) -> (
185 Regex<ApproximatelySimilarCanonical<S>>,
186 Option<Regex<ApproximatelySimilarCanonical<S>>>,
187 );
188
189impl<S, F> Iterator for ReverseIter<S, F>
190where
191 S: Alphabet,
192 F: Fn(
193 Regex<ApproximatelySimilarCanonical<S>>,
194 ) -> (
195 Regex<ApproximatelySimilarCanonical<S>>,
196 Option<Regex<ApproximatelySimilarCanonical<S>>>,
197 ),
198{
199 type Item = Regex<ApproximatelySimilarCanonical<S>>;
200 fn next(&mut self) -> Option<Self::Item> {
201 let mut r = None;
202 std::mem::swap(&mut r, &mut self.0);
203 if let Some(r) = r {
204 let (value, next) = self.1(r);
205 self.0 = next;
206 Some(value)
207 } else {
208 None
209 }
210 }
211}
212
213fn cmp<B: Builder>(left: &Regex<B>, right: &Regex<B>) -> Ordering {
214 match (left, right) {
215 (Regex::Symbol(left_value), Regex::Symbol(right_value)) => left_value.cmp(right_value),
216 (Regex::Concat(left_left, left_right), Regex::Concat(right_left, right_right)) => {
217 cmp(left_left, right_left).then(cmp(left_right, right_right))
218 }
219 (Regex::Closure(left_inner), Regex::Closure(right_inner)) => cmp(left_inner, right_inner),
220 (Regex::Or(left_left, left_right), Regex::Or(right_left, right_right)) => {
221 cmp(left_left, right_left).then(cmp(left_right, right_right))
222 }
223 (Regex::And(left_left, left_right), Regex::And(right_left, right_right)) => {
224 cmp(left_left, right_left).then(cmp(left_right, right_right))
225 }
226 (Regex::Complement(left_inner), Regex::Complement(right_inner)) => {
227 cmp(left_inner, right_inner)
228 }
229 (left, right) => rank(left).cmp(&rank(right)),
230 }
231}
232
233fn rank<B: Builder>(re: &Regex<B>) -> usize {
234 match re {
235 Regex::EmptySet => 1,
236 Regex::EmptyString => 2,
237 Regex::Symbol(_) => 3,
238 Regex::Concat(_, _) => 4,
239 Regex::Closure(_) => 5,
240 Regex::Or(_, _) => 6,
241 Regex::And(_, _) => 7,
242 Regex::Complement(_) => 8,
243 }
244}
245
246#[cfg(test)]
247mod tests {
248 use crate::builder::Pure;
249 use crate::ops::*;
250
251 use super::*;
252
253 #[test]
254 fn test_canonical_forms() {
255 let tests: Vec<(
256 Regex<ApproximatelySimilarCanonical<usize>>,
257 Regex<Pure<usize>>,
258 )> = vec![
259 (().r(), ().r()),
260 (().r().c(), [].r()),
261 ([].r().c(), [].r()),
262 (11.s() & [42.s(), ().r()].r(), ().r()),
263 (42.s() & 11.s() & 17.s(), 11.s() & 17.s() & 42.s()),
264 (42.s() & !11.s() & 17.s(), 17.s() & 42.s() & !11.s()),
265 (42.s() | 11.s() | 17.s(), 11.s() | 17.s() | 42.s()),
266 (42.s() | !11.s() | 17.s(), 17.s() | 42.s() | !11.s()),
267 (!42.s() & !11.s(), !11.s() & !42.s()),
268 ];
269 for test in tests {
270 assert_eq!(test.1, test.0.rebuild());
271 }
272 }
273
274 #[test]
275 fn test_equivalent_forms() {
276 let tests: Vec<(
277 Regex<ApproximatelySimilarCanonical<usize>>,
278 Regex<ApproximatelySimilarCanonical<usize>>,
279 )> = vec![
280 (11.s() & 42.s() & 7.s(), 7.s() & 42.s() & 11.s()),
281 (11.s() & 7.s() & 42.s(), 42.s() & 7.s() & 11.s()),
282 (11.s() | 42.s() | 7.s(), 7.s() | 42.s() | 11.s()),
283 (11.s() | 7.s() | 42.s(), 42.s() | 7.s() | 11.s()),
284 (42.s().c().c(), 42.s().c()),
285 (11.s() | 11.s(), 11.s()),
286 (11.s() & !().r(), 11.s()),
287 (11.s() | !().r(), !().r()),
288 (11.s() & (42.s() & 7.s()), 7.s() & 11.s() & 42.s()),
289 (11.s() | (42.s() | 7.s()), 7.s() | 11.s() | 42.s()),
290 ];
291 for test in tests {
292 assert_eq!(test.1, test.0.rebuild());
293 }
294 }
295}