1use std::borrow::Borrow;
18use std::collections::HashSet;
19
20use itertools::Itertools;
21
22use crate::builder::Builder;
23use crate::builder::Regex;
24use crate::Alphabet;
25
26impl<B: Builder> Regex<B> {
27 pub fn derive_iter<I>(&self, symbols: impl IntoIterator<Item = I>) -> Regex<B>
29 where
30 I: Borrow<B::Symbol>,
31 {
32 let mut d = self.clone();
33 for symbol in symbols {
34 d = d.derive(symbol.borrow());
35 }
36 d
37 }
38
39 #[inline]
41 pub fn derive(&self, symbol: &B::Symbol) -> Regex<B> {
42 self.derive_symbols(&Symbols::include([symbol.clone()]))
43 }
44
45 pub(crate) fn derive_symbols(&self, symbol: &Symbols<B::Symbol>) -> Regex<B> {
46 match self {
47 Self::EmptySet => B::empty_set(),
48 Self::EmptyString => B::empty_set(),
49 Self::Symbol(inner) => {
50 if symbol.matches(inner) {
51 B::empty_string()
52 } else {
53 B::empty_set()
54 }
55 }
56 Self::Concat(left, right) => B::or(
57 B::concat(left.derive_symbols(symbol), *right.clone()),
58 B::concat(left.nullable(), right.derive_symbols(symbol)),
59 ),
60 Self::Closure(inner) => {
61 B::concat(inner.derive_symbols(symbol), B::closure(*inner.clone()))
62 }
63 Self::Or(left, right) => {
64 B::or(left.derive_symbols(symbol), right.derive_symbols(symbol))
65 }
66 Self::And(left, right) => {
67 B::and(left.derive_symbols(symbol), right.derive_symbols(symbol))
68 }
69 Self::Complement(inner) => B::complement(inner.derive_symbols(symbol)),
70 }
71 }
72
73 pub fn matches<I>(&self, symbols: impl IntoIterator<Item = I>) -> bool
75 where
76 I: Borrow<B::Symbol>,
77 {
78 self.derive_iter(symbols).is_nullable()
79 }
80}
81
82#[derive(Clone, Debug, Eq, PartialEq)]
83pub(crate) enum Symbols<S: Alphabet> {
84 Include(HashSet<S>),
86 Exclude(HashSet<S>),
88}
89
90impl<S: Alphabet> std::fmt::Display for Symbols<S>
91where
92 S: std::fmt::Display,
93{
94 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95 match self {
96 Symbols::Include(symbols) => write!(f, "{{{}}}", symbols.iter().join(", ")),
97 Symbols::Exclude(symbols) => write!(f, "Σ∖{{{}}}", symbols.iter().join(", ")),
98 }
99 }
100}
101
102impl<S: Alphabet> Symbols<S> {
103 #[inline]
104 pub(crate) fn include<const N: usize>(symbols: [S; N]) -> Self {
105 Self::Include(HashSet::from(symbols))
106 }
107
108 #[cfg(test)]
109 #[inline]
110 pub(crate) fn exclude<const N: usize>(symbols: [S; N]) -> Self {
111 Self::Exclude(HashSet::from(symbols))
112 }
113
114 pub(crate) fn matches(&self, symbol: &S) -> bool {
115 match self {
116 Self::Include(included) => included.contains(symbol),
117 Self::Exclude(excluded) => !excluded.contains(symbol),
118 }
119 }
120}
121
122impl<S: Alphabet> std::ops::BitOr for Symbols<S> {
123 type Output = Self;
124
125 fn bitor(self, other: Self) -> Self::Output {
126 match (self, other) {
128 (Self::Include(left), Self::Include(right)) => {
130 Self::Include(HashSet::union(&left, &right).cloned().collect())
131 }
132 (Self::Exclude(left), Self::Exclude(right)) => {
134 Self::Exclude(HashSet::intersection(&left, &right).cloned().collect())
135 }
136 (Self::Include(included), Self::Exclude(excluded))
138 | (Self::Exclude(excluded), Self::Include(included)) => {
139 Self::Exclude(excluded.difference(&included).cloned().collect())
140 }
141 }
142 }
143}
144
145impl<S: Alphabet> std::ops::BitAnd for Symbols<S> {
146 type Output = Self;
147
148 fn bitand(self, other: Self) -> Self::Output {
149 match (self, other) {
151 (Self::Include(left), Self::Include(right)) => {
153 Self::Include(HashSet::intersection(&left, &right).cloned().collect())
154 }
155 (Self::Exclude(left), Self::Exclude(right)) => {
157 Self::Exclude(HashSet::union(&left, &right).cloned().collect())
158 }
159 (Self::Include(included), Self::Exclude(excluded))
161 | (Self::Exclude(excluded), Self::Include(included)) => {
162 Self::Include(included.difference(&excluded).cloned().collect())
163 }
164 }
165 }
166}
167
168impl<S: Alphabet> std::ops::Not for Symbols<S> {
169 type Output = Self;
170
171 fn not(self) -> Self::Output {
172 match self {
173 Self::Include(symbols) => Self::Exclude(symbols),
174 Self::Exclude(symbols) => Self::Include(symbols),
175 }
176 }
177}
178
179#[cfg(test)]
180mod tests {
181 use crate::builder::ApproximatelySimilarCanonical;
182 use crate::builder::Pure;
183 use crate::ops::*;
184
185 use super::*;
186
187 #[test]
188 fn test_derive_symbols() {
189 let tests: Vec<(Regex<Pure<usize>>, Symbols<usize>, Regex<Pure<usize>>)> = vec![
190 (().r(), Symbols::include([42]), ().r()),
191 (().r(), Symbols::exclude([42]), ().r()),
192 ([].r(), Symbols::include([42]), ().r()),
193 ([].r(), Symbols::exclude([42]), ().r()),
194 ([42.s()].r(), Symbols::include([42]), [].r()),
195 ([42.s()].r(), Symbols::exclude([42]), ().r()),
196 (!().r(), Symbols::include([42]), !().r()),
197 (!().r(), Symbols::exclude([42]), !().r()),
198 (!42.s(), Symbols::include([42]), ![].r()),
199 (!42.s(), Symbols::exclude([42]), !().r()),
200 ];
201 for (r, symbols, expected) in tests {
202 let actual = r.derive_symbols(&symbols);
203 assert_eq!(
204 expected, actual,
205 "derive {} for {}, expected {}, got {}",
206 r, symbols, expected, actual
207 );
208 }
209 }
210
211 #[test]
212 fn test_matches_pure() {
213 test_matches::<Pure<_>>();
214 }
215
216 #[test]
217 fn test_matches_asc() {
218 test_matches::<ApproximatelySimilarCanonical<_>>();
219 }
220
221 fn test_matches<B: Builder<Symbol = usize> + Clone>() {
222 let tests: Vec<(Regex<B>, Vec<_>, bool)> = vec![
223 ((().r()), vec![], false),
224 (([].r()), vec![], true),
225 (42.s(), vec![42], true),
226 (42.s(), vec![42, 42], false),
227 (42.s(), vec![11], false),
228 ((![42.s()].r()), vec![], true),
229 (([42.s()].r()), vec![42], true),
230 (([42.s(), (11.s() | 7.s())].r()), vec![42], false),
231 (([42.s(), (11.s() | 7.s())].r()), vec![42, 11], true),
232 (([42.s(), (11.s() | 7.s())].r()), vec![42, 7], true),
233 ((42.s().c()), vec![], true),
234 ((42.s().c()), vec![42], true),
235 ((42.s().c()), vec![42, 42, 42], true),
236 ((42.s().c()), vec![42, 11], false),
237 ((42.s() & [].r()), vec![42], false),
238 ((42.s() & 42.s().c()), vec![42], true),
239 ((42.s() & 42.s().c()), vec![42, 42], false),
240 ((42.s() | 11.s()), vec![42], true),
241 ((42.s() | 11.s()), vec![11], true),
242 ((42.s() | 11.s()), vec![11, 42], false),
243 ((42.s() | 11.s().c()), vec![42], true),
244 ((42.s() | 11.s().c()), vec![11], true),
245 ((42.s() | 11.s().c()), vec![11, 11], true),
246 ((42.s() | 11.s().c()), vec![42, 11], false),
247 ((!().r()), vec![11], true),
248 ((!11.s()), vec![42], true),
249 ((!11.s()), vec![11], false),
250 ];
251 for test in tests {
252 assert_eq!(test.2, test.0.matches(test.1));
253 }
254 }
255}