1#![warn(missing_docs)]
28#![allow(clippy::similar_names)]
29
30#[cfg(feature = "glob")]
31pub mod glob;
33
34use regex_syntax::hir::{Class, ClassBytes, ClassUnicode, Hir, HirKind, Literal, RepetitionKind};
35use regex_syntax::ParserBuilder;
36use std::borrow::BorrowMut;
37use thiserror::Error;
38
39#[derive(Debug, Error)]
41pub enum IntersectError {
42 #[error("error while parsing expression: {0}")]
44 Parse(#[from] regex_syntax::Error),
45
46 #[error("error: {0}")]
48 Error(String),
49}
50
51fn maybe_trim<'a>(
52 x: &'a Hir,
53 y: &'a Hir,
54 xs: &'a [Hir],
55 ys: &'a [Hir],
56 reversed: bool,
57) -> Option<(&'a [Hir], &'a [Hir])> {
58 if x.eq(y) {
59 trim_slice(xs, ys, reversed)
60 } else {
61 None
62 }
63}
64fn trim_slice<'a>(
65 left: &'a [Hir],
66 right: &'a [Hir],
67 reversed: bool,
68) -> Option<(&'a [Hir], &'a [Hir])> {
69 match (left, right) {
70 ([xs @ .., x], [ys @ .., y]) | ([x, xs @ ..], [y, ys @ ..])
71 if reversed && x.is_literal() && y.is_literal() =>
72 {
73 maybe_trim(x, y, xs, ys, reversed)
74 }
75 ([x, xs @ ..], [y, ys @ ..]) if !reversed && x.is_literal() && y.is_literal() => {
76 maybe_trim(x, y, xs, ys, reversed)
77 }
78 _ => Some((left, right)),
79 }
80}
81
82fn trim<'a>(left: &'a Hir, right: &'a Hir, reversed: bool) -> Option<(Hir, Hir)> {
83 match (left.kind(), right.kind()) {
84 (HirKind::Concat(left), HirKind::Concat(right)) => {
85 trim_slice(left.as_slice(), right.as_slice(), reversed)
86 .map(|(left, right)| (Hir::concat(left.to_vec()), Hir::concat(right.to_vec())))
87 }
88 _ => Some((left.clone(), right.clone())),
89 }
90}
91
92#[cfg_attr(feature="tracing", tracing::instrument(skip_all, fields(
93 left = format!("{}", left.iter().map(|f| format!("{}", f)).collect::<Vec<_>>().join(", ")),
94 right = format!("{:?}", right.iter().map(|f| format!("{}", f)).collect::<Vec<_>>().join(", ")))))]
95fn concats(left: &[Hir], right: &[Hir]) -> bool {
96 match (left, right) {
97 (&[], &[]) => true,
99 (&[], [x, ..]) | ([x, ..], &[]) if matches!(x.kind(), HirKind::Repetition(r) if r.kind == RepetitionKind::ZeroOrMore || r.kind == RepetitionKind::ZeroOrOne) => {
101 true
102 }
103 ([x], [rep, ..]) | ([rep, ..], [x]) if matches!(rep.kind(), HirKind::Repetition(r) if r.kind == RepetitionKind::OneOrMore) => {
105 exp(rep, x)
106 }
107 (xall @ [x, xs @ ..], yall @ [y, ys @ ..]) => {
109 match (x.kind(), y.kind()) {
110 (HirKind::Repetition(_), _) => exp(x, y) && concats(xall, ys),
112 (_, HirKind::Repetition(_)) => exp(x, y) && concats(xs, yall),
113 (_, _) => exp(x, y) && concats(xs, ys),
114 }
115 }
116 _ => false,
117 }
118}
119
120#[cfg_attr(feature = "tracing", tracing::instrument)]
121fn unicode_range(left: &ClassUnicode, right: &ClassUnicode) -> bool {
122 let mut rl = left.clone();
123 rl.borrow_mut().intersect(right);
124 rl.iter().count() > 0
125}
126
127#[cfg_attr(feature = "tracing", tracing::instrument)]
128fn bytes_range(left: &ClassBytes, right: &ClassBytes) -> bool {
129 let mut rl = left.clone();
130 rl.borrow_mut().intersect(right);
131 rl.iter().count() > 0
132}
133
134#[cfg_attr(feature = "tracing", tracing::instrument)]
135fn literal(left: &Literal, right: &Literal) -> bool {
136 left.eq(right)
137}
138
139#[cfg_attr(feature="tracing", tracing::instrument(skip_all, fields(left = format!("{}", left), right = format!("{}", right))))]
140fn exp(left: &Hir, right: &Hir) -> bool {
141 match (left.kind(), right.kind()) {
142 (HirKind::Concat(xs), HirKind::Concat(ys)) => concats(xs, ys),
143 (HirKind::Concat(xs), _) => concats(xs, &[right.clone()]),
144 (_, HirKind::Concat(ys)) => concats(&[left.clone()], ys),
145 (HirKind::Class(Class::Unicode(left)), HirKind::Class(Class::Unicode(right))) => {
146 unicode_range(left, right)
147 }
148 (HirKind::Class(Class::Bytes(left)), HirKind::Class(Class::Bytes(right))) => {
149 bytes_range(left, right)
150 }
151 (HirKind::Class(Class::Unicode(cls)), HirKind::Literal(Literal::Unicode(lit)))
152 | (HirKind::Literal(Literal::Unicode(lit)), HirKind::Class(Class::Unicode(cls))) => {
153 cls.iter().any(|c| *lit >= c.start() && *lit <= c.end())
154 }
155 (HirKind::Class(Class::Bytes(cls)), HirKind::Literal(Literal::Unicode(lit)))
156 | (HirKind::Literal(Literal::Unicode(lit)), HirKind::Class(Class::Bytes(cls))) => {
157 cls.iter().any(|c| {
158 let s = lit.to_string();
159 let litb = s.as_bytes();
160 litb.len() == 1 && litb[0] >= c.start() && litb[0] <= c.end()
161 })
162 }
163 (HirKind::Literal(left), HirKind::Literal(right)) => literal(left, right),
164 (HirKind::Empty, HirKind::Repetition(rep)) | (HirKind::Repetition(rep), HirKind::Empty)
165 if rep.kind == RepetitionKind::ZeroOrMore || rep.kind == RepetitionKind::ZeroOrOne =>
166 {
167 true
168 }
169 (HirKind::Repetition(left), HirKind::Repetition(right)) => exp(&left.hir, &right.hir),
170 (HirKind::Repetition(left), _) => exp(&left.hir, right),
171 (_, HirKind::Repetition(right)) => exp(left, &right.hir),
172
173 (HirKind::Empty, HirKind::Empty) => true,
175 _tup => {
176 #[cfg(feature = "tracing")]
177 tracing::warn!("not found: {:?}", _tup);
178
179 false
180 }
181 }
182}
183
184#[cfg_attr(feature = "tracing", tracing::instrument)]
185fn hir(exp: &str) -> Result<Hir, IntersectError> {
186 let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
187 Ok(parser.parse(exp)?)
188}
189
190#[cfg_attr(feature = "tracing", tracing::instrument)]
204pub fn non_empty(left: &str, right: &str) -> Result<bool, IntersectError> {
205 let trimmed =
206 trim(&hir(left)?, &hir(right)?, false).and_then(|(left, right)| trim(&left, &right, true));
207 Ok(trimmed.map_or(false, |(hl, hr)| exp(&hl, &hr)))
208}
209
210#[cfg(test)]
211mod tests {
212 use super::*;
213 use pretty_assertions::assert_eq;
214
215 const NON_EMPTY: &[(&str, &[&str])] = &[
216 ("abcd", &["abcd", "....", "[a-d]*"]),
217 ("pqrs", &[".qrs", "p.rs", "pq.s", "pqr."]),
218 (".*", &["asdklfj", "jasdfh", "asdhfajfh", "asdflkasdfjl"]),
219 ("d*", &["[abcd][abcd]", "d[a-z]+", ".....", "[d]*"]),
220 ("[a-p]+", &["[p-z]+", "apapapaapapapap", ".*", "abcdefgh*"]),
221 (
222 "abcd[a-c]z+",
223 &["abcd[b-d][yz]*", "abcdazzzz", "abcdbzzz", "abcdcz"],
224 ),
225 (".*\\\\", &[".*", "asdfasdf\\\\"]),
226 (".a.a", &["b.b.", "c.c.", "d.d.", "e.e."]),
227 (
228 ".*.*.*.*.*.*.*.*.*.*.*.*.*.*.*",
229 &[".*.*.*.*.*.*.*.*.*.*.*"],
230 ),
231 ("foo.*bar", &["foobar", "fooalkdsjfbar"]),
232 ("[a-b]+c", &["[b-c]+"]),
233 (".xyz.", &[".*pqr.*"]), ];
235 const EMPTY: &[(&str, &[&str])] = &[
236 ("abcd", &["lsdfhda", "abcdla", "asdlfk", "ksdfj"]),
237 ("[a-d]+", &["xyz", "p+", "[e-f]+"]),
238 ("[0-9]*", &["[a-z]", ".\\*"]),
239 ("ab+", &["a", "b", "abc"]),
240 ("mamama.*", &["dadada.*", "nanana.*"]),
241 (".*mamama", &[".*dadada", ".*nanana"]),
242 (".xyz.", &["paaap"]),
243 (".*.*.*.*f", &[".*.*.*.*g"]),
244 ];
245
246 const EXTRAS_NON_EMPTY: &[(&str, &[&str])] = &[
247 ("fabcd", &["f.*"]),
248 ("f.*abcd", &["fd.*"]),
249 ("f[a-n]*abcd", &["fd.*"]),
250 ];
251
252 const EXTRAS_EMPTY: &[(&str, &[&str])] = &[
253 ("fabcd", &["fd.*"]),
254 ("f.*abcd", &["fd.*z"]),
255 ("f[a-n]*abcd", &["fd.*z"]),
256 ];
257
258 #[test]
259 fn test_error() {
260 let err = format!("{}", non_empty("*", ".*g").unwrap_err());
261 assert_eq!(
262 r#"error while parsing expression: regex parse error:
263 *
264 ^
265error: repetition operator missing expression"#,
266 err
267 );
268 }
269
270 #[test]
271 fn test_one() {
272 assert!(!non_empty(".*f", ".*g").unwrap());
273 }
274
275 #[test]
276 fn test_non_empty() {
277 for (left, rights) in NON_EMPTY {
278 for right in rights.iter() {
279 assert!(non_empty(left, right).unwrap());
280 assert!(non_empty(right, left).unwrap());
281 }
282 }
283 }
284
285 #[test]
286 fn test_empty() {
287 for (left, rights) in EMPTY {
288 for right in rights.iter() {
289 assert!(!non_empty(left, right).unwrap());
290 assert!(!non_empty(right, left).unwrap());
291 }
292 }
293 }
294
295 #[test]
296 fn test_extras_non_empty() {
297 for (left, rights) in EXTRAS_NON_EMPTY {
298 for right in rights.iter() {
299 assert!(non_empty(left, right).unwrap());
300 assert!(non_empty(right, left).unwrap());
301 }
302 }
303 }
304 #[test]
305 fn test_extras_empty() {
306 for (left, rights) in EXTRAS_EMPTY {
307 for right in rights.iter() {
308 assert!(!non_empty(left, right).unwrap());
309 assert!(!non_empty(right, left).unwrap());
310 }
311 }
312 }
313}