#![warn(missing_docs)]
#![allow(clippy::similar_names)]
#[cfg(feature = "glob")]
pub mod glob;
use regex_syntax::hir::{Class, ClassBytes, ClassUnicode, Hir, HirKind, Literal, RepetitionKind};
use regex_syntax::ParserBuilder;
use std::borrow::BorrowMut;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum IntersectError {
#[error("error while parsing expression: {0}")]
Parse(#[from] regex_syntax::Error),
#[error("error: {0}")]
Error(String),
}
fn maybe_trim<'a>(
x: &'a Hir,
y: &'a Hir,
xs: &'a [Hir],
ys: &'a [Hir],
reversed: bool,
) -> Option<(&'a [Hir], &'a [Hir])> {
if x.eq(y) {
trim_slice(xs, ys, reversed)
} else {
None
}
}
fn trim_slice<'a>(
left: &'a [Hir],
right: &'a [Hir],
reversed: bool,
) -> Option<(&'a [Hir], &'a [Hir])> {
match (left, right) {
([xs @ .., x], [ys @ .., y]) | ([x, xs @ ..], [y, ys @ ..])
if reversed && x.is_literal() && y.is_literal() =>
{
maybe_trim(x, y, xs, ys, reversed)
}
([x, xs @ ..], [y, ys @ ..]) if !reversed && x.is_literal() && y.is_literal() => {
maybe_trim(x, y, xs, ys, reversed)
}
_ => Some((left, right)),
}
}
fn trim<'a>(left: &'a Hir, right: &'a Hir, reversed: bool) -> Option<(Hir, Hir)> {
match (left.kind(), right.kind()) {
(HirKind::Concat(left), HirKind::Concat(right)) => {
trim_slice(left.as_slice(), right.as_slice(), reversed)
.map(|(left, right)| (Hir::concat(left.to_vec()), Hir::concat(right.to_vec())))
}
_ => Some((left.clone(), right.clone())),
}
}
#[cfg_attr(feature="tracing", tracing::instrument(skip_all, fields(
left = format!("{}", left.iter().map(|f| format!("{}", f)).collect::<Vec<_>>().join(", ")),
right = format!("{:?}", right.iter().map(|f| format!("{}", f)).collect::<Vec<_>>().join(", ")))))]
fn concats(left: &[Hir], right: &[Hir]) -> bool {
match (left, right) {
(&[], &[]) => true,
(&[], [x, ..]) | ([x, ..], &[]) if matches!(x.kind(), HirKind::Repetition(r) if r.kind == RepetitionKind::ZeroOrMore || r.kind == RepetitionKind::ZeroOrOne) => {
true
}
([x], [rep, ..]) | ([rep, ..], [x]) if matches!(rep.kind(), HirKind::Repetition(r) if r.kind == RepetitionKind::OneOrMore) => {
exp(rep, x)
}
(xall @ [x, xs @ ..], yall @ [y, ys @ ..]) => {
match (x.kind(), y.kind()) {
(HirKind::Repetition(_), _) => exp(x, y) && concats(xall, ys),
(_, HirKind::Repetition(_)) => exp(x, y) && concats(xs, yall),
(_, _) => exp(x, y) && concats(xs, ys),
}
}
_ => false,
}
}
#[cfg_attr(feature = "tracing", tracing::instrument)]
fn unicode_range(left: &ClassUnicode, right: &ClassUnicode) -> bool {
let mut rl = left.clone();
rl.borrow_mut().intersect(right);
rl.iter().count() > 0
}
#[cfg_attr(feature = "tracing", tracing::instrument)]
fn bytes_range(left: &ClassBytes, right: &ClassBytes) -> bool {
let mut rl = left.clone();
rl.borrow_mut().intersect(right);
rl.iter().count() > 0
}
#[cfg_attr(feature = "tracing", tracing::instrument)]
fn literal(left: &Literal, right: &Literal) -> bool {
left.eq(right)
}
#[cfg_attr(feature="tracing", tracing::instrument(skip_all, fields(left = format!("{}", left), right = format!("{}", right))))]
fn exp(left: &Hir, right: &Hir) -> bool {
match (left.kind(), right.kind()) {
(HirKind::Concat(xs), HirKind::Concat(ys)) => concats(xs, ys),
(HirKind::Concat(xs), _) => concats(xs, &[right.clone()]),
(_, HirKind::Concat(ys)) => concats(&[left.clone()], ys),
(HirKind::Class(Class::Unicode(left)), HirKind::Class(Class::Unicode(right))) => {
unicode_range(left, right)
}
(HirKind::Class(Class::Bytes(left)), HirKind::Class(Class::Bytes(right))) => {
bytes_range(left, right)
}
(HirKind::Class(Class::Unicode(cls)), HirKind::Literal(Literal::Unicode(lit)))
| (HirKind::Literal(Literal::Unicode(lit)), HirKind::Class(Class::Unicode(cls))) => {
cls.iter().any(|c| *lit >= c.start() && *lit <= c.end())
}
(HirKind::Class(Class::Bytes(cls)), HirKind::Literal(Literal::Unicode(lit)))
| (HirKind::Literal(Literal::Unicode(lit)), HirKind::Class(Class::Bytes(cls))) => {
cls.iter().any(|c| {
let s = lit.to_string();
let litb = s.as_bytes();
litb.len() == 1 && litb[0] >= c.start() && litb[0] <= c.end()
})
}
(HirKind::Literal(left), HirKind::Literal(right)) => literal(left, right),
(HirKind::Empty, HirKind::Repetition(rep)) | (HirKind::Repetition(rep), HirKind::Empty)
if rep.kind == RepetitionKind::ZeroOrMore || rep.kind == RepetitionKind::ZeroOrOne =>
{
true
}
(HirKind::Repetition(left), HirKind::Repetition(right)) => exp(&left.hir, &right.hir),
(HirKind::Repetition(left), _) => exp(&left.hir, right),
(_, HirKind::Repetition(right)) => exp(left, &right.hir),
(HirKind::Empty, HirKind::Empty) => true,
_tup => {
#[cfg(feature = "tracing")]
tracing::warn!("not found: {:?}", _tup);
false
}
}
}
#[cfg_attr(feature = "tracing", tracing::instrument)]
fn hir(exp: &str) -> Result<Hir, IntersectError> {
let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
Ok(parser.parse(exp)?)
}
#[cfg_attr(feature = "tracing", tracing::instrument)]
pub fn non_empty(left: &str, right: &str) -> Result<bool, IntersectError> {
let trimmed =
trim(&hir(left)?, &hir(right)?, false).and_then(|(left, right)| trim(&left, &right, true));
Ok(trimmed.map_or(false, |(hl, hr)| exp(&hl, &hr)))
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
const NON_EMPTY: &[(&str, &[&str])] = &[
("abcd", &["abcd", "....", "[a-d]*"]),
("pqrs", &[".qrs", "p.rs", "pq.s", "pqr."]),
(".*", &["asdklfj", "jasdfh", "asdhfajfh", "asdflkasdfjl"]),
("d*", &["[abcd][abcd]", "d[a-z]+", ".....", "[d]*"]),
("[a-p]+", &["[p-z]+", "apapapaapapapap", ".*", "abcdefgh*"]),
(
"abcd[a-c]z+",
&["abcd[b-d][yz]*", "abcdazzzz", "abcdbzzz", "abcdcz"],
),
(".*\\\\", &[".*", "asdfasdf\\\\"]),
(".a.a", &["b.b.", "c.c.", "d.d.", "e.e."]),
(
".*.*.*.*.*.*.*.*.*.*.*.*.*.*.*",
&[".*.*.*.*.*.*.*.*.*.*.*"],
),
("foo.*bar", &["foobar", "fooalkdsjfbar"]),
("[a-b]+c", &["[b-c]+"]),
(".xyz.", &[".*pqr.*"]), ];
const EMPTY: &[(&str, &[&str])] = &[
("abcd", &["lsdfhda", "abcdla", "asdlfk", "ksdfj"]),
("[a-d]+", &["xyz", "p+", "[e-f]+"]),
("[0-9]*", &["[a-z]", ".\\*"]),
("ab+", &["a", "b", "abc"]),
("mamama.*", &["dadada.*", "nanana.*"]),
(".*mamama", &[".*dadada", ".*nanana"]),
(".xyz.", &["paaap"]),
(".*.*.*.*f", &[".*.*.*.*g"]),
];
const EXTRAS_NON_EMPTY: &[(&str, &[&str])] = &[
("fabcd", &["f.*"]),
("f.*abcd", &["fd.*"]),
("f[a-n]*abcd", &["fd.*"]),
];
const EXTRAS_EMPTY: &[(&str, &[&str])] = &[
("fabcd", &["fd.*"]),
("f.*abcd", &["fd.*z"]),
("f[a-n]*abcd", &["fd.*z"]),
];
#[test]
fn test_error() {
let err = format!("{}", non_empty("*", ".*g").unwrap_err());
assert_eq!(
r#"error while parsing expression: regex parse error:
*
^
error: repetition operator missing expression"#,
err
);
}
#[test]
fn test_one() {
assert!(!non_empty(".*f", ".*g").unwrap());
}
#[test]
fn test_non_empty() {
for (left, rights) in NON_EMPTY {
for right in rights.iter() {
assert!(non_empty(left, right).unwrap());
assert!(non_empty(right, left).unwrap());
}
}
}
#[test]
fn test_empty() {
for (left, rights) in EMPTY {
for right in rights.iter() {
assert!(!non_empty(left, right).unwrap());
assert!(!non_empty(right, left).unwrap());
}
}
}
#[test]
fn test_extras_non_empty() {
for (left, rights) in EXTRAS_NON_EMPTY {
for right in rights.iter() {
assert!(non_empty(left, right).unwrap());
assert!(non_empty(right, left).unwrap());
}
}
}
#[test]
fn test_extras_empty() {
for (left, rights) in EXTRAS_EMPTY {
for right in rights.iter() {
assert!(!non_empty(left, right).unwrap());
assert!(!non_empty(right, left).unwrap());
}
}
}
}