use std::collections::{HashSet, VecDeque};
use regex_automata::dfa::Automaton;
use regex_automata::dfa::dense;
use regex_automata::util::primitives::StateID;
use regex_automata::util::syntax;
use regex_automata::{Anchored, Input};
use snafu::Snafu;
#[derive(Debug, Clone, PartialEq, Eq, Snafu)]
pub enum GlobIntersectError {
#[snafu(display("could not build DFA for glob '{glob}': {message}"))]
BuildDfa {
glob: String,
message: String,
},
#[snafu(display("could not derive start state for glob '{glob}': {message}"))]
StartState {
glob: String,
message: String,
},
}
pub fn glob_intersect_non_empty(
lhs: &globset::Glob,
rhs: &globset::Glob,
) -> Result<bool, GlobIntersectError> {
let dfa_lhs = build_dfa(lhs)?;
let dfa_rhs = build_dfa(rhs)?;
bfs_product(&dfa_lhs, &dfa_rhs, lhs, rhs)
}
fn build_dfa(glob: &globset::Glob) -> Result<dense::DFA<Vec<u32>>, GlobIntersectError> {
dense::Builder::new()
.syntax(syntax::Config::new().utf8(false))
.build(glob.regex())
.map_err(|e| GlobIntersectError::BuildDfa {
glob: glob.glob().to_owned(),
message: format!("{e:?}"),
})
}
fn bfs_product(
lhs: &dense::DFA<Vec<u32>>,
rhs: &dense::DFA<Vec<u32>>,
lhs_glob: &globset::Glob,
rhs_glob: &globset::Glob,
) -> Result<bool, GlobIntersectError> {
let empty_input = Input::new(b"").anchored(Anchored::Yes);
let start_lhs =
lhs.start_state_forward(&empty_input)
.map_err(|e| GlobIntersectError::StartState {
glob: lhs_glob.glob().to_owned(),
message: e.to_string(),
})?;
let start_rhs =
rhs.start_state_forward(&empty_input)
.map_err(|e| GlobIntersectError::StartState {
glob: rhs_glob.glob().to_owned(),
message: e.to_string(),
})?;
let mut visited: HashSet<(StateID, StateID)> = HashSet::new();
let mut queue: VecDeque<(StateID, StateID)> = VecDeque::new();
visited.insert((start_lhs, start_rhs));
queue.push_back((start_lhs, start_rhs));
while let Some((cur_lhs, cur_rhs)) = queue.pop_front() {
if accepts_at_eoi(lhs, cur_lhs) && accepts_at_eoi(rhs, cur_rhs) {
return Ok(true);
}
if lhs.is_dead_state(cur_lhs) || rhs.is_dead_state(cur_rhs) {
continue;
}
for byte in 0u8..=255u8 {
let next_lhs = lhs.next_state(cur_lhs, byte);
let next_rhs = rhs.next_state(cur_rhs, byte);
if lhs.is_dead_state(next_lhs) || rhs.is_dead_state(next_rhs) {
continue;
}
let pair = (next_lhs, next_rhs);
if visited.insert(pair) {
queue.push_back(pair);
}
}
}
Ok(false)
}
fn accepts_at_eoi(dfa: &dense::DFA<Vec<u32>>, state: StateID) -> bool {
if dfa.is_match_state(state) {
return true;
}
let after_eoi = dfa.next_eoi_state(state);
dfa.is_match_state(after_eoi)
}
#[cfg(test)]
mod tests {
use super::*;
fn glob(pattern: &str) -> globset::Glob {
globset::GlobBuilder::new(pattern)
.literal_separator(true)
.case_insensitive(false)
.build()
.expect("test pattern must compile")
}
fn intersects(a: &str, b: &str) -> bool {
glob_intersect_non_empty(&glob(a), &glob(b)).expect("DFA build must succeed")
}
#[test]
fn qry_003_identical_globs_intersect() {
assert!(intersects("src/**/*.rs", "src/**/*.rs"));
}
#[test]
fn qry_003_overlapping_globs_intersect() {
assert!(intersects("src/**/*.rs", "src/lib/*.rs"));
}
#[test]
fn qry_003_overlapping_globs_intersect_other_direction() {
assert!(intersects("src/lib/*.rs", "src/**/*.rs"));
}
#[test]
fn qry_003_disjoint_glob_prefixes_do_not_intersect() {
assert!(!intersects("src/**/*.rs", "lib/**/*.rs"));
}
#[test]
fn qry_003_disjoint_extensions_do_not_intersect() {
assert!(!intersects("**/*.rs", "**/*.js"));
}
#[test]
fn qry_003_char_class_intersection_is_decidable() {
assert!(intersects("src/[abc].rs", "src/[bd].rs"));
}
#[test]
fn qry_003_disjoint_char_classes_do_not_intersect() {
assert!(!intersects("src/[ab].rs", "src/[cd].rs"));
}
#[test]
fn qry_003_question_mark_overlap_decidable() {
assert!(intersects("src/?.rs", "src/*.rs"));
}
#[test]
fn qry_003_double_star_absorbs_single_star_prefix() {
assert!(intersects("src/**/*.rs", "src/*/*.rs"));
}
#[test]
fn qry_003_double_star_does_not_overflow_into_disjoint_root() {
assert!(!intersects("src/**/*.rs", "lib/**/*.rs"));
}
#[test]
fn qry_003_alternation_overlap_decidable() {
assert!(intersects("src/{a,b}.rs", "src/{b,c}.rs"));
}
#[test]
fn qry_003_alternation_with_no_overlap_is_disjoint() {
assert!(!intersects("src/{a,b}.rs", "src/{c,d}.rs"));
}
#[test]
fn qry_003_workspace_absolute_vs_workspace_absolute() {
assert!(intersects("/lib_core/src/**/*.rs", "/lib_core/src/foo.rs",));
}
#[test]
fn qry_003_workspace_absolute_disjoint_roots() {
assert!(!intersects("/lib_core/**/*.rs", "/web/**/*.rs"));
}
#[test]
fn qry_003_literal_glob_vs_literal_glob_byte_equal() {
assert!(intersects("src/main.rs", "src/main.rs"));
}
#[test]
fn qry_003_literal_glob_vs_literal_glob_byte_inequal() {
assert!(!intersects("src/main.rs", "src/lib.rs"));
}
}