use crate::trigram::{self, Trigram};
use regex_syntax::ParserBuilder;
use regex_syntax::hir::literal::{ExtractKind, Extractor, Seq};
/// Regex parser flags used when turning a pattern into a [`Query`].
///
/// Every flag defaults to `false`. `Query::for_pattern` forwards each field to
/// the identically named setting on the regex parser builder.
#[derive(Clone, Copy, Debug, Default)]
pub struct Options {
    /// Forwarded to the parser's `case_insensitive` setting.
    pub case_insensitive: bool,
    /// Forwarded to the parser's `multi_line` setting.
    pub multi_line: bool,
    /// Forwarded to the parser's `dot_matches_new_line` setting.
    pub dot_matches_new_line: bool,
}
/// A boolean expression over trigrams, evaluated against a trigram-presence
/// predicate by `Query::eval`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Query {
/// No constraint: `eval` always returns `true`. This is the variant that
/// `is_fallback` reports.
All,
/// Satisfied when the presence predicate returns `true` for this trigram.
Tri(Trigram),
/// Satisfied when every child query is satisfied.
And(Vec<Query>),
/// Satisfied when at least one child query is satisfied.
Or(Vec<Query>),
}
impl Query {
pub fn for_pattern(pattern: &str, opts: Options) -> Query {
let hir = match ParserBuilder::new()
.case_insensitive(opts.case_insensitive)
.multi_line(opts.multi_line)
.dot_matches_new_line(opts.dot_matches_new_line)
.build()
.parse(pattern)
{
Ok(h) => h,
Err(_) => return Query::All, };
let prefix = seq_query(&Extractor::new().kind(ExtractKind::Prefix).extract(&hir));
let suffix = seq_query(&Extractor::new().kind(ExtractKind::Suffix).extract(&hir));
Query::and(vec![prefix, suffix])
}
pub fn is_fallback(&self) -> bool {
matches!(self, Query::All)
}
pub fn eval(&self, present: &impl Fn(Trigram) -> bool) -> bool {
match self {
Query::All => true,
Query::Tri(t) => present(*t),
Query::And(qs) => qs.iter().all(|q| q.eval(present)),
Query::Or(qs) => qs.iter().any(|q| q.eval(present)),
}
}
pub fn trigrams(&self, out: &mut Vec<Trigram>) {
match self {
Query::All => {}
Query::Tri(t) => out.push(*t),
Query::And(qs) | Query::Or(qs) => qs.iter().for_each(|q| q.trigrams(out)),
}
}
fn and(parts: Vec<Query>) -> Query {
let mut kept: Vec<Query> = parts.into_iter().filter(|q| !q.is_fallback()).collect();
match kept.len() {
0 => Query::All,
1 => kept.pop().unwrap(),
_ => Query::And(kept),
}
}
fn or(parts: Vec<Query>) -> Query {
if parts.is_empty() || parts.iter().any(Query::is_fallback) {
return Query::All;
}
if parts.len() == 1 {
return parts.into_iter().next().unwrap();
}
Query::Or(parts)
}
}
/// Converts an extracted literal sequence into a trigram query.
///
/// The result is a disjunction over the literals, where each literal
/// contributes the conjunction of its trigrams. It degrades to
/// [`Query::All`] when the sequence exposes no literal list, when that list
/// is empty, or when any literal produces no trigrams (presumably because it
/// is too short to contain one).
fn seq_query(seq: &Seq) -> Query {
    let literals = match seq.literals() {
        Some(lits) if !lits.is_empty() => lits,
        _ => return Query::All,
    };

    // Collecting into `Option` stops at the first literal without trigrams,
    // which leaves the whole alternation unconstrained.
    let branches: Option<Vec<Query>> = literals
        .iter()
        .map(|lit| {
            let tris = trigram::of_literal(lit.as_bytes());
            if tris.is_empty() {
                None
            } else {
                Some(Query::and(tris.into_iter().map(Query::Tri).collect()))
            }
        })
        .collect();

    branches.map_or(Query::All, Query::or)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::trigram;

    /// Builds the query for `pattern` using default [`Options`].
    fn build(pattern: &str) -> Query {
        Query::for_pattern(pattern, Options::default())
    }

    #[test]
    fn literal_requires_all_its_trigrams() {
        let query = build("IndexWriter");
        assert!(!query.is_fallback());

        let mut collected = Vec::new();
        query.trigrams(&mut collected);
        // The first and last trigrams of the literal must both be required.
        assert!(collected.contains(b"Ind"));
        assert!(collected.contains(b"ter"));
    }

    #[test]
    fn short_and_wildcard_patterns_fall_back() {
        for pattern in ["ab", ".", r"\w+", ".*"] {
            assert!(
                build(pattern).is_fallback(),
                "pattern {pattern:?} should fall back"
            );
        }
    }

    #[test]
    fn alternation_with_an_unconstrained_branch_falls_back() {
        let query = build("foo|.|bar");
        assert!(query.is_fallback());
    }

    #[test]
    fn alternation_of_literals_is_an_or() {
        let query = build("alpha|bravo|gamma");
        assert!(!query.is_fallback());

        // Text containing one of the alternatives satisfies the query.
        let matching = trigram::distinct(b"xx alpha xx");
        assert!(query.eval(&|t| matching.contains(&t)));

        // Text containing none of the alternatives does not.
        let unrelated = trigram::distinct(b"nothing here");
        assert!(!query.eval(&|t| unrelated.contains(&t)));
    }
}