1use crate::trigram::{self, Trigram};
16use regex_syntax::ParserBuilder;
17use regex_syntax::hir::literal::{ExtractKind, Extractor, Seq};
18
19#[derive(Debug, Clone, Copy, Default)]
22pub struct Options {
23 pub case_insensitive: bool,
24 pub multi_line: bool,
25 pub dot_matches_new_line: bool,
26}
27
28#[derive(Debug, Clone, PartialEq, Eq)]
31pub enum Query {
32 All,
33 Tri(Trigram),
34 And(Vec<Query>),
35 Or(Vec<Query>),
36}
37
38impl Query {
39 pub fn for_pattern(pattern: &str, opts: Options) -> Query {
42 let hir = match ParserBuilder::new()
43 .case_insensitive(opts.case_insensitive)
44 .multi_line(opts.multi_line)
45 .dot_matches_new_line(opts.dot_matches_new_line)
46 .build()
47 .parse(pattern)
48 {
49 Ok(h) => h,
50 Err(_) => return Query::All, };
52 let prefix = seq_query(&Extractor::new().kind(ExtractKind::Prefix).extract(&hir));
53 let suffix = seq_query(&Extractor::new().kind(ExtractKind::Suffix).extract(&hir));
54 Query::and(vec![prefix, suffix])
55 }
56
57 pub fn is_fallback(&self) -> bool {
59 matches!(self, Query::All)
60 }
61
62 pub fn eval(&self, present: &impl Fn(Trigram) -> bool) -> bool {
64 match self {
65 Query::All => true,
66 Query::Tri(t) => present(*t),
67 Query::And(qs) => qs.iter().all(|q| q.eval(present)),
68 Query::Or(qs) => qs.iter().any(|q| q.eval(present)),
69 }
70 }
71
72 pub fn trigrams(&self, out: &mut Vec<Trigram>) {
74 match self {
75 Query::All => {}
76 Query::Tri(t) => out.push(*t),
77 Query::And(qs) | Query::Or(qs) => qs.iter().for_each(|q| q.trigrams(out)),
78 }
79 }
80
81 fn and(parts: Vec<Query>) -> Query {
84 let mut kept: Vec<Query> = parts.into_iter().filter(|q| !q.is_fallback()).collect();
85 match kept.len() {
86 0 => Query::All,
87 1 => kept.pop().unwrap(),
88 _ => Query::And(kept),
89 }
90 }
91
92 fn or(parts: Vec<Query>) -> Query {
94 if parts.is_empty() || parts.iter().any(Query::is_fallback) {
95 return Query::All;
96 }
97 if parts.len() == 1 {
98 return parts.into_iter().next().unwrap();
99 }
100 Query::Or(parts)
101 }
102}
103
104fn seq_query(seq: &Seq) -> Query {
111 let Some(lits) = seq.literals() else {
112 return Query::All;
113 };
114 if lits.is_empty() {
115 return Query::All;
116 }
117 let mut branches = Vec::with_capacity(lits.len());
118 for lit in lits {
119 let tris = trigram::of_literal(lit.as_bytes());
120 if tris.is_empty() {
121 return Query::All; }
123 let conj: Vec<Query> = tris.into_iter().map(Query::Tri).collect();
124 branches.push(Query::and(conj));
125 }
126 Query::or(branches)
127}
128
129#[cfg(test)]
130mod tests {
131 use super::*;
132 use crate::trigram;
133
134 fn q(p: &str) -> Query {
135 Query::for_pattern(p, Options::default())
136 }
137
138 #[test]
139 fn literal_requires_all_its_trigrams() {
140 let query = q("IndexWriter");
141 let mut tris = Vec::new();
142 query.trigrams(&mut tris);
143 assert!(!query.is_fallback());
144 assert!(tris.contains(b"Ind"));
146 assert!(tris.contains(b"ter"));
147 }
148
149 #[test]
150 fn short_and_wildcard_patterns_fall_back() {
151 assert!(q("ab").is_fallback()); assert!(q(".").is_fallback());
153 assert!(q("\\w+").is_fallback());
154 assert!(q(".*").is_fallback());
155 }
156
157 #[test]
158 fn alternation_with_an_unconstrained_branch_falls_back() {
159 assert!(q("foo|.|bar").is_fallback());
161 }
162
163 #[test]
164 fn alternation_of_literals_is_an_or() {
165 let query = q("alpha|bravo|gamma");
166 assert!(!query.is_fallback());
167 let set = trigram::distinct(b"xx alpha xx");
169 assert!(query.eval(&|t| set.contains(&t)));
170 let none = trigram::distinct(b"nothing here");
171 assert!(!query.eval(&|t| none.contains(&t)));
172 }
173}