1use crate::core::{Result, ScoreMode};
28use regex::Regex;
29
30use crate::query::multi_term::ConstantScoreMultiTermSupplier;
31use crate::query::regex_automaton::RegexAutomaton;
32use crate::query::{BoundQuery, Query, ScorerSupplier};
33use crate::search::searcher::Searcher;
34use crate::segment::reader::SegmentReader;
35
36pub struct WildcardQuery {
38 pub field: String,
39 pub pattern: String,
40}
41
42impl WildcardQuery {
43 pub fn matches(pattern: &str, term: &str) -> bool {
45 let regex_pattern = wildcard_to_regex_anchored(pattern);
46 Regex::new(®ex_pattern)
47 .map(|re| re.is_match(term))
48 .unwrap_or(false)
49 }
50}
51
52fn wildcard_to_regex_unanchored(pattern: &str) -> String {
57 let mut regex = String::with_capacity(pattern.len() * 2);
58 for ch in pattern.chars() {
59 match ch {
60 '*' => regex.push_str(".*"),
61 '?' => regex.push('.'),
62 '.' | '+' | '(' | ')' | '[' | ']' | '{' | '}' | '\\' | '^' | '$' | '|' => {
63 regex.push('\\');
64 regex.push(ch);
65 }
66 _ => regex.push(ch),
67 }
68 }
69 regex
70}
71
72fn wildcard_to_regex_anchored(pattern: &str) -> String {
74 let mut s = String::with_capacity(pattern.len() * 2 + 2);
75 s.push('^');
76 s.push_str(&wildcard_to_regex_unanchored(pattern));
77 s.push('$');
78 s
79}
80
81impl Query for WildcardQuery {
82 fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
83 let regex_pattern = wildcard_to_regex_unanchored(&self.pattern);
84 let automaton = RegexAutomaton::new(®ex_pattern)?;
85 Ok(Box::new(BoundWildcardQuery {
86 field: self.field.clone(),
87 automaton,
88 }))
89 }
90}
91
92struct BoundWildcardQuery {
95 field: String,
96 automaton: RegexAutomaton,
97}
98
99impl BoundQuery for BoundWildcardQuery {
100 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
101 let field_id = match reader
102 .header()
103 .fields
104 .iter()
105 .find(|f| f.field_name == self.field)
106 .map(|f| f.field_id)
107 {
108 Some(id) => id,
109 None => return Ok(None),
110 };
111
112 let terms: Vec<(String, u32)> = reader.automaton_search(field_id, &self.automaton);
118 if terms.is_empty() {
119 return Ok(None);
120 }
121
122 Ok(Some(Box::new(ConstantScoreMultiTermSupplier::new(
123 reader, field_id, terms,
124 ))))
125 }
126}
127
128#[cfg(test)]
129mod tests {
130 use super::*;
131 use crate::analysis::Token;
132 use crate::core::{FieldId, SegmentId};
133 use crate::mapping::{FieldType, Mapping};
134 use crate::segment::builder::SegmentBuilder;
135 use crate::segment::reader::SegmentReader;
136
137 #[test]
138 fn wildcard_matching() {
139 assert!(WildcardQuery::matches("tech*", "technology"));
140 assert!(WildcardQuery::matches("tech*", "tech"));
141 assert!(!WildcardQuery::matches("tech*", "tec"));
142 assert!(WildcardQuery::matches("t?ch", "tech"));
143 assert!(!WildcardQuery::matches("t?ch", "touch"));
144 assert!(WildcardQuery::matches("*fox*", "quick fox jumps"));
145 assert!(WildcardQuery::matches("*", "anything"));
146 assert!(WildcardQuery::matches("?", "x"));
147 assert!(!WildcardQuery::matches("?", ""));
148 assert!(WildcardQuery::matches("a*b", "ab"));
149 assert!(WildcardQuery::matches("a*b", "aXYZb"));
150 assert!(!WildcardQuery::matches("a*b", "aXYZc"));
151 }
152
153 #[test]
154 fn wildcard_query_star() {
155 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
156 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
157 for tag in &["technology", "technical", "tennis", "science"] {
158 builder.add_document(
159 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
160 b"{}",
161 );
162 }
163 let reader = SegmentReader::open(builder.build()).unwrap();
164 let store = crate::search::segment_store::SegmentStore::new(
165 vec![reader],
166 crate::analysis::AnalyzerRegistry::new(),
167 None,
168 None,
169 );
170 let searcher = Searcher::new(&store);
171
172 let results = searcher
173 .search_query(
174 &WildcardQuery {
175 field: "tag".into(),
176 pattern: "tech*".into(),
177 },
178 10,
179 0,
180 )
181 .unwrap();
182 assert_eq!(results.total_hits.value, 2); }
184
185 #[test]
186 fn wildcard_query_question_mark() {
187 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
188 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
189 for tag in &["cat", "cut", "cot", "cart"] {
190 builder.add_document(
191 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
192 b"{}",
193 );
194 }
195 let reader = SegmentReader::open(builder.build()).unwrap();
196 let store = crate::search::segment_store::SegmentStore::new(
197 vec![reader],
198 crate::analysis::AnalyzerRegistry::new(),
199 None,
200 None,
201 );
202 let searcher = Searcher::new(&store);
203
204 let results = searcher
205 .search_query(
206 &WildcardQuery {
207 field: "tag".into(),
208 pattern: "c?t".into(),
209 },
210 10,
211 0,
212 )
213 .unwrap();
214 assert_eq!(results.total_hits.value, 3); }
216
217 #[test]
218 fn wildcard_constant_score_all_ones() {
219 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
223 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
224 for tag in &["technology", "technical", "tennis", "science"] {
225 builder.add_document(
226 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
227 b"{}",
228 );
229 }
230 let reader = SegmentReader::open(builder.build()).unwrap();
231 let store = crate::search::segment_store::SegmentStore::new(
232 vec![reader],
233 crate::analysis::AnalyzerRegistry::new(),
234 None,
235 None,
236 );
237 let searcher = Searcher::new(&store);
238
239 let results = searcher
240 .search_query(
241 &WildcardQuery {
242 field: "tag".into(),
243 pattern: "tech*".into(),
244 },
245 10,
246 0,
247 )
248 .unwrap();
249 assert_eq!(results.total_hits.value, 2);
250 for hit in &results.hits {
251 assert_eq!(
252 hit.score, 1.0,
253 "wildcard hit should have constant score 1.0, got {}",
254 hit.score
255 );
256 }
257 }
258
259 #[test]
260 fn wildcard_high_cardinality_constant_score() {
261 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
266 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
267 for i in 0..1000 {
268 let tag = format!("tag_{i:04}");
269 builder.add_document(
270 &[(FieldId::new(0), vec![Token::new(&tag, 0, tag.len(), 0)])],
271 b"{}",
272 );
273 }
274 let reader = SegmentReader::open(builder.build()).unwrap();
275 let store = crate::search::segment_store::SegmentStore::new(
276 vec![reader],
277 crate::analysis::AnalyzerRegistry::new(),
278 None,
279 None,
280 );
281 let searcher = Searcher::new(&store);
282
283 let results = searcher
284 .search_query(
285 &WildcardQuery {
286 field: "tag".into(),
287 pattern: "tag_*".into(),
288 },
289 1000,
290 0,
291 )
292 .unwrap();
293 assert_eq!(results.total_hits.value, 1000);
294 assert_eq!(results.hits.len(), 1000);
295 for hit in &results.hits {
296 assert_eq!(
297 hit.score, 1.0,
298 "high-cardinality wildcard hit should score 1.0, got {}",
299 hit.score
300 );
301 }
302 }
303
304 #[test]
305 fn wildcard_no_matches() {
306 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
307 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
308 builder.add_document(
309 &[(FieldId::new(0), vec![Token::new("hello", 0, 5, 0)])],
310 b"{}",
311 );
312 let reader = SegmentReader::open(builder.build()).unwrap();
313 let store = crate::search::segment_store::SegmentStore::new(
314 vec![reader],
315 crate::analysis::AnalyzerRegistry::new(),
316 None,
317 None,
318 );
319 let searcher = Searcher::new(&store);
320
321 let results = searcher
322 .search_query(
323 &WildcardQuery {
324 field: "tag".into(),
325 pattern: "xyz*".into(),
326 },
327 10,
328 0,
329 )
330 .unwrap();
331 assert_eq!(results.total_hits.value, 0);
332 }
333
334 #[test]
335 fn wildcard_leading_wildcard_correctness() {
336 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
340 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
341 for tag in &["technology", "ecology", "biology", "tennis"] {
342 builder.add_document(
343 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
344 b"{}",
345 );
346 }
347 let reader = SegmentReader::open(builder.build()).unwrap();
348 let store = crate::search::segment_store::SegmentStore::new(
349 vec![reader],
350 crate::analysis::AnalyzerRegistry::new(),
351 None,
352 None,
353 );
354 let searcher = Searcher::new(&store);
355
356 let results = searcher
357 .search_query(
358 &WildcardQuery {
359 field: "tag".into(),
360 pattern: "*ology".into(),
361 },
362 10,
363 0,
364 )
365 .unwrap();
366 assert_eq!(results.total_hits.value, 3);
368 for hit in &results.hits {
369 assert_eq!(hit.score, 1.0);
370 }
371 }
372
373 #[test]
374 fn wildcard_middle_wildcard_correctness() {
375 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
379 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
380 for tag in &["bat1", "bet1", "bit1", "but2", "bat2"] {
381 builder.add_document(
382 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
383 b"{}",
384 );
385 }
386 let reader = SegmentReader::open(builder.build()).unwrap();
387 let store = crate::search::segment_store::SegmentStore::new(
388 vec![reader],
389 crate::analysis::AnalyzerRegistry::new(),
390 None,
391 None,
392 );
393 let searcher = Searcher::new(&store);
394
395 let results = searcher
396 .search_query(
397 &WildcardQuery {
398 field: "tag".into(),
399 pattern: "b?t1".into(),
400 },
401 10,
402 0,
403 )
404 .unwrap();
405 assert_eq!(results.total_hits.value, 3);
407 }
408}