use crate::core::{Result, ScoreMode};
use regex::Regex;
use crate::query::multi_term::ConstantScoreMultiTermSupplier;
use crate::query::regex_automaton::RegexAutomaton;
use crate::query::{BoundQuery, Query, ScorerSupplier};
use crate::search::searcher::Searcher;
use crate::segment::reader::SegmentReader;
/// A query that selects the terms of one field with a wildcard pattern.
///
/// When the pattern is translated in this file, `*` becomes "any run of
/// characters" (`.*`) and `?` becomes "any single character" (`.`); regex
/// metacharacters in the pattern are escaped so they match literally.
pub struct WildcardQuery {
/// Name of the field to search; compared against `field_name` in each
/// segment's header when a scorer supplier is requested.
pub field: String,
/// The wildcard pattern, using `*` and `?` as the only special characters.
pub pattern: String,
}
impl WildcardQuery {
    /// Reports whether `term` matches the wildcard `pattern` as a whole.
    ///
    /// The pattern is translated to a regular expression anchored with `^`
    /// and `$`, so a partial match of `term` is not enough.
    ///
    /// The regular expression is compiled on every call; callers that test
    /// many terms against one pattern pay that cost each time. If compilation
    /// fails for any reason the function reports "no match" (`false`) instead
    /// of surfacing an error.
    ///
    /// Note: with the `regex` crate's default flags `.` does not match `\n`,
    /// so `*` and `?` will not span a newline inside `term`.
    pub fn matches(pattern: &str, term: &str) -> bool {
        let regex_pattern = wildcard_to_regex_anchored(pattern);
        Regex::new(&regex_pattern)
            .map(|re| re.is_match(term))
            .unwrap_or(false)
    }
}
/// Translates a wildcard pattern into regular-expression syntax without
/// adding any anchors.
///
/// * `*` becomes `.*`
/// * `?` becomes `.`
/// * each of `. + ( ) [ ] { } \ ^ $ |` is prefixed with a backslash so it is
///   matched literally
/// * every other character is copied through unchanged
fn wildcard_to_regex_unanchored(pattern: &str) -> String {
    /// Characters that must be backslash-escaped to be taken literally.
    const NEEDS_ESCAPE: &str = ".+()[]{}\\^$|";

    // Worst case every input character expands to two output characters.
    let buffer = String::with_capacity(pattern.len() * 2);
    pattern.chars().fold(buffer, |mut out, c| {
        if c == '*' {
            out.push_str(".*");
        } else if c == '?' {
            out.push('.');
        } else {
            if NEEDS_ESCAPE.contains(c) {
                out.push('\\');
            }
            out.push(c);
        }
        out
    })
}
/// Translates a wildcard pattern into a regular expression wrapped in `^`
/// and `$`, so that it only matches an entire input string.
fn wildcard_to_regex_anchored(pattern: &str) -> String {
    format!("^{}$", wildcard_to_regex_unanchored(pattern))
}
impl Query for WildcardQuery {
    /// Compiles the wildcard pattern into a [`RegexAutomaton`] and returns a
    /// bound query carrying that automaton together with the field name.
    ///
    /// The searcher and score mode are not consulted.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `RegexAutomaton::new`.
    fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        // No `^`/`$` anchors are added here. Presumably the automaton matches
        // whole terms on its own: the tests in this file expect `c?t` not to
        // match `cart`. TODO confirm against `RegexAutomaton`.
        let regex_pattern = wildcard_to_regex_unanchored(&self.pattern);
        let automaton = RegexAutomaton::new(&regex_pattern)?;
        Ok(Box::new(BoundWildcardQuery {
            field: self.field.clone(),
            automaton,
        }))
    }
}
/// A `WildcardQuery` after binding: the field name plus the automaton that
/// was compiled from the wildcard pattern.
struct BoundWildcardQuery {
/// Name of the field to search; looked up by name in each segment's header.
field: String,
/// Automaton built from the translated wildcard pattern; handed to the
/// segment reader's `automaton_search`.
automaton: RegexAutomaton,
}
impl BoundQuery for BoundWildcardQuery {
    /// Builds a constant-score supplier over every term of the target field
    /// that the automaton accepts in this segment.
    ///
    /// Returns `Ok(None)` when the segment header has no field with the
    /// requested name, or when the automaton search yields no terms.
    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
        // Resolve the field name to its id through the segment header.
        let declared_field = reader
            .header()
            .fields
            .iter()
            .find(|candidate| candidate.field_name == self.field);
        let field_id = match declared_field {
            Some(found) => found.field_id,
            None => return Ok(None),
        };

        let terms: Vec<(String, u32)> = reader.automaton_search(field_id, &self.automaton);
        if terms.is_empty() {
            Ok(None)
        } else {
            Ok(Some(Box::new(ConstantScoreMultiTermSupplier::new(
                reader, field_id, terms,
            ))))
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::Token;
    use crate::core::{FieldId, SegmentId};
    use crate::mapping::{FieldType, Mapping};
    use crate::segment::builder::SegmentBuilder;
    use crate::segment::reader::SegmentReader;

    /// Exercises the regex-backed `WildcardQuery::matches` helper directly.
    #[test]
    fn wildcard_matching() {
        // Trailing `*` matches zero or more characters.
        assert!(WildcardQuery::matches("tech*", "technology"));
        assert!(WildcardQuery::matches("tech*", "tech"));
        assert!(!WildcardQuery::matches("tech*", "tec"));

        // `?` matches exactly one character.
        assert!(WildcardQuery::matches("t?ch", "tech"));
        assert!(!WildcardQuery::matches("t?ch", "touch"));

        // `*` on both sides, and the degenerate single-wildcard patterns.
        assert!(WildcardQuery::matches("*fox*", "quick fox jumps"));
        assert!(WildcardQuery::matches("*", "anything"));
        assert!(WildcardQuery::matches("?", "x"));
        assert!(!WildcardQuery::matches("?", ""));

        // `*` in the middle of the pattern.
        assert!(WildcardQuery::matches("a*b", "ab"));
        assert!(WildcardQuery::matches("a*b", "aXYZb"));
        assert!(!WildcardQuery::matches("a*b", "aXYZc"));
    }

    /// A trailing `*` selects every document whose tag starts with the prefix.
    #[test]
    fn wildcard_query_star() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for tag in ["technology", "technical", "tennis", "science"] {
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "tech*".into(),
        };
        let outcome = searcher.search_query(&query, 10, 0).unwrap();

        // "technology" and "technical" are the only tags starting with "tech".
        assert_eq!(outcome.total_hits.value, 2);
    }

    /// `?` stands for exactly one character inside an indexed term.
    #[test]
    fn wildcard_query_question_mark() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for tag in ["cat", "cut", "cot", "cart"] {
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "c?t".into(),
        };
        let outcome = searcher.search_query(&query, 10, 0).unwrap();

        // "cat", "cut" and "cot" match; "cart" has one character too many.
        assert_eq!(outcome.total_hits.value, 3);
    }

    /// Every hit of a wildcard query carries the constant score 1.0.
    #[test]
    fn wildcard_constant_score_all_ones() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for tag in ["technology", "technical", "tennis", "science"] {
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "tech*".into(),
        };
        let outcome = searcher.search_query(&query, 10, 0).unwrap();

        assert_eq!(outcome.total_hits.value, 2);
        for hit in &outcome.hits {
            assert_eq!(
                hit.score, 1.0,
                "wildcard hit should have constant score 1.0, got {}",
                hit.score
            );
        }
    }

    /// A pattern that expands to 1000 distinct terms still returns every
    /// document, each with score 1.0.
    #[test]
    fn wildcard_high_cardinality_constant_score() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for i in 0..1000 {
            let tag = format!("tag_{i:04}");
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(&tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "tag_*".into(),
        };
        let outcome = searcher.search_query(&query, 1000, 0).unwrap();

        assert_eq!(outcome.total_hits.value, 1000);
        assert_eq!(outcome.hits.len(), 1000);
        for hit in &outcome.hits {
            assert_eq!(
                hit.score, 1.0,
                "high-cardinality wildcard hit should score 1.0, got {}",
                hit.score
            );
        }
    }

    /// A pattern that matches no indexed term yields zero hits.
    #[test]
    fn wildcard_no_matches() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        segment.add_document(
            &[(FieldId::new(0), vec![Token::new("hello", 0, 5, 0)])],
            b"{}",
        );
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "xyz*".into(),
        };
        let outcome = searcher.search_query(&query, 10, 0).unwrap();

        assert_eq!(outcome.total_hits.value, 0);
    }

    /// A leading `*` selects terms by suffix.
    #[test]
    fn wildcard_leading_wildcard_correctness() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for tag in ["technology", "ecology", "biology", "tennis"] {
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "*ology".into(),
        };
        let outcome = searcher.search_query(&query, 10, 0).unwrap();

        // Everything except "tennis" ends in "ology".
        assert_eq!(outcome.total_hits.value, 3);
        for hit in &outcome.hits {
            assert_eq!(hit.score, 1.0);
        }
    }

    /// A `?` in the middle of the pattern, with literal text on both sides.
    #[test]
    fn wildcard_middle_wildcard_correctness() {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for tag in ["bat1", "bet1", "bit1", "but2", "bat2"] {
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let segment_reader = SegmentReader::open(segment.build()).unwrap();
        let segments = crate::search::segment_store::SegmentStore::new(
            vec![segment_reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&segments);

        let query = WildcardQuery {
            field: "tag".into(),
            pattern: "b?t1".into(),
        };
        let outcome = searcher.search_query(&query, 10, 0).unwrap();

        // Only the tags ending in "1" qualify: "bat1", "bet1", "bit1".
        assert_eq!(outcome.total_hits.value, 3);
    }
}