use crate::core::{Result, ScoreMode};
use crate::query::multi_term::ConstantScoreMultiTermSupplier;
use crate::query::regex_automaton::RegexAutomaton;
use crate::query::{BoundQuery, Query, ScorerSupplier};
use crate::search::searcher::Searcher;
use crate::segment::reader::SegmentReader;
/// Query that selects documents by matching the terms of a single field
/// against a regular expression.
///
/// Binding the query compiles `pattern` into a `RegexAutomaton`; each segment
/// is then searched for terms of `field` accepted by that automaton.
pub struct RegexpQuery {
/// Name of the field to search. It is compared against the `field_name` of
/// the fields listed in each segment header.
pub field: String,
/// Regular-expression source. A leading `^` and a trailing `$` anchor are
/// stripped before the pattern is compiled.
pub pattern: String,
}
impl Query for RegexpQuery {
    /// Compiles the pattern once and returns the bound query that carries the
    /// resulting automaton together with a copy of the field name.
    ///
    /// The searcher and score mode are not consulted.
    ///
    /// # Errors
    ///
    /// Propagates whatever error `RegexAutomaton::new` reports for the
    /// (anchor-stripped) pattern.
    fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        // Anchors are removed first; see `strip_anchors`.
        let automaton = RegexAutomaton::new(strip_anchors(&self.pattern))?;
        let bound = BoundRegexpQuery {
            field: self.field.clone(),
            automaton,
        };
        Ok(Box::new(bound))
    }
}
/// Removes one leading `^` and one trailing `$` anchor from `pattern`.
///
/// A trailing `$` is only treated as an anchor when it is not escaped, i.e.
/// when it is preceded by an even number of backslashes. For `price\$` the
/// dollar sign is a literal, so the pattern is returned untouched instead of
/// being cut down to `price\` (a dangling escape).
///
/// Returns a sub-slice of `pattern`; nothing is allocated.
///
/// NOTE(review): stripping anchors presumably relies on the automaton matching
/// whole terms — confirm against `RegexAutomaton`.
fn strip_anchors(pattern: &str) -> &str {
    let body = pattern.strip_prefix('^').unwrap_or(pattern);
    match body.strip_suffix('$') {
        Some(head) => {
            // Backslash is ASCII, so counting bytes is safe for UTF-8 input.
            let trailing_backslashes = head.bytes().rev().take_while(|&b| b == b'\\').count();
            if trailing_backslashes % 2 == 0 {
                head
            } else {
                // Odd count: the last backslash escapes the `$`, keep it.
                body
            }
        }
        None => body,
    }
}
/// Bound form of `RegexpQuery` produced by `bind`: the target field name plus
/// the automaton compiled from the query pattern.
struct BoundRegexpQuery {
/// Field name, cloned from the originating query.
field: String,
/// Compiled pattern; handed to `SegmentReader::automaton_search` for every
/// segment.
automaton: RegexAutomaton,
}
impl BoundQuery for BoundRegexpQuery {
    /// Builds the scorer supplier for one segment.
    ///
    /// Returns `Ok(None)` when the segment header lists no field with this
    /// query's name, or when the automaton search yields no terms for that
    /// field. Otherwise the matched terms are wrapped in a
    /// `ConstantScoreMultiTermSupplier`.
    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
        // Resolve the field name to the id recorded in this segment's header.
        let resolved = reader.header().fields.iter().find_map(|entry| {
            if entry.field_name == self.field {
                Some(entry.field_id)
            } else {
                None
            }
        });
        let field_id = match resolved {
            Some(id) => id,
            None => return Ok(None),
        };

        let matched_terms: Vec<(String, u32)> =
            reader.automaton_search(field_id, &self.automaton);
        if matched_terms.is_empty() {
            return Ok(None);
        }

        let supplier = ConstantScoreMultiTermSupplier::new(reader, field_id, matched_terms);
        Ok(Some(Box::new(supplier)))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::{AnalyzerRegistry, Token};
    use crate::core::{FieldId, SegmentId};
    use crate::mapping::{FieldType, Mapping};
    use crate::search::segment_store::SegmentStore;
    use crate::segment::builder::SegmentBuilder;
    use crate::segment::reader::SegmentReader;

    /// Shared fixture: builds a single segment with a keyword field `tag`,
    /// adds one document per entry of `tags` (each holding that tag as its
    /// only token), and runs `check` against a searcher over that segment.
    fn with_tag_searcher(tags: &[&'static str], check: impl FnOnce(&Searcher)) {
        let mapping = Mapping::builder().field("tag", FieldType::Keyword).build();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);
        for tag in tags {
            segment.add_document(
                &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
                b"{}",
            );
        }
        let reader = SegmentReader::open(segment.build()).unwrap();
        let store = SegmentStore::new(vec![reader], AnalyzerRegistry::new(), None, None);
        check(&Searcher::new(&store));
    }

    /// Regexp query over the `tag` field.
    fn tag_regexp(pattern: &str) -> RegexpQuery {
        RegexpQuery {
            field: "tag".into(),
            pattern: pattern.into(),
        }
    }

    /// `tech.*` hits the two tags starting with "tech".
    #[test]
    fn regexp_basic() {
        with_tag_searcher(
            &["technology", "technical", "tennis", "science"],
            |searcher| {
                let results = searcher
                    .search_query(&tag_regexp("tech.*"), 10, 0)
                    .unwrap();
                assert_eq!(results.total_hits.value, 2);
            },
        );
    }

    /// A character class selects "cat", "cut" and "cot" out of five tags.
    #[test]
    fn regexp_character_class() {
        with_tag_searcher(&["cat", "cut", "cot", "cart", "cit"], |searcher| {
            let results = searcher
                .search_query(&tag_regexp("c[aou]t"), 10, 0)
                .unwrap();
            assert_eq!(results.total_hits.value, 3);
        });
    }

    /// Alternation hits both listed tags and skips the third.
    #[test]
    fn regexp_alternation() {
        with_tag_searcher(&["red", "blue", "green"], |searcher| {
            let results = searcher
                .search_query(&tag_regexp("red|blue"), 10, 0)
                .unwrap();
            assert_eq!(results.total_hits.value, 2);
        });
    }

    /// Every regexp hit carries the constant score 1.0.
    #[test]
    fn regexp_constant_score_all_ones() {
        with_tag_searcher(&["cat", "cut", "cot", "cart", "cit"], |searcher| {
            let results = searcher
                .search_query(&tag_regexp("c[aou]t"), 10, 0)
                .unwrap();
            assert_eq!(results.total_hits.value, 3);
            for hit in &results.hits {
                assert_eq!(
                    hit.score, 1.0,
                    "regexp hit should have constant score 1.0, got {}",
                    hit.score
                );
            }
        });
    }

    /// A pattern that matches no indexed term yields zero hits.
    #[test]
    fn regexp_no_matches() {
        with_tag_searcher(&["hello"], |searcher| {
            let results = searcher
                .search_query(&tag_regexp("xyz.*"), 10, 0)
                .unwrap();
            assert_eq!(results.total_hits.value, 0);
        });
    }
}