1use crate::core::{Result, ScoreMode};
22
23use crate::query::multi_term::ConstantScoreMultiTermSupplier;
24use crate::query::regex_automaton::RegexAutomaton;
25use crate::query::{BoundQuery, Query, ScorerSupplier};
26use crate::search::searcher::Searcher;
27use crate::segment::reader::SegmentReader;
28
29pub struct RegexpQuery {
31 pub field: String,
32 pub pattern: String,
33}
34
35impl Query for RegexpQuery {
36 fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
37 let pattern = strip_anchors(&self.pattern);
40 let automaton = RegexAutomaton::new(pattern)?;
41 Ok(Box::new(BoundRegexpQuery {
42 field: self.field.clone(),
43 automaton,
44 }))
45 }
46}
47
48fn strip_anchors(pattern: &str) -> &str {
49 let mut p = pattern;
50 if let Some(stripped) = p.strip_prefix('^') {
51 p = stripped;
52 }
53 if let Some(stripped) = p.strip_suffix('$') {
54 p = stripped;
55 }
56 p
57}
58
59struct BoundRegexpQuery {
62 field: String,
63 automaton: RegexAutomaton,
64}
65
66impl BoundQuery for BoundRegexpQuery {
67 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
68 let field_id = match reader
69 .header()
70 .fields
71 .iter()
72 .find(|f| f.field_name == self.field)
73 .map(|f| f.field_id)
74 {
75 Some(id) => id,
76 None => return Ok(None),
77 };
78
79 let terms: Vec<(String, u32)> = reader.automaton_search(field_id, &self.automaton);
82 if terms.is_empty() {
83 return Ok(None);
84 }
85
86 Ok(Some(Box::new(ConstantScoreMultiTermSupplier::new(
87 reader, field_id, terms,
88 ))))
89 }
90}
91
92#[cfg(test)]
93mod tests {
94 use super::*;
95 use crate::analysis::Token;
96 use crate::core::{FieldId, SegmentId};
97 use crate::mapping::{FieldType, Mapping};
98 use crate::segment::builder::SegmentBuilder;
99 use crate::segment::reader::SegmentReader;
100
101 #[test]
102 fn regexp_basic() {
103 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
104 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
105 for tag in &["technology", "technical", "tennis", "science"] {
106 builder.add_document(
107 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
108 b"{}",
109 );
110 }
111 let reader = SegmentReader::open(builder.build()).unwrap();
112 let store = crate::search::segment_store::SegmentStore::new(
113 vec![reader],
114 crate::analysis::AnalyzerRegistry::new(),
115 None,
116 None,
117 );
118 let searcher = Searcher::new(&store);
119
120 let results = searcher
121 .search_query(
122 &RegexpQuery {
123 field: "tag".into(),
124 pattern: "tech.*".into(),
125 },
126 10,
127 0,
128 )
129 .unwrap();
130 assert_eq!(results.total_hits.value, 2); }
132
133 #[test]
134 fn regexp_character_class() {
135 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
136 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
137 for tag in &["cat", "cut", "cot", "cart", "cit"] {
138 builder.add_document(
139 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
140 b"{}",
141 );
142 }
143 let reader = SegmentReader::open(builder.build()).unwrap();
144 let store = crate::search::segment_store::SegmentStore::new(
145 vec![reader],
146 crate::analysis::AnalyzerRegistry::new(),
147 None,
148 None,
149 );
150 let searcher = Searcher::new(&store);
151
152 let results = searcher
153 .search_query(
154 &RegexpQuery {
155 field: "tag".into(),
156 pattern: "c[aou]t".into(),
157 },
158 10,
159 0,
160 )
161 .unwrap();
162 assert_eq!(results.total_hits.value, 3); }
164
165 #[test]
166 fn regexp_alternation() {
167 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
168 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
169 for tag in &["red", "blue", "green"] {
170 builder.add_document(
171 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
172 b"{}",
173 );
174 }
175 let reader = SegmentReader::open(builder.build()).unwrap();
176 let store = crate::search::segment_store::SegmentStore::new(
177 vec![reader],
178 crate::analysis::AnalyzerRegistry::new(),
179 None,
180 None,
181 );
182 let searcher = Searcher::new(&store);
183
184 let results = searcher
185 .search_query(
186 &RegexpQuery {
187 field: "tag".into(),
188 pattern: "red|blue".into(),
189 },
190 10,
191 0,
192 )
193 .unwrap();
194 assert_eq!(results.total_hits.value, 2);
195 }
196
197 #[test]
198 fn regexp_constant_score_all_ones() {
199 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
203 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
204 for tag in &["cat", "cut", "cot", "cart", "cit"] {
205 builder.add_document(
206 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
207 b"{}",
208 );
209 }
210 let reader = SegmentReader::open(builder.build()).unwrap();
211 let store = crate::search::segment_store::SegmentStore::new(
212 vec![reader],
213 crate::analysis::AnalyzerRegistry::new(),
214 None,
215 None,
216 );
217 let searcher = Searcher::new(&store);
218
219 let results = searcher
220 .search_query(
221 &RegexpQuery {
222 field: "tag".into(),
223 pattern: "c[aou]t".into(),
224 },
225 10,
226 0,
227 )
228 .unwrap();
229 assert_eq!(results.total_hits.value, 3);
231 for hit in &results.hits {
232 assert_eq!(
233 hit.score, 1.0,
234 "regexp hit should have constant score 1.0, got {}",
235 hit.score
236 );
237 }
238 }
239
240 #[test]
241 fn regexp_no_matches() {
242 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
243 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
244 builder.add_document(
245 &[(FieldId::new(0), vec![Token::new("hello", 0, 5, 0)])],
246 b"{}",
247 );
248 let reader = SegmentReader::open(builder.build()).unwrap();
249 let store = crate::search::segment_store::SegmentStore::new(
250 vec![reader],
251 crate::analysis::AnalyzerRegistry::new(),
252 None,
253 None,
254 );
255 let searcher = Searcher::new(&store);
256
257 let results = searcher
258 .search_query(
259 &RegexpQuery {
260 field: "tag".into(),
261 pattern: "xyz.*".into(),
262 },
263 10,
264 0,
265 )
266 .unwrap();
267 assert_eq!(results.total_hits.value, 0);
268 }
269}