1use pest::Parser;
11use pest_derive::Parser;
12use std::sync::Arc;
13
14use super::query_field_router::{QueryFieldRouter, RoutingMode};
15use super::schema::{Field, Schema};
16use crate::query::{BooleanQuery, PrefixQuery, Query, TermQuery};
17use crate::tokenizer::{BoxedTokenizer, TokenizerRegistry};
18
/// Pest-generated parser for the query language.
///
/// The grammar lives in `dsl/ql/ql.pest`; the derive produces the `Rule`
/// enum and the `QueryParser::parse` entry point used throughout this file.
#[derive(Parser)]
#[grammar = "dsl/ql/ql.pest"]
struct QueryParser;
22
/// Intermediate AST produced by the grammar, before schema-aware query
/// construction in `build_query`. A `field` of `None` means "search the
/// configured default fields".
#[derive(Debug, Clone)]
pub enum ParsedQuery {
    /// A single term, optionally field-qualified (`title:rust`).
    Term {
        field: Option<String>,
        term: String,
    },
    /// A quoted phrase (`"hello world"`); surrounding quotes already stripped.
    Phrase {
        field: Option<String>,
        phrase: String,
    },
    /// A prefix match (`abc*`); the trailing `*` is not included.
    Prefix {
        field: Option<String>,
        prefix: String,
    },
    /// Dense-vector ANN search: `field:ann([..], nprobe=.., rerank=..)`.
    Ann {
        field: String,
        vector: Vec<f32>,
        // Defaults to 32 when not given in the query (see parse_ann_query).
        nprobe: usize,
        // Defaults to 3.0 when not given in the query.
        rerank: f32,
    },
    /// Sparse-vector search: `field:sparse({index: weight, ..})`.
    Sparse {
        field: String,
        vector: Vec<(u32, f32)>,
    },
    /// Conjunction of sub-queries (`AND`).
    And(Vec<ParsedQuery>),
    /// Disjunction of sub-queries (`OR`, or implicit between bare terms).
    Or(Vec<ParsedQuery>),
    /// Negation (`NOT expr`).
    Not(Box<ParsedQuery>),
}
55
/// Schema-aware parser that turns query-language strings into executable
/// `Query` objects.
pub struct QueryLanguageParser {
    schema: Arc<Schema>,
    // Fields searched when a clause carries no explicit field qualifier.
    default_fields: Vec<Field>,
    tokenizers: Arc<TokenizerRegistry>,
    // Optional regex-based router that can redirect a whole query string to
    // a specific field before normal parsing (consulted first in `parse`).
    field_router: Option<QueryFieldRouter>,
}
64
65impl QueryLanguageParser {
66 pub fn new(
67 schema: Arc<Schema>,
68 default_fields: Vec<Field>,
69 tokenizers: Arc<TokenizerRegistry>,
70 ) -> Self {
71 Self {
72 schema,
73 default_fields,
74 tokenizers,
75 field_router: None,
76 }
77 }
78
79 pub fn with_router(
81 schema: Arc<Schema>,
82 default_fields: Vec<Field>,
83 tokenizers: Arc<TokenizerRegistry>,
84 router: QueryFieldRouter,
85 ) -> Self {
86 Self {
87 schema,
88 default_fields,
89 tokenizers,
90 field_router: Some(router),
91 }
92 }
93
94 pub fn set_router(&mut self, router: QueryFieldRouter) {
96 self.field_router = Some(router);
97 }
98
99 pub fn router(&self) -> Option<&QueryFieldRouter> {
101 self.field_router.as_ref()
102 }
103
104 pub fn parse(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
114 let query_str = query_str.trim();
115 if query_str.is_empty() {
116 return Err("Empty query".to_string());
117 }
118
119 if let Some(router) = &self.field_router
121 && let Some(routed) = router.route(query_str)
122 {
123 return self.build_routed_query(
124 &routed.query,
125 &routed.target_field,
126 routed.mode,
127 query_str,
128 );
129 }
130
131 self.parse_normal(query_str)
133 }
134
135 fn build_routed_query(
137 &self,
138 routed_query: &str,
139 target_field: &str,
140 mode: RoutingMode,
141 original_query: &str,
142 ) -> Result<Box<dyn Query>, String> {
143 let _field_id = self
145 .schema
146 .get_field(target_field)
147 .ok_or_else(|| format!("Unknown target field: {}", target_field))?;
148
149 let target_query = self.build_term_query(Some(target_field), routed_query)?;
151
152 match mode {
153 RoutingMode::Exclusive => {
154 Ok(target_query)
156 }
157 RoutingMode::Additional => {
158 let mut bool_query = BooleanQuery::new();
160 bool_query = bool_query.should(target_query);
161
162 if let Ok(default_query) = self.parse_normal(original_query) {
164 bool_query = bool_query.should(default_query);
165 }
166
167 Ok(Box::new(bool_query))
168 }
169 }
170 }
171
172 fn parse_normal(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
174 match self.parse_query_string(query_str) {
176 Ok(parsed) => self.build_query(&parsed),
177 Err(_) => {
178 self.parse_plain_text(query_str)
181 }
182 }
183 }
184
185 fn parse_plain_text(&self, text: &str) -> Result<Box<dyn Query>, String> {
187 if self.default_fields.is_empty() {
188 return Err("No default fields configured".to_string());
189 }
190
191 let tokenizer = self.get_tokenizer(self.default_fields[0]);
192 let tokens: Vec<String> = tokenizer
193 .tokenize(text)
194 .into_iter()
195 .map(|t| t.text.to_lowercase())
196 .collect();
197
198 if tokens.is_empty() {
199 return Err("No tokens in query".to_string());
200 }
201
202 let mut bool_query = BooleanQuery::new();
203 for token in &tokens {
204 for &field_id in &self.default_fields {
205 bool_query = bool_query.should(TermQuery::text(field_id, token));
206 }
207 }
208 Ok(Box::new(bool_query))
209 }
210
211 fn parse_query_string(&self, query_str: &str) -> Result<ParsedQuery, String> {
212 let pairs = QueryParser::parse(Rule::query, query_str)
213 .map_err(|e| format!("Parse error: {}", e))?;
214
215 let query_pair = pairs.into_iter().next().ok_or("No query found")?;
216
217 self.parse_or_expr(query_pair.into_inner().next().unwrap())
219 }
220
221 fn parse_or_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
222 let mut inner = pair.into_inner();
223 let first = self.parse_and_expr(inner.next().unwrap())?;
224
225 let rest: Vec<ParsedQuery> = inner
226 .filter(|p| p.as_rule() == Rule::and_expr)
227 .map(|p| self.parse_and_expr(p))
228 .collect::<Result<Vec<_>, _>>()?;
229
230 if rest.is_empty() {
231 Ok(first)
232 } else {
233 let mut all = vec![first];
234 all.extend(rest);
235 Ok(ParsedQuery::Or(all))
236 }
237 }
238
239 fn parse_and_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
240 let mut inner = pair.into_inner();
241 let first = self.parse_primary(inner.next().unwrap())?;
242
243 let rest: Vec<ParsedQuery> = inner
244 .filter(|p| p.as_rule() == Rule::primary)
245 .map(|p| self.parse_primary(p))
246 .collect::<Result<Vec<_>, _>>()?;
247
248 if rest.is_empty() {
249 Ok(first)
250 } else {
251 let mut all = vec![first];
252 all.extend(rest);
253 Ok(ParsedQuery::And(all))
254 }
255 }
256
257 fn parse_primary(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
258 let mut negated = false;
259 let mut inner_query = None;
260
261 for inner in pair.into_inner() {
262 match inner.as_rule() {
263 Rule::not_op => negated = true,
264 Rule::group => {
265 let or_expr = inner.into_inner().next().unwrap();
266 inner_query = Some(self.parse_or_expr(or_expr)?);
267 }
268 Rule::ann_query => {
269 inner_query = Some(self.parse_ann_query(inner)?);
270 }
271 Rule::sparse_query => {
272 inner_query = Some(self.parse_sparse_query(inner)?);
273 }
274 Rule::phrase_query => {
275 inner_query = Some(self.parse_phrase_query(inner)?);
276 }
277 Rule::prefix_query => {
278 inner_query = Some(self.parse_prefix_query(inner)?);
279 }
280 Rule::term_query => {
281 inner_query = Some(self.parse_term_query(inner)?);
282 }
283 _ => {}
284 }
285 }
286
287 let query = inner_query.ok_or("No query in primary")?;
288
289 if negated {
290 Ok(ParsedQuery::Not(Box::new(query)))
291 } else {
292 Ok(query)
293 }
294 }
295
296 fn parse_term_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
297 let mut field = None;
298 let mut term = String::new();
299
300 for inner in pair.into_inner() {
301 match inner.as_rule() {
302 Rule::field_spec => {
303 field = Some(inner.into_inner().next().unwrap().as_str().to_string());
304 }
305 Rule::term => {
306 term = inner.as_str().to_string();
307 }
308 _ => {}
309 }
310 }
311
312 Ok(ParsedQuery::Term { field, term })
313 }
314
315 fn parse_prefix_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
316 let mut field = None;
317 let mut prefix = String::new();
318
319 for inner in pair.into_inner() {
320 match inner.as_rule() {
321 Rule::field_spec => {
322 field = Some(inner.into_inner().next().unwrap().as_str().to_string());
323 }
324 Rule::prefix_value => {
325 prefix = inner.as_str().to_string();
326 }
327 _ => {}
328 }
329 }
330
331 Ok(ParsedQuery::Prefix { field, prefix })
332 }
333
334 fn parse_phrase_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
335 let mut field = None;
336 let mut phrase = String::new();
337
338 for inner in pair.into_inner() {
339 match inner.as_rule() {
340 Rule::field_spec => {
341 field = Some(inner.into_inner().next().unwrap().as_str().to_string());
342 }
343 Rule::quoted_string => {
344 let s = inner.as_str();
345 phrase = s[1..s.len() - 1].to_string();
346 }
347 _ => {}
348 }
349 }
350
351 Ok(ParsedQuery::Phrase { field, phrase })
352 }
353
354 fn parse_ann_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
356 let mut field = String::new();
357 let mut vector = Vec::new();
358 let mut nprobe = 32usize;
359 let mut rerank = 3.0f32;
360
361 for inner in pair.into_inner() {
362 match inner.as_rule() {
363 Rule::field_spec => {
364 field = inner.into_inner().next().unwrap().as_str().to_string();
365 }
366 Rule::vector_array => {
367 for num in inner.into_inner() {
368 if num.as_rule() == Rule::number
369 && let Ok(v) = num.as_str().parse::<f32>()
370 {
371 vector.push(v);
372 }
373 }
374 }
375 Rule::ann_params => {
376 for param in inner.into_inner() {
377 if param.as_rule() == Rule::ann_param {
378 let param_str = param.as_str();
380 if let Some(eq_pos) = param_str.find('=') {
381 let name = ¶m_str[..eq_pos];
382 let value = ¶m_str[eq_pos + 1..];
383 match name {
384 "nprobe" => nprobe = value.parse().unwrap_or(0),
385 "rerank" => rerank = value.parse().unwrap_or(0.0),
386 _ => {}
387 }
388 }
389 }
390 }
391 }
392 _ => {}
393 }
394 }
395
396 Ok(ParsedQuery::Ann {
397 field,
398 vector,
399 nprobe,
400 rerank,
401 })
402 }
403
404 fn parse_sparse_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
406 let mut field = String::new();
407 let mut vector = Vec::new();
408
409 for inner in pair.into_inner() {
410 match inner.as_rule() {
411 Rule::field_spec => {
412 field = inner.into_inner().next().unwrap().as_str().to_string();
413 }
414 Rule::sparse_map => {
415 for entry in inner.into_inner() {
416 if entry.as_rule() == Rule::sparse_entry {
417 let mut entry_inner = entry.into_inner();
418 if let (Some(idx), Some(weight)) =
419 (entry_inner.next(), entry_inner.next())
420 && let (Ok(i), Ok(w)) =
421 (idx.as_str().parse::<u32>(), weight.as_str().parse::<f32>())
422 {
423 vector.push((i, w));
424 }
425 }
426 }
427 }
428 _ => {}
429 }
430 }
431
432 Ok(ParsedQuery::Sparse { field, vector })
433 }
434
435 fn build_query(&self, parsed: &ParsedQuery) -> Result<Box<dyn Query>, String> {
436 use crate::query::{DenseVectorQuery, SparseVectorQuery};
437
438 match parsed {
439 ParsedQuery::Term { field, term } => self.build_term_query(field.as_deref(), term),
440 ParsedQuery::Phrase { field, phrase } => {
441 self.build_phrase_query(field.as_deref(), phrase)
442 }
443 ParsedQuery::Prefix { field, prefix } => {
444 self.build_prefix_query(field.as_deref(), prefix)
445 }
446 ParsedQuery::Ann {
447 field,
448 vector,
449 nprobe,
450 rerank,
451 } => {
452 let field_id = self
453 .schema
454 .get_field(field)
455 .ok_or_else(|| format!("Unknown field: {}", field))?;
456 let query = DenseVectorQuery::new(field_id, vector.clone())
457 .with_nprobe(*nprobe)
458 .with_rerank_factor(*rerank);
459 Ok(Box::new(query))
460 }
461 ParsedQuery::Sparse { field, vector } => {
462 let field_id = self
463 .schema
464 .get_field(field)
465 .ok_or_else(|| format!("Unknown field: {}", field))?;
466 let query = SparseVectorQuery::new(field_id, vector.clone());
467 Ok(Box::new(query))
468 }
469 ParsedQuery::And(queries) => {
470 let mut bool_query = BooleanQuery::new();
471 for q in queries {
472 bool_query = bool_query.must(self.build_query(q)?);
473 }
474 Ok(Box::new(bool_query))
475 }
476 ParsedQuery::Or(queries) => {
477 let mut bool_query = BooleanQuery::new();
478 for q in queries {
479 bool_query = bool_query.should(self.build_query(q)?);
480 }
481 Ok(Box::new(bool_query))
482 }
483 ParsedQuery::Not(inner) => {
484 let mut bool_query = BooleanQuery::new();
486 bool_query = bool_query.must_not(self.build_query(inner)?);
487 Ok(Box::new(bool_query))
488 }
489 }
490 }
491
492 fn build_term_query(&self, field: Option<&str>, term: &str) -> Result<Box<dyn Query>, String> {
493 if let Some(field_name) = field {
494 let field_id = self
496 .schema
497 .get_field(field_name)
498 .ok_or_else(|| format!("Unknown field: {}", field_name))?;
499 if let Some(entry) = self.schema.get_field_entry(field_id) {
501 use crate::dsl::FieldType;
502 if entry.field_type != FieldType::Text {
503 return Err(format!(
504 "Term query requires a text field, but '{}' is {:?}. Use range query for numeric fields.",
505 field_name, entry.field_type
506 ));
507 }
508 }
509 let tokenizer = self.get_tokenizer(field_id);
510 let tokens: Vec<String> = tokenizer
511 .tokenize(term)
512 .into_iter()
513 .map(|t| t.text.to_lowercase())
514 .collect();
515
516 if tokens.is_empty() {
517 return Err("No tokens in term".to_string());
518 }
519
520 if tokens.len() == 1 {
521 Ok(Box::new(TermQuery::text(field_id, &tokens[0])))
522 } else {
523 let mut bool_query = BooleanQuery::new();
525 for token in &tokens {
526 bool_query = bool_query.must(TermQuery::text(field_id, token));
527 }
528 Ok(Box::new(bool_query))
529 }
530 } else if !self.default_fields.is_empty() {
531 let tokenizer = self.get_tokenizer(self.default_fields[0]);
533 let tokens: Vec<String> = tokenizer
534 .tokenize(term)
535 .into_iter()
536 .map(|t| t.text.to_lowercase())
537 .collect();
538
539 if tokens.is_empty() {
540 return Err("No tokens in term".to_string());
541 }
542
543 let mut bool_query = BooleanQuery::new();
545 for token in &tokens {
546 for &field_id in &self.default_fields {
547 bool_query = bool_query.should(TermQuery::text(field_id, token));
548 }
549 }
550 Ok(Box::new(bool_query))
551 } else {
552 Err("No field specified and no default fields configured".to_string())
553 }
554 }
555
556 fn build_prefix_query(
557 &self,
558 field: Option<&str>,
559 prefix: &str,
560 ) -> Result<Box<dyn Query>, String> {
561 if let Some(field_name) = field {
562 let field_id = self
563 .schema
564 .get_field(field_name)
565 .ok_or_else(|| format!("Unknown field: {}", field_name))?;
566 Ok(Box::new(PrefixQuery::text(field_id, prefix)))
567 } else if !self.default_fields.is_empty() {
568 let mut bool_query = BooleanQuery::new();
570 for &field_id in &self.default_fields {
571 bool_query = bool_query.should(PrefixQuery::text(field_id, prefix));
572 }
573 Ok(Box::new(bool_query))
574 } else {
575 Err("No field specified and no default fields configured".to_string())
576 }
577 }
578
579 fn build_phrase_query(
580 &self,
581 field: Option<&str>,
582 phrase: &str,
583 ) -> Result<Box<dyn Query>, String> {
584 let field_id = if let Some(field_name) = field {
586 self.schema
587 .get_field(field_name)
588 .ok_or_else(|| format!("Unknown field: {}", field_name))?
589 } else if !self.default_fields.is_empty() {
590 self.default_fields[0]
591 } else {
592 return Err("No field specified and no default fields configured".to_string());
593 };
594
595 let tokenizer = self.get_tokenizer(field_id);
596 let tokens: Vec<String> = tokenizer
597 .tokenize(phrase)
598 .into_iter()
599 .map(|t| t.text.to_lowercase())
600 .collect();
601
602 if tokens.is_empty() {
603 return Err("No tokens in phrase".to_string());
604 }
605
606 if tokens.len() == 1 {
607 return Ok(Box::new(TermQuery::text(field_id, &tokens[0])));
608 }
609
610 let mut bool_query = BooleanQuery::new();
612 for token in &tokens {
613 bool_query = bool_query.must(TermQuery::text(field_id, token));
614 }
615
616 if field.is_none() && self.default_fields.len() > 1 {
618 let mut outer = BooleanQuery::new();
619 for &f in &self.default_fields {
620 let tokenizer = self.get_tokenizer(f);
621 let tokens: Vec<String> = tokenizer
622 .tokenize(phrase)
623 .into_iter()
624 .map(|t| t.text.to_lowercase())
625 .collect();
626
627 let mut field_query = BooleanQuery::new();
628 for token in &tokens {
629 field_query = field_query.must(TermQuery::text(f, token));
630 }
631 outer = outer.should(field_query);
632 }
633 return Ok(Box::new(outer));
634 }
635
636 Ok(Box::new(bool_query))
637 }
638
639 fn get_tokenizer(&self, field: Field) -> BoxedTokenizer {
640 let tokenizer_name = self
642 .schema
643 .get_field_entry(field)
644 .and_then(|entry| entry.tokenizer.as_deref())
645 .unwrap_or("simple");
646
647 self.tokenizers
648 .get(tokenizer_name)
649 .unwrap_or_else(|| Box::new(crate::tokenizer::SimpleTokenizer))
650 }
651}
652
#[cfg(test)]
mod tests {
    //! Tests covering the public `parse` entry point, router integration,
    //! and the grammar-level `parse_query_string` AST output.
    use super::*;
    use crate::dsl::SchemaBuilder;
    use crate::tokenizer::TokenizerRegistry;

    // Builds a two-text-field schema ("title", "body") with both fields as
    // defaults, plus a default tokenizer registry.
    fn setup() -> (Arc<Schema>, Vec<Field>, Arc<TokenizerRegistry>) {
        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        (schema, vec![title, body], tokenizers)
    }

    #[test]
    fn test_simple_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Smoke test: a bare term must parse without error.
        let _query = parser.parse("rust").unwrap();
    }

    #[test]
    fn test_field_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let _query = parser.parse("title:rust").unwrap();
    }

    #[test]
    fn test_boolean_and() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let _query = parser.parse("rust AND programming").unwrap();
    }

    #[test]
    fn test_match_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Two bare terms: implicit OR across default fields.
        let _query = parser.parse("hello world").unwrap();
    }

    #[test]
    fn test_phrase_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let _query = parser.parse("\"hello world\"").unwrap();
    }

    #[test]
    fn test_boolean_or() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let _query = parser.parse("rust OR python").unwrap();
    }

    #[test]
    fn test_complex_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Grouping with parentheses combined with AND.
        let _query = parser.parse("(rust OR python) AND programming").unwrap();
    }

    #[test]
    fn test_router_exclusive_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        // DOI-shaped queries are rewritten and routed exclusively to "uri".
        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
            substitution: "doi://{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        // No default fields: only the routed query path can succeed.
        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        let _query = parser.parse("doi:10.1234/test.123").unwrap();
    }

    #[test]
    fn test_router_additional_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        // "#42"-style queries add a routed "uri" clause alongside the
        // default interpretation.
        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"#(\d+)".to_string(),
            substitution: "{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Additional,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        let _query = parser.parse("#42").unwrap();
    }

    #[test]
    fn test_router_no_match_falls_through() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:".to_string(),
            substitution: "{0}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // The rule does not match, so the normal parse path is used.
        let _query = parser.parse("rust programming").unwrap();
    }

    #[test]
    fn test_router_invalid_target_field() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        // Rule targets a field that does not exist in the schema.
        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"test".to_string(),
            substitution: "{0}".to_string(),
            target_field: "nonexistent".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        let result = parser.parse("test");
        assert!(result.is_err());
        let err = result.err().unwrap();
        assert!(err.contains("Unknown target field"));
    }

    #[test]
    fn test_parse_ann_query() {
        let mut builder = SchemaBuilder::default();
        let embedding = builder.add_dense_vector_field("embedding", 128, true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![embedding], tokenizers);

        let result = parser.parse_query_string("embedding:ann([1.0, 2.0, 3.0], nprobe=32)");
        assert!(result.is_ok(), "Failed to parse ANN query: {:?}", result);

        if let Ok(ParsedQuery::Ann {
            field,
            vector,
            nprobe,
            rerank,
        }) = result
        {
            assert_eq!(field, "embedding");
            assert_eq!(vector, vec![1.0, 2.0, 3.0]);
            assert_eq!(nprobe, 32);
            // rerank was not given in the query, so the default applies.
            assert_eq!(rerank, 3.0);
        } else {
            panic!("Expected Ann query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_sparse_query() {
        let mut builder = SchemaBuilder::default();
        let sparse = builder.add_text_field("sparse", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![sparse], tokenizers);

        let result = parser.parse_query_string("sparse:sparse({1: 0.5, 5: 0.3})");
        assert!(result.is_ok(), "Failed to parse sparse query: {:?}", result);

        if let Ok(ParsedQuery::Sparse { field, vector }) = result {
            assert_eq!(field, "sparse");
            assert_eq!(vector, vec![(1, 0.5), (5, 0.3)]);
        } else {
            panic!("Expected Sparse query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_prefix_simple() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let result = parser.parse_query_string("title:abc*");
        assert!(result.is_ok(), "Failed to parse prefix query: {:?}", result);
        if let Ok(ParsedQuery::Prefix { field, prefix }) = result {
            assert_eq!(field, Some("title".to_string()));
            // The trailing '*' is consumed by the grammar, not kept.
            assert_eq!(prefix, "abc");
        } else {
            panic!("Expected Prefix query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_prefix_url() {
        let mut builder = SchemaBuilder::default();
        let _site = builder.add_text_field("site", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        let parser = QueryLanguageParser::new(schema, vec![], tokenizers);

        // URLs contain ':' and '/' — the grammar must still treat this as a
        // single field-qualified prefix value.
        let result = parser.parse_query_string("site:https://reddit.com/r/Transhumanism*");
        assert!(
            result.is_ok(),
            "Failed to parse URL prefix query: {:?}",
            result
        );
        if let Ok(ParsedQuery::Prefix { field, prefix }) = result {
            assert_eq!(field, Some("site".to_string()));
            assert_eq!(prefix, "https://reddit.com/r/Transhumanism");
        } else {
            panic!("Expected Prefix query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_prefix_unqualified() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let result = parser.parse_query_string("transhuman*");
        assert!(
            result.is_ok(),
            "Failed to parse unqualified prefix: {:?}",
            result
        );
        if let Ok(ParsedQuery::Prefix { field, prefix }) = result {
            // No field qualifier: matched against the default fields later.
            assert_eq!(field, None);
            assert_eq!(prefix, "transhuman");
        } else {
            panic!("Expected Prefix query, got: {:?}", result);
        }
    }

    #[test]
    fn test_prefix_query_builds() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let _query = parser.parse("title:abc*").unwrap();
    }

    #[test]
    fn test_prefix_in_boolean() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        let _query = parser.parse("rust AND title:abc*").unwrap();
    }

    #[test]
    fn test_prefix_mixed_with_terms() {
        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _site = builder.add_text_field("site", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        let parser = QueryLanguageParser::new(schema, vec![title], tokenizers);

        // A prefix clause followed by bare terms: the three clauses combine
        // with implicit OR.
        let result =
            parser.parse_query_string("site:https://reddit.com/r/Transhumanism* longevity drugs");
        assert!(
            result.is_ok(),
            "Failed to parse mixed prefix+terms: {:?}",
            result
        );
        if let Ok(ParsedQuery::Or(parts)) = &result {
            assert_eq!(parts.len(), 3, "Expected 3 parts, got: {:?}", parts);
            // The first part may or may not be wrapped in a one-element And,
            // depending on how the grammar nests the expression.
            assert!(
                matches!(&parts[0], ParsedQuery::And(v) if v.len() == 1 && matches!(&v[0], ParsedQuery::Prefix { .. }))
                    || matches!(&parts[0], ParsedQuery::Prefix { .. }),
                "First part should be prefix: {:?}",
                parts[0]
            );
        } else {
            panic!("Expected Or query, got: {:?}", result);
        }

        let _query = parser
            .parse("site:https://reddit.com/r/Transhumanism* longevity drugs")
            .unwrap();
    }

    #[test]
    fn test_implicit_or_plain_terms() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Bare terms separated by whitespace combine with implicit OR.
        let result = parser.parse_query_string("hello world");
        assert!(result.is_ok(), "Failed to parse implicit OR: {:?}", result);
        if let Ok(ParsedQuery::Or(parts)) = &result {
            assert_eq!(parts.len(), 2);
        } else {
            panic!("Expected Or query, got: {:?}", result);
        }
    }
}
1006}