// hermes_core/dsl/ql/mod.rs

1//! Query language parser using pest
2//!
3//! Supports:
4//! - Term queries: `rust` or `title:rust`
5//! - Phrase queries: `"hello world"` or `title:"hello world"`
6//! - Boolean operators: `AND`, `OR`, `NOT` (or `&&`, `||`, `-`)
7//! - Grouping: `(rust OR python) AND programming`
8//! - Default fields for unqualified terms
9
10use pest::Parser;
11use pest_derive::Parser;
12use std::sync::Arc;
13
14use super::query_field_router::{QueryFieldRouter, RoutingMode};
15use super::schema::{Field, Schema};
16use crate::query::{BooleanQuery, Query, TermQuery};
17use crate::tokenizer::{BoxedTokenizer, TokenizerRegistry};
18
/// Pest-generated parser; the grammar rules (`Rule::*`) are defined in
/// `dsl/ql/ql.pest`.
#[derive(Parser)]
#[grammar = "dsl/ql/ql.pest"]
struct QueryParser;
22
/// Parsed query that can be converted to a Query trait object
#[derive(Debug, Clone)]
pub enum ParsedQuery {
    /// Single term, optionally field-qualified (`rust` or `title:rust`).
    Term {
        field: Option<String>,
        term: String,
    },
    /// Quoted phrase, optionally field-qualified (`"hello world"`).
    Phrase {
        field: Option<String>,
        phrase: String,
    },
    /// Dense vector ANN query
    Ann {
        field: String,
        vector: Vec<f32>,
        // `nprobe=` parameter; forwarded to `DenseVectorQuery::with_nprobe`
        // (defaults to 32 when omitted).
        nprobe: usize,
        // `rerank=` parameter; forwarded to
        // `DenseVectorQuery::with_rerank_factor` (defaults to 3 when omitted).
        rerank: usize,
    },
    /// Sparse vector query
    Sparse {
        field: String,
        // `(dimension index, weight)` pairs as written in the query.
        vector: Vec<(u32, f32)>,
    },
    /// Conjunction of sub-queries (`a AND b`).
    And(Vec<ParsedQuery>),
    /// Disjunction of sub-queries (`a OR b`).
    Or(Vec<ParsedQuery>),
    /// Negated sub-query (`NOT a`).
    Not(Box<ParsedQuery>),
}
50
/// Query language parser with schema awareness
pub struct QueryLanguageParser {
    /// Schema used to resolve field names and per-field tokenizer config.
    schema: Arc<Schema>,
    /// Fields searched when a term carries no explicit `field:` qualifier.
    default_fields: Vec<Field>,
    /// Registry from which per-field tokenizers are looked up.
    tokenizers: Arc<TokenizerRegistry>,
    /// Optional query field router for routing queries based on regex patterns
    field_router: Option<QueryFieldRouter>,
}
59
60impl QueryLanguageParser {
    /// Create a parser without a field router.
    ///
    /// `default_fields` are searched for unqualified terms; `tokenizers`
    /// supplies the analyzers looked up per field in [`Self::get_tokenizer`].
    pub fn new(
        schema: Arc<Schema>,
        default_fields: Vec<Field>,
        tokenizers: Arc<TokenizerRegistry>,
    ) -> Self {
        Self {
            schema,
            default_fields,
            tokenizers,
            field_router: None,
        }
    }
73
74    /// Create a parser with a query field router
75    pub fn with_router(
76        schema: Arc<Schema>,
77        default_fields: Vec<Field>,
78        tokenizers: Arc<TokenizerRegistry>,
79        router: QueryFieldRouter,
80    ) -> Self {
81        Self {
82            schema,
83            default_fields,
84            tokenizers,
85            field_router: Some(router),
86        }
87    }
88
    /// Set the query field router, replacing any previously configured one.
    pub fn set_router(&mut self, router: QueryFieldRouter) {
        self.field_router = Some(router);
    }
93
    /// Get the query field router, if one is configured.
    pub fn router(&self) -> Option<&QueryFieldRouter> {
        self.field_router.as_ref()
    }
98
99    /// Parse a query string into a Query
100    ///
101    /// Supports query language syntax (field:term, AND, OR, NOT, grouping)
102    /// and plain text (tokenized and searched across default fields).
103    ///
104    /// If a query field router is configured, the query is first checked against
105    /// routing rules. If a rule matches:
106    /// - In exclusive mode: only the target field is queried with the substituted value
107    /// - In additional mode: both the target field and default fields are queried
108    pub fn parse(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
109        let query_str = query_str.trim();
110        if query_str.is_empty() {
111            return Err("Empty query".to_string());
112        }
113
114        // Check if query matches any routing rules
115        if let Some(router) = &self.field_router
116            && let Some(routed) = router.route(query_str)
117        {
118            return self.build_routed_query(
119                &routed.query,
120                &routed.target_field,
121                routed.mode,
122                query_str,
123            );
124        }
125
126        // No routing match - parse normally
127        self.parse_normal(query_str)
128    }
129
130    /// Build a query from a routed match
131    fn build_routed_query(
132        &self,
133        routed_query: &str,
134        target_field: &str,
135        mode: RoutingMode,
136        original_query: &str,
137    ) -> Result<Box<dyn Query>, String> {
138        // Validate target field exists
139        let _field_id = self
140            .schema
141            .get_field(target_field)
142            .ok_or_else(|| format!("Unknown target field: {}", target_field))?;
143
144        // Build query for the target field with the substituted value
145        let target_query = self.build_term_query(Some(target_field), routed_query)?;
146
147        match mode {
148            RoutingMode::Exclusive => {
149                // Only query the target field
150                Ok(target_query)
151            }
152            RoutingMode::Additional => {
153                // Query both target field and default fields
154                let mut bool_query = BooleanQuery::new();
155                bool_query = bool_query.should(target_query);
156
157                // Also parse the original query against default fields
158                if let Ok(default_query) = self.parse_normal(original_query) {
159                    bool_query = bool_query.should(default_query);
160                }
161
162                Ok(Box::new(bool_query))
163            }
164        }
165    }
166
167    /// Parse query without routing (normal parsing path)
168    fn parse_normal(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
169        // Try parsing as query language first
170        match self.parse_query_string(query_str) {
171            Ok(parsed) => self.build_query(&parsed),
172            Err(_) => {
173                // If grammar parsing fails, treat as plain text
174                // Split by whitespace and create OR of terms
175                self.parse_plain_text(query_str)
176            }
177        }
178    }
179
180    /// Parse plain text as implicit OR of tokenized terms
181    fn parse_plain_text(&self, text: &str) -> Result<Box<dyn Query>, String> {
182        if self.default_fields.is_empty() {
183            return Err("No default fields configured".to_string());
184        }
185
186        let tokenizer = self.get_tokenizer(self.default_fields[0]);
187        let tokens: Vec<String> = tokenizer
188            .tokenize(text)
189            .into_iter()
190            .map(|t| t.text.to_lowercase())
191            .collect();
192
193        if tokens.is_empty() {
194            return Err("No tokens in query".to_string());
195        }
196
197        let mut bool_query = BooleanQuery::new();
198        for token in &tokens {
199            for &field_id in &self.default_fields {
200                bool_query = bool_query.should(TermQuery::text(field_id, token));
201            }
202        }
203        Ok(Box::new(bool_query))
204    }
205
206    fn parse_query_string(&self, query_str: &str) -> Result<ParsedQuery, String> {
207        let pairs = QueryParser::parse(Rule::query, query_str)
208            .map_err(|e| format!("Parse error: {}", e))?;
209
210        let query_pair = pairs.into_iter().next().ok_or("No query found")?;
211
212        // query = { SOI ~ or_expr ~ EOI }
213        self.parse_or_expr(query_pair.into_inner().next().unwrap())
214    }
215
216    fn parse_or_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
217        let mut inner = pair.into_inner();
218        let first = self.parse_and_expr(inner.next().unwrap())?;
219
220        let rest: Vec<ParsedQuery> = inner
221            .filter(|p| p.as_rule() == Rule::and_expr)
222            .map(|p| self.parse_and_expr(p))
223            .collect::<Result<Vec<_>, _>>()?;
224
225        if rest.is_empty() {
226            Ok(first)
227        } else {
228            let mut all = vec![first];
229            all.extend(rest);
230            Ok(ParsedQuery::Or(all))
231        }
232    }
233
234    fn parse_and_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
235        let mut inner = pair.into_inner();
236        let first = self.parse_primary(inner.next().unwrap())?;
237
238        let rest: Vec<ParsedQuery> = inner
239            .filter(|p| p.as_rule() == Rule::primary)
240            .map(|p| self.parse_primary(p))
241            .collect::<Result<Vec<_>, _>>()?;
242
243        if rest.is_empty() {
244            Ok(first)
245        } else {
246            let mut all = vec![first];
247            all.extend(rest);
248            Ok(ParsedQuery::And(all))
249        }
250    }
251
252    fn parse_primary(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
253        let mut negated = false;
254        let mut inner_query = None;
255
256        for inner in pair.into_inner() {
257            match inner.as_rule() {
258                Rule::not_op => negated = true,
259                Rule::group => {
260                    let or_expr = inner.into_inner().next().unwrap();
261                    inner_query = Some(self.parse_or_expr(or_expr)?);
262                }
263                Rule::ann_query => {
264                    inner_query = Some(self.parse_ann_query(inner)?);
265                }
266                Rule::sparse_query => {
267                    inner_query = Some(self.parse_sparse_query(inner)?);
268                }
269                Rule::phrase_query => {
270                    inner_query = Some(self.parse_phrase_query(inner)?);
271                }
272                Rule::term_query => {
273                    inner_query = Some(self.parse_term_query(inner)?);
274                }
275                _ => {}
276            }
277        }
278
279        let query = inner_query.ok_or("No query in primary")?;
280
281        if negated {
282            Ok(ParsedQuery::Not(Box::new(query)))
283        } else {
284            Ok(query)
285        }
286    }
287
288    fn parse_term_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
289        let mut field = None;
290        let mut term = String::new();
291
292        for inner in pair.into_inner() {
293            match inner.as_rule() {
294                Rule::field_spec => {
295                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
296                }
297                Rule::term => {
298                    term = inner.as_str().to_string();
299                }
300                _ => {}
301            }
302        }
303
304        Ok(ParsedQuery::Term { field, term })
305    }
306
307    fn parse_phrase_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
308        let mut field = None;
309        let mut phrase = String::new();
310
311        for inner in pair.into_inner() {
312            match inner.as_rule() {
313                Rule::field_spec => {
314                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
315                }
316                Rule::quoted_string => {
317                    let s = inner.as_str();
318                    phrase = s[1..s.len() - 1].to_string();
319                }
320                _ => {}
321            }
322        }
323
324        Ok(ParsedQuery::Phrase { field, phrase })
325    }
326
327    /// Parse an ANN query: field:ann([1.0, 2.0, 3.0], nprobe=32, rerank=3)
328    fn parse_ann_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
329        let mut field = String::new();
330        let mut vector = Vec::new();
331        let mut nprobe = 32usize;
332        let mut rerank = 3usize;
333
334        for inner in pair.into_inner() {
335            match inner.as_rule() {
336                Rule::field_spec => {
337                    field = inner.into_inner().next().unwrap().as_str().to_string();
338                }
339                Rule::vector_array => {
340                    for num in inner.into_inner() {
341                        if num.as_rule() == Rule::number
342                            && let Ok(v) = num.as_str().parse::<f32>()
343                        {
344                            vector.push(v);
345                        }
346                    }
347                }
348                Rule::ann_params => {
349                    for param in inner.into_inner() {
350                        if param.as_rule() == Rule::ann_param {
351                            // ann_param = { ("nprobe" | "rerank") ~ "=" ~ number }
352                            let param_str = param.as_str();
353                            if let Some(eq_pos) = param_str.find('=') {
354                                let name = &param_str[..eq_pos];
355                                let value = &param_str[eq_pos + 1..];
356                                let val: usize = value.parse().unwrap_or(0);
357                                match name {
358                                    "nprobe" => nprobe = val,
359                                    "rerank" => rerank = val,
360                                    _ => {}
361                                }
362                            }
363                        }
364                    }
365                }
366                _ => {}
367            }
368        }
369
370        Ok(ParsedQuery::Ann {
371            field,
372            vector,
373            nprobe,
374            rerank,
375        })
376    }
377
378    /// Parse a sparse vector query: field:sparse({1: 0.5, 5: 0.3})
379    fn parse_sparse_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
380        let mut field = String::new();
381        let mut vector = Vec::new();
382
383        for inner in pair.into_inner() {
384            match inner.as_rule() {
385                Rule::field_spec => {
386                    field = inner.into_inner().next().unwrap().as_str().to_string();
387                }
388                Rule::sparse_map => {
389                    for entry in inner.into_inner() {
390                        if entry.as_rule() == Rule::sparse_entry {
391                            let mut entry_inner = entry.into_inner();
392                            if let (Some(idx), Some(weight)) =
393                                (entry_inner.next(), entry_inner.next())
394                                && let (Ok(i), Ok(w)) =
395                                    (idx.as_str().parse::<u32>(), weight.as_str().parse::<f32>())
396                            {
397                                vector.push((i, w));
398                            }
399                        }
400                    }
401                }
402                _ => {}
403            }
404        }
405
406        Ok(ParsedQuery::Sparse { field, vector })
407    }
408
409    fn build_query(&self, parsed: &ParsedQuery) -> Result<Box<dyn Query>, String> {
410        use crate::query::{DenseVectorQuery, SparseVectorQuery};
411
412        match parsed {
413            ParsedQuery::Term { field, term } => self.build_term_query(field.as_deref(), term),
414            ParsedQuery::Phrase { field, phrase } => {
415                self.build_phrase_query(field.as_deref(), phrase)
416            }
417            ParsedQuery::Ann {
418                field,
419                vector,
420                nprobe,
421                rerank,
422            } => {
423                let field_id = self
424                    .schema
425                    .get_field(field)
426                    .ok_or_else(|| format!("Unknown field: {}", field))?;
427                let query = DenseVectorQuery::new(field_id, vector.clone())
428                    .with_nprobe(*nprobe)
429                    .with_rerank_factor(*rerank);
430                Ok(Box::new(query))
431            }
432            ParsedQuery::Sparse { field, vector } => {
433                let field_id = self
434                    .schema
435                    .get_field(field)
436                    .ok_or_else(|| format!("Unknown field: {}", field))?;
437                let query = SparseVectorQuery::new(field_id, vector.clone());
438                Ok(Box::new(query))
439            }
440            ParsedQuery::And(queries) => {
441                let mut bool_query = BooleanQuery::new();
442                for q in queries {
443                    bool_query = bool_query.must(self.build_query(q)?);
444                }
445                Ok(Box::new(bool_query))
446            }
447            ParsedQuery::Or(queries) => {
448                let mut bool_query = BooleanQuery::new();
449                for q in queries {
450                    bool_query = bool_query.should(self.build_query(q)?);
451                }
452                Ok(Box::new(bool_query))
453            }
454            ParsedQuery::Not(inner) => {
455                // NOT query needs a context - wrap in a match-all with must_not
456                let mut bool_query = BooleanQuery::new();
457                bool_query = bool_query.must_not(self.build_query(inner)?);
458                Ok(Box::new(bool_query))
459            }
460        }
461    }
462
463    fn build_term_query(&self, field: Option<&str>, term: &str) -> Result<Box<dyn Query>, String> {
464        if let Some(field_name) = field {
465            // Field-qualified term: tokenize using field's tokenizer
466            let field_id = self
467                .schema
468                .get_field(field_name)
469                .ok_or_else(|| format!("Unknown field: {}", field_name))?;
470            let tokenizer = self.get_tokenizer(field_id);
471            let tokens: Vec<String> = tokenizer
472                .tokenize(term)
473                .into_iter()
474                .map(|t| t.text.to_lowercase())
475                .collect();
476
477            if tokens.is_empty() {
478                return Err("No tokens in term".to_string());
479            }
480
481            if tokens.len() == 1 {
482                Ok(Box::new(TermQuery::text(field_id, &tokens[0])))
483            } else {
484                // Multiple tokens from single term - AND them together
485                let mut bool_query = BooleanQuery::new();
486                for token in &tokens {
487                    bool_query = bool_query.must(TermQuery::text(field_id, token));
488                }
489                Ok(Box::new(bool_query))
490            }
491        } else if !self.default_fields.is_empty() {
492            // Unqualified term: tokenize and search across default fields
493            let tokenizer = self.get_tokenizer(self.default_fields[0]);
494            let tokens: Vec<String> = tokenizer
495                .tokenize(term)
496                .into_iter()
497                .map(|t| t.text.to_lowercase())
498                .collect();
499
500            if tokens.is_empty() {
501                return Err("No tokens in term".to_string());
502            }
503
504            // Build SHOULD query across all default fields for each token
505            let mut bool_query = BooleanQuery::new();
506            for token in &tokens {
507                for &field_id in &self.default_fields {
508                    bool_query = bool_query.should(TermQuery::text(field_id, token));
509                }
510            }
511            Ok(Box::new(bool_query))
512        } else {
513            Err("No field specified and no default fields configured".to_string())
514        }
515    }
516
517    fn build_phrase_query(
518        &self,
519        field: Option<&str>,
520        phrase: &str,
521    ) -> Result<Box<dyn Query>, String> {
522        // For phrase queries, tokenize and create AND query of terms
523        let field_id = if let Some(field_name) = field {
524            self.schema
525                .get_field(field_name)
526                .ok_or_else(|| format!("Unknown field: {}", field_name))?
527        } else if !self.default_fields.is_empty() {
528            self.default_fields[0]
529        } else {
530            return Err("No field specified and no default fields configured".to_string());
531        };
532
533        let tokenizer = self.get_tokenizer(field_id);
534        let tokens: Vec<String> = tokenizer
535            .tokenize(phrase)
536            .into_iter()
537            .map(|t| t.text.to_lowercase())
538            .collect();
539
540        if tokens.is_empty() {
541            return Err("No tokens in phrase".to_string());
542        }
543
544        if tokens.len() == 1 {
545            return Ok(Box::new(TermQuery::text(field_id, &tokens[0])));
546        }
547
548        // Create AND query for all tokens (simplified phrase matching)
549        let mut bool_query = BooleanQuery::new();
550        for token in &tokens {
551            bool_query = bool_query.must(TermQuery::text(field_id, token));
552        }
553
554        // If no field specified and multiple default fields, wrap in OR
555        if field.is_none() && self.default_fields.len() > 1 {
556            let mut outer = BooleanQuery::new();
557            for &f in &self.default_fields {
558                let tokenizer = self.get_tokenizer(f);
559                let tokens: Vec<String> = tokenizer
560                    .tokenize(phrase)
561                    .into_iter()
562                    .map(|t| t.text.to_lowercase())
563                    .collect();
564
565                let mut field_query = BooleanQuery::new();
566                for token in &tokens {
567                    field_query = field_query.must(TermQuery::text(f, token));
568                }
569                outer = outer.should(field_query);
570            }
571            return Ok(Box::new(outer));
572        }
573
574        Ok(Box::new(bool_query))
575    }
576
577    fn get_tokenizer(&self, field: Field) -> BoxedTokenizer {
578        // Get tokenizer name from schema field entry, fallback to "default"
579        let tokenizer_name = self
580            .schema
581            .get_field_entry(field)
582            .and_then(|entry| entry.tokenizer.as_deref())
583            .unwrap_or("default");
584
585        self.tokenizers
586            .get(tokenizer_name)
587            .unwrap_or_else(|| Box::new(crate::tokenizer::LowercaseTokenizer))
588    }
589}
590
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::SchemaBuilder;
    use crate::tokenizer::TokenizerRegistry;

    // Shared fixture: a schema with "title" and "body" text fields (both used
    // as default fields) and a default tokenizer registry.
    fn setup() -> (Arc<Schema>, Vec<Field>, Arc<TokenizerRegistry>) {
        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        (schema, vec![title, body], tokenizers)
    }

    #[test]
    fn test_simple_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse without error - creates BooleanQuery across default fields
        let _query = parser.parse("rust").unwrap();
    }

    #[test]
    fn test_field_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse field:term syntax
        let _query = parser.parse("title:rust").unwrap();
    }

    #[test]
    fn test_boolean_and() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse AND boolean query
        let _query = parser.parse("rust AND programming").unwrap();
    }

    #[test]
    fn test_match_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should tokenize and create boolean query
        let _query = parser.parse("hello world").unwrap();
    }

    #[test]
    fn test_phrase_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse quoted phrase
        let _query = parser.parse("\"hello world\"").unwrap();
    }

    #[test]
    fn test_boolean_or() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse OR boolean query
        let _query = parser.parse("rust OR python").unwrap();
    }

    #[test]
    fn test_complex_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse complex boolean with grouping
        let _query = parser.parse("(rust OR python) AND programming").unwrap();
    }

    #[test]
    fn test_router_exclusive_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        // Rule capturing a DOI and rewriting it as a uri-field lookup.
        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
            substitution: "doi://{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should route DOI query to uri field
        let _query = parser.parse("doi:10.1234/test.123").unwrap();
    }

    #[test]
    fn test_router_additional_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        // Rule matching "#<digits>" and routing the digits to the uri field.
        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"#(\d+)".to_string(),
            substitution: "{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Additional,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should route to both uri field and default fields
        let _query = parser.parse("#42").unwrap();
    }

    #[test]
    fn test_router_no_match_falls_through() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:".to_string(),
            substitution: "{0}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should NOT match and fall through to normal parsing
        let _query = parser.parse("rust programming").unwrap();
    }

    #[test]
    fn test_router_invalid_target_field() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        // "nonexistent" is deliberately absent from the schema.
        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"test".to_string(),
            substitution: "{0}".to_string(),
            target_field: "nonexistent".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should fail because target field doesn't exist
        let result = parser.parse("test");
        assert!(result.is_err());
        let err = result.err().unwrap();
        assert!(err.contains("Unknown target field"));
    }

    #[test]
    fn test_parse_ann_query() {
        let mut builder = SchemaBuilder::default();
        let embedding = builder.add_dense_vector_field("embedding", 128, true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![embedding], tokenizers);

        // Parse ANN query
        let result = parser.parse_query_string("embedding:ann([1.0, 2.0, 3.0], nprobe=32)");
        assert!(result.is_ok(), "Failed to parse ANN query: {:?}", result);

        if let Ok(ParsedQuery::Ann {
            field,
            vector,
            nprobe,
            rerank,
        }) = result
        {
            assert_eq!(field, "embedding");
            assert_eq!(vector, vec![1.0, 2.0, 3.0]);
            assert_eq!(nprobe, 32);
            assert_eq!(rerank, 3); // default
        } else {
            panic!("Expected Ann query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_sparse_query() {
        let mut builder = SchemaBuilder::default();
        let sparse = builder.add_text_field("sparse", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![sparse], tokenizers);

        // Parse sparse query
        let result = parser.parse_query_string("sparse:sparse({1: 0.5, 5: 0.3})");
        assert!(result.is_ok(), "Failed to parse sparse query: {:?}", result);

        if let Ok(ParsedQuery::Sparse { field, vector }) = result {
            assert_eq!(field, "sparse");
            assert_eq!(vector, vec![(1, 0.5), (5, 0.3)]);
        } else {
            panic!("Expected Sparse query, got: {:?}", result);
        }
    }
}