hermes_core/dsl/ql/mod.rs

//! Query language parser using pest
//!
//! Supports:
//! - Term queries: `rust` or `title:rust`
//! - Phrase queries: `"hello world"` or `title:"hello world"`
//! - Boolean operators: `AND`, `OR`, `NOT` (or `&&`, `||`, `-`)
//! - Grouping: `(rust OR python) AND programming`
//! - Default fields for unqualified terms
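//!
//! # Example
//!
//! A minimal usage sketch (marked `ignore`), mirroring the schema and tokenizer
//! setup used in the tests at the bottom of this module:
//!
//! ```ignore
//! use std::sync::Arc;
//! use crate::dsl::SchemaBuilder;
//! use crate::tokenizer::TokenizerRegistry;
//!
//! let mut builder = SchemaBuilder::default();
//! let title = builder.add_text_field("title", true, true);
//! let body = builder.add_text_field("body", true, true);
//! let schema = Arc::new(builder.build());
//! let tokenizers = Arc::new(TokenizerRegistry::default());
//!
//! // Unqualified terms are searched across the default fields (`title`, `body`).
//! let parser = QueryLanguageParser::new(schema, vec![title, body], tokenizers);
//! let query = parser.parse("(rust OR python) AND title:\"hello world\"").unwrap();
//! ```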

use pest::Parser;
use pest_derive::Parser;
use std::sync::Arc;

use super::query_field_router::{QueryFieldRouter, RoutingMode};
use super::schema::{Field, Schema};
use crate::query::{BooleanQuery, Query, TermQuery};
use crate::tokenizer::{BoxedTokenizer, TokenizerRegistry};

#[derive(Parser)]
#[grammar = "dsl/ql/ql.pest"]
struct QueryParser;

/// Parsed query that can be converted to a Query trait object
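///
/// For example, `(rust OR python) AND programming` parses into roughly the
/// following tree before schema resolution (a sketch, not a stable representation):
///
/// ```ignore
/// ParsedQuery::And(vec![
///     ParsedQuery::Or(vec![
///         ParsedQuery::Term { field: None, term: "rust".to_string() },
///         ParsedQuery::Term { field: None, term: "python".to_string() },
///     ]),
///     ParsedQuery::Term { field: None, term: "programming".to_string() },
/// ])
/// ```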
#[derive(Debug, Clone)]
pub enum ParsedQuery {
    Term {
        field: Option<String>,
        term: String,
    },
    Phrase {
        field: Option<String>,
        phrase: String,
    },
    /// Dense vector KNN query
    Knn {
        field: String,
        vector: Vec<f32>,
        k: usize,
        nprobe: usize,
        rerank: usize,
    },
    /// Sparse vector query
    Sparse {
        field: String,
        indices: Vec<u32>,
        weights: Vec<f32>,
        k: usize,
    },
    And(Vec<ParsedQuery>),
    Or(Vec<ParsedQuery>),
    Not(Box<ParsedQuery>),
}

/// Query language parser with schema awareness
pub struct QueryLanguageParser {
    schema: Arc<Schema>,
    default_fields: Vec<Field>,
    tokenizers: Arc<TokenizerRegistry>,
    /// Optional query field router for routing queries based on regex patterns
    field_router: Option<QueryFieldRouter>,
}

impl QueryLanguageParser {
    pub fn new(
        schema: Arc<Schema>,
        default_fields: Vec<Field>,
        tokenizers: Arc<TokenizerRegistry>,
    ) -> Self {
        Self {
            schema,
            default_fields,
            tokenizers,
            field_router: None,
        }
    }

    /// Create a parser with a query field router
    pub fn with_router(
        schema: Arc<Schema>,
        default_fields: Vec<Field>,
        tokenizers: Arc<TokenizerRegistry>,
        router: QueryFieldRouter,
    ) -> Self {
        Self {
            schema,
            default_fields,
            tokenizers,
            field_router: Some(router),
        }
    }

    /// Set the query field router
    pub fn set_router(&mut self, router: QueryFieldRouter) {
        self.field_router = Some(router);
    }

    /// Get the query field router
    pub fn router(&self) -> Option<&QueryFieldRouter> {
        self.field_router.as_ref()
    }

    /// Parse a query string into a Query
    ///
    /// Supports query language syntax (field:term, AND, OR, NOT, grouping)
    /// and plain text (tokenized and searched across default fields).
    ///
    /// If a query field router is configured, the query is first checked against
    /// routing rules. If a rule matches:
    /// - In exclusive mode: only the target field is queried with the substituted value
    /// - In additional mode: both the target field and default fields are queried
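    ///
    /// # Example
    ///
    /// A minimal routing sketch (marked `ignore`); the rule mirrors
    /// `test_router_exclusive_mode` below, and `schema`/`tokenizers` are built as in
    /// the module-level example:
    ///
    /// ```ignore
    /// use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};
    ///
    /// let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
    ///     pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
    ///     substitution: "doi://{1}".to_string(),
    ///     target_field: "uri".to_string(),
    ///     mode: RoutingMode::Exclusive,
    /// }])
    /// .unwrap();
    ///
    /// let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);
    /// // Matches the rule, so only the `uri` field is queried with the substituted value.
    /// let query = parser.parse("doi:10.1234/test.123").unwrap();
    /// ```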
    pub fn parse(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
        let query_str = query_str.trim();
        if query_str.is_empty() {
            return Err("Empty query".to_string());
        }

        // Check if query matches any routing rules
        if let Some(router) = &self.field_router
            && let Some(routed) = router.route(query_str)
        {
            return self.build_routed_query(
                &routed.query,
                &routed.target_field,
                routed.mode,
                query_str,
            );
        }

        // No routing match - parse normally
        self.parse_normal(query_str)
    }

    /// Build a query from a routed match
    fn build_routed_query(
        &self,
        routed_query: &str,
        target_field: &str,
        mode: RoutingMode,
        original_query: &str,
    ) -> Result<Box<dyn Query>, String> {
        // Validate target field exists
        let _field_id = self
            .schema
            .get_field(target_field)
            .ok_or_else(|| format!("Unknown target field: {}", target_field))?;

        // Build query for the target field with the substituted value
        let target_query = self.build_term_query(Some(target_field), routed_query)?;

        match mode {
            RoutingMode::Exclusive => {
                // Only query the target field
                Ok(target_query)
            }
            RoutingMode::Additional => {
                // Query both target field and default fields
                let mut bool_query = BooleanQuery::new();
                bool_query = bool_query.should(target_query);

                // Also parse the original query against default fields
                if let Ok(default_query) = self.parse_normal(original_query) {
                    bool_query = bool_query.should(default_query);
                }

                Ok(Box::new(bool_query))
            }
        }
    }

    /// Parse query without routing (normal parsing path)
    fn parse_normal(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
        // Try parsing as query language first
        match self.parse_query_string(query_str) {
            Ok(parsed) => self.build_query(&parsed),
            Err(_) => {
                // If grammar parsing fails, fall back to plain text: tokenize with the
                // default field's tokenizer and OR the terms across the default fields
                self.parse_plain_text(query_str)
            }
        }
    }

    /// Parse plain text as implicit OR of tokenized terms
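    ///
    /// For example, with default fields `title` and `body`, the input "hello world"
    /// becomes a SHOULD query over title:hello, body:hello, title:world, body:world.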
    fn parse_plain_text(&self, text: &str) -> Result<Box<dyn Query>, String> {
        if self.default_fields.is_empty() {
            return Err("No default fields configured".to_string());
        }

        let tokenizer = self.get_tokenizer(self.default_fields[0]);
        let tokens: Vec<String> = tokenizer
            .tokenize(text)
            .into_iter()
            .map(|t| t.text.to_lowercase())
            .collect();

        if tokens.is_empty() {
            return Err("No tokens in query".to_string());
        }

        let mut bool_query = BooleanQuery::new();
        for token in &tokens {
            for &field_id in &self.default_fields {
                bool_query = bool_query.should(TermQuery::text(field_id, token));
            }
        }
        Ok(Box::new(bool_query))
    }

    fn parse_query_string(&self, query_str: &str) -> Result<ParsedQuery, String> {
        let pairs = QueryParser::parse(Rule::query, query_str)
            .map_err(|e| format!("Parse error: {}", e))?;

        let query_pair = pairs.into_iter().next().ok_or("No query found")?;

        // query = { SOI ~ or_expr ~ EOI }
        self.parse_or_expr(query_pair.into_inner().next().unwrap())
    }

    fn parse_or_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut inner = pair.into_inner();
        let first = self.parse_and_expr(inner.next().unwrap())?;

        let rest: Vec<ParsedQuery> = inner
            .filter(|p| p.as_rule() == Rule::and_expr)
            .map(|p| self.parse_and_expr(p))
            .collect::<Result<Vec<_>, _>>()?;

        if rest.is_empty() {
            Ok(first)
        } else {
            let mut all = vec![first];
            all.extend(rest);
            Ok(ParsedQuery::Or(all))
        }
    }

    fn parse_and_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut inner = pair.into_inner();
        let first = self.parse_primary(inner.next().unwrap())?;

        let rest: Vec<ParsedQuery> = inner
            .filter(|p| p.as_rule() == Rule::primary)
            .map(|p| self.parse_primary(p))
            .collect::<Result<Vec<_>, _>>()?;

        if rest.is_empty() {
            Ok(first)
        } else {
            let mut all = vec![first];
            all.extend(rest);
            Ok(ParsedQuery::And(all))
        }
    }

    fn parse_primary(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut negated = false;
        let mut inner_query = None;

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::not_op => negated = true,
                Rule::group => {
                    let or_expr = inner.into_inner().next().unwrap();
                    inner_query = Some(self.parse_or_expr(or_expr)?);
                }
                Rule::ann_query => {
                    inner_query = Some(self.parse_ann_query(inner)?);
                }
                Rule::sparse_query => {
                    inner_query = Some(self.parse_sparse_query(inner)?);
                }
                Rule::phrase_query => {
                    inner_query = Some(self.parse_phrase_query(inner)?);
                }
                Rule::term_query => {
                    inner_query = Some(self.parse_term_query(inner)?);
                }
                _ => {}
            }
        }

        let query = inner_query.ok_or("No query in primary")?;

        if negated {
            Ok(ParsedQuery::Not(Box::new(query)))
        } else {
            Ok(query)
        }
    }

    fn parse_term_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = None;
        let mut term = String::new();

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
                }
                Rule::term => {
                    term = inner.as_str().to_string();
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Term { field, term })
    }

    fn parse_phrase_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = None;
        let mut phrase = String::new();

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
                }
                Rule::quoted_string => {
                    let s = inner.as_str();
                    phrase = s[1..s.len() - 1].to_string();
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Phrase { field, phrase })
    }

    /// Parse an ANN query: field:ann([1.0, 2.0, 3.0], k=10, nprobe=32)
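    ///
    /// Parameters omitted from the query fall back to the defaults set below:
    /// `k = 10`, `nprobe = 32`, `rerank = 3`.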
    fn parse_ann_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = String::new();
        let mut vector = Vec::new();
        let mut k = 10usize;
        let mut nprobe = 32usize;
        let mut rerank = 3usize;

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = inner.into_inner().next().unwrap().as_str().to_string();
                }
                Rule::vector_array => {
                    for num in inner.into_inner() {
                        if num.as_rule() == Rule::number
                            && let Ok(v) = num.as_str().parse::<f32>()
                        {
                            vector.push(v);
                        }
                    }
                }
                Rule::ann_params => {
                    for param in inner.into_inner() {
                        match param.as_rule() {
                            Rule::number => {
                                // Simple k value
                                if let Ok(v) = param.as_str().parse::<usize>() {
                                    k = v;
                                }
                            }
                            Rule::ann_param => {
                                let mut param_inner = param.into_inner();
                                if let (Some(name), Some(value)) =
                                    (param_inner.next(), param_inner.next())
                                {
                                    let val: usize = value.as_str().parse().unwrap_or(0);
                                    match name.as_str() {
                                        "k" => k = val,
                                        "nprobe" => nprobe = val,
                                        "rerank" => rerank = val,
                                        _ => {}
                                    }
                                }
                            }
                            _ => {}
                        }
                    }
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Knn {
            field,
            vector,
            k,
            nprobe,
            rerank,
        })
    }

    /// Parse a sparse vector query: field:sparse({1: 0.5, 5: 0.3}, k=10)
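    ///
    /// `k` defaults to 10 when not given.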
    fn parse_sparse_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = String::new();
        let mut indices = Vec::new();
        let mut weights = Vec::new();
        let mut k = 10usize;

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = inner.into_inner().next().unwrap().as_str().to_string();
                }
                Rule::sparse_map => {
                    for entry in inner.into_inner() {
                        if entry.as_rule() == Rule::sparse_entry {
                            let mut entry_inner = entry.into_inner();
                            if let (Some(idx), Some(weight)) =
                                (entry_inner.next(), entry_inner.next())
                                && let (Ok(i), Ok(w)) =
                                    (idx.as_str().parse::<u32>(), weight.as_str().parse::<f32>())
                            {
                                indices.push(i);
                                weights.push(w);
                            }
                        }
                    }
                }
                Rule::ann_params => {
                    for param in inner.into_inner() {
                        if param.as_rule() == Rule::number {
                            if let Ok(v) = param.as_str().parse::<usize>() {
                                k = v;
                            }
                        } else if param.as_rule() == Rule::ann_param {
                            let mut param_inner = param.into_inner();
                            if let (Some(name), Some(value)) =
                                (param_inner.next(), param_inner.next())
                                && name.as_str() == "k"
                            {
                                k = value.as_str().parse().unwrap_or(10);
                            }
                        }
                    }
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Sparse {
            field,
            indices,
            weights,
            k,
        })
    }

    fn build_query(&self, parsed: &ParsedQuery) -> Result<Box<dyn Query>, String> {
        use crate::query::{DenseVectorQuery, SparseVectorQuery};

        match parsed {
            ParsedQuery::Term { field, term } => self.build_term_query(field.as_deref(), term),
            ParsedQuery::Phrase { field, phrase } => {
                self.build_phrase_query(field.as_deref(), phrase)
            }
            ParsedQuery::Knn {
                field,
                vector,
                k,
                nprobe,
                rerank,
            } => {
                let field_id = self
                    .schema
                    .get_field(field)
                    .ok_or_else(|| format!("Unknown field: {}", field))?;
                let query = DenseVectorQuery::new(field_id, vector.clone(), *k)
                    .with_nprobe(*nprobe)
                    .with_rerank_factor(*rerank);
                Ok(Box::new(query))
            }
            ParsedQuery::Sparse {
                field,
                indices,
                weights,
                k,
            } => {
                let field_id = self
                    .schema
                    .get_field(field)
                    .ok_or_else(|| format!("Unknown field: {}", field))?;
                let query = SparseVectorQuery::new(field_id, indices.clone(), weights.clone(), *k);
                Ok(Box::new(query))
            }
            ParsedQuery::And(queries) => {
                let mut bool_query = BooleanQuery::new();
                for q in queries {
                    bool_query = bool_query.must(self.build_query(q)?);
                }
                Ok(Box::new(bool_query))
            }
            ParsedQuery::Or(queries) => {
                let mut bool_query = BooleanQuery::new();
                for q in queries {
                    bool_query = bool_query.should(self.build_query(q)?);
                }
                Ok(Box::new(bool_query))
            }
            ParsedQuery::Not(inner) => {
                // A bare NOT has no positive context; express it as a BooleanQuery
                // with only a must_not clause
                let mut bool_query = BooleanQuery::new();
                bool_query = bool_query.must_not(self.build_query(inner)?);
                Ok(Box::new(bool_query))
            }
        }
    }

    fn build_term_query(&self, field: Option<&str>, term: &str) -> Result<Box<dyn Query>, String> {
        if let Some(field_name) = field {
            // Field-qualified term: tokenize using field's tokenizer
            let field_id = self
                .schema
                .get_field(field_name)
                .ok_or_else(|| format!("Unknown field: {}", field_name))?;
            let tokenizer = self.get_tokenizer(field_id);
            let tokens: Vec<String> = tokenizer
                .tokenize(term)
                .into_iter()
                .map(|t| t.text.to_lowercase())
                .collect();

            if tokens.is_empty() {
                return Err("No tokens in term".to_string());
            }

            if tokens.len() == 1 {
                Ok(Box::new(TermQuery::text(field_id, &tokens[0])))
            } else {
                // Multiple tokens from single term - AND them together
                let mut bool_query = BooleanQuery::new();
                for token in &tokens {
                    bool_query = bool_query.must(TermQuery::text(field_id, token));
                }
                Ok(Box::new(bool_query))
            }
        } else if !self.default_fields.is_empty() {
            // Unqualified term: tokenize and search across default fields
            let tokenizer = self.get_tokenizer(self.default_fields[0]);
            let tokens: Vec<String> = tokenizer
                .tokenize(term)
                .into_iter()
                .map(|t| t.text.to_lowercase())
                .collect();

            if tokens.is_empty() {
                return Err("No tokens in term".to_string());
            }

            // Build SHOULD query across all default fields for each token
            let mut bool_query = BooleanQuery::new();
            for token in &tokens {
                for &field_id in &self.default_fields {
                    bool_query = bool_query.should(TermQuery::text(field_id, token));
                }
            }
            Ok(Box::new(bool_query))
        } else {
            Err("No field specified and no default fields configured".to_string())
        }
    }

    fn build_phrase_query(
        &self,
        field: Option<&str>,
        phrase: &str,
    ) -> Result<Box<dyn Query>, String> {
        // For phrase queries, tokenize and create AND query of terms
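        // (positions are not checked, so e.g. "hello world" on `title` becomes
        // must(title:hello) AND must(title:world))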
        let field_id = if let Some(field_name) = field {
            self.schema
                .get_field(field_name)
                .ok_or_else(|| format!("Unknown field: {}", field_name))?
        } else if !self.default_fields.is_empty() {
            self.default_fields[0]
        } else {
            return Err("No field specified and no default fields configured".to_string());
        };

        let tokenizer = self.get_tokenizer(field_id);
        let tokens: Vec<String> = tokenizer
            .tokenize(phrase)
            .into_iter()
            .map(|t| t.text.to_lowercase())
            .collect();

        if tokens.is_empty() {
            return Err("No tokens in phrase".to_string());
        }

        if tokens.len() == 1 {
            return Ok(Box::new(TermQuery::text(field_id, &tokens[0])));
        }

        // Create AND query for all tokens (simplified phrase matching)
        let mut bool_query = BooleanQuery::new();
        for token in &tokens {
            bool_query = bool_query.must(TermQuery::text(field_id, token));
        }

        // If no field was specified and there are multiple default fields, rebuild
        // a per-field AND query and OR them together
        if field.is_none() && self.default_fields.len() > 1 {
            let mut outer = BooleanQuery::new();
            for &f in &self.default_fields {
                let tokenizer = self.get_tokenizer(f);
                let tokens: Vec<String> = tokenizer
                    .tokenize(phrase)
                    .into_iter()
                    .map(|t| t.text.to_lowercase())
                    .collect();

                let mut field_query = BooleanQuery::new();
                for token in &tokens {
                    field_query = field_query.must(TermQuery::text(f, token));
                }
                outer = outer.should(field_query);
            }
            return Ok(Box::new(outer));
        }

        Ok(Box::new(bool_query))
    }

    fn get_tokenizer(&self, field: Field) -> BoxedTokenizer {
        // Get tokenizer name from schema field entry, fallback to "default"
        let tokenizer_name = self
            .schema
            .get_field_entry(field)
            .and_then(|entry| entry.tokenizer.as_deref())
            .unwrap_or("default");

        self.tokenizers
            .get(tokenizer_name)
            .unwrap_or_else(|| Box::new(crate::tokenizer::LowercaseTokenizer))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::SchemaBuilder;
    use crate::tokenizer::TokenizerRegistry;

    fn setup() -> (Arc<Schema>, Vec<Field>, Arc<TokenizerRegistry>) {
        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        (schema, vec![title, body], tokenizers)
    }

    #[test]
    fn test_simple_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse without error - creates BooleanQuery across default fields
        let _query = parser.parse("rust").unwrap();
    }

    #[test]
    fn test_field_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse field:term syntax
        let _query = parser.parse("title:rust").unwrap();
    }

    #[test]
    fn test_boolean_and() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse AND boolean query
        let _query = parser.parse("rust AND programming").unwrap();
    }

    #[test]
    fn test_match_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should tokenize and create boolean query
        let _query = parser.parse("hello world").unwrap();
    }

    #[test]
    fn test_phrase_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse quoted phrase
        let _query = parser.parse("\"hello world\"").unwrap();
    }

    #[test]
    fn test_boolean_or() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse OR boolean query
        let _query = parser.parse("rust OR python").unwrap();
    }

    #[test]
    fn test_complex_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse complex boolean with grouping
        let _query = parser.parse("(rust OR python) AND programming").unwrap();
    }

    #[test]
    fn test_router_exclusive_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
            substitution: "doi://{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should route DOI query to uri field
        let _query = parser.parse("doi:10.1234/test.123").unwrap();
    }

    #[test]
    fn test_router_additional_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"#(\d+)".to_string(),
            substitution: "{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Additional,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should route to both uri field and default fields
        let _query = parser.parse("#42").unwrap();
    }

    #[test]
    fn test_router_no_match_falls_through() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:".to_string(),
            substitution: "{0}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should NOT match and fall through to normal parsing
        let _query = parser.parse("rust programming").unwrap();
    }

    #[test]
    fn test_router_invalid_target_field() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"test".to_string(),
            substitution: "{0}".to_string(),
            target_field: "nonexistent".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should fail because target field doesn't exist
        let result = parser.parse("test");
        assert!(result.is_err());
        let err = result.err().unwrap();
        assert!(err.contains("Unknown target field"));
    }

    #[test]
    fn test_parse_knn_query() {
        let mut builder = SchemaBuilder::default();
        let embedding = builder.add_dense_vector_field("embedding", 128, true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![embedding], tokenizers);

        // Parse KNN query
        let result = parser.parse_query_string("embedding:ann([1.0, 2.0, 3.0], k=10, nprobe=32)");
        assert!(result.is_ok(), "Failed to parse KNN query: {:?}", result);

        if let Ok(ParsedQuery::Knn {
            field,
            vector,
            k,
            nprobe,
            rerank,
        }) = result
        {
            assert_eq!(field, "embedding");
            assert_eq!(vector, vec![1.0, 2.0, 3.0]);
            assert_eq!(k, 10);
            assert_eq!(nprobe, 32);
            assert_eq!(rerank, 3); // default
        } else {
            panic!("Expected Knn query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_sparse_query() {
        let mut builder = SchemaBuilder::default();
        let sparse = builder.add_text_field("sparse", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![sparse], tokenizers);

        // Parse sparse query
        let result = parser.parse_query_string("sparse:sparse({1: 0.5, 5: 0.3}, k=20)");
        assert!(result.is_ok(), "Failed to parse sparse query: {:?}", result);

        if let Ok(ParsedQuery::Sparse {
            field,
            indices,
            weights,
            k,
        }) = result
        {
            assert_eq!(field, "sparse");
            assert_eq!(indices, vec![1, 5]);
            assert_eq!(weights, vec![0.5, 0.3]);
            assert_eq!(k, 20);
        } else {
            panic!("Expected Sparse query, got: {:?}", result);
        }
    }
}