hermes_core/dsl/ql/mod.rs

//! Query language parser using pest
//!
//! Supports:
//! - Term queries: `rust` or `title:rust`
//! - Phrase queries: `"hello world"` or `title:"hello world"`
//! - Boolean operators: `AND`, `OR`, `NOT` (or `&&`, `||`, `-`)
//! - Grouping: `(rust OR python) AND programming`
//! - Dense vector (ANN) queries: `embedding:ann([1.0, 2.0, 3.0], k=10, nprobe=32)`
//! - Sparse vector queries: `field:sparse({1: 0.5, 5: 0.3}, k=10)`
//! - Default fields for unqualified terms
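//!
//! A minimal usage sketch (illustrative; assumes a schema whose `title` and `body`
//! text fields are registered as the default fields):
//!
//! ```ignore
//! let parser = QueryLanguageParser::new(schema, vec![title, body], tokenizers);
//! // Field-qualified term combined with a phrase searched across the default fields.
//! let query = parser.parse("title:rust AND \"memory safety\"")?;
//! ```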

use pest::Parser;
use pest_derive::Parser;
use std::sync::Arc;

use super::query_field_router::{QueryFieldRouter, RoutingMode};
use super::schema::{Field, Schema};
use crate::query::{BooleanQuery, Query, TermQuery};
use crate::tokenizer::{BoxedTokenizer, TokenizerRegistry};

#[derive(Parser)]
#[grammar = "dsl/ql/ql.pest"]
struct QueryParser;

/// Parsed query that can be converted to a Query trait object
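///
/// For example (illustrative), the input `(rust OR go) AND title:fast` parses to
/// `And([Or([Term { field: None, term: "rust" }, Term { field: None, term: "go" }]),
/// Term { field: Some("title"), term: "fast" }])`.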
#[derive(Debug, Clone)]
pub enum ParsedQuery {
    Term {
        field: Option<String>,
        term: String,
    },
    Phrase {
        field: Option<String>,
        phrase: String,
    },
    /// Dense vector KNN query
    Knn {
        field: String,
        vector: Vec<f32>,
        k: usize,
        nprobe: usize,
        rerank: usize,
    },
    /// Sparse vector query
    Sparse {
        field: String,
        indices: Vec<u32>,
        weights: Vec<f32>,
        k: usize,
    },
    And(Vec<ParsedQuery>),
    Or(Vec<ParsedQuery>),
    Not(Box<ParsedQuery>),
}

/// Query language parser with schema awareness
pub struct QueryLanguageParser {
    schema: Arc<Schema>,
    default_fields: Vec<Field>,
    tokenizers: Arc<TokenizerRegistry>,
    /// Optional query field router for routing queries based on regex patterns
    field_router: Option<QueryFieldRouter>,
}

impl QueryLanguageParser {
    pub fn new(
        schema: Arc<Schema>,
        default_fields: Vec<Field>,
        tokenizers: Arc<TokenizerRegistry>,
    ) -> Self {
        Self {
            schema,
            default_fields,
            tokenizers,
            field_router: None,
        }
    }

    /// Create a parser with a query field router
    pub fn with_router(
        schema: Arc<Schema>,
        default_fields: Vec<Field>,
        tokenizers: Arc<TokenizerRegistry>,
        router: QueryFieldRouter,
    ) -> Self {
        Self {
            schema,
            default_fields,
            tokenizers,
            field_router: Some(router),
        }
    }

    /// Set the query field router
    pub fn set_router(&mut self, router: QueryFieldRouter) {
        self.field_router = Some(router);
    }

    /// Get the query field router
    pub fn router(&self) -> Option<&QueryFieldRouter> {
        self.field_router.as_ref()
    }

    /// Parse a query string into a Query
    ///
    /// Supports query language syntax (field:term, AND, OR, NOT, grouping)
    /// and plain text (tokenized and searched across default fields).
    ///
    /// If a query field router is configured, the query is first checked against
    /// routing rules. If a rule matches:
    /// - In exclusive mode: only the target field is queried with the substituted value
    /// - In additional mode: both the target field and default fields are queried
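    ///
    /// A minimal sketch of typical calls (illustrative; `parser` is assumed to have
    /// been built with `new` or `with_router`):
    ///
    /// ```ignore
    /// // Query-language syntax with grouping and a field-qualified term.
    /// let q1 = parser.parse("(rust OR python) AND title:programming")?;
    /// // Plain text falls back to an OR of tokens across the default fields.
    /// let q2 = parser.parse("hello world")?;
    /// ```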
    pub fn parse(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
        let query_str = query_str.trim();
        if query_str.is_empty() {
            return Err("Empty query".to_string());
        }

        // Check if query matches any routing rules
        if let Some(router) = &self.field_router
            && let Some(routed) = router.route(query_str)
        {
            return self.build_routed_query(
                &routed.query,
                &routed.target_field,
                routed.mode,
                query_str,
            );
        }

        // No routing match - parse normally
        self.parse_normal(query_str)
    }

    /// Build a query from a routed match
    fn build_routed_query(
        &self,
        routed_query: &str,
        target_field: &str,
        mode: RoutingMode,
        original_query: &str,
    ) -> Result<Box<dyn Query>, String> {
        // Validate target field exists
        let _field_id = self
            .schema
            .get_field(target_field)
            .ok_or_else(|| format!("Unknown target field: {}", target_field))?;

        // Build query for the target field with the substituted value
        let target_query = self.build_term_query(Some(target_field), routed_query)?;

        match mode {
            RoutingMode::Exclusive => {
                // Only query the target field
                Ok(target_query)
            }
            RoutingMode::Additional => {
                // Query both target field and default fields
                let mut bool_query = BooleanQuery::new();
                bool_query = bool_query.should(target_query);

                // Also parse the original query against default fields
                if let Ok(default_query) = self.parse_normal(original_query) {
                    bool_query = bool_query.should(default_query);
                }

                Ok(Box::new(bool_query))
            }
        }
    }

    /// Parse query without routing (normal parsing path)
    fn parse_normal(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
        // Try parsing as query language first
        match self.parse_query_string(query_str) {
            Ok(parsed) => self.build_query(&parsed),
            Err(_) => {
                // If grammar parsing fails, treat the input as plain text:
                // tokenize it and build an OR of terms across the default fields
                self.parse_plain_text(query_str)
            }
        }
    }

    /// Parse plain text as implicit OR of tokenized terms
    fn parse_plain_text(&self, text: &str) -> Result<Box<dyn Query>, String> {
        if self.default_fields.is_empty() {
            return Err("No default fields configured".to_string());
        }

        let tokenizer = self.get_tokenizer(self.default_fields[0]);
        let tokens: Vec<String> = tokenizer
            .tokenize(text)
            .into_iter()
            .map(|t| t.text.to_lowercase())
            .collect();

        if tokens.is_empty() {
            return Err("No tokens in query".to_string());
        }

        let mut bool_query = BooleanQuery::new();
        for token in &tokens {
            for &field_id in &self.default_fields {
                bool_query = bool_query.should(TermQuery::text(field_id, token));
            }
        }
        Ok(Box::new(bool_query))
    }

    fn parse_query_string(&self, query_str: &str) -> Result<ParsedQuery, String> {
        let pairs = QueryParser::parse(Rule::query, query_str)
            .map_err(|e| format!("Parse error: {}", e))?;

        let query_pair = pairs.into_iter().next().ok_or("No query found")?;

        // query = { SOI ~ or_expr ~ EOI }
        self.parse_or_expr(query_pair.into_inner().next().unwrap())
    }

    fn parse_or_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut inner = pair.into_inner();
        let first = self.parse_and_expr(inner.next().unwrap())?;

        let rest: Vec<ParsedQuery> = inner
            .filter(|p| p.as_rule() == Rule::and_expr)
            .map(|p| self.parse_and_expr(p))
            .collect::<Result<Vec<_>, _>>()?;

        if rest.is_empty() {
            Ok(first)
        } else {
            let mut all = vec![first];
            all.extend(rest);
            Ok(ParsedQuery::Or(all))
        }
    }

    fn parse_and_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut inner = pair.into_inner();
        let first = self.parse_primary(inner.next().unwrap())?;

        let rest: Vec<ParsedQuery> = inner
            .filter(|p| p.as_rule() == Rule::primary)
            .map(|p| self.parse_primary(p))
            .collect::<Result<Vec<_>, _>>()?;

        if rest.is_empty() {
            Ok(first)
        } else {
            let mut all = vec![first];
            all.extend(rest);
            Ok(ParsedQuery::And(all))
        }
    }

    fn parse_primary(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut negated = false;
        let mut inner_query = None;

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::not_op => negated = true,
                Rule::group => {
                    let or_expr = inner.into_inner().next().unwrap();
                    inner_query = Some(self.parse_or_expr(or_expr)?);
                }
                Rule::ann_query => {
                    inner_query = Some(self.parse_ann_query(inner)?);
                }
                Rule::sparse_query => {
                    inner_query = Some(self.parse_sparse_query(inner)?);
                }
                Rule::phrase_query => {
                    inner_query = Some(self.parse_phrase_query(inner)?);
                }
                Rule::term_query => {
                    inner_query = Some(self.parse_term_query(inner)?);
                }
                _ => {}
            }
        }

        let query = inner_query.ok_or("No query in primary")?;

        if negated {
            Ok(ParsedQuery::Not(Box::new(query)))
        } else {
            Ok(query)
        }
    }

    fn parse_term_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = None;
        let mut term = String::new();

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
                }
                Rule::term => {
                    term = inner.as_str().to_string();
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Term { field, term })
    }

    fn parse_phrase_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = None;
        let mut phrase = String::new();

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
                }
                Rule::quoted_string => {
                    let s = inner.as_str();
                    phrase = s[1..s.len() - 1].to_string();
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Phrase { field, phrase })
    }

    /// Parse an ANN query: field:ann([1.0, 2.0, 3.0], k=10, nprobe=32)
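    ///
    /// Parameters omitted from the query default to `k=10`, `nprobe=32`, `rerank=3`.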
    fn parse_ann_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = String::new();
        let mut vector = Vec::new();
        let mut k = 10usize;
        let mut nprobe = 32usize;
        let mut rerank = 3usize;

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = inner.into_inner().next().unwrap().as_str().to_string();
                }
                Rule::vector_array => {
                    for num in inner.into_inner() {
                        if num.as_rule() == Rule::number
                            && let Ok(v) = num.as_str().parse::<f32>()
                        {
                            vector.push(v);
                        }
                    }
                }
                Rule::ann_params => {
                    for param in inner.into_inner() {
                        match param.as_rule() {
                            Rule::number => {
                                // Simple k value
                                if let Ok(v) = param.as_str().parse::<usize>() {
                                    k = v;
                                }
                            }
                            Rule::ann_param => {
                                // ann_param = { ("k" | "nprobe" | "rerank") ~ "=" ~ number }
                                // The param.as_str() contains the full "k=20" string
                                let param_str = param.as_str();
                                if let Some(eq_pos) = param_str.find('=') {
                                    let name = &param_str[..eq_pos];
                                    let value = &param_str[eq_pos + 1..];
                                    let val: usize = value.parse().unwrap_or(0);
                                    match name {
                                        "k" => k = val,
                                        "nprobe" => nprobe = val,
                                        "rerank" => rerank = val,
                                        _ => {}
                                    }
                                }
                            }
                            _ => {}
                        }
                    }
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Knn {
            field,
            vector,
            k,
            nprobe,
            rerank,
        })
    }

    /// Parse a sparse vector query: field:sparse({1: 0.5, 5: 0.3}, k=10)
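    ///
    /// `k` defaults to 10 when omitted; `nprobe` and `rerank` are ignored for sparse queries.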
    fn parse_sparse_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
        let mut field = String::new();
        let mut indices = Vec::new();
        let mut weights = Vec::new();
        let mut k = 10usize;

        for inner in pair.into_inner() {
            match inner.as_rule() {
                Rule::field_spec => {
                    field = inner.into_inner().next().unwrap().as_str().to_string();
                }
                Rule::sparse_map => {
                    for entry in inner.into_inner() {
                        if entry.as_rule() == Rule::sparse_entry {
                            let mut entry_inner = entry.into_inner();
                            if let (Some(idx), Some(weight)) =
                                (entry_inner.next(), entry_inner.next())
                                && let (Ok(i), Ok(w)) =
                                    (idx.as_str().parse::<u32>(), weight.as_str().parse::<f32>())
                            {
                                indices.push(i);
                                weights.push(w);
                            }
                        }
                    }
                }
                Rule::ann_params => {
                    for param in inner.into_inner() {
                        if param.as_rule() == Rule::number {
                            if let Ok(v) = param.as_str().parse::<usize>() {
                                k = v;
                            }
                        } else if param.as_rule() == Rule::ann_param {
                            // ann_param = { ("k" | "nprobe" | "rerank") ~ "=" ~ number }
                            // The param.as_str() contains the full "k=20" string
                            let param_str = param.as_str();
                            if let Some(eq_pos) = param_str.find('=') {
                                let name = &param_str[..eq_pos];
                                let value = &param_str[eq_pos + 1..];
                                if name == "k" {
                                    k = value.parse().unwrap_or(10);
                                }
                            }
                        }
                    }
                }
                _ => {}
            }
        }

        Ok(ParsedQuery::Sparse {
            field,
            indices,
            weights,
            k,
        })
    }

    fn build_query(&self, parsed: &ParsedQuery) -> Result<Box<dyn Query>, String> {
        use crate::query::{DenseVectorQuery, SparseVectorQuery};

        match parsed {
            ParsedQuery::Term { field, term } => self.build_term_query(field.as_deref(), term),
            ParsedQuery::Phrase { field, phrase } => {
                self.build_phrase_query(field.as_deref(), phrase)
            }
            ParsedQuery::Knn {
                field,
                vector,
                k,
                nprobe,
                rerank,
            } => {
                let field_id = self
                    .schema
                    .get_field(field)
                    .ok_or_else(|| format!("Unknown field: {}", field))?;
                let query = DenseVectorQuery::new(field_id, vector.clone(), *k)
                    .with_nprobe(*nprobe)
                    .with_rerank_factor(*rerank);
                Ok(Box::new(query))
            }
            ParsedQuery::Sparse {
                field,
                indices,
                weights,
                k,
            } => {
                let field_id = self
                    .schema
                    .get_field(field)
                    .ok_or_else(|| format!("Unknown field: {}", field))?;
                let query = SparseVectorQuery::new(field_id, indices.clone(), weights.clone(), *k);
                Ok(Box::new(query))
            }
            ParsedQuery::And(queries) => {
                let mut bool_query = BooleanQuery::new();
                for q in queries {
                    bool_query = bool_query.must(self.build_query(q)?);
                }
                Ok(Box::new(bool_query))
            }
            ParsedQuery::Or(queries) => {
                let mut bool_query = BooleanQuery::new();
                for q in queries {
                    bool_query = bool_query.should(self.build_query(q)?);
                }
                Ok(Box::new(bool_query))
            }
            ParsedQuery::Not(inner) => {
                // A top-level NOT has no positive context; express it as a
                // BooleanQuery containing only a must_not clause
                let mut bool_query = BooleanQuery::new();
                bool_query = bool_query.must_not(self.build_query(inner)?);
                Ok(Box::new(bool_query))
            }
        }
    }

    fn build_term_query(&self, field: Option<&str>, term: &str) -> Result<Box<dyn Query>, String> {
        if let Some(field_name) = field {
            // Field-qualified term: tokenize using field's tokenizer
            let field_id = self
                .schema
                .get_field(field_name)
                .ok_or_else(|| format!("Unknown field: {}", field_name))?;
            let tokenizer = self.get_tokenizer(field_id);
            let tokens: Vec<String> = tokenizer
                .tokenize(term)
                .into_iter()
                .map(|t| t.text.to_lowercase())
                .collect();

            if tokens.is_empty() {
                return Err("No tokens in term".to_string());
            }

            if tokens.len() == 1 {
                Ok(Box::new(TermQuery::text(field_id, &tokens[0])))
            } else {
                // Multiple tokens from single term - AND them together
                let mut bool_query = BooleanQuery::new();
                for token in &tokens {
                    bool_query = bool_query.must(TermQuery::text(field_id, token));
                }
                Ok(Box::new(bool_query))
            }
        } else if !self.default_fields.is_empty() {
            // Unqualified term: tokenize and search across default fields
            let tokenizer = self.get_tokenizer(self.default_fields[0]);
            let tokens: Vec<String> = tokenizer
                .tokenize(term)
                .into_iter()
                .map(|t| t.text.to_lowercase())
                .collect();

            if tokens.is_empty() {
                return Err("No tokens in term".to_string());
            }

            // Build SHOULD query across all default fields for each token
            let mut bool_query = BooleanQuery::new();
            for token in &tokens {
                for &field_id in &self.default_fields {
                    bool_query = bool_query.should(TermQuery::text(field_id, token));
                }
            }
            Ok(Box::new(bool_query))
        } else {
            Err("No field specified and no default fields configured".to_string())
        }
    }

    fn build_phrase_query(
        &self,
        field: Option<&str>,
        phrase: &str,
    ) -> Result<Box<dyn Query>, String> {
        // For phrase queries, tokenize and create AND query of terms
        let field_id = if let Some(field_name) = field {
            self.schema
                .get_field(field_name)
                .ok_or_else(|| format!("Unknown field: {}", field_name))?
        } else if !self.default_fields.is_empty() {
            self.default_fields[0]
        } else {
            return Err("No field specified and no default fields configured".to_string());
        };

        let tokenizer = self.get_tokenizer(field_id);
        let tokens: Vec<String> = tokenizer
            .tokenize(phrase)
            .into_iter()
            .map(|t| t.text.to_lowercase())
            .collect();

        if tokens.is_empty() {
            return Err("No tokens in phrase".to_string());
        }

        if tokens.len() == 1 {
            return Ok(Box::new(TermQuery::text(field_id, &tokens[0])));
        }

        // Create AND query for all tokens (simplified phrase matching)
        let mut bool_query = BooleanQuery::new();
        for token in &tokens {
            bool_query = bool_query.must(TermQuery::text(field_id, token));
        }

        // If no field specified and multiple default fields, wrap in OR
        if field.is_none() && self.default_fields.len() > 1 {
            let mut outer = BooleanQuery::new();
            for &f in &self.default_fields {
                let tokenizer = self.get_tokenizer(f);
                let tokens: Vec<String> = tokenizer
                    .tokenize(phrase)
                    .into_iter()
                    .map(|t| t.text.to_lowercase())
                    .collect();

                let mut field_query = BooleanQuery::new();
                for token in &tokens {
                    field_query = field_query.must(TermQuery::text(f, token));
                }
                outer = outer.should(field_query);
            }
            return Ok(Box::new(outer));
        }

        Ok(Box::new(bool_query))
    }

    fn get_tokenizer(&self, field: Field) -> BoxedTokenizer {
        // Get tokenizer name from schema field entry, fall back to "default"
        let tokenizer_name = self
            .schema
            .get_field_entry(field)
            .and_then(|entry| entry.tokenizer.as_deref())
            .unwrap_or("default");

        self.tokenizers
            .get(tokenizer_name)
            .unwrap_or_else(|| Box::new(crate::tokenizer::LowercaseTokenizer))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::SchemaBuilder;
    use crate::tokenizer::TokenizerRegistry;

    fn setup() -> (Arc<Schema>, Vec<Field>, Arc<TokenizerRegistry>) {
        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        (schema, vec![title, body], tokenizers)
    }

    #[test]
    fn test_simple_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse without error - creates BooleanQuery across default fields
        let _query = parser.parse("rust").unwrap();
    }

    #[test]
    fn test_field_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse field:term syntax
        let _query = parser.parse("title:rust").unwrap();
    }

    #[test]
    fn test_boolean_and() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse AND boolean query
        let _query = parser.parse("rust AND programming").unwrap();
    }

    #[test]
    fn test_match_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should tokenize and create boolean query
        let _query = parser.parse("hello world").unwrap();
    }

    #[test]
    fn test_phrase_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse quoted phrase
        let _query = parser.parse("\"hello world\"").unwrap();
    }

    #[test]
    fn test_boolean_or() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse OR boolean query
        let _query = parser.parse("rust OR python").unwrap();
    }

    #[test]
    fn test_complex_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse complex boolean with grouping
        let _query = parser.parse("(rust OR python) AND programming").unwrap();
    }

    #[test]
    fn test_router_exclusive_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
            substitution: "doi://{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should route DOI query to uri field
        let _query = parser.parse("doi:10.1234/test.123").unwrap();
    }

    #[test]
    fn test_router_additional_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"#(\d+)".to_string(),
            substitution: "{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Additional,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should route to both uri field and default fields
        let _query = parser.parse("#42").unwrap();
    }

    #[test]
    fn test_router_no_match_falls_through() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:".to_string(),
            substitution: "{0}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should NOT match and fall through to normal parsing
        let _query = parser.parse("rust programming").unwrap();
    }

    #[test]
    fn test_router_invalid_target_field() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"test".to_string(),
            substitution: "{0}".to_string(),
            target_field: "nonexistent".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should fail because target field doesn't exist
        let result = parser.parse("test");
        assert!(result.is_err());
        let err = result.err().unwrap();
        assert!(err.contains("Unknown target field"));
    }

    #[test]
    fn test_parse_knn_query() {
        let mut builder = SchemaBuilder::default();
        let embedding = builder.add_dense_vector_field("embedding", 128, true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![embedding], tokenizers);

        // Parse KNN query
        let result = parser.parse_query_string("embedding:ann([1.0, 2.0, 3.0], k=10, nprobe=32)");
        assert!(result.is_ok(), "Failed to parse KNN query: {:?}", result);

        if let Ok(ParsedQuery::Knn {
            field,
            vector,
            k,
            nprobe,
            rerank,
        }) = result
        {
            assert_eq!(field, "embedding");
            assert_eq!(vector, vec![1.0, 2.0, 3.0]);
            assert_eq!(k, 10);
            assert_eq!(nprobe, 32);
            assert_eq!(rerank, 3); // default
        } else {
            panic!("Expected Knn query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_sparse_query() {
        let mut builder = SchemaBuilder::default();
        let sparse = builder.add_text_field("sparse", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![sparse], tokenizers);

        // Parse sparse query
        let result = parser.parse_query_string("sparse:sparse({1: 0.5, 5: 0.3}, k=20)");
        assert!(result.is_ok(), "Failed to parse sparse query: {:?}", result);

        if let Ok(ParsedQuery::Sparse {
            field,
            indices,
            weights,
            k,
        }) = result
        {
            assert_eq!(field, "sparse");
            assert_eq!(indices, vec![1, 5]);
            assert_eq!(weights, vec![0.5, 0.3]);
            assert_eq!(k, 20);
        } else {
            panic!("Expected Sparse query, got: {:?}", result);
        }
    }
}