// hermes_core/dsl/ql/mod.rs
1//! Query language parser using pest
2//!
3//! Supports:
4//! - Term queries: `rust` or `title:rust`
5//! - Phrase queries: `"hello world"` or `title:"hello world"`
6//! - Boolean operators: `AND`, `OR`, `NOT` (or `&&`, `||`, `-`)
7//! - Grouping: `(rust OR python) AND programming`
8//! - Default fields for unqualified terms
9
10use pest::Parser;
11use pest_derive::Parser;
12use std::sync::Arc;
13
14use super::query_field_router::{QueryFieldRouter, RoutingMode};
15use super::schema::{Field, Schema};
16use crate::query::{BooleanQuery, Query, TermQuery};
17use crate::tokenizer::{BoxedTokenizer, TokenizerRegistry};
18
/// Pest-generated parser; the grammar is defined in `dsl/ql/ql.pest`.
#[derive(Parser)]
#[grammar = "dsl/ql/ql.pest"]
struct QueryParser;
22
/// Parsed query that can be converted to a Query trait object
#[derive(Debug, Clone)]
pub enum ParsedQuery {
    /// Single term, optionally field-qualified (`rust` or `title:rust`).
    Term {
        field: Option<String>,
        term: String,
    },
    /// Quoted phrase, optionally field-qualified (`"hello world"`).
    Phrase {
        field: Option<String>,
        phrase: String,
    },
    /// Dense vector ANN query
    Ann {
        field: String,
        vector: Vec<f32>,
        // nprobe/rerank are search parameters forwarded to DenseVectorQuery;
        // defaults are 32 and 3.0 when omitted from the query string.
        nprobe: usize,
        rerank: f32,
    },
    /// Sparse vector query
    Sparse {
        field: String,
        /// (dimension index, weight) pairs.
        vector: Vec<(u32, f32)>,
    },
    /// Conjunction: all sub-queries must match.
    And(Vec<ParsedQuery>),
    /// Disjunction: at least one sub-query should match.
    Or(Vec<ParsedQuery>),
    /// Negation of the inner query.
    Not(Box<ParsedQuery>),
}
50
/// Query language parser with schema awareness
pub struct QueryLanguageParser {
    /// Schema used to resolve field names and validate field types.
    schema: Arc<Schema>,
    /// Fields searched when a term carries no explicit `field:` qualifier.
    default_fields: Vec<Field>,
    /// Registry used to look up per-field tokenizers.
    tokenizers: Arc<TokenizerRegistry>,
    /// Optional query field router for routing queries based on regex patterns
    field_router: Option<QueryFieldRouter>,
}
59
60impl QueryLanguageParser {
61    pub fn new(
62        schema: Arc<Schema>,
63        default_fields: Vec<Field>,
64        tokenizers: Arc<TokenizerRegistry>,
65    ) -> Self {
66        Self {
67            schema,
68            default_fields,
69            tokenizers,
70            field_router: None,
71        }
72    }
73
74    /// Create a parser with a query field router
75    pub fn with_router(
76        schema: Arc<Schema>,
77        default_fields: Vec<Field>,
78        tokenizers: Arc<TokenizerRegistry>,
79        router: QueryFieldRouter,
80    ) -> Self {
81        Self {
82            schema,
83            default_fields,
84            tokenizers,
85            field_router: Some(router),
86        }
87    }
88
89    /// Set the query field router
90    pub fn set_router(&mut self, router: QueryFieldRouter) {
91        self.field_router = Some(router);
92    }
93
    /// Get the query field router
    ///
    /// Returns `None` when no router has been configured.
    pub fn router(&self) -> Option<&QueryFieldRouter> {
        self.field_router.as_ref()
    }
98
99    /// Parse a query string into a Query
100    ///
101    /// Supports query language syntax (field:term, AND, OR, NOT, grouping)
102    /// and plain text (tokenized and searched across default fields).
103    ///
104    /// If a query field router is configured, the query is first checked against
105    /// routing rules. If a rule matches:
106    /// - In exclusive mode: only the target field is queried with the substituted value
107    /// - In additional mode: both the target field and default fields are queried
108    pub fn parse(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
109        let query_str = query_str.trim();
110        if query_str.is_empty() {
111            return Err("Empty query".to_string());
112        }
113
114        // Check if query matches any routing rules
115        if let Some(router) = &self.field_router
116            && let Some(routed) = router.route(query_str)
117        {
118            return self.build_routed_query(
119                &routed.query,
120                &routed.target_field,
121                routed.mode,
122                query_str,
123            );
124        }
125
126        // No routing match - parse normally
127        self.parse_normal(query_str)
128    }
129
130    /// Build a query from a routed match
131    fn build_routed_query(
132        &self,
133        routed_query: &str,
134        target_field: &str,
135        mode: RoutingMode,
136        original_query: &str,
137    ) -> Result<Box<dyn Query>, String> {
138        // Validate target field exists
139        let _field_id = self
140            .schema
141            .get_field(target_field)
142            .ok_or_else(|| format!("Unknown target field: {}", target_field))?;
143
144        // Build query for the target field with the substituted value
145        let target_query = self.build_term_query(Some(target_field), routed_query)?;
146
147        match mode {
148            RoutingMode::Exclusive => {
149                // Only query the target field
150                Ok(target_query)
151            }
152            RoutingMode::Additional => {
153                // Query both target field and default fields
154                let mut bool_query = BooleanQuery::new();
155                bool_query = bool_query.should(target_query);
156
157                // Also parse the original query against default fields
158                if let Ok(default_query) = self.parse_normal(original_query) {
159                    bool_query = bool_query.should(default_query);
160                }
161
162                Ok(Box::new(bool_query))
163            }
164        }
165    }
166
167    /// Parse query without routing (normal parsing path)
168    fn parse_normal(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
169        // Try parsing as query language first
170        match self.parse_query_string(query_str) {
171            Ok(parsed) => self.build_query(&parsed),
172            Err(_) => {
173                // If grammar parsing fails, treat as plain text
174                // Split by whitespace and create OR of terms
175                self.parse_plain_text(query_str)
176            }
177        }
178    }
179
180    /// Parse plain text as implicit OR of tokenized terms
181    fn parse_plain_text(&self, text: &str) -> Result<Box<dyn Query>, String> {
182        if self.default_fields.is_empty() {
183            return Err("No default fields configured".to_string());
184        }
185
186        let tokenizer = self.get_tokenizer(self.default_fields[0]);
187        let tokens: Vec<String> = tokenizer
188            .tokenize(text)
189            .into_iter()
190            .map(|t| t.text.to_lowercase())
191            .collect();
192
193        if tokens.is_empty() {
194            return Err("No tokens in query".to_string());
195        }
196
197        let mut bool_query = BooleanQuery::new();
198        for token in &tokens {
199            for &field_id in &self.default_fields {
200                bool_query = bool_query.should(TermQuery::text(field_id, token));
201            }
202        }
203        Ok(Box::new(bool_query))
204    }
205
206    fn parse_query_string(&self, query_str: &str) -> Result<ParsedQuery, String> {
207        let pairs = QueryParser::parse(Rule::query, query_str)
208            .map_err(|e| format!("Parse error: {}", e))?;
209
210        let query_pair = pairs.into_iter().next().ok_or("No query found")?;
211
212        // query = { SOI ~ or_expr ~ EOI }
213        self.parse_or_expr(query_pair.into_inner().next().unwrap())
214    }
215
216    fn parse_or_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
217        let mut inner = pair.into_inner();
218        let first = self.parse_and_expr(inner.next().unwrap())?;
219
220        let rest: Vec<ParsedQuery> = inner
221            .filter(|p| p.as_rule() == Rule::and_expr)
222            .map(|p| self.parse_and_expr(p))
223            .collect::<Result<Vec<_>, _>>()?;
224
225        if rest.is_empty() {
226            Ok(first)
227        } else {
228            let mut all = vec![first];
229            all.extend(rest);
230            Ok(ParsedQuery::Or(all))
231        }
232    }
233
234    fn parse_and_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
235        let mut inner = pair.into_inner();
236        let first = self.parse_primary(inner.next().unwrap())?;
237
238        let rest: Vec<ParsedQuery> = inner
239            .filter(|p| p.as_rule() == Rule::primary)
240            .map(|p| self.parse_primary(p))
241            .collect::<Result<Vec<_>, _>>()?;
242
243        if rest.is_empty() {
244            Ok(first)
245        } else {
246            let mut all = vec![first];
247            all.extend(rest);
248            Ok(ParsedQuery::And(all))
249        }
250    }
251
252    fn parse_primary(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
253        let mut negated = false;
254        let mut inner_query = None;
255
256        for inner in pair.into_inner() {
257            match inner.as_rule() {
258                Rule::not_op => negated = true,
259                Rule::group => {
260                    let or_expr = inner.into_inner().next().unwrap();
261                    inner_query = Some(self.parse_or_expr(or_expr)?);
262                }
263                Rule::ann_query => {
264                    inner_query = Some(self.parse_ann_query(inner)?);
265                }
266                Rule::sparse_query => {
267                    inner_query = Some(self.parse_sparse_query(inner)?);
268                }
269                Rule::phrase_query => {
270                    inner_query = Some(self.parse_phrase_query(inner)?);
271                }
272                Rule::term_query => {
273                    inner_query = Some(self.parse_term_query(inner)?);
274                }
275                _ => {}
276            }
277        }
278
279        let query = inner_query.ok_or("No query in primary")?;
280
281        if negated {
282            Ok(ParsedQuery::Not(Box::new(query)))
283        } else {
284            Ok(query)
285        }
286    }
287
288    fn parse_term_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
289        let mut field = None;
290        let mut term = String::new();
291
292        for inner in pair.into_inner() {
293            match inner.as_rule() {
294                Rule::field_spec => {
295                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
296                }
297                Rule::term => {
298                    term = inner.as_str().to_string();
299                }
300                _ => {}
301            }
302        }
303
304        Ok(ParsedQuery::Term { field, term })
305    }
306
307    fn parse_phrase_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
308        let mut field = None;
309        let mut phrase = String::new();
310
311        for inner in pair.into_inner() {
312            match inner.as_rule() {
313                Rule::field_spec => {
314                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
315                }
316                Rule::quoted_string => {
317                    let s = inner.as_str();
318                    phrase = s[1..s.len() - 1].to_string();
319                }
320                _ => {}
321            }
322        }
323
324        Ok(ParsedQuery::Phrase { field, phrase })
325    }
326
327    /// Parse an ANN query: field:ann([1.0, 2.0, 3.0], nprobe=32, rerank=3)
328    fn parse_ann_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
329        let mut field = String::new();
330        let mut vector = Vec::new();
331        let mut nprobe = 32usize;
332        let mut rerank = 3.0f32;
333
334        for inner in pair.into_inner() {
335            match inner.as_rule() {
336                Rule::field_spec => {
337                    field = inner.into_inner().next().unwrap().as_str().to_string();
338                }
339                Rule::vector_array => {
340                    for num in inner.into_inner() {
341                        if num.as_rule() == Rule::number
342                            && let Ok(v) = num.as_str().parse::<f32>()
343                        {
344                            vector.push(v);
345                        }
346                    }
347                }
348                Rule::ann_params => {
349                    for param in inner.into_inner() {
350                        if param.as_rule() == Rule::ann_param {
351                            // ann_param = { ("nprobe" | "rerank") ~ "=" ~ number }
352                            let param_str = param.as_str();
353                            if let Some(eq_pos) = param_str.find('=') {
354                                let name = &param_str[..eq_pos];
355                                let value = &param_str[eq_pos + 1..];
356                                match name {
357                                    "nprobe" => nprobe = value.parse().unwrap_or(0),
358                                    "rerank" => rerank = value.parse().unwrap_or(0.0),
359                                    _ => {}
360                                }
361                            }
362                        }
363                    }
364                }
365                _ => {}
366            }
367        }
368
369        Ok(ParsedQuery::Ann {
370            field,
371            vector,
372            nprobe,
373            rerank,
374        })
375    }
376
377    /// Parse a sparse vector query: field:sparse({1: 0.5, 5: 0.3})
378    fn parse_sparse_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
379        let mut field = String::new();
380        let mut vector = Vec::new();
381
382        for inner in pair.into_inner() {
383            match inner.as_rule() {
384                Rule::field_spec => {
385                    field = inner.into_inner().next().unwrap().as_str().to_string();
386                }
387                Rule::sparse_map => {
388                    for entry in inner.into_inner() {
389                        if entry.as_rule() == Rule::sparse_entry {
390                            let mut entry_inner = entry.into_inner();
391                            if let (Some(idx), Some(weight)) =
392                                (entry_inner.next(), entry_inner.next())
393                                && let (Ok(i), Ok(w)) =
394                                    (idx.as_str().parse::<u32>(), weight.as_str().parse::<f32>())
395                            {
396                                vector.push((i, w));
397                            }
398                        }
399                    }
400                }
401                _ => {}
402            }
403        }
404
405        Ok(ParsedQuery::Sparse { field, vector })
406    }
407
408    fn build_query(&self, parsed: &ParsedQuery) -> Result<Box<dyn Query>, String> {
409        use crate::query::{DenseVectorQuery, SparseVectorQuery};
410
411        match parsed {
412            ParsedQuery::Term { field, term } => self.build_term_query(field.as_deref(), term),
413            ParsedQuery::Phrase { field, phrase } => {
414                self.build_phrase_query(field.as_deref(), phrase)
415            }
416            ParsedQuery::Ann {
417                field,
418                vector,
419                nprobe,
420                rerank,
421            } => {
422                let field_id = self
423                    .schema
424                    .get_field(field)
425                    .ok_or_else(|| format!("Unknown field: {}", field))?;
426                let query = DenseVectorQuery::new(field_id, vector.clone())
427                    .with_nprobe(*nprobe)
428                    .with_rerank_factor(*rerank);
429                Ok(Box::new(query))
430            }
431            ParsedQuery::Sparse { field, vector } => {
432                let field_id = self
433                    .schema
434                    .get_field(field)
435                    .ok_or_else(|| format!("Unknown field: {}", field))?;
436                let query = SparseVectorQuery::new(field_id, vector.clone());
437                Ok(Box::new(query))
438            }
439            ParsedQuery::And(queries) => {
440                let mut bool_query = BooleanQuery::new();
441                for q in queries {
442                    bool_query = bool_query.must(self.build_query(q)?);
443                }
444                Ok(Box::new(bool_query))
445            }
446            ParsedQuery::Or(queries) => {
447                let mut bool_query = BooleanQuery::new();
448                for q in queries {
449                    bool_query = bool_query.should(self.build_query(q)?);
450                }
451                Ok(Box::new(bool_query))
452            }
453            ParsedQuery::Not(inner) => {
454                // NOT query needs a context - wrap in a match-all with must_not
455                let mut bool_query = BooleanQuery::new();
456                bool_query = bool_query.must_not(self.build_query(inner)?);
457                Ok(Box::new(bool_query))
458            }
459        }
460    }
461
462    fn build_term_query(&self, field: Option<&str>, term: &str) -> Result<Box<dyn Query>, String> {
463        if let Some(field_name) = field {
464            // Field-qualified term: tokenize using field's tokenizer
465            let field_id = self
466                .schema
467                .get_field(field_name)
468                .ok_or_else(|| format!("Unknown field: {}", field_name))?;
469            // Validate field type — TermQuery only works on text fields
470            if let Some(entry) = self.schema.get_field_entry(field_id) {
471                use crate::dsl::FieldType;
472                if entry.field_type != FieldType::Text {
473                    return Err(format!(
474                        "Term query requires a text field, but '{}' is {:?}. Use range query for numeric fields.",
475                        field_name, entry.field_type
476                    ));
477                }
478            }
479            let tokenizer = self.get_tokenizer(field_id);
480            let tokens: Vec<String> = tokenizer
481                .tokenize(term)
482                .into_iter()
483                .map(|t| t.text.to_lowercase())
484                .collect();
485
486            if tokens.is_empty() {
487                return Err("No tokens in term".to_string());
488            }
489
490            if tokens.len() == 1 {
491                Ok(Box::new(TermQuery::text(field_id, &tokens[0])))
492            } else {
493                // Multiple tokens from single term - AND them together
494                let mut bool_query = BooleanQuery::new();
495                for token in &tokens {
496                    bool_query = bool_query.must(TermQuery::text(field_id, token));
497                }
498                Ok(Box::new(bool_query))
499            }
500        } else if !self.default_fields.is_empty() {
501            // Unqualified term: tokenize and search across default fields
502            let tokenizer = self.get_tokenizer(self.default_fields[0]);
503            let tokens: Vec<String> = tokenizer
504                .tokenize(term)
505                .into_iter()
506                .map(|t| t.text.to_lowercase())
507                .collect();
508
509            if tokens.is_empty() {
510                return Err("No tokens in term".to_string());
511            }
512
513            // Build SHOULD query across all default fields for each token
514            let mut bool_query = BooleanQuery::new();
515            for token in &tokens {
516                for &field_id in &self.default_fields {
517                    bool_query = bool_query.should(TermQuery::text(field_id, token));
518                }
519            }
520            Ok(Box::new(bool_query))
521        } else {
522            Err("No field specified and no default fields configured".to_string())
523        }
524    }
525
526    fn build_phrase_query(
527        &self,
528        field: Option<&str>,
529        phrase: &str,
530    ) -> Result<Box<dyn Query>, String> {
531        // For phrase queries, tokenize and create AND query of terms
532        let field_id = if let Some(field_name) = field {
533            self.schema
534                .get_field(field_name)
535                .ok_or_else(|| format!("Unknown field: {}", field_name))?
536        } else if !self.default_fields.is_empty() {
537            self.default_fields[0]
538        } else {
539            return Err("No field specified and no default fields configured".to_string());
540        };
541
542        let tokenizer = self.get_tokenizer(field_id);
543        let tokens: Vec<String> = tokenizer
544            .tokenize(phrase)
545            .into_iter()
546            .map(|t| t.text.to_lowercase())
547            .collect();
548
549        if tokens.is_empty() {
550            return Err("No tokens in phrase".to_string());
551        }
552
553        if tokens.len() == 1 {
554            return Ok(Box::new(TermQuery::text(field_id, &tokens[0])));
555        }
556
557        // Create AND query for all tokens (simplified phrase matching)
558        let mut bool_query = BooleanQuery::new();
559        for token in &tokens {
560            bool_query = bool_query.must(TermQuery::text(field_id, token));
561        }
562
563        // If no field specified and multiple default fields, wrap in OR
564        if field.is_none() && self.default_fields.len() > 1 {
565            let mut outer = BooleanQuery::new();
566            for &f in &self.default_fields {
567                let tokenizer = self.get_tokenizer(f);
568                let tokens: Vec<String> = tokenizer
569                    .tokenize(phrase)
570                    .into_iter()
571                    .map(|t| t.text.to_lowercase())
572                    .collect();
573
574                let mut field_query = BooleanQuery::new();
575                for token in &tokens {
576                    field_query = field_query.must(TermQuery::text(f, token));
577                }
578                outer = outer.should(field_query);
579            }
580            return Ok(Box::new(outer));
581        }
582
583        Ok(Box::new(bool_query))
584    }
585
586    fn get_tokenizer(&self, field: Field) -> BoxedTokenizer {
587        // Get tokenizer name from schema field entry, fallback to "lowercase"
588        let tokenizer_name = self
589            .schema
590            .get_field_entry(field)
591            .and_then(|entry| entry.tokenizer.as_deref())
592            .unwrap_or("lowercase");
593
594        self.tokenizers
595            .get(tokenizer_name)
596            .unwrap_or_else(|| Box::new(crate::tokenizer::LowercaseTokenizer))
597    }
598}
599
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::SchemaBuilder;
    use crate::tokenizer::TokenizerRegistry;

    // Build a schema with two text fields ("title", "body") — both used as
    // default fields — plus the default tokenizer registry.
    fn setup() -> (Arc<Schema>, Vec<Field>, Arc<TokenizerRegistry>) {
        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());
        (schema, vec![title, body], tokenizers)
    }

    #[test]
    fn test_simple_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse without error - creates BooleanQuery across default fields
        let _query = parser.parse("rust").unwrap();
    }

    #[test]
    fn test_field_term() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse field:term syntax
        let _query = parser.parse("title:rust").unwrap();
    }

    #[test]
    fn test_boolean_and() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse AND boolean query
        let _query = parser.parse("rust AND programming").unwrap();
    }

    #[test]
    fn test_match_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should tokenize and create boolean query
        let _query = parser.parse("hello world").unwrap();
    }

    #[test]
    fn test_phrase_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse quoted phrase
        let _query = parser.parse("\"hello world\"").unwrap();
    }

    #[test]
    fn test_boolean_or() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse OR boolean query
        let _query = parser.parse("rust OR python").unwrap();
    }

    #[test]
    fn test_complex_query() {
        let (schema, default_fields, tokenizers) = setup();
        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);

        // Should parse complex boolean with grouping
        let _query = parser.parse("(rust OR python) AND programming").unwrap();
    }

    #[test]
    fn test_router_exclusive_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
            substitution: "doi://{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        // No default fields: exclusive routing should not need them.
        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should route DOI query to uri field
        let _query = parser.parse("doi:10.1234/test.123").unwrap();
    }

    #[test]
    fn test_router_additional_mode() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"#(\d+)".to_string(),
            substitution: "{1}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Additional,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should route to both uri field and default fields
        let _query = parser.parse("#42").unwrap();
    }

    #[test]
    fn test_router_no_match_falls_through() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let title = builder.add_text_field("title", true, true);
        let _uri = builder.add_text_field("uri", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"^doi:".to_string(),
            substitution: "{0}".to_string(),
            target_field: "uri".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);

        // Should NOT match and fall through to normal parsing
        let _query = parser.parse("rust programming").unwrap();
    }

    #[test]
    fn test_router_invalid_target_field() {
        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};

        let mut builder = SchemaBuilder::default();
        let _title = builder.add_text_field("title", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
            pattern: r"test".to_string(),
            substitution: "{0}".to_string(),
            target_field: "nonexistent".to_string(),
            mode: RoutingMode::Exclusive,
        }])
        .unwrap();

        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);

        // Should fail because target field doesn't exist
        let result = parser.parse("test");
        assert!(result.is_err());
        let err = result.err().unwrap();
        assert!(err.contains("Unknown target field"));
    }

    #[test]
    fn test_parse_ann_query() {
        let mut builder = SchemaBuilder::default();
        let embedding = builder.add_dense_vector_field("embedding", 128, true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![embedding], tokenizers);

        // Parse ANN query
        let result = parser.parse_query_string("embedding:ann([1.0, 2.0, 3.0], nprobe=32)");
        assert!(result.is_ok(), "Failed to parse ANN query: {:?}", result);

        if let Ok(ParsedQuery::Ann {
            field,
            vector,
            nprobe,
            rerank,
        }) = result
        {
            assert_eq!(field, "embedding");
            assert_eq!(vector, vec![1.0, 2.0, 3.0]);
            assert_eq!(nprobe, 32);
            assert_eq!(rerank, 3.0); // default
        } else {
            panic!("Expected Ann query, got: {:?}", result);
        }
    }

    #[test]
    fn test_parse_sparse_query() {
        let mut builder = SchemaBuilder::default();
        let sparse = builder.add_text_field("sparse", true, true);
        let schema = Arc::new(builder.build());
        let tokenizers = Arc::new(TokenizerRegistry::default());

        let parser = QueryLanguageParser::new(schema, vec![sparse], tokenizers);

        // Parse sparse query
        let result = parser.parse_query_string("sparse:sparse({1: 0.5, 5: 0.3})");
        assert!(result.is_ok(), "Failed to parse sparse query: {:?}", result);

        if let Ok(ParsedQuery::Sparse { field, vector }) = result {
            assert_eq!(field, "sparse");
            assert_eq!(vector, vec![(1, 0.5), (5, 0.3)]);
        } else {
            panic!("Expected Sparse query, got: {:?}", result);
        }
    }
}
825}