// hermes_core/dsl/ql/mod.rs

//! Query language parser using pest
//!
//! Supports:
//! - Term queries: `rust` or `title:rust`
//! - Phrase queries: `"hello world"` or `title:"hello world"`
//! - Boolean operators: `AND`, `OR`, `NOT` (or `&&`, `||`, `-`)
//! - Grouping: `(rust OR python) AND programming`
//! - Default fields for unqualified terms
10use pest::Parser;
11use pest_derive::Parser;
12use std::sync::Arc;
13
14use super::query_field_router::{QueryFieldRouter, RoutingMode};
15use super::schema::{Field, Schema};
16use crate::query::{BooleanQuery, PrefixQuery, Query, TermQuery};
17use crate::tokenizer::{BoxedTokenizer, TokenizerRegistry};
18
/// Pest-generated parser; the grammar lives in `dsl/ql/ql.pest`.
#[derive(Parser)]
#[grammar = "dsl/ql/ql.pest"]
struct QueryParser;
22
/// Parsed query that can be converted to a Query trait object
#[derive(Debug, Clone)]
pub enum ParsedQuery {
    /// Single term, optionally field-qualified (`rust` or `title:rust`).
    Term {
        field: Option<String>,
        term: String,
    },
    /// Quoted phrase, optionally field-qualified (`"hello world"`).
    Phrase {
        field: Option<String>,
        phrase: String,
    },
    /// Prefix query — matches terms starting with a given prefix
    Prefix {
        field: Option<String>,
        prefix: String,
    },
    /// Dense vector ANN query
    Ann {
        field: String,
        vector: Vec<f32>,
        /// Probe count for the ANN search (parser default: 32).
        nprobe: usize,
        /// Rerank factor (parser default: 3.0).
        rerank: f32,
    },
    /// Sparse vector query
    Sparse {
        field: String,
        vector: Vec<(u32, f32)>,
    },
    /// Conjunction of sub-queries (`AND`).
    And(Vec<ParsedQuery>),
    /// Disjunction of sub-queries (`OR`).
    Or(Vec<ParsedQuery>),
    /// Negation of a sub-query (`NOT`).
    Not(Box<ParsedQuery>),
}
55
/// Query language parser with schema awareness
pub struct QueryLanguageParser {
    /// Schema used to resolve field names to ids and validate field types.
    schema: Arc<Schema>,
    /// Fields searched when a term carries no `field:` qualifier.
    default_fields: Vec<Field>,
    /// Registry used to look up per-field tokenizers (falls back to "simple").
    tokenizers: Arc<TokenizerRegistry>,
    /// Optional query field router for routing queries based on regex patterns
    field_router: Option<QueryFieldRouter>,
}
64
65impl QueryLanguageParser {
66    pub fn new(
67        schema: Arc<Schema>,
68        default_fields: Vec<Field>,
69        tokenizers: Arc<TokenizerRegistry>,
70    ) -> Self {
71        Self {
72            schema,
73            default_fields,
74            tokenizers,
75            field_router: None,
76        }
77    }
78
79    /// Create a parser with a query field router
80    pub fn with_router(
81        schema: Arc<Schema>,
82        default_fields: Vec<Field>,
83        tokenizers: Arc<TokenizerRegistry>,
84        router: QueryFieldRouter,
85    ) -> Self {
86        Self {
87            schema,
88            default_fields,
89            tokenizers,
90            field_router: Some(router),
91        }
92    }
93
94    /// Set the query field router
95    pub fn set_router(&mut self, router: QueryFieldRouter) {
96        self.field_router = Some(router);
97    }
98
99    /// Get the query field router
100    pub fn router(&self) -> Option<&QueryFieldRouter> {
101        self.field_router.as_ref()
102    }
103
104    /// Parse a query string into a Query
105    ///
106    /// Supports query language syntax (field:term, AND, OR, NOT, grouping)
107    /// and plain text (tokenized and searched across default fields).
108    ///
109    /// If a query field router is configured, the query is first checked against
110    /// routing rules. If a rule matches:
111    /// - In exclusive mode: only the target field is queried with the substituted value
112    /// - In additional mode: both the target field and default fields are queried
113    pub fn parse(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
114        let query_str = query_str.trim();
115        if query_str.is_empty() {
116            return Err("Empty query".to_string());
117        }
118
119        // Check if query matches any routing rules
120        if let Some(router) = &self.field_router
121            && let Some(routed) = router.route(query_str)
122        {
123            return self.build_routed_query(
124                &routed.query,
125                &routed.target_field,
126                routed.mode,
127                query_str,
128            );
129        }
130
131        // No routing match - parse normally
132        self.parse_normal(query_str)
133    }
134
135    /// Build a query from a routed match
136    fn build_routed_query(
137        &self,
138        routed_query: &str,
139        target_field: &str,
140        mode: RoutingMode,
141        original_query: &str,
142    ) -> Result<Box<dyn Query>, String> {
143        // Validate target field exists
144        let _field_id = self
145            .schema
146            .get_field(target_field)
147            .ok_or_else(|| format!("Unknown target field: {}", target_field))?;
148
149        // Build query for the target field with the substituted value
150        let target_query = self.build_term_query(Some(target_field), routed_query)?;
151
152        match mode {
153            RoutingMode::Exclusive => {
154                // Only query the target field
155                Ok(target_query)
156            }
157            RoutingMode::Additional => {
158                // Query both target field and default fields
159                let mut bool_query = BooleanQuery::new();
160                bool_query = bool_query.should(target_query);
161
162                // Also parse the original query against default fields
163                if let Ok(default_query) = self.parse_normal(original_query) {
164                    bool_query = bool_query.should(default_query);
165                }
166
167                Ok(Box::new(bool_query))
168            }
169        }
170    }
171
172    /// Parse query without routing (normal parsing path)
173    fn parse_normal(&self, query_str: &str) -> Result<Box<dyn Query>, String> {
174        // Try parsing as query language first
175        match self.parse_query_string(query_str) {
176            Ok(parsed) => self.build_query(&parsed),
177            Err(_) => {
178                // If grammar parsing fails, treat as plain text
179                // Split by whitespace and create OR of terms
180                self.parse_plain_text(query_str)
181            }
182        }
183    }
184
185    /// Parse plain text as implicit OR of tokenized terms
186    fn parse_plain_text(&self, text: &str) -> Result<Box<dyn Query>, String> {
187        if self.default_fields.is_empty() {
188            return Err("No default fields configured".to_string());
189        }
190
191        let tokenizer = self.get_tokenizer(self.default_fields[0]);
192        let tokens: Vec<String> = tokenizer
193            .tokenize(text)
194            .into_iter()
195            .map(|t| t.text.to_lowercase())
196            .collect();
197
198        if tokens.is_empty() {
199            return Err("No tokens in query".to_string());
200        }
201
202        let mut bool_query = BooleanQuery::new();
203        for token in &tokens {
204            for &field_id in &self.default_fields {
205                bool_query = bool_query.should(TermQuery::text(field_id, token));
206            }
207        }
208        Ok(Box::new(bool_query))
209    }
210
211    fn parse_query_string(&self, query_str: &str) -> Result<ParsedQuery, String> {
212        let pairs = QueryParser::parse(Rule::query, query_str)
213            .map_err(|e| format!("Parse error: {}", e))?;
214
215        let query_pair = pairs.into_iter().next().ok_or("No query found")?;
216
217        // query = { SOI ~ or_expr ~ EOI }
218        self.parse_or_expr(query_pair.into_inner().next().unwrap())
219    }
220
221    fn parse_or_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
222        let mut inner = pair.into_inner();
223        let first = self.parse_and_expr(inner.next().unwrap())?;
224
225        let rest: Vec<ParsedQuery> = inner
226            .filter(|p| p.as_rule() == Rule::and_expr)
227            .map(|p| self.parse_and_expr(p))
228            .collect::<Result<Vec<_>, _>>()?;
229
230        if rest.is_empty() {
231            Ok(first)
232        } else {
233            let mut all = vec![first];
234            all.extend(rest);
235            Ok(ParsedQuery::Or(all))
236        }
237    }
238
239    fn parse_and_expr(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
240        let mut inner = pair.into_inner();
241        let first = self.parse_primary(inner.next().unwrap())?;
242
243        let rest: Vec<ParsedQuery> = inner
244            .filter(|p| p.as_rule() == Rule::primary)
245            .map(|p| self.parse_primary(p))
246            .collect::<Result<Vec<_>, _>>()?;
247
248        if rest.is_empty() {
249            Ok(first)
250        } else {
251            let mut all = vec![first];
252            all.extend(rest);
253            Ok(ParsedQuery::And(all))
254        }
255    }
256
257    fn parse_primary(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
258        let mut negated = false;
259        let mut inner_query = None;
260
261        for inner in pair.into_inner() {
262            match inner.as_rule() {
263                Rule::not_op => negated = true,
264                Rule::group => {
265                    let or_expr = inner.into_inner().next().unwrap();
266                    inner_query = Some(self.parse_or_expr(or_expr)?);
267                }
268                Rule::ann_query => {
269                    inner_query = Some(self.parse_ann_query(inner)?);
270                }
271                Rule::sparse_query => {
272                    inner_query = Some(self.parse_sparse_query(inner)?);
273                }
274                Rule::phrase_query => {
275                    inner_query = Some(self.parse_phrase_query(inner)?);
276                }
277                Rule::prefix_query => {
278                    inner_query = Some(self.parse_prefix_query(inner)?);
279                }
280                Rule::term_query => {
281                    inner_query = Some(self.parse_term_query(inner)?);
282                }
283                _ => {}
284            }
285        }
286
287        let query = inner_query.ok_or("No query in primary")?;
288
289        if negated {
290            Ok(ParsedQuery::Not(Box::new(query)))
291        } else {
292            Ok(query)
293        }
294    }
295
296    fn parse_term_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
297        let mut field = None;
298        let mut term = String::new();
299
300        for inner in pair.into_inner() {
301            match inner.as_rule() {
302                Rule::field_spec => {
303                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
304                }
305                Rule::term => {
306                    term = inner.as_str().to_string();
307                }
308                _ => {}
309            }
310        }
311
312        Ok(ParsedQuery::Term { field, term })
313    }
314
315    fn parse_prefix_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
316        let mut field = None;
317        let mut prefix = String::new();
318
319        for inner in pair.into_inner() {
320            match inner.as_rule() {
321                Rule::field_spec => {
322                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
323                }
324                Rule::prefix_value => {
325                    prefix = inner.as_str().to_string();
326                }
327                _ => {}
328            }
329        }
330
331        Ok(ParsedQuery::Prefix { field, prefix })
332    }
333
334    fn parse_phrase_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
335        let mut field = None;
336        let mut phrase = String::new();
337
338        for inner in pair.into_inner() {
339            match inner.as_rule() {
340                Rule::field_spec => {
341                    field = Some(inner.into_inner().next().unwrap().as_str().to_string());
342                }
343                Rule::quoted_string => {
344                    let s = inner.as_str();
345                    phrase = s[1..s.len() - 1].to_string();
346                }
347                _ => {}
348            }
349        }
350
351        Ok(ParsedQuery::Phrase { field, phrase })
352    }
353
354    /// Parse an ANN query: field:ann([1.0, 2.0, 3.0], nprobe=32, rerank=3)
355    fn parse_ann_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
356        let mut field = String::new();
357        let mut vector = Vec::new();
358        let mut nprobe = 32usize;
359        let mut rerank = 3.0f32;
360
361        for inner in pair.into_inner() {
362            match inner.as_rule() {
363                Rule::field_spec => {
364                    field = inner.into_inner().next().unwrap().as_str().to_string();
365                }
366                Rule::vector_array => {
367                    for num in inner.into_inner() {
368                        if num.as_rule() == Rule::number
369                            && let Ok(v) = num.as_str().parse::<f32>()
370                        {
371                            vector.push(v);
372                        }
373                    }
374                }
375                Rule::ann_params => {
376                    for param in inner.into_inner() {
377                        if param.as_rule() == Rule::ann_param {
378                            // ann_param = { ("nprobe" | "rerank") ~ "=" ~ number }
379                            let param_str = param.as_str();
380                            if let Some(eq_pos) = param_str.find('=') {
381                                let name = &param_str[..eq_pos];
382                                let value = &param_str[eq_pos + 1..];
383                                match name {
384                                    "nprobe" => nprobe = value.parse().unwrap_or(0),
385                                    "rerank" => rerank = value.parse().unwrap_or(0.0),
386                                    _ => {}
387                                }
388                            }
389                        }
390                    }
391                }
392                _ => {}
393            }
394        }
395
396        Ok(ParsedQuery::Ann {
397            field,
398            vector,
399            nprobe,
400            rerank,
401        })
402    }
403
404    /// Parse a sparse vector query: field:sparse({1: 0.5, 5: 0.3})
405    fn parse_sparse_query(&self, pair: pest::iterators::Pair<Rule>) -> Result<ParsedQuery, String> {
406        let mut field = String::new();
407        let mut vector = Vec::new();
408
409        for inner in pair.into_inner() {
410            match inner.as_rule() {
411                Rule::field_spec => {
412                    field = inner.into_inner().next().unwrap().as_str().to_string();
413                }
414                Rule::sparse_map => {
415                    for entry in inner.into_inner() {
416                        if entry.as_rule() == Rule::sparse_entry {
417                            let mut entry_inner = entry.into_inner();
418                            if let (Some(idx), Some(weight)) =
419                                (entry_inner.next(), entry_inner.next())
420                                && let (Ok(i), Ok(w)) =
421                                    (idx.as_str().parse::<u32>(), weight.as_str().parse::<f32>())
422                            {
423                                vector.push((i, w));
424                            }
425                        }
426                    }
427                }
428                _ => {}
429            }
430        }
431
432        Ok(ParsedQuery::Sparse { field, vector })
433    }
434
    /// Lower a ParsedQuery tree into an executable `Query` trait object.
    ///
    /// Term/Phrase/Prefix delegate to the corresponding `build_*` helper;
    /// vector variants resolve the field against the schema; And/Or/Not
    /// recurse and combine with BooleanQuery clauses.
    fn build_query(&self, parsed: &ParsedQuery) -> Result<Box<dyn Query>, String> {
        use crate::query::{DenseVectorQuery, SparseVectorQuery};

        match parsed {
            ParsedQuery::Term { field, term } => self.build_term_query(field.as_deref(), term),
            ParsedQuery::Phrase { field, phrase } => {
                self.build_phrase_query(field.as_deref(), phrase)
            }
            ParsedQuery::Prefix { field, prefix } => {
                self.build_prefix_query(field.as_deref(), prefix)
            }
            ParsedQuery::Ann {
                field,
                vector,
                nprobe,
                rerank,
            } => {
                // ANN queries always name their field explicitly; it must exist.
                let field_id = self
                    .schema
                    .get_field(field)
                    .ok_or_else(|| format!("Unknown field: {}", field))?;
                let query = DenseVectorQuery::new(field_id, vector.clone())
                    .with_nprobe(*nprobe)
                    .with_rerank_factor(*rerank);
                Ok(Box::new(query))
            }
            ParsedQuery::Sparse { field, vector } => {
                // Sparse queries likewise require an existing named field.
                let field_id = self
                    .schema
                    .get_field(field)
                    .ok_or_else(|| format!("Unknown field: {}", field))?;
                let query = SparseVectorQuery::new(field_id, vector.clone());
                Ok(Box::new(query))
            }
            ParsedQuery::And(queries) => {
                // AND: every sub-query becomes a MUST clause.
                let mut bool_query = BooleanQuery::new();
                for q in queries {
                    bool_query = bool_query.must(self.build_query(q)?);
                }
                Ok(Box::new(bool_query))
            }
            ParsedQuery::Or(queries) => {
                // OR: every sub-query becomes a SHOULD clause.
                let mut bool_query = BooleanQuery::new();
                for q in queries {
                    bool_query = bool_query.should(self.build_query(q)?);
                }
                Ok(Box::new(bool_query))
            }
            ParsedQuery::Not(inner) => {
                // NOT query needs a context - wrap in a match-all with must_not
                let mut bool_query = BooleanQuery::new();
                bool_query = bool_query.must_not(self.build_query(inner)?);
                Ok(Box::new(bool_query))
            }
        }
    }
491
    /// Build a term query.
    ///
    /// Field-qualified terms are validated (the field must exist and be a
    /// text field), tokenized with the field's tokenizer, and ANDed if the
    /// term splits into several tokens. Unqualified terms are tokenized with
    /// the first default field's tokenizer and ORed across all default
    /// fields. Errors if the term yields no tokens or no field is available.
    fn build_term_query(&self, field: Option<&str>, term: &str) -> Result<Box<dyn Query>, String> {
        if let Some(field_name) = field {
            // Field-qualified term: tokenize using field's tokenizer
            let field_id = self
                .schema
                .get_field(field_name)
                .ok_or_else(|| format!("Unknown field: {}", field_name))?;
            // Validate field type — TermQuery only works on text fields
            if let Some(entry) = self.schema.get_field_entry(field_id) {
                use crate::dsl::FieldType;
                if entry.field_type != FieldType::Text {
                    return Err(format!(
                        "Term query requires a text field, but '{}' is {:?}. Use range query for numeric fields.",
                        field_name, entry.field_type
                    ));
                }
            }
            let tokenizer = self.get_tokenizer(field_id);
            // Terms are lowercased to match indexed form.
            let tokens: Vec<String> = tokenizer
                .tokenize(term)
                .into_iter()
                .map(|t| t.text.to_lowercase())
                .collect();

            if tokens.is_empty() {
                return Err("No tokens in term".to_string());
            }

            if tokens.len() == 1 {
                Ok(Box::new(TermQuery::text(field_id, &tokens[0])))
            } else {
                // Multiple tokens from single term - AND them together
                let mut bool_query = BooleanQuery::new();
                for token in &tokens {
                    bool_query = bool_query.must(TermQuery::text(field_id, token));
                }
                Ok(Box::new(bool_query))
            }
        } else if !self.default_fields.is_empty() {
            // Unqualified term: tokenize and search across default fields
            // (the first default field's tokenizer is used for all of them).
            let tokenizer = self.get_tokenizer(self.default_fields[0]);
            let tokens: Vec<String> = tokenizer
                .tokenize(term)
                .into_iter()
                .map(|t| t.text.to_lowercase())
                .collect();

            if tokens.is_empty() {
                return Err("No tokens in term".to_string());
            }

            // Build SHOULD query across all default fields for each token
            let mut bool_query = BooleanQuery::new();
            for token in &tokens {
                for &field_id in &self.default_fields {
                    bool_query = bool_query.should(TermQuery::text(field_id, token));
                }
            }
            Ok(Box::new(bool_query))
        } else {
            Err("No field specified and no default fields configured".to_string())
        }
    }
555
556    fn build_prefix_query(
557        &self,
558        field: Option<&str>,
559        prefix: &str,
560    ) -> Result<Box<dyn Query>, String> {
561        if let Some(field_name) = field {
562            let field_id = self
563                .schema
564                .get_field(field_name)
565                .ok_or_else(|| format!("Unknown field: {}", field_name))?;
566            Ok(Box::new(PrefixQuery::text(field_id, prefix)))
567        } else if !self.default_fields.is_empty() {
568            // Unqualified prefix: OR across default fields
569            let mut bool_query = BooleanQuery::new();
570            for &field_id in &self.default_fields {
571                bool_query = bool_query.should(PrefixQuery::text(field_id, prefix));
572            }
573            Ok(Box::new(bool_query))
574        } else {
575            Err("No field specified and no default fields configured".to_string())
576        }
577    }
578
579    fn build_phrase_query(
580        &self,
581        field: Option<&str>,
582        phrase: &str,
583    ) -> Result<Box<dyn Query>, String> {
584        // For phrase queries, tokenize and create AND query of terms
585        let field_id = if let Some(field_name) = field {
586            self.schema
587                .get_field(field_name)
588                .ok_or_else(|| format!("Unknown field: {}", field_name))?
589        } else if !self.default_fields.is_empty() {
590            self.default_fields[0]
591        } else {
592            return Err("No field specified and no default fields configured".to_string());
593        };
594
595        let tokenizer = self.get_tokenizer(field_id);
596        let tokens: Vec<String> = tokenizer
597            .tokenize(phrase)
598            .into_iter()
599            .map(|t| t.text.to_lowercase())
600            .collect();
601
602        if tokens.is_empty() {
603            return Err("No tokens in phrase".to_string());
604        }
605
606        if tokens.len() == 1 {
607            return Ok(Box::new(TermQuery::text(field_id, &tokens[0])));
608        }
609
610        // Create AND query for all tokens (simplified phrase matching)
611        let mut bool_query = BooleanQuery::new();
612        for token in &tokens {
613            bool_query = bool_query.must(TermQuery::text(field_id, token));
614        }
615
616        // If no field specified and multiple default fields, wrap in OR
617        if field.is_none() && self.default_fields.len() > 1 {
618            let mut outer = BooleanQuery::new();
619            for &f in &self.default_fields {
620                let tokenizer = self.get_tokenizer(f);
621                let tokens: Vec<String> = tokenizer
622                    .tokenize(phrase)
623                    .into_iter()
624                    .map(|t| t.text.to_lowercase())
625                    .collect();
626
627                let mut field_query = BooleanQuery::new();
628                for token in &tokens {
629                    field_query = field_query.must(TermQuery::text(f, token));
630                }
631                outer = outer.should(field_query);
632            }
633            return Ok(Box::new(outer));
634        }
635
636        Ok(Box::new(bool_query))
637    }
638
639    fn get_tokenizer(&self, field: Field) -> BoxedTokenizer {
640        // Get tokenizer name from schema field entry, fallback to "simple"
641        let tokenizer_name = self
642            .schema
643            .get_field_entry(field)
644            .and_then(|entry| entry.tokenizer.as_deref())
645            .unwrap_or("simple");
646
647        self.tokenizers
648            .get(tokenizer_name)
649            .unwrap_or_else(|| Box::new(crate::tokenizer::SimpleTokenizer))
650    }
651}
652
653#[cfg(test)]
654mod tests {
655    use super::*;
656    use crate::dsl::SchemaBuilder;
657    use crate::tokenizer::TokenizerRegistry;
658
659    fn setup() -> (Arc<Schema>, Vec<Field>, Arc<TokenizerRegistry>) {
660        let mut builder = SchemaBuilder::default();
661        let title = builder.add_text_field("title", true, true);
662        let body = builder.add_text_field("body", true, true);
663        let schema = Arc::new(builder.build());
664        let tokenizers = Arc::new(TokenizerRegistry::default());
665        (schema, vec![title, body], tokenizers)
666    }
667
668    #[test]
669    fn test_simple_term() {
670        let (schema, default_fields, tokenizers) = setup();
671        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
672
673        // Should parse without error - creates BooleanQuery across default fields
674        let _query = parser.parse("rust").unwrap();
675    }
676
677    #[test]
678    fn test_field_term() {
679        let (schema, default_fields, tokenizers) = setup();
680        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
681
682        // Should parse field:term syntax
683        let _query = parser.parse("title:rust").unwrap();
684    }
685
686    #[test]
687    fn test_boolean_and() {
688        let (schema, default_fields, tokenizers) = setup();
689        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
690
691        // Should parse AND boolean query
692        let _query = parser.parse("rust AND programming").unwrap();
693    }
694
695    #[test]
696    fn test_match_query() {
697        let (schema, default_fields, tokenizers) = setup();
698        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
699
700        // Should tokenize and create boolean query
701        let _query = parser.parse("hello world").unwrap();
702    }
703
704    #[test]
705    fn test_phrase_query() {
706        let (schema, default_fields, tokenizers) = setup();
707        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
708
709        // Should parse quoted phrase
710        let _query = parser.parse("\"hello world\"").unwrap();
711    }
712
713    #[test]
714    fn test_boolean_or() {
715        let (schema, default_fields, tokenizers) = setup();
716        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
717
718        // Should parse OR boolean query
719        let _query = parser.parse("rust OR python").unwrap();
720    }
721
722    #[test]
723    fn test_complex_query() {
724        let (schema, default_fields, tokenizers) = setup();
725        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
726
727        // Should parse complex boolean with grouping
728        let _query = parser.parse("(rust OR python) AND programming").unwrap();
729    }
730
731    #[test]
732    fn test_router_exclusive_mode() {
733        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};
734
735        let mut builder = SchemaBuilder::default();
736        let _title = builder.add_text_field("title", true, true);
737        let _uri = builder.add_text_field("uri", true, true);
738        let schema = Arc::new(builder.build());
739        let tokenizers = Arc::new(TokenizerRegistry::default());
740
741        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
742            pattern: r"^doi:(10\.\d{4,}/[^\s]+)$".to_string(),
743            substitution: "doi://{1}".to_string(),
744            target_field: "uri".to_string(),
745            mode: RoutingMode::Exclusive,
746        }])
747        .unwrap();
748
749        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);
750
751        // Should route DOI query to uri field
752        let _query = parser.parse("doi:10.1234/test.123").unwrap();
753    }
754
755    #[test]
756    fn test_router_additional_mode() {
757        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};
758
759        let mut builder = SchemaBuilder::default();
760        let title = builder.add_text_field("title", true, true);
761        let _uri = builder.add_text_field("uri", true, true);
762        let schema = Arc::new(builder.build());
763        let tokenizers = Arc::new(TokenizerRegistry::default());
764
765        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
766            pattern: r"#(\d+)".to_string(),
767            substitution: "{1}".to_string(),
768            target_field: "uri".to_string(),
769            mode: RoutingMode::Additional,
770        }])
771        .unwrap();
772
773        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);
774
775        // Should route to both uri field and default fields
776        let _query = parser.parse("#42").unwrap();
777    }
778
779    #[test]
780    fn test_router_no_match_falls_through() {
781        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};
782
783        let mut builder = SchemaBuilder::default();
784        let title = builder.add_text_field("title", true, true);
785        let _uri = builder.add_text_field("uri", true, true);
786        let schema = Arc::new(builder.build());
787        let tokenizers = Arc::new(TokenizerRegistry::default());
788
789        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
790            pattern: r"^doi:".to_string(),
791            substitution: "{0}".to_string(),
792            target_field: "uri".to_string(),
793            mode: RoutingMode::Exclusive,
794        }])
795        .unwrap();
796
797        let parser = QueryLanguageParser::with_router(schema, vec![title], tokenizers, router);
798
799        // Should NOT match and fall through to normal parsing
800        let _query = parser.parse("rust programming").unwrap();
801    }
802
803    #[test]
804    fn test_router_invalid_target_field() {
805        use crate::dsl::query_field_router::{QueryFieldRouter, QueryRouterRule, RoutingMode};
806
807        let mut builder = SchemaBuilder::default();
808        let _title = builder.add_text_field("title", true, true);
809        let schema = Arc::new(builder.build());
810        let tokenizers = Arc::new(TokenizerRegistry::default());
811
812        let router = QueryFieldRouter::from_rules(&[QueryRouterRule {
813            pattern: r"test".to_string(),
814            substitution: "{0}".to_string(),
815            target_field: "nonexistent".to_string(),
816            mode: RoutingMode::Exclusive,
817        }])
818        .unwrap();
819
820        let parser = QueryLanguageParser::with_router(schema, vec![], tokenizers, router);
821
822        // Should fail because target field doesn't exist
823        let result = parser.parse("test");
824        assert!(result.is_err());
825        let err = result.err().unwrap();
826        assert!(err.contains("Unknown target field"));
827    }
828
829    #[test]
830    fn test_parse_ann_query() {
831        let mut builder = SchemaBuilder::default();
832        let embedding = builder.add_dense_vector_field("embedding", 128, true, true);
833        let schema = Arc::new(builder.build());
834        let tokenizers = Arc::new(TokenizerRegistry::default());
835
836        let parser = QueryLanguageParser::new(schema, vec![embedding], tokenizers);
837
838        // Parse ANN query
839        let result = parser.parse_query_string("embedding:ann([1.0, 2.0, 3.0], nprobe=32)");
840        assert!(result.is_ok(), "Failed to parse ANN query: {:?}", result);
841
842        if let Ok(ParsedQuery::Ann {
843            field,
844            vector,
845            nprobe,
846            rerank,
847        }) = result
848        {
849            assert_eq!(field, "embedding");
850            assert_eq!(vector, vec![1.0, 2.0, 3.0]);
851            assert_eq!(nprobe, 32);
852            assert_eq!(rerank, 3.0); // default
853        } else {
854            panic!("Expected Ann query, got: {:?}", result);
855        }
856    }
857
858    #[test]
859    fn test_parse_sparse_query() {
860        let mut builder = SchemaBuilder::default();
861        let sparse = builder.add_text_field("sparse", true, true);
862        let schema = Arc::new(builder.build());
863        let tokenizers = Arc::new(TokenizerRegistry::default());
864
865        let parser = QueryLanguageParser::new(schema, vec![sparse], tokenizers);
866
867        // Parse sparse query
868        let result = parser.parse_query_string("sparse:sparse({1: 0.5, 5: 0.3})");
869        assert!(result.is_ok(), "Failed to parse sparse query: {:?}", result);
870
871        if let Ok(ParsedQuery::Sparse { field, vector }) = result {
872            assert_eq!(field, "sparse");
873            assert_eq!(vector, vec![(1, 0.5), (5, 0.3)]);
874        } else {
875            panic!("Expected Sparse query, got: {:?}", result);
876        }
877    }
878
879    #[test]
880    fn test_parse_prefix_simple() {
881        let (schema, default_fields, tokenizers) = setup();
882        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
883
884        // Simple prefix: title:abc*
885        let result = parser.parse_query_string("title:abc*");
886        assert!(result.is_ok(), "Failed to parse prefix query: {:?}", result);
887        if let Ok(ParsedQuery::Prefix { field, prefix }) = result {
888            assert_eq!(field, Some("title".to_string()));
889            assert_eq!(prefix, "abc");
890        } else {
891            panic!("Expected Prefix query, got: {:?}", result);
892        }
893    }
894
895    #[test]
896    fn test_parse_prefix_url() {
897        let mut builder = SchemaBuilder::default();
898        let _site = builder.add_text_field("site", true, true);
899        let schema = Arc::new(builder.build());
900        let tokenizers = Arc::new(TokenizerRegistry::default());
901        let parser = QueryLanguageParser::new(schema, vec![], tokenizers);
902
903        // URL prefix: site:https://reddit.com/r/Transhumanism*
904        let result = parser.parse_query_string("site:https://reddit.com/r/Transhumanism*");
905        assert!(
906            result.is_ok(),
907            "Failed to parse URL prefix query: {:?}",
908            result
909        );
910        if let Ok(ParsedQuery::Prefix { field, prefix }) = result {
911            assert_eq!(field, Some("site".to_string()));
912            assert_eq!(prefix, "https://reddit.com/r/Transhumanism");
913        } else {
914            panic!("Expected Prefix query, got: {:?}", result);
915        }
916    }
917
918    #[test]
919    fn test_parse_prefix_unqualified() {
920        let (schema, default_fields, tokenizers) = setup();
921        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
922
923        // Unqualified prefix: transhuman*
924        let result = parser.parse_query_string("transhuman*");
925        assert!(
926            result.is_ok(),
927            "Failed to parse unqualified prefix: {:?}",
928            result
929        );
930        if let Ok(ParsedQuery::Prefix { field, prefix }) = result {
931            assert_eq!(field, None);
932            assert_eq!(prefix, "transhuman");
933        } else {
934            panic!("Expected Prefix query, got: {:?}", result);
935        }
936    }
937
938    #[test]
939    fn test_prefix_query_builds() {
940        let (schema, default_fields, tokenizers) = setup();
941        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
942
943        // Should build without error
944        let _query = parser.parse("title:abc*").unwrap();
945    }
946
947    #[test]
948    fn test_prefix_in_boolean() {
949        let (schema, default_fields, tokenizers) = setup();
950        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
951
952        // Prefix in boolean: rust AND title:abc*
953        let _query = parser.parse("rust AND title:abc*").unwrap();
954    }
955
956    #[test]
957    fn test_prefix_mixed_with_terms() {
958        let mut builder = SchemaBuilder::default();
959        let title = builder.add_text_field("title", true, true);
960        let _site = builder.add_text_field("site", true, true);
961        let schema = Arc::new(builder.build());
962        let tokenizers = Arc::new(TokenizerRegistry::default());
963        let parser = QueryLanguageParser::new(schema, vec![title], tokenizers);
964
965        // Mixed: prefix + free-text terms (implicit OR)
966        let result =
967            parser.parse_query_string("site:https://reddit.com/r/Transhumanism* longevity drugs");
968        assert!(
969            result.is_ok(),
970            "Failed to parse mixed prefix+terms: {:?}",
971            result
972        );
973        // Should be Or([Prefix, Term, Term])
974        if let Ok(ParsedQuery::Or(parts)) = &result {
975            assert_eq!(parts.len(), 3, "Expected 3 parts, got: {:?}", parts);
976            assert!(
977                matches!(&parts[0], ParsedQuery::And(v) if v.len() == 1 && matches!(&v[0], ParsedQuery::Prefix { .. }))
978                    || matches!(&parts[0], ParsedQuery::Prefix { .. }),
979                "First part should be prefix: {:?}",
980                parts[0]
981            );
982        } else {
983            panic!("Expected Or query, got: {:?}", result);
984        }
985
986        // Should also build into a Query without error
987        let _query = parser
988            .parse("site:https://reddit.com/r/Transhumanism* longevity drugs")
989            .unwrap();
990    }
991
992    #[test]
993    fn test_implicit_or_plain_terms() {
994        let (schema, default_fields, tokenizers) = setup();
995        let parser = QueryLanguageParser::new(schema, default_fields, tokenizers);
996
997        // Space-separated terms: implicit OR
998        let result = parser.parse_query_string("hello world");
999        assert!(result.is_ok(), "Failed to parse implicit OR: {:?}", result);
1000        if let Ok(ParsedQuery::Or(parts)) = &result {
1001            assert_eq!(parts.len(), 2);
1002        } else {
1003            panic!("Expected Or query, got: {:?}", result);
1004        }
1005    }
1006}