Skip to main content

reddb_server/storage/query/parser/
hybrid.rs

1//! Hybrid query parsing (combining structured and vector search)
2
3use super::super::ast::{FusionStrategy, HybridQuery, QueryExpr, VectorQuery};
4use super::super::lexer::Token;
5use super::error::ParseError;
6use super::Parser;
7
8impl<'a> Parser<'a> {
9    /// Parse HYBRID query combining structured and vector search
10    ///
11    /// Syntax:
12    /// ```text
13    /// HYBRID
14    ///   FROM table [WHERE ...] | MATCH pattern [WHERE ...]
15    /// VECTOR SEARCH collection
16    ///   SIMILAR TO ...
17    /// FUSION RERANK(weight) | FILTER_THEN_SEARCH | SEARCH_THEN_FILTER | RRF(k) | INTERSECTION | UNION(sw, vw)
18    /// [LIMIT n]
19    /// ```
20    pub fn parse_hybrid_query(&mut self) -> Result<QueryExpr, ParseError> {
21        self.expect(Token::Hybrid)?;
22
23        // Parse structured part (table or graph query)
24        let structured = match self.peek() {
25            Token::From => {
26                // Table query or join
27                self.parse_from_query()?
28            }
29            Token::Match => self.parse_match_query()?,
30            Token::Select => self.parse_select_query()?,
31            other => {
32                return Err(ParseError::expected(
33                    vec!["FROM", "MATCH", "SELECT"],
34                    other,
35                    self.position(),
36                ));
37            }
38        };
39
40        // Parse vector part
41        self.expect(Token::Vector)?;
42        self.expect(Token::Search)?;
43
44        let collection = self.expect_ident()?;
45
46        self.expect(Token::Similar)?;
47        self.expect(Token::To)?;
48
49        let query_vector = self.parse_vector_source()?;
50
51        // Parse vector filter
52        let filter = if self.consume(&Token::Where)? {
53            Some(self.parse_metadata_filter()?)
54        } else {
55            None
56        };
57
58        // Parse optional metric
59        let metric = if self.consume(&Token::Metric)? {
60            Some(self.parse_distance_metric()?)
61        } else {
62            None
63        };
64
65        let vector = VectorQuery {
66            alias: None,
67            collection,
68            query_vector,
69            k: 10, // Will be overridden by limit
70            filter,
71            metric,
72            include_vectors: false,
73            include_metadata: true,
74            threshold: None,
75        };
76
77        // Parse fusion strategy
78        self.expect(Token::Fusion)?;
79        let fusion = self.parse_fusion_strategy()?;
80
81        // Parse limit
82        let limit = if self.consume(&Token::Limit)? {
83            Some(self.parse_integer()? as usize)
84        } else {
85            None
86        };
87
88        Ok(QueryExpr::Hybrid(HybridQuery {
89            alias: None,
90            structured: Box::new(structured),
91            vector,
92            fusion,
93            limit,
94        }))
95    }
96
97    /// Parse fusion strategy
98    fn parse_fusion_strategy(&mut self) -> Result<FusionStrategy, ParseError> {
99        match self.peek() {
100            Token::Rerank => {
101                self.advance()?;
102                // Optional weight in parentheses
103                let weight = if self.consume(&Token::LParen)? {
104                    let w = self.parse_float()? as f32;
105                    self.expect(Token::RParen)?;
106                    w
107                } else {
108                    0.5 // Default weight
109                };
110                Ok(FusionStrategy::Rerank { weight })
111            }
112            Token::Rrf => {
113                self.advance()?;
114                // Optional k in parentheses
115                let k = if self.consume(&Token::LParen)? {
116                    let k = self.parse_integer()? as u32;
117                    self.expect(Token::RParen)?;
118                    k
119                } else {
120                    60 // Default RRF k
121                };
122                Ok(FusionStrategy::RRF { k })
123            }
124            Token::Intersection => {
125                self.advance()?;
126                Ok(FusionStrategy::Intersection)
127            }
128            Token::Union => {
129                self.advance()?;
130                // Optional weights in parentheses
131                let (sw, vw) = if self.consume(&Token::LParen)? {
132                    let sw = self.parse_float()? as f32;
133                    self.expect(Token::Comma)?;
134                    let vw = self.parse_float()? as f32;
135                    self.expect(Token::RParen)?;
136                    (sw, vw)
137                } else {
138                    (0.5, 0.5) // Default equal weights
139                };
140                Ok(FusionStrategy::Union {
141                    structured_weight: sw,
142                    vector_weight: vw,
143                })
144            }
145            Token::Ident(name) => {
146                let name_upper = name.to_uppercase();
147                let name_clone = name.clone();
148                self.advance()?;
149                match name_upper.as_str() {
150                    "FILTER_THEN_SEARCH" | "FILTERTHEN" => {
151                        Ok(FusionStrategy::FilterThenSearch)
152                    }
153                    "SEARCH_THEN_FILTER" | "SEARCHTHEN" => {
154                        Ok(FusionStrategy::SearchThenFilter)
155                    }
156                    "RERANK" => {
157                        let weight = if self.consume(&Token::LParen)? {
158                            let w = self.parse_float()? as f32;
159                            self.expect(Token::RParen)?;
160                            w
161                        } else {
162                            0.5
163                        };
164                        Ok(FusionStrategy::Rerank { weight })
165                    }
166                    _ => Err(ParseError::new(
167                        // F-05: `name_clone` is caller-controlled identifier
168                        // bytes. Render via `{:?}` so CR/LF/NUL/quotes are
169                        // escaped before the message reaches the downstream
170                        // JSON / audit / log / gRPC sinks.
171                        format!("Unknown fusion strategy: {name_clone:?}. Valid: RERANK, RRF, FILTER_THEN_SEARCH, SEARCH_THEN_FILTER, INTERSECTION, UNION"),
172                        self.position(),
173                    )),
174                }
175            }
176            other => Err(ParseError::expected(
177                vec![
178                    "RERANK",
179                    "RRF",
180                    "FILTER_THEN_SEARCH",
181                    "SEARCH_THEN_FILTER",
182                    "INTERSECTION",
183                    "UNION",
184                ],
185                other,
186                self.position(),
187            )),
188        }
189    }
190}