Skip to main content

reddb_server/storage/query/parser/
search_commands.rs

//! Search Command Parser: SEARCH SIMILAR | TEXT | HYBRID | MULTIMODAL | INDEX | CONTEXT | SPATIAL
2
3use super::super::ast::{QueryExpr, SearchCommand};
4use super::super::lexer::Token;
5use super::error::ParseError;
6use super::Parser;
7use crate::storage::schema::Value;
8
9impl<'a> Parser<'a> {
10    /// Parse: SEARCH subcommand ...
11    pub fn parse_search_command(&mut self) -> Result<QueryExpr, ParseError> {
12        self.expect(Token::Search)?;
13        match self.peek().clone() {
14            Token::Similar => self.parse_search_similar(),
15            Token::Text => self.parse_search_text(),
16            Token::Hybrid => self.parse_search_hybrid(),
17            Token::Index => self.parse_search_index(),
18            Token::Ident(name) if name.eq_ignore_ascii_case("MULTIMODAL") => {
19                self.parse_search_multimodal()
20            }
21            Token::Ident(name) if name.eq_ignore_ascii_case("CONTEXT") => {
22                self.parse_search_context()
23            }
24            Token::Ident(name) if name.eq_ignore_ascii_case("SPATIAL") => {
25                self.parse_search_spatial()
26            }
27            _ => Err(ParseError::expected(
28                vec![
29                    "SIMILAR",
30                    "TEXT",
31                    "HYBRID",
32                    "MULTIMODAL",
33                    "INDEX",
34                    "CONTEXT",
35                    "SPATIAL",
36                ],
37                self.peek(),
38                self.position(),
39            )),
40        }
41    }
42
43    /// Parse: SEARCH SIMILAR ([v1, v2] | TEXT 'query') COLLECTION col [LIMIT n] [MIN_SCORE f] [USING provider]
44    fn parse_search_similar(&mut self) -> Result<QueryExpr, ParseError> {
45        self.advance()?; // consume SIMILAR
46
47        // Parse vector literal OR text for semantic search
48        let (vector, text) = if self.consume(&Token::Text)? {
49            // SEARCH SIMILAR TEXT 'query' — semantic search
50            let query_text = self.parse_string()?;
51            (Vec::new(), Some(query_text))
52        } else {
53            // SEARCH SIMILAR [0.1, 0.2] — classic vector search
54            (self.parse_vector_literal()?, None)
55        };
56
57        // Parse COLLECTION
58        self.expect(Token::Collection)?;
59        let collection = self.expect_ident()?;
60
61        // Optional LIMIT
62        let limit = if self.consume(&Token::Limit)? {
63            self.parse_integer()? as usize
64        } else {
65            10
66        };
67
68        // Optional MIN_SCORE
69        let min_score = if self.consume(&Token::MinScore)? {
70            self.parse_float()? as f32
71        } else {
72            0.0
73        };
74
75        // Optional USING provider. `USING` is a reserved keyword
76        // (`Token::Using`), so `consume_search_ident` (which only
77        // matches `Token::Ident`) would never fire. Use the typed
78        // consumer. See bug #108.
79        let provider = if self.consume(&Token::Using)? {
80            Some(self.expect_ident()?)
81        } else {
82            None
83        };
84
85        Ok(QueryExpr::SearchCommand(SearchCommand::Similar {
86            vector,
87            text,
88            provider,
89            collection,
90            limit,
91            min_score,
92        }))
93    }
94
95    /// Parse: SEARCH TEXT 'query string' [COLLECTION|IN col] [LIMIT n] [FUZZY]
96    fn parse_search_text(&mut self) -> Result<QueryExpr, ParseError> {
97        self.advance()?; // consume TEXT
98
99        let query = self.parse_string()?;
100
101        // Optional COLLECTION
102        let collection = if self.consume(&Token::Collection)? || self.consume(&Token::In)? {
103            Some(self.expect_ident()?)
104        } else {
105            None
106        };
107
108        // Optional LIMIT
109        let limit = if self.consume(&Token::Limit)? {
110            self.parse_integer()? as usize
111        } else {
112            10
113        };
114
115        // Optional FUZZY
116        let fuzzy = self.consume(&Token::Fuzzy)?;
117
118        Ok(QueryExpr::SearchCommand(SearchCommand::Text {
119            query,
120            collection,
121            limit,
122            fuzzy,
123        }))
124    }
125
126    /// Parse: SEARCH HYBRID [SIMILAR|VECTOR [v1, v2, ...]] [TEXT 'query'] COLLECTION|IN col [LIMIT|K n]
127    fn parse_search_hybrid(&mut self) -> Result<QueryExpr, ParseError> {
128        self.advance()?; // consume HYBRID
129
130        let mut vector = None;
131        let mut query = None;
132
133        loop {
134            if self.consume(&Token::Similar)? || self.consume(&Token::Vector)? {
135                vector = Some(self.parse_vector_literal()?);
136            } else if self.consume(&Token::Text)? {
137                query = Some(self.parse_string()?);
138            } else {
139                break;
140            }
141        }
142
143        // Require at least one of vector or text
144        if vector.is_none() && query.is_none() {
145            return Err(ParseError::new(
146                "SEARCH HYBRID requires at least SIMILAR or TEXT".to_string(),
147                self.position(),
148            ));
149        }
150
151        // Parse COLLECTION/IN — tolerate collection names that collide
152        // with reserved keywords (e.g. `data`, `text`, `nodes`) by
153        // falling back to `expect_ident_or_keyword` and lowercasing the
154        // keyword form so the stored name matches the source casing.
155        if !(self.consume(&Token::Collection)? || self.consume(&Token::In)?) {
156            return Err(ParseError::expected(
157                vec!["COLLECTION", "IN"],
158                self.peek(),
159                self.position(),
160            ));
161        }
162        let collection = self.expect_collection_name()?;
163
164        // Optional LIMIT
165        let limit = if self.consume(&Token::Limit)? || self.consume(&Token::K)? {
166            let _ = self.consume(&Token::Eq)?;
167            self.parse_integer()? as usize
168        } else {
169            10
170        };
171
172        Ok(QueryExpr::SearchCommand(SearchCommand::Hybrid {
173            vector,
174            query,
175            collection,
176            limit,
177        }))
178    }
179
180    /// Parse: SEARCH MULTIMODAL 'query' [COLLECTION col] [LIMIT n]
181    fn parse_search_multimodal(&mut self) -> Result<QueryExpr, ParseError> {
182        self.advance()?; // consume MULTIMODAL identifier
183
184        let query = self.parse_string()?;
185
186        let collection = if self.consume(&Token::Collection)? {
187            Some(self.expect_ident()?)
188        } else {
189            None
190        };
191
192        let limit = if self.consume(&Token::Limit)? {
193            self.parse_integer()? as usize
194        } else {
195            25
196        };
197
198        Ok(QueryExpr::SearchCommand(SearchCommand::Multimodal {
199            query,
200            collection,
201            limit,
202        }))
203    }
204
205    /// Parse: SEARCH INDEX index VALUE 'value' [COLLECTION col] [LIMIT n] [EXACT|FUZZY]
206    fn parse_search_index(&mut self) -> Result<QueryExpr, ParseError> {
207        self.advance()?; // consume INDEX keyword
208
209        let index = self.expect_ident()?;
210        self.expect_search_ident("VALUE")?;
211        let value = self.parse_string()?;
212
213        let collection = if self.consume(&Token::Collection)? {
214            Some(self.expect_ident()?)
215        } else {
216            None
217        };
218
219        let limit = if self.consume(&Token::Limit)? {
220            self.parse_integer()? as usize
221        } else {
222            25
223        };
224
225        let fuzzy = self.consume(&Token::Fuzzy)? || self.consume_search_ident("FUZZY")?;
226        if !fuzzy {
227            let _ = self.consume_search_ident("EXACT")?;
228        }
229        let exact = !fuzzy;
230
231        Ok(QueryExpr::SearchCommand(SearchCommand::Index {
232            index,
233            value,
234            collection,
235            limit,
236            exact,
237        }))
238    }
239
240    /// Collection/index names frequently collide with reserved words
241    /// (`data`, `text`, `nodes`, `edges`, …). Accept either a plain
242    /// identifier or a keyword, lowercasing the keyword form so the
243    /// stored name matches the source spelling.
244    fn expect_collection_name(&mut self) -> Result<String, ParseError> {
245        let was_ident = matches!(self.peek(), Token::Ident(_));
246        let raw = self.expect_ident_or_keyword()?;
247        Ok(if was_ident {
248            raw
249        } else {
250            raw.to_ascii_lowercase()
251        })
252    }
253
254    fn expect_search_ident(&mut self, expected: &str) -> Result<(), ParseError> {
255        if self.consume_search_ident(expected)? {
256            Ok(())
257        } else {
258            Err(ParseError::expected(
259                vec![expected],
260                self.peek(),
261                self.position(),
262            ))
263        }
264    }
265
266    fn consume_search_ident(&mut self, expected: &str) -> Result<bool, ParseError> {
267        match self.peek().clone() {
268            Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
269                self.advance()?;
270                Ok(true)
271            }
272            _ => Ok(false),
273        }
274    }
275
276    /// Parse: SEARCH CONTEXT 'query' [FIELD field] [COLLECTION col] [DEPTH n] [LIMIT n]
277    fn parse_search_context(&mut self) -> Result<QueryExpr, ParseError> {
278        self.advance()?; // consume CONTEXT keyword
279
280        let query = self.parse_string()?;
281
282        let field = if self.consume_search_ident("FIELD")? {
283            Some(self.expect_ident()?)
284        } else {
285            None
286        };
287
288        let collection = if self.consume(&Token::Collection)? {
289            Some(self.expect_ident()?)
290        } else {
291            None
292        };
293
294        // Parse optional clauses in any order
295        let mut limit = 25usize;
296        let mut depth = 1usize;
297        for _ in 0..2 {
298            if self.consume(&Token::Limit)? {
299                limit = self.parse_integer()? as usize;
300            } else if self.consume(&Token::Depth)? {
301                depth = self.parse_integer()? as usize;
302            }
303        }
304
305        Ok(QueryExpr::SearchCommand(SearchCommand::Context {
306            query,
307            field,
308            collection,
309            limit,
310            depth,
311        }))
312    }
313
314    /// Parse: SEARCH SPATIAL (RADIUS | BBOX | NEAREST) ...
315    ///
316    /// Syntax:
317    /// - SEARCH SPATIAL RADIUS lat lon radius_km COLLECTION col COLUMN col [LIMIT n]
318    /// - SEARCH SPATIAL BBOX min_lat min_lon max_lat max_lon COLLECTION col COLUMN col [LIMIT n]
319    /// - SEARCH SPATIAL NEAREST lat lon K n COLLECTION col COLUMN col
320    fn parse_search_spatial(&mut self) -> Result<QueryExpr, ParseError> {
321        self.advance()?; // consume SPATIAL
322
323        match self.peek().clone() {
324            Token::Ident(ref name) if name.eq_ignore_ascii_case("RADIUS") => {
325                self.advance()?; // consume RADIUS
326                let lat_pos = self.position();
327                let center_lat = self.parse_float()?;
328                if !(-90.0..=90.0).contains(&center_lat) {
329                    return Err(ParseError::value_out_of_range(
330                        "lat",
331                        "must be in -90.0..=90.0",
332                        lat_pos,
333                    ));
334                }
335                let lon_pos = self.position();
336                let center_lon = self.parse_float()?;
337                if !(-180.0..=180.0).contains(&center_lon) {
338                    return Err(ParseError::value_out_of_range(
339                        "lon",
340                        "must be in -180.0..=180.0",
341                        lon_pos,
342                    ));
343                }
344                let r_pos = self.position();
345                let radius_km = self.parse_float()?;
346                if radius_km.partial_cmp(&0.0) != Some(std::cmp::Ordering::Greater) {
347                    return Err(ParseError::value_out_of_range(
348                        "radius",
349                        "must be a positive number",
350                        r_pos,
351                    ));
352                }
353
354                self.expect(Token::Collection)?;
355                let collection = self.expect_ident()?;
356
357                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
358                let column = self.expect_ident()?;
359
360                let limit = if self.consume(&Token::Limit)? {
361                    self.parse_integer()? as usize
362                } else {
363                    100
364                };
365
366                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialRadius {
367                    center_lat,
368                    center_lon,
369                    radius_km,
370                    collection,
371                    column,
372                    limit,
373                }))
374            }
375            Token::Ident(ref name) if name.eq_ignore_ascii_case("BBOX") => {
376                self.advance()?; // consume BBOX
377                let p = self.position();
378                let min_lat = self.parse_float()?;
379                if !(-90.0..=90.0).contains(&min_lat) {
380                    return Err(ParseError::value_out_of_range(
381                        "lat",
382                        "must be in -90.0..=90.0",
383                        p,
384                    ));
385                }
386                let p = self.position();
387                let min_lon = self.parse_float()?;
388                if !(-180.0..=180.0).contains(&min_lon) {
389                    return Err(ParseError::value_out_of_range(
390                        "lon",
391                        "must be in -180.0..=180.0",
392                        p,
393                    ));
394                }
395                let p = self.position();
396                let max_lat = self.parse_float()?;
397                if !(-90.0..=90.0).contains(&max_lat) {
398                    return Err(ParseError::value_out_of_range(
399                        "lat",
400                        "must be in -90.0..=90.0",
401                        p,
402                    ));
403                }
404                let p = self.position();
405                let max_lon = self.parse_float()?;
406                if !(-180.0..=180.0).contains(&max_lon) {
407                    return Err(ParseError::value_out_of_range(
408                        "lon",
409                        "must be in -180.0..=180.0",
410                        p,
411                    ));
412                }
413
414                self.expect(Token::Collection)?;
415                let collection = self.expect_ident()?;
416
417                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
418                let column = self.expect_ident()?;
419
420                let limit = if self.consume(&Token::Limit)? {
421                    self.parse_integer()? as usize
422                } else {
423                    100
424                };
425
426                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialBbox {
427                    min_lat,
428                    min_lon,
429                    max_lat,
430                    max_lon,
431                    collection,
432                    column,
433                    limit,
434                }))
435            }
436            Token::Ident(ref name) if name.eq_ignore_ascii_case("NEAREST") => {
437                self.advance()?; // consume NEAREST
438                let lat_pos = self.position();
439                let lat = self.parse_float()?;
440                if !(-90.0..=90.0).contains(&lat) {
441                    return Err(ParseError::value_out_of_range(
442                        "lat",
443                        "must be in -90.0..=90.0",
444                        lat_pos,
445                    ));
446                }
447                let lon_pos = self.position();
448                let lon = self.parse_float()?;
449                if !(-180.0..=180.0).contains(&lon) {
450                    return Err(ParseError::value_out_of_range(
451                        "lon",
452                        "must be in -180.0..=180.0",
453                        lon_pos,
454                    ));
455                }
456
457                self.expect(Token::K)?;
458                let k = self.parse_positive_integer("K")? as usize;
459
460                self.expect(Token::Collection)?;
461                let collection = self.expect_ident()?;
462
463                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
464                let column = self.expect_ident()?;
465
466                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialNearest {
467                    lat,
468                    lon,
469                    k,
470                    collection,
471                    column,
472                }))
473            }
474            _ => Err(ParseError::expected(
475                vec!["RADIUS", "BBOX", "NEAREST"],
476                self.peek(),
477                self.position(),
478            )),
479        }
480    }
481
482    /// Parse a vector literal: [0.1, 0.2, 0.3]
483    fn parse_vector_literal(&mut self) -> Result<Vec<f32>, ParseError> {
484        self.expect(Token::LBracket)?;
485        let mut items = Vec::new();
486        if !self.check(&Token::RBracket) {
487            loop {
488                let val = self.parse_float()? as f32;
489                items.push(val);
490                if !self.consume(&Token::Comma)? {
491                    break;
492                }
493            }
494        }
495        self.expect(Token::RBracket)?;
496        Ok(items)
497    }
498}