Skip to main content

reddb_server/storage/query/parser/
search_commands.rs

//! Search Command Parser: SEARCH SIMILAR | TEXT | HYBRID | MULTIMODAL | INDEX | CONTEXT | SPATIAL
2
3use super::super::ast::{QueryExpr, SearchCommand};
4use super::super::lexer::Token;
5use super::error::ParseError;
6use super::Parser;
7use crate::storage::schema::Value;
8
9impl<'a> Parser<'a> {
10    /// Parse: SEARCH subcommand ...
11    pub fn parse_search_command(&mut self) -> Result<QueryExpr, ParseError> {
12        self.expect(Token::Search)?;
13        match self.peek().clone() {
14            Token::Similar => self.parse_search_similar(),
15            Token::Text => self.parse_search_text(),
16            Token::Hybrid => self.parse_search_hybrid(),
17            Token::Index => self.parse_search_index(),
18            Token::Ident(name) if name.eq_ignore_ascii_case("MULTIMODAL") => {
19                self.parse_search_multimodal()
20            }
21            Token::Ident(name) if name.eq_ignore_ascii_case("CONTEXT") => {
22                self.parse_search_context()
23            }
24            Token::Ident(name) if name.eq_ignore_ascii_case("SPATIAL") => {
25                self.parse_search_spatial()
26            }
27            _ => Err(ParseError::expected(
28                vec![
29                    "SIMILAR",
30                    "TEXT",
31                    "HYBRID",
32                    "MULTIMODAL",
33                    "INDEX",
34                    "CONTEXT",
35                    "SPATIAL",
36                ],
37                self.peek(),
38                self.position(),
39            )),
40        }
41    }
42
43    /// Parse: SEARCH SIMILAR ([v1, v2] | TEXT 'query') COLLECTION col [LIMIT n] [MIN_SCORE f] [USING provider]
44    fn parse_search_similar(&mut self) -> Result<QueryExpr, ParseError> {
45        self.advance()?; // consume SIMILAR
46
47        // Parse vector literal OR text for semantic search OR positional placeholder.
48        let mut vector_param: Option<usize> = None;
49        let mut text_param: Option<usize> = None;
50        let (vector, text) = if self.consume(&Token::Text)? {
51            // SEARCH SIMILAR TEXT ('query' | $N) — semantic search
52            if matches!(self.peek(), Token::Dollar | Token::Question) {
53                text_param = Some(self.parse_param_slot("SEARCH SIMILAR TEXT")?);
54                (Vec::new(), None)
55            } else {
56                let query_text = self.parse_string()?;
57                (Vec::new(), Some(query_text))
58            }
59        } else if matches!(self.peek(), Token::Dollar | Token::Question) {
60            // SEARCH SIMILAR $N / ? / ?N — parameterized vector
61            vector_param = Some(self.parse_param_slot("SEARCH SIMILAR vector")?);
62            (Vec::new(), None)
63        } else {
64            // SEARCH SIMILAR [0.1, 0.2] — classic vector search
65            (self.parse_vector_literal()?, None)
66        };
67
68        // Parse COLLECTION
69        self.expect(Token::Collection)?;
70        let collection = self.expect_ident()?;
71
72        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
73        let mut limit_param: Option<usize> = None;
74        let limit = if self.consume(&Token::Limit)? {
75            if matches!(self.peek(), Token::Dollar | Token::Question) {
76                limit_param = Some(self.parse_param_slot("LIMIT")?);
77                0
78            } else {
79                self.parse_integer()? as usize
80            }
81        } else {
82            10
83        };
84
85        // Optional MIN_SCORE — accepts a float literal or `$N` placeholder (#361).
86        let mut min_score_param: Option<usize> = None;
87        let min_score = if self.consume(&Token::MinScore)? {
88            if matches!(self.peek(), Token::Dollar | Token::Question) {
89                min_score_param = Some(self.parse_param_slot("MIN_SCORE")?);
90                0.0
91            } else {
92                self.parse_float()? as f32
93            }
94        } else {
95            0.0
96        };
97
98        // Optional USING provider. `USING` is a reserved keyword
99        // (`Token::Using`), so `consume_search_ident` (which only
100        // matches `Token::Ident`) would never fire. Use the typed
101        // consumer. See bug #108.
102        let provider = if self.consume(&Token::Using)? {
103            Some(self.expect_ident()?)
104        } else {
105            None
106        };
107
108        Ok(QueryExpr::SearchCommand(SearchCommand::Similar {
109            vector,
110            text,
111            provider,
112            collection,
113            limit,
114            min_score,
115            vector_param,
116            limit_param,
117            min_score_param,
118            text_param,
119        }))
120    }
121
122    /// Parse: SEARCH TEXT 'query string' [COLLECTION|IN col] [LIMIT n] [FUZZY]
123    fn parse_search_text(&mut self) -> Result<QueryExpr, ParseError> {
124        self.advance()?; // consume TEXT
125
126        let query = self.parse_string()?;
127
128        // Optional COLLECTION
129        let collection = if self.consume(&Token::Collection)? || self.consume(&Token::In)? {
130            Some(self.expect_ident()?)
131        } else {
132            None
133        };
134
135        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
136        let mut limit_param: Option<usize> = None;
137        let limit = if self.consume(&Token::Limit)? {
138            if matches!(self.peek(), Token::Dollar | Token::Question) {
139                limit_param = Some(self.parse_param_slot("LIMIT")?);
140                0
141            } else {
142                self.parse_integer()? as usize
143            }
144        } else {
145            10
146        };
147
148        // Optional FUZZY
149        let fuzzy = self.consume(&Token::Fuzzy)?;
150
151        Ok(QueryExpr::SearchCommand(SearchCommand::Text {
152            query,
153            collection,
154            limit,
155            fuzzy,
156            limit_param,
157        }))
158    }
159
160    /// Parse: SEARCH HYBRID [SIMILAR|VECTOR [v1, v2, ...]] [TEXT 'query'] COLLECTION|IN col [LIMIT|K n]
161    fn parse_search_hybrid(&mut self) -> Result<QueryExpr, ParseError> {
162        self.advance()?; // consume HYBRID
163
164        let mut vector = None;
165        let mut query = None;
166
167        loop {
168            if self.consume(&Token::Similar)? || self.consume(&Token::Vector)? {
169                vector = Some(self.parse_vector_literal()?);
170            } else if self.consume(&Token::Text)? {
171                query = Some(self.parse_string()?);
172            } else {
173                break;
174            }
175        }
176
177        // Require at least one of vector or text
178        if vector.is_none() && query.is_none() {
179            return Err(ParseError::new(
180                "SEARCH HYBRID requires at least SIMILAR or TEXT".to_string(),
181                self.position(),
182            ));
183        }
184
185        // Parse COLLECTION/IN — tolerate collection names that collide
186        // with reserved keywords (e.g. `data`, `text`, `nodes`) by
187        // falling back to `expect_ident_or_keyword` and lowercasing the
188        // keyword form so the stored name matches the source casing.
189        if !(self.consume(&Token::Collection)? || self.consume(&Token::In)?) {
190            return Err(ParseError::expected(
191                vec!["COLLECTION", "IN"],
192                self.peek(),
193                self.position(),
194            ));
195        }
196        let collection = self.expect_collection_name()?;
197
198        // Optional LIMIT / K — accepts an integer literal or `$N` placeholder (#361).
199        let mut limit_param: Option<usize> = None;
200        let limit = if self.consume(&Token::Limit)? || self.consume(&Token::K)? {
201            let _ = self.consume(&Token::Eq)?;
202            if matches!(self.peek(), Token::Dollar | Token::Question) {
203                limit_param = Some(self.parse_param_slot("LIMIT")?);
204                0
205            } else {
206                self.parse_integer()? as usize
207            }
208        } else {
209            10
210        };
211
212        Ok(QueryExpr::SearchCommand(SearchCommand::Hybrid {
213            vector,
214            query,
215            collection,
216            limit,
217            limit_param,
218        }))
219    }
220
221    /// Parse: SEARCH MULTIMODAL 'query' [COLLECTION col] [LIMIT n]
222    fn parse_search_multimodal(&mut self) -> Result<QueryExpr, ParseError> {
223        self.advance()?; // consume MULTIMODAL identifier
224
225        let query = self.parse_string()?;
226
227        let collection = if self.consume(&Token::Collection)? {
228            Some(self.expect_ident()?)
229        } else {
230            None
231        };
232
233        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
234        let mut limit_param: Option<usize> = None;
235        let limit = if self.consume(&Token::Limit)? {
236            if matches!(self.peek(), Token::Dollar | Token::Question) {
237                limit_param = Some(self.parse_param_slot("LIMIT")?);
238                0
239            } else {
240                self.parse_integer()? as usize
241            }
242        } else {
243            25
244        };
245
246        Ok(QueryExpr::SearchCommand(SearchCommand::Multimodal {
247            query,
248            collection,
249            limit,
250            limit_param,
251        }))
252    }
253
254    /// Parse: SEARCH INDEX index VALUE 'value' [COLLECTION col] [LIMIT n] [EXACT|FUZZY]
255    fn parse_search_index(&mut self) -> Result<QueryExpr, ParseError> {
256        self.advance()?; // consume INDEX keyword
257
258        let index = self.expect_ident()?;
259        self.expect_search_ident("VALUE")?;
260        let value = self.parse_string()?;
261
262        let collection = if self.consume(&Token::Collection)? {
263            Some(self.expect_ident()?)
264        } else {
265            None
266        };
267
268        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
269        let mut limit_param: Option<usize> = None;
270        let limit = if self.consume(&Token::Limit)? {
271            if matches!(self.peek(), Token::Dollar | Token::Question) {
272                limit_param = Some(self.parse_param_slot("LIMIT")?);
273                0
274            } else {
275                self.parse_integer()? as usize
276            }
277        } else {
278            25
279        };
280
281        let fuzzy = self.consume(&Token::Fuzzy)? || self.consume_search_ident("FUZZY")?;
282        if !fuzzy {
283            let _ = self.consume_search_ident("EXACT")?;
284        }
285        let exact = !fuzzy;
286
287        Ok(QueryExpr::SearchCommand(SearchCommand::Index {
288            index,
289            value,
290            collection,
291            limit,
292            exact,
293            limit_param,
294        }))
295    }
296
297    /// Collection/index names frequently collide with reserved words
298    /// (`data`, `text`, `nodes`, `edges`, …). Accept either a plain
299    /// identifier or a keyword, lowercasing the keyword form so the
300    /// stored name matches the source spelling.
301    fn expect_collection_name(&mut self) -> Result<String, ParseError> {
302        let was_ident = matches!(self.peek(), Token::Ident(_));
303        let raw = self.expect_ident_or_keyword()?;
304        Ok(if was_ident {
305            raw
306        } else {
307            raw.to_ascii_lowercase()
308        })
309    }
310
311    fn expect_search_ident(&mut self, expected: &str) -> Result<(), ParseError> {
312        if self.consume_search_ident(expected)? {
313            Ok(())
314        } else {
315            Err(ParseError::expected(
316                vec![expected],
317                self.peek(),
318                self.position(),
319            ))
320        }
321    }
322
323    fn consume_search_ident(&mut self, expected: &str) -> Result<bool, ParseError> {
324        match self.peek().clone() {
325            Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
326                self.advance()?;
327                Ok(true)
328            }
329            _ => Ok(false),
330        }
331    }
332
333    /// Parse: SEARCH CONTEXT 'query' [FIELD field] [COLLECTION col] [DEPTH n] [LIMIT n]
334    fn parse_search_context(&mut self) -> Result<QueryExpr, ParseError> {
335        self.advance()?; // consume CONTEXT keyword
336
337        let query = self.parse_string()?;
338
339        let field = if self.consume_search_ident("FIELD")? {
340            Some(self.expect_ident()?)
341        } else {
342            None
343        };
344
345        let collection = if self.consume(&Token::Collection)? {
346            Some(self.expect_ident()?)
347        } else {
348            None
349        };
350
351        // Parse optional clauses in any order
352        let mut limit = 25usize;
353        let mut depth = 1usize;
354        let mut limit_param: Option<usize> = None;
355        for _ in 0..2 {
356            if self.consume(&Token::Limit)? {
357                if matches!(self.peek(), Token::Dollar | Token::Question) {
358                    limit_param = Some(self.parse_param_slot("LIMIT")?);
359                    limit = 0;
360                } else {
361                    limit = self.parse_integer()? as usize;
362                }
363            } else if self.consume(&Token::Depth)? {
364                depth = self.parse_integer()? as usize;
365            }
366        }
367
368        Ok(QueryExpr::SearchCommand(SearchCommand::Context {
369            query,
370            field,
371            collection,
372            limit,
373            depth,
374            limit_param,
375        }))
376    }
377
378    /// Parse: SEARCH SPATIAL (RADIUS | BBOX | NEAREST) ...
379    ///
380    /// Syntax:
381    /// - SEARCH SPATIAL RADIUS lat lon radius_km COLLECTION col COLUMN col [LIMIT n]
382    /// - SEARCH SPATIAL BBOX min_lat min_lon max_lat max_lon COLLECTION col COLUMN col [LIMIT n]
383    /// - SEARCH SPATIAL NEAREST lat lon K n COLLECTION col COLUMN col
384    fn parse_search_spatial(&mut self) -> Result<QueryExpr, ParseError> {
385        self.advance()?; // consume SPATIAL
386
387        match self.peek().clone() {
388            Token::Ident(ref name) if name.eq_ignore_ascii_case("RADIUS") => {
389                self.advance()?; // consume RADIUS
390                let lat_pos = self.position();
391                let center_lat = self.parse_float()?;
392                if !(-90.0..=90.0).contains(&center_lat) {
393                    return Err(ParseError::value_out_of_range(
394                        "lat",
395                        "must be in -90.0..=90.0",
396                        lat_pos,
397                    ));
398                }
399                let lon_pos = self.position();
400                let center_lon = self.parse_float()?;
401                if !(-180.0..=180.0).contains(&center_lon) {
402                    return Err(ParseError::value_out_of_range(
403                        "lon",
404                        "must be in -180.0..=180.0",
405                        lon_pos,
406                    ));
407                }
408                let r_pos = self.position();
409                let radius_km = self.parse_float()?;
410                if radius_km.partial_cmp(&0.0) != Some(std::cmp::Ordering::Greater) {
411                    return Err(ParseError::value_out_of_range(
412                        "radius",
413                        "must be a positive number",
414                        r_pos,
415                    ));
416                }
417
418                self.expect(Token::Collection)?;
419                let collection = self.expect_ident()?;
420
421                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
422                let column = self.expect_ident()?;
423
424                let mut limit_param: Option<usize> = None;
425                let limit = if self.consume(&Token::Limit)? {
426                    if matches!(self.peek(), Token::Dollar | Token::Question) {
427                        limit_param = Some(self.parse_param_slot("LIMIT")?);
428                        0
429                    } else {
430                        self.parse_integer()? as usize
431                    }
432                } else {
433                    100
434                };
435
436                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialRadius {
437                    center_lat,
438                    center_lon,
439                    radius_km,
440                    collection,
441                    column,
442                    limit,
443                    limit_param,
444                }))
445            }
446            Token::Ident(ref name) if name.eq_ignore_ascii_case("BBOX") => {
447                self.advance()?; // consume BBOX
448                let p = self.position();
449                let min_lat = self.parse_float()?;
450                if !(-90.0..=90.0).contains(&min_lat) {
451                    return Err(ParseError::value_out_of_range(
452                        "lat",
453                        "must be in -90.0..=90.0",
454                        p,
455                    ));
456                }
457                let p = self.position();
458                let min_lon = self.parse_float()?;
459                if !(-180.0..=180.0).contains(&min_lon) {
460                    return Err(ParseError::value_out_of_range(
461                        "lon",
462                        "must be in -180.0..=180.0",
463                        p,
464                    ));
465                }
466                let p = self.position();
467                let max_lat = self.parse_float()?;
468                if !(-90.0..=90.0).contains(&max_lat) {
469                    return Err(ParseError::value_out_of_range(
470                        "lat",
471                        "must be in -90.0..=90.0",
472                        p,
473                    ));
474                }
475                let p = self.position();
476                let max_lon = self.parse_float()?;
477                if !(-180.0..=180.0).contains(&max_lon) {
478                    return Err(ParseError::value_out_of_range(
479                        "lon",
480                        "must be in -180.0..=180.0",
481                        p,
482                    ));
483                }
484
485                self.expect(Token::Collection)?;
486                let collection = self.expect_ident()?;
487
488                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
489                let column = self.expect_ident()?;
490
491                let mut limit_param: Option<usize> = None;
492                let limit = if self.consume(&Token::Limit)? {
493                    if matches!(self.peek(), Token::Dollar | Token::Question) {
494                        limit_param = Some(self.parse_param_slot("LIMIT")?);
495                        0
496                    } else {
497                        self.parse_integer()? as usize
498                    }
499                } else {
500                    100
501                };
502
503                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialBbox {
504                    min_lat,
505                    min_lon,
506                    max_lat,
507                    max_lon,
508                    collection,
509                    column,
510                    limit,
511                    limit_param,
512                }))
513            }
514            Token::Ident(ref name) if name.eq_ignore_ascii_case("NEAREST") => {
515                self.advance()?; // consume NEAREST
516                let lat_pos = self.position();
517                let lat = self.parse_float()?;
518                if !(-90.0..=90.0).contains(&lat) {
519                    return Err(ParseError::value_out_of_range(
520                        "lat",
521                        "must be in -90.0..=90.0",
522                        lat_pos,
523                    ));
524                }
525                let lon_pos = self.position();
526                let lon = self.parse_float()?;
527                if !(-180.0..=180.0).contains(&lon) {
528                    return Err(ParseError::value_out_of_range(
529                        "lon",
530                        "must be in -180.0..=180.0",
531                        lon_pos,
532                    ));
533                }
534
535                self.expect(Token::K)?;
536                // K accepts a positive integer literal OR `$N` placeholder (#361).
537                let mut k_param: Option<usize> = None;
538                let k = if matches!(self.peek(), Token::Dollar | Token::Question) {
539                    k_param = Some(self.parse_param_slot("K")?);
540                    0
541                } else {
542                    self.parse_positive_integer("K")? as usize
543                };
544
545                self.expect(Token::Collection)?;
546                let collection = self.expect_ident()?;
547
548                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
549                let column = self.expect_ident()?;
550
551                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialNearest {
552                    lat,
553                    lon,
554                    k,
555                    collection,
556                    column,
557                    k_param,
558                }))
559            }
560            _ => Err(ParseError::expected(
561                vec!["RADIUS", "BBOX", "NEAREST"],
562                self.peek(),
563                self.position(),
564            )),
565        }
566    }
567
568    /// Parse a vector literal: [0.1, 0.2, 0.3]
569    fn parse_vector_literal(&mut self) -> Result<Vec<f32>, ParseError> {
570        self.expect(Token::LBracket)?;
571        let mut items = Vec::new();
572        if !self.check(&Token::RBracket) {
573            loop {
574                let val = self.parse_float()? as f32;
575                items.push(val);
576                if !self.consume(&Token::Comma)? {
577                    break;
578                }
579            }
580        }
581        self.expect(Token::RBracket)?;
582        Ok(items)
583    }
584}