Skip to main content

reddb_server/storage/query/parser/
search_commands.rs

//! Search Command Parser: SEARCH SIMILAR | TEXT | HYBRID | MULTIMODAL | INDEX | CONTEXT | SPATIAL
2
3use super::super::ast::{QueryExpr, SearchCommand};
4use super::super::lexer::Token;
5use super::error::ParseError;
6use super::Parser;
7use crate::storage::schema::Value;
8
9impl<'a> Parser<'a> {
10    /// Parse: SEARCH subcommand ...
11    pub fn parse_search_command(&mut self) -> Result<QueryExpr, ParseError> {
12        self.expect(Token::Search)?;
13        match self.peek().clone() {
14            Token::Similar => self.parse_search_similar(),
15            Token::Text => self.parse_search_text(),
16            Token::Hybrid => self.parse_search_hybrid(),
17            Token::Index => self.parse_search_index(),
18            Token::Ident(name) if name.eq_ignore_ascii_case("MULTIMODAL") => {
19                self.parse_search_multimodal()
20            }
21            Token::Ident(name) if name.eq_ignore_ascii_case("CONTEXT") => {
22                self.parse_search_context()
23            }
24            Token::Ident(name) if name.eq_ignore_ascii_case("SPATIAL") => {
25                self.parse_search_spatial()
26            }
27            _ => Err(ParseError::expected(
28                vec![
29                    "SIMILAR",
30                    "TEXT",
31                    "HYBRID",
32                    "MULTIMODAL",
33                    "INDEX",
34                    "CONTEXT",
35                    "SPATIAL",
36                ],
37                self.peek(),
38                self.position(),
39            )),
40        }
41    }
42
43    /// Parse: SEARCH SIMILAR ([v1, v2] | TEXT 'query') COLLECTION col [LIMIT n] [MIN_SCORE f] [USING provider]
44    fn parse_search_similar(&mut self) -> Result<QueryExpr, ParseError> {
45        self.advance()?; // consume SIMILAR
46
47        // Parse vector literal OR text for semantic search OR $N placeholder
48        let mut vector_param: Option<usize> = None;
49        let mut text_param: Option<usize> = None;
50        let (vector, text) = if self.consume(&Token::Text)? {
51            // SEARCH SIMILAR TEXT ('query' | $N) — semantic search
52            if matches!(self.peek(), Token::Dollar | Token::Question) {
53                text_param = Some(self.parse_param_slot("SEARCH SIMILAR TEXT")?);
54                (Vec::new(), None)
55            } else {
56                let query_text = self.parse_string()?;
57                (Vec::new(), Some(query_text))
58            }
59        } else if matches!(self.peek(), Token::Dollar) {
60            // SEARCH SIMILAR $N — parameterized vector
61            if self.placeholder_mode == super::PlaceholderMode::Question {
62                return Err(ParseError::new(
63                    "cannot mix `?` and `$N` placeholders in one statement".to_string(),
64                    self.position(),
65                ));
66            }
67            self.advance()?;
68            let idx = match *self.peek() {
69                Token::Integer(n) if n >= 1 => {
70                    self.advance()?;
71                    (n - 1) as usize
72                }
73                _ => {
74                    return Err(ParseError::new(
75                        "expected `$N` (N >= 1) for SEARCH SIMILAR vector parameter".to_string(),
76                        self.position(),
77                    ));
78                }
79            };
80            self.placeholder_mode = super::PlaceholderMode::Dollar;
81            vector_param = Some(idx);
82            (Vec::new(), None)
83        } else {
84            // SEARCH SIMILAR [0.1, 0.2] — classic vector search
85            (self.parse_vector_literal()?, None)
86        };
87
88        // Parse COLLECTION
89        self.expect(Token::Collection)?;
90        let collection = self.expect_ident()?;
91
92        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
93        let mut limit_param: Option<usize> = None;
94        let limit = if self.consume(&Token::Limit)? {
95            if matches!(self.peek(), Token::Dollar | Token::Question) {
96                limit_param = Some(self.parse_param_slot("LIMIT")?);
97                0
98            } else {
99                self.parse_integer()? as usize
100            }
101        } else {
102            10
103        };
104
105        // Optional MIN_SCORE — accepts a float literal or `$N` placeholder (#361).
106        let mut min_score_param: Option<usize> = None;
107        let min_score = if self.consume(&Token::MinScore)? {
108            if matches!(self.peek(), Token::Dollar | Token::Question) {
109                min_score_param = Some(self.parse_param_slot("MIN_SCORE")?);
110                0.0
111            } else {
112                self.parse_float()? as f32
113            }
114        } else {
115            0.0
116        };
117
118        // Optional USING provider. `USING` is a reserved keyword
119        // (`Token::Using`), so `consume_search_ident` (which only
120        // matches `Token::Ident`) would never fire. Use the typed
121        // consumer. See bug #108.
122        let provider = if self.consume(&Token::Using)? {
123            Some(self.expect_ident()?)
124        } else {
125            None
126        };
127
128        Ok(QueryExpr::SearchCommand(SearchCommand::Similar {
129            vector,
130            text,
131            provider,
132            collection,
133            limit,
134            min_score,
135            vector_param,
136            limit_param,
137            min_score_param,
138            text_param,
139        }))
140    }
141
142    /// Parse: SEARCH TEXT 'query string' [COLLECTION|IN col] [LIMIT n] [FUZZY]
143    fn parse_search_text(&mut self) -> Result<QueryExpr, ParseError> {
144        self.advance()?; // consume TEXT
145
146        let query = self.parse_string()?;
147
148        // Optional COLLECTION
149        let collection = if self.consume(&Token::Collection)? || self.consume(&Token::In)? {
150            Some(self.expect_ident()?)
151        } else {
152            None
153        };
154
155        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
156        let mut limit_param: Option<usize> = None;
157        let limit = if self.consume(&Token::Limit)? {
158            if matches!(self.peek(), Token::Dollar | Token::Question) {
159                limit_param = Some(self.parse_param_slot("LIMIT")?);
160                0
161            } else {
162                self.parse_integer()? as usize
163            }
164        } else {
165            10
166        };
167
168        // Optional FUZZY
169        let fuzzy = self.consume(&Token::Fuzzy)?;
170
171        Ok(QueryExpr::SearchCommand(SearchCommand::Text {
172            query,
173            collection,
174            limit,
175            fuzzy,
176            limit_param,
177        }))
178    }
179
180    /// Parse: SEARCH HYBRID [SIMILAR|VECTOR [v1, v2, ...]] [TEXT 'query'] COLLECTION|IN col [LIMIT|K n]
181    fn parse_search_hybrid(&mut self) -> Result<QueryExpr, ParseError> {
182        self.advance()?; // consume HYBRID
183
184        let mut vector = None;
185        let mut query = None;
186
187        loop {
188            if self.consume(&Token::Similar)? || self.consume(&Token::Vector)? {
189                vector = Some(self.parse_vector_literal()?);
190            } else if self.consume(&Token::Text)? {
191                query = Some(self.parse_string()?);
192            } else {
193                break;
194            }
195        }
196
197        // Require at least one of vector or text
198        if vector.is_none() && query.is_none() {
199            return Err(ParseError::new(
200                "SEARCH HYBRID requires at least SIMILAR or TEXT".to_string(),
201                self.position(),
202            ));
203        }
204
205        // Parse COLLECTION/IN — tolerate collection names that collide
206        // with reserved keywords (e.g. `data`, `text`, `nodes`) by
207        // falling back to `expect_ident_or_keyword` and lowercasing the
208        // keyword form so the stored name matches the source casing.
209        if !(self.consume(&Token::Collection)? || self.consume(&Token::In)?) {
210            return Err(ParseError::expected(
211                vec!["COLLECTION", "IN"],
212                self.peek(),
213                self.position(),
214            ));
215        }
216        let collection = self.expect_collection_name()?;
217
218        // Optional LIMIT / K — accepts an integer literal or `$N` placeholder (#361).
219        let mut limit_param: Option<usize> = None;
220        let limit = if self.consume(&Token::Limit)? || self.consume(&Token::K)? {
221            let _ = self.consume(&Token::Eq)?;
222            if matches!(self.peek(), Token::Dollar | Token::Question) {
223                limit_param = Some(self.parse_param_slot("LIMIT")?);
224                0
225            } else {
226                self.parse_integer()? as usize
227            }
228        } else {
229            10
230        };
231
232        Ok(QueryExpr::SearchCommand(SearchCommand::Hybrid {
233            vector,
234            query,
235            collection,
236            limit,
237            limit_param,
238        }))
239    }
240
241    /// Parse: SEARCH MULTIMODAL 'query' [COLLECTION col] [LIMIT n]
242    fn parse_search_multimodal(&mut self) -> Result<QueryExpr, ParseError> {
243        self.advance()?; // consume MULTIMODAL identifier
244
245        let query = self.parse_string()?;
246
247        let collection = if self.consume(&Token::Collection)? {
248            Some(self.expect_ident()?)
249        } else {
250            None
251        };
252
253        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
254        let mut limit_param: Option<usize> = None;
255        let limit = if self.consume(&Token::Limit)? {
256            if matches!(self.peek(), Token::Dollar | Token::Question) {
257                limit_param = Some(self.parse_param_slot("LIMIT")?);
258                0
259            } else {
260                self.parse_integer()? as usize
261            }
262        } else {
263            25
264        };
265
266        Ok(QueryExpr::SearchCommand(SearchCommand::Multimodal {
267            query,
268            collection,
269            limit,
270            limit_param,
271        }))
272    }
273
274    /// Parse: SEARCH INDEX index VALUE 'value' [COLLECTION col] [LIMIT n] [EXACT|FUZZY]
275    fn parse_search_index(&mut self) -> Result<QueryExpr, ParseError> {
276        self.advance()?; // consume INDEX keyword
277
278        let index = self.expect_ident()?;
279        self.expect_search_ident("VALUE")?;
280        let value = self.parse_string()?;
281
282        let collection = if self.consume(&Token::Collection)? {
283            Some(self.expect_ident()?)
284        } else {
285            None
286        };
287
288        // Optional LIMIT — accepts an integer literal or `$N` placeholder (#361).
289        let mut limit_param: Option<usize> = None;
290        let limit = if self.consume(&Token::Limit)? {
291            if matches!(self.peek(), Token::Dollar | Token::Question) {
292                limit_param = Some(self.parse_param_slot("LIMIT")?);
293                0
294            } else {
295                self.parse_integer()? as usize
296            }
297        } else {
298            25
299        };
300
301        let fuzzy = self.consume(&Token::Fuzzy)? || self.consume_search_ident("FUZZY")?;
302        if !fuzzy {
303            let _ = self.consume_search_ident("EXACT")?;
304        }
305        let exact = !fuzzy;
306
307        Ok(QueryExpr::SearchCommand(SearchCommand::Index {
308            index,
309            value,
310            collection,
311            limit,
312            exact,
313            limit_param,
314        }))
315    }
316
317    /// Collection/index names frequently collide with reserved words
318    /// (`data`, `text`, `nodes`, `edges`, …). Accept either a plain
319    /// identifier or a keyword, lowercasing the keyword form so the
320    /// stored name matches the source spelling.
321    fn expect_collection_name(&mut self) -> Result<String, ParseError> {
322        let was_ident = matches!(self.peek(), Token::Ident(_));
323        let raw = self.expect_ident_or_keyword()?;
324        Ok(if was_ident {
325            raw
326        } else {
327            raw.to_ascii_lowercase()
328        })
329    }
330
331    fn expect_search_ident(&mut self, expected: &str) -> Result<(), ParseError> {
332        if self.consume_search_ident(expected)? {
333            Ok(())
334        } else {
335            Err(ParseError::expected(
336                vec![expected],
337                self.peek(),
338                self.position(),
339            ))
340        }
341    }
342
343    fn consume_search_ident(&mut self, expected: &str) -> Result<bool, ParseError> {
344        match self.peek().clone() {
345            Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
346                self.advance()?;
347                Ok(true)
348            }
349            _ => Ok(false),
350        }
351    }
352
353    /// Parse: SEARCH CONTEXT 'query' [FIELD field] [COLLECTION col] [DEPTH n] [LIMIT n]
354    fn parse_search_context(&mut self) -> Result<QueryExpr, ParseError> {
355        self.advance()?; // consume CONTEXT keyword
356
357        let query = self.parse_string()?;
358
359        let field = if self.consume_search_ident("FIELD")? {
360            Some(self.expect_ident()?)
361        } else {
362            None
363        };
364
365        let collection = if self.consume(&Token::Collection)? {
366            Some(self.expect_ident()?)
367        } else {
368            None
369        };
370
371        // Parse optional clauses in any order
372        let mut limit = 25usize;
373        let mut depth = 1usize;
374        let mut limit_param: Option<usize> = None;
375        for _ in 0..2 {
376            if self.consume(&Token::Limit)? {
377                if matches!(self.peek(), Token::Dollar | Token::Question) {
378                    limit_param = Some(self.parse_param_slot("LIMIT")?);
379                    limit = 0;
380                } else {
381                    limit = self.parse_integer()? as usize;
382                }
383            } else if self.consume(&Token::Depth)? {
384                depth = self.parse_integer()? as usize;
385            }
386        }
387
388        Ok(QueryExpr::SearchCommand(SearchCommand::Context {
389            query,
390            field,
391            collection,
392            limit,
393            depth,
394            limit_param,
395        }))
396    }
397
398    /// Parse: SEARCH SPATIAL (RADIUS | BBOX | NEAREST) ...
399    ///
400    /// Syntax:
401    /// - SEARCH SPATIAL RADIUS lat lon radius_km COLLECTION col COLUMN col [LIMIT n]
402    /// - SEARCH SPATIAL BBOX min_lat min_lon max_lat max_lon COLLECTION col COLUMN col [LIMIT n]
403    /// - SEARCH SPATIAL NEAREST lat lon K n COLLECTION col COLUMN col
404    fn parse_search_spatial(&mut self) -> Result<QueryExpr, ParseError> {
405        self.advance()?; // consume SPATIAL
406
407        match self.peek().clone() {
408            Token::Ident(ref name) if name.eq_ignore_ascii_case("RADIUS") => {
409                self.advance()?; // consume RADIUS
410                let lat_pos = self.position();
411                let center_lat = self.parse_float()?;
412                if !(-90.0..=90.0).contains(&center_lat) {
413                    return Err(ParseError::value_out_of_range(
414                        "lat",
415                        "must be in -90.0..=90.0",
416                        lat_pos,
417                    ));
418                }
419                let lon_pos = self.position();
420                let center_lon = self.parse_float()?;
421                if !(-180.0..=180.0).contains(&center_lon) {
422                    return Err(ParseError::value_out_of_range(
423                        "lon",
424                        "must be in -180.0..=180.0",
425                        lon_pos,
426                    ));
427                }
428                let r_pos = self.position();
429                let radius_km = self.parse_float()?;
430                if radius_km.partial_cmp(&0.0) != Some(std::cmp::Ordering::Greater) {
431                    return Err(ParseError::value_out_of_range(
432                        "radius",
433                        "must be a positive number",
434                        r_pos,
435                    ));
436                }
437
438                self.expect(Token::Collection)?;
439                let collection = self.expect_ident()?;
440
441                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
442                let column = self.expect_ident()?;
443
444                let mut limit_param: Option<usize> = None;
445                let limit = if self.consume(&Token::Limit)? {
446                    if matches!(self.peek(), Token::Dollar | Token::Question) {
447                        limit_param = Some(self.parse_param_slot("LIMIT")?);
448                        0
449                    } else {
450                        self.parse_integer()? as usize
451                    }
452                } else {
453                    100
454                };
455
456                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialRadius {
457                    center_lat,
458                    center_lon,
459                    radius_km,
460                    collection,
461                    column,
462                    limit,
463                    limit_param,
464                }))
465            }
466            Token::Ident(ref name) if name.eq_ignore_ascii_case("BBOX") => {
467                self.advance()?; // consume BBOX
468                let p = self.position();
469                let min_lat = self.parse_float()?;
470                if !(-90.0..=90.0).contains(&min_lat) {
471                    return Err(ParseError::value_out_of_range(
472                        "lat",
473                        "must be in -90.0..=90.0",
474                        p,
475                    ));
476                }
477                let p = self.position();
478                let min_lon = self.parse_float()?;
479                if !(-180.0..=180.0).contains(&min_lon) {
480                    return Err(ParseError::value_out_of_range(
481                        "lon",
482                        "must be in -180.0..=180.0",
483                        p,
484                    ));
485                }
486                let p = self.position();
487                let max_lat = self.parse_float()?;
488                if !(-90.0..=90.0).contains(&max_lat) {
489                    return Err(ParseError::value_out_of_range(
490                        "lat",
491                        "must be in -90.0..=90.0",
492                        p,
493                    ));
494                }
495                let p = self.position();
496                let max_lon = self.parse_float()?;
497                if !(-180.0..=180.0).contains(&max_lon) {
498                    return Err(ParseError::value_out_of_range(
499                        "lon",
500                        "must be in -180.0..=180.0",
501                        p,
502                    ));
503                }
504
505                self.expect(Token::Collection)?;
506                let collection = self.expect_ident()?;
507
508                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
509                let column = self.expect_ident()?;
510
511                let mut limit_param: Option<usize> = None;
512                let limit = if self.consume(&Token::Limit)? {
513                    if matches!(self.peek(), Token::Dollar | Token::Question) {
514                        limit_param = Some(self.parse_param_slot("LIMIT")?);
515                        0
516                    } else {
517                        self.parse_integer()? as usize
518                    }
519                } else {
520                    100
521                };
522
523                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialBbox {
524                    min_lat,
525                    min_lon,
526                    max_lat,
527                    max_lon,
528                    collection,
529                    column,
530                    limit,
531                    limit_param,
532                }))
533            }
534            Token::Ident(ref name) if name.eq_ignore_ascii_case("NEAREST") => {
535                self.advance()?; // consume NEAREST
536                let lat_pos = self.position();
537                let lat = self.parse_float()?;
538                if !(-90.0..=90.0).contains(&lat) {
539                    return Err(ParseError::value_out_of_range(
540                        "lat",
541                        "must be in -90.0..=90.0",
542                        lat_pos,
543                    ));
544                }
545                let lon_pos = self.position();
546                let lon = self.parse_float()?;
547                if !(-180.0..=180.0).contains(&lon) {
548                    return Err(ParseError::value_out_of_range(
549                        "lon",
550                        "must be in -180.0..=180.0",
551                        lon_pos,
552                    ));
553                }
554
555                self.expect(Token::K)?;
556                // K accepts a positive integer literal OR `$N` placeholder (#361).
557                let mut k_param: Option<usize> = None;
558                let k = if matches!(self.peek(), Token::Dollar | Token::Question) {
559                    k_param = Some(self.parse_param_slot("K")?);
560                    0
561                } else {
562                    self.parse_positive_integer("K")? as usize
563                };
564
565                self.expect(Token::Collection)?;
566                let collection = self.expect_ident()?;
567
568                let _ = self.consume(&Token::Column)? || self.consume_search_ident("COLUMN")?;
569                let column = self.expect_ident()?;
570
571                Ok(QueryExpr::SearchCommand(SearchCommand::SpatialNearest {
572                    lat,
573                    lon,
574                    k,
575                    collection,
576                    column,
577                    k_param,
578                }))
579            }
580            _ => Err(ParseError::expected(
581                vec!["RADIUS", "BBOX", "NEAREST"],
582                self.peek(),
583                self.position(),
584            )),
585        }
586    }
587
588    /// Parse a vector literal: [0.1, 0.2, 0.3]
589    fn parse_vector_literal(&mut self) -> Result<Vec<f32>, ParseError> {
590        self.expect(Token::LBracket)?;
591        let mut items = Vec::new();
592        if !self.check(&Token::RBracket) {
593            loop {
594                let val = self.parse_float()? as f32;
595                items.push(val);
596                if !self.consume(&Token::Comma)? {
597                    break;
598                }
599            }
600        }
601        self.expect(Token::RBracket)?;
602        Ok(items)
603    }
604}