velesdb_core/velesql/
parser.rs

1//! `VelesQL` parser implementation using pest.
2
3use pest::Parser as PestParser;
4use pest_derive::Parser;
5
6use super::ast::{
7    BetweenCondition, Column, CompareOp, Comparison, Condition, FusionConfig, InCondition,
8    IsNullCondition, LikeCondition, MatchCondition, Query, SelectColumns, SelectStatement, Value,
9    VectorExpr, VectorFusedSearch, VectorSearch, WithClause, WithOption, WithValue,
10};
11use super::error::{ParseError, ParseErrorKind};
12
// Internal pest parser: `pest_derive` generates the `Rule` enum and the
// `pest::Parser` implementation for this unit struct from the grammar file
// named in the `#[grammar]` attribute below.
#[derive(Parser)]
#[grammar = "velesql/grammar.pest"]
struct VelesQLParser;
16
/// `VelesQL` query parser.
///
/// A unit struct used purely as a namespace: every method in its `impl`
/// block is an associated function, and [`Parser::parse`] is the only
/// public one.
pub struct Parser;
19
20impl Parser {
21    /// Parses a `VelesQL` query string into an AST.
22    ///
23    /// # Errors
24    ///
25    /// Returns a `ParseError` if the query is invalid.
26    ///
27    /// # Example
28    ///
29    /// ```ignore
30    /// use velesdb_core::velesql::Parser;
31    ///
32    /// let query = Parser::parse("SELECT * FROM documents LIMIT 10")?;
33    /// ```
34    pub fn parse(input: &str) -> Result<Query, ParseError> {
35        let pairs = VelesQLParser::parse(Rule::query, input).map_err(|e| {
36            let position = match e.location {
37                pest::error::InputLocation::Pos(p) => p,
38                pest::error::InputLocation::Span((s, _)) => s,
39            };
40            ParseError::new(
41                ParseErrorKind::SyntaxError,
42                position,
43                input.chars().take(50).collect::<String>(),
44                e.to_string(),
45            )
46        })?;
47
48        let query_pair = pairs
49            .into_iter()
50            .next()
51            .ok_or_else(|| ParseError::syntax(0, input, "Empty query"))?;
52
53        Self::parse_query(query_pair)
54    }
55
56    fn parse_query(pair: pest::iterators::Pair<Rule>) -> Result<Query, ParseError> {
57        let mut inner = pair.into_inner();
58
59        let select_pair = inner
60            .find(|p| p.as_rule() == Rule::select_stmt)
61            .ok_or_else(|| ParseError::syntax(0, "", "Expected SELECT statement"))?;
62
63        let select = Self::parse_select_stmt(select_pair)?;
64
65        Ok(Query { select })
66    }
67
68    fn parse_select_stmt(pair: pest::iterators::Pair<Rule>) -> Result<SelectStatement, ParseError> {
69        let mut columns = SelectColumns::All;
70        let mut from = String::new();
71        let mut where_clause = None;
72        let mut limit = None;
73        let mut offset = None;
74        let mut with_clause = None;
75
76        for inner_pair in pair.into_inner() {
77            match inner_pair.as_rule() {
78                Rule::select_list => {
79                    columns = Self::parse_select_list(inner_pair)?;
80                }
81                Rule::identifier => {
82                    from = inner_pair.as_str().to_string();
83                }
84                Rule::where_clause => {
85                    where_clause = Some(Self::parse_where_clause(inner_pair)?);
86                }
87                Rule::limit_clause => {
88                    limit = Some(Self::parse_limit_clause(inner_pair)?);
89                }
90                Rule::offset_clause => {
91                    offset = Some(Self::parse_offset_clause(inner_pair)?);
92                }
93                Rule::with_clause => {
94                    with_clause = Some(Self::parse_with_clause(inner_pair)?);
95                }
96                _ => {}
97            }
98        }
99
100        Ok(SelectStatement {
101            columns,
102            from,
103            where_clause,
104            limit,
105            offset,
106            with_clause,
107        })
108    }
109
110    fn parse_select_list(pair: pest::iterators::Pair<Rule>) -> Result<SelectColumns, ParseError> {
111        let inner = pair.into_inner().next();
112
113        match inner {
114            Some(p) if p.as_rule() == Rule::column_list => {
115                let columns = Self::parse_column_list(p)?;
116                Ok(SelectColumns::Columns(columns))
117            }
118            _ => Ok(SelectColumns::All),
119        }
120    }
121
122    fn parse_column_list(pair: pest::iterators::Pair<Rule>) -> Result<Vec<Column>, ParseError> {
123        let mut columns = Vec::new();
124
125        for col_pair in pair.into_inner() {
126            if col_pair.as_rule() == Rule::column {
127                columns.push(Self::parse_column(col_pair)?);
128            }
129        }
130
131        Ok(columns)
132    }
133
134    fn parse_column(pair: pest::iterators::Pair<Rule>) -> Result<Column, ParseError> {
135        let mut inner = pair.into_inner();
136        let name_pair = inner
137            .next()
138            .ok_or_else(|| ParseError::syntax(0, "", "Expected column name"))?;
139
140        let name = Self::parse_column_name(&name_pair);
141        let alias = inner.next().map(|p| p.as_str().to_string());
142
143        Ok(Column { name, alias })
144    }
145
146    fn parse_column_name(pair: &pest::iterators::Pair<Rule>) -> String {
147        // column_name is atomic (@), so we get the full string directly
148        pair.as_str().to_string()
149    }
150
151    fn parse_where_clause(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
152        let or_expr = pair
153            .into_inner()
154            .next()
155            .ok_or_else(|| ParseError::syntax(0, "", "Expected condition"))?;
156
157        Self::parse_or_expr(or_expr)
158    }
159
160    fn parse_or_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
161        let mut inner = pair.into_inner().peekable();
162
163        let first = inner
164            .next()
165            .ok_or_else(|| ParseError::syntax(0, "", "Expected condition"))?;
166
167        let mut result = Self::parse_and_expr(first)?;
168
169        for and_expr in inner {
170            let right = Self::parse_and_expr(and_expr)?;
171            result = Condition::Or(Box::new(result), Box::new(right));
172        }
173
174        Ok(result)
175    }
176
177    fn parse_and_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
178        let mut inner = pair.into_inner().peekable();
179
180        let first = inner
181            .next()
182            .ok_or_else(|| ParseError::syntax(0, "", "Expected condition"))?;
183
184        let mut result = Self::parse_primary_expr(first)?;
185
186        for primary in inner {
187            let right = Self::parse_primary_expr(primary)?;
188            result = Condition::And(Box::new(result), Box::new(right));
189        }
190
191        Ok(result)
192    }
193
194    fn parse_primary_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
195        let inner = pair
196            .into_inner()
197            .next()
198            .ok_or_else(|| ParseError::syntax(0, "", "Expected primary condition"))?;
199
200        match inner.as_rule() {
201            Rule::or_expr => {
202                let cond = Self::parse_or_expr(inner)?;
203                Ok(Condition::Group(Box::new(cond)))
204            }
205            Rule::vector_fused_search => Self::parse_vector_fused_search(inner),
206            Rule::vector_search => Self::parse_vector_search(inner),
207            Rule::match_expr => Self::parse_match_expr(inner),
208            Rule::in_expr => Self::parse_in_expr(inner),
209            Rule::between_expr => Self::parse_between_expr(inner),
210            Rule::like_expr => Self::parse_like_expr(inner),
211            Rule::is_null_expr => Self::parse_is_null_expr(inner),
212            Rule::compare_expr => Self::parse_compare_expr(inner),
213            _ => Err(ParseError::syntax(
214                0,
215                inner.as_str(),
216                "Unknown condition type",
217            )),
218        }
219    }
220
221    fn parse_vector_search(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
222        let mut vector = None;
223
224        for inner in pair.into_inner() {
225            if inner.as_rule() == Rule::vector_value {
226                vector = Some(Self::parse_vector_value(inner)?);
227            }
228        }
229
230        let vector =
231            vector.ok_or_else(|| ParseError::syntax(0, "", "Expected vector expression"))?;
232
233        Ok(Condition::VectorSearch(VectorSearch { vector }))
234    }
235
236    fn parse_vector_fused_search(
237        pair: pest::iterators::Pair<Rule>,
238    ) -> Result<Condition, ParseError> {
239        let mut vectors = Vec::new();
240        let mut fusion = FusionConfig::default();
241
242        for inner in pair.into_inner() {
243            match inner.as_rule() {
244                Rule::vector_array => {
245                    for vec_value in inner.into_inner() {
246                        if vec_value.as_rule() == Rule::vector_value {
247                            vectors.push(Self::parse_vector_value(vec_value)?);
248                        }
249                    }
250                }
251                Rule::fusion_clause => {
252                    fusion = Self::parse_fusion_clause(inner);
253                }
254                _ => {}
255            }
256        }
257
258        if vectors.is_empty() {
259            return Err(ParseError::syntax(
260                0,
261                "",
262                "Expected at least one vector in NEAR_FUSED",
263            ));
264        }
265
266        Ok(Condition::VectorFusedSearch(VectorFusedSearch {
267            vectors,
268            fusion,
269        }))
270    }
271
272    fn parse_fusion_clause(pair: pest::iterators::Pair<Rule>) -> FusionConfig {
273        let mut strategy = "rrf".to_string();
274        let mut params = std::collections::HashMap::new();
275
276        for inner in pair.into_inner() {
277            match inner.as_rule() {
278                Rule::fusion_strategy => {
279                    strategy = inner.into_inner().next().map_or_else(
280                        || "rrf".to_string(),
281                        |s| s.as_str().trim_matches('\'').to_string(),
282                    );
283                }
284                Rule::fusion_params => {
285                    for param in inner.into_inner() {
286                        if param.as_rule() == Rule::fusion_param_list {
287                            for fp in param.into_inner() {
288                                if fp.as_rule() == Rule::fusion_param {
289                                    let mut fp_inner = fp.into_inner();
290                                    if let (Some(key), Some(val)) =
291                                        (fp_inner.next(), fp_inner.next())
292                                    {
293                                        let key_str = key.as_str().to_string();
294                                        let val_f64 = val.as_str().parse::<f64>().unwrap_or(0.0);
295                                        params.insert(key_str, val_f64);
296                                    }
297                                }
298                            }
299                        }
300                    }
301                }
302                _ => {}
303            }
304        }
305
306        FusionConfig { strategy, params }
307    }
308
309    fn parse_vector_value(pair: pest::iterators::Pair<Rule>) -> Result<VectorExpr, ParseError> {
310        let inner = pair
311            .into_inner()
312            .next()
313            .ok_or_else(|| ParseError::syntax(0, "", "Expected vector expression"))?;
314
315        match inner.as_rule() {
316            Rule::vector_literal => {
317                let values: Result<Vec<f32>, _> = inner
318                    .into_inner()
319                    .filter(|p| p.as_rule() == Rule::float)
320                    .map(|p| {
321                        p.as_str()
322                            .parse::<f32>()
323                            .map_err(|_| ParseError::syntax(0, p.as_str(), "Invalid float value"))
324                    })
325                    .collect();
326                Ok(VectorExpr::Literal(values?))
327            }
328            Rule::parameter => {
329                let name = inner.as_str().trim_start_matches('$').to_string();
330                Ok(VectorExpr::Parameter(name))
331            }
332            _ => Err(ParseError::syntax(
333                0,
334                inner.as_str(),
335                "Expected vector literal or parameter",
336            )),
337        }
338    }
339
340    fn parse_match_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
341        let mut inner = pair.into_inner();
342
343        let column = inner
344            .next()
345            .ok_or_else(|| ParseError::syntax(0, "", "Expected column name"))?
346            .as_str()
347            .to_string();
348
349        let query = inner
350            .next()
351            .ok_or_else(|| ParseError::syntax(0, "", "Expected match query"))?
352            .as_str()
353            .trim_matches('\'')
354            .to_string();
355
356        Ok(Condition::Match(MatchCondition { column, query }))
357    }
358
359    fn parse_in_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
360        let mut inner = pair.into_inner();
361
362        let column = inner
363            .next()
364            .ok_or_else(|| ParseError::syntax(0, "", "Expected column name"))?
365            .as_str()
366            .to_string();
367
368        let value_list = inner
369            .next()
370            .ok_or_else(|| ParseError::syntax(0, "", "Expected value list"))?;
371
372        let values: Result<Vec<Value>, _> = value_list
373            .into_inner()
374            .filter(|p| p.as_rule() == Rule::value)
375            .map(Self::parse_value)
376            .collect();
377
378        Ok(Condition::In(InCondition {
379            column,
380            values: values?,
381        }))
382    }
383
384    fn parse_between_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
385        let mut inner = pair.into_inner();
386
387        let column = inner
388            .next()
389            .ok_or_else(|| ParseError::syntax(0, "", "Expected column name"))?
390            .as_str()
391            .to_string();
392
393        let low = Self::parse_value(
394            inner
395                .next()
396                .ok_or_else(|| ParseError::syntax(0, "", "Expected low value"))?,
397        )?;
398
399        let high = Self::parse_value(
400            inner
401                .next()
402                .ok_or_else(|| ParseError::syntax(0, "", "Expected high value"))?,
403        )?;
404
405        Ok(Condition::Between(BetweenCondition { column, low, high }))
406    }
407
408    fn parse_like_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
409        let mut inner = pair.into_inner();
410
411        let column = inner
412            .next()
413            .ok_or_else(|| ParseError::syntax(0, "", "Expected column name"))?
414            .as_str()
415            .to_string();
416
417        // Parse LIKE or ILIKE operator
418        let like_op = inner
419            .next()
420            .ok_or_else(|| ParseError::syntax(0, "", "Expected LIKE or ILIKE"))?
421            .as_str()
422            .to_uppercase();
423        let case_insensitive = like_op == "ILIKE";
424
425        let pattern = inner
426            .next()
427            .ok_or_else(|| ParseError::syntax(0, "", "Expected pattern"))?
428            .as_str()
429            .trim_matches('\'')
430            .to_string();
431
432        Ok(Condition::Like(LikeCondition {
433            column,
434            pattern,
435            case_insensitive,
436        }))
437    }
438
439    fn parse_is_null_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
440        let mut column = String::new();
441        let mut has_not = false;
442
443        for inner in pair.into_inner() {
444            match inner.as_rule() {
445                Rule::identifier => {
446                    column = inner.as_str().to_string();
447                }
448                Rule::not_kw => {
449                    has_not = true;
450                }
451                _ => {}
452            }
453        }
454
455        if column.is_empty() {
456            return Err(ParseError::syntax(0, "", "Expected column name in IS NULL"));
457        }
458
459        Ok(Condition::IsNull(IsNullCondition {
460            column,
461            is_null: !has_not,
462        }))
463    }
464
465    fn parse_compare_expr(pair: pest::iterators::Pair<Rule>) -> Result<Condition, ParseError> {
466        let mut inner = pair.into_inner();
467
468        let column = inner
469            .next()
470            .ok_or_else(|| ParseError::syntax(0, "", "Expected column name"))?
471            .as_str()
472            .to_string();
473
474        let op_pair = inner
475            .next()
476            .ok_or_else(|| ParseError::syntax(0, "", "Expected operator"))?;
477
478        let operator = match op_pair.as_str() {
479            "=" => CompareOp::Eq,
480            "!=" | "<>" => CompareOp::NotEq,
481            ">" => CompareOp::Gt,
482            ">=" => CompareOp::Gte,
483            "<" => CompareOp::Lt,
484            "<=" => CompareOp::Lte,
485            _ => return Err(ParseError::syntax(0, op_pair.as_str(), "Invalid operator")),
486        };
487
488        let value = Self::parse_value(
489            inner
490                .next()
491                .ok_or_else(|| ParseError::syntax(0, "", "Expected value"))?,
492        )?;
493
494        Ok(Condition::Comparison(Comparison {
495            column,
496            operator,
497            value,
498        }))
499    }
500
501    fn parse_value(pair: pest::iterators::Pair<Rule>) -> Result<Value, ParseError> {
502        let inner = pair
503            .into_inner()
504            .next()
505            .ok_or_else(|| ParseError::syntax(0, "", "Expected value"))?;
506
507        match inner.as_rule() {
508            Rule::integer => {
509                let v = inner
510                    .as_str()
511                    .parse::<i64>()
512                    .map_err(|_| ParseError::syntax(0, inner.as_str(), "Invalid integer"))?;
513                Ok(Value::Integer(v))
514            }
515            Rule::float => {
516                let v = inner
517                    .as_str()
518                    .parse::<f64>()
519                    .map_err(|_| ParseError::syntax(0, inner.as_str(), "Invalid float"))?;
520                Ok(Value::Float(v))
521            }
522            Rule::string => {
523                let s = inner.as_str().trim_matches('\'').to_string();
524                Ok(Value::String(s))
525            }
526            Rule::boolean => {
527                let b = inner.as_str().to_uppercase() == "TRUE";
528                Ok(Value::Boolean(b))
529            }
530            Rule::null_value => Ok(Value::Null),
531            Rule::parameter => {
532                let name = inner.as_str().trim_start_matches('$').to_string();
533                Ok(Value::Parameter(name))
534            }
535            _ => Err(ParseError::syntax(0, inner.as_str(), "Unknown value type")),
536        }
537    }
538
539    fn parse_limit_clause(pair: pest::iterators::Pair<Rule>) -> Result<u64, ParseError> {
540        let int_pair = pair
541            .into_inner()
542            .next()
543            .ok_or_else(|| ParseError::syntax(0, "", "Expected integer for LIMIT"))?;
544
545        int_pair
546            .as_str()
547            .parse::<u64>()
548            .map_err(|_| ParseError::syntax(0, int_pair.as_str(), "Invalid LIMIT value"))
549    }
550
551    fn parse_offset_clause(pair: pest::iterators::Pair<Rule>) -> Result<u64, ParseError> {
552        let int_pair = pair
553            .into_inner()
554            .next()
555            .ok_or_else(|| ParseError::syntax(0, "", "Expected integer for OFFSET"))?;
556
557        int_pair
558            .as_str()
559            .parse::<u64>()
560            .map_err(|_| ParseError::syntax(0, int_pair.as_str(), "Invalid OFFSET value"))
561    }
562
563    fn parse_with_clause(pair: pest::iterators::Pair<Rule>) -> Result<WithClause, ParseError> {
564        let mut options = Vec::new();
565
566        for inner_pair in pair.into_inner() {
567            if inner_pair.as_rule() == Rule::with_option_list {
568                for opt_pair in inner_pair.into_inner() {
569                    if opt_pair.as_rule() == Rule::with_option {
570                        options.push(Self::parse_with_option(opt_pair)?);
571                    }
572                }
573            }
574        }
575
576        Ok(WithClause { options })
577    }
578
579    fn parse_with_option(pair: pest::iterators::Pair<Rule>) -> Result<WithOption, ParseError> {
580        let mut inner = pair.into_inner();
581
582        let key = inner
583            .next()
584            .ok_or_else(|| ParseError::syntax(0, "", "Expected option key"))?
585            .as_str()
586            .to_string();
587
588        let value_pair = inner
589            .next()
590            .ok_or_else(|| ParseError::syntax(0, "", "Expected option value"))?;
591
592        let value = Self::parse_with_value(value_pair)?;
593
594        Ok(WithOption { key, value })
595    }
596
597    fn parse_with_value(pair: pest::iterators::Pair<Rule>) -> Result<WithValue, ParseError> {
598        let inner = pair
599            .into_inner()
600            .next()
601            .ok_or_else(|| ParseError::syntax(0, "", "Expected WITH value"))?;
602
603        match inner.as_rule() {
604            Rule::string => {
605                let s = inner.as_str().trim_matches('\'').to_string();
606                Ok(WithValue::String(s))
607            }
608            Rule::integer => {
609                let v = inner
610                    .as_str()
611                    .parse::<i64>()
612                    .map_err(|_| ParseError::syntax(0, inner.as_str(), "Invalid integer"))?;
613                Ok(WithValue::Integer(v))
614            }
615            Rule::float => {
616                let v = inner
617                    .as_str()
618                    .parse::<f64>()
619                    .map_err(|_| ParseError::syntax(0, inner.as_str(), "Invalid float"))?;
620                Ok(WithValue::Float(v))
621            }
622            Rule::boolean => {
623                let b = inner.as_str().to_uppercase() == "TRUE";
624                Ok(WithValue::Boolean(b))
625            }
626            Rule::identifier => {
627                let s = inner.as_str().to_string();
628                Ok(WithValue::Identifier(s))
629            }
630            _ => Err(ParseError::syntax(
631                0,
632                inner.as_str(),
633                "Invalid WITH value type",
634            )),
635        }
636    }
637}