velesdb-core 1.9.2

High-performance vector database engine written in Rust
Documentation
//! WHERE clause and condition parsing.

use super::helpers::compare_op_from_str;
use super::{extract_identifier, Rule};
use crate::metrics::global_guardrails_metrics;
use crate::velesql::ast::{
    BetweenCondition, Comparison, Condition, InCondition, IsNullCondition, LikeCondition,
    MatchCondition, SimilarityCondition,
};
use crate::velesql::error::ParseError;
use crate::velesql::Parser;

impl Parser {
    /// Largest nesting-depth counter value that `ensure_depth` accepts; any
    /// greater depth is rejected with a "Condition nesting too deep" syntax error.
    const MAX_CONDITION_DEPTH: usize = 256;

    /// Renders a column reference pair as a plain string.
    ///
    /// Pairs whose rule is neither `column_name` nor `where_column` are handed
    /// straight to `extract_identifier`. Otherwise every direct `identifier`
    /// child is extracted and the pieces are joined with `.`; when the pair has
    /// no such children its raw text is returned unchanged.
    fn extract_column_name(pair: &pest::iterators::Pair<'_, Rule>) -> String {
        if !matches!(pair.as_rule(), Rule::column_name | Rule::where_column) {
            return extract_identifier(pair);
        }

        // `into_inner` consumes its receiver, so walk a clone of the borrowed pair.
        let segments: Vec<String> = pair
            .clone()
            .into_inner()
            .filter(|child| child.as_rule() == Rule::identifier)
            .map(|child| extract_identifier(&child))
            .collect();

        match segments.as_slice() {
            [] => pair.as_str().to_string(),
            _ => segments.join("."),
        }
    }

    /// Parses a WHERE clause pair into a [`Condition`] tree.
    ///
    /// The first child of `pair` is parsed as an OR-expression with the
    /// nesting-depth counter starting at zero.
    ///
    /// # Errors
    ///
    /// Returns an "Expected condition" syntax error when `pair` has no
    /// children, or any error produced while parsing the expression itself.
    pub(crate) fn parse_where_clause(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        match pair.into_inner().next() {
            Some(root) => Self::parse_or_expr_with_depth(root, 0),
            None => Err(ParseError::syntax(0, "", "Expected condition")),
        }
    }

    /// Parses an OR-expression pair, starting the nesting-depth counter at zero.
    ///
    /// # Errors
    ///
    /// Propagates any error from `parse_or_expr_with_depth`.
    #[allow(dead_code)]
    pub(crate) fn parse_or_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let root_depth = 0;
        Self::parse_or_expr_with_depth(pair, root_depth)
    }

    /// Guards against conditions nested deeper than `MAX_CONDITION_DEPTH`.
    ///
    /// Depths up to and including the limit pass. Beyond it, the rejection is
    /// recorded on the global guardrails metrics and a syntax error carrying
    /// `input` as its fragment is returned.
    fn ensure_depth(depth: usize, input: &str) -> Result<(), ParseError> {
        if depth <= Self::MAX_CONDITION_DEPTH {
            return Ok(());
        }

        global_guardrails_metrics().record_parser_depth_limit_rejected();
        Err(ParseError::syntax(0, input, "Condition nesting too deep"))
    }

    /// Parses an OR-chain at the given nesting depth.
    ///
    /// Each child is parsed with `parse_and_expr_with_depth`, and the results
    /// are folded left-to-right into nested `Condition::Or` nodes.
    fn parse_or_expr_with_depth(
        pair: pest::iterators::Pair<Rule>,
        depth: usize,
    ) -> Result<Condition, ParseError> {
        /// Joins two operands into an `Or` node.
        fn disjunction(lhs: Condition, rhs: Condition) -> Condition {
            Condition::Or(Box::new(lhs), Box::new(rhs))
        }

        Self::parse_binary_chain(pair, depth, Self::parse_and_expr_with_depth, disjunction)
    }

    /// Parses an AND-expression pair, starting the nesting-depth counter at zero.
    ///
    /// # Errors
    ///
    /// Propagates any error from `parse_and_expr_with_depth`.
    #[allow(dead_code)]
    pub(crate) fn parse_and_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let root_depth = 0;
        Self::parse_and_expr_with_depth(pair, root_depth)
    }

    /// Parses an AND-chain at the given nesting depth.
    ///
    /// Each child is parsed with `parse_primary_expr_with_depth`, and the
    /// results are folded left-to-right into nested `Condition::And` nodes.
    fn parse_and_expr_with_depth(
        pair: pest::iterators::Pair<Rule>,
        depth: usize,
    ) -> Result<Condition, ParseError> {
        /// Joins two operands into an `And` node.
        fn conjunction(lhs: Condition, rhs: Condition) -> Condition {
            Condition::And(Box::new(lhs), Box::new(rhs))
        }

        Self::parse_binary_chain(pair, depth, Self::parse_primary_expr_with_depth, conjunction)
    }

    /// Shared left fold for OR/AND chains.
    ///
    /// Checks `depth` against the nesting limit, parses the first child with
    /// `parse_child`, then parses each remaining child in order and merges it
    /// into the accumulated condition with `combine`. Children are parsed at
    /// `depth + 1`. A chain with a single child yields that child's condition
    /// unchanged.
    ///
    /// # Errors
    ///
    /// Returns the depth-limit error from `ensure_depth`, an
    /// "Expected condition" syntax error when `pair` has no children, or the
    /// first error reported by `parse_child`.
    fn parse_binary_chain(
        pair: pest::iterators::Pair<Rule>,
        depth: usize,
        parse_child: fn(pest::iterators::Pair<Rule>, usize) -> Result<Condition, ParseError>,
        combine: fn(Condition, Condition) -> Condition,
    ) -> Result<Condition, ParseError> {
        Self::ensure_depth(depth, pair.as_str())?;

        let child_depth = depth + 1;
        // A plain iterator is enough here: nothing ever looks ahead.
        let mut operands = pair.into_inner();

        let Some(head) = operands.next() else {
            return Err(ParseError::syntax(0, "", "Expected condition"));
        };
        let seed = parse_child(head, child_depth)?;

        // Stops at the first child that fails to parse.
        operands.try_fold(seed, |acc, operand| {
            parse_child(operand, child_depth).map(|rhs| combine(acc, rhs))
        })
    }

    /// Parses a primary-expression pair, starting the nesting-depth counter at zero.
    ///
    /// # Errors
    ///
    /// Propagates any error from `parse_primary_expr_with_depth`.
    #[allow(dead_code)]
    pub(crate) fn parse_primary_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let root_depth = 0;
        Self::parse_primary_expr_with_depth(pair, root_depth)
    }

    /// Parses one primary condition at the given nesting depth.
    ///
    /// After the depth check, the first child of `pair` decides what is built:
    /// an `or_expr` child is parsed recursively and wrapped in
    /// `Condition::Group`, a `not_expr` child has its operand parsed
    /// recursively and wrapped in `Condition::Not`, and every other recognised
    /// rule is delegated to its dedicated parser. Recursive calls run at
    /// `depth + 1`.
    ///
    /// # Errors
    ///
    /// Returns the depth-limit error, a syntax error when an expected child is
    /// missing or its rule is not recognised, or any error from the delegated
    /// parser.
    fn parse_primary_expr_with_depth(
        pair: pest::iterators::Pair<Rule>,
        depth: usize,
    ) -> Result<Condition, ParseError> {
        Self::ensure_depth(depth, pair.as_str())?;

        let Some(node) = pair.into_inner().next() else {
            return Err(ParseError::syntax(0, "", "Expected primary condition"));
        };
        // `ensure_depth` has just bounded `depth`, so this cannot overflow.
        let nested_depth = depth + 1;

        match node.as_rule() {
            // Recursive forms.
            Rule::or_expr => Self::parse_or_expr_with_depth(node, nested_depth)
                .map(|grouped| Condition::Group(Box::new(grouped))),
            Rule::not_expr => {
                let Some(operand) = node.into_inner().next() else {
                    return Err(ParseError::syntax(0, "", "Expected expression after NOT"));
                };
                Self::parse_primary_expr_with_depth(operand, nested_depth)
                    .map(|negated| Condition::Not(Box::new(negated)))
            }

            // Leaf forms, each with its own parser.
            Rule::similarity_expr => Self::parse_similarity_expr(node),
            Rule::graph_match_expr => Self::parse_graph_match_expr(node),
            Rule::vector_fused_search => Self::parse_vector_fused_search(node),
            Rule::sparse_vector_search => Self::parse_sparse_vector_search(node),
            Rule::vector_search => Self::parse_vector_search(node),
            Rule::match_expr => Self::parse_match_expr(node),
            Rule::in_expr => Self::parse_in_expr(node),
            Rule::between_expr => Self::parse_between_expr(node),
            Rule::like_expr => Self::parse_like_expr(node),
            Rule::is_null_expr => Self::parse_is_null_expr(node),
            Rule::compare_expr => Self::parse_compare_expr(node),

            _ => Err(ParseError::syntax(0, node.as_str(), "Unknown condition type")),
        }
    }

    /// Parses a similarity expression: `similarity(field, vector) op threshold`.
    ///
    /// The children of `pair` are scanned once. The field text, vector value,
    /// comparison operator and numeric threshold are each picked up from their
    /// own rule, and children with any other rule are ignored. If a rule occurs
    /// more than once, the last occurrence wins.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when any of the four parts is missing or the
    /// threshold text does not parse as `f64`, and propagates errors from the
    /// vector-value and operator parsers.
    pub(crate) fn parse_similarity_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let (mut field, mut vector, mut operator, mut threshold) = (None, None, None, None);

        for part in pair.into_inner() {
            match part.as_rule() {
                Rule::similarity_field => field = Some(part.as_str().to_string()),
                Rule::vector_value => vector = Some(Self::parse_vector_value(part)?),
                Rule::compare_op => operator = Some(compare_op_from_str(part.as_str())?),
                Rule::numeric_threshold => {
                    // numeric_threshold = { float | integer }
                    let Some(number) = part.into_inner().next() else {
                        return Err(ParseError::syntax(0, "", "Expected numeric threshold"));
                    };
                    let text = number.as_str();
                    let parsed = text
                        .parse::<f64>()
                        .map_err(|_| ParseError::syntax(0, text, "Invalid threshold"))?;
                    threshold = Some(parsed);
                }
                _ => {}
            }
        }

        // Missing parts are reported in this fixed order.
        let Some(field) = field else {
            return Err(ParseError::syntax(0, "", "Expected field name"));
        };
        let Some(vector) = vector else {
            return Err(ParseError::syntax(0, "", "Expected vector expression"));
        };
        let Some(operator) = operator else {
            return Err(ParseError::syntax(0, "", "Expected operator"));
        };
        let Some(threshold) = threshold else {
            return Err(ParseError::syntax(0, "", "Expected threshold value"));
        };

        Ok(Condition::Similarity(SimilarityCondition {
            field,
            vector,
            operator,
            threshold,
        }))
    }

    /// Pulls the next pair off `inner` and renders it as a column name.
    ///
    /// Several condition parsers begin by reading their first child as the
    /// column; this helper keeps that step in one place.
    ///
    /// # Errors
    ///
    /// Returns an "Expected column name" syntax error when `inner` is exhausted.
    fn extract_leading_column(
        inner: &mut pest::iterators::Pairs<Rule>,
    ) -> Result<String, ParseError> {
        match inner.next() {
            Some(first) => Ok(Self::extract_column_name(&first)),
            None => Err(ParseError::syntax(0, "", "Expected column name")),
        }
    }

    /// Parses a full-text match condition into `Condition::Match`.
    ///
    /// The first child supplies the column; the text of the second child is
    /// passed through `unescape_string_literal` to produce the query.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when the column or the query child is missing.
    pub(crate) fn parse_match_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut parts = pair.into_inner();
        let column = Self::extract_leading_column(&mut parts)?;

        let Some(query_pair) = parts.next() else {
            return Err(ParseError::syntax(0, "", "Expected match query"));
        };
        let query =
            crate::velesql::parser::helpers::unescape_string_literal(query_pair.as_str());

        Ok(Condition::Match(MatchCondition { column, query }))
    }

    /// Parses a graph MATCH predicate into `Condition::GraphMatch`.
    ///
    /// Every `graph_pattern` child is parsed in order; if there is more than
    /// one, the last successfully parsed pattern is kept. Children with any
    /// other rule are skipped.
    ///
    /// # Errors
    ///
    /// Propagates the first pattern-parsing error, or returns an
    /// "Expected MATCH pattern" syntax error when no pattern child exists.
    pub(crate) fn parse_graph_match_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut found = None;
        let pattern_nodes = pair
            .into_inner()
            .filter(|node| node.as_rule() == Rule::graph_pattern);
        for node in pattern_nodes {
            found = Some(Self::parse_graph_pattern(node)?);
        }

        let Some(pattern) = found else {
            return Err(ParseError::syntax(0, "", "Expected MATCH pattern"));
        };

        Ok(Condition::GraphMatch(crate::velesql::GraphMatchPredicate {
            pattern,
        }))
    }

    /// Parses an `IN` / `NOT IN` condition into `Condition::In`.
    ///
    /// The first child supplies the column. Negation is detected from the
    /// parse tree itself (a `not_kw` pair directly after the column) rather
    /// than from the raw text, so column names or string values that happen to
    /// contain "NOT IN" (e.g. 'NOT IN STOCK') cannot trigger it.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when the column or the value list is missing,
    /// and propagates the first error from parsing a list value.
    pub(crate) fn parse_in_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut parts = pair.into_inner();
        let column = Self::extract_leading_column(&mut parts)?;

        let negated = matches!(parts.peek(), Some(token) if token.as_rule() == Rule::not_kw);
        if negated {
            // Step past the `not_kw` pair that was just peeked.
            parts.next();
        }

        let Some(list) = parts.find(|candidate| candidate.as_rule() == Rule::value_list) else {
            return Err(ParseError::syntax(0, "", "Expected value list"));
        };

        let values = list
            .into_inner()
            .filter(|item| item.as_rule() == Rule::value)
            .map(Self::parse_value)
            .collect::<Result<Vec<_>, _>>()?;

        Ok(Condition::In(InCondition {
            column,
            values,
            negated,
        }))
    }

    /// Parses a `BETWEEN` condition into `Condition::Between`.
    ///
    /// The children are consumed in order: column, low bound, high bound.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when any of the three children is missing, and
    /// propagates errors from parsing either bound.
    pub(crate) fn parse_between_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut operands = pair.into_inner();

        // Field initialisers run top to bottom, matching the order of the children.
        let range = BetweenCondition {
            column: Self::extract_leading_column(&mut operands)?,
            low: Self::next_value(&mut operands, "Expected low value")?,
            high: Self::next_value(&mut operands, "Expected high value")?,
        };

        Ok(Condition::Between(range))
    }

    /// Takes the next pair from `inner` and parses it as a value.
    ///
    /// # Errors
    ///
    /// Returns a syntax error carrying `error_msg` when `inner` is exhausted,
    /// or whatever error `parse_value` reports.
    fn next_value(
        inner: &mut pest::iterators::Pairs<Rule>,
        error_msg: &str,
    ) -> Result<crate::velesql::ast::Value, ParseError> {
        match inner.next() {
            Some(raw) => Self::parse_value(raw),
            None => Err(ParseError::syntax(0, "", error_msg)),
        }
    }

    /// Parses a `LIKE` / `ILIKE` condition into `Condition::Like`.
    ///
    /// The children are consumed in order: column, operator keyword, pattern.
    /// The condition is case-insensitive exactly when the operator text equals
    /// `ILIKE`, ignoring ASCII case. The pattern text is passed through
    /// `unescape_string_literal`.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when the column, operator or pattern child is
    /// missing.
    pub(crate) fn parse_like_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut parts = pair.into_inner();
        let column = Self::extract_leading_column(&mut parts)?;

        // Parse LIKE or ILIKE operator.
        let op_pair = parts
            .next()
            .ok_or_else(|| ParseError::syntax(0, "", "Expected LIKE or ILIKE"))?;
        // Compare in place instead of allocating an upper-cased copy.
        // NOTE(review): assumes the grammar only admits ASCII spellings of the
        // keyword -- confirm against the operator rule in the grammar.
        let case_insensitive = op_pair.as_str().eq_ignore_ascii_case("ILIKE");

        let pattern_pair = parts
            .next()
            .ok_or_else(|| ParseError::syntax(0, "", "Expected pattern"))?;
        let pattern =
            crate::velesql::parser::helpers::unescape_string_literal(pattern_pair.as_str());

        Ok(Condition::Like(LikeCondition {
            column,
            pattern,
            case_insensitive,
        }))
    }

    /// Parses an `IS NULL` / `IS NOT NULL` condition into `Condition::IsNull`.
    ///
    /// The children are scanned once: an `identifier`, `column_name` or
    /// `where_column` child supplies the column (the last one wins), and a
    /// `not_kw` child marks the condition as `IS NOT NULL`. Other children are
    /// ignored.
    ///
    /// # Errors
    ///
    /// Returns an "Expected column name in IS NULL" syntax error when no column
    /// child was seen or the extracted name is empty.
    pub(crate) fn parse_is_null_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut column: Option<String> = None;
        let mut negated = false;

        for part in pair.into_inner() {
            match part.as_rule() {
                Rule::not_kw => negated = true,
                Rule::identifier | Rule::column_name | Rule::where_column => {
                    column = Some(Self::extract_column_name(&part));
                }
                _ => {}
            }
        }

        // An empty extracted name is treated the same as a missing column.
        let Some(column) = column.filter(|name| !name.is_empty()) else {
            return Err(ParseError::syntax(0, "", "Expected column name in IS NULL"));
        };

        Ok(Condition::IsNull(IsNullCondition {
            column,
            is_null: !negated,
        }))
    }

    /// Parses a `column op value` comparison into `Condition::Comparison`.
    ///
    /// The children are consumed in order: column, operator, value.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when any of the three children is missing, and
    /// propagates errors from `compare_op_from_str` and `parse_value`.
    pub(crate) fn parse_compare_expr(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Condition, ParseError> {
        let mut parts = pair.into_inner();
        let column = Self::extract_leading_column(&mut parts)?;

        let Some(op_pair) = parts.next() else {
            return Err(ParseError::syntax(0, "", "Expected operator"));
        };
        let operator = compare_op_from_str(op_pair.as_str())?;

        let Some(value_pair) = parts.next() else {
            return Err(ParseError::syntax(0, "", "Expected value"));
        };
        let value = Self::parse_value(value_pair)?;

        Ok(Condition::Comparison(Comparison {
            column,
            operator,
            value,
        }))
    }
}