// velesdb-core 1.14.4
//
// High-performance vector database engine written in Rust.
// Documentation
//! MATCH query parsing module (EPIC-061/US-002 refactoring).
//!
//! Extracted from select.rs to reduce file size and improve modularity.

use super::{extract_identifier, Rule};
use crate::velesql::ast::{OrderByExpr, Query};
use crate::velesql::error::ParseError;
use crate::velesql::graph_pattern::{
    Direction, GraphPattern, MatchClause, NodePattern, RelationshipPattern, ReturnClause,
    ReturnItem,
};
use crate::velesql::Parser;

impl Parser {
    /// Builds a MATCH [`Query`] from the pest pair of a MATCH statement (EPIC-045 US-001).
    ///
    /// Each direct child of `pair` is dispatched on its rule: graph patterns are
    /// accumulated, the WHERE and RETURN clauses are parsed by their dedicated
    /// helpers, ORDER BY is converted and stored on the return clause, and the
    /// LIMIT value is written onto the return clause once iteration is finished.
    /// Children with any other rule are ignored.
    ///
    /// # Errors
    ///
    /// Propagates the first [`ParseError`] reported by any of the sub-parsers.
    pub(crate) fn parse_match_query(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Query, ParseError> {
        let mut graph_patterns = Vec::new();
        let mut filter = None;
        let mut returned = ReturnClause {
            items: Vec::new(),
            order_by: None,
            limit: None,
        };
        let mut row_limit = None;

        for child in pair.into_inner() {
            match child.as_rule() {
                Rule::graph_pattern => {
                    let pattern = Self::parse_graph_pattern(child)?;
                    graph_patterns.push(pattern);
                }
                Rule::where_clause => {
                    filter = Some(Self::parse_where_clause(child)?);
                }
                Rule::return_clause => {
                    // Replaces the whole clause, including any ordering set earlier.
                    returned = Self::parse_return_clause(child)?;
                }
                Rule::order_by_clause => {
                    let ordering = Self::convert_order_by_to_match(child)?;
                    returned.order_by = Some(ordering);
                }
                Rule::limit_clause => {
                    row_limit = Self::extract_limit_integer(child);
                }
                _ => {}
            }
        }

        // The limit is applied last so a RETURN clause seen after LIMIT cannot reset it.
        returned.limit = row_limit;

        let clause = MatchClause {
            patterns: graph_patterns,
            where_clause: filter,
            return_clause: returned,
        };
        Ok(Query::new_match(clause))
    }

    /// Parses an ORDER BY clause and re-expresses each entry as a MATCH `OrderByItem`.
    ///
    /// The sort expression of every entry is rendered to a string with
    /// [`Self::order_by_expr_to_string`]; the `descending` flag is copied as is.
    ///
    /// # Errors
    ///
    /// Returns whatever [`ParseError`] `parse_order_by_clause` reports.
    fn convert_order_by_to_match(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<Vec<crate::velesql::graph_pattern::OrderByItem>, ParseError> {
        let parsed = Self::parse_order_by_clause(pair)?;

        let mut converted = Vec::new();
        for entry in parsed {
            let expression = Self::order_by_expr_to_string(entry.expr);
            converted.push(crate::velesql::graph_pattern::OrderByItem {
                expression,
                descending: entry.descending,
            });
        }
        Ok(converted)
    }

    /// Renders an `OrderByExpr` as the textual expression stored in MATCH ordering items.
    ///
    /// - `Field` yields the field name unchanged.
    /// - `Similarity` yields `similarity(<field>, <vector>)`, where a parameter vector
    ///   is written as `$name` and a literal vector uses its `Debug` formatting.
    /// - `SimilarityBare` yields `similarity()`.
    /// - `Aggregate` yields the `Debug` form of the function type followed by `()`.
    /// - `Arithmetic` yields the expression's `Display` output.
    fn order_by_expr_to_string(expr: OrderByExpr) -> String {
        use crate::velesql::ast::VectorExpr;

        match expr {
            OrderByExpr::Field(name) => name,
            OrderByExpr::SimilarityBare => String::from("similarity()"),
            OrderByExpr::Similarity(sim) => {
                let rendered_vector = match &sim.vector {
                    VectorExpr::Literal(values) => format!("{values:?}"),
                    VectorExpr::Parameter(param) => format!("${param}"),
                };
                format!("similarity({}, {})", sim.field, rendered_vector)
            }
            OrderByExpr::Arithmetic(arith) => format!("{arith}"),
            OrderByExpr::Aggregate(agg) => format!("{:?}()", agg.function_type),
        }
    }

    /// Reads the numeric value of a LIMIT clause.
    ///
    /// Only the first child with rule `integer` is considered. Returns `None` when
    /// there is no such child or when its text does not parse as a `u64`.
    fn extract_limit_integer(pair: pest::iterators::Pair<Rule>) -> Option<u64> {
        for child in pair.into_inner() {
            if child.as_rule() == Rule::integer {
                // Later integers are never consulted, even if this one fails to parse.
                return child.as_str().parse::<u64>().ok();
            }
        }
        None
    }

    /// Turns a `graph_pattern` pair into a [`GraphPattern`] (EPIC-045 US-001).
    ///
    /// Node and relationship children are parsed in the order they appear and
    /// stored in separate vectors; other children are skipped. The resulting
    /// pattern always has `name` set to `None`.
    ///
    /// # Errors
    ///
    /// Propagates the first [`ParseError`] from a node or relationship sub-parser.
    pub(super) fn parse_graph_pattern(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<GraphPattern, ParseError> {
        let mut nodes = Vec::new();
        let mut relationships = Vec::new();

        for element in pair.into_inner() {
            let rule = element.as_rule();
            if rule == Rule::node_pattern {
                let node = Self::parse_node_pattern(element)?;
                nodes.push(node);
            } else if rule == Rule::relationship_pattern {
                let relationship = Self::parse_relationship_pattern(element)?;
                relationships.push(relationship);
            }
        }

        let pattern = GraphPattern {
            name: None,
            nodes,
            relationships,
        };
        Ok(pattern)
    }

    /// Turns a `node_pattern` pair into a [`NodePattern`] (EPIC-045 US-001).
    ///
    /// Starts from `NodePattern::new()` and applies every `node_spec` child to it;
    /// a pattern without any `node_spec` child is returned untouched.
    ///
    /// # Errors
    ///
    /// Stops at and returns the first [`ParseError`] raised while applying a spec.
    fn parse_node_pattern(pair: pest::iterators::Pair<Rule>) -> Result<NodePattern, ParseError> {
        let mut node = NodePattern::new();

        pair.into_inner()
            .filter(|child| child.as_rule() == Rule::node_spec)
            .try_for_each(|spec| Self::apply_node_spec(spec, &mut node))?;

        Ok(node)
    }

    /// Copies the contents of a `node_spec` pair onto `node`.
    ///
    /// - `node_alias`: the raw text becomes the alias.
    /// - `node_labels`: every `label_name` child is appended to the labels.
    /// - `node_properties`: the parsed map replaces the node's properties.
    /// - `collection_annotation`: the last `collection_ref` child, if any, becomes
    ///   the collection.
    ///
    /// Children with other rules are ignored.
    ///
    /// # Errors
    ///
    /// Returns the [`ParseError`] produced while parsing node properties.
    fn apply_node_spec(
        spec: pest::iterators::Pair<Rule>,
        node: &mut NodePattern,
    ) -> Result<(), ParseError> {
        for part in spec.into_inner() {
            match part.as_rule() {
                Rule::node_alias => {
                    let alias = part.as_str().to_string();
                    node.alias = Some(alias);
                }
                Rule::node_labels => {
                    let label_names = part
                        .into_inner()
                        .filter(|label| label.as_rule() == Rule::label_name)
                        .map(|label| label.as_str().to_string());
                    node.labels.extend(label_names);
                }
                Rule::node_properties => {
                    let properties = Self::parse_node_properties(part)?;
                    node.properties = properties;
                }
                Rule::collection_annotation => {
                    // When several references are present the final one wins.
                    let last_ref = part
                        .into_inner()
                        .filter(|candidate| candidate.as_rule() == Rule::collection_ref)
                        .last();
                    if let Some(collection) = last_ref {
                        node.collection = Some(collection.as_str().to_string());
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Collects the key/value properties of a `node_properties` pair (EPIC-045 US-001).
    ///
    /// Every `property_list` child contributes its entries to a single map; when a
    /// key is inserted more than once the later value overwrites the earlier one.
    ///
    /// # Errors
    ///
    /// Propagates the first [`ParseError`] raised while reading a property list.
    pub(super) fn parse_node_properties(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<std::collections::HashMap<String, crate::velesql::Value>, ParseError> {
        let mut collected = std::collections::HashMap::new();

        let lists = pair
            .into_inner()
            .filter(|child| child.as_rule() == Rule::property_list);
        for list in lists {
            Self::collect_property_list(list, &mut collected)?;
        }

        Ok(collected)
    }

    /// Inserts every property of a `property_list` pair into `props`.
    ///
    /// Non-`property` children are skipped, as are properties whose key came out
    /// empty. A key already present in `props` is overwritten.
    ///
    /// # Errors
    ///
    /// Returns the first [`ParseError`] raised while parsing a single property.
    fn collect_property_list(
        list_pair: pest::iterators::Pair<Rule>,
        props: &mut std::collections::HashMap<String, crate::velesql::Value>,
    ) -> Result<(), ParseError> {
        for entry in list_pair.into_inner() {
            if entry.as_rule() != Rule::property {
                continue;
            }

            let (name, value) = Self::parse_single_property(entry)?;
            if name.is_empty() {
                // No identifier was found for this property; nothing to store it under.
                continue;
            }
            props.insert(name, value);
        }
        Ok(())
    }

    /// Splits one `property` pair into its `(key, value)` parts.
    ///
    /// The key comes from the `identifier` child and the value from the
    /// `property_value` child; if either occurs more than once, the last
    /// occurrence wins. A missing key yields an empty string and a missing
    /// value yields `Value::Null`.
    ///
    /// # Errors
    ///
    /// Returns the [`ParseError`] produced while parsing the property value.
    fn parse_single_property(
        prop_pair: pest::iterators::Pair<Rule>,
    ) -> Result<(String, crate::velesql::Value), ParseError> {
        let mut name = String::new();
        let mut parsed = crate::velesql::Value::Null;

        for part in prop_pair.into_inner() {
            let rule = part.as_rule();
            if rule == Rule::identifier {
                name = extract_identifier(&part);
            } else if rule == Rule::property_value {
                parsed = Self::parse_property_value(part)?;
            }
        }

        Ok((name, parsed))
    }

    /// Converts a `property_value` pair into a `Value` (EPIC-045 US-001).
    ///
    /// The first child that is a string, integer, float, boolean, null or
    /// parameter token is handed to `super::helpers::parse_scalar_from_rule`,
    /// and its result is returned as is. If no such child exists the value is
    /// `Value::Null`.
    ///
    /// # Errors
    ///
    /// Returns whatever [`ParseError`] the shared scalar parser reports.
    fn parse_property_value(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<crate::velesql::Value, ParseError> {
        let scalar = pair.into_inner().find(|child| {
            matches!(
                child.as_rule(),
                Rule::string
                    | Rule::integer
                    | Rule::float
                    | Rule::boolean
                    | Rule::null_value
                    | Rule::parameter
            )
        });

        match scalar {
            Some(literal) => super::helpers::parse_scalar_from_rule(&literal),
            None => Ok(crate::velesql::Value::Null),
        }
    }

    /// Turns a `relationship_pattern` pair into a [`RelationshipPattern`] (EPIC-045 US-001).
    ///
    /// The rule of each child selects the direction: `rel_incoming` maps to
    /// `Incoming`, `rel_outgoing` to `Outgoing` and `rel_undirected` to `Both`.
    /// For each such child a fresh pattern is created with that direction and
    /// filled from the child's spec, so the last directional child determines
    /// the result. Without any directional child an empty outgoing pattern is
    /// returned.
    ///
    /// # Errors
    ///
    /// Propagates the first [`ParseError`] raised while reading a relationship spec.
    fn parse_relationship_pattern(
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<RelationshipPattern, ParseError> {
        let mut rel = RelationshipPattern::new(Direction::Outgoing);

        for arrow in pair.into_inner() {
            let direction = match arrow.as_rule() {
                Rule::rel_incoming => Direction::Incoming,
                Rule::rel_outgoing => Direction::Outgoing,
                Rule::rel_undirected => Direction::Both,
                _ => continue,
            };

            // Start over for this arrow; nothing from a previous arrow is carried along.
            rel = RelationshipPattern::new(direction);
            Self::parse_rel_spec_inner(&mut rel, arrow)?;
        }

        Ok(rel)
    }

    /// Applies every `rel_spec` child of a directional relationship pair to `rel`
    /// (EPIC-045 US-001).
    ///
    /// Children with any other rule are skipped.
    ///
    /// # Errors
    ///
    /// Returns the first [`ParseError`] raised by [`Self::parse_rel_spec`].
    fn parse_rel_spec_inner(
        rel: &mut RelationshipPattern,
        pair: pest::iterators::Pair<Rule>,
    ) -> Result<(), ParseError> {
        for child in pair.into_inner() {
            if child.as_rule() != Rule::rel_spec {
                continue;
            }
            Self::parse_rel_spec(rel, child)?;
        }
        Ok(())
    }

    /// Applies every `rel_details` child of a `rel_spec` pair to `rel`.
    ///
    /// Children with any other rule are skipped.
    ///
    /// # Errors
    ///
    /// Returns the first [`ParseError`] raised by [`Self::apply_rel_details`].
    fn parse_rel_spec(
        rel: &mut RelationshipPattern,
        spec_pair: pest::iterators::Pair<Rule>,
    ) -> Result<(), ParseError> {
        spec_pair
            .into_inner()
            .filter(|child| child.as_rule() == Rule::rel_details)
            .try_for_each(|details| Self::apply_rel_details(&mut *rel, details))
    }

    /// Copies the contents of a `rel_details` pair onto `rel`.
    ///
    /// - `rel_alias`: the raw text becomes the alias.
    /// - `rel_types`: the type names are appended to `rel.types`.
    /// - `rel_range`: the parsed hop range is stored in `rel.range`.
    /// - `node_properties`: the parsed map replaces the relationship's properties.
    ///
    /// Children with other rules are ignored.
    ///
    /// # Errors
    ///
    /// Returns the [`ParseError`] produced while parsing the properties.
    fn apply_rel_details(
        rel: &mut RelationshipPattern,
        details_pair: pest::iterators::Pair<Rule>,
    ) -> Result<(), ParseError> {
        for field in details_pair.into_inner() {
            match field.as_rule() {
                Rule::rel_alias => {
                    let alias = field.as_str().to_string();
                    rel.alias = Some(alias);
                }
                Rule::rel_types => {
                    Self::collect_rel_types(field, &mut rel.types);
                }
                Rule::rel_range => {
                    rel.range = Self::parse_rel_range(field);
                }
                Rule::node_properties => {
                    let properties = Self::parse_node_properties(field)?;
                    rel.properties = properties;
                }
                _ => {}
            }
        }
        Ok(())
    }

    /// Appends the text of every `rel_type_name` child of a `rel_types` pair to `types`.
    ///
    /// Existing entries in `types` are kept; children with other rules are skipped.
    fn collect_rel_types(pair: pest::iterators::Pair<Rule>, types: &mut Vec<String>) {
        let names = pair
            .into_inner()
            .filter(|child| child.as_rule() == Rule::rel_type_name)
            .map(|child| child.as_str().to_string());
        types.extend(names);
    }

    /// Parse relationship range (EPIC-045 US-001).
    ///
    /// Returns the `(min, max)` hop count described by a `rel_range` pair:
    ///
    /// - a `range_spec` child of the form `a..b` yields `(a, b)`; a bound that is
    ///   missing or does not parse as `u32` falls back to `1` (lower) or
    ///   `u32::MAX` (upper);
    /// - a `range_spec` child holding a single number `n`, or a bare `integer`
    ///   child `n`, yields `(n, n)`;
    /// - otherwise the range is unbounded: `(1, u32::MAX)`.
    ///
    /// The first child that produces a range wins. The result is always `Some`.
    ///
    /// The `range_spec` text and both bounds are trimmed before parsing, so
    /// whitespace around `..` no longer turns a valid bound into its default.
    /// NOTE(review): whether the grammar lets `range_spec` contain whitespace is
    /// not visible from this file — confirm against the pest grammar.
    #[allow(clippy::unnecessary_wraps)] // Option is for consistency with caller expectations
    fn parse_rel_range(pair: pest::iterators::Pair<Rule>) -> Option<(u32, u32)> {
        for inner_pair in pair.into_inner() {
            match inner_pair.as_rule() {
                Rule::range_spec => {
                    let text = inner_pair.as_str().trim();
                    if let Some((lower, upper)) = text.split_once("..") {
                        // An open-ended side (e.g. `..3` or `2..`) leaves an empty
                        // string here, which fails to parse and takes the default.
                        let start: u32 = lower.trim().parse().unwrap_or(1);
                        let end: u32 = upper.trim().parse().unwrap_or(u32::MAX);
                        return Some((start, end));
                    }
                    if let Ok(exact) = text.parse::<u32>() {
                        return Some((exact, exact));
                    }
                }
                Rule::integer => {
                    if let Ok(exact) = inner_pair.as_str().parse::<u32>() {
                        return Some((exact, exact));
                    }
                }
                _ => {}
            }
        }
        // Default: unbounded
        Some((1, u32::MAX))
    }

    /// Turns a `return_clause` pair into a [`ReturnClause`] (EPIC-045 US-001).
    ///
    /// Every `return_item` found inside a `return_item_list` child becomes one
    /// [`ReturnItem`]: its expression comes from the `return_expr` child (an empty
    /// string when absent) and its alias from the `identifier` child (`None` when
    /// absent). `order_by` and `limit` are always left as `None` here.
    ///
    /// This function never returns `Err`; the `Result` keeps the signature
    /// aligned with the other `parse_*` methods.
    #[allow(clippy::unnecessary_wraps)] // Consistent with other parse_* methods
    fn parse_return_clause(pair: pest::iterators::Pair<Rule>) -> Result<ReturnClause, ParseError> {
        let mut items = Vec::new();

        let lists = pair
            .into_inner()
            .filter(|child| child.as_rule() == Rule::return_item_list);
        for list in lists {
            let entries = list
                .into_inner()
                .filter(|child| child.as_rule() == Rule::return_item);
            for entry in entries {
                let mut expression = String::new();
                let mut alias = None;

                for part in entry.into_inner() {
                    let rule = part.as_rule();
                    if rule == Rule::return_expr {
                        expression = Self::parse_return_expr(part);
                    } else if rule == Rule::identifier {
                        alias = Some(extract_identifier(&part));
                    }
                }

                items.push(ReturnItem { expression, alias });
            }
        }

        let clause = ReturnClause {
            items,
            order_by: None,
            limit: None,
        };
        Ok(clause)
    }

    /// Renders a `return_expr` pair as the expression string of a return item
    /// (EPIC-045 US-001).
    ///
    /// The first child that is a `similarity_return`, `property_access` or
    /// `identifier` decides the result: a similarity call is normalised to
    /// `similarity()`, the other two yield their own source text. If no such
    /// child exists, the source text of the whole pair is returned.
    fn parse_return_expr(pair: pest::iterators::Pair<Rule>) -> String {
        // Borrowed from the parser input, so it stays valid after `pair` is consumed.
        let whole = pair.as_str();

        for child in pair.into_inner() {
            let rule = child.as_rule();
            if rule == Rule::similarity_return {
                return String::from("similarity()");
            }
            if matches!(rule, Rule::property_access | Rule::identifier) {
                return child.as_str().to_string();
            }
        }

        whole.to_string()
    }
}