mod formatter;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use super::ast::{Condition, SelectStatement};
use crate::collection::search::query::match_planner::{
CollectionStats, MatchExecutionStrategy, MatchQueryPlanner,
};
use crate::velesql::MatchClause;
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct QueryPlan {
pub root: PlanNode,
pub estimated_cost_ms: f64,
pub index_used: Option<IndexType>,
pub filter_strategy: FilterStrategy,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_hit: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub plan_reuse_count: Option<u64>,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PlanNode {
VectorSearch(VectorSearchPlan),
Filter(FilterPlan),
Limit(LimitPlan),
Offset(OffsetPlan),
TableScan(TableScanPlan),
IndexLookup(IndexLookupPlan),
Sequence(Vec<PlanNode>),
MatchTraversal(MatchTraversalPlan),
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct VectorSearchPlan {
pub collection: String,
pub ef_search: u32,
pub candidates: u32,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FilterPlan {
pub conditions: String,
pub selectivity: f64,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LimitPlan {
pub count: u64,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OffsetPlan {
pub count: u64,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TableScanPlan {
pub collection: String,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexLookupPlan {
pub label: String,
pub property: String,
pub value: String,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MatchTraversalPlan {
pub strategy: String,
pub start_labels: Vec<String>,
pub max_depth: u32,
pub relationship_count: usize,
pub has_similarity: bool,
pub similarity_threshold: Option<f32>,
}
#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExplainOutput {
pub plan: QueryPlan,
#[serde(skip_serializing_if = "Option::is_none")]
pub actual_stats: Option<ActualStats>,
}
#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActualStats {
pub actual_rows: u64,
pub actual_time_ms: f64,
pub loops: u64,
pub nodes_visited: u64,
pub edges_traversed: u64,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum IndexType {
Hnsw,
Flat,
BinaryQuantization,
Property,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum FilterStrategy {
#[default]
None,
PreFilter,
PostFilter,
}
impl QueryPlan {
#[must_use]
pub fn from_select(stmt: &SelectStatement) -> Self {
Self::from_select_with_indexed_fields(stmt, &HashSet::new())
}
#[must_use]
pub fn from_select_with_indexed_fields(
stmt: &SelectStatement,
indexed_fields: &HashSet<String>,
) -> Self {
let mut has_vector_search = false;
let mut filter_conditions = Vec::new();
let mut index_lookup = None;
if let Some(ref condition) = stmt.where_clause {
Self::analyze_condition(condition, &mut has_vector_search, &mut filter_conditions);
index_lookup = Self::extract_index_lookup(condition, indexed_fields);
}
let (mut nodes, index_used) = Self::build_scan_node(stmt, has_vector_search, index_lookup);
let filter_strategy = Self::append_filter_nodes(&mut nodes, &filter_conditions, stmt);
Self::assemble_plan(nodes, index_used, filter_strategy, has_vector_search)
}
fn assemble_plan(
mut nodes: Vec<PlanNode>,
index_used: Option<IndexType>,
filter_strategy: FilterStrategy,
has_vector_search: bool,
) -> Self {
let root = if nodes.len() == 1 {
nodes.swap_remove(0)
} else {
PlanNode::Sequence(nodes)
};
let estimated_cost_ms = Self::estimate_cost(&root, has_vector_search);
Self {
root,
estimated_cost_ms,
index_used,
filter_strategy,
cache_hit: None,
plan_reuse_count: None,
}
}
fn build_scan_node(
stmt: &SelectStatement,
has_vector_search: bool,
index_lookup: Option<(String, String)>,
) -> (Vec<PlanNode>, Option<IndexType>) {
let mut nodes = Vec::new();
let index_used;
if has_vector_search {
index_used = Some(IndexType::Hnsw);
let candidates = u32::try_from(stmt.limit.unwrap_or(50)).unwrap_or(u32::MAX);
nodes.push(PlanNode::VectorSearch(VectorSearchPlan {
collection: stmt.from.clone(),
ef_search: 100,
candidates,
}));
} else if let Some((property, value)) = index_lookup {
index_used = Some(IndexType::Property);
nodes.push(PlanNode::IndexLookup(IndexLookupPlan {
label: stmt.from.clone(),
property,
value,
}));
} else {
index_used = None;
nodes.push(PlanNode::TableScan(TableScanPlan {
collection: stmt.from.clone(),
}));
}
(nodes, index_used)
}
fn append_filter_nodes(
nodes: &mut Vec<PlanNode>,
filter_conditions: &[String],
stmt: &SelectStatement,
) -> FilterStrategy {
let mut filter_strategy = FilterStrategy::None;
if !filter_conditions.is_empty() {
let selectivity = Self::estimate_selectivity(filter_conditions);
filter_strategy = if selectivity > 0.1 {
FilterStrategy::PostFilter
} else {
FilterStrategy::PreFilter
};
nodes.push(PlanNode::Filter(FilterPlan {
conditions: filter_conditions.join(" AND "),
selectivity,
}));
}
if let Some(offset) = stmt.offset {
nodes.push(PlanNode::Offset(OffsetPlan { count: offset }));
}
if let Some(limit) = stmt.limit {
nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
}
filter_strategy
}
fn analyze_condition(
condition: &Condition,
has_vector_search: &mut bool,
filter_conditions: &mut Vec<String>,
) {
match condition {
Condition::VectorSearch(_)
| Condition::VectorFusedSearch(_)
| Condition::SparseVectorSearch(_)
| Condition::Similarity(_) => {
*has_vector_search = true;
}
Condition::Comparison(cmp) => {
filter_conditions.push(format!("{} {} ?", cmp.column, cmp.operator.as_str()));
}
Condition::In(inc) => {
filter_conditions.push(format!("{} IN (...)", inc.column));
}
Condition::Between(btw) => {
filter_conditions.push(format!("{} BETWEEN ? AND ?", btw.column));
}
Condition::Like(lk) => {
filter_conditions.push(format!("{} LIKE ?", lk.column));
}
Condition::IsNull(isn) => {
let op = if isn.is_null {
"IS NULL"
} else {
"IS NOT NULL"
};
filter_conditions.push(format!("{} {op}", isn.column));
}
Condition::Match(m) => {
filter_conditions.push(format!("{} MATCH ?", m.column));
}
Condition::GraphMatch(_) => {
filter_conditions.push("MATCH (...)".to_string());
}
Condition::And(left, right) | Condition::Or(left, right) => {
Self::analyze_condition(left, has_vector_search, filter_conditions);
Self::analyze_condition(right, has_vector_search, filter_conditions);
}
Condition::Not(inner) | Condition::Group(inner) => {
Self::analyze_condition(inner, has_vector_search, filter_conditions);
}
}
}
fn extract_index_lookup(
condition: &Condition,
indexed_fields: &HashSet<String>,
) -> Option<(String, String)> {
if let Condition::Comparison(cmp) = condition {
if cmp.operator == crate::velesql::CompareOp::Eq && indexed_fields.contains(&cmp.column)
{
return Some((cmp.column.clone(), format!("{:?}", cmp.value)));
}
}
None
}
pub(crate) fn estimate_selectivity(conditions: &[String]) -> f64 {
let base = 0.5_f64;
base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
}
fn estimate_cost(root: &PlanNode, has_vector_search: bool) -> f64 {
let base_cost = if has_vector_search { 0.05 } else { 1.0 };
match root {
PlanNode::Sequence(nodes) => nodes
.iter()
.fold(base_cost, |acc, node| acc + Self::node_cost(node)),
_ => base_cost + Self::node_cost(root),
}
}
pub(crate) fn node_cost(node: &PlanNode) -> f64 {
match node {
PlanNode::VectorSearch(_) => 0.05,
PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
PlanNode::TableScan(_) => 1.0,
PlanNode::IndexLookup(_) => 0.0001, PlanNode::Sequence(nodes) => nodes.iter().map(Self::node_cost).sum(),
PlanNode::MatchTraversal(mt) => {
let base = 0.1;
let depth_factor = f64::from(mt.max_depth) * 0.05;
let similarity_factor = if mt.has_similarity { 0.05 } else { 0.0 };
base + depth_factor + similarity_factor
}
}
}
#[must_use]
pub fn from_match(match_clause: &MatchClause, stats: &CollectionStats) -> Self {
let strategy = MatchQueryPlanner::plan(match_clause, stats);
let strategy_explanation = MatchQueryPlanner::explain(&strategy);
let (start_labels, max_depth, has_similarity, similarity_threshold) =
Self::extract_strategy_info(&strategy);
let relationship_count = match_clause
.patterns
.first()
.map_or(0, |p| p.relationships.len());
let traversal = PlanNode::MatchTraversal(MatchTraversalPlan {
strategy: strategy_explanation,
start_labels,
max_depth,
relationship_count,
has_similarity,
similarity_threshold,
});
let mut nodes = vec![traversal];
if let Some(limit) = match_clause.return_clause.limit {
nodes.push(PlanNode::Limit(LimitPlan { count: limit }));
}
let index_used = if has_similarity {
Some(IndexType::Hnsw)
} else {
None
};
Self::assemble_plan(nodes, index_used, FilterStrategy::None, has_similarity)
}
fn extract_strategy_info(
strategy: &MatchExecutionStrategy,
) -> (Vec<String>, u32, bool, Option<f32>) {
match strategy {
MatchExecutionStrategy::GraphFirst {
start_labels,
max_depth,
} => (start_labels.clone(), *max_depth, false, None),
MatchExecutionStrategy::VectorFirst { threshold, .. } => {
(Vec::new(), 1, true, Some(*threshold))
}
MatchExecutionStrategy::Parallel {
graph_hint,
vector_hint,
} => {
let (labels, depth) = match graph_hint.as_ref() {
MatchExecutionStrategy::GraphFirst {
start_labels,
max_depth,
} => (start_labels.clone(), *max_depth),
_ => (Vec::new(), 1),
};
let threshold = match vector_hint.as_ref() {
MatchExecutionStrategy::VectorFirst { threshold, .. } => Some(*threshold),
_ => None,
};
(labels, depth, true, threshold)
}
}
}
}