use super::{NodeStats, PlanNode};
use crate::collection::stats::CollectionStats as CoreCollectionStats;
use crate::velesql::cost_estimator::CostEstimator;
const COST_UNIT_TO_MS: f64 = 0.001;
pub(super) fn estimate_selectivity(
conditions: &[String],
_stats: Option<&CoreCollectionStats>,
) -> f64 {
estimate_selectivity_heuristic(conditions)
}
pub(super) fn estimate_selectivity_heuristic(conditions: &[String]) -> f64 {
let base = 0.5_f64;
base.powi(i32::try_from(conditions.len()).unwrap_or(i32::MAX))
}
pub(super) fn estimate_cost(
root: &PlanNode,
has_vector_search: bool,
stats: Option<&CoreCollectionStats>,
) -> f64 {
match stats {
Some(s) => {
let cost = CostEstimator::new(s).estimate_plan_cost(root);
let ms = cost.total() * COST_UNIT_TO_MS;
if ms > 0.0 {
ms
} else {
estimate_cost_heuristic(root, has_vector_search)
}
}
None => estimate_cost_heuristic(root, has_vector_search),
}
}
pub(super) fn estimate_cost_heuristic(root: &PlanNode, has_vector_search: bool) -> f64 {
let base_cost = if has_vector_search { 0.05 } else { 1.0 };
match root {
PlanNode::Sequence(nodes) => nodes.iter().fold(base_cost, |acc, n| acc + node_cost(n)),
_ => base_cost + node_cost(root),
}
}
pub(super) fn node_cost(node: &PlanNode) -> f64 {
match node {
PlanNode::VectorSearch(_) => 0.05,
PlanNode::Filter(f) => 0.01 * (1.0 - f.selectivity),
PlanNode::Limit(_) | PlanNode::Offset(_) => 0.001,
PlanNode::TableScan(_) => 1.0,
PlanNode::IndexLookup(_) => 0.0001, PlanNode::Sequence(nodes) => nodes.iter().map(node_cost).sum(),
PlanNode::MatchTraversal(mt) => {
let base = 0.1;
let depth_factor = f64::from(mt.max_depth) * 0.05;
let similarity_factor = if mt.has_similarity { 0.05 } else { 0.0 };
base + depth_factor + similarity_factor
}
}
}
fn node_label_and_weight(node: &PlanNode) -> (&'static str, f64) {
match node {
PlanNode::VectorSearch(_) => ("VectorSearch", 0.95),
PlanNode::TableScan(_) => ("TableScan", 0.95),
PlanNode::MatchTraversal(_) => ("MatchTraversal", 0.95),
PlanNode::IndexLookup(_) => ("IndexLookup", 0.90),
PlanNode::Filter(_) => ("Filter", 0.03),
PlanNode::Limit(_) => ("Limit", 0.01),
PlanNode::Offset(_) => ("Offset", 0.01),
PlanNode::Sequence(_) => ("Sequence", 0.0),
}
}
fn estimate_rows_in(node: &PlanNode, rows_out: u64) -> u64 {
match node {
PlanNode::Filter(f) => {
if f.selectivity > 0.0 && f.selectivity < 1.0 {
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
let estimated = (rows_out as f64 / f.selectivity).ceil() as u64;
estimated.max(rows_out)
} else {
rows_out
}
}
PlanNode::Offset(o) => {
rows_out.saturating_add(o.count)
}
_ => rows_out,
}
}
struct LeafEntry<'a> {
label: &'static str,
weight: f64,
node: &'a PlanNode,
}
fn collect_leaves<'a>(node: &'a PlanNode, out: &mut Vec<LeafEntry<'a>>) {
if let PlanNode::Sequence(children) = node {
for child in children {
collect_leaves(child, out);
}
} else {
let (label, weight) = node_label_and_weight(node);
out.push(LeafEntry {
label,
weight,
node,
});
}
}
#[must_use]
pub fn build_leaf_node_stats(
root: &PlanNode,
actual_rows: u64,
total_time_ms: f64,
) -> Vec<NodeStats> {
let mut leaves = Vec::new();
collect_leaves(root, &mut leaves);
if leaves.is_empty() {
return Vec::new();
}
let total_weight: f64 = leaves.iter().map(|l| l.weight).sum();
let len = leaves.len();
let mut rows_in = vec![actual_rows; len];
let mut rows_out = vec![actual_rows; len];
rows_out[len - 1] = actual_rows;
rows_in[len - 1] = estimate_rows_in(leaves[len - 1].node, actual_rows);
for i in (0..len - 1).rev() {
rows_out[i] = rows_in[i + 1];
rows_in[i] = estimate_rows_in(leaves[i].node, rows_out[i]);
}
leaves
.iter()
.enumerate()
.map(|(i, leaf)| {
let time_ms = if total_weight > 0.0 {
total_time_ms * leaf.weight / total_weight
} else {
0.0
};
NodeStats {
node_label: leaf.label.to_string(),
actual_time_ms: time_ms,
actual_rows_in: rows_in[i],
actual_rows_out: rows_out[i],
loops: 1,
estimated: true,
}
})
.collect()
}