use std::collections::HashMap;
#[derive(Debug, Default)]
pub struct QueryOptimizer {
cost_model: CostModel,
cardinality_hints: HashMap<String, CardinalityHint>,
total_edges: usize,
}
#[derive(Debug, Clone)]
pub struct CostModel {
pub row_read_cost: f64,
pub index_lookup_cost: f64,
pub seq_scan_cost: f64,
pub vector_search_cost: f64,
}
impl Default for CostModel {
fn default() -> Self {
Self {
row_read_cost: 1.0,
index_lookup_cost: 0.5,
seq_scan_cost: 0.1,
vector_search_cost: 10.0,
}
}
}
#[derive(Debug, Clone)]
pub struct CardinalityHint {
pub cardinality: usize,
pub confidence: f64,
pub source: CardinalitySource,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CardinalitySource {
Exact,
HyperLogLog,
Histogram,
Default,
}
#[derive(Debug, Clone)]
pub enum QueryPredicate {
Eq(String, String),
Range {
column: String,
min: Option<String>,
max: Option<String>,
},
In(String, Vec<String>),
Prefix(String, String),
TimeRange(u64, u64),
Project(u16),
Tenant(u32),
SpanType(String),
}
#[derive(Debug, Clone, PartialEq)]
pub enum QueryOperation {
PointLookup,
RangeScan,
FullScan,
VectorSearch { k: usize },
LsmRangeScan { start_us: u64, end_us: u64 },
GraphTraversal {
direction: TraversalDirection,
max_depth: usize,
},
}
#[derive(Debug, Clone)]
pub enum IndexSelection {
PrimaryKey,
Secondary(String),
TimeIndex,
VectorIndex,
FullScan,
LsmScan,
CausalIndex,
ProjectIndex,
MultiIndex(Vec<IndexSelection>),
}
#[derive(Debug, Clone)]
pub struct QueryCost {
pub estimated_cost: f64,
pub total_cost: f64,
pub estimated_rows: usize,
pub records_returned: usize,
pub index: IndexSelection,
pub operation: QueryOperation,
pub breakdown: Vec<(QueryOperation, f64)>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TraversalDirection {
Forward,
Backward,
}
#[derive(Debug, Clone)]
pub struct QueryPlan {
pub cost: QueryCost,
pub predicates: Vec<QueryPredicate>,
pub direction: TraversalDirection,
pub limit: Option<usize>,
pub index_selection: IndexSelection,
pub operations: Vec<QueryOperation>,
}
impl QueryOptimizer {
pub fn new() -> Self {
Self::default()
}
pub fn with_cost_model(cost_model: CostModel) -> Self {
Self {
cost_model,
..Default::default()
}
}
pub fn update_total_edges(&mut self, count: usize, _source: CardinalitySource) {
self.total_edges = count;
}
pub fn set_cardinality_hint(&mut self, column: &str, hint: CardinalityHint) {
self.cardinality_hints.insert(column.to_string(), hint);
}
pub fn plan_query(&self, predicates: &[QueryPredicate], limit: Option<usize>) -> QueryPlan {
if predicates.is_empty() {
return self.build_plan(
IndexSelection::FullScan,
QueryOperation::FullScan,
predicates,
limit,
);
}
let mut candidates: Vec<(IndexSelection, QueryOperation, f64, usize)> = Vec::new();
for pred in predicates {
let sel = self.estimate_selectivity(pred);
let est_rows = (self.total_edges as f64 * sel).max(1.0) as usize;
match pred {
QueryPredicate::Eq(_, _) => {
let cost = self.cost_model.index_lookup_cost;
candidates.push((
IndexSelection::PrimaryKey,
QueryOperation::PointLookup,
cost,
1,
));
}
QueryPredicate::TimeRange(start, end) => {
let cost = est_rows as f64 * self.cost_model.row_read_cost;
candidates.push((
IndexSelection::TimeIndex,
QueryOperation::LsmRangeScan {
start_us: *start,
end_us: *end,
},
cost,
est_rows,
));
}
QueryPredicate::Range { .. } | QueryPredicate::Prefix(_, _) => {
let cost = est_rows as f64 * self.cost_model.row_read_cost;
candidates.push((
IndexSelection::LsmScan,
QueryOperation::RangeScan,
cost,
est_rows,
));
}
QueryPredicate::Project(_) => {
let cost = est_rows as f64 * self.cost_model.index_lookup_cost;
candidates.push((
IndexSelection::ProjectIndex,
QueryOperation::RangeScan,
cost,
est_rows,
));
}
QueryPredicate::Tenant(_)
| QueryPredicate::SpanType(_)
| QueryPredicate::In(_, _) => {
let cost = est_rows as f64 * self.cost_model.row_read_cost;
candidates.push((
IndexSelection::FullScan,
QueryOperation::RangeScan,
cost,
est_rows,
));
}
}
}
let full_scan_cost = self.total_edges.max(1) as f64 * self.cost_model.seq_scan_cost;
candidates.push((
IndexSelection::FullScan,
QueryOperation::FullScan,
full_scan_cost,
self.total_edges.max(1),
));
candidates.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
let (index, operation, _cost, _rows) = candidates.into_iter().next().unwrap();
self.build_plan(index, operation, predicates, limit)
}
fn build_plan(
&self,
index: IndexSelection,
operation: QueryOperation,
predicates: &[QueryPredicate],
limit: Option<usize>,
) -> QueryPlan {
let estimated_rows = match &index {
IndexSelection::PrimaryKey => 1,
IndexSelection::FullScan => self.total_edges.max(1),
_ => {
let sel: f64 = predicates
.iter()
.map(|p| self.estimate_selectivity(p))
.product();
(self.total_edges as f64 * sel).max(1.0) as usize
}
};
let estimated_cost = match &operation {
QueryOperation::PointLookup => self.cost_model.index_lookup_cost,
QueryOperation::RangeScan => estimated_rows as f64 * self.cost_model.row_read_cost,
QueryOperation::FullScan => self.total_edges as f64 * self.cost_model.seq_scan_cost,
QueryOperation::VectorSearch { .. } => self.cost_model.vector_search_cost,
QueryOperation::LsmRangeScan { .. } => {
estimated_rows as f64 * self.cost_model.row_read_cost
}
QueryOperation::GraphTraversal { .. } => {
estimated_rows as f64 * self.cost_model.row_read_cost
}
};
QueryPlan {
cost: QueryCost {
estimated_cost,
total_cost: estimated_cost,
estimated_rows,
records_returned: estimated_rows,
index: index.clone(),
operation: operation.clone(),
breakdown: vec![(operation.clone(), estimated_cost)],
},
predicates: predicates.to_vec(),
direction: TraversalDirection::Forward,
limit,
index_selection: index,
operations: vec![operation],
}
}
pub fn estimate_selectivity(&self, predicate: &QueryPredicate) -> f64 {
match predicate {
QueryPredicate::Eq(col, _) => {
if let Some(hint) = self.cardinality_hints.get(col) {
1.0 / hint.cardinality.max(1) as f64
} else {
0.1 }
}
QueryPredicate::Range { .. } => 0.25, QueryPredicate::In(_, values) => (values.len() as f64 * 0.1).min(0.5),
QueryPredicate::Prefix(_, _) => 0.15, QueryPredicate::TimeRange(_, _) => 0.2,
QueryPredicate::Project(_) => 0.1,
QueryPredicate::Tenant(_) => 0.05,
QueryPredicate::SpanType(_) => 0.15,
}
}
}