#![allow(clippy::cast_precision_loss)]
#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_sign_loss)]
use std::fmt;
pub mod calibration;
pub mod cost_factors;
pub mod cost_model;
pub mod plan_generator;
pub mod query_executor;
#[cfg(test)]
mod plan_generator_tests;
#[cfg(test)]
mod tests;
pub use cost_model::{CostEstimator, OperationCost, OperationCostFactors};
pub use plan_generator::{CandidatePlan, PlanGenerator, QueryCharacteristics};
pub use query_executor::{ExecutionContext, PlanCache, QueryOptimizer};
/// Inputs that describe a single query for the purpose of cost estimation.
#[derive(Debug, Clone)]
pub struct QueryParams {
    /// Size of the dataset the query runs against; the estimator uses its
    /// base-2 logarithm.
    pub dataset_size: usize,
    /// `ef_search` setting of the query; the estimator scales cost linearly
    /// with it, relative to the calibration's reference value.
    pub ef_search: usize,
    /// `top_k` setting of the query; the estimator scales cost with the
    /// square root of its ratio to the calibration's reference value.
    pub top_k: usize,
    /// Selectivity of an optional filter. `None` means no filter was supplied.
    pub filter_selectivity: Option<f64>,
}

impl Default for QueryParams {
    /// Builds parameters with `dataset_size = 10_000`, `ef_search = 128`,
    /// `top_k = 10` and no filter.
    fn default() -> Self {
        Self::new(10_000, 128, 10)
    }
}

impl QueryParams {
    /// Creates parameters without a filter.
    #[must_use]
    pub fn new(dataset_size: usize, ef_search: usize, top_k: usize) -> Self {
        let filter_selectivity = None;
        Self {
            dataset_size,
            ef_search,
            top_k,
            filter_selectivity,
        }
    }

    /// Returns the parameters with a filter selectivity attached.
    ///
    /// The value is clamped into `[0.001, 1.0]` before it is stored.
    #[must_use]
    pub fn with_filter_selectivity(self, selectivity: f64) -> Self {
        let bounded = selectivity.clamp(0.001, 1.0);
        Self {
            filter_selectivity: Some(bounded),
            ..self
        }
    }
}
/// The individual multipliers that make up a query cost estimate.
///
/// `QueryCostEstimator::estimate` multiplies these together with the
/// calibration's base cost to obtain the total cost.
#[derive(Debug, Clone)]
pub struct CostFactors {
    /// Contribution of the dataset size.
    pub dataset_size_factor: f64,
    /// Contribution of the `ef_search` setting.
    pub ef_search_factor: f64,
    /// Contribution of the filter selectivity.
    pub filter_selectivity_factor: f64,
    /// Contribution of the `top_k` setting.
    pub top_k_factor: f64,
}

impl Default for CostFactors {
    /// Every factor starts at `1.0`, the multiplicative identity, so a default
    /// set of factors leaves a cost unchanged.
    fn default() -> Self {
        const NEUTRAL: f64 = 1.0;
        Self {
            top_k_factor: NEUTRAL,
            filter_selectivity_factor: NEUTRAL,
            ef_search_factor: NEUTRAL,
            dataset_size_factor: NEUTRAL,
        }
    }
}
/// Result of a cost estimation: the total, its latency projection and the
/// factors it was derived from.
#[derive(Debug, Clone)]
pub struct QueryCostEstimate {
    /// Overall cost in abstract cost units.
    pub total_cost: f64,
    /// Latency projection in milliseconds.
    pub estimated_latency_ms: f64,
    /// Breakdown of the multipliers behind `total_cost`.
    pub factors: CostFactors,
}

impl QueryCostEstimate {
    /// Bundles an already computed cost, latency and factor breakdown.
    ///
    /// The values are stored as given; nothing is recomputed or validated.
    #[must_use]
    pub fn new(total_cost: f64, estimated_latency_ms: f64, factors: CostFactors) -> Self {
        Self {
            factors,
            estimated_latency_ms,
            total_cost,
        }
    }
}
/// Tunable constants of the cost model.
#[derive(Debug, Clone)]
pub struct CostCalibration {
    /// Constant multiplier applied to every estimate.
    pub base_cost: f64,
    /// `ef_search` value at which the ef_search factor equals `1.0`.
    pub reference_ef_search: f64,
    /// `top_k` value at which the top_k factor equals `1.0`.
    pub reference_top_k: f64,
    /// Conversion rate from cost units to milliseconds of latency.
    pub ms_per_cost_unit: f64,
    /// Exponent applied to the inverse filter selectivity.
    pub filter_exponent: f64,
}

impl Default for CostCalibration {
    /// Baseline calibration: base cost `1.0`, references of `100` (ef_search)
    /// and `10` (top_k), `0.1` ms per cost unit, filter exponent `0.3`.
    fn default() -> Self {
        Self {
            base_cost: 1.0,
            reference_ef_search: 100.0,
            reference_top_k: 10.0,
            ms_per_cost_unit: 0.1,
            filter_exponent: 0.3,
        }
    }
}

impl CostCalibration {
    /// Baseline calibration with only the cost-to-latency rate replaced.
    fn with_latency_rate(ms_per_cost_unit: f64) -> Self {
        Self {
            ms_per_cost_unit,
            ..Self::default()
        }
    }

    /// Preset with `0.05` ms per cost unit (half the default rate).
    #[must_use]
    pub fn fast_system() -> Self {
        Self::with_latency_rate(0.05)
    }

    /// Preset with `0.2` ms per cost unit (twice the default rate).
    #[must_use]
    pub fn slow_system() -> Self {
        Self::with_latency_rate(0.2)
    }
}
/// Error reported when an estimated query cost is above the permitted maximum.
#[derive(Debug, Clone)]
pub struct QueryCostExceeded {
    /// The cost that was estimated for the query.
    pub estimated: f64,
    /// The limit the estimate was compared against.
    pub max_allowed: f64,
}

impl fmt::Display for QueryCostExceeded {
    /// Renders both values with one decimal place.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            estimated,
            max_allowed,
        } = self;
        f.write_fmt(format_args!(
            "Query cost {:.1} exceeds limit {:.1}",
            estimated, max_allowed
        ))
    }
}

impl std::error::Error for QueryCostExceeded {}
/// Estimates the cost of a query from its [`QueryParams`] and a
/// [`CostCalibration`], optionally enforcing a stored cost ceiling.
#[derive(Debug, Clone)]
pub struct QueryCostEstimator {
    /// Constants used by every estimate.
    calibration: CostCalibration,
    /// Ceiling applied by [`Self::check_collection_limit`]; `None` disables it.
    max_cost: Option<f64>,
}

impl Default for QueryCostEstimator {
    /// Estimator with the default calibration and no cost ceiling.
    fn default() -> Self {
        Self::new(CostCalibration::default())
    }
}

impl QueryCostEstimator {
    /// Creates an estimator with the given calibration and no cost ceiling.
    #[must_use]
    pub fn new(calibration: CostCalibration) -> Self {
        Self {
            calibration,
            max_cost: None,
        }
    }

    /// Returns the estimator with its stored cost ceiling set to `max_cost`.
    #[must_use]
    pub fn with_max_cost(mut self, max_cost: f64) -> Self {
        self.max_cost = Some(max_cost);
        self
    }

    /// Replaces the stored cost ceiling; `None` removes it.
    pub fn set_max_cost(&mut self, max_cost: Option<f64>) {
        self.max_cost = max_cost;
    }

    /// Returns the stored cost ceiling, if any.
    #[must_use]
    pub fn max_cost(&self) -> Option<f64> {
        self.max_cost
    }

    /// Computes the cost estimate for `params`.
    ///
    /// The total is the product of the calibration's base cost and four
    /// factors:
    ///
    /// * dataset size: `log2(dataset_size + 1)`, or `1.0` for an empty dataset;
    /// * ef_search: `ef_search / reference_ef_search`;
    /// * top_k: `sqrt(top_k / reference_top_k)`;
    /// * filter: `(1 / selectivity) ^ filter_exponent`, with the selectivity
    ///   bounded to `[0.001, 1.0]` and `1.0` when no filter is set.
    ///
    /// The latency is the total multiplied by `ms_per_cost_unit`.
    #[must_use]
    pub fn estimate(&self, params: &QueryParams) -> QueryCostEstimate {
        let cal = &self.calibration;

        let dataset_size_factor = if params.dataset_size > 0 {
            (params.dataset_size as f64 + 1.0).log2()
        } else {
            1.0
        };
        let ef_search_factor = params.ef_search as f64 / cal.reference_ef_search;
        let top_k_factor = (params.top_k as f64 / cal.reference_top_k).sqrt();

        // `filter_selectivity` is a public field, so it can bypass the clamping
        // done by the setters. Bound it on both sides here: a value above 1.0
        // would otherwise make a filtered query cheaper than an unfiltered one.
        // `f64::max` ignores NaN, so a NaN selectivity is treated as 0.001.
        let raw_selectivity = params.filter_selectivity.unwrap_or(1.0);
        let selectivity = if raw_selectivity > 1.0 {
            1.0
        } else {
            raw_selectivity.max(0.001)
        };
        let filter_selectivity_factor = (1.0 / selectivity).powf(cal.filter_exponent);

        let total_cost = cal.base_cost
            * dataset_size_factor
            * ef_search_factor
            * top_k_factor
            * filter_selectivity_factor;
        let estimated_latency_ms = total_cost * cal.ms_per_cost_unit;

        let factors = CostFactors {
            dataset_size_factor,
            ef_search_factor,
            filter_selectivity_factor,
            top_k_factor,
        };
        QueryCostEstimate::new(total_cost, estimated_latency_ms, factors)
    }

    /// Estimates `params` and compares the result against an explicit limit.
    ///
    /// # Errors
    ///
    /// Returns [`QueryCostExceeded`] when the estimated total cost is strictly
    /// greater than `max_cost`.
    pub fn check_cost_limit(
        &self,
        params: &QueryParams,
        max_cost: f64,
    ) -> Result<QueryCostEstimate, QueryCostExceeded> {
        let estimate = self.estimate(params);
        if estimate.total_cost > max_cost {
            Err(QueryCostExceeded {
                estimated: estimate.total_cost,
                max_allowed: max_cost,
            })
        } else {
            Ok(estimate)
        }
    }

    /// Estimates `params` and compares the result against the stored ceiling.
    ///
    /// Without a stored ceiling the estimate is always returned.
    ///
    /// # Errors
    ///
    /// Returns [`QueryCostExceeded`] when a ceiling is stored and the estimated
    /// total cost is strictly greater than it.
    pub fn check_collection_limit(
        &self,
        params: &QueryParams,
    ) -> Result<QueryCostEstimate, QueryCostExceeded> {
        match self.max_cost {
            Some(limit) => self.check_cost_limit(params, limit),
            None => Ok(self.estimate(params)),
        }
    }

    /// Renders a human-readable breakdown of the estimate for `params`.
    ///
    /// The reference values in the ef_search and top_k lines come from the
    /// calibration in use. The dataset line prints the raw dataset size,
    /// whereas the factor beside it is computed as described on
    /// [`Self::estimate`].
    #[must_use]
    pub fn explain(&self, params: &QueryParams) -> String {
        let estimate = self.estimate(params);
        let cal = &self.calibration;
        format!(
            "Query Cost Estimate\n\
             ===================\n\
             Total Cost: {:.2}\n\
             Estimated Latency: {:.2}ms\n\n\
             Cost Breakdown:\n\
             - Dataset Size Factor (log2({})): {:.2}\n\
             - ef_search Factor ({}/{}): {:.2}\n\
             - top_k Factor (sqrt({}/{})): {:.2}\n\
             - Filter Selectivity Factor: {:.2}\n",
            estimate.total_cost,
            estimate.estimated_latency_ms,
            params.dataset_size,
            estimate.factors.dataset_size_factor,
            params.ef_search,
            // Printed as f64 (whole numbers render without a fraction) so a
            // fractional reference is not truncated.
            cal.reference_ef_search,
            estimate.factors.ef_search_factor,
            params.top_k,
            cal.reference_top_k,
            estimate.factors.top_k_factor,
            estimate.factors.filter_selectivity_factor,
        )
    }
}
#[derive(Debug, Default)]
pub struct QueryParamsBuilder {
params: QueryParams,
}
impl QueryParamsBuilder {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub fn dataset_size(mut self, size: usize) -> Self {
self.params.dataset_size = size;
self
}
#[must_use]
pub fn ef_search(mut self, ef: usize) -> Self {
self.params.ef_search = ef;
self
}
#[must_use]
pub fn top_k(mut self, k: usize) -> Self {
self.params.top_k = k;
self
}
#[must_use]
pub fn filter_selectivity(mut self, selectivity: f64) -> Self {
self.params.filter_selectivity = Some(selectivity.clamp(0.001, 1.0));
self
}
#[must_use]
pub fn build(self) -> QueryParams {
self.params
}
}