sochdb_query/
query_optimizer.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Query Optimizer (stub for Task 10 integration)
16//!
17//! Provides cost-based query optimization for SOCH-QL.
18//! This is a stub implementation that will be fully implemented later.
19
20use std::collections::HashMap;
21
22/// Query optimizer with cost-based planning
23#[derive(Debug, Default)]
24pub struct QueryOptimizer {
25    cost_model: CostModel,
26    cardinality_hints: HashMap<String, CardinalityHint>,
27    total_edges: usize,
28}
29
30/// Cost model configuration
31#[derive(Debug, Clone)]
32pub struct CostModel {
33    /// Cost of reading a single row
34    pub row_read_cost: f64,
35    /// Cost of index lookup
36    pub index_lookup_cost: f64,
37    /// Cost of sequential scan per row
38    pub seq_scan_cost: f64,
39    /// Cost of vector search per vector
40    pub vector_search_cost: f64,
41}
42
43impl Default for CostModel {
44    fn default() -> Self {
45        Self {
46            row_read_cost: 1.0,
47            index_lookup_cost: 0.5,
48            seq_scan_cost: 0.1,
49            vector_search_cost: 10.0,
50        }
51    }
52}
53
54/// Cardinality hint for a column or index
55#[derive(Debug, Clone)]
56pub struct CardinalityHint {
57    /// Estimated cardinality
58    pub cardinality: usize,
59    /// Confidence level (0-1)
60    pub confidence: f64,
61    /// Source of the estimate
62    pub source: CardinalitySource,
63}
64
65/// Source of cardinality estimate
66#[derive(Debug, Clone, Copy, PartialEq, Eq)]
67pub enum CardinalitySource {
68    /// Exact count
69    Exact,
70    /// HyperLogLog estimate
71    HyperLogLog,
72    /// Histogram estimate
73    Histogram,
74    /// Default/unknown
75    Default,
76}
77
78/// Query predicate for optimization
79#[derive(Debug, Clone)]
80pub enum QueryPredicate {
81    /// Equality predicate
82    Eq(String, String),
83    /// Range predicate
84    Range {
85        column: String,
86        min: Option<String>,
87        max: Option<String>,
88    },
89    /// In list predicate
90    In(String, Vec<String>),
91    /// Prefix match
92    Prefix(String, String),
93    /// Time range predicate
94    TimeRange(u64, u64),
95    /// Project filter
96    Project(u16),
97    /// Tenant filter
98    Tenant(u32),
99    /// Span type filter
100    SpanType(String),
101}
102
103/// Query operation types
104#[derive(Debug, Clone, PartialEq)]
105pub enum QueryOperation {
106    /// Point lookup
107    PointLookup,
108    /// Range scan
109    RangeScan,
110    /// Full scan
111    FullScan,
112    /// Vector search
113    VectorSearch { k: usize },
114    /// LSM range scan
115    LsmRangeScan { start_us: u64, end_us: u64 },
116    /// Graph traversal
117    GraphTraversal {
118        direction: TraversalDirection,
119        max_depth: usize,
120    },
121}
122
123/// Index selection recommendation
124#[derive(Debug, Clone)]
125pub enum IndexSelection {
126    /// Use primary key index
127    PrimaryKey,
128    /// Use secondary index
129    Secondary(String),
130    /// Use time-based index
131    TimeIndex,
132    /// Use vector index
133    VectorIndex,
134    /// Full table scan / LSM scan
135    FullScan,
136    /// LSM scan
137    LsmScan,
138    /// Causal index
139    CausalIndex,
140    /// Project index
141    ProjectIndex,
142    /// Multi-index intersection
143    MultiIndex(Vec<IndexSelection>),
144}
145
146/// Query cost estimate
147#[derive(Debug, Clone)]
148pub struct QueryCost {
149    /// Estimated cost
150    pub estimated_cost: f64,
151    /// Total cost (alias for compatibility)
152    pub total_cost: f64,
153    /// Estimated rows
154    pub estimated_rows: usize,
155    /// Records returned (alias)
156    pub records_returned: usize,
157    /// Selected index
158    pub index: IndexSelection,
159    /// Operation type
160    pub operation: QueryOperation,
161    /// Cost breakdown by operation
162    pub breakdown: Vec<(QueryOperation, f64)>,
163}
164
165/// Traversal direction for range queries
166#[derive(Debug, Clone, Copy, PartialEq, Eq)]
167pub enum TraversalDirection {
168    Forward,
169    Backward,
170}
171
172/// Query plan from optimizer
173#[derive(Debug, Clone)]
174pub struct QueryPlan {
175    /// Cost estimate
176    pub cost: QueryCost,
177    /// Predicates to apply
178    pub predicates: Vec<QueryPredicate>,
179    /// Traversal direction
180    pub direction: TraversalDirection,
181    /// Limit
182    pub limit: Option<usize>,
183    /// Index selection
184    pub index_selection: IndexSelection,
185    /// Operations to execute
186    pub operations: Vec<QueryOperation>,
187}
188
189impl QueryOptimizer {
190    /// Create a new query optimizer
191    pub fn new() -> Self {
192        Self::default()
193    }
194
195    /// Create with custom cost model
196    pub fn with_cost_model(cost_model: CostModel) -> Self {
197        Self {
198            cost_model,
199            ..Default::default()
200        }
201    }
202
203    /// Update total edge count for cardinality estimation
204    pub fn update_total_edges(&mut self, count: usize, _source: CardinalitySource) {
205        self.total_edges = count;
206    }
207
208    /// Set cardinality hint for a column
209    pub fn set_cardinality_hint(&mut self, column: &str, hint: CardinalityHint) {
210        self.cardinality_hints.insert(column.to_string(), hint);
211    }
212
213    /// Plan a query
214    pub fn plan_query(&self, predicates: &[QueryPredicate], limit: Option<usize>) -> QueryPlan {
215        // Simple heuristic: use index if we have predicates, otherwise full scan
216        let (index, operation) = if predicates.is_empty() {
217            (IndexSelection::FullScan, QueryOperation::FullScan)
218        } else {
219            // Check for primary key lookup
220            let has_eq = predicates
221                .iter()
222                .any(|p| matches!(p, QueryPredicate::Eq(_, _)));
223            if has_eq {
224                (IndexSelection::PrimaryKey, QueryOperation::PointLookup)
225            } else {
226                (IndexSelection::FullScan, QueryOperation::RangeScan)
227            }
228        };
229
230        let estimated_rows = match &index {
231            IndexSelection::PrimaryKey => 1,
232            IndexSelection::FullScan => self.total_edges.max(1000),
233            _ => self.total_edges / 10,
234        };
235
236        let estimated_cost = match &operation {
237            QueryOperation::PointLookup => self.cost_model.index_lookup_cost,
238            QueryOperation::RangeScan => estimated_rows as f64 * self.cost_model.row_read_cost,
239            QueryOperation::FullScan => self.total_edges as f64 * self.cost_model.seq_scan_cost,
240            QueryOperation::VectorSearch { .. } => self.cost_model.vector_search_cost,
241            _ => estimated_rows as f64 * self.cost_model.row_read_cost,
242        };
243
244        QueryPlan {
245            cost: QueryCost {
246                estimated_cost,
247                total_cost: estimated_cost,
248                estimated_rows,
249                records_returned: estimated_rows,
250                index: index.clone(),
251                operation: operation.clone(),
252                breakdown: vec![(operation.clone(), estimated_cost)],
253            },
254            predicates: predicates.to_vec(),
255            direction: TraversalDirection::Forward,
256            limit,
257            index_selection: index,
258            operations: vec![operation],
259        }
260    }
261
262    /// Estimate selectivity of a predicate
263    pub fn estimate_selectivity(&self, predicate: &QueryPredicate) -> f64 {
264        match predicate {
265            QueryPredicate::Eq(col, _) => {
266                if let Some(hint) = self.cardinality_hints.get(col) {
267                    1.0 / hint.cardinality.max(1) as f64
268                } else {
269                    0.1 // Default 10% selectivity
270                }
271            }
272            QueryPredicate::Range { .. } => 0.25, // Default 25% for range
273            QueryPredicate::In(_, values) => (values.len() as f64 * 0.1).min(0.5),
274            QueryPredicate::Prefix(_, _) => 0.15, // Default 15% for prefix
275            QueryPredicate::TimeRange(_, _) => 0.2,
276            QueryPredicate::Project(_) => 0.1,
277            QueryPredicate::Tenant(_) => 0.05,
278            QueryPredicate::SpanType(_) => 0.15,
279        }
280    }
281}