Skip to main content

sochdb_query/
query_optimizer.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Query Optimizer (stub for Task 10 integration)
19//!
20//! Provides cost-based query optimization for SOCH-QL.
21//! This is a stub implementation that will be fully implemented later.
22
23use std::collections::HashMap;
24
25/// Query optimizer with cost-based planning
26#[derive(Debug, Default)]
27pub struct QueryOptimizer {
28    cost_model: CostModel,
29    cardinality_hints: HashMap<String, CardinalityHint>,
30    total_edges: usize,
31}
32
33/// Cost model configuration
34#[derive(Debug, Clone)]
35pub struct CostModel {
36    /// Cost of reading a single row
37    pub row_read_cost: f64,
38    /// Cost of index lookup
39    pub index_lookup_cost: f64,
40    /// Cost of sequential scan per row
41    pub seq_scan_cost: f64,
42    /// Cost of vector search per vector
43    pub vector_search_cost: f64,
44}
45
46impl Default for CostModel {
47    fn default() -> Self {
48        Self {
49            row_read_cost: 1.0,
50            index_lookup_cost: 0.5,
51            seq_scan_cost: 0.1,
52            vector_search_cost: 10.0,
53        }
54    }
55}
56
57/// Cardinality hint for a column or index
58#[derive(Debug, Clone)]
59pub struct CardinalityHint {
60    /// Estimated cardinality
61    pub cardinality: usize,
62    /// Confidence level (0-1)
63    pub confidence: f64,
64    /// Source of the estimate
65    pub source: CardinalitySource,
66}
67
68/// Source of cardinality estimate
69#[derive(Debug, Clone, Copy, PartialEq, Eq)]
70pub enum CardinalitySource {
71    /// Exact count
72    Exact,
73    /// HyperLogLog estimate
74    HyperLogLog,
75    /// Histogram estimate
76    Histogram,
77    /// Default/unknown
78    Default,
79}
80
81/// Query predicate for optimization
82#[derive(Debug, Clone)]
83pub enum QueryPredicate {
84    /// Equality predicate
85    Eq(String, String),
86    /// Range predicate
87    Range {
88        column: String,
89        min: Option<String>,
90        max: Option<String>,
91    },
92    /// In list predicate
93    In(String, Vec<String>),
94    /// Prefix match
95    Prefix(String, String),
96    /// Time range predicate
97    TimeRange(u64, u64),
98    /// Project filter
99    Project(u16),
100    /// Tenant filter
101    Tenant(u32),
102    /// Span type filter
103    SpanType(String),
104}
105
106/// Query operation types
107#[derive(Debug, Clone, PartialEq)]
108pub enum QueryOperation {
109    /// Point lookup
110    PointLookup,
111    /// Range scan
112    RangeScan,
113    /// Full scan
114    FullScan,
115    /// Vector search
116    VectorSearch { k: usize },
117    /// LSM range scan
118    LsmRangeScan { start_us: u64, end_us: u64 },
119    /// Graph traversal
120    GraphTraversal {
121        direction: TraversalDirection,
122        max_depth: usize,
123    },
124}
125
126/// Index selection recommendation
127#[derive(Debug, Clone)]
128pub enum IndexSelection {
129    /// Use primary key index
130    PrimaryKey,
131    /// Use secondary index
132    Secondary(String),
133    /// Use time-based index
134    TimeIndex,
135    /// Use vector index
136    VectorIndex,
137    /// Full table scan / LSM scan
138    FullScan,
139    /// LSM scan
140    LsmScan,
141    /// Causal index
142    CausalIndex,
143    /// Project index
144    ProjectIndex,
145    /// Multi-index intersection
146    MultiIndex(Vec<IndexSelection>),
147}
148
149/// Query cost estimate
150#[derive(Debug, Clone)]
151pub struct QueryCost {
152    /// Estimated cost
153    pub estimated_cost: f64,
154    /// Total cost (alias for compatibility)
155    pub total_cost: f64,
156    /// Estimated rows
157    pub estimated_rows: usize,
158    /// Records returned (alias)
159    pub records_returned: usize,
160    /// Selected index
161    pub index: IndexSelection,
162    /// Operation type
163    pub operation: QueryOperation,
164    /// Cost breakdown by operation
165    pub breakdown: Vec<(QueryOperation, f64)>,
166}
167
168/// Traversal direction for range queries
169#[derive(Debug, Clone, Copy, PartialEq, Eq)]
170pub enum TraversalDirection {
171    Forward,
172    Backward,
173}
174
175/// Query plan from optimizer
176#[derive(Debug, Clone)]
177pub struct QueryPlan {
178    /// Cost estimate
179    pub cost: QueryCost,
180    /// Predicates to apply
181    pub predicates: Vec<QueryPredicate>,
182    /// Traversal direction
183    pub direction: TraversalDirection,
184    /// Limit
185    pub limit: Option<usize>,
186    /// Index selection
187    pub index_selection: IndexSelection,
188    /// Operations to execute
189    pub operations: Vec<QueryOperation>,
190}
191
192impl QueryOptimizer {
193    /// Create a new query optimizer
194    pub fn new() -> Self {
195        Self::default()
196    }
197
198    /// Create with custom cost model
199    pub fn with_cost_model(cost_model: CostModel) -> Self {
200        Self {
201            cost_model,
202            ..Default::default()
203        }
204    }
205
206    /// Update total edge count for cardinality estimation
207    pub fn update_total_edges(&mut self, count: usize, _source: CardinalitySource) {
208        self.total_edges = count;
209    }
210
211    /// Set cardinality hint for a column
212    pub fn set_cardinality_hint(&mut self, column: &str, hint: CardinalityHint) {
213        self.cardinality_hints.insert(column.to_string(), hint);
214    }
215
216    /// Plan a query
217    pub fn plan_query(&self, predicates: &[QueryPredicate], limit: Option<usize>) -> QueryPlan {
218        // Simple heuristic: use index if we have predicates, otherwise full scan
219        let (index, operation) = if predicates.is_empty() {
220            (IndexSelection::FullScan, QueryOperation::FullScan)
221        } else {
222            // Check for primary key lookup
223            let has_eq = predicates
224                .iter()
225                .any(|p| matches!(p, QueryPredicate::Eq(_, _)));
226            if has_eq {
227                (IndexSelection::PrimaryKey, QueryOperation::PointLookup)
228            } else {
229                (IndexSelection::FullScan, QueryOperation::RangeScan)
230            }
231        };
232
233        let estimated_rows = match &index {
234            IndexSelection::PrimaryKey => 1,
235            IndexSelection::FullScan => self.total_edges.max(1000),
236            _ => self.total_edges / 10,
237        };
238
239        let estimated_cost = match &operation {
240            QueryOperation::PointLookup => self.cost_model.index_lookup_cost,
241            QueryOperation::RangeScan => estimated_rows as f64 * self.cost_model.row_read_cost,
242            QueryOperation::FullScan => self.total_edges as f64 * self.cost_model.seq_scan_cost,
243            QueryOperation::VectorSearch { .. } => self.cost_model.vector_search_cost,
244            _ => estimated_rows as f64 * self.cost_model.row_read_cost,
245        };
246
247        QueryPlan {
248            cost: QueryCost {
249                estimated_cost,
250                total_cost: estimated_cost,
251                estimated_rows,
252                records_returned: estimated_rows,
253                index: index.clone(),
254                operation: operation.clone(),
255                breakdown: vec![(operation.clone(), estimated_cost)],
256            },
257            predicates: predicates.to_vec(),
258            direction: TraversalDirection::Forward,
259            limit,
260            index_selection: index,
261            operations: vec![operation],
262        }
263    }
264
265    /// Estimate selectivity of a predicate
266    pub fn estimate_selectivity(&self, predicate: &QueryPredicate) -> f64 {
267        match predicate {
268            QueryPredicate::Eq(col, _) => {
269                if let Some(hint) = self.cardinality_hints.get(col) {
270                    1.0 / hint.cardinality.max(1) as f64
271                } else {
272                    0.1 // Default 10% selectivity
273                }
274            }
275            QueryPredicate::Range { .. } => 0.25, // Default 25% for range
276            QueryPredicate::In(_, values) => (values.len() as f64 * 0.1).min(0.5),
277            QueryPredicate::Prefix(_, _) => 0.15, // Default 15% for prefix
278            QueryPredicate::TimeRange(_, _) => 0.2,
279            QueryPredicate::Project(_) => 0.1,
280            QueryPredicate::Tenant(_) => 0.05,
281            QueryPredicate::SpanType(_) => 0.15,
282        }
283    }
284}