vibesql_storage/database/
index_ops.rs

1// ============================================================================
2// Database Index Operations
3// ============================================================================
4
5use vibesql_ast::IndexColumn;
6
7use super::{core::Database, operations::SpatialIndexMetadata};
8use crate::{Row, StorageError};
9
10// ============================================================================
11// DELETE Profiling Statistics (thread-local aggregates)
12// ============================================================================
13
14/// Aggregate statistics for DELETE profiling.
15///
16/// # Environment Variables
17///
18/// - `DELETE_PROFILE=1` - Enable timing collection and print summary on thread exit
19/// - `DELETE_PROFILE_VERBOSE=1` - Also print per-delete breakdown to stderr
20///
21/// When `DELETE_PROFILE=1` is set, aggregate statistics are automatically printed
22/// when the thread-local stats are dropped (typically at thread exit).
23#[derive(Default)]
24pub struct DeleteProfileStats {
25    pub count: u64,
26    pub total_ns: u128,
27    pub pk_lookup_ns: u128,
28    pub row_clone_ns: u128,
29    pub wal_ns: u128,
30    pub index_update_ns: u128,
31    pub row_remove_ns: u128,
32    pub cache_ns: u128,
33}
34
35impl DeleteProfileStats {
36    /// Add timing data from a single delete operation
37    pub fn record(&mut self, phase_times: &[u128; 6], total_ns: u128) {
38        self.count += 1;
39        self.total_ns += total_ns;
40        self.pk_lookup_ns += phase_times[0];
41        self.row_clone_ns += phase_times[1];
42        self.wal_ns += phase_times[2];
43        self.index_update_ns += phase_times[3];
44        self.row_remove_ns += phase_times[4];
45        self.cache_ns += phase_times[5];
46    }
47
48    /// Print a summary of the aggregate statistics
49    pub fn print_summary(&self) {
50        if self.count == 0 {
51            return;
52        }
53        let total = self.total_ns;
54        let avg_us = (total as f64 / self.count as f64) / 1000.0;
55        eprintln!("\n=== DELETE PROFILE SUMMARY ({} deletes) ===", self.count);
56        eprintln!("Average DELETE time: {:.1}µs", avg_us);
57        eprintln!(
58            "  pk_lookup:    {:>8.1}µs ({:>5.1}%)",
59            (self.pk_lookup_ns as f64 / self.count as f64) / 1000.0,
60            if total > 0 { self.pk_lookup_ns as f64 / total as f64 * 100.0 } else { 0.0 }
61        );
62        eprintln!(
63            "  value_clone:  {:>8.1}µs ({:>5.1}%)",
64            (self.row_clone_ns as f64 / self.count as f64) / 1000.0,
65            if total > 0 { self.row_clone_ns as f64 / total as f64 * 100.0 } else { 0.0 }
66        );
67        eprintln!(
68            "  wal:          {:>8.1}µs ({:>5.1}%)",
69            (self.wal_ns as f64 / self.count as f64) / 1000.0,
70            if total > 0 { self.wal_ns as f64 / total as f64 * 100.0 } else { 0.0 }
71        );
72        eprintln!(
73            "  index_update: {:>8.1}µs ({:>5.1}%)",
74            (self.index_update_ns as f64 / self.count as f64) / 1000.0,
75            if total > 0 { self.index_update_ns as f64 / total as f64 * 100.0 } else { 0.0 }
76        );
77        eprintln!(
78            "  row_remove:   {:>8.1}µs ({:>5.1}%)",
79            (self.row_remove_ns as f64 / self.count as f64) / 1000.0,
80            if total > 0 { self.row_remove_ns as f64 / total as f64 * 100.0 } else { 0.0 }
81        );
82        eprintln!(
83            "  cache:        {:>8.1}µs ({:>5.1}%)",
84            (self.cache_ns as f64 / self.count as f64) / 1000.0,
85            if total > 0 { self.cache_ns as f64 / total as f64 * 100.0 } else { 0.0 }
86        );
87        eprintln!("==========================================\n");
88    }
89}
90
91impl Drop for DeleteProfileStats {
92    fn drop(&mut self) {
93        // Print summary if DELETE_PROFILE=1 (auto-summary) or DELETE_PROFILE_SUMMARY=1 (explicit)
94        if self.count > 0
95            && (std::env::var("DELETE_PROFILE").is_ok()
96                || std::env::var("DELETE_PROFILE_SUMMARY").is_ok())
97        {
98            self.print_summary();
99        }
100    }
101}
102
103thread_local! {
104    /// Thread-local aggregate statistics for DELETE profiling
105    pub static DELETE_PROFILE_STATS: std::cell::RefCell<DeleteProfileStats> =
106        std::cell::RefCell::new(DeleteProfileStats::default());
107}
108
109/// Print the DELETE profile summary for the current thread.
110/// Call this at the end of a benchmark to see aggregate statistics.
111pub fn print_delete_profile_summary() {
112    DELETE_PROFILE_STATS.with(|stats| {
113        stats.borrow().print_summary();
114    });
115}
116
117/// Reset the DELETE profile statistics for the current thread.
118pub fn reset_delete_profile_stats() {
119    DELETE_PROFILE_STATS.with(|stats| {
120        *stats.borrow_mut() = DeleteProfileStats::default();
121    });
122}
123
124impl Database {
125    // ============================================================================
126    // Index Management
127    // ============================================================================
128
129    /// Create an index
130    pub fn create_index(
131        &mut self,
132        index_name: String,
133        table_name: String,
134        unique: bool,
135        columns: Vec<IndexColumn>,
136    ) -> Result<(), StorageError> {
137        self.operations.create_index(
138            &self.catalog,
139            &self.tables,
140            index_name,
141            table_name,
142            unique,
143            columns,
144        )
145    }
146
147    /// Create an index with pre-computed keys (for expression indexes)
148    ///
149    /// This method is used when the caller has already evaluated the expressions
150    /// and computed the key values for each row. This is necessary for expression
151    /// indexes where the key values are derived from evaluating expressions on rows.
152    ///
153    /// # Arguments
154    /// * `index_name` - Name of the index to create
155    /// * `table_name` - Name of the table this index is on
156    /// * `unique` - Whether this is a unique index
157    /// * `columns` - The index column definitions (for metadata storage)
158    /// * `keys` - Pre-computed (key_values, row_id) pairs
159    pub fn create_index_with_keys(
160        &mut self,
161        index_name: String,
162        table_name: String,
163        unique: bool,
164        columns: Vec<IndexColumn>,
165        keys: Vec<(Vec<vibesql_types::SqlValue>, usize)>,
166    ) -> Result<(), StorageError> {
167        self.operations.create_index_with_keys(
168            &self.catalog,
169            index_name,
170            table_name,
171            unique,
172            columns,
173            keys,
174        )
175    }
176
177    /// Check if an index exists
178    pub fn index_exists(&self, index_name: &str) -> bool {
179        self.operations.index_exists(index_name)
180    }
181
182    /// Get index metadata
183    pub fn get_index(&self, index_name: &str) -> Option<&super::indexes::IndexMetadata> {
184        self.operations.get_index(index_name)
185    }
186
187    /// Get index data
188    pub fn get_index_data(&self, index_name: &str) -> Option<&super::indexes::IndexData> {
189        self.operations.get_index_data(index_name)
190    }
191
192    /// Update user-defined indexes for update operation
193    ///
194    /// # Arguments
195    /// * `table_name` - Name of the table being updated
196    /// * `old_row` - Row data before the update
197    /// * `new_row` - Row data after the update
198    /// * `row_index` - Index of the row in the table
199    /// * `changed_columns` - Optional set of column indices that were modified. If provided,
200    ///   indexes that don't involve any changed columns will be skipped.
201    pub fn update_indexes_for_update(
202        &mut self,
203        table_name: &str,
204        old_row: &Row,
205        new_row: &Row,
206        row_index: usize,
207        changed_columns: Option<&std::collections::HashSet<usize>>,
208    ) {
209        self.operations.update_indexes_for_update(
210            &self.catalog,
211            table_name,
212            old_row,
213            new_row,
214            row_index,
215            changed_columns,
216        );
217    }
218
219    /// Update user-defined indexes for delete operation
220    pub fn update_indexes_for_delete(&mut self, table_name: &str, row: &Row, row_index: usize) {
221        self.operations.update_indexes_for_delete(&self.catalog, table_name, row, row_index);
222    }
223
224    /// Batch update user-defined indexes for delete operation
225    ///
226    /// This is significantly more efficient than calling `update_indexes_for_delete` in a loop
227    /// because it pre-computes column indices once per index rather than once per row.
228    pub fn batch_update_indexes_for_delete(
229        &mut self,
230        table_name: &str,
231        rows_to_delete: &[(usize, &Row)],
232    ) {
233        self.operations.batch_update_indexes_for_delete(&self.catalog, table_name, rows_to_delete);
234    }
235
236    /// Rebuild user-defined indexes after bulk operations that change row indices
237    pub fn rebuild_indexes(&mut self, table_name: &str) {
238        self.operations.rebuild_indexes(&self.catalog, &self.tables, table_name);
239    }
240
241    /// Adjust user-defined indexes after row deletions
242    ///
243    /// This is more efficient than rebuild_indexes when only a few rows are deleted,
244    /// as it adjusts row indices in place rather than rebuilding from scratch.
245    ///
246    /// # Arguments
247    /// * `table_name` - Name of the table whose indexes need adjustment
248    /// * `deleted_indices` - Sorted list of deleted row indices (ascending order)
249    pub fn adjust_indexes_after_delete(&mut self, table_name: &str, deleted_indices: &[usize]) {
250        self.operations.adjust_indexes_after_delete(table_name, deleted_indices);
251    }
252
253    /// Drop an index
254    pub fn drop_index(&mut self, index_name: &str) -> Result<(), StorageError> {
255        self.operations.drop_index(index_name)
256    }
257
258    /// List all indexes
259    pub fn list_indexes(&self) -> Vec<String> {
260        self.operations.list_indexes()
261    }
262
263    /// List all indexes for a specific table
264    pub fn list_indexes_for_table(&self, table_name: &str) -> Vec<String> {
265        self.operations.list_indexes_for_table(table_name)
266    }
267
268    /// Check if a column has any user-defined index
269    ///
270    /// This is used to determine if updates to a column require index maintenance.
271    /// Returns true if any user-defined index (B-tree or spatial) includes this column.
272    #[inline]
273    pub fn has_index_on_column(&self, table_name: &str, column_name: &str) -> bool {
274        self.operations.has_index_on_column(table_name, column_name)
275    }
276
277    // ============================================================================
278    // Expression Index Methods
279    // ============================================================================
280
281    /// Add row to expression indexes after insert with pre-computed keys
282    ///
283    /// This method handles expression indexes which require pre-computed key values
284    /// since the storage layer cannot evaluate expressions.
285    pub fn add_to_expression_indexes_for_insert(
286        &mut self,
287        table_name: &str,
288        row_index: usize,
289        expression_keys: &std::collections::HashMap<String, Vec<vibesql_types::SqlValue>>,
290    ) {
291        self.operations.add_to_expression_indexes_for_insert(
292            table_name,
293            row_index,
294            expression_keys,
295        );
296    }
297
298    /// Update expression indexes for update operation with pre-computed keys
299    pub fn update_expression_indexes_for_update(
300        &mut self,
301        table_name: &str,
302        row_index: usize,
303        old_expression_keys: &std::collections::HashMap<String, Vec<vibesql_types::SqlValue>>,
304        new_expression_keys: &std::collections::HashMap<String, Vec<vibesql_types::SqlValue>>,
305    ) {
306        self.operations.update_expression_indexes_for_update(
307            table_name,
308            row_index,
309            old_expression_keys,
310            new_expression_keys,
311        );
312    }
313
314    /// Update expression indexes for delete operation with pre-computed keys
315    pub fn update_expression_indexes_for_delete(
316        &mut self,
317        table_name: &str,
318        row_index: usize,
319        expression_keys: &std::collections::HashMap<String, Vec<vibesql_types::SqlValue>>,
320    ) {
321        self.operations.update_expression_indexes_for_delete(
322            table_name,
323            row_index,
324            expression_keys,
325        );
326    }
327
328    /// Get expression indexes for a specific table
329    ///
330    /// Returns metadata for all expression indexes on the table. Used by executor
331    /// to determine which indexes need expression evaluation during DML operations.
332    pub fn get_expression_indexes_for_table(
333        &self,
334        table_name: &str,
335    ) -> Vec<(String, &crate::database::indexes::IndexMetadata)> {
336        self.operations.get_expression_indexes_for_table(table_name)
337    }
338
339    /// Check if a table has any expression indexes
340    pub fn has_expression_indexes(&self, table_name: &str) -> bool {
341        self.operations.has_expression_indexes(table_name)
342    }
343
344    /// Clear expression index data for a table (for rebuilding after compaction)
345    pub fn clear_expression_index_data(&mut self, table_name: &str) {
346        self.operations.clear_expression_index_data(table_name);
347    }
348
349    // ============================================================================
350    // Spatial Index Methods
351    // ============================================================================
352
353    /// Create a spatial index
354    pub fn create_spatial_index(
355        &mut self,
356        metadata: SpatialIndexMetadata,
357        spatial_index: crate::index::SpatialIndex,
358    ) -> Result<(), StorageError> {
359        self.operations.create_spatial_index(metadata, spatial_index)
360    }
361
362    /// Create an IVFFlat index for approximate nearest neighbor search on vector columns
363    ///
364    /// This method creates an IVFFlat (Inverted File with Flat quantization) index
365    /// for efficient approximate nearest neighbor search on vector data.
366    ///
367    /// # Arguments
368    /// * `index_name` - Name for the new index
369    /// * `table_name` - Name of the table containing the vector column
370    /// * `column_name` - Name of the vector column to index
371    /// * `col_idx` - Column index in the table schema
372    /// * `dimensions` - Number of dimensions in the vectors
373    /// * `lists` - Number of clusters for the IVFFlat algorithm
374    /// * `metric` - Distance metric to use (L2, Cosine, InnerProduct)
375    #[allow(clippy::too_many_arguments)]
376    pub fn create_ivfflat_index(
377        &mut self,
378        index_name: String,
379        table_name: String,
380        column_name: String,
381        col_idx: usize,
382        dimensions: usize,
383        lists: usize,
384        metric: vibesql_ast::VectorDistanceMetric,
385    ) -> Result<(), StorageError> {
386        self.operations.create_ivfflat_index(
387            &self.catalog,
388            &self.tables,
389            index_name,
390            table_name,
391            column_name,
392            col_idx,
393            dimensions,
394            lists,
395            metric,
396        )
397    }
398
399    /// Search an IVFFlat index for approximate nearest neighbors
400    ///
401    /// # Arguments
402    /// * `index_name` - Name of the IVFFlat index
403    /// * `query_vector` - The query vector (f64)
404    /// * `k` - Maximum number of nearest neighbors to return
405    ///
406    /// # Returns
407    /// * `Ok(Vec<(usize, f64)>)` - Vector of (row_id, distance) pairs, ordered by distance
408    /// * `Err(StorageError)` - If index not found or not an IVFFlat index
409    pub fn search_ivfflat_index(
410        &self,
411        index_name: &str,
412        query_vector: &[f64],
413        k: usize,
414    ) -> Result<Vec<(usize, f64)>, StorageError> {
415        self.operations.search_ivfflat_index(index_name, query_vector, k)
416    }
417
418    /// Get all IVFFlat indexes for a specific table
419    pub fn get_ivfflat_indexes_for_table(
420        &self,
421        table_name: &str,
422    ) -> Vec<(&super::indexes::IndexMetadata, &super::indexes::ivfflat::IVFFlatIndex)> {
423        self.operations.get_ivfflat_indexes_for_table(table_name)
424    }
425
426    /// Set the number of probes for an IVFFlat index
427    pub fn set_ivfflat_probes(
428        &mut self,
429        index_name: &str,
430        probes: usize,
431    ) -> Result<(), StorageError> {
432        self.operations.set_ivfflat_probes(index_name, probes)
433    }
434
435    // ============================================================================
436    // HNSW Index Methods
437    // ============================================================================
438
439    /// Create an HNSW index for approximate nearest neighbor search on vector columns
440    ///
441    /// This method creates an HNSW (Hierarchical Navigable Small World) index
442    /// for efficient approximate nearest neighbor search on vector data.
443    ///
444    /// # Arguments
445    /// * `index_name` - Name for the new index
446    /// * `table_name` - Name of the table containing the vector column
447    /// * `column_name` - Name of the vector column to index
448    /// * `col_idx` - Column index in the table schema
449    /// * `dimensions` - Number of dimensions in the vectors
450    /// * `m` - Maximum number of connections per node (default 16)
451    /// * `ef_construction` - Size of dynamic candidate list during construction (default 64)
452    /// * `metric` - Distance metric to use (L2, Cosine, InnerProduct)
453    #[allow(clippy::too_many_arguments)]
454    pub fn create_hnsw_index(
455        &mut self,
456        index_name: String,
457        table_name: String,
458        column_name: String,
459        col_idx: usize,
460        dimensions: usize,
461        m: u32,
462        ef_construction: u32,
463        metric: vibesql_ast::VectorDistanceMetric,
464    ) -> Result<(), StorageError> {
465        self.operations.create_hnsw_index(
466            &self.catalog,
467            &self.tables,
468            index_name,
469            table_name,
470            column_name,
471            col_idx,
472            dimensions,
473            m,
474            ef_construction,
475            metric,
476        )
477    }
478
479    /// Search an HNSW index for approximate nearest neighbors
480    ///
481    /// # Arguments
482    /// * `index_name` - Name of the HNSW index
483    /// * `query_vector` - The query vector (f64)
484    /// * `k` - Maximum number of nearest neighbors to return
485    ///
486    /// # Returns
487    /// * `Ok(Vec<(usize, f64)>)` - Vector of (row_id, distance) pairs, ordered by distance
488    /// * `Err(StorageError)` - If index not found or not an HNSW index
489    pub fn search_hnsw_index(
490        &self,
491        index_name: &str,
492        query_vector: &[f64],
493        k: usize,
494    ) -> Result<Vec<(usize, f64)>, StorageError> {
495        self.operations.search_hnsw_index(index_name, query_vector, k)
496    }
497
498    /// Get all HNSW indexes for a specific table
499    pub fn get_hnsw_indexes_for_table(
500        &self,
501        table_name: &str,
502    ) -> Vec<(&super::indexes::IndexMetadata, &super::indexes::hnsw::HnswIndex)> {
503        self.operations.get_hnsw_indexes_for_table(table_name)
504    }
505
506    /// Set the ef_search parameter for an HNSW index
507    pub fn set_hnsw_ef_search(
508        &mut self,
509        index_name: &str,
510        ef_search: usize,
511    ) -> Result<(), StorageError> {
512        self.operations.set_hnsw_ef_search(index_name, ef_search)
513    }
514
515    /// Check if a spatial index exists
516    pub fn spatial_index_exists(&self, index_name: &str) -> bool {
517        self.operations.spatial_index_exists(index_name)
518    }
519
520    /// Get spatial index metadata
521    pub fn get_spatial_index_metadata(&self, index_name: &str) -> Option<&SpatialIndexMetadata> {
522        self.operations.get_spatial_index_metadata(index_name)
523    }
524
525    /// Get spatial index (immutable)
526    pub fn get_spatial_index(&self, index_name: &str) -> Option<&crate::index::SpatialIndex> {
527        self.operations.get_spatial_index(index_name)
528    }
529
530    /// Get spatial index (mutable)
531    pub fn get_spatial_index_mut(
532        &mut self,
533        index_name: &str,
534    ) -> Option<&mut crate::index::SpatialIndex> {
535        self.operations.get_spatial_index_mut(index_name)
536    }
537
538    /// Get all spatial indexes for a specific table
539    pub fn get_spatial_indexes_for_table(
540        &self,
541        table_name: &str,
542    ) -> Vec<(&SpatialIndexMetadata, &crate::index::SpatialIndex)> {
543        self.operations.get_spatial_indexes_for_table(table_name)
544    }
545
546    /// Get all spatial indexes for a specific table (mutable)
547    pub fn get_spatial_indexes_for_table_mut(
548        &mut self,
549        table_name: &str,
550    ) -> Vec<(&SpatialIndexMetadata, &mut crate::index::SpatialIndex)> {
551        self.operations.get_spatial_indexes_for_table_mut(table_name)
552    }
553
554    /// Drop a spatial index
555    pub fn drop_spatial_index(&mut self, index_name: &str) -> Result<(), StorageError> {
556        self.operations.drop_spatial_index(index_name)
557    }
558
559    /// Drop all spatial indexes associated with a table (CASCADE behavior)
560    pub fn drop_spatial_indexes_for_table(&mut self, table_name: &str) -> Vec<String> {
561        self.operations.drop_spatial_indexes_for_table(table_name)
562    }
563
564    /// List all spatial indexes
565    pub fn list_spatial_indexes(&self) -> Vec<String> {
566        self.operations.list_spatial_indexes()
567    }
568
569    // ============================================================================
570    // Direct Index Lookup API (High-Performance OLTP)
571    // ============================================================================
572
573    /// Look up rows by index name and key values - bypasses SQL parsing for maximum performance
574    ///
575    /// This method provides direct B+ tree index lookups, completely bypassing SQL parsing
576    /// and the query execution pipeline. Use this for performance-critical OLTP workloads
577    /// where you know the exact index and key values.
578    ///
579    /// # Arguments
580    /// * `index_name` - Name of the index (as created with CREATE INDEX)
581    /// * `key_values` - Key values to look up (must match index column order)
582    ///
583    /// # Returns
584    /// * `Ok(Some(Vec<&Row>))` - The rows matching the key
585    /// * `Ok(None)` - No rows match the key
586    /// * `Err(StorageError)` - Index not found or other error
587    ///
588    /// # Performance
589    /// This is ~100-300x faster than executing a SQL SELECT query because it:
590    /// - Skips SQL parsing (~300µs saved)
591    /// - Skips query planning and optimization
592    /// - Uses direct B+ tree lookup on the index
593    ///
594    /// # Example
595    /// ```text
596    /// // Single-column index lookup
597    /// let rows = db.lookup_by_index("idx_users_pk", &[SqlValue::Integer(42)])?;
598    ///
599    /// // Composite key lookup
600    /// let rows = db.lookup_by_index("idx_orders_pk", &[
601    ///     SqlValue::Integer(warehouse_id),
602    ///     SqlValue::Integer(district_id),
603    ///     SqlValue::Integer(order_id),
604    /// ])?;
605    /// ```
606    pub fn lookup_by_index(
607        &self,
608        index_name: &str,
609        key_values: &[vibesql_types::SqlValue],
610    ) -> Result<Option<Vec<&Row>>, StorageError> {
611        // Get index metadata to find the table
612        let metadata = self
613            .get_index(index_name)
614            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
615        let table_name = metadata.table_name.clone();
616
617        // Get the index data
618        let index_data = self
619            .get_index_data(index_name)
620            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
621
622        // Perform the lookup
623        let row_indices = match index_data.get(key_values) {
624            Some(indices) => indices,
625            None => return Ok(None),
626        };
627
628        // Get the table
629        let table = self
630            .get_table(&table_name)
631            .ok_or_else(|| StorageError::TableNotFound(table_name.clone()))?;
632
633        // Collect the rows (use get_row() to skip deleted rows)
634        let mut result = Vec::with_capacity(row_indices.len());
635        for &idx in &row_indices {
636            // Use get_row() which returns None for deleted rows
637            if let Some(row) = table.get_row(idx) {
638                result.push(row);
639            }
640        }
641
642        if result.is_empty() {
643            Ok(None)
644        } else {
645            Ok(Some(result))
646        }
647    }
648
649    /// Look up the first row by index - optimized for unique indexes
650    ///
651    /// This is a convenience method for unique indexes where you expect exactly one row.
652    /// Returns only the first matching row.
653    ///
654    /// # Arguments
655    /// * `index_name` - Name of the index
656    /// * `key_values` - Key values to look up
657    ///
658    /// # Returns
659    /// * `Ok(Some(&Row))` - The first matching row
660    /// * `Ok(None)` - No row matches the key
661    /// * `Err(StorageError)` - Index not found or other error
662    pub fn lookup_one_by_index(
663        &self,
664        index_name: &str,
665        key_values: &[vibesql_types::SqlValue],
666    ) -> Result<Option<&Row>, StorageError> {
667        // Get index metadata to find the table
668        let metadata = self
669            .get_index(index_name)
670            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
671        let table_name = metadata.table_name.clone();
672
673        // Get the index data
674        let index_data = self
675            .get_index_data(index_name)
676            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
677
678        // Perform the lookup
679        let row_indices = match index_data.get(key_values) {
680            Some(indices) => indices,
681            None => return Ok(None),
682        };
683
684        // Get the first row index
685        let first_idx = match row_indices.first() {
686            Some(&idx) => idx,
687            None => return Ok(None),
688        };
689
690        // Get the table and return the row using O(1) direct access
691        let table = self
692            .get_table(&table_name)
693            .ok_or_else(|| StorageError::TableNotFound(table_name.clone()))?;
694
695        Ok(table.get_row(first_idx))
696    }
697
698    /// Batch lookup by index - look up multiple keys in a single call
699    ///
700    /// This method is optimized for batch point lookups where you need to retrieve
701    /// multiple rows by their index keys. It's more efficient than calling
702    /// `lookup_by_index` in a loop.
703    ///
704    /// # Arguments
705    /// * `index_name` - Name of the index
706    /// * `keys` - List of key value tuples to look up
707    ///
708    /// # Returns
709    /// * `Ok(Vec<Option<Vec<&Row>>>)` - For each key, the matching rows (or None if not found)
710    /// * `Err(StorageError)` - Index not found or other error
711    ///
712    /// # Example
713    /// ```text
714    /// // Batch lookup multiple items
715    /// let results = db.lookup_by_index_batch("idx_items_pk", &[
716    ///     vec![SqlValue::Integer(1)],
717    ///     vec![SqlValue::Integer(2)],
718    ///     vec![SqlValue::Integer(3)],
719    /// ])?;
720    ///
721    /// for (key_idx, rows) in results.iter().enumerate() {
722    ///     if let Some(rows) = rows {
723    ///         println!("Key {} matched {} rows", key_idx, rows.len());
724    ///     }
725    /// }
726    /// ```
727    pub fn lookup_by_index_batch<'a>(
728        &'a self,
729        index_name: &str,
730        keys: &[Vec<vibesql_types::SqlValue>],
731    ) -> Result<Vec<Option<Vec<&'a Row>>>, StorageError> {
732        // Get index metadata to find the table
733        let metadata = self
734            .get_index(index_name)
735            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
736        let table_name = metadata.table_name.clone();
737
738        // Get the index data
739        let index_data = self
740            .get_index_data(index_name)
741            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
742
743        // Get the table once for O(1) row access
744        let table = self
745            .get_table(&table_name)
746            .ok_or_else(|| StorageError::TableNotFound(table_name.clone()))?;
747
748        // Look up each key using direct row access
749        let mut results = Vec::with_capacity(keys.len());
750        for key in keys {
751            let row_indices = index_data.get(key);
752            match row_indices {
753                Some(indices) if !indices.is_empty() => {
754                    let matched_rows: Vec<_> =
755                        indices.iter().filter_map(|&idx| table.get_row(idx)).collect();
756                    if matched_rows.is_empty() {
757                        results.push(None);
758                    } else {
759                        results.push(Some(matched_rows));
760                    }
761                }
762                _ => results.push(None),
763            }
764        }
765
766        Ok(results)
767    }
768
769    /// Batch lookup returning first row only - optimized for unique indexes
770    ///
771    /// Like `lookup_by_index_batch` but returns only the first matching row for each key.
772    /// More efficient when you know the index is unique.
773    ///
774    /// # Arguments
775    /// * `index_name` - Name of the index
776    /// * `keys` - List of key value tuples to look up
777    ///
778    /// # Returns
779    /// * `Ok(Vec<Option<&Row>>)` - For each key, the first matching row (or None)
780    pub fn lookup_one_by_index_batch<'a>(
781        &'a self,
782        index_name: &str,
783        keys: &[Vec<vibesql_types::SqlValue>],
784    ) -> Result<Vec<Option<&'a Row>>, StorageError> {
785        // Get index metadata to find the table
786        let metadata = self
787            .get_index(index_name)
788            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
789        let table_name = metadata.table_name.clone();
790
791        // Get the index data
792        let index_data = self
793            .get_index_data(index_name)
794            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
795
796        // Get the table once for O(1) row access
797        let table = self
798            .get_table(&table_name)
799            .ok_or_else(|| StorageError::TableNotFound(table_name.clone()))?;
800
801        // Look up each key using direct row access
802        let mut results = Vec::with_capacity(keys.len());
803        for key in keys {
804            let row_indices = index_data.get(key);
805            match row_indices {
806                Some(indices) if !indices.is_empty() => {
807                    results.push(table.get_row(indices[0]));
808                }
809                _ => results.push(None),
810            }
811        }
812
813        Ok(results)
814    }
815
816    // ============================================================================
817    // Prefix Index Lookup API (Multi-column indexes)
818    // ============================================================================
819
820    /// Look up rows by index using prefix matching - for multi-column indexes
821    ///
822    /// This method performs prefix matching on multi-column indexes. For example,
823    /// with an index on (a, b, c), you can look up all rows where (a, b) match
824    /// a specific value, regardless of c.
825    ///
826    /// # Arguments
827    /// * `index_name` - Name of the index (as created with CREATE INDEX)
828    /// * `prefix` - Prefix key values to match (must be a prefix of index columns)
829    ///
830    /// # Returns
831    /// * `Ok(Vec<&Row>)` - The rows matching the prefix (empty if none found)
832    /// * `Err(StorageError)` - Index not found or other error
833    ///
834    /// # Performance
835    /// Uses efficient B+ tree range scan: O(log n + k) where n is total keys, k is matches.
836    ///
837    /// # Example
838    /// ```text
839    /// // Index on (warehouse_id, district_id, order_id) - 3 columns
840    /// // Find all orders for warehouse 1, district 5 (2-column prefix)
841    /// let rows = db.lookup_by_index_prefix("idx_orders_pk", &[
842    ///     SqlValue::Integer(1),  // warehouse_id
843    ///     SqlValue::Integer(5),  // district_id
844    /// ])?;
845    /// ```
846    pub fn lookup_by_index_prefix(
847        &self,
848        index_name: &str,
849        prefix: &[vibesql_types::SqlValue],
850    ) -> Result<Vec<&Row>, StorageError> {
851        // Get index metadata to find the table
852        let metadata = self
853            .get_index(index_name)
854            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
855        let table_name = metadata.table_name.clone();
856
857        // Get the index data
858        let index_data = self
859            .get_index_data(index_name)
860            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
861
862        // Perform the prefix scan
863        let row_indices = index_data.prefix_scan(prefix);
864        if row_indices.is_empty() {
865            return Ok(vec![]);
866        }
867
868        // Get the table
869        let table = self
870            .get_table(&table_name)
871            .ok_or_else(|| StorageError::TableNotFound(table_name.clone()))?;
872
873        // Collect the rows using O(1) direct access
874        let rows: Vec<_> = row_indices.iter().filter_map(|&idx| table.get_row(idx)).collect();
875
876        Ok(rows)
877    }
878
879    /// Batch prefix lookup - look up multiple prefixes in a single call
880    ///
881    /// This method is optimized for batch prefix lookups on multi-column indexes.
882    /// For each prefix, returns all rows where the key prefix matches.
883    ///
884    /// # Arguments
885    /// * `index_name` - Name of the index
886    /// * `prefixes` - List of prefix key tuples to look up
887    ///
888    /// # Returns
889    /// * `Ok(Vec<Vec<&Row>>)` - For each prefix, the matching rows (empty vec if none)
890    /// * `Err(StorageError)` - Index not found or other error
891    ///
892    /// # Example
893    /// ```text
894    /// // Index on (w_id, d_id, o_id) - find new orders for all 10 districts
895    /// let prefixes: Vec<Vec<SqlValue>> = (1..=10)
896    ///     .map(|d| vec![SqlValue::Integer(w_id), SqlValue::Integer(d)])
897    ///     .collect();
898    /// let results = db.lookup_by_index_prefix_batch("idx_new_order_pk", &prefixes)?;
899    /// // results[0] = rows for district 1, results[1] = rows for district 2, etc.
900    /// ```
901    pub fn lookup_by_index_prefix_batch<'a>(
902        &'a self,
903        index_name: &str,
904        prefixes: &[Vec<vibesql_types::SqlValue>],
905    ) -> Result<Vec<Vec<&'a Row>>, StorageError> {
906        // Get index metadata to find the table
907        let metadata = self
908            .get_index(index_name)
909            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
910        let table_name = metadata.table_name.clone();
911
912        // Get the index data
913        let index_data = self
914            .get_index_data(index_name)
915            .ok_or_else(|| StorageError::IndexNotFound(index_name.to_string()))?;
916
917        // Get the table once for O(1) row access
918        let table = self
919            .get_table(&table_name)
920            .ok_or_else(|| StorageError::TableNotFound(table_name.clone()))?;
921
922        // Look up each prefix and collect results
923        let mut results = Vec::with_capacity(prefixes.len());
924        for prefix in prefixes {
925            let row_indices = index_data.prefix_scan(prefix);
926            let matched_rows: Vec<_> =
927                row_indices.iter().filter_map(|&idx| table.get_row(idx)).collect();
928            results.push(matched_rows);
929        }
930
931        Ok(results)
932    }
933
934    // ============================================================================
935    // Fast Delete API (High-Performance OLTP)
936    // ============================================================================
937
938    /// Delete a single row by PK value - fast path that skips unnecessary overhead
939    ///
940    /// This method provides a highly optimized DELETE path for single-row PK deletes.
941    /// It bypasses the full DELETE executor overhead when:
942    /// - There are no triggers on the table
943    /// - There are no foreign key constraints referencing this table
944    /// - The WHERE clause is a simple PK equality (`id = ?`)
945    ///
946    /// # Arguments
947    /// * `table_name` - Name of the table
948    /// * `pk_values` - Primary key values to match
949    ///
950    /// # Returns
951    /// * `Ok(true)` - Row was deleted
952    /// * `Ok(false)` - No row found with this PK
953    /// * `Err(StorageError)` - Table not found or other error
954    ///
955    /// # Performance
956    /// This is ~2-3x faster than the full DELETE executor because it:
957    /// - Uses direct PK index lookup (O(1))
958    /// - Avoids cloning row data
959    /// - Skips ExpressionEvaluator creation
960    /// - Performs minimal index maintenance
961    ///
962    /// # Profiling
963    /// Set environment variables to enable profiling:
964    /// - `DELETE_PROFILE=1` - Enable timing collection and auto-print summary on thread exit
965    /// - `DELETE_PROFILE_VERBOSE=1` - Also print per-delete breakdown to stderr
966    ///
967    /// Use `print_delete_profile_summary()` to manually print aggregate stats.
968    /// Use `reset_delete_profile_stats()` to reset the stats before a benchmark.
969    ///
970    /// # Safety
971    /// Caller must ensure:
972    /// - No triggers exist on this table for DELETE
973    /// - No foreign key constraints reference this table
974    ///
975    /// Note: WAL logging is handled internally by this method.
976    ///
977    /// # Example
978    /// ```text
979    /// // Fast delete by PK
980    /// let deleted = db.delete_by_pk_fast("users", &[SqlValue::Integer(42)])?;
981    /// if deleted {
982    ///     println!("User 42 deleted");
983    /// }
984    /// ```
985    pub fn delete_by_pk_fast(
986        &mut self,
987        table_name: &str,
988        pk_values: &[vibesql_types::SqlValue],
989    ) -> Result<bool, StorageError> {
990        use std::time::Instant;
991
992        // Check if profiling is enabled
993        let profile = std::env::var("DELETE_PROFILE").is_ok();
994        let start = if profile { Some(Instant::now()) } else { None };
995        let mut phase_times: [u128; 6] = [0; 6]; // pk_lookup, value_clone, wal, index_update, row_remove, cache
996
997        // First, find the row index and clone only the values (not the full Row struct)
998        // This avoids double-cloning: previously we cloned the Row, then cloned its values for WAL.
999        // Now we clone values only once and use a reference for index updates.
1000        let (row_index, values) = {
1001            let phase_start = start.map(|_| Instant::now());
1002            let table = self
1003                .get_table(table_name)
1004                .ok_or_else(|| StorageError::TableNotFound(table_name.to_string()))?;
1005
1006            let row_index = match table.primary_key_index() {
1007                Some(pk_index) => match pk_index.get(pk_values) {
1008                    Some(&idx) => idx,
1009                    None => return Ok(false), // Row not found
1010                },
1011                None => return Err(StorageError::Other("Table has no primary key".to_string())),
1012            };
1013            if let Some(ps) = phase_start {
1014                phase_times[0] = ps.elapsed().as_nanos(); // pk_lookup
1015            }
1016
1017            // Get the row values - clone once for both WAL and index updates
1018            let phase_start = start.map(|_| Instant::now());
1019            let values = match table.get_row(row_index) {
1020                Some(r) => r.values.clone(),
1021                None => return Ok(false), // Row already deleted
1022            };
1023            if let Some(ps) = phase_start {
1024                phase_times[1] = ps.elapsed().as_nanos(); // value_clone
1025            }
1026
1027            (row_index, values)
1028        };
1029
1030        // Update user-defined indexes first (using reference to values)
1031        // This must happen before we move ownership of values to WAL
1032        let phase_start = start.map(|_| Instant::now());
1033        self.operations.update_indexes_for_delete_with_values(
1034            &self.catalog,
1035            table_name,
1036            &values,
1037            row_index,
1038        );
1039        if let Some(ps) = phase_start {
1040            phase_times[3] = ps.elapsed().as_nanos(); // index_update
1041        }
1042
1043        // Emit WAL entry before deleting (needed for crash recovery)
1044        // Only emit if persistence is enabled to avoid unnecessary work
1045        // Move ownership of values to avoid a second clone
1046        let phase_start = start.map(|_| Instant::now());
1047        if self.persistence_enabled() {
1048            self.emit_wal_delete(table_name, row_index as u64, values.to_vec());
1049        }
1050        if let Some(ps) = phase_start {
1051            phase_times[2] = ps.elapsed().as_nanos(); // wal
1052        }
1053
1054        // Now delete the row (this updates the internal PK hash index)
1055        let phase_start = start.map(|_| Instant::now());
1056        let table_mut = self
1057            .get_table_mut(table_name)
1058            .ok_or_else(|| StorageError::TableNotFound(table_name.to_string()))?;
1059
1060        let delete_result = table_mut.delete_by_indices(&[row_index]);
1061
1062        // If compaction occurred, rebuild user-defined indexes since row indices changed
1063        if delete_result.compacted {
1064            self.rebuild_indexes(table_name);
1065        }
1066        if let Some(ps) = phase_start {
1067            phase_times[4] = ps.elapsed().as_nanos(); // row_remove
1068        }
1069
1070        // Phase 5: Invalidate columnar cache
1071        let phase_start = start.map(|_| Instant::now());
1072        if delete_result.deleted_count > 0 {
1073            self.invalidate_columnar_cache(table_name);
1074        }
1075        if let Some(ps) = phase_start {
1076            phase_times[5] = ps.elapsed().as_nanos(); // cache
1077        }
1078
1079        // Record and optionally print profile summary
1080        if let Some(s) = start {
1081            let total = s.elapsed().as_nanos();
1082
1083            // Record to thread-local aggregate stats
1084            DELETE_PROFILE_STATS.with(|stats| {
1085                stats.borrow_mut().record(&phase_times, total);
1086            });
1087
1088            // Print per-delete output only if DELETE_PROFILE_VERBOSE is set
1089            if std::env::var("DELETE_PROFILE_VERBOSE").is_ok() {
1090                let total_us = total as f64 / 1000.0;
1091                eprintln!(
1092                    "DELETE_PROFILE: total={:.1}µs | pk_lookup={:.1}µs ({:.0}%) | value_clone={:.1}µs ({:.0}%) | wal={:.1}µs ({:.0}%) | index_update={:.1}µs ({:.0}%) | row_remove={:.1}µs ({:.0}%) | cache={:.1}µs ({:.0}%)",
1093                    total_us,
1094                    phase_times[0] as f64 / 1000.0,
1095                    if total > 0 { phase_times[0] as f64 / total as f64 * 100.0 } else { 0.0 },
1096                    phase_times[1] as f64 / 1000.0,
1097                    if total > 0 { phase_times[1] as f64 / total as f64 * 100.0 } else { 0.0 },
1098                    phase_times[2] as f64 / 1000.0,
1099                    if total > 0 { phase_times[2] as f64 / total as f64 * 100.0 } else { 0.0 },
1100                    phase_times[3] as f64 / 1000.0,
1101                    if total > 0 { phase_times[3] as f64 / total as f64 * 100.0 } else { 0.0 },
1102                    phase_times[4] as f64 / 1000.0,
1103                    if total > 0 { phase_times[4] as f64 / total as f64 * 100.0 } else { 0.0 },
1104                    phase_times[5] as f64 / 1000.0,
1105                    if total > 0 { phase_times[5] as f64 / total as f64 * 100.0 } else { 0.0 },
1106                );
1107            }
1108        }
1109
1110        Ok(delete_result.deleted_count > 0)
1111    }
1112
1113    // ============================================================================
1114    // Table Index Info for DML Cost Estimation
1115    // ============================================================================
1116
1117    /// Get table index information for DML cost estimation
1118    ///
1119    /// This method collects all the metadata needed by `CostEstimator::estimate_insert()`,
1120    /// `estimate_update()`, and `estimate_delete()` to compute accurate DML operation costs.
1121    ///
1122    /// # Arguments
1123    /// * `table_name` - Name of the table to get index info for
1124    ///
1125    /// # Returns
1126    /// * `Some(TableIndexInfo)` - Index information if table exists
1127    /// * `None` - If table doesn't exist
1128    ///
1129    /// # Example
1130    /// ```text
1131    /// let info = db.get_table_index_info("users")?;
1132    /// let insert_cost = cost_estimator.estimate_insert(&info);
1133    /// ```
1134    pub fn get_table_index_info(
1135        &self,
1136        table_name: &str,
1137    ) -> Option<crate::statistics::TableIndexInfo> {
1138        // Get the table
1139        let table = self.get_table(table_name)?;
1140
1141        // Count hash indexes: 1 for PK (if exists) + 1 per unique constraint
1142        let has_primary_key = table.schema.primary_key.is_some();
1143        let unique_constraint_count = table.schema.unique_constraints.len();
1144        let hash_index_count = if has_primary_key { 1 } else { 0 } + unique_constraint_count;
1145
1146        // Count B-tree indexes (user-defined indexes managed at Database level)
1147        let btree_index_count = self.list_indexes_for_table(table_name).len();
1148
1149        // Calculate deleted ratio
1150        let total_rows = table.physical_row_count();
1151        let deleted_ratio =
1152            if total_rows > 0 { table.deleted_count() as f64 / total_rows as f64 } else { 0.0 };
1153
1154        // Check if table uses native columnar storage
1155        let is_native_columnar = table.is_native_columnar();
1156
1157        // Get average row size: prefer actual statistics over schema-based heuristics
1158        //
1159        // When statistics are available (from ANALYZE), use the actual avg_row_bytes
1160        // which accounts for real string fill ratios, NULL prevalence, and actual
1161        // BLOB sizes. This provides more accurate WAL cost estimation.
1162        //
1163        // Fall back to schema-based estimation when no statistics are available.
1164        // See issue #3980 for details.
1165        let avg_row_size = table
1166            .get_statistics()
1167            .and_then(|stats| stats.avg_row_bytes)
1168            .map(|bytes| bytes as usize)
1169            .unwrap_or_else(|| {
1170                // Fall back to schema-based estimation
1171                let column_types: Vec<_> =
1172                    table.schema.columns.iter().map(|col| col.data_type.clone()).collect();
1173                crate::statistics::estimate_row_size(&column_types)
1174            });
1175
1176        Some(crate::statistics::TableIndexInfo::new(
1177            hash_index_count,
1178            btree_index_count,
1179            is_native_columnar,
1180            deleted_ratio,
1181            avg_row_size,
1182        ))
1183    }
1184}
1185
1186#[cfg(test)]
1187mod tests {
1188    use vibesql_catalog::{ColumnSchema, TableSchema};
1189    use vibesql_types::{DataType, SqlValue};
1190
1191    use super::*;
1192
1193    #[test]
1194    fn test_get_table_index_info_basic() {
1195        let mut db = Database::new();
1196
1197        // Create a table with primary key and one unique constraint
1198        let schema = TableSchema::with_all_constraints(
1199            "users".to_string(),
1200            vec![
1201                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1202                ColumnSchema::new(
1203                    "email".to_string(),
1204                    DataType::Varchar { max_length: Some(100) },
1205                    false,
1206                ),
1207                ColumnSchema::new(
1208                    "name".to_string(),
1209                    DataType::Varchar { max_length: Some(100) },
1210                    true,
1211                ),
1212            ],
1213            Some(vec!["id".to_string()]),
1214            vec![vec!["email".to_string()]],
1215        );
1216        db.create_table(schema).unwrap();
1217
1218        // Get table index info
1219        let info = db.get_table_index_info("users").unwrap();
1220
1221        // Should have 2 hash indexes: 1 PK + 1 unique constraint
1222        assert_eq!(info.hash_index_count, 2);
1223        // No B-tree indexes yet
1224        assert_eq!(info.btree_index_count, 0);
1225        // Not native columnar
1226        assert!(!info.is_native_columnar);
1227        // No deleted rows
1228        assert_eq!(info.deleted_ratio, 0.0);
1229    }
1230
1231    #[test]
1232    fn test_get_table_index_info_with_btree_index() {
1233        use vibesql_ast::IndexColumn;
1234
1235        let mut db = Database::new();
1236
1237        // Create a table with primary key
1238        let schema = TableSchema::with_primary_key(
1239            "products".to_string(),
1240            vec![
1241                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1242                ColumnSchema::new(
1243                    "name".to_string(),
1244                    DataType::Varchar { max_length: Some(100) },
1245                    false,
1246                ),
1247                ColumnSchema::new(
1248                    "price".to_string(),
1249                    DataType::Decimal { precision: 10, scale: 2 },
1250                    false,
1251                ),
1252            ],
1253            vec!["id".to_string()],
1254        );
1255        db.create_table(schema).unwrap();
1256
1257        // Create a B-tree index on price
1258        db.create_index(
1259            "idx_products_price".to_string(),
1260            "products".to_string(),
1261            false,
1262            vec![IndexColumn::Column {
1263                column_name: "price".to_string(),
1264                direction: vibesql_ast::OrderDirection::Asc,
1265                prefix_length: None,
1266            }],
1267        )
1268        .unwrap();
1269
1270        // Get table index info
1271        let info = db.get_table_index_info("products").unwrap();
1272
1273        // Should have 1 hash index (PK)
1274        assert_eq!(info.hash_index_count, 1);
1275        // Should have 1 B-tree index
1276        assert_eq!(info.btree_index_count, 1);
1277    }
1278
1279    #[test]
1280    fn test_get_table_index_info_with_deleted_rows() {
1281        let mut db = Database::new();
1282
1283        // Create a table with primary key
1284        let schema = TableSchema::with_primary_key(
1285            "items".to_string(),
1286            vec![
1287                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1288                ColumnSchema::new(
1289                    "name".to_string(),
1290                    DataType::Varchar { max_length: Some(100) },
1291                    false,
1292                ),
1293            ],
1294            vec!["id".to_string()],
1295        );
1296        db.create_table(schema).unwrap();
1297
1298        // Insert 10 rows
1299        for i in 0..10 {
1300            let row = Row::new(vec![
1301                SqlValue::Integer(i),
1302                SqlValue::Varchar(format!("Item {}", i).into()),
1303            ]);
1304            db.insert_row("items", row).unwrap();
1305        }
1306
1307        // Get initial info - no deleted rows
1308        let info = db.get_table_index_info("items").unwrap();
1309        assert_eq!(info.deleted_ratio, 0.0);
1310
1311        // Delete 3 rows (30% deletion)
1312        db.delete_by_pk_fast("items", &[SqlValue::Integer(0)]).unwrap();
1313        db.delete_by_pk_fast("items", &[SqlValue::Integer(1)]).unwrap();
1314        db.delete_by_pk_fast("items", &[SqlValue::Integer(2)]).unwrap();
1315
1316        // Get updated info - should show deleted ratio
1317        let info = db.get_table_index_info("items").unwrap();
1318        // 3 deleted out of 10 = 0.3
1319        assert!((info.deleted_ratio - 0.3).abs() < 0.01);
1320    }
1321
1322    #[test]
1323    fn test_get_table_index_info_nonexistent_table() {
1324        let db = Database::new();
1325
1326        // Should return None for nonexistent table
1327        let info = db.get_table_index_info("nonexistent");
1328        assert!(info.is_none());
1329    }
1330
1331    #[test]
1332    fn test_get_table_index_info_no_primary_key() {
1333        let mut db = Database::new();
1334
1335        // Create a table without primary key
1336        let schema = TableSchema::new(
1337            "logs".to_string(),
1338            vec![
1339                ColumnSchema::new(
1340                    "message".to_string(),
1341                    DataType::Varchar { max_length: Some(500) },
1342                    false,
1343                ),
1344                ColumnSchema::new("level".to_string(), DataType::Integer, false),
1345            ],
1346        );
1347        db.create_table(schema).unwrap();
1348
1349        // Get table index info
1350        let info = db.get_table_index_info("logs").unwrap();
1351
1352        // Should have 0 hash indexes (no PK, no unique constraints)
1353        assert_eq!(info.hash_index_count, 0);
1354        // No B-tree indexes
1355        assert_eq!(info.btree_index_count, 0);
1356    }
1357
1358    #[test]
1359    fn test_get_table_index_info_multiple_unique_constraints() {
1360        let mut db = Database::new();
1361
1362        // Create a table with PK and multiple unique constraints
1363        let schema = TableSchema::with_all_constraints(
1364            "accounts".to_string(),
1365            vec![
1366                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1367                ColumnSchema::new(
1368                    "email".to_string(),
1369                    DataType::Varchar { max_length: Some(100) },
1370                    false,
1371                ),
1372                ColumnSchema::new(
1373                    "username".to_string(),
1374                    DataType::Varchar { max_length: Some(50) },
1375                    false,
1376                ),
1377                ColumnSchema::new(
1378                    "phone".to_string(),
1379                    DataType::Varchar { max_length: Some(20) },
1380                    true,
1381                ),
1382            ],
1383            Some(vec!["id".to_string()]),
1384            vec![vec!["email".to_string()], vec!["username".to_string()]],
1385        );
1386        db.create_table(schema).unwrap();
1387
1388        // Get table index info
1389        let info = db.get_table_index_info("accounts").unwrap();
1390
1391        // Should have 3 hash indexes: 1 PK + 2 unique constraints
1392        assert_eq!(info.hash_index_count, 3);
1393    }
1394
1395    // ============================================================================
1396    // avg_row_size Statistics Tests (Issue #3980)
1397    // ============================================================================
1398
1399    #[test]
1400    fn test_get_table_index_info_uses_schema_estimate_without_stats() {
1401        let mut db = Database::new();
1402
1403        // Create a table with VARCHAR columns
1404        let schema = TableSchema::with_primary_key(
1405            "items".to_string(),
1406            vec![
1407                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1408                ColumnSchema::new(
1409                    "name".to_string(),
1410                    DataType::Varchar { max_length: Some(100) },
1411                    false,
1412                ),
1413            ],
1414            vec!["id".to_string()],
1415        );
1416        db.create_table(schema).unwrap();
1417
1418        // Without ANALYZE, should use schema-based estimation
1419        let info = db.get_table_index_info("items").unwrap();
1420
1421        // avg_row_size should be computed from schema heuristics
1422        // INTEGER (4) + VARCHAR(100) estimate (32) + overhead (8) = 44, min 64
1423        assert!(
1424            info.avg_row_size >= 64,
1425            "Schema-based avg_row_size should be at least base size: {}",
1426            info.avg_row_size
1427        );
1428    }
1429
1430    #[test]
1431    fn test_get_table_index_info_prefers_actual_statistics() {
1432        let mut db = Database::new();
1433
1434        // Create a table with VARCHAR columns
1435        let schema = TableSchema::with_primary_key(
1436            "items".to_string(),
1437            vec![
1438                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1439                ColumnSchema::new(
1440                    "description".to_string(),
1441                    DataType::Varchar { max_length: Some(1000) },
1442                    false,
1443                ),
1444            ],
1445            vec!["id".to_string()],
1446        );
1447        db.create_table(schema).unwrap();
1448
1449        // Insert rows with LONG strings (filling the VARCHAR)
1450        for i in 0..10 {
1451            let long_description = "x".repeat(800); // Much longer than schema heuristic
1452            let row =
1453                Row::new(vec![SqlValue::Integer(i), SqlValue::Varchar(long_description.into())]);
1454            db.insert_row("items", row).unwrap();
1455        }
1456
1457        // Get info WITHOUT statistics - uses schema heuristic
1458        let info_without_stats = db.get_table_index_info("items").unwrap();
1459        let schema_estimate = info_without_stats.avg_row_size;
1460
1461        // Run ANALYZE to compute actual statistics
1462        db.get_table_mut("items").unwrap().analyze();
1463
1464        // Get info WITH statistics - should prefer actual avg_row_bytes
1465        let info_with_stats = db.get_table_index_info("items").unwrap();
1466        let actual_estimate = info_with_stats.avg_row_size;
1467
1468        // Actual statistics should show larger row size due to long strings
1469        // Schema heuristic: VARCHAR(1000) → min(500, 32) = 32 bytes
1470        // Actual data: 800 bytes per string
1471        assert!(
1472            actual_estimate > schema_estimate,
1473            "Actual statistics ({}) should show larger row size than schema heuristic ({}) for long strings",
1474            actual_estimate,
1475            schema_estimate
1476        );
1477    }
1478
1479    #[test]
1480    fn test_get_table_index_info_statistics_vs_schema_short_strings() {
1481        let mut db = Database::new();
1482
1483        // Create a table with VARCHAR columns
1484        let schema = TableSchema::with_primary_key(
1485            "items".to_string(),
1486            vec![
1487                ColumnSchema::new("id".to_string(), DataType::Integer, false),
1488                ColumnSchema::new(
1489                    "code".to_string(),
1490                    DataType::Varchar { max_length: Some(100) },
1491                    false,
1492                ),
1493            ],
1494            vec!["id".to_string()],
1495        );
1496        db.create_table(schema).unwrap();
1497
1498        // Insert rows with SHORT strings (much shorter than heuristic)
1499        for i in 0..10 {
1500            let short_code = format!("A{}", i); // 2-3 chars, much shorter than 32-byte heuristic
1501            let row = Row::new(vec![SqlValue::Integer(i), SqlValue::Varchar(short_code.into())]);
1502            db.insert_row("items", row).unwrap();
1503        }
1504
1505        // Run ANALYZE to compute actual statistics
1506        db.get_table_mut("items").unwrap().analyze();
1507
1508        // Get info WITH statistics
1509        let info = db.get_table_index_info("items").unwrap();
1510
1511        // Should have valid avg_row_size (from actual statistics)
1512        assert!(info.avg_row_size > 0, "avg_row_size should be positive: {}", info.avg_row_size);
1513    }
1514}
vibesql_storage/database/index_ops.rs

vibesql_storage/database/
index_ops.rs