vibesql_storage/table/mod.rs

// ============================================================================
// Table - In-Memory Storage Layer
// ============================================================================
//
// This module provides the core Table abstraction for in-memory row storage.
// The table implementation follows a delegation pattern, where specialized
// components handle distinct concerns:
//
// ## Architecture
//
// ```
// Table (Orchestration Layer)
//   ├─> IndexManager        - Hash-based indexing for PK/UNIQUE constraints
//   ├─> RowNormalizer       - Value normalization and validation
//   └─> AppendModeTracker   - Sequential insert detection for optimization
// ```
//
// ### Component Responsibilities
//
// **IndexManager** (`indexes.rs`):
// - Maintains hash indexes for primary key and unique constraints
// - Provides O(1) lookups for duplicate detection
// - Handles index updates on INSERT/UPDATE/DELETE
// - Supports selective index maintenance for performance
//
// **RowNormalizer** (`normalization.rs`):
// - CHAR padding/truncation to fixed length
// - Type validation (ensures values match column types)
// - NULL constraint validation
// - Column count verification
//
// **AppendModeTracker** (`append_mode.rs`):
// - Detects sequential primary key insertion patterns
// - Enables executor-level optimizations when sequential inserts are detected
// - Maintains O(1) tracking overhead
// - Activates after a threshold of consecutive sequential inserts
//
// ### Design Principles
//
// 1. **Separation of Concerns**: Each component handles one specific responsibility
// 2. **Delegation Pattern**: Table orchestrates, components execute (see the insert flow sketch below)
// 3. **Performance First**: Optimizations are built into the architecture (append mode, selective index updates)
// 4. **Clean API**: The public interface remains simple despite internal complexity
//
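// ### Insert Flow (Illustrative)
//
// A minimal sketch of how a single insert is delegated to the components above;
// this mirrors, in simplified form, the actual `Table::insert` implementation below.
//
// ```text
// Table::insert(row)
//   1. RowNormalizer::normalize_and_validate(row)  // column count, types, NULLs, CHAR padding
//   2. AppendModeTracker::update(pk_values)        // sequential-insert detection
//   3. rows.push(row); deleted.push(false)         // O(1) append + deletion-bitmap slot
//   4. IndexManager::update_for_insert(...)        // PK/UNIQUE hash index maintenance
// ```
//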
// ### Refactoring History
//
// This module structure is the result of a systematic refactoring effort (#842)
// that extracted specialized components from a monolithic table.rs file:
//
// - **Phase 1** (PR #853): IndexManager extraction
// - **Phase 3** (PR #856): RowNormalizer extraction
// - **Phase 4** (PR #858): AppendModeTracker extraction
// - **Phase 5** (PR #859): Documentation and finalization
//
// Note: Phase 2 (Constraint Validation) was closed as invalid - constraint
// validation properly belongs in the executor layer, not the storage layer.

mod append_mode;
mod indexes;
mod normalization;

use append_mode::AppendModeTracker;
use indexes::IndexManager;
use normalization::RowNormalizer;
use vibesql_types::SqlValue;

use crate::{Row, StorageError};

/// Result of a delete operation, indicating how many rows were deleted
/// and whether table compaction occurred.
///
/// # Important
///
/// When `compacted` is true, all row indices in the table have changed.
/// User-defined indexes (B-tree indexes managed at the Database level)
/// must be rebuilt after compaction to maintain correctness.
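///
/// # Example
///
/// A minimal sketch of reacting to a delete (illustrative only; `rebuild_user_indexes`
/// is a hypothetical caller-side helper, not part of this module):
///
/// ```text
/// let result = table.delete_by_indices(&[3, 7]);
/// println!("deleted {} rows", result.deleted_count);
/// if result.compacted {
///     // All physical row indices changed - Database-level B-tree indexes
///     // must be rebuilt before they are used again.
///     rebuild_user_indexes(&mut database, "users");
/// }
/// ```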
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DeleteResult {
    /// Number of rows that were deleted
    pub deleted_count: usize,
    /// Whether table compaction occurred (row indices changed)
    pub compacted: bool,
}

impl DeleteResult {
    /// Create a new DeleteResult
    pub fn new(deleted_count: usize, compacted: bool) -> Self {
        Self { deleted_count, compacted }
    }
}

92/// In-memory table - stores rows with optimized indexing and validation
93///
94/// # Architecture
95///
96/// The `Table` struct acts as an orchestration layer, delegating specialized
97/// operations to dedicated components:
98///
99/// - **Row Storage**: Direct Vec storage for sequential access (table scans)
100/// - **Deletion Bitmap**: O(1) deletion via bitmap marking instead of Vec::remove()
101/// - **Columnar Storage**: Native columnar storage for OLAP-optimized tables
102/// - **Indexing**: `IndexManager` maintains hash indexes for constraint checks
103/// - **Normalization**: `RowNormalizer` handles value transformation and validation
104/// - **Optimization**: Append mode tracking for sequential insert performance
105///
106/// # Storage Formats
107///
108/// Tables support two storage formats:
109/// - **Row-oriented (default)**: Traditional row storage, optimized for OLTP
110/// - **Columnar**: Native column storage, optimized for OLAP with zero conversion overhead
111///
112/// ## Columnar Storage Limitations
113///
114/// **IMPORTANT**: Columnar tables are optimized for read-heavy analytical workloads.
115/// Each INSERT/UPDATE/DELETE operation triggers a full rebuild of the columnar
116/// representation (O(n) cost). This makes columnar tables unsuitable for:
117/// - High-frequency INSERT workloads
118/// - OLTP use cases with frequent writes
119/// - Streaming inserts
120///
121/// **Recommended use cases for columnar tables**:
122/// - Bulk-loaded analytical data (load once, query many times)
123/// - Reporting tables with infrequent updates
124/// - Data warehouse fact tables
125///
/// For mixed workloads, use row-oriented storage and obtain columnar data through
/// `Database::get_columnar()`, which provides SIMD acceleration with LRU caching
/// (`Table::scan_columnar()` performs a fresh conversion on every call).
128///
129/// # Performance Characteristics
130///
131/// - **INSERT**: O(1) amortized for row append + O(1) for index updates
132/// - **UPDATE**: O(1) for row update + O(k) for k affected indexes (selective mode)
133/// - **DELETE**: O(1) per row via bitmap marking (amortized O(n) for compaction)
134/// - **SCAN**: O(n) direct vector iteration (skipping deleted rows)
135/// - **COLUMNAR SCAN**: O(n) with SIMD acceleration (no conversion overhead for native columnar)
136/// - **PK/UNIQUE lookup**: O(1) via hash indexes
137///
138/// # Example
139///
140/// ```text
141/// use vibesql_catalog::TableSchema;
142/// use vibesql_storage::Table;
143///
144/// let schema = TableSchema::new("users", columns);
145/// let mut table = Table::new(schema);
146///
147/// // Insert automatically validates and indexes
148/// table.insert(row)?;
149///
150/// // Scan returns all rows
151/// for row in table.scan() {
152///     // Process row...
153/// }
154/// ```
155#[derive(Debug)]
156pub struct Table {
157    /// Table schema defining structure and constraints
158    pub schema: vibesql_catalog::TableSchema,
159
160    /// Row storage - direct vector for sequential access (row-oriented tables only)
161    rows: Vec<Row>,
162
163    /// Deletion bitmap - tracks which rows are logically deleted
164    /// Uses O(1) bit operations instead of O(n) Vec::remove()
165    /// Compaction occurs when deleted_count > rows.len() / 2
166    deleted: Vec<bool>,
167
168    /// Count of deleted rows (cached to avoid counting bits)
169    deleted_count: usize,
170
171    /// Native columnar storage - primary storage for columnar tables
172    /// For columnar tables, this is the authoritative data source
173    /// For row tables, this is None (use Database::get_columnar() for cached columnar data)
174    native_columnar: Option<crate::ColumnarTable>,
175
176    /// Hash indexes for constraint validation (managed by IndexManager)
177    /// Provides O(1) lookups for primary key and unique constraints
178    indexes: IndexManager,
179
180    /// Append mode optimization tracking (managed by AppendModeTracker)
181    /// Detects sequential primary key inserts for executor-level optimizations
182    append_tracker: AppendModeTracker,
183
184    /// Cached statistics for query optimization (computed lazily)
185    statistics: Option<crate::statistics::TableStatistics>,
186
187    /// Counter for modifications since last statistics update
188    modifications_since_stats: usize,
189    // Note: Table-level columnar caching was removed in #3892 to eliminate duplicate
190    // caching with Database::columnar_cache. All columnar caching now goes through
191    // Database::get_columnar() which provides LRU eviction and Arc-based sharing.
192    // Table::scan_columnar() performs fresh conversion on each call.
193}
194
195impl Clone for Table {
196    fn clone(&self) -> Self {
197        Table {
198            schema: self.schema.clone(),
199            rows: self.rows.clone(),
200            deleted: self.deleted.clone(),
201            deleted_count: self.deleted_count,
202            native_columnar: self.native_columnar.clone(),
203            indexes: self.indexes.clone(),
204            append_tracker: self.append_tracker.clone(),
205            statistics: self.statistics.clone(),
206            modifications_since_stats: self.modifications_since_stats,
207        }
208    }
209}
210
211impl Table {
212    /// Create a new empty table with given schema
213    ///
214    /// The storage format is determined by the schema's storage_format field:
215    /// - Row: Traditional row-oriented storage (default)
216    /// - Columnar: Native columnar storage for analytical workloads
217    pub fn new(schema: vibesql_catalog::TableSchema) -> Self {
218        let indexes = IndexManager::new(&schema);
219        let is_columnar = schema.is_columnar();
220
221        // For columnar tables, initialize empty native columnar storage
222        let native_columnar = if is_columnar {
223            // Create empty columnar table with column names from schema
224            let column_names: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
225            Some(
226                crate::ColumnarTable::from_rows(&[], &column_names)
227                    .expect("Creating empty columnar table should never fail"),
228            )
229        } else {
230            None
231        };
232
233        Table {
234            schema,
235            rows: Vec::new(),
236            deleted: Vec::new(),
237            deleted_count: 0,
238            native_columnar,
239            indexes,
240            append_tracker: AppendModeTracker::new(),
241            statistics: None,
242            modifications_since_stats: 0,
243        }
244    }
245
246    /// Check if this table uses native columnar storage
247    pub fn is_native_columnar(&self) -> bool {
248        self.native_columnar.is_some()
249    }
250
251    /// Insert a row into the table
252    ///
253    /// For row-oriented tables, rows are stored directly in a Vec.
254    /// For columnar tables, rows are buffered and the columnar data is rebuilt.
255    pub fn insert(&mut self, row: Row) -> Result<(), StorageError> {
256        // Normalize and validate row (column count, type checking, NULL checking, value
257        // normalization)
258        let normalizer = RowNormalizer::new(&self.schema);
259        let normalized_row = normalizer.normalize_and_validate(row)?;
260
261        // Detect sequential append pattern before inserting
262        if let Some(pk_indices) = self.schema.get_primary_key_indices() {
263            let pk_values: Vec<SqlValue> =
264                pk_indices.iter().map(|&idx| normalized_row.values[idx].clone()).collect();
265            self.append_tracker.update(&pk_values);
266        }
267
268        // Add row to table (always stored for indexing and potential row access)
269        let row_index = self.rows.len();
270        self.rows.push(normalized_row.clone());
271        self.deleted.push(false);
272
273        // Update indexes (delegate to IndexManager)
274        self.indexes.update_for_insert(&self.schema, &normalized_row, row_index);
275
276        // Track modifications for statistics staleness
277        self.modifications_since_stats += 1;
278
279        // Mark stats stale if significant changes (> 10% of table)
280        if let Some(stats) = &mut self.statistics {
281            if self.modifications_since_stats > stats.row_count / 10 {
282                stats.mark_stale();
283            }
284        }
285
286        // For native columnar tables, rebuild columnar data
287        // Note: Database-level columnar cache invalidation is handled by the executor
288        if self.native_columnar.is_some() {
289            self.rebuild_native_columnar()?;
290        }
291
292        Ok(())
293    }
294
295    /// Rebuild native columnar storage from rows (excluding deleted rows)
296    fn rebuild_native_columnar(&mut self) -> Result<(), StorageError> {
297        let column_names: Vec<String> =
298            self.schema.columns.iter().map(|c| c.name.clone()).collect();
299
300        // Collect only live rows for columnar conversion
301        let live_rows: Vec<&Row> = self
302            .rows
303            .iter()
304            .enumerate()
305            .filter(|(idx, _)| !self.deleted[*idx])
306            .map(|(_, row)| row)
307            .collect();
308
309        let columnar = crate::ColumnarTable::from_row_refs(&live_rows, &column_names)
310            .map_err(|e| StorageError::Other(format!("Columnar rebuild failed: {}", e)))?;
311
312        self.native_columnar = Some(columnar);
313        Ok(())
314    }
315
316    /// Insert multiple rows into the table in a single batch operation
317    ///
318    /// This method is optimized for bulk data loading and provides significant
319    /// performance improvements over repeated single-row inserts:
320    ///
321    /// - **Pre-allocation**: Vector capacity is reserved upfront
322    /// - **Batch normalization**: Rows are validated/normalized together
323    /// - **Deferred index updates**: Indexes are rebuilt once after all inserts
324    /// - **Single cache invalidation**: Columnar cache invalidated once at end
325    /// - **Statistics update once**: Stats marked stale only at completion
326    ///
327    /// # Arguments
328    ///
329    /// * `rows` - Vector of rows to insert
330    ///
331    /// # Returns
332    ///
333    /// * `Ok(usize)` - Number of rows successfully inserted
334    /// * `Err(StorageError)` - If any row fails validation (no rows inserted on error)
335    ///
336    /// # Performance
337    ///
338    /// For large batches (1000+ rows), this method is typically 10-50x faster
339    /// than equivalent single-row inserts due to reduced per-row overhead.
340    ///
341    /// # Example
342    ///
343    /// ```text
344    /// let rows = vec![
345    ///     Row::new(vec![SqlValue::Integer(1), SqlValue::Varchar(arcstr::ArcStr::from("Alice"))]),
346    ///     Row::new(vec![SqlValue::Integer(2), SqlValue::Varchar(arcstr::ArcStr::from("Bob"))]),
347    ///     Row::new(vec![SqlValue::Integer(3), SqlValue::Varchar(arcstr::ArcStr::from("Charlie"))]),
348    /// ];
349    /// let count = table.insert_batch(rows)?;
350    /// assert_eq!(count, 3);
351    /// ```
352    pub fn insert_batch(&mut self, rows: Vec<Row>) -> Result<usize, StorageError> {
353        if rows.is_empty() {
354            return Ok(0);
355        }
356
357        let row_count = rows.len();
358        let normalizer = RowNormalizer::new(&self.schema);
359
360        // Phase 1: Normalize and validate all rows upfront
361        // This ensures we fail fast before modifying any state
362        let mut normalized_rows = Vec::with_capacity(row_count);
363        for row in rows {
364            let normalized = normalizer.normalize_and_validate(row)?;
365            normalized_rows.push(normalized);
366        }
367
368        // Phase 2: Pre-allocate capacity for rows and deleted vectors
369        self.rows.reserve(row_count);
370        self.deleted.reserve(row_count);
371
372        // Record starting index for incremental index updates
373        let start_index = self.rows.len();
374
375        // Phase 3: Insert all rows into storage
376        for row in normalized_rows {
377            self.rows.push(row);
378            self.deleted.push(false);
379        }
380
381        // Phase 4: Incrementally update indexes for only the new rows
382        // This is O(batch_size) instead of O(total_rows), avoiding O(n²) behavior
383        // when doing multiple batch inserts
384        for (i, row) in self.rows[start_index..].iter().enumerate() {
385            self.indexes.update_for_insert(&self.schema, row, start_index + i);
386        }
387
388        // Phase 5: Update append mode tracker with last inserted row
389        // (We only track the final state, not intermediate states)
390        if let Some(pk_indices) = self.schema.get_primary_key_indices() {
391            if let Some(last_row) = self.rows.last() {
392                let pk_values: Vec<SqlValue> =
393                    pk_indices.iter().map(|&idx| last_row.values[idx].clone()).collect();
394                // Reset tracker and set to last value (bulk insert breaks sequential pattern)
395                self.append_tracker.reset();
396                self.append_tracker.update(&pk_values);
397            }
398        }
399
400        // Phase 6: Update statistics tracking
401        self.modifications_since_stats += row_count;
402        if let Some(stats) = &mut self.statistics {
403            if self.modifications_since_stats > stats.row_count / 10 {
404                stats.mark_stale();
405            }
406        }
407
408        // Phase 7: Handle columnar storage
409        // For native columnar tables, rebuild columnar data
410        // Note: Database-level columnar cache invalidation is handled by the executor
411        if self.native_columnar.is_some() {
412            self.rebuild_native_columnar()?;
413        }
414
415        Ok(row_count)
416    }
417
418    /// Insert rows from an iterator in a streaming fashion
419    ///
    /// This method is optimized for sources that are produced incrementally (for
    /// example, file readers), so the input never has to be collected into a single
    /// Vec up front. Rows are processed in configurable batch sizes.
422    ///
423    /// # Arguments
424    ///
425    /// * `rows` - Iterator yielding rows to insert
426    /// * `batch_size` - Number of rows to process per batch (default: 1000)
427    ///
428    /// # Returns
429    ///
430    /// * `Ok(usize)` - Total number of rows successfully inserted
431    /// * `Err(StorageError)` - If any row fails validation
432    ///
433    /// # Note
434    ///
435    /// Unlike `insert_batch`, this method commits rows in batches, so a failure
436    /// partway through will leave previously committed batches in the table.
437    /// Use `insert_batch` if you need all-or-nothing semantics.
438    ///
439    /// # Example
440    ///
441    /// ```text
442    /// // Stream rows from a file reader
443    /// let rows_iter = csv_reader.rows().map(|r| Row::from_csv_record(r));
444    /// let count = table.insert_from_iter(rows_iter, 1000)?;
445    /// ```
446    pub fn insert_from_iter<I>(&mut self, rows: I, batch_size: usize) -> Result<usize, StorageError>
447    where
448        I: Iterator<Item = Row>,
449    {
450        let batch_size = if batch_size == 0 { 1000 } else { batch_size };
451        let mut total_inserted = 0;
452        let mut batch = Vec::with_capacity(batch_size);
453
454        for row in rows {
455            batch.push(row);
456
457            if batch.len() >= batch_size {
458                let count = self.insert_batch(std::mem::take(&mut batch))?;
459                total_inserted += count;
460                batch = Vec::with_capacity(batch_size);
461            }
462        }
463
464        // Insert any remaining rows
465        if !batch.is_empty() {
466            let count = self.insert_batch(batch)?;
467            total_inserted += count;
468        }
469
470        Ok(total_inserted)
471    }
472
    /// Get all rows for scanning
    ///
    /// Returns a slice of the raw row storage, which may still contain rows that are
    /// marked as deleted but not yet compacted.
    ///
    /// **Important**: For operations that need to skip deleted rows, use `scan_live()`,
    /// which filters deleted rows automatically.
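    ///
    /// # Example
    ///
    /// A minimal sketch contrasting the raw scan with the live scan (illustrative):
    ///
    /// ```text
    /// let raw = table.scan();                  // may still contain deleted slots
    /// let live: Vec<&Row> = table
    ///     .scan_live()
    ///     .map(|(_idx, row)| row)              // drop the physical index
    ///     .collect();
    /// assert!(live.len() <= raw.len());
    /// ```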
480    pub fn scan(&self) -> &[Row] {
481        &self.rows
482    }
483
484    /// Check if a row at the given index is deleted
485    #[inline]
486    pub fn is_row_deleted(&self, idx: usize) -> bool {
487        idx < self.deleted.len() && self.deleted[idx]
488    }
489
490    /// Iterate over live (non-deleted) rows with their physical indices
491    ///
492    /// This is the preferred way to scan table data, as it automatically
493    /// skips rows that have been deleted but not yet compacted.
494    ///
495    /// # Returns
496    /// An iterator yielding `(physical_index, &Row)` pairs for all live rows.
497    ///
498    /// # Example
499    /// ```text
500    /// for (idx, row) in table.scan_live() {
501    ///     // idx is the physical index, can be used with get_row() or delete_by_indices()
502    ///     process_row(idx, row);
503    /// }
504    /// ```
505    #[inline]
506    pub fn scan_live(&self) -> impl Iterator<Item = (usize, &Row)> {
507        self.rows.iter().enumerate().filter(|(idx, _)| !self.deleted[*idx])
508    }
509
510    /// Scan only live (non-deleted) rows, returning an owned Vec.
511    ///
512    /// This method provides an efficient way to get all live rows as a Vec<Row>
513    /// for executor paths that need owned data. Unlike `scan()` which returns
514    /// all rows including deleted ones, this method filters out deleted rows.
515    ///
516    /// # Performance
517    /// O(n) time and space where n is the number of live rows.
518    /// Pre-allocates the exact capacity needed based on `row_count()`.
519    ///
520    /// # Returns
521    /// A Vec containing clones of all non-deleted rows.
522    ///
523    /// # Example
524    /// ```text
525    /// // For SELECT queries that need a Vec<Row>
526    /// let rows = table.scan_live_vec();
527    /// ```
528    #[inline]
529    pub fn scan_live_vec(&self) -> Vec<Row> {
530        let mut result = Vec::with_capacity(self.row_count());
531        for (idx, row) in self.rows.iter().enumerate() {
532            if !self.deleted[idx] {
533                let mut cloned = row.clone();
534                // Set row_id for ROWID pseudo-column support (SQLite compatibility)
535                // If the row already has an explicit row_id (from INSERT INTO t(rowid,...)),
536                // preserve it. Otherwise, use 1-indexed physical index.
537                if cloned.row_id.is_none() {
538                    cloned.row_id = Some((idx + 1) as u64);
539                }
540                result.push(cloned);
541            }
542        }
543        result
544    }
545
546    /// Get a single row by index position (O(1) access)
547    ///
548    /// Returns None if the row is deleted or index is out of bounds.
549    ///
550    /// # Arguments
551    /// * `idx` - The row index position (physical index)
552    ///
553    /// # Returns
554    /// * `Some(&Row)` - The row at the given index if it exists and is not deleted
555    /// * `None` - If the index is out of bounds or row is deleted
556    #[inline]
557    pub fn get_row(&self, idx: usize) -> Option<&Row> {
558        if idx < self.deleted.len() && self.deleted[idx] {
559            return None;
560        }
561        self.rows.get(idx)
562    }
563
564    /// Scan table data in columnar format for SIMD-accelerated processing
565    ///
566    /// This method returns columnar data suitable for high-performance analytical queries.
567    /// Unlike `scan()` which returns row-oriented data, this method returns column-oriented
568    /// data that enables:
569    ///
570    /// - **SIMD vectorization**: Process 4-8 values per CPU instruction
571    /// - **Cache efficiency**: Contiguous column data improves memory access patterns
572    /// - **Type specialization**: Avoid SqlValue enum matching overhead
573    ///
574    /// # Performance
575    ///
576    /// For **native columnar tables**: Zero conversion overhead - returns data directly.
577    /// For **row tables**: O(n * m) conversion cost per call.
578    ///
579    /// # Caching
580    ///
581    /// This method does not cache results. For cached columnar access with LRU eviction,
582    /// use `Database::get_columnar()` which provides Arc-based sharing across queries.
583    ///
584    /// # Returns
585    ///
586    /// * `Ok(ColumnarTable)` - Columnar representation of the table data
587    /// * `Err(StorageError)` - If conversion fails due to type mismatches
588    ///
589    /// # Example
590    ///
591    /// ```text
592    /// let columnar = table.scan_columnar()?;
593    /// // Process with SIMD-accelerated operations
594    /// if let Some(ColumnData::Int64 { values, nulls }) = columnar.get_column("quantity") {
595    ///     // SIMD filtering on values slice
596    /// }
597    /// ```
598    pub fn scan_columnar(&self) -> Result<crate::ColumnarTable, StorageError> {
599        // For native columnar tables, return data directly (zero conversion overhead)
600        if let Some(ref native) = self.native_columnar {
601            return Ok(native.clone());
602        }
603
604        // For row tables, perform fresh conversion each time
605        // Note: Caching is now handled at the Database level via Database::get_columnar()
606        // which provides LRU eviction and Arc-based sharing across queries.
607
608        // Get column names from schema
609        let column_names: Vec<String> =
610            self.schema.columns.iter().map(|c| c.name.clone()).collect();
611
612        // Collect only live rows for columnar conversion
613        let live_rows: Vec<&Row> = self
614            .rows
615            .iter()
616            .enumerate()
617            .filter(|(idx, _)| !self.deleted[*idx])
618            .map(|(_, row)| row)
619            .collect();
620
621        // Convert rows to columnar format
622        crate::ColumnarTable::from_row_refs(&live_rows, &column_names)
623            .map_err(|e| StorageError::Other(format!("Columnar conversion failed: {}", e)))
624    }
625
626    /// Get number of live (non-deleted) rows
627    pub fn row_count(&self) -> usize {
628        self.rows.len() - self.deleted_count
629    }
630
631    /// Get total number of rows including deleted ones (physical storage size)
632    #[inline]
633    pub fn physical_row_count(&self) -> usize {
634        self.rows.len()
635    }
636
637    /// Get count of deleted (logically removed) rows
638    ///
639    /// This is used for DML cost estimation, as tables with many deleted rows
640    /// may have degraded performance for UPDATE/DELETE operations.
641    #[inline]
642    pub fn deleted_count(&self) -> usize {
643        self.deleted_count
644    }
645
646    /// Get table statistics, computing if necessary
647    ///
648    /// Statistics are computed lazily on first access and cached.
649    /// They are marked stale after significant data changes (> 10% of rows).
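    ///
    /// # Example
    ///
    /// A minimal sketch of lazy computation followed by cached access (illustrative):
    ///
    /// ```text
    /// let row_estimate = table.statistics().row_count;   // computes and caches on first call
    /// let cached = table.get_statistics();                // Some(_) once computed
    /// ```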
650    pub fn statistics(&mut self) -> &crate::statistics::TableStatistics {
651        if self.statistics.is_none() || self.statistics.as_ref().unwrap().needs_refresh() {
652            self.statistics =
653                Some(crate::statistics::TableStatistics::compute(&self.rows, &self.schema));
654            self.modifications_since_stats = 0;
655        }
656
657        self.statistics.as_ref().unwrap()
658    }
659
660    /// Get cached table statistics without computing
661    ///
662    /// Returns None if statistics have never been computed or are stale.
663    /// Use `statistics()` if you want to compute/refresh statistics.
664    pub fn get_statistics(&self) -> Option<&crate::statistics::TableStatistics> {
665        self.statistics.as_ref()
666    }
667
668    /// Force recomputation of statistics (ANALYZE command)
669    pub fn analyze(&mut self) {
670        self.statistics =
671            Some(crate::statistics::TableStatistics::compute(&self.rows, &self.schema));
672        self.modifications_since_stats = 0;
673    }
674
675    /// Check if table is in append mode (sequential inserts detected)
676    /// When true, constraint checks can skip duplicate lookups for optimization
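    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative): monotonically increasing primary keys
    /// eventually activate append mode (the exact threshold lives in AppendModeTracker).
    ///
    /// ```text
    /// table.insert(Row::from_vec(vec![SqlValue::Integer(1), SqlValue::Varchar(arcstr::ArcStr::from("Alice"))]))?;
    /// table.insert(Row::from_vec(vec![SqlValue::Integer(2), SqlValue::Varchar(arcstr::ArcStr::from("Bob"))]))?;
    /// // ... more sequential inserts ...
    /// if table.is_in_append_mode() {
    ///     // Constraint checks may skip duplicate-key lookups.
    /// }
    /// ```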
677    pub fn is_in_append_mode(&self) -> bool {
678        self.append_tracker.is_active()
679    }
680
681    /// Clear all rows
682    pub fn clear(&mut self) {
683        self.rows.clear();
684        self.deleted.clear();
685        self.deleted_count = 0;
686        // Clear indexes (delegate to IndexManager)
687        self.indexes.clear();
688        // Reset append mode tracking
689        self.append_tracker.reset();
690        // Clear native columnar if present
691        // Note: Database-level columnar cache invalidation is handled by the executor
692        if self.native_columnar.is_some() {
693            let column_names: Vec<String> =
694                self.schema.columns.iter().map(|c| c.name.clone()).collect();
695            self.native_columnar = Some(
696                crate::ColumnarTable::from_rows(&[], &column_names)
697                    .expect("Creating empty columnar table should never fail"),
698            );
699        }
700    }
701
702    /// Update a row at the specified index
703    pub fn update_row(&mut self, index: usize, row: Row) -> Result<(), StorageError> {
704        if index >= self.rows.len() {
705            return Err(StorageError::ColumnIndexOutOfBounds { index });
706        }
707
708        // Cannot update a deleted row
709        if self.deleted[index] {
710            return Err(StorageError::RowNotFound);
711        }
712
713        // Normalize and validate row
714        let normalizer = RowNormalizer::new(&self.schema);
715        let normalized_row = normalizer.normalize_and_validate(row)?;
716
717        // Get old row for index updates (clone to avoid borrow issues)
718        let old_row = self.rows[index].clone();
719
720        // Update the row
721        self.rows[index] = normalized_row.clone();
722
723        // Update indexes (delegate to IndexManager)
724        self.indexes.update_for_update(&self.schema, &old_row, &normalized_row, index);
725
726        // For native columnar tables, rebuild columnar data
727        // Note: Database-level columnar cache invalidation is handled by the executor
728        if self.native_columnar.is_some() {
729            self.rebuild_native_columnar()?;
730        }
731
732        Ok(())
733    }
734
735    /// Update a row with selective index maintenance
736    ///
737    /// Only updates indexes that reference changed columns, providing significant
738    /// performance improvement for tables with many indexes when updating non-indexed columns.
739    ///
740    /// # Arguments
741    /// * `index` - Row index to update
742    /// * `row` - New row data
743    /// * `changed_columns` - Set of column indices that were modified
744    ///
745    /// # Returns
746    /// * `Ok(())` on success
747    /// * `Err(StorageError)` if index out of bounds or column count mismatch
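    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative): only column 1 changed, so only indexes that
    /// cover column 1 are maintained.
    ///
    /// ```text
    /// use std::collections::HashSet;
    ///
    /// let mut changed: HashSet<usize> = HashSet::new();
    /// changed.insert(1);
    /// let new_row = Row::from_vec(vec![
    ///     SqlValue::Integer(1),
    ///     SqlValue::Varchar(arcstr::ArcStr::from("Alice B.")),
    /// ]);
    /// table.update_row_selective(0, new_row, &changed)?;
    /// ```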
748    pub fn update_row_selective(
749        &mut self,
750        index: usize,
751        row: Row,
752        changed_columns: &std::collections::HashSet<usize>,
753    ) -> Result<(), StorageError> {
754        if index >= self.rows.len() {
755            return Err(StorageError::ColumnIndexOutOfBounds { index });
756        }
757
758        // Cannot update a deleted row
759        if self.deleted[index] {
760            return Err(StorageError::RowNotFound);
761        }
762
763        // Normalize and validate row
764        let normalizer = RowNormalizer::new(&self.schema);
765        let normalized_row = normalizer.normalize_and_validate(row)?;
766
767        // Get old row for index updates (clone to avoid borrow issues)
768        let old_row = self.rows[index].clone();
769
770        // Determine which indexes are affected by the changed columns (delegate to IndexManager)
771        let affected_indexes = self.indexes.get_affected_indexes(&self.schema, changed_columns);
772
773        // Update only affected indexes BEFORE replacing row (delegate to IndexManager)
774        self.indexes.update_selective(
775            &self.schema,
776            &old_row,
777            &normalized_row,
778            index,
779            &affected_indexes,
780        );
781
782        // Update the row (move ownership, no clone needed)
783        self.rows[index] = normalized_row;
784
785        // For native columnar tables, rebuild columnar data
786        // Note: Database-level columnar cache invalidation is handled by the executor
787        if self.native_columnar.is_some() {
788            self.rebuild_native_columnar()?;
789        }
790
791        Ok(())
792    }
793
794    /// Fast path update for pre-validated rows
795    ///
796    /// This variant skips normalization/validation, assuming the caller has already
797    /// validated the row data. Use for performance-critical UPDATE paths where
798    /// validation was done at the executor level.
799    ///
800    /// # Arguments
801    /// * `index` - Row index to update
802    /// * `new_row` - Pre-validated new row data (ownership transferred)
803    /// * `old_row` - Reference to old row for index updates
804    /// * `changed_columns` - Set of column indices that were modified
805    ///
806    /// # Safety
807    /// Caller must ensure row data is valid (correct column count, types, constraints)
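    ///
    /// # Example
    ///
    /// A minimal sketch of the executor-side fast path (illustrative; assumes the
    /// executor has already validated `new_row` and knows which columns changed):
    ///
    /// ```text
    /// let old_row = table.scan()[row_idx].clone();
    /// let mut changed = std::collections::HashSet::new();
    /// changed.insert(2);
    /// table.update_row_unchecked(row_idx, new_row, &old_row, &changed);
    /// ```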
808    #[inline]
809    pub fn update_row_unchecked(
810        &mut self,
811        index: usize,
812        new_row: Row,
813        old_row: &Row,
814        changed_columns: &std::collections::HashSet<usize>,
815    ) {
816        // Determine which indexes are affected by the changed columns
817        let affected_indexes = self.indexes.get_affected_indexes(&self.schema, changed_columns);
818
819        // Update affected indexes BEFORE replacing row
820        self.indexes.update_selective(&self.schema, old_row, &new_row, index, &affected_indexes);
821
822        // Update the row (direct move, no validation)
823        self.rows[index] = new_row;
824
825        // Note: Database-level columnar cache invalidation is handled by the executor
826    }
827
828    /// Update a single column value in-place without cloning the row
829    ///
830    /// This is the fastest possible update path for non-indexed columns:
831    /// - No row cloning (direct in-place modification)
832    /// - No index updates (caller must verify column is not indexed)
833    /// - No validation (caller must pre-validate the value)
834    ///
835    /// # Arguments
836    ///
837    /// * `row_index` - Index of the row to update
838    /// * `col_index` - Index of the column to update
839    /// * `new_value` - The new value for the column
840    ///
841    /// # Safety
842    ///
843    /// Caller must ensure:
844    /// - The column is NOT indexed (no internal or user-defined indexes)
845    /// - The value satisfies all constraints (NOT NULL, type, etc.)
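    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative; the caller has already confirmed that column 2
    /// is not indexed and that the value is valid for its type):
    ///
    /// ```text
    /// table.update_column_inplace(row_idx, 2, SqlValue::Integer(42));
    /// ```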
846    #[inline]
847    pub fn update_column_inplace(
848        &mut self,
849        row_index: usize,
850        col_index: usize,
851        new_value: vibesql_types::SqlValue,
852    ) {
853        self.rows[row_index].values[col_index] = new_value;
854
855        // Note: Database-level columnar cache invalidation is handled by the executor
856    }
857
858    /// Delete rows matching a predicate
859    ///
860    /// Uses O(1) bitmap marking for each deleted row instead of O(n) Vec::remove().
861    ///
862    /// # Returns
863    /// [`DeleteResult`] containing the count of deleted rows and whether compaction occurred.
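    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative): delete every row whose first column is NULL
    /// and react to a possible compaction.
    ///
    /// ```text
    /// let result = table.delete_where(|row| matches!(row.values[0], SqlValue::Null));
    /// if result.compacted {
    ///     // Physical row indices changed - rebuild Database-level B-tree indexes.
    /// }
    /// ```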
864    pub fn delete_where<F>(&mut self, mut predicate: F) -> DeleteResult
865    where
866        F: FnMut(&Row) -> bool,
867    {
868        // Collect indices of rows to delete (skip already-deleted rows)
869        let mut indices_to_delete: Vec<usize> = Vec::new();
870        for (index, row) in self.rows.iter().enumerate() {
871            if !self.deleted[index] && predicate(row) {
872                indices_to_delete.push(index);
873            }
874        }
875
876        if indices_to_delete.is_empty() {
877            return DeleteResult::new(0, false);
878        }
879
880        // Use the optimized delete_by_indices which uses bitmap marking
881        self.delete_by_indices(&indices_to_delete)
882    }
883
884    /// Remove a specific row (used for transaction undo)
885    /// Returns error if row not found
886    ///
887    /// Uses O(1) bitmap marking instead of O(n) Vec::remove().
888    ///
889    /// Note: This method does not return compaction status since it's used
890    /// internally for transaction rollback where index consistency is handled
891    /// at a higher level.
892    pub fn remove_row(&mut self, target_row: &Row) -> Result<(), StorageError> {
893        // Find the first matching non-deleted row
894        for (idx, row) in self.rows.iter().enumerate() {
895            if !self.deleted[idx] && row == target_row {
896                // Use delete_by_indices for consistent behavior
897                // Note: We ignore compaction status here since transaction rollback
898                // handles index consistency at the transaction layer
899                let _ = self.delete_by_indices(&[idx]);
900                return Ok(());
901            }
902        }
903        Err(StorageError::RowNotFound)
904    }
905
906    /// Delete rows by known indices (fast path - no scanning required)
907    ///
908    /// Uses O(1) bitmap marking instead of O(n) Vec::remove(). Rows are marked
909    /// as deleted but remain in the vector until compaction is triggered.
910    ///
911    /// # Arguments
912    /// * `indices` - Indices of rows to delete, need not be sorted
913    ///
914    /// # Returns
915    /// [`DeleteResult`] containing:
916    /// - `deleted_count`: Number of rows deleted
917    /// - `compacted`: Whether compaction occurred (row indices changed)
918    ///
919    /// # Important
920    ///
921    /// When `compacted` is true, all row indices in the table have changed.
922    /// User-defined indexes (B-tree indexes managed at the Database level)
923    /// must be rebuilt after compaction to maintain correctness.
924    ///
925    /// # Performance
926    /// O(d) where d = number of rows to delete, compared to O(d * n) for Vec::remove()
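    ///
    /// # Example
    ///
    /// A minimal sketch (illustrative): indices may be unsorted, and out-of-range or
    /// already-deleted indices are silently skipped.
    ///
    /// ```text
    /// let result = table.delete_by_indices(&[9, 2, 2, 1_000_000]);
    /// assert!(result.deleted_count <= 2); // duplicate and out-of-range entries skipped
    /// ```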
927    pub fn delete_by_indices(&mut self, indices: &[usize]) -> DeleteResult {
928        if indices.is_empty() {
929            return DeleteResult::new(0, false);
930        }
931
932        // Count valid, non-already-deleted indices
933        let mut deleted = 0;
934        for &idx in indices {
935            // Skip invalid or already-deleted indices
936            if idx >= self.rows.len() || self.deleted[idx] {
937                continue;
938            }
939
940            // Update indexes for this row BEFORE marking as deleted
941            let row = &self.rows[idx];
942            self.indexes.update_for_delete(&self.schema, row);
943
944            // Mark row as deleted - O(1) operation
945            self.deleted[idx] = true;
946            self.deleted_count += 1;
947            deleted += 1;
948        }
949
950        if deleted == 0 {
951            return DeleteResult::new(0, false);
952        }
953
954        // Check if compaction is needed (> 50% deleted)
955        // Compaction rebuilds the vectors without deleted rows
956        // NOTE: When compaction occurs, all row indices change and user-defined
957        // indexes (B-tree indexes) must be rebuilt by the caller
958        let compacted = if self.should_compact() {
959            self.compact();
960            true
961        } else {
962            false
963        };
964
965        // For native columnar tables, rebuild columnar data
966        // Note: Database-level columnar cache invalidation is handled by the executor
967        if self.native_columnar.is_some() {
968            let _ = self.rebuild_native_columnar();
969        }
970
971        DeleteResult::new(deleted, compacted)
972    }
973
974    /// Delete rows by known indices with batch-optimized internal index updates
975    ///
976    /// This is an optimized version of `delete_by_indices` that pre-computes
977    /// schema lookups for internal hash indexes, reducing overhead for multi-row
978    /// deletes by ~30-40%.
979    ///
980    /// # Arguments
981    /// * `indices` - Indices of rows to delete, need not be sorted
982    ///
983    /// # Returns
984    /// [`DeleteResult`] containing:
985    /// - `deleted_count`: Number of rows deleted
986    /// - `compacted`: Whether compaction occurred (row indices changed)
987    ///
988    /// # Performance
989    /// - Pre-computes PK/unique column indices once (O(1) vs O(d) schema lookups)
990    /// - Uses batch index updates for internal hash indexes
991    /// - Best for multi-row deletes; single-row deletes use `delete_by_indices`
992    pub fn delete_by_indices_batch(&mut self, indices: &[usize]) -> DeleteResult {
993        if indices.is_empty() {
994            return DeleteResult::new(0, false);
995        }
996
997        // For single-row deletes, use the standard path (no batch overhead)
998        if indices.len() == 1 {
999            return self.delete_by_indices(indices);
1000        }
1001
1002        // Phase 1: Collect valid rows to delete and their references
1003        // This avoids repeated bounds/deleted checks
1004        let mut valid_indices: Vec<usize> = Vec::with_capacity(indices.len());
1005        let mut rows_to_delete: Vec<&Row> = Vec::with_capacity(indices.len());
1006
1007        for &idx in indices {
1008            if idx < self.rows.len() && !self.deleted[idx] {
1009                valid_indices.push(idx);
1010                rows_to_delete.push(&self.rows[idx]);
1011            }
1012        }
1013
1014        if valid_indices.is_empty() {
1015            return DeleteResult::new(0, false);
1016        }
1017
1018        // Phase 2: Batch update internal hash indexes (pre-computes column indices once)
1019        self.indexes.batch_update_for_delete(&self.schema, &rows_to_delete);
1020
1021        // Phase 3: Mark rows as deleted
1022        let deleted = valid_indices.len();
1023        for idx in valid_indices {
1024            self.deleted[idx] = true;
1025            self.deleted_count += 1;
1026        }
1027
1028        // Phase 4: Check compaction and handle columnar
1029        let compacted = if self.should_compact() {
1030            self.compact();
1031            true
1032        } else {
1033            false
1034        };
1035
1036        // For native columnar tables, rebuild columnar data
1037        // (Row tables use Database::columnar_cache which is invalidated by executors)
1038        if self.native_columnar.is_some() {
1039            let _ = self.rebuild_native_columnar();
1040        }
1041
1042        DeleteResult::new(deleted, compacted)
1043    }
1044
1045    /// Check if the table should be compacted
1046    ///
1047    /// Compaction is triggered when more than 50% of rows are deleted.
1048    /// This prevents unbounded growth of deleted row storage.
1049    #[inline]
1050    fn should_compact(&self) -> bool {
1051        // Only compact if we have at least some rows and > 50% are deleted
1052        !self.rows.is_empty() && self.deleted_count > self.rows.len() / 2
1053    }
1054
1055    /// Compact the table by removing deleted rows
1056    ///
1057    /// This rebuilds the rows vector without deleted entries and rebuilds
1058    /// all indexes to point to the new positions.
1059    fn compact(&mut self) {
1060        if self.deleted_count == 0 {
1061            return;
1062        }
1063
1064        // Build new vectors with only live rows
1065        let mut new_rows = Vec::with_capacity(self.rows.len() - self.deleted_count);
1066        for (idx, row) in self.rows.iter().enumerate() {
1067            if !self.deleted[idx] {
1068                new_rows.push(row.clone());
1069            }
1070        }
1071
1072        // Replace old vectors with compacted ones
1073        self.rows = new_rows;
1074        self.deleted = vec![false; self.rows.len()];
1075        self.deleted_count = 0;
1076
1077        // Rebuild all indexes since row positions have changed
1078        self.indexes.rebuild(&self.schema, &self.rows);
1079    }
1080
1081    /// Check if a row at the given index is deleted
1082    #[inline]
1083    pub fn is_deleted(&self, idx: usize) -> bool {
1084        idx < self.deleted.len() && self.deleted[idx]
1085    }
1086
1087    /// Get mutable reference to rows
1088    pub fn rows_mut(&mut self) -> &mut Vec<Row> {
1089        &mut self.rows
1090    }
1091
1092    /// Get mutable reference to schema
1093    pub fn schema_mut(&mut self) -> &mut vibesql_catalog::TableSchema {
1094        &mut self.schema
1095    }
1096
1097    /// Get reference to primary key index
1098    pub fn primary_key_index(&self) -> Option<&std::collections::HashMap<Vec<SqlValue>, usize>> {
1099        self.indexes.primary_key_index()
1100    }
1101
1102    /// Get reference to unique constraint indexes
1103    pub fn unique_indexes(&self) -> &[std::collections::HashMap<Vec<SqlValue>, usize>] {
1104        self.indexes.unique_indexes()
1105    }
1106
1107    /// Rebuild all hash indexes from scratch
1108    /// Used after schema changes that add constraints (e.g., ALTER TABLE ADD PRIMARY KEY)
1109    pub fn rebuild_indexes(&mut self) {
1110        // Recreate the IndexManager to match the current schema
1111        // (in case constraints were added that didn't exist before)
1112        self.indexes = IndexManager::new(&self.schema);
1113
1114        // Rebuild indexes from existing rows
1115        self.indexes.rebuild(&self.schema, &self.rows);
1116    }
1117}
1118
1119#[cfg(test)]
1120mod tests {
1121    use vibesql_catalog::{ColumnSchema, TableSchema};
1122    use vibesql_types::{DataType, SqlValue};
1123
1124    use super::*;
1125
1126    fn create_test_table() -> Table {
1127        let columns = vec![
1128            ColumnSchema::new("id".to_string(), DataType::Integer, false),
1129            ColumnSchema::new("name".to_string(), DataType::Varchar { max_length: Some(50) }, true),
1130        ];
1131        let schema = TableSchema::with_primary_key(
1132            "test_table".to_string(),
1133            columns,
1134            vec!["id".to_string()],
1135        );
1136        Table::new(schema)
1137    }
1138
1139    fn create_row(id: i64, name: &str) -> Row {
1140        Row::from_vec(vec![SqlValue::Integer(id), SqlValue::Varchar(arcstr::ArcStr::from(name))])
1141    }
1142
1143    #[test]
1144    fn test_append_mode_integration() {
1145        let mut table = create_test_table();
1146        assert!(!table.is_in_append_mode());
1147
1148        // Sequential inserts should activate append mode
1149        table.insert(create_row(1, "Alice")).unwrap();
1150        table.insert(create_row(2, "Bob")).unwrap();
1151        table.insert(create_row(3, "Charlie")).unwrap();
1152        table.insert(create_row(4, "David")).unwrap();
1153        assert!(table.is_in_append_mode());
1154
1155        // Clear should reset
1156        table.clear();
1157        assert!(!table.is_in_append_mode());
1158    }
1159
1160    #[test]
1161    fn test_scan_columnar() {
1162        let mut table = create_test_table();
1163
1164        // Insert test data
1165        table.insert(create_row(1, "Alice")).unwrap();
1166        table.insert(create_row(2, "Bob")).unwrap();
1167        table.insert(create_row(3, "Charlie")).unwrap();
1168
1169        // Convert to columnar format
1170        let columnar = table.scan_columnar().unwrap();
1171
1172        // Verify row count
1173        assert_eq!(columnar.row_count(), 3);
1174        assert_eq!(columnar.column_count(), 2);
1175
1176        // Verify column data - id column
1177        let id_col = columnar.get_column("id").expect("id column should exist");
1178        assert_eq!(id_col.len(), 3);
1179        assert!(!id_col.is_null(0));
1180        assert!(!id_col.is_null(1));
1181        assert!(!id_col.is_null(2));
1182
1183        // Verify column data - name column
1184        let name_col = columnar.get_column("name").expect("name column should exist");
1185        assert_eq!(name_col.len(), 3);
1186    }
1187
1188    #[test]
1189    fn test_scan_columnar_empty_table() {
1190        let table = create_test_table();
1191
1192        // Convert empty table to columnar format
1193        let columnar = table.scan_columnar().unwrap();
1194
1195        // Verify empty result
1196        assert_eq!(columnar.row_count(), 0);
1197        assert_eq!(columnar.column_count(), 2); // Schema defines 2 columns
1198    }
1199
1200    #[test]
1201    fn test_scan_columnar_with_nulls() {
1202        let columns = vec![
1203            ColumnSchema::new("id".to_string(), DataType::Integer, false),
1204            ColumnSchema::new("value".to_string(), DataType::Integer, true), // nullable
1205        ];
1206        let schema = TableSchema::new("test_nulls".to_string(), columns);
1207        let mut table = Table::new(schema);
1208
1209        // Insert rows with NULL values
1210        table.insert(Row::from_vec(vec![SqlValue::Integer(1), SqlValue::Integer(100)])).unwrap();
1211        table.insert(Row::from_vec(vec![SqlValue::Integer(2), SqlValue::Null])).unwrap();
1212        table.insert(Row::from_vec(vec![SqlValue::Integer(3), SqlValue::Integer(300)])).unwrap();
1213
1214        // Convert to columnar format
1215        let columnar = table.scan_columnar().unwrap();
1216
1217        // Verify NULL handling
1218        let value_col = columnar.get_column("value").expect("value column should exist");
1219        assert!(!value_col.is_null(0)); // 100
1220        assert!(value_col.is_null(1)); // NULL
1221        assert!(!value_col.is_null(2)); // 300
1222    }
1223
1224    // ========================================================================
1225    // Bulk Insert Tests
1226    // ========================================================================
1227
1228    #[test]
1229    fn test_insert_batch_basic() {
1230        let mut table = create_test_table();
1231
1232        let rows = vec![create_row(1, "Alice"), create_row(2, "Bob"), create_row(3, "Charlie")];
1233
1234        let count = table.insert_batch(rows).unwrap();
1235
1236        assert_eq!(count, 3);
1237        assert_eq!(table.row_count(), 3);
1238
1239        // Verify data
1240        let scanned: Vec<_> = table.scan().to_vec();
1241        assert_eq!(scanned[0].values[0], SqlValue::Integer(1));
1242        assert_eq!(scanned[1].values[0], SqlValue::Integer(2));
1243        assert_eq!(scanned[2].values[0], SqlValue::Integer(3));
1244    }
1245
1246    #[test]
1247    fn test_insert_batch_empty() {
1248        let mut table = create_test_table();
1249
1250        let count = table.insert_batch(Vec::new()).unwrap();
1251
1252        assert_eq!(count, 0);
1253        assert_eq!(table.row_count(), 0);
1254    }
1255
1256    #[test]
1257    fn test_insert_batch_preserves_indexes() {
1258        let mut table = create_test_table();
1259
1260        let rows = vec![create_row(1, "Alice"), create_row(2, "Bob"), create_row(3, "Charlie")];
1261
1262        table.insert_batch(rows).unwrap();
1263
1264        // Primary key index should exist and have 3 entries
1265        assert!(table.primary_key_index().is_some());
1266        let pk_index = table.primary_key_index().unwrap();
1267        assert_eq!(pk_index.len(), 3);
1268
1269        // Each PK should map to correct row index
1270        assert_eq!(pk_index.get(&vec![SqlValue::Integer(1)]), Some(&0));
1271        assert_eq!(pk_index.get(&vec![SqlValue::Integer(2)]), Some(&1));
1272        assert_eq!(pk_index.get(&vec![SqlValue::Integer(3)]), Some(&2));
1273    }
1274
1275    #[test]
1276    fn test_insert_batch_columnar_scan_includes_new_rows() {
1277        let mut table = create_test_table();
1278
1279        // Insert some initial rows
1280        table.insert(create_row(1, "Alice")).unwrap();
1281        let _ = table.scan_columnar().unwrap();
1282
1283        // Batch insert more rows
1284        let rows = vec![create_row(2, "Bob"), create_row(3, "Charlie")];
1285        table.insert_batch(rows).unwrap();
1286
1287        // Columnar scan should reflect all rows
1288        let columnar = table.scan_columnar().unwrap();
1289        assert_eq!(columnar.row_count(), 3);
1290    }
1291
1292    #[test]
1293    fn test_insert_batch_validation_failure_is_atomic() {
1294        let mut table = create_test_table();
1295
1296        // Insert valid row first
1297        table.insert(create_row(1, "Alice")).unwrap();
1298
1299        // Try to batch insert with one invalid row (wrong column count)
1300        let rows = vec![
1301            Row::new(vec![SqlValue::Integer(2), SqlValue::Varchar(arcstr::ArcStr::from("Bob"))]),
1302            Row::new(vec![SqlValue::Integer(3)]), // Invalid - missing column
1303        ];
1304
1305        let result = table.insert_batch(rows);
1306        assert!(result.is_err());
1307
1308        // Table should still have only 1 row (atomic failure)
1309        assert_eq!(table.row_count(), 1);
1310    }
1311
1312    #[test]
1313    fn test_insert_batch_large() {
1314        let mut table = create_test_table();
1315
1316        // Insert 10000 rows in a batch
1317        let rows: Vec<Row> = (0..10_000).map(|i| create_row(i, &format!("User{}", i))).collect();
1318
1319        let count = table.insert_batch(rows).unwrap();
1320
1321        assert_eq!(count, 10_000);
1322        assert_eq!(table.row_count(), 10_000);
1323
1324        // Verify first and last rows
1325        let scanned = table.scan();
1326        assert_eq!(scanned[0].values[0], SqlValue::Integer(0));
1327        assert_eq!(scanned[9999].values[0], SqlValue::Integer(9999));
1328    }
1329
1330    #[test]
1331    fn test_insert_from_iter_basic() {
1332        let mut table = create_test_table();
1333
1334        let rows = (0..100).map(|i| create_row(i, &format!("User{}", i)));
1335
1336        let count = table.insert_from_iter(rows, 10).unwrap();
1337
1338        assert_eq!(count, 100);
1339        assert_eq!(table.row_count(), 100);
1340    }
1341
1342    #[test]
1343    fn test_insert_from_iter_default_batch_size() {
1344        let mut table = create_test_table();
1345
1346        let rows = (0..50).map(|i| create_row(i, &format!("User{}", i)));
1347
1348        // batch_size=0 should use default of 1000
1349        let count = table.insert_from_iter(rows, 0).unwrap();
1350
1351        assert_eq!(count, 50);
1352        assert_eq!(table.row_count(), 50);
1353    }
1354
1355    #[test]
1356    fn test_insert_from_iter_partial_final_batch() {
1357        let mut table = create_test_table();
1358
1359        // 25 rows with batch size 10 = 2 full batches + 5 remaining
1360        let rows = (0..25).map(|i| create_row(i, &format!("User{}", i)));
1361
1362        let count = table.insert_from_iter(rows, 10).unwrap();
1363
1364        assert_eq!(count, 25);
1365        assert_eq!(table.row_count(), 25);
1366    }
1367
1368    #[test]
1369    fn test_insert_batch_after_single_inserts() {
1370        let mut table = create_test_table();
1371
1372        // Single inserts first
1373        table.insert(create_row(1, "Alice")).unwrap();
1374        table.insert(create_row(2, "Bob")).unwrap();
1375
1376        // Then batch insert
1377        let rows = vec![create_row(3, "Charlie"), create_row(4, "David")];
1378        table.insert_batch(rows).unwrap();
1379
1380        assert_eq!(table.row_count(), 4);
1381
1382        // Verify indexes are correct
1383        let pk_index = table.primary_key_index().unwrap();
1384        assert_eq!(pk_index.get(&vec![SqlValue::Integer(1)]), Some(&0));
1385        assert_eq!(pk_index.get(&vec![SqlValue::Integer(2)]), Some(&1));
1386        assert_eq!(pk_index.get(&vec![SqlValue::Integer(3)]), Some(&2));
1387        assert_eq!(pk_index.get(&vec![SqlValue::Integer(4)]), Some(&3));
1388    }
1389}