vibesql_storage/table/mod.rs
1// ============================================================================
2// Table - In-Memory Storage Layer
3// ============================================================================
4//
5// This module provides the core Table abstraction for in-memory row storage.
6// The table implementation follows a delegation pattern, where specialized
7// components handle distinct concerns:
8//
9// ## Architecture
10//
11// ```
12// Table (Orchestration Layer)
13// ├─> IndexManager - Hash-based indexing for PK/UNIQUE constraints
14// ├─> RowNormalizer - Value normalization and validation
15// └─> AppendModeTracker - Sequential insert detection for optimization
16// ```
17//
18// ### Component Responsibilities
19//
20// **IndexManager** (`indexes.rs`):
21// - Maintains hash indexes for primary key and unique constraints
22// - Provides O(1) lookups for duplicate detection
23// - Handles index updates on INSERT/UPDATE/DELETE
24// - Supports selective index maintenance for performance
25//
26// **RowNormalizer** (`normalization.rs`):
27// - CHAR padding/truncation to fixed length
28// - Type validation (ensures values match column types)
29// - NULL constraint validation
30// - Column count verification
31//
32// **AppendModeTracker** (`append_mode.rs`):
33// - Detects sequential primary key insertion patterns
34// - Enables executor-level optimizations when sequential inserts detected
35// - Maintains O(1) tracking overhead
36// - Activates after threshold of consecutive sequential inserts
37//
38// ### Design Principles
39//
40// 1. **Separation of Concerns**: Each component handles one specific responsibility
41// 2. **Delegation Pattern**: Table orchestrates, components execute
42// 3. **Performance First**: Optimizations built into architecture (append mode, selective updates)
43// 4. **Clean API**: Public interface remains simple despite internal complexity
44//
45// ### Refactoring History
46//
47// This module structure is the result of a systematic refactoring effort (#842)
48// that extracted specialized components from a monolithic table.rs file:
49//
50// - **Phase 1** (PR #853): IndexManager extraction
51// - **Phase 3** (PR #856): RowNormalizer extraction
52// - **Phase 4** (PR #858): AppendModeTracker extraction
53// - **Phase 5** (PR #859): Documentation and finalization
54//
55// Note: Phase 2 (Constraint Validation) was closed as invalid - constraint
56// validation properly belongs in the executor layer, not the storage layer.
57
58mod append_mode;
59mod indexes;
60mod normalization;
61
62use append_mode::AppendModeTracker;
63use indexes::IndexManager;
64use normalization::RowNormalizer;
65use vibesql_types::SqlValue;
66
67use crate::{Row, StorageError};
68
/// Result of a delete operation, indicating how many rows were deleted
/// and whether table compaction occurred.
///
/// # Important
///
/// When `compacted` is true, all row indices in the table have changed.
/// User-defined indexes (B-tree indexes managed at the Database level)
/// must be rebuilt after compaction to maintain correctness.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DeleteResult {
    /// Number of rows that were deleted (rows already marked deleted are not counted)
    pub deleted_count: usize,
    /// Whether table compaction occurred (row indices changed; see note above)
    pub compacted: bool,
}
84
85impl DeleteResult {
86 /// Create a new DeleteResult
87 pub fn new(deleted_count: usize, compacted: bool) -> Self {
88 Self { deleted_count, compacted }
89 }
90}
91
/// In-memory table - stores rows with optimized indexing and validation
///
/// # Architecture
///
/// The `Table` struct acts as an orchestration layer, delegating specialized
/// operations to dedicated components:
///
/// - **Row Storage**: Direct Vec storage for sequential access (table scans)
/// - **Deletion Bitmap**: O(1) deletion via bitmap marking instead of Vec::remove()
/// - **Columnar Storage**: Native columnar storage for OLAP-optimized tables
/// - **Indexing**: `IndexManager` maintains hash indexes for constraint checks
/// - **Normalization**: `RowNormalizer` handles value transformation and validation
/// - **Optimization**: Append mode tracking for sequential insert performance
///
/// # Storage Formats
///
/// Tables support two storage formats:
/// - **Row-oriented (default)**: Traditional row storage, optimized for OLTP
/// - **Columnar**: Native column storage, optimized for OLAP with zero conversion overhead
///
/// ## Columnar Storage Limitations
///
/// **IMPORTANT**: Columnar tables are optimized for read-heavy analytical workloads.
/// Each INSERT/UPDATE/DELETE operation triggers a full rebuild of the columnar
/// representation (O(n) cost). This makes columnar tables unsuitable for:
/// - High-frequency INSERT workloads
/// - OLTP use cases with frequent writes
/// - Streaming inserts
///
/// **Recommended use cases for columnar tables**:
/// - Bulk-loaded analytical data (load once, query many times)
/// - Reporting tables with infrequent updates
/// - Data warehouse fact tables
///
/// For mixed workloads, use row-oriented storage with the columnar cache
/// (via `scan_columnar()`), which provides SIMD acceleration with caching.
///
/// # Performance Characteristics
///
/// - **INSERT**: O(1) amortized for row append + O(1) for index updates
/// - **UPDATE**: O(1) for row update + O(k) for k affected indexes (selective mode)
/// - **DELETE**: O(1) per row via bitmap marking (amortized O(n) for compaction)
/// - **SCAN**: O(n) direct vector iteration (skipping deleted rows)
/// - **COLUMNAR SCAN**: O(n) with SIMD acceleration (no conversion overhead for native columnar)
/// - **PK/UNIQUE lookup**: O(1) via hash indexes
///
/// # Example
///
/// ```text
/// use vibesql_catalog::TableSchema;
/// use vibesql_storage::Table;
///
/// let schema = TableSchema::new("users", columns);
/// let mut table = Table::new(schema);
///
/// // Insert automatically validates and indexes
/// table.insert(row)?;
///
/// // Scan returns all rows
/// for row in table.scan() {
///     // Process row...
/// }
/// ```
#[derive(Debug)]
pub struct Table {
    /// Table schema defining structure and constraints
    pub schema: vibesql_catalog::TableSchema,

    /// Row storage - direct vector for sequential access (row-oriented tables only)
    /// Physical storage: includes logically-deleted rows until compaction,
    /// so consult `deleted` (or use `scan_live()`) when iterating.
    rows: Vec<Row>,

    /// Deletion bitmap - tracks which rows are logically deleted
    /// Invariant: kept the same length as `rows` (entries are pushed/cleared in lockstep)
    /// Uses O(1) bit operations instead of O(n) Vec::remove()
    /// Compaction occurs when deleted_count > rows.len() / 2
    deleted: Vec<bool>,

    /// Count of deleted rows (cached to avoid counting bits on every `row_count()`)
    deleted_count: usize,

    /// Native columnar storage - primary storage for columnar tables
    /// For columnar tables, this is the authoritative data source
    /// For row tables, this is None (use Database::get_columnar() for cached columnar data)
    native_columnar: Option<crate::ColumnarTable>,

    /// Hash indexes for constraint validation (managed by IndexManager)
    /// Provides O(1) lookups for primary key and unique constraints
    indexes: IndexManager,

    /// Append mode optimization tracking (managed by AppendModeTracker)
    /// Detects sequential primary key inserts for executor-level optimizations
    append_tracker: AppendModeTracker,

    /// Cached statistics for query optimization (computed lazily; may be stale)
    statistics: Option<crate::statistics::TableStatistics>,

    /// Counter for modifications since last statistics update
    /// Used to mark `statistics` stale once changes exceed ~10% of the table
    modifications_since_stats: usize,
    // Note: Table-level columnar caching was removed in #3892 to eliminate duplicate
    // caching with Database::columnar_cache. All columnar caching now goes through
    // Database::get_columnar() which provides LRU eviction and Arc-based sharing.
    // Table::scan_columnar() performs fresh conversion on each call.
}
194
195impl Clone for Table {
196 fn clone(&self) -> Self {
197 Table {
198 schema: self.schema.clone(),
199 rows: self.rows.clone(),
200 deleted: self.deleted.clone(),
201 deleted_count: self.deleted_count,
202 native_columnar: self.native_columnar.clone(),
203 indexes: self.indexes.clone(),
204 append_tracker: self.append_tracker.clone(),
205 statistics: self.statistics.clone(),
206 modifications_since_stats: self.modifications_since_stats,
207 }
208 }
209}
210
211impl Table {
212 /// Create a new empty table with given schema
213 ///
214 /// The storage format is determined by the schema's storage_format field:
215 /// - Row: Traditional row-oriented storage (default)
216 /// - Columnar: Native columnar storage for analytical workloads
217 pub fn new(schema: vibesql_catalog::TableSchema) -> Self {
218 let indexes = IndexManager::new(&schema);
219 let is_columnar = schema.is_columnar();
220
221 // For columnar tables, initialize empty native columnar storage
222 let native_columnar = if is_columnar {
223 // Create empty columnar table with column names from schema
224 let column_names: Vec<String> = schema.columns.iter().map(|c| c.name.clone()).collect();
225 Some(
226 crate::ColumnarTable::from_rows(&[], &column_names)
227 .expect("Creating empty columnar table should never fail"),
228 )
229 } else {
230 None
231 };
232
233 Table {
234 schema,
235 rows: Vec::new(),
236 deleted: Vec::new(),
237 deleted_count: 0,
238 native_columnar,
239 indexes,
240 append_tracker: AppendModeTracker::new(),
241 statistics: None,
242 modifications_since_stats: 0,
243 }
244 }
245
    /// Check if this table uses native columnar storage
    ///
    /// True when `native_columnar` is populated, i.e. the schema requested
    /// columnar storage when the table was created (see `Table::new`).
    pub fn is_native_columnar(&self) -> bool {
        self.native_columnar.is_some()
    }
250
251 /// Insert a row into the table
252 ///
253 /// For row-oriented tables, rows are stored directly in a Vec.
254 /// For columnar tables, rows are buffered and the columnar data is rebuilt.
255 pub fn insert(&mut self, row: Row) -> Result<(), StorageError> {
256 // Normalize and validate row (column count, type checking, NULL checking, value
257 // normalization)
258 let normalizer = RowNormalizer::new(&self.schema);
259 let normalized_row = normalizer.normalize_and_validate(row)?;
260
261 // Detect sequential append pattern before inserting
262 if let Some(pk_indices) = self.schema.get_primary_key_indices() {
263 let pk_values: Vec<SqlValue> =
264 pk_indices.iter().map(|&idx| normalized_row.values[idx].clone()).collect();
265 self.append_tracker.update(&pk_values);
266 }
267
268 // Add row to table (always stored for indexing and potential row access)
269 let row_index = self.rows.len();
270 self.rows.push(normalized_row.clone());
271 self.deleted.push(false);
272
273 // Update indexes (delegate to IndexManager)
274 self.indexes.update_for_insert(&self.schema, &normalized_row, row_index);
275
276 // Track modifications for statistics staleness
277 self.modifications_since_stats += 1;
278
279 // Mark stats stale if significant changes (> 10% of table)
280 if let Some(stats) = &mut self.statistics {
281 if self.modifications_since_stats > stats.row_count / 10 {
282 stats.mark_stale();
283 }
284 }
285
286 // For native columnar tables, rebuild columnar data
287 // Note: Database-level columnar cache invalidation is handled by the executor
288 if self.native_columnar.is_some() {
289 self.rebuild_native_columnar()?;
290 }
291
292 Ok(())
293 }
294
295 /// Rebuild native columnar storage from rows (excluding deleted rows)
296 fn rebuild_native_columnar(&mut self) -> Result<(), StorageError> {
297 let column_names: Vec<String> =
298 self.schema.columns.iter().map(|c| c.name.clone()).collect();
299
300 // Collect only live rows for columnar conversion
301 let live_rows: Vec<&Row> = self
302 .rows
303 .iter()
304 .enumerate()
305 .filter(|(idx, _)| !self.deleted[*idx])
306 .map(|(_, row)| row)
307 .collect();
308
309 let columnar = crate::ColumnarTable::from_row_refs(&live_rows, &column_names)
310 .map_err(|e| StorageError::Other(format!("Columnar rebuild failed: {}", e)))?;
311
312 self.native_columnar = Some(columnar);
313 Ok(())
314 }
315
    /// Insert multiple rows into the table in a single batch operation
    ///
    /// This method is optimized for bulk data loading and provides significant
    /// performance improvements over repeated single-row inserts:
    ///
    /// - **Pre-allocation**: Vector capacity is reserved upfront
    /// - **Batch normalization**: Rows are validated/normalized together
    /// - **Incremental index updates**: Indexes are updated in one pass over only
    ///   the newly inserted rows (O(batch), not a full rebuild)
    /// - **Single cache invalidation**: Columnar cache invalidated once at end
    /// - **Statistics update once**: Stats marked stale only at completion
    ///
    /// # Arguments
    ///
    /// * `rows` - Vector of rows to insert
    ///
    /// # Returns
    ///
    /// * `Ok(usize)` - Number of rows successfully inserted
    /// * `Err(StorageError)` - If any row fails validation (no rows inserted on error;
    ///   all rows are validated in Phase 1 before any state is modified)
    ///
    /// # Performance
    ///
    /// For large batches (1000+ rows), this method is typically 10-50x faster
    /// than equivalent single-row inserts due to reduced per-row overhead.
    ///
    /// # Example
    ///
    /// ```text
    /// let rows = vec![
    ///     Row::new(vec![SqlValue::Integer(1), SqlValue::Varchar(arcstr::ArcStr::from("Alice"))]),
    ///     Row::new(vec![SqlValue::Integer(2), SqlValue::Varchar(arcstr::ArcStr::from("Bob"))]),
    ///     Row::new(vec![SqlValue::Integer(3), SqlValue::Varchar(arcstr::ArcStr::from("Charlie"))]),
    /// ];
    /// let count = table.insert_batch(rows)?;
    /// assert_eq!(count, 3);
    /// ```
    pub fn insert_batch(&mut self, rows: Vec<Row>) -> Result<usize, StorageError> {
        if rows.is_empty() {
            return Ok(0);
        }

        let row_count = rows.len();
        let normalizer = RowNormalizer::new(&self.schema);

        // Phase 1: Normalize and validate all rows upfront
        // This ensures we fail fast before modifying any state
        let mut normalized_rows = Vec::with_capacity(row_count);
        for row in rows {
            let normalized = normalizer.normalize_and_validate(row)?;
            normalized_rows.push(normalized);
        }

        // Phase 2: Pre-allocate capacity for rows and deleted vectors
        self.rows.reserve(row_count);
        self.deleted.reserve(row_count);

        // Record starting index for incremental index updates
        let start_index = self.rows.len();

        // Phase 3: Insert all rows into storage
        for row in normalized_rows {
            self.rows.push(row);
            self.deleted.push(false);
        }

        // Phase 4: Incrementally update indexes for only the new rows
        // This is O(batch_size) instead of O(total_rows), avoiding O(n²) behavior
        // when doing multiple batch inserts
        for (i, row) in self.rows[start_index..].iter().enumerate() {
            self.indexes.update_for_insert(&self.schema, row, start_index + i);
        }

        // Phase 5: Update append mode tracker with last inserted row
        // (We only track the final state, not intermediate states)
        if let Some(pk_indices) = self.schema.get_primary_key_indices() {
            if let Some(last_row) = self.rows.last() {
                let pk_values: Vec<SqlValue> =
                    pk_indices.iter().map(|&idx| last_row.values[idx].clone()).collect();
                // Reset tracker and set to last value (bulk insert breaks sequential pattern)
                self.append_tracker.reset();
                self.append_tracker.update(&pk_values);
            }
        }

        // Phase 6: Update statistics tracking
        self.modifications_since_stats += row_count;
        if let Some(stats) = &mut self.statistics {
            if self.modifications_since_stats > stats.row_count / 10 {
                stats.mark_stale();
            }
        }

        // Phase 7: Handle columnar storage
        // For native columnar tables, rebuild columnar data
        // Note: Database-level columnar cache invalidation is handled by the executor
        if self.native_columnar.is_some() {
            self.rebuild_native_columnar()?;
        }

        Ok(row_count)
    }
417
418 /// Insert rows from an iterator in a streaming fashion
419 ///
420 /// This method is optimized for very large datasets that may not fit
421 /// in memory all at once. Rows are processed in configurable batch sizes.
422 ///
423 /// # Arguments
424 ///
425 /// * `rows` - Iterator yielding rows to insert
426 /// * `batch_size` - Number of rows to process per batch (default: 1000)
427 ///
428 /// # Returns
429 ///
430 /// * `Ok(usize)` - Total number of rows successfully inserted
431 /// * `Err(StorageError)` - If any row fails validation
432 ///
433 /// # Note
434 ///
435 /// Unlike `insert_batch`, this method commits rows in batches, so a failure
436 /// partway through will leave previously committed batches in the table.
437 /// Use `insert_batch` if you need all-or-nothing semantics.
438 ///
439 /// # Example
440 ///
441 /// ```text
442 /// // Stream rows from a file reader
443 /// let rows_iter = csv_reader.rows().map(|r| Row::from_csv_record(r));
444 /// let count = table.insert_from_iter(rows_iter, 1000)?;
445 /// ```
446 pub fn insert_from_iter<I>(&mut self, rows: I, batch_size: usize) -> Result<usize, StorageError>
447 where
448 I: Iterator<Item = Row>,
449 {
450 let batch_size = if batch_size == 0 { 1000 } else { batch_size };
451 let mut total_inserted = 0;
452 let mut batch = Vec::with_capacity(batch_size);
453
454 for row in rows {
455 batch.push(row);
456
457 if batch.len() >= batch_size {
458 let count = self.insert_batch(std::mem::take(&mut batch))?;
459 total_inserted += count;
460 batch = Vec::with_capacity(batch_size);
461 }
462 }
463
464 // Insert any remaining rows
465 if !batch.is_empty() {
466 let count = self.insert_batch(batch)?;
467 total_inserted += count;
468 }
469
470 Ok(total_inserted)
471 }
472
    /// Get all rows for scanning
    ///
    /// Returns a slice of all rows in the table. This is the raw physical
    /// storage, so it may include rows that are logically deleted but not
    /// yet compacted.
    ///
    /// **Important**: For operations that need to skip deleted rows, use `scan_live()`
    /// which filters deleted rows automatically, or check `is_row_deleted(idx)`
    /// per index when iterating this slice directly.
    pub fn scan(&self) -> &[Row] {
        &self.rows
    }
483
484 /// Check if a row at the given index is deleted
485 #[inline]
486 pub fn is_row_deleted(&self, idx: usize) -> bool {
487 idx < self.deleted.len() && self.deleted[idx]
488 }
489
490 /// Iterate over live (non-deleted) rows with their physical indices
491 ///
492 /// This is the preferred way to scan table data, as it automatically
493 /// skips rows that have been deleted but not yet compacted.
494 ///
495 /// # Returns
496 /// An iterator yielding `(physical_index, &Row)` pairs for all live rows.
497 ///
498 /// # Example
499 /// ```text
500 /// for (idx, row) in table.scan_live() {
501 /// // idx is the physical index, can be used with get_row() or delete_by_indices()
502 /// process_row(idx, row);
503 /// }
504 /// ```
505 #[inline]
506 pub fn scan_live(&self) -> impl Iterator<Item = (usize, &Row)> {
507 self.rows.iter().enumerate().filter(|(idx, _)| !self.deleted[*idx])
508 }
509
510 /// Scan only live (non-deleted) rows, returning an owned Vec.
511 ///
512 /// This method provides an efficient way to get all live rows as a Vec<Row>
513 /// for executor paths that need owned data. Unlike `scan()` which returns
514 /// all rows including deleted ones, this method filters out deleted rows.
515 ///
516 /// # Performance
517 /// O(n) time and space where n is the number of live rows.
518 /// Pre-allocates the exact capacity needed based on `row_count()`.
519 ///
520 /// # Returns
521 /// A Vec containing clones of all non-deleted rows.
522 ///
523 /// # Example
524 /// ```text
525 /// // For SELECT queries that need a Vec<Row>
526 /// let rows = table.scan_live_vec();
527 /// ```
528 #[inline]
529 pub fn scan_live_vec(&self) -> Vec<Row> {
530 let mut result = Vec::with_capacity(self.row_count());
531 for (idx, row) in self.rows.iter().enumerate() {
532 if !self.deleted[idx] {
533 let mut cloned = row.clone();
534 // Set row_id for ROWID pseudo-column support (SQLite compatibility)
535 // If the row already has an explicit row_id (from INSERT INTO t(rowid,...)),
536 // preserve it. Otherwise, use 1-indexed physical index.
537 if cloned.row_id.is_none() {
538 cloned.row_id = Some((idx + 1) as u64);
539 }
540 result.push(cloned);
541 }
542 }
543 result
544 }
545
546 /// Get a single row by index position (O(1) access)
547 ///
548 /// Returns None if the row is deleted or index is out of bounds.
549 ///
550 /// # Arguments
551 /// * `idx` - The row index position (physical index)
552 ///
553 /// # Returns
554 /// * `Some(&Row)` - The row at the given index if it exists and is not deleted
555 /// * `None` - If the index is out of bounds or row is deleted
556 #[inline]
557 pub fn get_row(&self, idx: usize) -> Option<&Row> {
558 if idx < self.deleted.len() && self.deleted[idx] {
559 return None;
560 }
561 self.rows.get(idx)
562 }
563
564 /// Scan table data in columnar format for SIMD-accelerated processing
565 ///
566 /// This method returns columnar data suitable for high-performance analytical queries.
567 /// Unlike `scan()` which returns row-oriented data, this method returns column-oriented
568 /// data that enables:
569 ///
570 /// - **SIMD vectorization**: Process 4-8 values per CPU instruction
571 /// - **Cache efficiency**: Contiguous column data improves memory access patterns
572 /// - **Type specialization**: Avoid SqlValue enum matching overhead
573 ///
574 /// # Performance
575 ///
576 /// For **native columnar tables**: Zero conversion overhead - returns data directly.
577 /// For **row tables**: O(n * m) conversion cost per call.
578 ///
579 /// # Caching
580 ///
581 /// This method does not cache results. For cached columnar access with LRU eviction,
582 /// use `Database::get_columnar()` which provides Arc-based sharing across queries.
583 ///
584 /// # Returns
585 ///
586 /// * `Ok(ColumnarTable)` - Columnar representation of the table data
587 /// * `Err(StorageError)` - If conversion fails due to type mismatches
588 ///
589 /// # Example
590 ///
591 /// ```text
592 /// let columnar = table.scan_columnar()?;
593 /// // Process with SIMD-accelerated operations
594 /// if let Some(ColumnData::Int64 { values, nulls }) = columnar.get_column("quantity") {
595 /// // SIMD filtering on values slice
596 /// }
597 /// ```
598 pub fn scan_columnar(&self) -> Result<crate::ColumnarTable, StorageError> {
599 // For native columnar tables, return data directly (zero conversion overhead)
600 if let Some(ref native) = self.native_columnar {
601 return Ok(native.clone());
602 }
603
604 // For row tables, perform fresh conversion each time
605 // Note: Caching is now handled at the Database level via Database::get_columnar()
606 // which provides LRU eviction and Arc-based sharing across queries.
607
608 // Get column names from schema
609 let column_names: Vec<String> =
610 self.schema.columns.iter().map(|c| c.name.clone()).collect();
611
612 // Collect only live rows for columnar conversion
613 let live_rows: Vec<&Row> = self
614 .rows
615 .iter()
616 .enumerate()
617 .filter(|(idx, _)| !self.deleted[*idx])
618 .map(|(_, row)| row)
619 .collect();
620
621 // Convert rows to columnar format
622 crate::ColumnarTable::from_row_refs(&live_rows, &column_names)
623 .map_err(|e| StorageError::Other(format!("Columnar conversion failed: {}", e)))
624 }
625
    /// Get number of live (non-deleted) rows
    ///
    /// O(1): subtracts the cached `deleted_count` from physical storage size
    /// instead of scanning the deletion bitmap.
    pub fn row_count(&self) -> usize {
        self.rows.len() - self.deleted_count
    }
630
    /// Get total number of rows including deleted ones (physical storage size)
    ///
    /// Counts rows that are marked deleted but not yet removed by compaction;
    /// use `row_count()` for the live-row count.
    #[inline]
    pub fn physical_row_count(&self) -> usize {
        self.rows.len()
    }
636
    /// Get count of deleted (logically removed) rows
    ///
    /// This is used for DML cost estimation, as tables with many deleted rows
    /// may have degraded performance for UPDATE/DELETE operations.
    ///
    /// O(1): returns the cached counter rather than scanning the bitmap.
    #[inline]
    pub fn deleted_count(&self) -> usize {
        self.deleted_count
    }
645
646 /// Get table statistics, computing if necessary
647 ///
648 /// Statistics are computed lazily on first access and cached.
649 /// They are marked stale after significant data changes (> 10% of rows).
650 pub fn statistics(&mut self) -> &crate::statistics::TableStatistics {
651 if self.statistics.is_none() || self.statistics.as_ref().unwrap().needs_refresh() {
652 self.statistics =
653 Some(crate::statistics::TableStatistics::compute(&self.rows, &self.schema));
654 self.modifications_since_stats = 0;
655 }
656
657 self.statistics.as_ref().unwrap()
658 }
659
    /// Get cached table statistics without computing
    ///
    /// Returns None only if statistics have never been computed.
    /// NOTE: staleness is NOT checked here - a stale snapshot is still
    /// returned as `Some`. Use `statistics()` to compute/refresh first.
    pub fn get_statistics(&self) -> Option<&crate::statistics::TableStatistics> {
        self.statistics.as_ref()
    }
667
668 /// Force recomputation of statistics (ANALYZE command)
669 pub fn analyze(&mut self) {
670 self.statistics =
671 Some(crate::statistics::TableStatistics::compute(&self.rows, &self.schema));
672 self.modifications_since_stats = 0;
673 }
674
    /// Check if table is in append mode (sequential inserts detected)
    ///
    /// When true, constraint checks can skip duplicate lookups for optimization.
    /// Delegates to the `AppendModeTracker`, which is fed by `insert` /
    /// `insert_batch` and reset by `clear`.
    pub fn is_in_append_mode(&self) -> bool {
        self.append_tracker.is_active()
    }
680
681 /// Clear all rows
682 pub fn clear(&mut self) {
683 self.rows.clear();
684 self.deleted.clear();
685 self.deleted_count = 0;
686 // Clear indexes (delegate to IndexManager)
687 self.indexes.clear();
688 // Reset append mode tracking
689 self.append_tracker.reset();
690 // Clear native columnar if present
691 // Note: Database-level columnar cache invalidation is handled by the executor
692 if self.native_columnar.is_some() {
693 let column_names: Vec<String> =
694 self.schema.columns.iter().map(|c| c.name.clone()).collect();
695 self.native_columnar = Some(
696 crate::ColumnarTable::from_rows(&[], &column_names)
697 .expect("Creating empty columnar table should never fail"),
698 );
699 }
700 }
701
702 /// Update a row at the specified index
703 pub fn update_row(&mut self, index: usize, row: Row) -> Result<(), StorageError> {
704 if index >= self.rows.len() {
705 return Err(StorageError::ColumnIndexOutOfBounds { index });
706 }
707
708 // Cannot update a deleted row
709 if self.deleted[index] {
710 return Err(StorageError::RowNotFound);
711 }
712
713 // Normalize and validate row
714 let normalizer = RowNormalizer::new(&self.schema);
715 let normalized_row = normalizer.normalize_and_validate(row)?;
716
717 // Get old row for index updates (clone to avoid borrow issues)
718 let old_row = self.rows[index].clone();
719
720 // Update the row
721 self.rows[index] = normalized_row.clone();
722
723 // Update indexes (delegate to IndexManager)
724 self.indexes.update_for_update(&self.schema, &old_row, &normalized_row, index);
725
726 // For native columnar tables, rebuild columnar data
727 // Note: Database-level columnar cache invalidation is handled by the executor
728 if self.native_columnar.is_some() {
729 self.rebuild_native_columnar()?;
730 }
731
732 Ok(())
733 }
734
735 /// Update a row with selective index maintenance
736 ///
737 /// Only updates indexes that reference changed columns, providing significant
738 /// performance improvement for tables with many indexes when updating non-indexed columns.
739 ///
740 /// # Arguments
741 /// * `index` - Row index to update
742 /// * `row` - New row data
743 /// * `changed_columns` - Set of column indices that were modified
744 ///
745 /// # Returns
746 /// * `Ok(())` on success
747 /// * `Err(StorageError)` if index out of bounds or column count mismatch
748 pub fn update_row_selective(
749 &mut self,
750 index: usize,
751 row: Row,
752 changed_columns: &std::collections::HashSet<usize>,
753 ) -> Result<(), StorageError> {
754 if index >= self.rows.len() {
755 return Err(StorageError::ColumnIndexOutOfBounds { index });
756 }
757
758 // Cannot update a deleted row
759 if self.deleted[index] {
760 return Err(StorageError::RowNotFound);
761 }
762
763 // Normalize and validate row
764 let normalizer = RowNormalizer::new(&self.schema);
765 let normalized_row = normalizer.normalize_and_validate(row)?;
766
767 // Get old row for index updates (clone to avoid borrow issues)
768 let old_row = self.rows[index].clone();
769
770 // Determine which indexes are affected by the changed columns (delegate to IndexManager)
771 let affected_indexes = self.indexes.get_affected_indexes(&self.schema, changed_columns);
772
773 // Update only affected indexes BEFORE replacing row (delegate to IndexManager)
774 self.indexes.update_selective(
775 &self.schema,
776 &old_row,
777 &normalized_row,
778 index,
779 &affected_indexes,
780 );
781
782 // Update the row (move ownership, no clone needed)
783 self.rows[index] = normalized_row;
784
785 // For native columnar tables, rebuild columnar data
786 // Note: Database-level columnar cache invalidation is handled by the executor
787 if self.native_columnar.is_some() {
788 self.rebuild_native_columnar()?;
789 }
790
791 Ok(())
792 }
793
794 /// Fast path update for pre-validated rows
795 ///
796 /// This variant skips normalization/validation, assuming the caller has already
797 /// validated the row data. Use for performance-critical UPDATE paths where
798 /// validation was done at the executor level.
799 ///
800 /// # Arguments
801 /// * `index` - Row index to update
802 /// * `new_row` - Pre-validated new row data (ownership transferred)
803 /// * `old_row` - Reference to old row for index updates
804 /// * `changed_columns` - Set of column indices that were modified
805 ///
806 /// # Safety
807 /// Caller must ensure row data is valid (correct column count, types, constraints)
808 #[inline]
809 pub fn update_row_unchecked(
810 &mut self,
811 index: usize,
812 new_row: Row,
813 old_row: &Row,
814 changed_columns: &std::collections::HashSet<usize>,
815 ) {
816 // Determine which indexes are affected by the changed columns
817 let affected_indexes = self.indexes.get_affected_indexes(&self.schema, changed_columns);
818
819 // Update affected indexes BEFORE replacing row
820 self.indexes.update_selective(&self.schema, old_row, &new_row, index, &affected_indexes);
821
822 // Update the row (direct move, no validation)
823 self.rows[index] = new_row;
824
825 // Note: Database-level columnar cache invalidation is handled by the executor
826 }
827
828 /// Update a single column value in-place without cloning the row
829 ///
830 /// This is the fastest possible update path for non-indexed columns:
831 /// - No row cloning (direct in-place modification)
832 /// - No index updates (caller must verify column is not indexed)
833 /// - No validation (caller must pre-validate the value)
834 ///
835 /// # Arguments
836 ///
837 /// * `row_index` - Index of the row to update
838 /// * `col_index` - Index of the column to update
839 /// * `new_value` - The new value for the column
840 ///
841 /// # Safety
842 ///
843 /// Caller must ensure:
844 /// - The column is NOT indexed (no internal or user-defined indexes)
845 /// - The value satisfies all constraints (NOT NULL, type, etc.)
846 #[inline]
847 pub fn update_column_inplace(
848 &mut self,
849 row_index: usize,
850 col_index: usize,
851 new_value: vibesql_types::SqlValue,
852 ) {
853 self.rows[row_index].values[col_index] = new_value;
854
855 // Note: Database-level columnar cache invalidation is handled by the executor
856 }
857
858 /// Delete rows matching a predicate
859 ///
860 /// Uses O(1) bitmap marking for each deleted row instead of O(n) Vec::remove().
861 ///
862 /// # Returns
863 /// [`DeleteResult`] containing the count of deleted rows and whether compaction occurred.
864 pub fn delete_where<F>(&mut self, mut predicate: F) -> DeleteResult
865 where
866 F: FnMut(&Row) -> bool,
867 {
868 // Collect indices of rows to delete (skip already-deleted rows)
869 let mut indices_to_delete: Vec<usize> = Vec::new();
870 for (index, row) in self.rows.iter().enumerate() {
871 if !self.deleted[index] && predicate(row) {
872 indices_to_delete.push(index);
873 }
874 }
875
876 if indices_to_delete.is_empty() {
877 return DeleteResult::new(0, false);
878 }
879
880 // Use the optimized delete_by_indices which uses bitmap marking
881 self.delete_by_indices(&indices_to_delete)
882 }
883
884 /// Remove a specific row (used for transaction undo)
885 /// Returns error if row not found
886 ///
887 /// Uses O(1) bitmap marking instead of O(n) Vec::remove().
888 ///
889 /// Note: This method does not return compaction status since it's used
890 /// internally for transaction rollback where index consistency is handled
891 /// at a higher level.
892 pub fn remove_row(&mut self, target_row: &Row) -> Result<(), StorageError> {
893 // Find the first matching non-deleted row
894 for (idx, row) in self.rows.iter().enumerate() {
895 if !self.deleted[idx] && row == target_row {
896 // Use delete_by_indices for consistent behavior
897 // Note: We ignore compaction status here since transaction rollback
898 // handles index consistency at the transaction layer
899 let _ = self.delete_by_indices(&[idx]);
900 return Ok(());
901 }
902 }
903 Err(StorageError::RowNotFound)
904 }
905
906 /// Delete rows by known indices (fast path - no scanning required)
907 ///
908 /// Uses O(1) bitmap marking instead of O(n) Vec::remove(). Rows are marked
909 /// as deleted but remain in the vector until compaction is triggered.
910 ///
911 /// # Arguments
912 /// * `indices` - Indices of rows to delete, need not be sorted
913 ///
914 /// # Returns
915 /// [`DeleteResult`] containing:
916 /// - `deleted_count`: Number of rows deleted
917 /// - `compacted`: Whether compaction occurred (row indices changed)
918 ///
919 /// # Important
920 ///
921 /// When `compacted` is true, all row indices in the table have changed.
922 /// User-defined indexes (B-tree indexes managed at the Database level)
923 /// must be rebuilt after compaction to maintain correctness.
924 ///
925 /// # Performance
926 /// O(d) where d = number of rows to delete, compared to O(d * n) for Vec::remove()
927 pub fn delete_by_indices(&mut self, indices: &[usize]) -> DeleteResult {
928 if indices.is_empty() {
929 return DeleteResult::new(0, false);
930 }
931
932 // Count valid, non-already-deleted indices
933 let mut deleted = 0;
934 for &idx in indices {
935 // Skip invalid or already-deleted indices
936 if idx >= self.rows.len() || self.deleted[idx] {
937 continue;
938 }
939
940 // Update indexes for this row BEFORE marking as deleted
941 let row = &self.rows[idx];
942 self.indexes.update_for_delete(&self.schema, row);
943
944 // Mark row as deleted - O(1) operation
945 self.deleted[idx] = true;
946 self.deleted_count += 1;
947 deleted += 1;
948 }
949
950 if deleted == 0 {
951 return DeleteResult::new(0, false);
952 }
953
954 // Check if compaction is needed (> 50% deleted)
955 // Compaction rebuilds the vectors without deleted rows
956 // NOTE: When compaction occurs, all row indices change and user-defined
957 // indexes (B-tree indexes) must be rebuilt by the caller
958 let compacted = if self.should_compact() {
959 self.compact();
960 true
961 } else {
962 false
963 };
964
965 // For native columnar tables, rebuild columnar data
966 // Note: Database-level columnar cache invalidation is handled by the executor
967 if self.native_columnar.is_some() {
968 let _ = self.rebuild_native_columnar();
969 }
970
971 DeleteResult::new(deleted, compacted)
972 }
973
974 /// Delete rows by known indices with batch-optimized internal index updates
975 ///
976 /// This is an optimized version of `delete_by_indices` that pre-computes
977 /// schema lookups for internal hash indexes, reducing overhead for multi-row
978 /// deletes by ~30-40%.
979 ///
980 /// # Arguments
981 /// * `indices` - Indices of rows to delete, need not be sorted
982 ///
983 /// # Returns
984 /// [`DeleteResult`] containing:
985 /// - `deleted_count`: Number of rows deleted
986 /// - `compacted`: Whether compaction occurred (row indices changed)
987 ///
988 /// # Performance
989 /// - Pre-computes PK/unique column indices once (O(1) vs O(d) schema lookups)
990 /// - Uses batch index updates for internal hash indexes
991 /// - Best for multi-row deletes; single-row deletes use `delete_by_indices`
992 pub fn delete_by_indices_batch(&mut self, indices: &[usize]) -> DeleteResult {
993 if indices.is_empty() {
994 return DeleteResult::new(0, false);
995 }
996
997 // For single-row deletes, use the standard path (no batch overhead)
998 if indices.len() == 1 {
999 return self.delete_by_indices(indices);
1000 }
1001
1002 // Phase 1: Collect valid rows to delete and their references
1003 // This avoids repeated bounds/deleted checks
1004 let mut valid_indices: Vec<usize> = Vec::with_capacity(indices.len());
1005 let mut rows_to_delete: Vec<&Row> = Vec::with_capacity(indices.len());
1006
1007 for &idx in indices {
1008 if idx < self.rows.len() && !self.deleted[idx] {
1009 valid_indices.push(idx);
1010 rows_to_delete.push(&self.rows[idx]);
1011 }
1012 }
1013
1014 if valid_indices.is_empty() {
1015 return DeleteResult::new(0, false);
1016 }
1017
1018 // Phase 2: Batch update internal hash indexes (pre-computes column indices once)
1019 self.indexes.batch_update_for_delete(&self.schema, &rows_to_delete);
1020
1021 // Phase 3: Mark rows as deleted
1022 let deleted = valid_indices.len();
1023 for idx in valid_indices {
1024 self.deleted[idx] = true;
1025 self.deleted_count += 1;
1026 }
1027
1028 // Phase 4: Check compaction and handle columnar
1029 let compacted = if self.should_compact() {
1030 self.compact();
1031 true
1032 } else {
1033 false
1034 };
1035
1036 // For native columnar tables, rebuild columnar data
1037 // (Row tables use Database::columnar_cache which is invalidated by executors)
1038 if self.native_columnar.is_some() {
1039 let _ = self.rebuild_native_columnar();
1040 }
1041
1042 DeleteResult::new(deleted, compacted)
1043 }
1044
1045 /// Check if the table should be compacted
1046 ///
1047 /// Compaction is triggered when more than 50% of rows are deleted.
1048 /// This prevents unbounded growth of deleted row storage.
1049 #[inline]
1050 fn should_compact(&self) -> bool {
1051 // Only compact if we have at least some rows and > 50% are deleted
1052 !self.rows.is_empty() && self.deleted_count > self.rows.len() / 2
1053 }
1054
1055 /// Compact the table by removing deleted rows
1056 ///
1057 /// This rebuilds the rows vector without deleted entries and rebuilds
1058 /// all indexes to point to the new positions.
1059 fn compact(&mut self) {
1060 if self.deleted_count == 0 {
1061 return;
1062 }
1063
1064 // Build new vectors with only live rows
1065 let mut new_rows = Vec::with_capacity(self.rows.len() - self.deleted_count);
1066 for (idx, row) in self.rows.iter().enumerate() {
1067 if !self.deleted[idx] {
1068 new_rows.push(row.clone());
1069 }
1070 }
1071
1072 // Replace old vectors with compacted ones
1073 self.rows = new_rows;
1074 self.deleted = vec![false; self.rows.len()];
1075 self.deleted_count = 0;
1076
1077 // Rebuild all indexes since row positions have changed
1078 self.indexes.rebuild(&self.schema, &self.rows);
1079 }
1080
1081 /// Check if a row at the given index is deleted
1082 #[inline]
1083 pub fn is_deleted(&self, idx: usize) -> bool {
1084 idx < self.deleted.len() && self.deleted[idx]
1085 }
1086
    /// Get mutable reference to rows
    ///
    /// NOTE(review): mutating rows through this handle bypasses the hash-index
    /// maintenance and deletion-bitmap bookkeeping done by the insert/delete
    /// paths — callers presumably restore consistency afterwards (e.g. via
    /// `rebuild_indexes`); confirm at call sites.
    pub fn rows_mut(&mut self) -> &mut Vec<Row> {
        &mut self.rows
    }
1091
    /// Get mutable reference to schema
    ///
    /// NOTE(review): structural schema changes can invalidate the hash indexes
    /// built from it; `rebuild_indexes` exists for that purpose — confirm
    /// callers invoke it after constraint-affecting changes.
    pub fn schema_mut(&mut self) -> &mut vibesql_catalog::TableSchema {
        &mut self.schema
    }
1096
    /// Get reference to primary key index
    ///
    /// Maps primary-key values (one `SqlValue` per PK column) to the row's
    /// position in the backing vector. Presumably `None` when the table has
    /// no primary key — delegated to the `IndexManager`.
    pub fn primary_key_index(&self) -> Option<&std::collections::HashMap<Vec<SqlValue>, usize>> {
        self.indexes.primary_key_index()
    }
1101
    /// Get reference to unique constraint indexes
    ///
    /// Each map is keyed by the constrained column values and maps to the
    /// row's position in the backing vector; presumably one map per UNIQUE
    /// constraint — delegated to the `IndexManager`.
    pub fn unique_indexes(&self) -> &[std::collections::HashMap<Vec<SqlValue>, usize>] {
        self.indexes.unique_indexes()
    }
1106
1107 /// Rebuild all hash indexes from scratch
1108 /// Used after schema changes that add constraints (e.g., ALTER TABLE ADD PRIMARY KEY)
1109 pub fn rebuild_indexes(&mut self) {
1110 // Recreate the IndexManager to match the current schema
1111 // (in case constraints were added that didn't exist before)
1112 self.indexes = IndexManager::new(&self.schema);
1113
1114 // Rebuild indexes from existing rows
1115 self.indexes.rebuild(&self.schema, &self.rows);
1116 }
1117}
1118
#[cfg(test)]
mod tests {
    use vibesql_catalog::{ColumnSchema, TableSchema};
    use vibesql_types::{DataType, SqlValue};

    use super::*;

    /// Build a two-column table: `id` INTEGER (PK, NOT NULL), `name` VARCHAR(50) NULL.
    fn create_test_table() -> Table {
        let schema = TableSchema::with_primary_key(
            "test_table".to_string(),
            vec![
                ColumnSchema::new("id".to_string(), DataType::Integer, false),
                ColumnSchema::new("name".to_string(), DataType::Varchar { max_length: Some(50) }, true),
            ],
            vec!["id".to_string()],
        );
        Table::new(schema)
    }

    /// Build an `(id, name)` row matching the test table's schema.
    fn create_row(id: i64, name: &str) -> Row {
        Row::from_vec(vec![SqlValue::Integer(id), SqlValue::Varchar(arcstr::ArcStr::from(name))])
    }

    #[test]
    fn test_append_mode_integration() {
        let mut table = create_test_table();
        assert!(!table.is_in_append_mode());

        // Four strictly sequential primary keys should flip the table into append mode.
        for (id, name) in [(1, "Alice"), (2, "Bob"), (3, "Charlie"), (4, "David")] {
            table.insert(create_row(id, name)).unwrap();
        }
        assert!(table.is_in_append_mode());

        // Clearing the table resets the tracker.
        table.clear();
        assert!(!table.is_in_append_mode());
    }

    #[test]
    fn test_scan_columnar() {
        let mut table = create_test_table();
        for (id, name) in [(1, "Alice"), (2, "Bob"), (3, "Charlie")] {
            table.insert(create_row(id, name)).unwrap();
        }

        let columnar = table.scan_columnar().unwrap();

        // Shape checks.
        assert_eq!(columnar.row_count(), 3);
        assert_eq!(columnar.column_count(), 2);

        // `id` column: three entries, none NULL.
        let id_col = columnar.get_column("id").expect("id column should exist");
        assert_eq!(id_col.len(), 3);
        for slot in 0..3 {
            assert!(!id_col.is_null(slot));
        }

        // `name` column is present and fully populated.
        let name_col = columnar.get_column("name").expect("name column should exist");
        assert_eq!(name_col.len(), 3);
    }

    #[test]
    fn test_scan_columnar_empty_table() {
        let table = create_test_table();

        let columnar = table.scan_columnar().unwrap();

        // No rows, but the schema's two columns are still represented.
        assert_eq!(columnar.row_count(), 0);
        assert_eq!(columnar.column_count(), 2);
    }

    #[test]
    fn test_scan_columnar_with_nulls() {
        let schema = TableSchema::new(
            "test_nulls".to_string(),
            vec![
                ColumnSchema::new("id".to_string(), DataType::Integer, false),
                ColumnSchema::new("value".to_string(), DataType::Integer, true), // nullable
            ],
        );
        let mut table = Table::new(schema);

        // Row 2's value is NULL; rows 1 and 3 carry integers.
        let cells = [Some(100), None, Some(300)];
        for (i, cell) in cells.iter().enumerate() {
            let value = match cell {
                Some(n) => SqlValue::Integer(*n),
                None => SqlValue::Null,
            };
            table.insert(Row::from_vec(vec![SqlValue::Integer(i as i64 + 1), value])).unwrap();
        }

        let columnar = table.scan_columnar().unwrap();

        // NULL positions must survive the columnar conversion.
        let value_col = columnar.get_column("value").expect("value column should exist");
        assert!(!value_col.is_null(0)); // 100
        assert!(value_col.is_null(1)); // NULL
        assert!(!value_col.is_null(2)); // 300
    }

    // ========================================================================
    // Bulk Insert Tests
    // ========================================================================

    #[test]
    fn test_insert_batch_basic() {
        let mut table = create_test_table();

        let batch = vec![create_row(1, "Alice"), create_row(2, "Bob"), create_row(3, "Charlie")];
        let inserted = table.insert_batch(batch).unwrap();

        assert_eq!(inserted, 3);
        assert_eq!(table.row_count(), 3);

        // Rows come back in insertion order.
        let scanned: Vec<_> = table.scan().to_vec();
        for (pos, expected_id) in [1i64, 2, 3].into_iter().enumerate() {
            assert_eq!(scanned[pos].values[0], SqlValue::Integer(expected_id));
        }
    }

    #[test]
    fn test_insert_batch_empty() {
        let mut table = create_test_table();

        // An empty batch is a no-op that reports zero inserts.
        assert_eq!(table.insert_batch(Vec::new()).unwrap(), 0);
        assert_eq!(table.row_count(), 0);
    }

    #[test]
    fn test_insert_batch_preserves_indexes() {
        let mut table = create_test_table();

        table
            .insert_batch(vec![create_row(1, "Alice"), create_row(2, "Bob"), create_row(3, "Charlie")])
            .unwrap();

        // The PK index exists and covers all three rows.
        let pk_index = table.primary_key_index().expect("PK index should exist");
        assert_eq!(pk_index.len(), 3);

        // Each key maps to the row slot it was inserted into.
        for (id, slot) in [(1i64, 0usize), (2, 1), (3, 2)] {
            assert_eq!(pk_index.get(&vec![SqlValue::Integer(id)]), Some(&slot));
        }
    }

    #[test]
    fn test_insert_batch_columnar_scan_includes_new_rows() {
        let mut table = create_test_table();

        // Seed one row and materialize a columnar scan.
        table.insert(create_row(1, "Alice")).unwrap();
        let _ = table.scan_columnar().unwrap();

        // A subsequent batch insert must be visible to the next scan.
        table.insert_batch(vec![create_row(2, "Bob"), create_row(3, "Charlie")]).unwrap();
        assert_eq!(table.scan_columnar().unwrap().row_count(), 3);
    }

    #[test]
    fn test_insert_batch_validation_failure_is_atomic() {
        let mut table = create_test_table();
        table.insert(create_row(1, "Alice")).unwrap();

        // The second row is malformed (one column short), so the batch must fail...
        let batch = vec![
            Row::new(vec![SqlValue::Integer(2), SqlValue::Varchar(arcstr::ArcStr::from("Bob"))]),
            Row::new(vec![SqlValue::Integer(3)]), // Invalid - missing column
        ];
        assert!(table.insert_batch(batch).is_err());

        // ...and leave the table untouched (atomic failure).
        assert_eq!(table.row_count(), 1);
    }

    #[test]
    fn test_insert_batch_large() {
        let mut table = create_test_table();

        // 10000-row batch.
        let batch: Vec<Row> = (0..10_000).map(|i| create_row(i, &format!("User{}", i))).collect();
        assert_eq!(table.insert_batch(batch).unwrap(), 10_000);
        assert_eq!(table.row_count(), 10_000);

        // Spot-check both ends of the range.
        let scanned = table.scan();
        assert_eq!(scanned[0].values[0], SqlValue::Integer(0));
        assert_eq!(scanned[9999].values[0], SqlValue::Integer(9999));
    }

    #[test]
    fn test_insert_from_iter_basic() {
        let mut table = create_test_table();

        let source = (0..100).map(|i| create_row(i, &format!("User{}", i)));
        assert_eq!(table.insert_from_iter(source, 10).unwrap(), 100);
        assert_eq!(table.row_count(), 100);
    }

    #[test]
    fn test_insert_from_iter_default_batch_size() {
        let mut table = create_test_table();

        let source = (0..50).map(|i| create_row(i, &format!("User{}", i)));

        // A batch size of 0 falls back to the default of 1000.
        assert_eq!(table.insert_from_iter(source, 0).unwrap(), 50);
        assert_eq!(table.row_count(), 50);
    }

    #[test]
    fn test_insert_from_iter_partial_final_batch() {
        let mut table = create_test_table();

        // 25 rows at batch size 10: two full batches plus a 5-row remainder.
        let source = (0..25).map(|i| create_row(i, &format!("User{}", i)));
        assert_eq!(table.insert_from_iter(source, 10).unwrap(), 25);
        assert_eq!(table.row_count(), 25);
    }

    #[test]
    fn test_insert_batch_after_single_inserts() {
        let mut table = create_test_table();

        // Mix single-row inserts with a following batch.
        table.insert(create_row(1, "Alice")).unwrap();
        table.insert(create_row(2, "Bob")).unwrap();
        table.insert_batch(vec![create_row(3, "Charlie"), create_row(4, "David")]).unwrap();

        assert_eq!(table.row_count(), 4);

        // The PK index must cover all four rows with the correct slots.
        let pk_index = table.primary_key_index().unwrap();
        for (id, slot) in [(1i64, 0usize), (2, 1), (3, 2), (4, 3)] {
            assert_eq!(pk_index.get(&vec![SqlValue::Integer(id)]), Some(&slot));
        }
    }
}
1389}