cqlite-core 0.11.0

Core engine for CQLite — read Apache Cassandra 5.0 SSTables locally without a cluster
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
//! CQLite Core Database Engine
//!
//! A high-performance, embeddable database engine with SSTable-based storage,
//! supporting both native and WASM deployments.

pub mod config;
pub mod cql;
pub mod error;
pub mod parser;
// DISABLED FOR M1: Security and performance modules causing compilation errors
// pub mod performance;
// pub mod security; // Security framework for comprehensive protection
pub mod types;
pub mod util;
pub mod version_hints;

pub mod benchmarks;
pub mod memory;
pub mod platform;
#[cfg(feature = "state_machine")]
pub mod query;
pub mod schema;
pub mod storage;

// Embeddable export writers (Epic #682). The module is always present; the
// Parquet writer inside it is gated behind the optional `parquet` feature.
pub mod export;

// M5: Write engine and serialization modules (Issue #359)
// Re-exported at crate level for convenience when write-support is enabled
#[cfg(feature = "write-support")]
pub use storage::serialization;
#[cfg(feature = "write-support")]
pub use storage::write_engine;

// Ingestion module for one-shot schema & SSTable discovery (Issue #249: CLI-specific)
#[cfg(feature = "cli-helpers")]
pub mod ingestion;

// Discovery module for SSTable scanning and coverage analysis
#[cfg(feature = "state_machine")]
pub mod discovery;

// Testing utilities - hidden from public docs via #[doc(hidden)] but available for integration tests
#[doc(hidden)]
pub mod testing;

// NOTE: memory_safety_runner moved to tools/memory-safety-runner (Issue #245)
// NOTE: memory_safety_tests disabled - MemTable removed in Issue #175

// Re-export main types for convenience
pub use crate::{
    config::Config,
    error::{Error, Result},
    platform::Platform,
    types::*,
};

// Re-export query types when state_machine feature is enabled
#[cfg(feature = "state_machine")]
pub use query::SchemaStatus;

use std::path::Path;
#[cfg(feature = "state_machine")]
use std::path::PathBuf;
use std::sync::Arc;

use crate::{memory::MemoryManager, storage::StorageEngine};

#[cfg(feature = "state_machine")]
use crate::schema::SchemaManager;

#[cfg(feature = "state_machine")]
use crate::query::QueryEngine;

/// Main database handle
///
/// This is the primary interface for interacting with a CQLite database.
/// It holds shared (`Arc`) handles to the storage engine, the memory manager
/// and — when the `state_machine` feature is enabled — the query engine,
/// together with the configuration the database was opened with.
#[derive(Debug)]
pub struct Database {
    // Shared storage engine; the `open*` constructors also hand clones of it
    // to the schema manager and the query engine.
    storage: Arc<StorageEngine>,
    // Query engine; only compiled in with the `state_machine` feature.
    #[cfg(feature = "state_machine")]
    query: Arc<QueryEngine>,
    // Shared memory manager; source of `DatabaseStats::memory_stats`.
    memory: Arc<MemoryManager>,
    // Configuration passed to the `open*` constructor; exposed via `config()`.
    config: Config,
}

impl Database {
    /// Open a database rooted at `path` using `config`.
    ///
    /// Builds, in order, the platform abstraction layer, the memory manager and
    /// the storage engine (without a pre-loaded schema registry). When the
    /// `state_machine` feature is enabled, a schema manager and a query engine
    /// are additionally created on top of the storage engine.
    ///
    /// # Arguments
    ///
    /// * `path` - The directory path where the database files will be stored
    /// * `config` - Database configuration options
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The path cannot be created or accessed
    /// - Database files are corrupted
    /// - Configuration is invalid
    ///
    /// More generally, any failure reported while initializing one of the
    /// components listed above is propagated to the caller.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use cqlite_core::{Database, Config};
    /// use std::path::Path;
    ///
    /// # tokio_test::block_on(async {
    /// let config = Config::default();
    /// let db = Database::open(Path::new("./data"), config).await?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # });
    /// ```
    pub async fn open(path: &Path, config: Config) -> Result<Self> {
        // The platform layer and the memory manager depend only on the configuration.
        let platform_layer = Arc::new(Platform::new(&config).await?);
        let memory_manager = Arc::new(MemoryManager::new(&config)?);

        // Plain open: the storage engine receives no pre-loaded schema registry.
        let opened_engine = StorageEngine::open(
            path,
            &config,
            platform_layer.clone(),
            #[cfg(feature = "state_machine")]
            None,
        )
        .await?;
        let storage_engine = Arc::new(opened_engine);

        // Schema manager and query engine exist only with the `state_machine` feature.
        #[cfg(feature = "state_machine")]
        let schema_manager = {
            let manager = SchemaManager::new_with_storage(storage_engine.clone(), &config).await?;
            Arc::new(manager)
        };
        #[cfg(feature = "state_machine")]
        let query_engine = {
            let engine = QueryEngine::new(
                storage_engine.clone(),
                schema_manager.clone(),
                memory_manager.clone(),
                &config,
            )?;
            Arc::new(engine)
        };

        Ok(Self {
            storage: storage_engine,
            #[cfg(feature = "state_machine")]
            query: query_engine,
            memory: memory_manager,
            config,
        })
    }

    /// Open a database over SSTable table directories that were discovered beforehand.
    ///
    /// Intended for the ingestion flow, where SSTable discovery has already been
    /// performed externally (e.g., via `DiscoveryService`) and the database should
    /// be initialized with those specific table directories instead of scanning
    /// the storage directory itself.
    ///
    /// This is a thin public wrapper: it forwards to
    /// `open_with_discovered_sstables_and_registry()` without a schema registry.
    ///
    /// # Use Case
    ///
    /// The one-shot ingestion workflow:
    /// 1. `DiscoveryService::discover()` scans external Cassandra data directories
    /// 2. `SchemaManager` parses schema from discovered files
    /// 3. `Database::open_with_discovered_sstables()` creates a queryable database instance
    ///
    /// # Arguments
    ///
    /// * `storage_path` - The directory path for database runtime files
    /// * `discovered_table_dirs` - Table directory paths from DiscoveryService
    ///   (e.g., `/var/lib/cassandra/data/keyspace1/table1-abc123`)
    /// * `config` - Database configuration options
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The storage path cannot be created or accessed
    /// - Any discovered table directory cannot be read
    /// - Configuration is invalid
    /// - Storage engine or query engine initialization fails
    ///
    /// # Feature Gates
    ///
    /// This method is only available when the `state_machine` feature is enabled (default in M2+).
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use cqlite_core::{Database, Config};
    /// use std::path::{Path, PathBuf};
    ///
    /// # tokio_test::block_on(async {
    /// let config = Config::default();
    /// let storage_path = Path::new("./runtime");
    /// let discovered_dirs = vec![
    ///     PathBuf::from("/var/lib/cassandra/data/keyspace1/table1-abc123"),
    ///     PathBuf::from("/var/lib/cassandra/data/keyspace1/table2-def456"),
    /// ];
    ///
    /// let db = Database::open_with_discovered_sstables(
    ///     storage_path,
    ///     discovered_dirs,
    ///     config
    /// ).await?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # });
    /// ```
    #[cfg(feature = "state_machine")]
    pub async fn open_with_discovered_sstables(
        storage_path: &Path,
        discovered_table_dirs: Vec<PathBuf>,
        config: Config,
    ) -> Result<Self> {
        // Public callers never supply a pre-loaded schema registry.
        let no_registry = None;

        Self::open_with_discovered_sstables_and_registry(
            storage_path,
            discovered_table_dirs,
            config,
            no_registry,
        )
        .await
    }

    /// Crate-internal open over pre-discovered table directories, optionally
    /// reusing an already loaded schema registry.
    ///
    /// Public callers should use `open_with_discovered_sstables()`, which calls
    /// this with `None`. The ingestion module uses this directly to pass loaded
    /// schemas.
    ///
    /// When a registry is supplied, a clone of it is given to the storage engine
    /// and the registry itself backs the schema manager; otherwise the schema
    /// manager is built from storage alone.
    ///
    /// # Arguments
    ///
    /// * `storage_path` - The directory path for database runtime files
    /// * `discovered_table_dirs` - Table directory paths from DiscoveryService
    /// * `config` - Database configuration options
    /// * `schema_registry` - Optional pre-loaded schema registry from ingestion
    ///
    /// # Errors
    ///
    /// Propagates any failure from initializing the platform layer, memory
    /// manager, storage engine, schema manager or query engine.
    #[cfg(feature = "state_machine")]
    pub(crate) async fn open_with_discovered_sstables_and_registry(
        storage_path: &Path,
        discovered_table_dirs: Vec<PathBuf>,
        config: Config,
        schema_registry: Option<Arc<tokio::sync::RwLock<schema::SchemaRegistry>>>,
    ) -> Result<Self> {
        // The platform layer and the memory manager depend only on the configuration.
        let platform_layer = Arc::new(Platform::new(&config).await?);
        let memory_manager = Arc::new(MemoryManager::new(&config)?);

        // The storage engine gets the discovered directories plus a clone of the
        // (optional) registry; the original registry is consumed below.
        let opened_engine = StorageEngine::open_with_sstables(
            storage_path,
            discovered_table_dirs,
            &config,
            platform_layer.clone(),
            schema_registry.clone(),
        )
        .await?;
        let storage_engine = Arc::new(opened_engine);

        // Prefer the supplied registry; fall back to a storage-only schema manager.
        let manager = match schema_registry {
            Some(registry) => {
                SchemaManager::new_with_registry(storage_engine.clone(), registry, &config).await?
            }
            None => SchemaManager::new_with_storage(storage_engine.clone(), &config).await?,
        };
        let schema_manager = Arc::new(manager);

        let engine = QueryEngine::new(
            storage_engine.clone(),
            schema_manager.clone(),
            memory_manager.clone(),
            &config,
        )?;
        let query_engine = Arc::new(engine);

        Ok(Self {
            storage: storage_engine,
            query: query_engine,
            memory: memory_manager,
            config,
        })
    }

    /// Run a SQL query through the query engine and return its result.
    ///
    /// In debug builds, the query text and the `rows_affected` count of a
    /// successful result are additionally written to the debug log.
    ///
    /// # Arguments
    ///
    /// * `sql` - The SQL query string to execute
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - SQL syntax is invalid
    /// - Referenced tables/columns don't exist
    /// - Query execution fails
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use cqlite_core::{Database, Config};
    /// # use std::path::Path;
    /// # tokio_test::block_on(async {
    /// # let config = Config::default();
    /// # let db = Database::open(Path::new("./data"), config).await?;
    /// let result = db.execute("SELECT * FROM users WHERE id = 1").await?;
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # });
    /// ```
    #[cfg(feature = "state_machine")]
    pub async fn execute(&self, sql: &str) -> Result<query::result::QueryResult> {
        let outcome = self.query.execute(sql).await;

        // Debug-only trace; failures are passed through without logging here.
        #[cfg(debug_assertions)]
        if let Ok(query_result) = outcome.as_ref() {
            log::debug!(
                "Database::execute('{}') returning rows_affected: {}",
                sql,
                query_result.rows_affected
            );
        }

        outcome
    }

    /// Execute a SQL query with streaming results (Issue #280)
    ///
    /// Returns a `QueryResultIterator` that yields rows incrementally via a bounded
    /// channel, enabling memory-efficient processing of large result sets.
    ///
    /// This is the recommended method for exporting large tables, as it avoids
    /// materializing all rows in memory at once.
    ///
    /// # Arguments
    ///
    /// * `sql` - The SQL query to execute (must be a SELECT statement)
    /// * `config` - Streaming configuration (buffer size, chunk hints)
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - Query is not a SELECT statement
    /// - SQL syntax is invalid
    /// - Query execution fails
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use cqlite_core::{Database, Config};
    /// # use cqlite_core::query::result::StreamingConfig;
    /// # use std::path::Path;
    /// # tokio_test::block_on(async {
    /// # let db = Database::open(Path::new("./data"), Config::default()).await?;
    /// let config = StreamingConfig::default();
    /// let mut iter = db.execute_streaming(
    ///     "SELECT * FROM large_table",
    ///     config
    /// ).await?;
    ///
    /// while let Some(row_result) = iter.next_async().await {
    ///     let row = row_result?;
    ///     // Process row incrementally
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # });
    /// ```
    #[cfg(feature = "state_machine")]
    pub async fn execute_streaming(
        &self,
        sql: &str,
        config: query::result::StreamingConfig,
    ) -> Result<query::result::QueryResultIterator> {
        // Pure delegation to the query engine; `config` is forwarded unchanged.
        let engine = &self.query;
        engine.execute_streaming(sql, config).await
    }

    /// Prepare a SQL statement so it can be executed repeatedly.
    ///
    /// Delegates to the query engine and returns its shared (`Arc`) prepared query.
    ///
    /// # Arguments
    ///
    /// * `sql` - The SQL statement to prepare
    ///
    /// # Errors
    ///
    /// Returns an error if SQL syntax is invalid or references non-existent objects
    #[cfg(feature = "state_machine")]
    pub async fn prepare(&self, sql: &str) -> Result<Arc<query::PreparedQuery>> {
        let engine = &self.query;
        engine.prepare(sql).await
    }

    /// Ask the query engine to explain a SQL query instead of executing it.
    ///
    /// # Arguments
    ///
    /// * `sql` - The SQL query to explain
    ///
    /// # Errors
    ///
    /// Returns an error if SQL syntax is invalid
    #[cfg(feature = "state_machine")]
    pub async fn explain(&self, sql: &str) -> Result<query::ExplainResult> {
        let engine = &self.query;
        engine.explain(sql).await
    }

    /// Check if schema is available for a table
    ///
    /// This is a fast boolean check useful for pre-flight validation.
    /// For detailed diagnostic information, use `schema_status()`.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use cqlite_core::{Database, Config};
    /// # tokio_test::block_on(async {
    /// let db = Database::open(std::path::Path::new("./data"), Config::default()).await?;
    ///
    /// if !db.has_schema_for_table("users").await {
    ///     eprintln!("Warning: No schema found for 'users' table");
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # });
    /// ```
    #[cfg(feature = "state_machine")]
    pub async fn has_schema_for_table(&self, table: &str) -> bool {
        // Answered entirely by the query engine.
        let engine = &self.query;
        engine.has_schema_for_table(table).await
    }

    /// Get detailed schema status for debugging
    ///
    /// Returns diagnostic information about schema availability including
    /// reasons for missing schemas or extraction failures.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use cqlite_core::{Database, Config};
    /// # use cqlite_core::query::SchemaStatus;
    /// # tokio_test::block_on(async {
    /// let db = Database::open(std::path::Path::new("./data"), Config::default()).await?;
    ///
    /// match db.schema_status("users").await {
    ///     SchemaStatus::Available { .. } => println!("Schema ready"),
    ///     SchemaStatus::ExtractionFailed { cause, suggestion, .. } => {
    ///         eprintln!("Schema extraction failed: {}", cause);
    ///         eprintln!("Suggestion: {}", suggestion);
    ///     }
    ///     _ => {}
    /// }
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// # });
    /// ```
    #[cfg(feature = "state_machine")]
    pub async fn schema_status(&self, table: &str) -> query::SchemaStatus {
        // Answered entirely by the query engine.
        let engine = &self.query;
        engine.schema_status(table).await
    }

    /// Collect a snapshot of database statistics.
    ///
    /// Gathers storage statistics first, then memory statistics and — with the
    /// `state_machine` feature — query engine statistics.
    ///
    /// # Errors
    ///
    /// Propagates failures reported by the storage engine or the memory manager.
    pub async fn stats(&self) -> Result<DatabaseStats> {
        let storage_stats = self.storage.stats().await?;
        let memory_stats = self.memory.stats()?;
        #[cfg(feature = "state_machine")]
        let query_stats = self.query.stats();

        Ok(DatabaseStats {
            storage_stats,
            memory_stats,
            #[cfg(feature = "state_machine")]
            query_stats,
        })
    }

    /// Flush all pending writes to disk.
    ///
    /// Only available with the `experimental` feature; delegates to the storage engine.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the storage engine's flush.
    #[cfg(feature = "experimental")]
    pub async fn flush(&self) -> Result<()> {
        let engine = &self.storage;
        engine.flush().await
    }

    /// Perform manual compaction of storage files.
    ///
    /// Only available with the `experimental` feature; delegates to the storage engine.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the storage engine's compaction.
    #[cfg(feature = "experimental")]
    pub async fn compact(&self) -> Result<()> {
        let engine = &self.storage;
        engine.compact().await
    }

    /// Shut down the database storage engine without consuming `self`.
    ///
    /// This is useful for language bindings where the Database is wrapped
    /// in an Arc and cannot be consumed. The shutdown operation is idempotent.
    ///
    /// For a consuming close that also drops the Database, use `close()`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the storage engine's shutdown.
    pub async fn shutdown(&self) -> Result<()> {
        let engine = &self.storage;
        engine.shutdown().await
    }

    /// Close the database, consuming the handle.
    ///
    /// Shuts down the storage engine and, when the `experimental` feature is
    /// enabled, flushes it afterwards. The handle is dropped when this returns.
    ///
    /// # Errors
    ///
    /// Propagates the first error from the storage engine's shutdown or flush;
    /// if shutdown fails, the flush is not attempted.
    pub async fn close(self) -> Result<()> {
        let engine = &self.storage;

        // Stop background tasks first.
        engine.shutdown().await?;

        // Flush any remaining data (only with experimental feature).
        // NOTE(review): the flush runs after shutdown — confirm that
        // `StorageEngine::flush` is valid on an already shut-down engine.
        #[cfg(feature = "experimental")]
        {
            engine.flush().await?;
        }

        Ok(())
    }

    /// Get the database configuration
    pub fn config(&self) -> &Config {
        &self.config
    }
}

impl Clone for Database {
    fn clone(&self) -> Self {
        Self {
            storage: self.storage.clone(),
            #[cfg(feature = "state_machine")]
            query: self.query.clone(),
            memory: self.memory.clone(),
            config: self.config.clone(),
        }
    }
}

/// Database statistics
///
/// Snapshot returned by `Database::stats()`, combining the statistics
/// reported by each component the database handle holds.
#[derive(Debug, Clone)]
pub struct DatabaseStats {
    /// Storage engine statistics
    pub storage_stats: storage::StorageStats,
    /// Memory manager statistics
    pub memory_stats: memory::MemoryStats,
    /// Query engine statistics (only present with the `state_machine` feature)
    #[cfg(feature = "state_machine")]
    pub query_stats: query::QueryStats,
}

/// A prepared SQL statement that can be executed multiple times
///
/// Thin wrapper around a `query::PreparedQuery`.
// NOTE(review): no constructor for this type is visible in this file, and
// `Database::prepare` returns `Arc<query::PreparedQuery>` rather than this
// wrapper — confirm whether the type is still in use.
#[cfg(feature = "state_machine")]
#[derive(Debug)]
pub struct PreparedStatement {
    // The wrapped prepared query that `execute` delegates to.
    statement: query::PreparedQuery,
}

#[cfg(feature = "state_machine")]
impl PreparedStatement {
    /// Execute the prepared statement, passing `params` through to the wrapped
    /// prepared query.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the wrapped query's execution.
    pub async fn execute(&self, params: &[Value]) -> Result<query::result::QueryResult> {
        let prepared = &self.statement;
        prepared.execute(params).await
    }
}

// Re-export query result types for convenience
#[cfg(feature = "state_machine")]
pub use query::result::{QueryResult, QueryRow};

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Smoke test: a database can be opened in a fresh temp dir and closed again.
    #[tokio::test]
    async fn test_database_open_close() {
        let scratch = TempDir::new().unwrap();

        let database = Database::open(scratch.path(), Config::test_config())
            .await
            .unwrap();
        database.close().await.unwrap();
    }

    /// Compile-time guard for `open_with_discovered_sstables_and_registry`.
    ///
    /// Referencing the function item makes this test fail to compile if the
    /// function is removed, renamed, or no longer reachable from inside the
    /// crate. The `pub(crate)` restriction itself is enforced by the visibility
    /// keyword on the function: it should NOT be callable from integration
    /// tests or external crates, which a unit test inside the crate cannot
    /// assert directly.
    #[cfg(feature = "state_machine")]
    #[test]
    fn test_open_with_discovered_sstables_and_registry_is_crate_private() {
        // The function is only named, not called: calling it would require an
        // async runtime and a storage directory. Naming it is enough for the
        // compiler to check that it exists and is accessible here.
        let _ = Database::open_with_discovered_sstables_and_registry;
    }

    /// Opening with an empty list of discovered table directories succeeds and
    /// reports zero SSTables.
    #[tokio::test]
    #[cfg(feature = "state_machine")]
    async fn test_database_open_with_discovered_sstables() {
        let scratch = TempDir::new().unwrap();
        let config = Config::test_config();

        // No table directories were discovered.
        let database = Database::open_with_discovered_sstables(scratch.path(), Vec::new(), config)
            .await
            .unwrap();

        // The database must come up with nothing loaded.
        let sstable_count = database
            .stats()
            .await
            .unwrap()
            .storage_stats
            .sstables
            .sstable_count;
        assert_eq!(sstable_count, 0);

        database.close().await.unwrap();
    }

    /// End-to-end CREATE / INSERT / SELECT round trip; only built when the
    /// `legacy-heuristics`, `state_machine` and `experimental` features are all on.
    #[tokio::test]
    #[cfg(all(
        feature = "legacy-heuristics",
        feature = "state_machine",
        feature = "experimental"
    ))]
    async fn test_database_basic_operations() {
        let scratch = TempDir::new().unwrap();
        let database = Database::open(scratch.path(), Config::test_config())
            .await
            .unwrap();

        // Create table: expected to report no affected rows.
        let created = database
            .execute("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)")
            .await
            .unwrap();
        assert_eq!(created.rows_affected, 0);

        // Insert data: exactly one row affected.
        let inserted = database
            .execute("INSERT INTO users (id, name) VALUES (1, 'Alice')")
            .await
            .unwrap();

        #[cfg(debug_assertions)]
        log::debug!(
            "Test INSERT assertion - rows_affected: {}",
            inserted.rows_affected
        );

        assert_eq!(inserted.rows_affected, 1);

        // Query data: the inserted row must come back.
        let selected = database
            .execute("SELECT * FROM users WHERE id = 1")
            .await
            .unwrap();

        #[cfg(debug_assertions)]
        log::debug!("Test SELECT assertion - rows.len(): {}", selected.rows.len());

        assert_eq!(selected.rows.len(), 1, "SELECT should return 1 row");

        database.close().await.unwrap();
    }
}