cqlite-core 0.11.0

Core engine for CQLite — read Apache Cassandra 5.0 SSTables locally without a cluster
Documentation
//! Integration tests for NB format SSTable reading
//!
//! These tests verify that NB format files (Cassandra 4.x+) can be opened and read correctly.
//! NB format files don't have magic numbers in the Data.db file - metadata is in separate components.

use cqlite_core::storage::sstable::reader::SSTableReader;
use cqlite_core::{Config, Platform};
use std::path::Path;
use std::sync::Arc;

/// Opens a real NB-format `Data.db` file and reports what the reader parsed.
///
/// The fixture is located under the directory named by the
/// `CQLITE_DATASETS_ROOT` environment variable. When the fixture file is
/// absent the test prints a notice and returns without failing.
///
/// # Panics
///
/// Panics if the platform cannot be created, if `CQLITE_DATASETS_ROOT` is not
/// set, or if the fixture exists but `SSTableReader::open` returns an error.
#[tokio::test]
async fn test_nb_format_detection_and_opening() {
    let config = Config::default();
    let platform = Platform::new(&config)
        .await
        .expect("Failed to create platform");
    let platform = Arc::new(platform);

    // Locate the real NB format fixture inside the datasets checkout.
    let datasets_root = std::env::var("CQLITE_DATASETS_ROOT")
        .expect("CQLITE_DATASETS_ROOT environment variable must be set");
    let sstable_path = Path::new(&datasets_root).join(
        "sstables/test_collections/collection_clustering_table-6bf78680a25111f0a3fef1a551383fb9/nb-1-big-Data.db"
    );

    if !sstable_path.exists() {
        println!(
            "⚠️  NB format test data not found at {:?}, skipping test",
            sstable_path
        );
        return;
    }

    // Opening must succeed; an "unknown magic number" or "unsupported format"
    // style error here means NB detection is broken.
    let reader = match SSTableReader::open(&sstable_path, &config, platform).await {
        Ok(opened) => opened,
        Err(e) => {
            panic!("❌ Failed to open NB format file: {}\nThis indicates that NB format detection is not working correctly.", e);
        }
    };

    println!("✅ NB format file opened successfully");

    // Dump the parsed header fields for manual inspection.
    let header = reader.header();
    println!("   Keyspace: {}", header.keyspace);
    println!("   Table: {}", header.table_name);
    println!("   Version: {:?}", header.cassandra_version);
    println!("   Compression: {}", header.compression.algorithm);

    // Report the compression metadata when the reader has it.
    match &reader.compression_info {
        Some(info) => {
            println!("   CompressionInfo loaded:");
            println!("     Algorithm: {}", info.algorithm);
            println!("     Chunk length: {}", info.chunk_length);
            println!("     Chunk count: {}", info.chunk_offsets.len());

            // Printed next to the values above so the two can be compared by eye.
            println!(
                "   Header compression algorithm: {}",
                header.compression.algorithm
            );
        }
        None => {
            println!("   CompressionInfo not loaded (may be uncompressed or legacy format)");
        }
    }
}

/// Opens the NB-format fixture and attempts to read every entry from it.
///
/// The fixture is located under the directory named by the
/// `CQLITE_DATASETS_ROOT` environment variable; if the file is missing the
/// test prints a notice and returns. A failure from `get_all_entries` is only
/// logged, not treated as a test failure.
///
/// # Panics
///
/// Panics if the platform cannot be created, if `CQLITE_DATASETS_ROOT` is not
/// set, or if the fixture exists but cannot be opened.
#[tokio::test]
async fn test_nb_format_data_reading() {
    let config = Config::default();
    let platform = Platform::new(&config)
        .await
        .expect("Failed to create platform");
    let platform = Arc::new(platform);

    let datasets_root = std::env::var("CQLITE_DATASETS_ROOT")
        .expect("CQLITE_DATASETS_ROOT environment variable must be set");
    let sstable_path = Path::new(&datasets_root).join(
        "sstables/test_collections/collection_clustering_table-6bf78680a25111f0a3fef1a551383fb9/nb-1-big-Data.db"
    );

    if !sstable_path.exists() {
        println!("⚠️  NB format test data not found, skipping test");
        return;
    }

    let reader = SSTableReader::open(&sstable_path, &config, platform)
        .await
        .expect("Failed to open NB format file");

    // Attempt a full read of the table's entries.
    let entries = match reader.get_all_entries().await {
        Ok(all) => all,
        Err(e) => {
            // Read failures are tolerated here: the test primarily covers detection.
            println!("⚠️  Failed to read entries from NB format file: {}", e);
            println!("   This is expected if ChunkReader integration is not yet complete.");
            return;
        }
    };

    println!("✅ Read {} entries from NB format file", entries.len());

    if entries.is_empty() {
        println!(
            "⚠️  No entries found - this might indicate chunk reading needs implementation"
        );
        return;
    }

    // Show at most the first three entries for manual verification.
    for (i, entry) in entries.iter().enumerate().take(3) {
        let (table_id, key, value) = entry;
        println!(
            "   Entry {}: table_id={}, key={:?}, value={:?}",
            i, table_id, key, value
        );
    }
}

/// Opens each listed NB-format fixture and checks that it can be opened.
///
/// Fixture paths are relative to `$CQLITE_DATASETS_ROOT/sstables`. A fixture
/// that does not exist on disk is skipped with a notice; the remaining
/// fixtures are still exercised.
///
/// # Panics
///
/// Panics if the platform cannot be created, if `CQLITE_DATASETS_ROOT` is not
/// set, if any existing fixture fails to open, or if a fixture path does not
/// contain the `nb-` marker.
#[tokio::test]
async fn test_nb_format_multiple_files() {
    let config = Config::default();
    let platform = Arc::new(
        Platform::new(&config)
            .await
            .expect("Failed to create platform"),
    );

    // Test different NB format files from test data
    let test_cases = [
        "test_collections/collection_clustering_table-6bf78680a25111f0a3fef1a551383fb9/nb-1-big-Data.db",
        // Add more test cases as available
    ];

    // Require the datasets root explicitly, exactly like the other tests in
    // this file, instead of falling back to a machine-specific absolute path.
    let datasets_root = std::env::var("CQLITE_DATASETS_ROOT")
        .expect("CQLITE_DATASETS_ROOT environment variable must be set");

    for test_case in test_cases {
        let test_path = Path::new(&datasets_root).join("sstables").join(test_case);

        if !test_path.exists() {
            println!("⚠️  Test file not found: {:?}, skipping", test_path);
            continue;
        }

        println!("Testing NB format file: {:?}", test_path);

        // The platform handle is shared across iterations; cloning the `Arc`
        // only bumps the reference count.
        let result = SSTableReader::open(&test_path, &config, Arc::clone(&platform)).await;

        match result {
            Ok(reader) => {
                println!("  ✅ Opened successfully");
                let header = reader.header();
                println!("     Compression: {}", header.compression.algorithm);

                // Sanity-check the fixture itself: the path must carry the NB
                // marker. A lossy conversion avoids panicking on a non-UTF-8
                // datasets root.
                assert!(
                    test_path.to_string_lossy().contains("nb-"),
                    "Test file should be NB format"
                );
            }
            Err(e) => {
                panic!("  ❌ Failed to open NB format file {:?}: {}", test_path, e);
            }
        }
    }
}

/// Opens a non-NB (`na-`) fixture to exercise the reader on a different format.
///
/// The fixture is located under the directory named by the
/// `CQLITE_DATASETS_ROOT` environment variable; if the file is missing the
/// test prints a notice and returns. A failure to open the file is logged and
/// tolerated rather than failing the test.
///
/// # Panics
///
/// Panics if the platform cannot be created, if `CQLITE_DATASETS_ROOT` is not
/// set, if the fixture path is not valid UTF-8, or if the fixture path
/// contains the `nb-` marker.
#[tokio::test]
async fn test_non_nb_format_files() {
    let config = Config::default();
    let platform = Platform::new(&config)
        .await
        .expect("Failed to create platform");
    let platform = Arc::new(platform);

    // Locate a regular (non-NB) format fixture.
    let datasets_root = std::env::var("CQLITE_DATASETS_ROOT")
        .expect("CQLITE_DATASETS_ROOT environment variable must be set");
    let sstable_path = Path::new(&datasets_root)
        .join("sstables/test_basic/simple_table-d2e60a60a24e11f085a271be57d0abe2/na-1-big-Data.db");

    if !sstable_path.exists() {
        println!("⚠️  Non-NB test data not found, skipping test");
        return;
    }

    let reader = match SSTableReader::open(&sstable_path, &config, platform).await {
        Ok(opened) => opened,
        Err(e) => {
            // Tolerated: this file format may not be supported yet.
            println!("⚠️  Failed to open non-NB file: {}", e);
            return;
        }
    };

    println!("✅ Non-NB format file opened successfully");

    let header = reader.header();
    println!("   Format version: {:?}", header.cassandra_version);

    // Guard against the fixture being swapped for an NB file by mistake.
    let path_text = sstable_path.to_str().unwrap();
    assert!(
        !path_text.contains("nb-"),
        "This test should use non-NB format file"
    );
}