rag-module 0.6.7

Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
//! Test SQLite ContentStore Integration
//!
//! This test verifies:
//! 1. Large encrypted metadata is stored in SQLite (not Qdrant)
//! 2. Small filterable fields are stored in Qdrant
//! 3. Search combines data from both sources correctly
//! 4. No data loss during batch operations

use rag_module::RagModule;
use serde_json::json;
use std::time::Instant;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize with test directory
    let test_dir = "./sqlite-test-data";
    std::fs::create_dir_all(test_dir)?;

    println!("๐Ÿงช Testing SQLite ContentStore Integration");
    println!("{}", "=".repeat(60));

    // Initialize RAG module
    let rag = RagModule::new(test_dir).await?;
    rag.initialize().await?;

    let user_id = "sqlite_test_user";
    let collection_name = "aws_estate";

    // Test 1: Single Document Ingestion
    println!("\n๐Ÿ“ Test 1: Single Document Ingestion with Large Metadata");
    println!("{}", "-".repeat(60));

    let large_metadata = json!({
        "KeyId": "eeb91ab6-aadc-4d60-a0e1-dcb7c56f0adf",
        "AccountId": "288761761556",
        "Region": "us-west-2",
        "Arn": "arn:aws:kms:us-west-2:288761761556:key/eeb91ab6-aadc-4d60-a0e1-dcb7c56f0adf",
        "CreationDate": "2023-05-15T10:30:00Z",
        "KeyManager": "CUSTOMER",
        "KeyState": "Enabled",
        "KeyUsage": "ENCRYPT_DECRYPT",
        "Origin": "AWS_KMS",
        "MultiRegion": false,
        "Description": "Production KMS key for data encryption",
        "Tags": [
            {"Key": "Environment", "Value": "Production"},
            {"Key": "Team", "Value": "Security"},
            {"Key": "Application", "Value": "DataEncryption"},
            {"Key": "CostCenter", "Value": "CC-12345"}
        ],
        "KeyPolicy": {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Sid": "Enable IAM User Permissions",
                    "Effect": "Allow",
                    "Principal": {"AWS": "arn:aws:iam::288761761556:root"},
                    "Action": "kms:*",
                    "Resource": "*"
                }
            ]
        },
        "Aliases": ["alias/prod-encryption-key"],
        "CustomerMasterKeySpec": "SYMMETRIC_DEFAULT",
        "EncryptionAlgorithms": ["SYMMETRIC_DEFAULT"],
        "AdditionalMetadata": {
            "LastRotated": "2024-05-15T10:30:00Z",
            "RotationEnabled": true,
            "RotationPeriodInDays": 365
        }
    });

    let doc1 = json!({
        "content": "KMS Key eeb91ab6-aadc-4d60-a0e1-dcb7c56f0adf in us-west-2 for production data encryption",
        "type": "kms-key",
        "region": "us-west-2",
        "accountId": "288761761556",
        "service": "kms",
        "resourceType": "key",
        "KeyState": "Enabled",
        // Merge all the large metadata
        "KeyId": large_metadata["KeyId"],
        "Arn": large_metadata["Arn"],
        "CreationDate": large_metadata["CreationDate"],
        "KeyManager": large_metadata["KeyManager"],
        "KeyUsage": large_metadata["KeyUsage"],
        "Origin": large_metadata["Origin"],
        "MultiRegion": large_metadata["MultiRegion"],
        "Description": large_metadata["Description"],
        "Tags": large_metadata["Tags"],
        "KeyPolicy": large_metadata["KeyPolicy"],
        "Aliases": large_metadata["Aliases"],
        "CustomerMasterKeySpec": large_metadata["CustomerMasterKeySpec"],
        "EncryptionAlgorithms": large_metadata["EncryptionAlgorithms"],
        "AdditionalMetadata": large_metadata["AdditionalMetadata"],
    });

    let result1 = rag.ingest_aws_estate(doc1.clone(), user_id, collection_name).await?;
    println!("โœ… Single document ingested: {} resources", result1.parsed_resources);

    // Test 2: Batch Document Ingestion (Large Scale)
    println!("\n๐Ÿ“ฆ Test 2: Batch Ingestion with 50 Documents");
    println!("{}", "-".repeat(60));

    let mut batch_docs = Vec::new();
    for i in 0..50 {
        let doc = json!({
            "content": format!("KMS Key key-{:016x} in {} for {} environment with {} encryption",
                i,
                if i % 3 == 0 { "us-west-2" } else if i % 3 == 1 { "us-east-1" } else { "eu-west-1" },
                if i % 2 == 0 { "production" } else { "staging" },
                if i % 2 == 0 { "AES-256" } else { "RSA-2048" }
            ),
            "type": "kms-key",
            "region": if i % 3 == 0 { "us-west-2" } else if i % 3 == 1 { "us-east-1" } else { "eu-west-1" },
            "accountId": "288761761556",
            "service": "kms",
            "resourceType": "key",
            "KeyState": if i % 5 == 0 { "Disabled" } else { "Enabled" },
            "KeyId": format!("key-{:016x}", i),
            "Arn": format!("arn:aws:kms:us-west-2:288761761556:key/key-{:016x}", i),
            "CreationDate": format!("2024-01-{:02}T10:30:00Z", (i % 28) + 1),
            "KeyManager": "CUSTOMER",
            "KeyUsage": "ENCRYPT_DECRYPT",
            "Origin": "AWS_KMS",
            "MultiRegion": i % 4 == 0,
            "Description": format!("KMS key #{} for data encryption", i),
            "Tags": [
                {"Key": "Environment", "Value": if i % 2 == 0 { "Production" } else { "Staging" }},
                {"Key": "Index", "Value": i.to_string()},
            ],
            "KeyPolicy": {
                "Version": "2012-10-17",
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": {"AWS": "arn:aws:iam::288761761556:root"},
                    "Action": "kms:*",
                    "Resource": "*"
                }]
            }
        });
        batch_docs.push(doc);
    }

    let batch_start = Instant::now();
    let batch_result = rag.ingest_aws_estate_batch(batch_docs, user_id, collection_name).await?;
    let batch_duration = batch_start.elapsed();

    println!("โœ… Batch ingestion complete!");
    println!("   ๐Ÿ“Š Total documents: {}", batch_result.total_resources);
    println!("   โœ… Successfully processed: {}", batch_result.parsed_resources);
    println!("   โŒ Failed: {}", batch_result.failed_resources);
    println!("   โšก Time taken: {:?}", batch_duration);
    println!("   ๐Ÿ“ˆ Throughput: {:.2} docs/sec",
        batch_result.parsed_resources as f64 / batch_duration.as_secs_f64());

    // Test 3: Verify SQLite ContentStore Statistics
    println!("\n๐Ÿ“Š Test 3: ContentStore Statistics");
    println!("{}", "-".repeat(60));

    let stats = rag.content_store.get_stats().await?;
    println!("โœ… ContentStore Statistics:");
    println!("   ๐Ÿ“ Total entries: {}", stats.total_entries);
    println!("   ๐Ÿ’พ Total size: {} bytes ({:.2} MB)",
        stats.total_size_bytes,
        stats.total_size_bytes as f64 / 1_048_576.0
    );
    println!("   ๐Ÿ—‚๏ธ  Collections: {}", stats.collection_count);
    for collection in &stats.collections {
        let count = rag.content_store.count_collection(collection).await?;
        let size = rag.content_store.get_collection_size(collection).await?;
        println!("      - {}: {} entries, {} bytes ({:.2} KB)",
            collection, count, size, size as f64 / 1024.0);
    }

    // Test 4: Search and Verify Metadata Combination
    println!("\n๐Ÿ” Test 4: Search with Metadata Combination");
    println!("{}", "-".repeat(60));

    let search_options = rag_module::SearchOptions {
        limit: Some(5),
        score_threshold: Some(0.1),
        ..Default::default()
    };

    let search_start = Instant::now();
    let search_results = rag.search(collection_name, "KMS encryption keys production", user_id, search_options).await?;
    let search_duration = search_start.elapsed();

    println!("โœ… Search complete in {:?}", search_duration);
    println!("   ๐Ÿ“Š Found {} results", search_results.len());

    // Verify each result has combined data
    for (idx, result) in search_results.iter().enumerate() {
        if let Some(ref payload) = result.payload {
            let has_doc_ref = payload.contains_key("doc_ref");
            let has_encrypted_metadata = payload.contains_key("_encrypted_metadata");
            let has_small_fields = payload.contains_key("type") || payload.contains_key("region");

            println!("\n   Result #{} (score: {:.4}):", idx + 1, result.score);
            println!("      โœ“ doc_ref: {}", if has_doc_ref { "โœ…" } else { "โŒ" });
            println!("      โœ“ _encrypted_metadata: {}", if has_encrypted_metadata { "โœ…" } else { "โŒ" });
            println!("      โœ“ small fields (type/region): {}", if has_small_fields { "โœ…" } else { "โŒ" });

            if has_doc_ref {
                let doc_ref = payload.get("doc_ref")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown");
                println!("      ๐Ÿ“‹ doc_ref: {}", doc_ref);
            }

            if let Some(type_val) = payload.get("type") {
                println!("      ๐Ÿท๏ธ  type: {}", type_val);
            }

            if let Some(region_val) = payload.get("region") {
                println!("      ๐ŸŒ region: {}", region_val);
            }

            if has_encrypted_metadata {
                let metadata_str = payload.get("_encrypted_metadata")
                    .and_then(|v| v.as_str())
                    .unwrap_or("");
                println!("      ๐Ÿ’พ _encrypted_metadata size: {} bytes", metadata_str.len());

                // Try to parse the encrypted metadata to verify it's valid JSON
                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(metadata_str) {
                    if let Some(obj) = parsed.as_object() {
                        println!("      โœ… Valid JSON with {} fields", obj.len());
                        // Show some sample fields
                        if let Some(key_id) = obj.get("KeyId") {
                            println!("         - KeyId: {}", key_id);
                        }
                        if let Some(arn) = obj.get("Arn") {
                            println!("         - Arn: {}", arn);
                        }
                    }
                } else {
                    println!("      โš ๏ธ  Could not parse _encrypted_metadata as JSON");
                }
            } else {
                println!("      โŒ MISSING _encrypted_metadata - This is a problem!");
            }
        }
    }

    // Test 5: Data Integrity Check
    println!("\n๐Ÿ”’ Test 5: Data Integrity Verification");
    println!("{}", "-".repeat(60));

    let total_expected = 1 + 50; // 1 from test 1 + 50 from test 2
    let sqlite_count = rag.content_store.count_collection(collection_name).await?;

    println!("โœ… Data Integrity Check:");
    println!("   ๐Ÿ“Š Expected documents: {}", total_expected);
    println!("   ๐Ÿ’พ SQLite entries: {}", sqlite_count);

    if sqlite_count >= total_expected as i64 {
        println!("   โœ… All documents accounted for in SQLite!");
    } else {
        println!("   โš ๏ธ  Missing {} documents in SQLite", total_expected as i64 - sqlite_count);
    }

    // Test 6: Direct ContentStore Verification
    println!("\n๐Ÿ” Test 6: Direct ContentStore Query");
    println!("{}", "-".repeat(60));

    if let Some(first_result) = search_results.first() {
        if let Some(payload) = &first_result.payload {
            if let Some(doc_ref) = payload.get("doc_ref").and_then(|v| v.as_str()) {
                println!("Testing direct ContentStore query for: {}", doc_ref);

                match rag.content_store.get_metadata(doc_ref).await? {
                    Some(metadata) => {
                        println!("โœ… Successfully retrieved from ContentStore!");
                        println!("   ๐Ÿ’พ Size: {} bytes ({:.2} KB)", metadata.len(), metadata.len() as f64 / 1024.0);

                        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&metadata) {
                            if let Some(obj) = parsed.as_object() {
                                println!("   ๐Ÿ“‹ Fields in metadata: {}", obj.len());
                                println!("   ๐Ÿ”‘ Top-level keys: {:?}",
                                    obj.keys().take(5).collect::<Vec<_>>());
                            }
                        }
                    }
                    None => {
                        println!("โŒ Not found in ContentStore - This is a problem!");
                    }
                }
            }
        }
    }

    // Final Summary
    println!();
    println!("{}", "=".repeat(60));
    println!("๐ŸŽ‰ Test Suite Complete!");
    println!("{}", "=".repeat(60));
    println!("โœ… SQLite ContentStore is working correctly:");
    println!("   โœ“ Single document ingestion");
    println!("   โœ“ Batch document ingestion (50 docs)");
    println!("   โœ“ Metadata storage in SQLite");
    println!("   โœ“ Search with metadata combination");
    println!("   โœ“ Data integrity verified");
    println!("   โœ“ Direct ContentStore queries");
    println!();
    println!("๐Ÿ“ Test data location: {}", test_dir);
    println!("   ๐Ÿ’พ SQLite database: {}/content_store.db", test_dir);
    println!("   ๐Ÿ—‚๏ธ  Qdrant data: {}/qdrant-data/", test_dir);

    Ok(())
}