// rag-module 0.6.7
//
// Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
//! Test Local Estate Search with Dual-Mode Storage
//!
//! This example demonstrates searching estate resources using the search_estate_resources() API
//! with dual-mode storage (local embedded + server sync):
//! - Searches execute from local embedded storage (fast)
//! - Writes sync to both local and remote server (backup)
//! - Collections auto-discovered from filesystem
//!
//! Usage:
//! ```bash
//! cargo run --example test_local_search
//! ```

use anyhow::Result;
use rag_module::RagModule;

/// Walks through a local estate search against the `core_estate` collection.
///
/// The steps, in order:
/// 1. Build a `RagModule` rooted at `$HOME/.escher`, initialize it and set the user context.
/// 2. List the collections reported by the vector store and create `core_estate` if it is
///    not among them.
/// 3. Print the collection info and up to five documents.
/// 4. Run `search_estate_resources()` for a fixed query and print selected fields of each hit.
/// 5. Report which concrete vector-store implementation is in use.
///
/// # Errors
///
/// Returns an error if `HOME` cannot be read, or if any `RagModule`, vector-store or
/// search-service call fails.
#[tokio::main]
async fn main() -> Result<()> {
    // Initialize logging
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::INFO)
        .init();

    // Horizontal rule shared by every banner printed below.
    let rule = "=".repeat(60);

    println!("🔍 Testing Local Estate Search\n");
    println!("{}", rule);

    // Use the actual .escher path. A missing/invalid HOME is reported as an error through
    // `?` rather than a panic; `std::io::Error` is used so no extra imports are required.
    let home = std::env::var("HOME").map_err(|err| {
        std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("HOME environment variable not usable: {}", err),
        )
    })?;
    let base_path = format!("{}/.escher", home);
    let user_id = "a4d8c418-90e1-702a-64e8-604fc7b72f72";

    println!("\n📍 Path: {}", base_path);
    println!("👤 User ID: {}", user_id);

    // Step 1: Initialize RAG Module with Dual Store
    println!("\n📦 Step 1: Initializing RAG Module...");
    let rag = RagModule::new(&base_path).await?;
    rag.initialize().await?;
    rag.set_user_context(user_id).await?;

    // These lines describe the expected configuration only; the concrete store type is
    // actually inspected in Step 6.
    println!("   ✅ RAG Module initialized");
    println!("   🔍 Storage Mode: Dual (Embedded + Server Sync)");
    println!("   📍 Local: Fast searches from embedded storage");
    println!("   📡 Server: Automatic backup on writes");

    // Step 2: Check what collections exist
    println!("\n📋 Step 2: Listing collections...");
    let collections = rag.vector_store.list_collections().await?;
    println!("   Found {} collections:", collections.len());
    for collection in &collections {
        println!("      - {}", collection);
    }

    // Step 2.5: Create core_estate collection if it doesn't exist (will load existing files)
    let collection_name = "core_estate";
    println!("\n📦 Step 2.5: Ensuring '{}' collection exists...", collection_name);
    // Compare by &str so no temporary String is allocated for the lookup.
    let already_listed = collections
        .iter()
        .any(|name| name.as_str() == collection_name);
    if already_listed {
        println!("   ✅ Collection already exists");
    } else {
        println!("   Creating collection '{}'...", collection_name);
        // NOTE(review): 1024 is presumably the embedding width of the BGE-M3 model named in
        // Step 6 — confirm against the embedding service configuration.
        rag.vector_store.create_collection(collection_name, 1024).await?;
        println!("   ✅ Collection created/loaded");
    }

    // Step 3: Check collection info
    println!("\n📊 Step 3: Checking '{}' collection info...", collection_name);
    if let Some(info) = rag.vector_store.get_collection_info(collection_name).await? {
        println!("   ✅ Collection exists:");
        println!("      - Name: {}", info.name);
        println!("      - Vector size: {}", info.vector_size);
        println!("      - Points count: {}", info.points_count);
    } else {
        // Nothing to search without the collection; end the walkthrough early.
        println!("   ❌ Collection '{}' not found!", collection_name);
        return Ok(());
    }

    // Step 4: List some documents
    println!("\n📄 Step 4: Listing documents from '{}'...", collection_name);
    let docs = rag.vector_store.list_documents(collection_name, Some(5), None).await?;
    println!("   Found {} documents (showing first 5):", docs.len());
    for (i, doc) in docs.iter().enumerate() {
        println!("      {}. ID: {}", i + 1, doc.id);
        if let Some(service) = doc.metadata.get("service") {
            println!("         Service: {}", service);
        }
    }

    // Step 5: Perform estate resource search using search_estate_resources
    println!("\n🔍 Step 5: Searching estate resources...");
    let query = "list my ec2 instances";
    println!("   Query: '{}'", query);
    println!("   Collection: '{}'", collection_name);

    // Create estate search options: no filters, at most 5 hits with score >= 0.5.
    let estate_options = rag_module::services::search_service::EstateSearchOptions {
        resource_types: None, // Can filter by resource types like ["EC2Instance", "RDSDatabase"]
        account_ids: None,
        regions: None,
        services: None, // Can filter by services like ["ec2", "rds"]
        states: None,
        environment: None,
        application: None,
        synced_after: None,
        limit: Some(5),
        score_threshold: Some(0.5),
        include_metadata: true,
        use_anonymous_ids: false,
    };

    // Use the search_estate_resources method (this is what users actually call)
    let results = rag.search_service.search_estate_resources(
        collection_name,
        query,
        estate_options,
        None, // No IAM context for this example
        user_id,
    ).await?;

    println!("\n📊 Search Results:");
    println!("{}", rule);

    if results.is_empty() {
        println!("❌ No results found!");
    } else {
        println!("✅ Found {} results:\n", results.len());
        for (i, result) in results.iter().enumerate() {
            println!("{}. Result:", i + 1);

            // Extract and display key fields from the JSON result; absent or differently
            // typed fields are skipped silently.
            if let Some(obj) = result.as_object() {
                if let Some(id) = obj.get("id").and_then(|v| v.as_str()) {
                    println!("   ID: {}", id);
                }
                if let Some(score) = obj.get("score").and_then(|v| v.as_f64()) {
                    println!("   Score: {:.4}", score);
                }
                // Remaining string fields as (JSON key, printed label), in display order.
                for &(key, label) in &[
                    ("service", "Service"),
                    ("resourceType", "Resource Type"),
                    ("region", "Region"),
                    ("accountId", "Account"),
                ] {
                    if let Some(text) = obj.get(key).and_then(|v| v.as_str()) {
                        println!("   {}: {}", label, text);
                    }
                }
            }
            println!();
        }
    }

    println!("{}", rule);

    // Step 6: Verify Dual Mode Operation
    println!("\n🔄 Step 6: Verifying Dual Mode Architecture...");

    // Identify the concrete store type by attempting each known downcast in turn.
    let store = rag.vector_store.as_any();
    if store.downcast_ref::<rag_module::db::DualVectorStore>().is_some() {
        println!("   ✅ Confirmed: Using DualVectorStore");
        println!("   📊 Search executed through search_estate_resources():");
        println!("      → Query preprocessing & spell correction");
        println!("      → BGE-M3 embedding generation");
        println!("      → Vector search in LOCAL embedded storage (fast)");
        println!("      → Privacy filtering & IAM checks");
        println!("      → Results formatted as JSON");
        println!("\n   💾 Write operations (if any):");
        println!("      → Local embedded storage (immediate)");
        println!("      → Remote server sync (automatic backup)");
    } else if store.downcast_ref::<rag_module::db::EmbeddedQdrantVectorStore>().is_some() {
        println!("   ℹ️ Using EmbeddedQdrantVectorStore only");
    } else if store.downcast_ref::<rag_module::db::QdrantServerVectorStore>().is_some() {
        println!("   ℹ️ Using QdrantServerVectorStore only");
    } else {
        println!("   ⚠️ Unknown vector store type");
    }

    println!("\n{}", rule);
    println!("✅ Test complete!");
    println!("\n💡 Summary:");
    println!("   - Method: search_estate_resources() (user-facing API)");
    println!("   - Storage: Dual mode (embedded + server sync)");
    println!("   - Searches: Local embedded storage (milliseconds)");
    println!("   - Writes: Dual (local + server backup)");
    println!("   - Collections: Auto-discovered from filesystem");
    println!("   - Results: {} matching resources", results.len());

    Ok(())
}