rag-module 0.6.7

Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
use anyhow::Result;
use rag_module::RagModule;
use rag_module::services::search_service::ChatSearchOptions;

/// Directory handed to `RagModule::new`; local paths checked by this example are derived from it.
const DATA_DIR: &str = "./example-enhanced-chat-data";

/// Host name of the sample AWS endpoint shown in the printed instructions.
const AWS_NLB_HOST: &str = "dev-qdrant-nlb-e8c337edc3ee861b.elb.ap-south-1.amazonaws.com";

/// Where the example believes Qdrant is running, derived from `QDRANT_URL`.
///
/// Using an enum (instead of comparing display strings) lets the compiler
/// check that every mode is handled wherever mode-specific text is printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DeploymentMode {
    /// `QDRANT_URL` is not set.
    Embedded,
    /// `QDRANT_URL` is set and does not mention `amazonaws.com`.
    LocalServer,
    /// `QDRANT_URL` is set and contains `amazonaws.com`.
    AwsCloud,
}

impl DeploymentMode {
    /// Classifies the (optional) Qdrant URL into a deployment mode.
    fn detect(qdrant_url: Option<&str>) -> Self {
        match qdrant_url {
            Some(url) if url.contains("amazonaws.com") => Self::AwsCloud,
            Some(_) => Self::LocalServer,
            None => Self::Embedded,
        }
    }

    /// Human-readable label printed in the banner and in the final summary.
    fn label(self) -> &'static str {
        match self {
            Self::AwsCloud => "ā˜ļø AWS Cloud Mode",
            Self::LocalServer => "🌐 Local Server Mode",
            Self::Embedded => "šŸ’¾ Embedded Mode",
        }
    }
}

/// Returns at most the first `max_chars` characters of `text`.
///
/// Unlike `&text[..n]`, this never panics: text shorter than the limit is
/// returned whole, and the cut always falls on a UTF-8 character boundary.
fn preview(text: &str, max_chars: usize) -> &str {
    match text.char_indices().nth(max_chars) {
        Some((byte_idx, _)) => &text[..byte_idx],
        None => text,
    }
}

/// Prints the start-of-run instructions for switching away from `mode`.
fn print_switch_hints(mode: DeploymentMode) {
    match mode {
        DeploymentMode::Embedded => {
            println!("šŸ“ To switch to local server mode:");
            println!("   1. Run: docker-compose up -d");
            println!("   2. Set: export QDRANT_URL=\"http://localhost:6334\"");
            println!("   3. Run this example again");
            println!();
            println!("šŸ“ To switch to AWS cloud mode:");
            println!("   1. Deploy cluster: cd deployment/cloudformation && aws cloudformation create-stack...");
            println!("   2. Set: export QDRANT_URL=\"http://{}:6334\"", AWS_NLB_HOST);
            println!("   3. Run this example again");
        }
        DeploymentMode::LocalServer => {
            println!("āœ… Using local Qdrant server");
            println!("   Dashboard: http://localhost:6333/dashboard");
            println!("   To switch to embedded: unset QDRANT_URL");
            println!("   To switch to AWS: export QDRANT_URL=\"http://your-nlb-dns:6334\"");
        }
        DeploymentMode::AwsCloud => {
            println!("āœ… Using AWS ECS Qdrant cluster");
            println!("   Cluster: Fargate with replication factor 3");
            println!("   Dashboard: http://{}:6333/dashboard", AWS_NLB_HOST);
            println!("   To switch to embedded: unset QDRANT_URL");
            println!("   To switch to local: export QDRANT_URL=\"http://localhost:6334\"");
        }
    }
}

/// Prints the end-of-run summary: benefits of `mode` and how to try the other modes.
fn print_mode_summary(mode: DeploymentMode) {
    match mode {
        DeploymentMode::Embedded => {
            println!("āœ… Embedded Mode Benefits:");
            println!("   • No server required");
            println!("   • Simple setup");
            println!("   • Fast for small datasets");
            println!("   • Perfect for development");
            println!();
            println!("šŸ“¦ Data stored in: {}/qdrant-data/", DATA_DIR);
            println!();
            println!("šŸš€ Want to try Local Server Mode?");
            println!("   docker-compose up -d");
            println!("   export QDRANT_URL=\"http://localhost:6334\"");
            println!("   cargo run --example complete_chat_example");
            println!();
            println!("ā˜ļø Want to try AWS Cloud Mode?");
            println!("   export QDRANT_URL=\"http://{}:6334\"", AWS_NLB_HOST);
            println!("   cargo run --example complete_chat_example");
        }
        DeploymentMode::LocalServer => {
            println!("āœ… Local Server Mode Benefits:");
            println!("   • Production-ready");
            println!("   • Scalable to millions of docs");
            println!("   • Web dashboard available");
            println!("   • Multi-app access");
            println!();
            println!("šŸ“¦ Data in Qdrant server + local backup");
            println!("🌐 Dashboard: http://localhost:6333/dashboard");
            println!();
            println!("šŸ’¾ Want to try Embedded Mode?");
            println!("   unset QDRANT_URL");
            println!("   cargo run --example complete_chat_example");
            println!();
            println!("ā˜ļø Want to try AWS Cloud Mode?");
            println!("   export QDRANT_URL=\"http://{}:6334\"", AWS_NLB_HOST);
            println!("   cargo run --example complete_chat_example");
        }
        DeploymentMode::AwsCloud => {
            println!("āœ… AWS Cloud Mode Benefits:");
            println!("   • Fully managed on AWS ECS Fargate");
            println!("   • High availability with replication factor 3");
            println!("   • Auto-scaling and load balancing");
            println!("   • Production-grade clustering");
            println!("   • Isolated VPC with security groups");
            println!();
            println!("šŸ“¦ Data replicated across 3 Qdrant nodes");
            println!("🌐 Dashboard: http://{}:6333/dashboard", AWS_NLB_HOST);
            println!();
            println!("šŸ’¾ Want to try Embedded Mode?");
            println!("   unset QDRANT_URL");
            println!("   cargo run --example complete_chat_example");
            println!();
            println!("🌐 Want to try Local Server Mode?");
            println!("   docker-compose up -d");
            println!("   export QDRANT_URL=\"http://localhost:6334\"");
            println!("   cargo run --example complete_chat_example");
        }
    }
}

/// Runs a three-turn chat walkthrough against `RagModule`.
///
/// Steps, in order: detect the deployment mode from `QDRANT_URL`, initialize
/// the module, store three prompt/response turns (re-reading the conversation
/// pairs between turns), query the chat search service, round-trip content and
/// an embedding through the encryption service, then print a storage and mode
/// summary.
///
/// # Errors
///
/// Returns the first error produced by any `RagModule` / service call.
///
/// NOTE(review): several emoji and box-drawing literals below look mis-encoded
/// (mojibake). They are kept verbatim; restore them from the upstream source
/// if it is available.
#[tokio::main]
async fn main() -> Result<()> {
    println!("šŸš€ Enhanced Complete Chat Example: Embedded vs Server vs AWS Cloud Mode");
    println!("This example works with ALL THREE Qdrant deployment modes!\n");

    // Detect which mode we're running in
    let qdrant_url = std::env::var("QDRANT_URL").ok();
    let mode = DeploymentMode::detect(qdrant_url.as_deref());

    println!("Running in: {}", mode.label());
    if let Some(url) = &qdrant_url {
        println!("Qdrant URL: {}", url);
    }
    println!("─────────────────────────────────────────────────────────");
    print_switch_hints(mode);
    println!("─────────────────────────────────────────────────────────\n");

    // Initialize RAG module with enhanced services.
    // The same code works for all three modes.
    let mut rag_module = RagModule::new(DATA_DIR).await?;
    rag_module.initialize().await?;

    println!("āœ… RAG Module initialized:");
    println!("   - Chat Collection: 1D dummy vectors for scroll-only search");
    println!("   - Estate Collection: 1024D BGE-M3 vectors for semantic search");
    println!("   - Encryption Service: OS keychain integration");
    println!("   - Search Service: Collection-specific algorithms");
    println!("   - Folder Structure: 100% same in both modes!");
    println!();

    // Set the user context before any document operation.
    let user_id = "cudewvsbxakj1n";
    rag_module.set_user_context(user_id).await?;

    let context_id = "test6"; // User-provided context_id for organizing chat conversations
    let chat_title = Some("Complete Chat Demo - REST vs GraphQL");

    println!("šŸ†• Starting new conversation...");
    println!("āœ… Context ID: {}", context_id);
    println!("šŸ“ User ID: {}\n", user_id);

    // === TURN 1: Initial Question ===
    println!("šŸ’¬ TURN 1: Initial Question");
    let prompt1 = "What is the difference between REST and GraphQL APIs?";
    println!("User: {}", prompt1);

    let prompt_id1 = rag_module.add_prompt(context_id, prompt1, user_id, chat_title).await?;
    println!("āœ… Prompt stored (ID: {})", prompt_id1);

    let response1 = "Great question! Here are the key differences between REST and GraphQL:\n\n**REST (Representational State Transfer):**\n- Uses multiple endpoints (e.g., /users, /posts, /comments)\n- Fixed data structure per endpoint\n- Uses HTTP methods (GET, POST, PUT, DELETE)\n- Can lead to over-fetching or under-fetching data\n- Simpler caching\n\n**GraphQL:**\n- Single endpoint (/graphql)\n- Flexible queries - request exactly what you need\n- Strong type system with schema\n- Solves over-fetching problem\n- More complex caching\n- Real-time subscriptions built-in\n\n**When to use**:\n- REST: Simple APIs, caching important, team familiar with REST\n- GraphQL: Complex data relationships, mobile apps, need flexible queries";

    let response_id1 = rag_module.add_response(context_id, response1, user_id, chat_title).await?;
    println!("Assistant: {}...", preview(response1, 120));
    println!("āœ… Response stored (ID: {})\n", response_id1);

    // Check encrypted storage after first turn
    println!("šŸ” Checking encrypted storage:");
    let encrypted_docs = rag_module.get_encrypted_chat_documents(user_id).await?;
    println!("   Documents stored: {}", encrypted_docs.len());

    // === TURN 2: Follow-up with Context Retrieval ===
    println!("\nšŸ’¬ TURN 2: Follow-up Question (Needs Context)");

    // Retrieve context before processing new prompt
    println!("šŸ”„ Retrieving conversation context...");
    let context = rag_module.get_query_response_pairs(context_id, None).await?;
    println!("šŸ“š Retrieved {} conversation pairs", context.total_pairs);

    let prompt2 = "Can you give me a practical example of the over-fetching problem you mentioned?";
    println!("User: {}", prompt2);

    // Show context that would be passed to AI.
    // Stored responses may be shorter than the preview length (the context id is
    // fixed, so pairs from earlier runs can show up here); `preview` handles that.
    println!("\nšŸ¤– Context passed to AI:");
    println!("─────────── PREVIOUS CONVERSATION ───────────");
    for (i, pair) in context.pairs.iter().enumerate() {
        println!("Turn {}: Human: {}", i + 1, pair.query.content);
        println!("Turn {}: Assistant: {}...", i + 1, preview(&pair.response.content, 100));
        println!();
    }
    println!("NEW PROMPT: {}", prompt2);
    println!("─────────── END CONTEXT ─────────");

    let prompt_id2 = rag_module.add_prompt(context_id, prompt2, user_id, chat_title).await?;
    println!("āœ… Prompt stored with context (ID: {})", prompt_id2);

    let response2 = "Perfect example of REST over-fetching!\n\n**Scenario**: Mobile app showing user profile\n\n**REST API Response** from /users/123:\n```json\n{\n  \"id\": 123,\n  \"name\": \"John Doe\",\n  \"email\": \"john@example.com\",\n  \"bio\": \"Long biography...\",\n  \"posts\": [...50 recent posts...],\n  \"followers\": [...200 followers...],\n  \"following\": [...150 following...],\n  \"preferences\": {...},\n  \"lastLogin\": \"2024-01-15\"\n}\n```\n\n**Problem**: Mobile app only needs name and email, but gets 50 posts + 200 followers + 150 following = huge payload!\n\n**GraphQL Solution**:\n```graphql\nquery {\n  user(id: 123) {\n    name\n    email\n  }\n}\n```\n**Result**: Only name and email returned - much faster on mobile!\n\nThis directly addresses the over-fetching issue I mentioned in my previous explanation about REST vs GraphQL.";

    let response_id2 = rag_module.add_response(context_id, response2, user_id, chat_title).await?;
    println!("Assistant: {}...", preview(response2, 120));
    println!("āœ… Response stored (ID: {})", response_id2);

    // === TURN 3: Another Context-Dependent Question ===
    println!("\nšŸ’¬ TURN 3: More Context-Dependent Question");

    // Get updated context
    let updated_context = rag_module.get_query_response_pairs(context_id, None).await?;
    println!("šŸ”„ Updated context: {} conversation pairs", updated_context.total_pairs);

    let prompt3 = "What about the GraphQL caching complexity you mentioned? How is it different from REST caching?";
    println!("User: {}", prompt3);

    // Show query-response pairs format (useful for AI context).
    // Nothing has been written since `updated_context` was fetched, so it is
    // reused here instead of issuing an identical second query.
    println!("\nšŸŽÆ Context in Q&A format (for AI):");
    for (i, pair) in updated_context.pairs.iter().enumerate() {
        println!("Q{}: {}", i + 1, pair.query.content);
        println!("A{}: {}...\n", i + 1, preview(&pair.response.content, 80));
    }

    let prompt_id3 = rag_module.add_prompt(context_id, prompt3, user_id, chat_title).await?;
    println!("āœ… Prompt stored (ID: {})", prompt_id3);

    let response3 = "Excellent follow-up! The caching complexity difference is significant:\n\n**REST Caching (Simpler)**:\n- Each endpoint is cached independently\n- `/users/123` cached separately from `/posts/456`\n- HTTP caching headers work perfectly\n- Easy to invalidate specific resources\n\n**GraphQL Caching (Complex)**:\n- Single endpoint `/graphql` with different queries\n- Same query with different fields = different cache entry\n- Can't use simple HTTP caching\n- Need normalized caching (like Apollo Cache)\n\n**Example Problem**:\n```graphql\n# Query 1\nquery { user(id: 123) { name, email } }\n\n# Query 2  \nquery { user(id: 123) { name, bio } }\n```\nBoth fetch user 123, but traditional caches see them as different!\n\n**Solution**: Normalized caching breaks responses into entities:\n- User:123 -> { name: \"John\", email: \"...\", bio: \"...\" }\n- Smart cache serves both queries from same stored user\n\nThis relates back to what I said earlier about GraphQL being more complex but more flexible than the REST approach.";

    let response_id3 = rag_module.add_response(context_id, response3, user_id, chat_title).await?;
    println!("Assistant: {}...", preview(response3, 120));
    println!("āœ… Response stored (ID: {})", response_id3);

    // === ENHANCED SEARCH SERVICE TESTING ===
    println!("\nšŸ” Testing Enhanced Chat Search Service:");

    // Test chat history search using enhanced search service
    let chat_search_options = ChatSearchOptions {
        context_id: Some(context_id.to_string()),
        role: None,
        from_timestamp: None,
        to_timestamp: None,
        from_message_index: None,
        to_message_index: None,
        limit: Some(10),
        include_metadata: true,
        user_id: Some(user_id.to_string()),
    };

    let chat_search_results = rag_module.search_service
        .search_chat_history(chat_search_options)
        .await?;

    println!("āœ… Chat search returned formatted history:");
    // A serialization failure is reported inline rather than aborting the demo.
    println!("\n{}", serde_json::to_string_pretty(&chat_search_results).unwrap_or_else(|_| "Error formatting".to_string()));

    // === ENCRYPTION SERVICE TESTING ===
    println!("\nšŸ” Testing Enhanced Encryption Service:");

    // Round-trip plain content through the encryption service
    let test_content = "This is a test message for encryption";
    let encrypted_content = rag_module.encryption_service.encrypt_content(test_content).await?;
    let decrypted_content = rag_module.encryption_service.decrypt_content(&encrypted_content).await?;

    println!("āœ… Content encryption test:");
    println!("   Original: {}", test_content);
    println!("   Encrypted length: {} chars", encrypted_content.len());
    println!("   Decrypted: {}", decrypted_content);
    println!("   Match: {}", test_content == decrypted_content);

    // Round-trip an embedding vector through the encryption service
    let test_embedding = vec![0.1, 0.2, 0.3, 0.4, 0.5];
    let encrypted_embedding = rag_module.encryption_service.encrypt_embedding(&test_embedding).await?;
    let decrypted_embedding = rag_module.encryption_service.decrypt_embedding(&encrypted_embedding).await?;

    println!("āœ… Embedding encryption test:");
    println!("   Original embedding: {:?}", test_embedding);
    println!("   Decrypted embedding: {:?}", decrypted_embedding);
    println!("   Match: {}", test_embedding == decrypted_embedding);

    // === DEMONSTRATE BOTH FUNCTIONALITIES ===
    println!("\nšŸ“Š FINAL DEMONSTRATION: Enhanced Business Architecture Features");

    // 1. Encrypted Storage
    println!("\nšŸ” 1. Encrypted Document Storage:");
    let final_encrypted_docs = rag_module.get_encrypted_chat_documents(user_id).await?;

    println!("   Total encrypted documents: {}", final_encrypted_docs.len());

    let encrypted_file_path = rag_module.get_user_encrypted_documents_path(user_id);
    println!("   Storage location: {}", encrypted_file_path.display());

    // One async metadata call doubles as the existence check, avoiding a
    // blocking `Path::exists()` on the async runtime. If the file is missing
    // (or unreadable) the details below are simply skipped.
    if let Ok(file_meta) = tokio::fs::metadata(&encrypted_file_path).await {
        println!("   File size: {} bytes", file_meta.len());

        // Show first document structure
        if let Some(doc) = final_encrypted_docs.first() {
            println!("   Sample document:");
            println!("     - ID: {}", doc.id);
            println!("     - Vector ID: {}", doc.vector_id);
            println!("     - Encrypted content: {} chars", doc.content.len());
            println!("     - Encrypted metadata: {} chars", doc.metadata.encrypted_metadata.len());
            println!("     - Created: {}", doc.metadata.created_at);
        }
    }

    // 2. Context Retrieval
    println!("\nšŸ”„ 2. Context Retrieval Methods:");
    let final_history = rag_module.get_query_response_pairs(context_id, None).await?;
    println!("   Context history pairs: {}", final_history.total_pairs);
    println!("   Context ID: {}", final_history.context_id);
    println!("   Chat title: {}", final_history.chat_title);

    // Show how context would be formatted for AI
    println!("\nšŸ¤– 3. AI Context Formatting Example:");
    println!("   If sending to AI, context would be:");
    println!("   ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”");
    println!("   │ CONVERSATION HISTORY                │");
    println!("   ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜");

    for (i, pair) in final_history.pairs.iter().enumerate() {
        println!("   Turn {}:", i + 1);
        println!("   Human: {}", pair.query.content);
        println!("   Assistant: {}...", preview(&pair.response.content, 100));
        println!();
    }

    println!("   [NEW USER PROMPT WOULD GO HERE]");

    // 4. Storage Files Summary
    println!("šŸ“ 4. Storage Files Created:");
    println!("   Encrypted documents: {}", encrypted_file_path.display());

    // NOTE(review): this previously looked under ./example-complete-chat-data, a
    // directory this example never passes to RagModule. It now derives the path
    // from DATA_DIR; the messages/messages.jsonl layout is assumed — confirm
    // against the crate's actual storage layout.
    let messages_file = std::path::Path::new(DATA_DIR).join("messages/messages.jsonl");

    if messages_file.exists() {
        println!("   Messages file: {}", messages_file.display());
    }

    // === FINAL SUMMARY ===
    println!("\n");
    println!("═══════════════════════════════════════════════════════════");
    println!("                    MODE SUMMARY                           ");
    println!("═══════════════════════════════════════════════════════════");
    println!("Current Mode: {}", mode.label());
    println!();

    print_mode_summary(mode);

    println!();
    println!("šŸ“š Key Insight: Same code, same folder structure, 3 deployment options!");
    println!("   šŸ’¾ Embedded: Vectors in ./qdrant-data/");
    println!("   🌐 Local Server: Vectors in Docker container");
    println!("   ā˜ļø AWS Cloud: Vectors in ECS Fargate cluster with replication");
    println!("   The ONLY difference is where vectors are stored.");
    println!("═══════════════════════════════════════════════════════════");

    Ok(())
}