rag-module 0.6.7

Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
//! Clean Search Test - runs sample natural-language estate queries and prints a condensed view of the results.

use anyhow::Result;
use rag_module::*;
use rag_module::services::search_service::EstateSearchOptions;

/// Runs the clean-search smoke test using a RAG module created from `./test_data`.
///
/// Issues four estate searches (S3 buckets, IAM users, and RDS databases
/// without and with a service filter) and prints a short summary of each
/// result set to stdout.
///
/// # Errors
///
/// Propagates the first error returned by module creation, initialization,
/// or any of the search calls.
#[tokio::main]
async fn main() -> Result<()> {
    println!("🔍 CLEAN SEARCH TEST");
    println!("===================\n");

    let rag = create_rag_module("./test_data").await?;
    rag.initialize().await?;

    let user_id = "test_user_123";

    // Each query string is bound once and reused for both the printed banner
    // and the actual search call, so the two can never drift apart.

    // Test 1: List my S3 buckets
    let s3_query = "list the s3 buckets with creation date";
    println!("Query: '{}'", s3_query);
    println!("Expected: Clean list of S3 buckets with key details\n");

    let s3_results = rag.search_service.search_estate_resources(
        s3_query,
        EstateSearchOptions {
            resource_types: None,
            account_ids: None,
            regions: None,
            services: None,
            states: None,
            environment: None,
            application: None,
            synced_after: None,
            limit: None, // Let natural filtering determine count
            score_threshold: None, // NO threshold - find all documents
            include_metadata: true,
            use_anonymous_ids: false,
        },
        None,
        user_id,
    ).await?;

    println!("📋 S3 BUCKETS ({} found):", s3_results.len());
    for (i, result) in s3_results.iter().take(3).enumerate() { // Show only first 3 for clarity
        // Dump the available keys once (first result only) as a debugging aid
        if i == 0 {
            if let Some(obj) = result.as_object() {
                println!("   DEBUG - Available keys: {:?}", obj.keys().collect::<Vec<_>>());
            }
        }

        // Missing or non-string/non-numeric fields fall back to a placeholder
        // instead of aborting the run.
        let id = result.get("id").and_then(|v| v.as_str()).unwrap_or("unknown");
        let service = result.get("service").and_then(|v| v.as_str()).unwrap_or("unknown");
        let region = result.get("region").and_then(|v| v.as_str()).unwrap_or("unknown");
        let score = result.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);

        println!("{}. {} - {} in {} (score: {:.3})",
            i + 1,
            id,
            service,
            region,
            score
        );
    }

    println!("\n{}\n", "=".repeat(50));

    // Test 2: List my IAM users
    let iam_query = "list my iam users";
    println!("Query: '{}'", iam_query);
    println!("Expected: Clean list of IAM users\n");

    let iam_results = rag.search_service.search_estate_resources(
        iam_query,
        EstateSearchOptions {
            resource_types: None,
            account_ids: None,
            regions: None,
            services: Some(vec!["iam".to_string()]),
            states: None,
            environment: None,
            application: None,
            synced_after: None,
            limit: None, // Let natural filtering determine count
            score_threshold: Some(0.3), // Lower threshold for IAM - different content structure
            include_metadata: true,
            use_anonymous_ids: false,
        },
        None,
        user_id,
    ).await?;

    println!("👤 IAM USERS ({} found):", iam_results.len());
    for (i, result) in iam_results.iter().take(3).enumerate() { // Show only first 3 for clarity
        let id = result.get("id").and_then(|v| v.as_str()).unwrap_or("unknown");
        let service = result.get("service").and_then(|v| v.as_str()).unwrap_or("unknown");
        let score = result.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);

        println!("{}. {} - {} (score: {:.3})",
            i + 1,
            id,
            service,
            score
        );
    }

    println!("\n{}\n", "=".repeat(50));

    // Test 3: First check RDS without service filter
    let rds_query = "show my rds databases";
    println!("Query: '{}' (no service filter)", rds_query);

    let rds_unfiltered = rag.search_service.search_estate_resources(
        rds_query,
        EstateSearchOptions {
            resource_types: None,
            account_ids: None,
            regions: None,
            services: None, // NO service filter to see all RDS-related results
            states: None,
            environment: None,
            application: None,
            synced_after: None,
            limit: Some(10), // Get more results to investigate
            score_threshold: None, // NO threshold - find all documents
            include_metadata: true,
            use_anonymous_ids: false,
        },
        None,
        user_id,
    ).await?;

    println!("🗄️  RDS-like results WITHOUT service filter ({} found):", rds_unfiltered.len());
    for (i, result) in rds_unfiltered.iter().take(10).enumerate() {
        let id = result.get("id").and_then(|v| v.as_str()).unwrap_or("unknown");
        let service = result.get("service").and_then(|v| v.as_str()).unwrap_or("unknown");
        let region = result.get("region").and_then(|v| v.as_str()).unwrap_or("unknown");
        let score = result.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);

        // Call out documents whose service field is missing so they stand out
        // in the listing.
        if service == "unknown" {
            println!("{}. 🔍 UNKNOWN SERVICE: ID=[{}] - {} in {} (score: {:.3})",
                i + 1,
                id,
                service,
                region,
                score
            );
        } else {
            println!("{}. {} - {} in {} (score: {:.3})",
                i + 1,
                id,
                service,
                region,
                score
            );
        }
    }

    println!("\n{}\n", "=".repeat(50));

    // Test 3b: List my RDS databases WITH service filter
    println!("Query: '{}' (with service filter)", rds_query);
    println!("Expected: Clean list of RDS databases\n");

    let rds_results = rag.search_service.search_estate_resources(
        rds_query,
        EstateSearchOptions {
            resource_types: None,
            account_ids: None,
            regions: None,
            services: Some(vec!["rds".to_string()]),
            states: None,
            environment: None,
            application: None,
            synced_after: None,
            limit: None, // Let natural filtering determine count
            score_threshold: None, // NO threshold - find all documents
            include_metadata: true,
            use_anonymous_ids: false,
        },
        None,
        user_id,
    ).await?;

    println!("🗄️  RDS DATABASES ({} found):", rds_results.len());
    for (i, result) in rds_results.iter().take(3).enumerate() { // Show only first 3 for clarity
        let id = result.get("id").and_then(|v| v.as_str()).unwrap_or("unknown");
        let service = result.get("service").and_then(|v| v.as_str()).unwrap_or("unknown");
        let region = result.get("region").and_then(|v| v.as_str()).unwrap_or("unknown");
        let score = result.get("score").and_then(|v| v.as_f64()).unwrap_or(0.0);

        println!("{}. {} - {} in {} (score: {:.3})",
            i + 1,
            id,
            service,
            region,
            score
        );
    }

    Ok(())
}