rag-module 0.6.7

Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
use rag_module::RagModule;
use serde_json::json;
use std::path::PathBuf;
use anyhow::Result;

#[tokio::main]
async fn main() -> Result<()> {
    println!("๐Ÿงช Testing Canonical Resource ID Deduplication");
    println!("==============================================");
    println!("This test demonstrates the corner case fix where same resource");
    println!("with different states (running/stopped) are properly deduplicated\n");

    // Clean start
    let base_path = PathBuf::from("./canonical_dedup_test");
    if base_path.exists() {
        std::fs::remove_dir_all(&base_path).ok();
    }

    let rag = RagModule::new(base_path.clone()).await?;
    let user_id = "canonical_user";
    let collection_name = "canonical_test_estate";
    rag.set_user_context(user_id).await?;

    println!("โœ… RAG module initialized\n");

    // Test Case 1: AWS RDS with different status but same resource
    println!("๐Ÿ“‹ Test Case 1: AWS RDS - Status Change (running โ†’ stopped)");
    println!("{}", "-".repeat(70));

    let rds_running = json!({
        "type": "rds",
        "accountId": "123456789012", 
        "region": "us-east-1",
        "dbInstanceIdentifier": "prod-database", 
        "content": "RDS instance prod-database, id prod-database, region us-east-1, engine postgres, class db.r5.large, status running",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgres",
        "dbInstanceStatus": "running",  // DYNAMIC STATUS FIELD
        "allocatedStorage": 100,
        "lastModified": "2024-12-24T10:00:00Z"
    });

    let rds_stopped = json!({
        "type": "rds", 
        "accountId": "123456789012", 
        "region": "us-east-1",
        "dbInstanceIdentifier": "prod-database", 
        "content": "RDS instance prod-database, id prod-database, region us-east-1, engine postgres, class db.r5.large, status stopped", // DIFFERENT CONTENT
        "dbInstanceClass": "db.r5.large",
        "engine": "postgres", 
        "dbInstanceStatus": "stopped", // DIFFERENT STATUS
        "allocatedStorage": 100,
        "lastModified": "2024-12-24T12:00:00Z" // DIFFERENT TIMESTAMP
    });

    println!("๐Ÿ”„ Ingesting RDS instance with status: RUNNING");
    rag.ingest_aws_estate(rds_running.clone(), user_id, collection_name).await?;
    let count_after_running = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", count_after_running);

    println!("๐Ÿ”„ Ingesting SAME RDS instance with status: STOPPED");
    rag.ingest_aws_estate(rds_stopped.clone(), user_id, collection_name).await?;
    let count_after_stopped = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", count_after_stopped);

    if count_after_running == count_after_stopped && count_after_stopped == 1 {
        println!("๐ŸŽ‰ SUCCESS: Same resource with different status was properly deduplicated!");
        println!("   Expected canonical ID: aws:rds:123456789012:us-east-1:prod-database");
    } else {
        println!("โŒ FAILURE: Status change created duplicate documents");
    }

    // Test Case 2: Azure VM with different power state
    println!("\n๐Ÿ“‹ Test Case 2: Azure VM - Power State Change (running โ†’ deallocated)");
    println!("{}", "-".repeat(70));

    let azure_vm_running = json!({
        "type": "vm",
        "subscriptionId": "aaaabbbb-cccc-dddd-eeee-ffffffffffff",
        "resourceGroup": "production-rg", 
        "vmId": "web-server-01",
        "content": "Azure VM web-server-01 in resource group production-rg, location eastus, powerState running",
        "location": "eastus",
        "powerState": "running", // DYNAMIC FIELD
        "vmSize": "Standard_D2s_v3",
        "lastUpdated": "2024-12-24T10:30:00Z"
    });

    let azure_vm_deallocated = json!({
        "type": "vm",
        "subscriptionId": "aaaabbbb-cccc-dddd-eeee-ffffffffffff",
        "resourceGroup": "production-rg",
        "vmId": "web-server-01",
        "content": "Azure VM web-server-01 in resource group production-rg, location eastus, powerState deallocated", // DIFFERENT CONTENT
        "location": "eastus", 
        "powerState": "deallocated", // DIFFERENT POWER STATE
        "vmSize": "Standard_D2s_v3",
        "lastUpdated": "2024-12-24T14:15:00Z" // DIFFERENT TIMESTAMP
    });

    println!("๐Ÿ”„ Ingesting Azure VM with powerState: RUNNING");
    rag.ingest_aws_estate(azure_vm_running.clone(), user_id, collection_name).await?;
    let count_after_vm_running = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", count_after_vm_running);

    println!("๐Ÿ”„ Ingesting SAME Azure VM with powerState: DEALLOCATED");
    rag.ingest_aws_estate(azure_vm_deallocated.clone(), user_id, collection_name).await?;
    let count_after_vm_deallocated = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", count_after_vm_deallocated);

    if count_after_vm_deallocated == 2 {
        println!("๐ŸŽ‰ SUCCESS: Azure VM power state change was properly deduplicated!");
        println!("   Expected canonical ID: azure:vm:aaaabbbb-cccc-dddd-eeee-ffffffffffff:production-rg:web-server-01");
    } else {
        println!("โŒ FAILURE: VM power state change created unexpected document count");
    }

    // Test Case 3: Different resources should still create separate documents
    println!("\n๐Ÿ“‹ Test Case 3: Different Resources - Should Create Separate Documents");
    println!("{}", "-".repeat(70));

    let different_rds = json!({
        "type": "rds",
        "accountId": "123456789012",
        "region": "us-west-2", // DIFFERENT REGION
        "dbInstanceIdentifier": "staging-database", // DIFFERENT DB NAME
        "content": "RDS instance staging-database, id staging-database, region us-west-2, engine mysql, class db.t3.medium, status available",
        "dbInstanceClass": "db.t3.medium",
        "engine": "mysql", // DIFFERENT ENGINE
        "dbInstanceStatus": "available"
    });

    println!("๐Ÿ”„ Ingesting DIFFERENT RDS instance...");
    rag.ingest_aws_estate(different_rds.clone(), user_id, collection_name).await?;
    let final_count = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", final_count);

    if final_count == 3 {
        println!("๐ŸŽ‰ SUCCESS: Different resource created new document!");
        println!("   Expected canonical ID: aws:rds:123456789012:us-west-2:staging-database");
    } else {
        println!("โŒ FAILURE: Expected 3 documents total, got {}", final_count);
    }

    // Test Case 4: Fallback for unknown service types
    println!("\n๐Ÿ“‹ Test Case 4: Unknown Service Type - Fallback Strategy");
    println!("{}", "-".repeat(70));

    let unknown_service = json!({
        "type": "custom-service", // UNKNOWN TYPE
        "id": "resource-123",
        "region": "us-east-1", 
        "content": "Custom resource resource-123 in region us-east-1, status active",
        "status": "active"
    });

    let unknown_service_updated = json!({
        "type": "custom-service",
        "id": "resource-123", // SAME ID
        "region": "us-east-1",
        "content": "Custom resource resource-123 in region us-east-1, status inactive", // DIFFERENT STATUS
        "status": "inactive" // DIFFERENT STATUS
    });

    println!("๐Ÿ”„ Ingesting unknown service type with status: ACTIVE");
    rag.ingest_aws_estate(unknown_service.clone(), user_id, collection_name).await?;
    let unknown_count1 = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", unknown_count1);

    println!("๐Ÿ”„ Ingesting SAME unknown service with status: INACTIVE");
    rag.ingest_aws_estate(unknown_service_updated.clone(), user_id, collection_name).await?;
    let unknown_count2 = rag.get_document_count(Some(collection_name), None).await?;
    println!("   Document count: {}", unknown_count2);

    if unknown_count2 == unknown_count1 {
        println!("๐ŸŽ‰ SUCCESS: Unknown service type used fallback deduplication!");
        println!("   Expected fallback canonical ID: fallback:custom-service:resource-123:us-east-1");
    } else {
        println!("โŒ FAILURE: Unknown service type didn't deduplicate properly");
    }

    // Final verification
    println!("\n๐Ÿ“Š Final Test Summary:");
    println!("=====================");
    println!("Total unique resources expected: 4");
    println!("- AWS RDS prod-database (us-east-1): 1 document");
    println!("- Azure VM web-server-01: 1 document");  
    println!("- AWS RDS staging-database (us-west-2): 1 document");
    println!("- Custom service resource-123: 1 document");
    println!("Actual total documents: {}", unknown_count2);

    if unknown_count2 == 4 {
        println!("\n๐Ÿ† ALL TESTS PASSED! ๐Ÿ†");
        println!("โœ… Canonical resource ID deduplication is working perfectly!");
        println!("โœ… Dynamic fields (status, powerState, timestamps) don't create duplicates");
        println!("โœ… Same logical resources with different states are properly deduplicated");
        println!("โœ… Different resources still create separate documents");
        println!("โœ… Unknown service types use intelligent fallback strategy");
    } else {
        println!("\nโš ๏ธ  Some tests failed - please investigate the results above");
    }

    println!("\n๐Ÿ“ Storage created at: {}", base_path.display());

    Ok(())
}