rag-module 0.6.7

Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use serde_json::json;

/// Runs the canonical-resource-ID verification and prints a report to stdout.
///
/// Three metadata fixtures are built: two snapshots of the same RDS instance
/// that differ only in `dbInstanceStatus` and `lastModified`, plus one Azure
/// VM. For each fixture the canonical ID and the derived document ID are
/// printed, followed by three checks:
///
/// 1. both RDS snapshots yield the same canonical ID,
/// 2. both RDS snapshots yield the same document ID,
/// 3. the RDS and Azure VM canonical IDs differ.
///
/// Check outcomes are only printed; a failed check does not change how the
/// function returns.
fn main() {
    /// Prints the success lines when `passed` is true, otherwise the failure lines.
    fn report(passed: bool, on_success: [&str; 2], on_failure: [&str; 2]) {
        let lines = if passed { on_success } else { on_failure };
        for line in lines.iter() {
            println!("{}", line);
        }
    }

    println!("🔍 Verifying Canonical Resource ID Generation");
    println!("============================================");

    // Snapshot of the RDS instance while it is running.
    let running_snapshot = json!({
        "type": "rds",
        "accountId": "123456789012",
        "region": "us-east-1",
        "dbInstanceIdentifier": "prod-database",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgres",
        "dbInstanceStatus": "running", // dynamic field
        "allocatedStorage": 100,
        "lastModified": "2024-12-24T10:00:00Z"
    });

    // Same RDS instance, later snapshot: only status and timestamp differ.
    let stopped_snapshot = json!({
        "type": "rds",
        "accountId": "123456789012",
        "region": "us-east-1",
        "dbInstanceIdentifier": "prod-database",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgres",
        "dbInstanceStatus": "stopped", // differs from the running snapshot
        "allocatedStorage": 100,
        "lastModified": "2024-12-24T12:00:00Z" // differs from the running snapshot
    });

    // An unrelated resource from a different provider.
    let azure_vm_snapshot = json!({
        "type": "vm",
        "subscriptionId": "aaaabbbb-cccc-dddd-eeee-ffffffffffff",
        "resourceGroup": "production-rg",
        "vmId": "web-server-01",
        "location": "eastus",
        "powerState": "deallocated", // dynamic field
        "vmSize": "Standard_D2s_v3"
    });

    // Row labels shared by both result tables; padding keeps the values aligned.
    let labels = ["RDS (running):  ", "RDS (stopped):  ", "Azure VM:       "];

    // Canonical IDs, in the same order as `labels`.
    let canonical_ids: Vec<String> = [&running_snapshot, &stopped_snapshot, &azure_vm_snapshot]
        .iter()
        .map(|snapshot| generate_canonical_resource_id_test(snapshot.as_object().unwrap()))
        .collect();

    println!("📋 Canonical ID Test Results:");
    println!("-----------------------------");
    for (label, id) in labels.iter().zip(canonical_ids.iter()) {
        println!("{}{}", label, id);
    }

    // Document IDs derived from the canonical IDs, same order again.
    let doc_ids: Vec<String> = canonical_ids
        .iter()
        .map(|id| generate_doc_id(id))
        .collect();

    println!("\n📋 Document ID Test Results:");
    println!("-----------------------------");
    for (label, id) in labels.iter().zip(doc_ids.iter()) {
        println!("{}{}", label, id);
    }

    println!("\n✅ Deduplication Verification:");
    println!("------------------------------");

    report(
        canonical_ids[0] == canonical_ids[1],
        [
            "🎉 SUCCESS: RDS running/stopped have SAME canonical ID",
            "   → Will be deduplicated correctly",
        ],
        [
            "❌ FAILURE: RDS running/stopped have DIFFERENT canonical IDs",
            "   → Would create duplicates",
        ],
    );

    report(
        doc_ids[0] == doc_ids[1],
        [
            "🎉 SUCCESS: RDS running/stopped have SAME document ID",
            "   → Deduplication working perfectly",
        ],
        [
            "❌ FAILURE: RDS running/stopped have DIFFERENT document IDs",
            "   → Deduplication not working",
        ],
    );

    report(
        canonical_ids[0] != canonical_ids[2],
        [
            "🎉 SUCCESS: RDS and Azure VM have DIFFERENT canonical IDs",
            "   → Different resources correctly separated",
        ],
        [
            "❌ FAILURE: RDS and Azure VM have SAME canonical ID",
            "   → Different resources would collide",
        ],
    );
}

/// Builds a canonical resource identifier from the stable fields of `metadata`.
///
/// The service type is read from the first present key among `type`,
/// `service` and `resourceType`; if that value is not a string (or no key is
/// present) it is treated as `"unknown"`. The comparison is case-insensitive
/// because the value is lower-cased first.
///
/// Recognised shapes:
///
/// * string `accountId` + service type `rds` + string `region` and
///   `dbInstanceIdentifier` → `aws:rds:<accountId>:<region>:<dbInstanceIdentifier>`
/// * string `subscriptionId` + service type `vm` + string `resourceGroup` and
///   `vmId` → `azure:vm:<subscriptionId>:<resourceGroup>:<vmId>`
///
/// Fields such as status or timestamps are never read, so they do not
/// influence the result.
///
/// Anything else returns the literal `"fallback:unknown"`. Note that every
/// unrecognised resource therefore shares the same identifier.
fn generate_canonical_resource_id_test(metadata: &serde_json::Map<String, serde_json::Value>) -> String {
    // Looks up `key` and yields it only when the value is a JSON string.
    let str_field = |key: &str| metadata.get(key).and_then(|value| value.as_str());

    // Key precedence is decided before the string check, so a non-string
    // `type` does not fall through to `service` / `resourceType`.
    let service_type = metadata
        .get("type")
        .or_else(|| metadata.get("service"))
        .or_else(|| metadata.get("resourceType"))
        .and_then(|value| value.as_str())
        .unwrap_or("unknown")
        .to_lowercase();

    // AWS resources are keyed by account ID.
    if let Some(account_id) = str_field("accountId") {
        if service_type == "rds" {
            if let (Some(region), Some(db_id)) =
                (str_field("region"), str_field("dbInstanceIdentifier"))
            {
                return format!("aws:rds:{}:{}:{}", account_id, region, db_id);
            }
        }
    }

    // Azure resources are keyed by subscription ID. This is also reached when
    // an AWS account ID is present but the AWS branch could not build an ID.
    if let Some(subscription) = str_field("subscriptionId") {
        if service_type == "vm" {
            if let (Some(resource_group), Some(vm_id)) =
                (str_field("resourceGroup"), str_field("vmId"))
            {
                return format!("azure:vm:{}:{}:{}", subscription, resource_group, vm_id);
            }
        }
    }

    "fallback:unknown".to_string()
}

/// Derives a document ID from a canonical resource ID.
///
/// The canonical ID is hashed with a freshly constructed [`DefaultHasher`] and
/// the 64-bit result is rendered as lowercase hexadecimal (no zero padding)
/// after the fixed prefix `canonical_test_estate-`. Equal inputs always give
/// equal outputs within one build.
///
/// NOTE(review): the standard library does not guarantee that
/// `DefaultHasher` produces the same values across Rust releases — confirm
/// this is acceptable if these IDs are ever persisted.
fn generate_doc_id(canonical_id: &str) -> String {
    let mut state = DefaultHasher::new();
    canonical_id.hash(&mut state);
    format!("canonical_test_estate-{:x}", state.finish())
}