use rag_module::RagModule;
use serde_json::json;
use std::path::PathBuf;
use anyhow::Result;
/// Manual end-to-end check of canonical resource-ID deduplication in `RagModule`.
///
/// Each scenario ingests a resource into one collection and compares the
/// collection's document count before and after:
///
/// 1. the same RDS instance with `dbInstanceStatus` running, then stopped;
/// 2. the same Azure VM with `powerState` running, then deallocated;
/// 3. a different RDS instance, which is expected to add a document;
/// 4. a resource of an unrecognised `type`, ingested twice with a different `status`.
///
/// Results are reported on stdout only.
///
/// # Errors
///
/// Returns the first error produced by a `RagModule` call. A failed check is
/// only printed; the function still returns `Ok(())` in that case.
// NOTE(review): a failed check does not change the exit status — confirm this
// is intended before wiring the binary into CI.
// NOTE(review): the leading glyphs in several output strings look mis-encoded
// (probably emoji decoded with the wrong charset) — confirm the file encoding.
#[tokio::main]
async fn main() -> Result<()> {
    println!("๐งช Testing Canonical Resource ID Deduplication");
    println!("==============================================");
    println!("This test demonstrates the corner case fix where same resource");
    println!("with different states (running/stopped) are properly deduplicated\n");

    // Start from an empty storage directory so the absolute counts below hold.
    let base_path = PathBuf::from("./canonical_dedup_test");
    if base_path.exists() {
        // Cleanup stays best-effort, but a failure is reported: leftovers from
        // an earlier run would inflate every document count checked below.
        if let Err(err) = std::fs::remove_dir_all(&base_path) {
            eprintln!(
                "warning: could not remove stale test directory {}: {}",
                base_path.display(),
                err
            );
        }
    }

    // `base_path` is cloned because it is printed again at the very end.
    let rag = RagModule::new(base_path.clone()).await?;
    let user_id = "canonical_user";
    let collection_name = "canonical_test_estate";
    rag.set_user_context(user_id).await?;
    // This literal spans two source lines; the continuation must stay at
    // column 0 because any leading whitespace would become part of the output.
    println!("โ
RAG module initialized\n");

    // Separator printed under every test-case heading.
    let divider = "-".repeat(70);

    // ---- Test Case 1: same RDS instance, only status/timestamp differ ----
    println!("๐ Test Case 1: AWS RDS - Status Change (running โ stopped)");
    println!("{}", divider);
    let rds_running = json!({
        "type": "rds",
        "accountId": "123456789012",
        "region": "us-east-1",
        "dbInstanceIdentifier": "prod-database",
        "content": "RDS instance prod-database, id prod-database, region us-east-1, engine postgres, class db.r5.large, status running",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgres",
        "dbInstanceStatus": "running",
        "allocatedStorage": 100,
        "lastModified": "2024-12-24T10:00:00Z"
    });
    let rds_stopped = json!({
        "type": "rds",
        "accountId": "123456789012",
        "region": "us-east-1",
        "dbInstanceIdentifier": "prod-database",
        "content": "RDS instance prod-database, id prod-database, region us-east-1, engine postgres, class db.r5.large, status stopped",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgres",
        "dbInstanceStatus": "stopped",
        "allocatedStorage": 100,
        "lastModified": "2024-12-24T12:00:00Z"
    });

    println!("๐ Ingesting RDS instance with status: RUNNING");
    // Each payload is used exactly once, so it is moved instead of cloned.
    rag.ingest_aws_estate(rds_running, user_id, collection_name).await?;
    let count_after_running = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", count_after_running);

    println!("๐ Ingesting SAME RDS instance with status: STOPPED");
    rag.ingest_aws_estate(rds_stopped, user_id, collection_name).await?;
    let count_after_stopped = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", count_after_stopped);

    // The count must be unchanged by the second ingest and equal to one.
    if count_after_running == count_after_stopped && count_after_stopped == 1 {
        println!("๐ SUCCESS: Same resource with different status was properly deduplicated!");
        println!(" Expected canonical ID: aws:rds:123456789012:us-east-1:prod-database");
    } else {
        println!("โ FAILURE: Status change created duplicate documents");
    }

    // ---- Test Case 2: same Azure VM, only powerState/timestamp differ ----
    println!("\n๐ Test Case 2: Azure VM - Power State Change (running โ deallocated)");
    println!("{}", divider);
    let azure_vm_running = json!({
        "type": "vm",
        "subscriptionId": "aaaabbbb-cccc-dddd-eeee-ffffffffffff",
        "resourceGroup": "production-rg",
        "vmId": "web-server-01",
        "content": "Azure VM web-server-01 in resource group production-rg, location eastus, powerState running",
        "location": "eastus",
        "powerState": "running",
        "vmSize": "Standard_D2s_v3",
        "lastUpdated": "2024-12-24T10:30:00Z"
    });
    let azure_vm_deallocated = json!({
        "type": "vm",
        "subscriptionId": "aaaabbbb-cccc-dddd-eeee-ffffffffffff",
        "resourceGroup": "production-rg",
        "vmId": "web-server-01",
        "content": "Azure VM web-server-01 in resource group production-rg, location eastus, powerState deallocated",
        "location": "eastus",
        "powerState": "deallocated",
        "vmSize": "Standard_D2s_v3",
        "lastUpdated": "2024-12-24T14:15:00Z"
    });

    println!("๐ Ingesting Azure VM with powerState: RUNNING");
    // The Azure payload goes through `ingest_aws_estate` as well; this file
    // uses no other ingestion entry point.
    rag.ingest_aws_estate(azure_vm_running, user_id, collection_name).await?;
    let count_after_vm_running = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", count_after_vm_running);

    println!("๐ Ingesting SAME Azure VM with powerState: DEALLOCATED");
    rag.ingest_aws_estate(azure_vm_deallocated, user_id, collection_name).await?;
    let count_after_vm_deallocated = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", count_after_vm_deallocated);

    // Mirrors Test Case 1: the count must be unchanged by the state change and
    // equal the expected total (RDS + VM). Checking only the absolute value
    // would also accept a dropped first ingest followed by a duplicate.
    if count_after_vm_running == count_after_vm_deallocated && count_after_vm_deallocated == 2 {
        println!("๐ SUCCESS: Azure VM power state change was properly deduplicated!");
        println!(" Expected canonical ID: azure:vm:aaaabbbb-cccc-dddd-eeee-ffffffffffff:production-rg:web-server-01");
    } else {
        println!("โ FAILURE: VM power state change created unexpected document count");
    }

    // ---- Test Case 3: a genuinely different RDS instance ----
    println!("\n๐ Test Case 3: Different Resources - Should Create Separate Documents");
    println!("{}", divider);
    let different_rds = json!({
        "type": "rds",
        "accountId": "123456789012",
        "region": "us-west-2",
        "dbInstanceIdentifier": "staging-database",
        "content": "RDS instance staging-database, id staging-database, region us-west-2, engine mysql, class db.t3.medium, status available",
        "dbInstanceClass": "db.t3.medium",
        "engine": "mysql",
        "dbInstanceStatus": "available"
    });

    println!("๐ Ingesting DIFFERENT RDS instance...");
    rag.ingest_aws_estate(different_rds, user_id, collection_name).await?;
    let final_count = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", final_count);

    if final_count == 3 {
        println!("๐ SUCCESS: Different resource created new document!");
        println!(" Expected canonical ID: aws:rds:123456789012:us-west-2:staging-database");
    } else {
        println!("โ FAILURE: Expected 3 documents total, got {}", final_count);
    }

    // ---- Test Case 4: unrecognised `type`, same id/region, status differs ----
    println!("\n๐ Test Case 4: Unknown Service Type - Fallback Strategy");
    println!("{}", divider);
    let unknown_service = json!({
        "type": "custom-service",
        "id": "resource-123",
        "region": "us-east-1",
        "content": "Custom resource resource-123 in region us-east-1, status active",
        "status": "active"
    });
    let unknown_service_updated = json!({
        "type": "custom-service",
        "id": "resource-123",
        "region": "us-east-1",
        "content": "Custom resource resource-123 in region us-east-1, status inactive",
        "status": "inactive"
    });

    println!("๐ Ingesting unknown service type with status: ACTIVE");
    rag.ingest_aws_estate(unknown_service, user_id, collection_name).await?;
    let unknown_count1 = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", unknown_count1);

    println!("๐ Ingesting SAME unknown service with status: INACTIVE");
    rag.ingest_aws_estate(unknown_service_updated, user_id, collection_name).await?;
    let unknown_count2 = rag.get_document_count(Some(collection_name), None).await?;
    println!(" Document count: {}", unknown_count2);

    // Relative check only; the absolute total is verified in the summary.
    if unknown_count2 == unknown_count1 {
        println!("๐ SUCCESS: Unknown service type used fallback deduplication!");
        println!(" Expected fallback canonical ID: fallback:custom-service:resource-123:us-east-1");
    } else {
        println!("โ FAILURE: Unknown service type didn't deduplicate properly");
    }

    // ---- Summary: the last measured count is the collection total ----
    println!("\n๐ Final Test Summary:");
    println!("=====================");
    println!("Total unique resources expected: 4");
    println!("- AWS RDS prod-database (us-east-1): 1 document");
    println!("- Azure VM web-server-01: 1 document");
    println!("- AWS RDS staging-database (us-west-2): 1 document");
    println!("- Custom service resource-123: 1 document");
    println!("Actual total documents: {}", unknown_count2);

    if unknown_count2 == 4 {
        println!("\n๐ ALL TESTS PASSED! ๐");
        // The literals below each span two source lines; their continuation
        // lines stay at column 0 so no extra whitespace reaches the output.
        println!("โ
Canonical resource ID deduplication is working perfectly!");
        println!("โ
Dynamic fields (status, powerState, timestamps) don't create duplicates");
        println!("โ
Same logical resources with different states are properly deduplicated");
        println!("โ
Different resources still create separate documents");
        println!("โ
Unknown service types use intelligent fallback strategy");
    } else {
        println!("\nโ ๏ธ Some tests failed - please investigate the results above");
    }

    println!("\n๐ Storage created at: {}", base_path.display());
    Ok(())
}