use rag_module::RagModule;
use serde_json::json;
use std::path::PathBuf;
use anyhow::Result;
/// Width, in characters, of the dashed rule printed under each test-case heading.
const SECTION_RULE_WIDTH: usize = 50;

/// Prints a test-case heading (preceded by a blank line) followed by a dashed rule.
fn print_section(title: &str) {
    println!("\n{}", title);
    // `println!` only accepts a string literal as its format string, so the
    // repeated rule has to be passed as a format argument.
    println!("{}", "-".repeat(SECTION_RULE_WIDTH));
}

/// Manual smoke test for AWS-estate ingestion deduplication.
///
/// Builds a [`RagModule`] rooted at `./test_data`, sets the user context, and
/// then runs five scenarios against a single collection, asserting the
/// reported document count after each ingestion step:
///
/// 1. the same document ingested twice          -> count is 1
/// 2. same `content`, different metadata        -> count is still 1
/// 3. different `content`                       -> count is 2
/// 4. a batch of four with three sharing content -> count is 4
/// 5. two searches whose hit counts are printed (not asserted)
///
/// # Errors
///
/// Returns the first error produced by any `RagModule` call.
///
/// # Panics
///
/// Panics if any document-count assertion fails.
///
/// NOTE(review): the assertions compare against exact counts, so they appear to
/// assume the collection is empty when the program starts — confirm that
/// `./test_data` is clean (or not persisted) between runs.
///
/// NOTE(review): the leading `๐` in several messages looks like a mis-decoded
/// emoji whose original glyph cannot be recovered from this file; those
/// prefixes are intentionally left unchanged.
#[tokio::main]
async fn main() -> Result<()> {
    println!("🧪 Testing AWS Estate Deduplication");
    println!("=====================================");

    let base_path = PathBuf::from("./test_data");
    let rag = RagModule::new(base_path).await?;
    let user_id = "test_user_dedup";
    let collection_name = "aws_estate_dedup_test";
    rag.set_user_context(user_id).await?;

    // ---- Test Case 1: identical document ingested twice -------------------
    print_section("๐ Test Case 1: Same content + Same metadata");
    let same_content_same_metadata = json!({
        "content": "This is an EC2 instance with standard configuration",
        "service": "EC2",
        "region": "us-east-1",
        "instance_type": "t3.micro",
        "status": "running"
    });

    println!("๐ Ingesting document first time...");
    let _result1 = rag
        .ingest_aws_estate(same_content_same_metadata.clone(), user_id, collection_name)
        .await?;
    println!("✅ First ingestion completed");

    println!("๐ Ingesting SAME document again...");
    // Last use of the value, so it is moved rather than cloned.
    let _result2 = rag
        .ingest_aws_estate(same_content_same_metadata, user_id, collection_name)
        .await?;
    println!("✅ Second ingestion completed");

    let count1 = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after same content ingestion: {}", count1);
    assert_eq!(count1, 1, "Should only have 1 document, not duplicates!");

    // ---- Test Case 2: same content, changed metadata ----------------------
    print_section("๐ Test Case 2: Same content + Different metadata");
    let same_content_diff_metadata = json!({
        "content": "This is an EC2 instance with standard configuration",
        "service": "EC2",
        "region": "us-west-2",
        "instance_type": "t3.small",
        "status": "stopped",
        "updated_at": "2024-12-24T10:00:00Z"
    });

    println!("๐ Ingesting document with SAME content but DIFFERENT metadata...");
    let _result3 = rag
        .ingest_aws_estate(same_content_diff_metadata, user_id, collection_name)
        .await?;
    println!("✅ Metadata update ingestion completed");

    let count2 = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after metadata update: {}", count2);
    assert_eq!(count2, 1, "Should still only have 1 document after metadata update!");

    // ---- Test Case 3: different content ------------------------------------
    print_section("๐ Test Case 3: Different content");
    let different_content = json!({
        "content": "This is a completely different RDS database instance",
        "service": "RDS",
        "region": "us-east-1",
        "engine": "postgres",
        "status": "available"
    });

    println!("๐ Ingesting document with DIFFERENT content...");
    let _result4 = rag
        .ingest_aws_estate(different_content, user_id, collection_name)
        .await?;
    println!("✅ Different content ingestion completed");

    let count3 = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after different content: {}", count3);
    assert_eq!(count3, 2, "Should have 2 documents for 2 different contents!");

    // ---- Test Case 4: batch containing repeated content --------------------
    print_section("๐ Test Case 4: Batch ingestion with duplicates");
    // Three of the four entries share the same `content` and differ only in
    // metadata; the Lambda entry is the only other distinct `content`.
    let batch_data = vec![
        json!({
            "content": "S3 bucket for storing application logs",
            "service": "S3",
            "bucket_name": "app-logs-bucket"
        }),
        json!({
            "content": "S3 bucket for storing application logs",
            "service": "S3",
            "bucket_name": "app-logs-bucket-updated",
            "encryption": "AES-256"
        }),
        json!({
            "content": "Lambda function for image processing",
            "service": "Lambda",
            "runtime": "python3.9"
        }),
        json!({
            "content": "S3 bucket for storing application logs",
            "service": "S3",
            "bucket_name": "final-bucket-name",
            "encryption": "AES-256",
            "versioning": "enabled"
        }),
    ];

    println!(
        "๐ Ingesting batch with {} documents (including duplicates)...",
        batch_data.len()
    );
    let _batch_result = rag
        .ingest_aws_estate_batch(batch_data, user_id, collection_name)
        .await?;
    println!("✅ Batch ingestion completed");

    let final_count = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Final document count: {}", final_count);
    assert_eq!(final_count, 4, "Should have 4 documents total (2 + 2 unique contents)!");

    // ---- Test Case 5: search (hit counts are reported, not asserted) -------
    print_section("๐ Test Case 5: Search verification");
    let search_results = rag
        .search(collection_name, "EC2 instance", Default::default())
        .await?;
    println!("๐ Search for 'EC2 instance' found {} results", search_results.len());

    let s3_results = rag
        .search(collection_name, "S3 bucket", Default::default())
        .await?;
    println!("๐ Search for 'S3 bucket' found {} results", s3_results.len());

    println!("\n๐ All deduplication tests PASSED!");
    println!("✅ Same content creates same document ID");
    println!("✅ Metadata updates replace existing document");
    println!("✅ Different content creates new documents");
    println!("✅ Batch processing handles mixed scenarios correctly");
    println!("✅ No duplicate documents in storage");

    Ok(())
}