use rag_module::{RagModule, SearchOptions};
use serde_json::json;
use std::path::PathBuf;
use anyhow::Result;
/// Manual end-to-end check of `RagModule` deduplication using AWS RDS estate records.
///
/// Works against storage rooted at `./test_dedup_storage` and performs these steps:
///
/// 1. ingest one RDS record and read the document count;
/// 2. ingest the identical record again (the count must not change);
/// 3. ingest the same record with changed metadata fields but the same `content`
///    (exactly one document is expected);
/// 4. ingest a different RDS instance (exactly two documents are expected);
/// 5. run three searches and print how many results each returned.
///
/// Afterwards it lists the storage directories and prints a summary whose lines
/// reflect the outcome of the checks above.
///
/// # Errors
///
/// Returns the first error produced by any `RagModule` call. A count mismatch is
/// only reported on stdout; it does not turn into an `Err`.
//
// NOTE(review): the absolute counts expected in steps 3 and 4 (1 and 2) assume the
// collection starts empty. Nothing in this function clears `./test_dedup_storage`,
// so a rerun may see documents from an earlier run — confirm.
//
// NOTE(review): the non-ASCII glyphs in the string literals look mis-encoded (likely
// emoji decoded with the wrong charset). They are reproduced as found, with line
// breaks inside literals written as `\n` — confirm the file's encoding.
#[tokio::main]
async fn main() -> Result<()> {
    println!("๐งช Real AWS Estate Deduplication Test");
    println!("====================================");

    let base_path = PathBuf::from("./test_dedup_storage");
    println!("๐ Test storage path: {}", base_path.display());

    let rag = RagModule::new(base_path.clone()).await?;
    let user_id = "test_user_real";
    let collection_name = "aws_real_estate";
    rag.set_user_context(user_id).await?;
    println!("โ\nRAG module initialized and user context set");

    let rds_data_v1 = json!({
        "type": "rds",
        "keywords": [
            "rds",
            "database",
            "relational"
        ],
        "profile": "default",
        "accountId": "288761761556",
        "region": "us-east-1",
        "permissions": {
            "accessLevel": "FullAccess",
            "hasRead": true,
            "hasWrite": true,
            "hasFullAccess": true
        },
        "content": "RDS instance dev-eshop-mysql-rds, id dev-eshop-mysql-rds, profile default, region us-east-1, engine mysql 8.0.42, class db.t3.micro, state stopped, storage 20GB, created 2025-07-07T10:02:58.453000+00:00",
        "dbInstanceIdentifier": "dev-eshop-mysql-rds",
        "dbInstanceClass": "db.t3.micro",
        "engine": "mysql",
        "dbInstanceStatus": "stopped",
        "automaticRestartTime": "2025-12-08T11:37:25.225000+00:00",
        "masterUsername": "mysql_admin",
        "endpoint": {
            "address": "dev-eshop-mysql-rds.cuxmiwm0ulok.us-east-1.rds.amazonaws.com",
            "port": 3306,
            "hostedZoneId": "Z2R2ITUGPM61AM"
        },
        "allocatedStorage": 20,
        "instanceCreateTime": "2025-07-07T10:02:58.453000+00:00",
        "preferredBackupWindow": "10:27-10:57",
        "backupRetentionPeriod": 1,
        "dbSecurityGroups": [],
        "vpcSecurityGroups": [
            {
                "vpcSecurityGroupId": "sg-0a017fbb383b24395",
                "status": "active"
            }
        ],
        "storageEncrypted": true,
        "kmsKeyId": "arn:aws:kms:us-east-1:288761761556:key/3469cdf7-b2dc-4d99-8dba-816a95e9465a",
        "tagList": [
            {
                "key": "app",
                "value": "e-shopping"
            },
            {
                "key": "environment",
                "value": "dev"
            }
        ]
    });

    // ---- Test 1: first ingestion establishes the baseline document count. ----
    print_section("๐ Test 1: Initial RDS Data Ingestion", 50);
    println!("๐ Ingesting RDS data for the first time...");
    rag.ingest_aws_estate(rds_data_v1.clone(), user_id, collection_name).await?;
    println!("โ\nFirst ingestion completed");
    let count_after_first = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after first ingestion: {}", count_after_first);

    // ---- Test 2: re-ingesting identical JSON must not change the count. ----
    print_section("๐ Test 2: Duplicate RDS Data (Same Content)", 50);
    println!("๐ Ingesting EXACT SAME RDS data again...");
    rag.ingest_aws_estate(rds_data_v1.clone(), user_id, collection_name).await?;
    println!("โ\nSecond ingestion completed");
    let count_after_duplicate = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after duplicate ingestion: {}", count_after_duplicate);
    let dedup_ok = count_after_duplicate == count_after_first;
    if dedup_ok {
        println!("๐ SUCCESS: Duplicate content was properly deduplicated!");
    } else {
        println!("โ PROBLEM: Duplicate content created a new document!");
    }

    // ---- Test 3: same `content`, changed metadata fields. ----
    print_section("๐ Test 3: Same Content with Updated Metadata", 50);
    // The v1 value is not needed after this point, so take ownership instead of cloning.
    let mut rds_data_v2 = rds_data_v1;
    rds_data_v2["dbInstanceStatus"] = json!("running");
    rds_data_v2["allocatedStorage"] = json!(30);
    rds_data_v2["automaticRestartTime"] = json!("2025-12-25T10:00:00.000Z");
    rds_data_v2["lastModified"] = json!("2025-12-24T12:00:00.000Z");
    if let Some(tag_list) = rds_data_v2["tagList"].as_array_mut() {
        tag_list.push(json!({
            "key": "updated",
            "value": "true"
        }));
    }
    println!("๐ Ingesting RDS data with UPDATED metadata (same content)...");
    rag.ingest_aws_estate(rds_data_v2, user_id, collection_name).await?;
    println!("โ\nMetadata update ingestion completed");
    let count_after_update = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after metadata update: {}", count_after_update);
    let update_ok = count_after_update == 1;
    if update_ok {
        println!("๐ SUCCESS: Metadata update replaced existing document!");
    } else {
        println!("โ PROBLEM: Metadata update created a new document instead of updating!");
    }

    // ---- Test 4: a different instance must add a second document. ----
    print_section("๐ Test 4: Different Content (Different RDS Instance)", 50);
    let different_rds = json!({
        "type": "rds",
        "keywords": ["rds", "database", "postgresql"],
        "profile": "production",
        "accountId": "288761761556",
        "region": "us-west-2",
        "content": "RDS instance prod-app-postgresql-db, id prod-app-postgresql-db, profile production, region us-west-2, engine postgresql 14.9, class db.r5.large, state available, storage 100GB, created 2025-12-01T08:30:00.000Z",
        "dbInstanceIdentifier": "prod-app-postgresql-db",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgresql",
        "dbInstanceStatus": "available",
        "masterUsername": "postgres_admin",
        "allocatedStorage": 100,
        "tagList": [
            {
                "key": "app",
                "value": "production-app"
            },
            {
                "key": "environment",
                "value": "production"
            }
        ]
    });
    println!("๐ Ingesting DIFFERENT RDS instance...");
    rag.ingest_aws_estate(different_rds, user_id, collection_name).await?;
    println!("โ\nDifferent content ingestion completed");
    let count_after_different = rag.get_document_count(Some(collection_name), None).await?;
    println!("๐ Document count after different content: {}", count_after_different);
    let different_ok = count_after_different == 2;
    if different_ok {
        println!("๐ SUCCESS: Different content created new document!");
    } else {
        println!("โ PROBLEM: Expected 2 documents, got {}", count_after_different);
    }

    // ---- Test 5: searches; only the number of hits is reported, not their relevance. ----
    print_section("๐ Test 5: Search Verification", 50);
    let mysql_results = rag
        .search(collection_name, "mysql dev-eshop", user_id, SearchOptions::default())
        .await?;
    println!("๐ Search for 'mysql dev-eshop' found {} results", mysql_results.len());
    let postgresql_results = rag
        .search(collection_name, "postgresql prod-app", user_id, SearchOptions::default())
        .await?;
    println!("๐ Search for 'postgresql prod-app' found {} results", postgresql_results.len());
    let all_rds_results = rag
        .search(collection_name, "RDS instance", user_id, SearchOptions::default())
        .await?;
    println!("๐ Search for 'RDS instance' found {} results", all_rds_results.len());

    // ---- Storage layout check. ----
    print_section("๐ Storage Directory Check:", 30);
    let storage_path = base_path.join("qdrant-data");
    let storage_ok = storage_path.exists();
    if storage_ok {
        println!("โ\nqdrant-data directory created at: {}", storage_path.display());
        print_dir_entries(&storage_path, "๐ Contents:");
    } else {
        println!("โ qdrant-data directory not found at: {}", storage_path.display());
    }
    let user_path = base_path.join("data").join("users").join(user_id);
    if user_path.exists() {
        println!("โ\nUser data directory: {}", user_path.display());
        print_dir_entries(&user_path, "๐ User data contents:");
    }

    // ---- Summary: each line now reflects the check it describes. ----
    println!("\n๐ Deduplication Test Summary:");
    println!("===============================");
    // Document IDs are never inspected here; an unchanged count is the only evidence
    // for the "same document ID" claim, so it shares the Test 2 outcome.
    print_summary_line(dedup_ok, "Same content creates same document ID");
    print_summary_line(dedup_ok, "Duplicate content is properly deduplicated");
    print_summary_line(update_ok, "Metadata updates replace existing document");
    print_summary_line(different_ok, "Different content creates new documents");
    // Reaching this point means every search call returned `Ok` (`?` would have
    // returned earlier otherwise); result relevance is not verified.
    print_summary_line(true, "Search functionality works correctly");
    print_summary_line(storage_ok, "Storage directories are properly created");

    println!("\n๐ Final Stats:");
    println!(" Total documents: {}", count_after_different);
    println!(" Expected: 2 (1 MySQL + 1 PostgreSQL)");
    // The verdict covers all three count checks, not just the final total.
    if dedup_ok && update_ok && different_ok {
        println!("\n๐ ALL TESTS PASSED! Deduplication is working correctly.");
    } else {
        println!("\nโ ๏ธ Some tests may have failed. Please check the output above.");
    }
    Ok(())
}

/// Prints a blank line, `title`, and a dashed rule `rule_width` characters wide.
fn print_section(title: &str, rule_width: usize) {
    println!("\n{}", title);
    println!("{}", "-".repeat(rule_width));
}

/// Prints `heading` followed by the file names directly inside `dir`.
///
/// Prints nothing if `dir` cannot be read; entries that fail to read are skipped.
fn print_dir_entries(dir: &std::path::Path, heading: &str) {
    if let Ok(entries) = std::fs::read_dir(dir) {
        println!("{}", heading);
        for entry in entries.flatten() {
            println!(" - {}", entry.file_name().to_string_lossy());
        }
    }
}

/// Prints one summary line for `label`, marked as passed or failed.
fn print_summary_line(passed: bool, label: &str) {
    if passed {
        println!("โ\n{}", label);
    } else {
        println!("โ FAILED: {}", label);
    }
}