use rag_module::RagModule;
use serde_json::json;
use tempfile::TempDir;
use tokio;
/// End-to-end check that a synthetic AWS estate can be ingested in a single
/// batch and queried afterwards.
///
/// The fixture contains 512 resources: 160 EC2 instances, 128 S3 buckets,
/// 112 RDS instances and 112 Lambda functions. After ingestion the test
/// asserts that every resource was counted, parsed and created, then runs
/// three searches against the same collection and expects each of them to
/// return at least one result.
#[tokio::test]
async fn integration_test_aws_estate_batch_ingestion() {
    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "integration_user_001";
    let collection_name = "aws_production_estate";

    let mut aws_resources = Vec::with_capacity(512);

    // EC2 instances: even indices live in us-west-2 / production, odd ones in
    // us-east-1 / staging.
    for i in 0..160 {
        let even = i % 2 == 0;
        let instance_id = format!("i-{:016x}", i);
        let availability_zone = if even { "us-west-2a" } else { "us-east-1b" };
        aws_resources.push(json!({
            "content": format!(
                "EC2 instance {} running Ubuntu 22.04 in {}",
                instance_id, availability_zone
            ),
            "resource_type": "ec2_instance",
            "instance_id": instance_id,
            "instance_type": if i % 3 == 0 { "t3.micro" } else { "t3.small" },
            "state": "running",
            "region": if even { "us-west-2" } else { "us-east-1" },
            "availability_zone": availability_zone,
            "vpc_id": format!("vpc-{:08x}", i % 3),
            "security_groups": [format!("sg-{:08x}", i)],
            "tags": {
                "Name": format!("web-server-{}", i),
                "Environment": if even { "production" } else { "staging" },
                "Application": "web-frontend",
                "Owner": "devops-team"
            }
        }));
    }

    // S3 buckets.
    for i in 0..128 {
        let even = i % 2 == 0;
        let bucket_name = format!("company-data-{:03}", i);
        let purpose = if even { "backup" } else { "application" };
        let size_gb = (i + 1) * 100;
        aws_resources.push(json!({
            "content": format!(
                "S3 bucket {} for {} storage with {} GB data",
                bucket_name, purpose, size_gb
            ),
            "resource_type": "s3_bucket",
            "bucket_name": bucket_name,
            "region": if even { "us-west-2" } else { "us-east-1" },
            "versioning": i % 3 == 0,
            "encryption": "AES256",
            "public_access_blocked": true,
            "storage_class": if even { "STANDARD" } else { "INTELLIGENT_TIERING" },
            "size_gb": size_gb,
            "tags": {
                "Environment": if even { "production" } else { "staging" },
                "DataClassification": "internal",
                "BackupRequired": true
            }
        }));
    }

    // RDS instances: MySQL 8.0 on even indices, PostgreSQL 14.9 on odd ones.
    for i in 0..112 {
        let even = i % 2 == 0;
        let db_identifier = format!("app-db-{:02}", i);
        let engine_label = if even { "MySQL" } else { "PostgreSQL" };
        let engine_version = if even { "8.0" } else { "14.9" };
        aws_resources.push(json!({
            "content": format!(
                "RDS {} database {} running version {}",
                engine_label, db_identifier, engine_version
            ),
            "resource_type": "rds_instance",
            "db_identifier": db_identifier,
            "engine": if even { "mysql" } else { "postgres" },
            "engine_version": engine_version,
            "instance_class": format!("db.{}", if even { "t3.micro" } else { "t3.small" }),
            "allocated_storage": (i + 1) * 20,
            "multi_az": i % 3 == 0,
            "backup_retention": 7,
            "region": if even { "us-west-2" } else { "us-east-1" },
            "tags": {
                "Name": format!("app-database-{}", i),
                "Environment": if even { "production" } else { "staging" },
                "Application": "backend-services",
                "BackupSchedule": "daily"
            }
        }));
    }

    // Lambda functions, cycling through three runtimes.
    for i in 0..112 {
        let even = i % 2 == 0;
        let function_name = format!("data-processor-{}", i);
        let runtime = match i % 3 {
            0 => "python3.9",
            1 => "nodejs18.x",
            _ => "java11",
        };
        let workload = if even { "batch" } else { "real-time" };
        aws_resources.push(json!({
            "content": format!(
                "Lambda function {} using {} runtime for {} processing",
                function_name, runtime, workload
            ),
            "resource_type": "lambda_function",
            "function_name": function_name,
            "runtime": runtime,
            "timeout": if even { 300 } else { 60 },
            // Both values are inside Lambda's configurable memory range
            // (128 MB is the minimum), so the fixture describes a deployable
            // function.
            "memory_size": if even { 512 } else { 256 },
            "region": if even { "us-west-2" } else { "us-east-1" },
            "environment_variables": {
                "STAGE": if even { "prod" } else { "dev" },
                "LOG_LEVEL": "INFO"
            },
            "tags": {
                "Environment": if even { "production" } else { "staging" },
                "Team": "data-engineering",
                "CostCenter": "engineering"
            }
        }));
    }

    // 160 EC2 + 128 S3 + 112 RDS + 112 Lambda. Guards the expected counts
    // asserted below against silent drift in the fixture loops.
    assert_eq!(aws_resources.len(), 512);

    println!("🚀 Starting batch ingestion test with {} AWS resources", aws_resources.len());
    let start_time = std::time::Instant::now();
    // The fixture is not needed after ingestion, so it is moved rather than cloned.
    let result = rag
        .ingest_aws_estate_batch(aws_resources, user_id, collection_name)
        .await
        .unwrap();
    let batch_duration = start_time.elapsed();

    // Every generated resource is well-formed, so all 512 must be counted,
    // parsed and created, with nothing reported as failed.
    assert_eq!(result.total_resources, 512);
    assert_eq!(result.parsed_resources, 512);
    assert_eq!(result.failed_resources, 0);
    assert_eq!(result.create_result.created, 512);
    assert!(result.create_result.failed.is_empty());

    println!("✅ Batch ingestion completed successfully!");
    println!(" 📊 Resources processed: {}", result.parsed_resources);
    println!(" ⚡ Time taken: {:?}", batch_duration);
    println!(" 📈 Throughput: {:.2} docs/sec", result.parsed_resources as f64 / batch_duration.as_secs_f64());

    println!("\n🔍 Testing search functionality...");
    let search_options = rag_module::SearchOptions {
        limit: Some(5),
        score_threshold: Some(0.1),
        ..Default::default()
    };

    // Search the collection that was just populated; reusing `collection_name`
    // keeps ingestion and queries pointed at the same place.
    let ec2_results = rag
        .search(collection_name, "EC2 instance", user_id, search_options.clone())
        .await
        .unwrap();
    println!(" 🖥️ Found {} EC2-related results", ec2_results.len());
    assert!(ec2_results.len() > 0);

    let db_results = rag
        .search(collection_name, "database MySQL PostgreSQL", user_id, search_options.clone())
        .await
        .unwrap();
    println!(" 🗄️ Found {} database-related results", db_results.len());
    assert!(db_results.len() > 0);

    let lambda_results = rag
        .search(collection_name, "Lambda function", user_id, search_options)
        .await
        .unwrap();
    println!(" ⚡ Found {} Lambda-related results", lambda_results.len());
    assert!(lambda_results.len() > 0);

    println!("\n✅ Integration test completed successfully!");
    println!(" 🎯 All {} resources ingested and searchable", result.parsed_resources);
}
/// Feeds the batch ingestion entry point a small estate whose documents carry
/// `provider` values of `aws`, `azure` and `gcp`, and checks that all five
/// documents are counted and parsed with no failures.
#[tokio::test]
async fn integration_test_mixed_cloud_providers() {
    let workspace = TempDir::new().unwrap();
    let module = RagModule::new(workspace.path()).await.unwrap();
    module.initialize().await.unwrap();

    // Compute resources, one per provider.
    let aws_vm = json!({
        "content": "AWS EC2 instance i-1234567890abcdef0 running in us-west-2",
        "provider": "aws",
        "resource_type": "compute_instance",
        "instance_id": "i-1234567890abcdef0"
    });
    let azure_vm = json!({
        "content": "Azure Virtual Machine vm-web-01 running in West US 2",
        "provider": "azure",
        "resource_type": "compute_instance",
        "vm_name": "vm-web-01"
    });
    let gcp_vm = json!({
        "content": "GCP Compute Engine instance web-server-gcp running in us-central1",
        "provider": "gcp",
        "resource_type": "compute_instance",
        "instance_name": "web-server-gcp"
    });

    // Database resources.
    let aws_db = json!({
        "content": "AWS RDS MySQL database prod-db in us-east-1",
        "provider": "aws",
        "resource_type": "database",
        "db_identifier": "prod-db"
    });
    let azure_db = json!({
        "content": "Azure SQL Database webapp-db in East US",
        "provider": "azure",
        "resource_type": "database",
        "database_name": "webapp-db"
    });

    let estate = vec![aws_vm, azure_vm, gcp_vm, aws_db, azure_db];

    let outcome = module
        .ingest_aws_estate_batch(estate, "multi_cloud_user", "multi_cloud_estate")
        .await
        .unwrap();

    assert_eq!(outcome.total_resources, 5);
    assert_eq!(outcome.parsed_resources, 5);
    assert_eq!(outcome.failed_resources, 0);

    println!(
        "✅ Multi-cloud integration test passed: {} resources from different providers",
        outcome.parsed_resources
    );
}
/// Checks that batch ingestion tolerates malformed entries mixed in with
/// valid ones.
///
/// The batch holds seven entries: four well-formed documents, one document
/// without a `content` key, one whose `content` is `null`, and one value that
/// is a bare JSON string rather than an object. The test expects the call
/// itself to succeed, at least four entries to be parsed, at least one to be
/// reported as failed, and the total to equal the batch size.
#[tokio::test]
async fn integration_test_error_handling_and_recovery() {
    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "error_test_user";
    let collection_name = "error_test_estate";

    let mixed_data = vec![
        // Valid.
        json!({
            "content": "Valid AWS EC2 instance",
            "resource_type": "ec2_instance",
            "instance_id": "i-valid1"
        }),
        // Malformed: no `content` key at all.
        json!({
            "resource_type": "s3_bucket",
            "bucket_name": "invalid-bucket"
        }),
        // Valid.
        json!({
            "content": "Valid RDS database",
            "resource_type": "rds_instance",
            "db_identifier": "valid-db"
        }),
        // Malformed: `content` is present but null.
        json!({
            "content": null,
            "resource_type": "lambda_function"
        }),
        // Valid.
        json!({
            "content": "Another valid EC2 instance",
            "resource_type": "ec2_instance",
            "instance_id": "i-valid2"
        }),
        // Malformed: a JSON string instead of an object. It must be wrapped in
        // `json!` so that every element of the vector is a `serde_json::Value`;
        // a bare `&str` literal here is a type mismatch.
        json!("invalid_json_structure"),
        // Valid.
        json!({
            "content": "Valid S3 bucket",
            "resource_type": "s3_bucket",
            "bucket_name": "valid-bucket"
        }),
    ];

    let result = rag
        .ingest_aws_estate_batch(mixed_data, user_id, collection_name)
        .await
        .unwrap();

    println!("📊 Error handling test results:");
    println!(" ✅ Successfully processed: {}", result.parsed_resources);
    println!(" ❌ Failed to process: {}", result.failed_resources);
    println!(" 📝 Error details: {:?}", result.create_result.failed);

    // Four entries are well-formed; the remaining three are malformed in
    // different ways, so at least one failure must be reported.
    assert!(result.parsed_resources >= 4);
    assert!(result.failed_resources > 0);
    assert_eq!(result.total_resources, 7);

    println!("✅ Error handling test completed - system gracefully handled mixed data");
}