//! rag-module 0.6.7
//!
//! Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
//!
//! Integration tests for batch AWS estate ingestion
//! 
//! These tests demonstrate real-world usage scenarios for the batch ingestion functionality.

use rag_module::RagModule;
use serde_json::json;
use tempfile::TempDir;
use tokio;

/// End-to-end check of batch ingestion followed by search.
///
/// Builds a synthetic AWS estate of 512 resources (160 EC2 instances, 128 S3
/// buckets, 112 RDS instances and 112 Lambda functions), ingests it in a single
/// batch into a `RagModule` rooted at a temporary directory, verifies the
/// reported counts, and then checks that EC2, database and Lambda queries each
/// return at least one hit from the ingested collection.
#[tokio::test]
async fn integration_test_aws_estate_batch_ingestion() {
    // Initialize RAG module
    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "integration_user_001";
    let collection_name = "aws_production_estate";

    // Create realistic AWS estate data: 160 + 128 + 112 + 112 = 512 documents.
    let mut aws_resources = Vec::with_capacity(512);

    // EC2 instances
    for i in 0..160 {
        let is_even = i % 2 == 0;
        let availability_zone = if is_even { "us-west-2a" } else { "us-east-1b" };
        let instance_id = format!("i-{:016x}", i);
        aws_resources.push(json!({
            "content": format!(
                "EC2 instance {} running Ubuntu 22.04 in {}",
                instance_id, availability_zone
            ),
            "resource_type": "ec2_instance",
            "instance_id": instance_id,
            "instance_type": if i % 3 == 0 { "t3.micro" } else { "t3.small" },
            "state": "running",
            "region": if is_even { "us-west-2" } else { "us-east-1" },
            "availability_zone": availability_zone,
            "vpc_id": format!("vpc-{:08x}", i % 3),
            "security_groups": [format!("sg-{:08x}", i)],
            "tags": {
                "Name": format!("web-server-{}", i),
                "Environment": if is_even { "production" } else { "staging" },
                "Application": "web-frontend",
                "Owner": "devops-team"
            }
        }));
    }

    // S3 buckets
    for i in 0..128 {
        let is_even = i % 2 == 0;
        let bucket_name = format!("company-data-{:03}", i);
        let size_gb = (i + 1) * 100;
        aws_resources.push(json!({
            "content": format!(
                "S3 bucket {} for {} storage with {} GB data",
                bucket_name,
                if is_even { "backup" } else { "application" },
                size_gb
            ),
            "resource_type": "s3_bucket",
            "bucket_name": bucket_name,
            "region": if is_even { "us-west-2" } else { "us-east-1" },
            "versioning": i % 3 == 0,
            "encryption": "AES256",
            "public_access_blocked": true,
            "storage_class": if is_even { "STANDARD" } else { "INTELLIGENT_TIERING" },
            "size_gb": size_gb,
            "tags": {
                "Environment": if is_even { "production" } else { "staging" },
                "DataClassification": "internal",
                "BackupRequired": true
            }
        }));
    }

    // RDS instances
    for i in 0..112 {
        let is_even = i % 2 == 0;
        let db_identifier = format!("app-db-{:02}", i);
        let engine_version = if is_even { "8.0" } else { "14.9" };
        aws_resources.push(json!({
            "content": format!(
                "RDS {} database {} running version {}",
                if is_even { "MySQL" } else { "PostgreSQL" },
                db_identifier,
                engine_version
            ),
            "resource_type": "rds_instance",
            "db_identifier": db_identifier,
            "engine": if is_even { "mysql" } else { "postgres" },
            "engine_version": engine_version,
            "instance_class": format!("db.{}", if is_even { "t3.micro" } else { "t3.small" }),
            "allocated_storage": (i + 1) * 20,
            "multi_az": i % 3 == 0,
            "backup_retention": 7,
            "region": if is_even { "us-west-2" } else { "us-east-1" },
            "tags": {
                "Name": format!("app-database-{}", i),
                "Environment": if is_even { "production" } else { "staging" },
                "Application": "backend-services",
                "BackupSchedule": "daily"
            }
        }));
    }

    // Lambda functions
    for i in 0..112 {
        let is_even = i % 2 == 0;
        let function_name = format!("data-processor-{}", i);
        let runtime = match i % 3 {
            0 => "python3.9",
            1 => "nodejs18.x",
            _ => "java11",
        };
        aws_resources.push(json!({
            "content": format!(
                "Lambda function {} using {} runtime for {} processing",
                function_name,
                runtime,
                if is_even { "batch" } else { "real-time" }
            ),
            "resource_type": "lambda_function",
            "function_name": function_name,
            "runtime": runtime,
            "timeout": if is_even { 300 } else { 60 },
            // Lambda memory must be at least 128 MB; the previous value of 1 was not a
            // valid configuration.
            "memory_size": if is_even { 512 } else { 256 },
            "region": if is_even { "us-west-2" } else { "us-east-1" },
            "environment_variables": {
                "STAGE": if is_even { "prod" } else { "dev" },
                "LOG_LEVEL": "INFO"
            },
            "tags": {
                "Environment": if is_even { "production" } else { "staging" },
                "Team": "data-engineering",
                "CostCenter": "engineering"
            }
        }));
    }

    // Sanity-check the fixture itself so a count mismatch below points at ingestion,
    // not at the generator loops above.
    assert_eq!(aws_resources.len(), 512);

    println!("🚀 Starting batch ingestion test with {} AWS resources", aws_resources.len());

    // Measure batch ingestion performance. The vector is moved into the call because
    // nothing reads it afterwards, so no clone is needed.
    let start_time = std::time::Instant::now();
    let result = rag
        .ingest_aws_estate_batch(aws_resources, user_id, collection_name)
        .await
        .unwrap();
    let batch_duration = start_time.elapsed();

    // Verify results: every one of the 512 generated resources must be accepted.
    assert_eq!(result.total_resources, 512);
    assert_eq!(result.parsed_resources, 512);
    assert_eq!(result.failed_resources, 0);
    assert_eq!(result.create_result.created, 512);
    assert!(result.create_result.failed.is_empty());

    println!("✅ Batch ingestion completed successfully!");
    println!("   📊 Resources processed: {}", result.parsed_resources);
    println!("   ⚡ Time taken: {:?}", batch_duration);
    println!(
        "   📈 Throughput: {:.2} docs/sec",
        result.parsed_resources as f64 / batch_duration.as_secs_f64()
    );

    // Test searching the ingested data
    println!("\n🔍 Testing search functionality...");

    let search_options = rag_module::SearchOptions {
        limit: Some(5),
        score_threshold: Some(0.1),
        ..Default::default()
    };

    // Search for EC2 instances (same collection the batch was ingested into).
    let ec2_results = rag
        .search(collection_name, "EC2 instance", user_id, search_options.clone())
        .await
        .unwrap();
    println!("   🖥️  Found {} EC2-related results", ec2_results.len());
    assert!(!ec2_results.is_empty());

    // Search for databases
    let db_results = rag
        .search(collection_name, "database MySQL PostgreSQL", user_id, search_options.clone())
        .await
        .unwrap();
    println!("   🗄️  Found {} database-related results", db_results.len());
    assert!(!db_results.is_empty());

    // Search for Lambda functions
    let lambda_results = rag
        .search(collection_name, "Lambda function", user_id, search_options)
        .await
        .unwrap();
    println!("   ⚡ Found {} Lambda-related results", lambda_results.len());
    assert!(!lambda_results.is_empty());

    println!("\n✅ Integration test completed successfully!");
    println!("   🎯 All {} resources ingested and searchable", result.parsed_resources);
}

/// Ingests a small estate mixing AWS, Azure and GCP resources through the
/// AWS batch entry point and checks that all five documents are counted as
/// parsed with no failures.
#[tokio::test]
async fn integration_test_mixed_cloud_providers() {
    let user_id = "multi_cloud_user";
    let collection_name = "multi_cloud_estate";

    // Fresh module backed by a temporary directory.
    let workspace = TempDir::new().unwrap();
    let rag = RagModule::new(workspace.path()).await.unwrap();
    rag.initialize().await.unwrap();

    // Compute resources, one per provider.
    let aws_compute = json!({
        "content": "AWS EC2 instance i-1234567890abcdef0 running in us-west-2",
        "provider": "aws",
        "resource_type": "compute_instance",
        "instance_id": "i-1234567890abcdef0"
    });
    let azure_compute = json!({
        "content": "Azure Virtual Machine vm-web-01 running in West US 2",
        "provider": "azure",
        "resource_type": "compute_instance",
        "vm_name": "vm-web-01"
    });
    let gcp_compute = json!({
        "content": "GCP Compute Engine instance web-server-gcp running in us-central1",
        "provider": "gcp",
        "resource_type": "compute_instance",
        "instance_name": "web-server-gcp"
    });

    // Database resources.
    let aws_database = json!({
        "content": "AWS RDS MySQL database prod-db in us-east-1",
        "provider": "aws",
        "resource_type": "database",
        "db_identifier": "prod-db"
    });
    let azure_database = json!({
        "content": "Azure SQL Database webapp-db in East US",
        "provider": "azure",
        "resource_type": "database",
        "database_name": "webapp-db"
    });

    // Order matches the ingestion order: three compute entries, then two databases.
    let estate = vec![
        aws_compute,
        azure_compute,
        gcp_compute,
        aws_database,
        azure_database,
    ];

    let report = rag
        .ingest_aws_estate_batch(estate, user_id, collection_name)
        .await
        .unwrap();

    assert_eq!(report.total_resources, 5);
    assert_eq!(report.parsed_resources, 5);
    assert_eq!(report.failed_resources, 0);

    println!(
        "✅ Multi-cloud integration test passed: {} resources from different providers",
        report.parsed_resources
    );
}

/// Feeds the batch ingester a mix of well-formed and malformed entries and
/// checks that it reports failures without aborting the whole batch.
///
/// The fixture holds seven entries: four valid documents, one missing the
/// `content` field, one whose `content` is `null`, and one that is a bare JSON
/// string instead of an object.
#[tokio::test]
async fn integration_test_error_handling_and_recovery() {
    // Number of fixture entries below that are well-formed documents.
    const VALID_ENTRIES: usize = 4;

    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "error_test_user";
    let collection_name = "error_test_estate";

    // Mix of valid and invalid data
    let mixed_data = vec![
        json!({
            "content": "Valid AWS EC2 instance",
            "resource_type": "ec2_instance",
            "instance_id": "i-valid1"
        }),
        json!({
            // Missing required content field
            "resource_type": "s3_bucket",
            "bucket_name": "invalid-bucket"
        }),
        json!({
            "content": "Valid RDS database",
            "resource_type": "rds_instance",
            "db_identifier": "valid-db"
        }),
        json!({
            "content": null, // Invalid content
            "resource_type": "lambda_function"
        }),
        json!({
            "content": "Another valid EC2 instance",
            "resource_type": "ec2_instance",
            "instance_id": "i-valid2"
        }),
        // Invalid structure: a JSON string rather than an object. It has to be wrapped
        // in `json!` so every element of the vector is a `serde_json::Value`; a bare
        // `&str` here is a type mismatch and does not compile.
        json!("invalid_json_structure"),
        json!({
            "content": "Valid S3 bucket",
            "resource_type": "s3_bucket",
            "bucket_name": "valid-bucket"
        }),
    ];

    let result = rag
        .ingest_aws_estate_batch(mixed_data, user_id, collection_name)
        .await
        .unwrap();

    // Should process valid ones and report failures
    println!("📊 Error handling test results:");
    println!("   ✅ Successfully processed: {}", result.parsed_resources);
    println!("   ❌ Failed to process: {}", result.failed_resources);
    println!("   📝 Error details: {:?}", result.create_result.failed);

    // At least the valid resources should be processed
    assert!(result.parsed_resources >= VALID_ENTRIES);
    assert!(result.failed_resources > 0);
    assert_eq!(result.total_resources, 7); // Total attempted

    println!("✅ Error handling test completed - system gracefully handled mixed data");
}