use rag_module::RagModule;
use serde_json::json;
use tempfile::TempDir;
use std::time::Instant;
/// Timing summary for a single ingestion benchmark run.
#[derive(Debug, Clone, PartialEq)]
struct BenchmarkResult {
    /// Number of documents that were submitted for ingestion.
    documents_processed: usize,
    /// Wall-clock time measured around the ingestion call(s).
    duration: std::time::Duration,
    /// Documents per second, derived from the two fields above.
    throughput: f64,
}

impl BenchmarkResult {
    /// Builds a result and derives its throughput (documents per second).
    ///
    /// An empty run always reports a throughput of `0.0`; without this guard a
    /// zero-length measurement would evaluate `0.0 / 0.0` and yield `NaN`,
    /// which silently fails every later comparison. A non-empty run measured
    /// as taking zero time still reports `f64::INFINITY`.
    fn new(documents_processed: usize, duration: std::time::Duration) -> Self {
        let throughput = if documents_processed == 0 {
            0.0
        } else {
            documents_processed as f64 / duration.as_secs_f64()
        };
        Self {
            documents_processed,
            duration,
            throughput,
        }
    }

    /// Prints the document count, duration and throughput under `test_name`.
    fn print_results(&self, test_name: &str) {
        println!("๐ {} Results:", test_name);
        println!(" ๐ Documents: {}", self.documents_processed);
        println!(" โฑ๏ธ Duration: {:?}", self.duration);
        println!(" ๐ Throughput: {:.2} docs/sec", self.throughput);
    }
}
/// Builds `count` synthetic AWS resource documents as JSON values.
///
/// Every field is derived from the document index `i`, so the output is
/// deterministic: the resource kind cycles with `i % 4`, the environment with
/// `i % 3`, the region with `i % 2`, and the three tag values with `i % 5`,
/// `i % 3` and `i % 4` respectively.
fn generate_test_documents(count: usize) -> Vec<serde_json::Value> {
    // (label used in the "content" sentence, value stored under "resource_type")
    const RESOURCE_KINDS: [(&str, &str); 4] = [
        ("EC2", "ec2_instance"),
        ("RDS", "rds_instance"),
        ("Lambda", "lambda_function"),
        ("S3", "s3_bucket"),
    ];
    // (word used in the "content" sentence, value stored under "environment")
    const ENVIRONMENTS: [(&str, &str); 3] = [
        ("production", "prod"),
        ("staging", "staging"),
        ("development", "dev"),
    ];
    // Even indices map to the first region, odd indices to the second.
    const REGIONS: [&str; 2] = ["us-west-2", "us-east-1"];

    (0..count)
        .map(|i| {
            let (kind_label, resource_type) = RESOURCE_KINDS[i % 4];
            let (env_label, environment) = ENVIRONMENTS[i % 3];
            let region = REGIONS[i % 2];

            json!({
                "content": format!(
                    "AWS resource {} - {} instance with {} configuration in {} region",
                    i, kind_label, env_label, region
                ),
                "resource_type": resource_type,
                "resource_id": format!("resource-{:08}", i),
                "region": region,
                "environment": environment,
                "tags": {
                    "Application": format!("app-{}", i % 5),
                    "Team": format!("team-{}", i % 3),
                    "CostCenter": format!("cost-{}", i % 4)
                }
            })
        })
        .collect()
}
/// Ingests `documents` one call at a time and times the whole loop.
///
/// Each document is cloned and sent through `ingest_aws_estate` under its own
/// collection name, `single_test_<index>`. Panics if any ingestion call
/// returns an error.
async fn benchmark_single_ingestion(
    rag: &RagModule,
    documents: &[serde_json::Value],
    user_id: &str,
) -> BenchmarkResult {
    let timer = Instant::now();

    let mut index = 0usize;
    for document in documents {
        let target_collection = format!("single_test_{}", index);
        let _outcome = rag
            .ingest_aws_estate(document.clone(), user_id, &target_collection)
            .await
            .unwrap();
        index += 1;
    }

    let elapsed = timer.elapsed();
    BenchmarkResult::new(documents.len(), elapsed)
}
/// Ingests all `documents` with a single `ingest_aws_estate_batch` call and
/// times that call.
///
/// The document count is captured before the vector is moved into the call.
/// Panics if the batch ingestion returns an error.
async fn benchmark_batch_ingestion(
    rag: &RagModule,
    documents: Vec<serde_json::Value>,
    user_id: &str,
    collection_name: &str,
) -> BenchmarkResult {
    let total = documents.len();

    let timer = Instant::now();
    let _outcome = rag
        .ingest_aws_estate_batch(documents, user_id, collection_name)
        .await
        .unwrap();
    let elapsed = timer.elapsed();

    BenchmarkResult::new(total, elapsed)
}
/// Compares one-at-a-time ingestion with batch ingestion for 10 documents.
///
/// The test passes as long as the batch run takes at most twice as long as
/// the sequential run; the speed-up ratio is printed for information only.
#[tokio::test]
async fn benchmark_small_batch_performance() {
    // Keep the temporary directory binding alive for the whole test.
    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "benchmark_user_small";
    let document_count = 10;
    println!("๐งช Starting small batch benchmark ({} documents)", document_count);
    let documents = generate_test_documents(document_count);

    let single_result = benchmark_single_ingestion(&rag, &documents, user_id).await;
    single_result.print_results("Single Ingestion");

    let batch_result = benchmark_batch_ingestion(&rag, documents, user_id, "batch_test_small").await;
    batch_result.print_results("Batch Ingestion");

    // Use fractional seconds: whole-millisecond truncation would turn a
    // sub-millisecond batch run into a zero divisor and print inf/NaN.
    let improvement =
        single_result.duration.as_secs_f64() / batch_result.duration.as_secs_f64();
    println!("๐ Performance Improvement: {:.2}x faster with batch ingestion", improvement);

    assert!(batch_result.duration <= single_result.duration * 2);
}
/// Compares one-at-a-time ingestion with batch ingestion for a medium-sized
/// workload.
///
/// The test requires the batch throughput to exceed 1.5x the sequential
/// throughput; the speed-up ratio is printed for information only.
#[tokio::test]
async fn benchmark_medium_batch_performance() {
    // Keep the temporary directory binding alive for the whole test.
    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "benchmark_user_medium";
    // Sits between the small (10) and large (100) benchmarks in this file. A
    // single document would compare a batch of one against one sequential
    // call, which gives the 1.5x throughput assertion nothing to measure.
    let document_count = 50;
    println!("๐งช Starting medium batch benchmark ({} documents)", document_count);
    let documents = generate_test_documents(document_count);

    let single_result = benchmark_single_ingestion(&rag, &documents, user_id).await;
    single_result.print_results("Single Ingestion");

    let batch_result = benchmark_batch_ingestion(&rag, documents, user_id, "batch_test_medium").await;
    batch_result.print_results("Batch Ingestion");

    // Use fractional seconds: whole-millisecond truncation would turn a
    // sub-millisecond batch run into a zero divisor and print inf/NaN.
    let improvement =
        single_result.duration.as_secs_f64() / batch_result.duration.as_secs_f64();
    println!("๐ Performance Improvement: {:.2}x faster with batch ingestion", improvement);

    assert!(batch_result.throughput > single_result.throughput * 1.5);
}
/// Batch-ingests 100 generated documents and checks the resulting throughput.
///
/// Sequential ingestion is deliberately not run here. The test fails unless
/// the batch throughput exceeds 5 documents per second.
#[tokio::test]
async fn benchmark_large_batch_performance() {
    const USER_ID: &str = "benchmark_user_large";
    const DOCUMENT_COUNT: usize = 100;

    // The directory binding must outlive every use of `rag`.
    let scratch = TempDir::new().unwrap();
    let rag = RagModule::new(scratch.path()).await.unwrap();
    rag.initialize().await.unwrap();

    println!("๐งช Starting large batch benchmark ({} documents)", DOCUMENT_COUNT);
    let corpus = generate_test_documents(DOCUMENT_COUNT);
    println!("โญ๏ธ Skipping single ingestion for large batch (would be too slow)");

    let outcome = benchmark_batch_ingestion(&rag, corpus, USER_ID, "batch_test_large").await;
    outcome.print_results("Large Batch Ingestion");

    assert!(outcome.throughput > 5.0);
    // The line break inside the literal below is part of the printed text.
    println!("โ
Large batch processing achieved {:.2} docs/sec throughput", outcome.throughput);
}
/// Batch-ingests 50 generated documents and verifies that every one of them
/// was parsed and none failed.
///
/// No memory measurement is taken; the test only reports the elapsed time and
/// relies on the run completing without error.
#[tokio::test]
async fn benchmark_memory_usage() {
    const USER_ID: &str = "benchmark_user_memory";
    const DOCUMENT_COUNT: usize = 50;

    // The directory binding must outlive every use of `rag`.
    let scratch = TempDir::new().unwrap();
    let rag = RagModule::new(scratch.path()).await.unwrap();
    rag.initialize().await.unwrap();

    println!("๐งช Testing memory efficiency with {} documents", DOCUMENT_COUNT);
    let corpus = generate_test_documents(DOCUMENT_COUNT);

    let timer = Instant::now();
    let report = rag
        .ingest_aws_estate_batch(corpus, USER_ID, "memory_test")
        .await
        .unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(report.parsed_resources, DOCUMENT_COUNT);
    assert_eq!(report.failed_resources, 0);

    // The line break inside the literal below is part of the printed text.
    println!("โ
Memory test completed:");
    println!(" ๐ Documents processed: {}", report.parsed_resources);
    println!(" โฑ๏ธ Time taken: {:?}", elapsed);
    println!(" ๐ง Memory usage appears stable (no OOM errors)");
}
/// Runs batch ingestion at several batch sizes and prints a comparison table.
///
/// Each size is ingested into its own collection, `batch_size_test_<size>`.
/// The test makes no assertions of its own; it reports the size with the
/// highest throughput and the throughput ratio between the first and last
/// sizes in the list.
#[tokio::test]
async fn benchmark_different_batch_sizes() {
    // Keep the temporary directory binding alive for the whole test.
    let temp_dir = TempDir::new().unwrap();
    let rag = RagModule::new(temp_dir.path()).await.unwrap();
    rag.initialize().await.unwrap();

    let user_id = "benchmark_user_sizes";
    // Sizes are unique and start at 1: a repeated size would ingest into the
    // same collection twice, and the closing summary compares the first entry
    // with the last, so the last entry must not be 1 again.
    let batch_sizes = [1, 5, 10, 20, 32, 50, 128, 256];
    println!("๐งช Testing different batch sizes for optimal performance");

    let mut results = Vec::with_capacity(batch_sizes.len());
    for &batch_size in &batch_sizes {
        let documents = generate_test_documents(batch_size);
        let collection_name = format!("batch_size_test_{}", batch_size);
        let result = benchmark_batch_ingestion(&rag, documents, user_id, &collection_name).await;
        results.push((batch_size, result));
    }

    println!("\n๐ Batch Size Performance Results:");
    println!("{:<12} {:<15} {:<15}", "Batch Size", "Duration", "Throughput");
    println!("{}", "-".repeat(45));

    // Track the best throughput seen while printing the table rows.
    let mut best_throughput = 0.0;
    let mut best_size = 0;
    for (size, result) in &results {
        println!("{:<12} {:<15?} {:<15.2}", size, result.duration, result.throughput);
        if result.throughput > best_throughput {
            best_throughput = result.throughput;
            best_size = *size;
        }
    }
    println!("\n๐ Optimal batch size: {} documents ({:.2} docs/sec)", best_size, best_throughput);

    // `results` has one entry per batch size, so the first and last exist.
    let first_throughput = results[0].1.throughput;
    let last_throughput = results.last().unwrap().1.throughput;
    println!("๐ Throughput improvement from size 1 to {}: {:.2}x",
        batch_sizes.last().unwrap(),
        last_throughput / first_throughput);
}