use rag_module::RagModule;
use serde_json::json;
use std::time::Instant;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let test_dir = "./sqlite-test-data";
std::fs::create_dir_all(test_dir)?;
println!("๐งช Testing SQLite ContentStore Integration");
println!("{}", "=".repeat(60));
let rag = RagModule::new(test_dir).await?;
rag.initialize().await?;
let user_id = "sqlite_test_user";
let collection_name = "aws_estate";
println!("\n๐ Test 1: Single Document Ingestion with Large Metadata");
println!("{}", "-".repeat(60));
let large_metadata = json!({
"KeyId": "eeb91ab6-aadc-4d60-a0e1-dcb7c56f0adf",
"AccountId": "288761761556",
"Region": "us-west-2",
"Arn": "arn:aws:kms:us-west-2:288761761556:key/eeb91ab6-aadc-4d60-a0e1-dcb7c56f0adf",
"CreationDate": "2023-05-15T10:30:00Z",
"KeyManager": "CUSTOMER",
"KeyState": "Enabled",
"KeyUsage": "ENCRYPT_DECRYPT",
"Origin": "AWS_KMS",
"MultiRegion": false,
"Description": "Production KMS key for data encryption",
"Tags": [
{"Key": "Environment", "Value": "Production"},
{"Key": "Team", "Value": "Security"},
{"Key": "Application", "Value": "DataEncryption"},
{"Key": "CostCenter", "Value": "CC-12345"}
],
"KeyPolicy": {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Enable IAM User Permissions",
"Effect": "Allow",
"Principal": {"AWS": "arn:aws:iam::288761761556:root"},
"Action": "kms:*",
"Resource": "*"
}
]
},
"Aliases": ["alias/prod-encryption-key"],
"CustomerMasterKeySpec": "SYMMETRIC_DEFAULT",
"EncryptionAlgorithms": ["SYMMETRIC_DEFAULT"],
"AdditionalMetadata": {
"LastRotated": "2024-05-15T10:30:00Z",
"RotationEnabled": true,
"RotationPeriodInDays": 365
}
});
let doc1 = json!({
"content": "KMS Key eeb91ab6-aadc-4d60-a0e1-dcb7c56f0adf in us-west-2 for production data encryption",
"type": "kms-key",
"region": "us-west-2",
"accountId": "288761761556",
"service": "kms",
"resourceType": "key",
"KeyState": "Enabled",
"KeyId": large_metadata["KeyId"],
"Arn": large_metadata["Arn"],
"CreationDate": large_metadata["CreationDate"],
"KeyManager": large_metadata["KeyManager"],
"KeyUsage": large_metadata["KeyUsage"],
"Origin": large_metadata["Origin"],
"MultiRegion": large_metadata["MultiRegion"],
"Description": large_metadata["Description"],
"Tags": large_metadata["Tags"],
"KeyPolicy": large_metadata["KeyPolicy"],
"Aliases": large_metadata["Aliases"],
"CustomerMasterKeySpec": large_metadata["CustomerMasterKeySpec"],
"EncryptionAlgorithms": large_metadata["EncryptionAlgorithms"],
"AdditionalMetadata": large_metadata["AdditionalMetadata"],
});
let result1 = rag.ingest_aws_estate(doc1.clone(), user_id, collection_name).await?;
println!("โ
Single document ingested: {} resources", result1.parsed_resources);
println!("\n๐ฆ Test 2: Batch Ingestion with 50 Documents");
println!("{}", "-".repeat(60));
let mut batch_docs = Vec::new();
for i in 0..50 {
let doc = json!({
"content": format!("KMS Key key-{:016x} in {} for {} environment with {} encryption",
i,
if i % 3 == 0 { "us-west-2" } else if i % 3 == 1 { "us-east-1" } else { "eu-west-1" },
if i % 2 == 0 { "production" } else { "staging" },
if i % 2 == 0 { "AES-256" } else { "RSA-2048" }
),
"type": "kms-key",
"region": if i % 3 == 0 { "us-west-2" } else if i % 3 == 1 { "us-east-1" } else { "eu-west-1" },
"accountId": "288761761556",
"service": "kms",
"resourceType": "key",
"KeyState": if i % 5 == 0 { "Disabled" } else { "Enabled" },
"KeyId": format!("key-{:016x}", i),
"Arn": format!("arn:aws:kms:us-west-2:288761761556:key/key-{:016x}", i),
"CreationDate": format!("2024-01-{:02}T10:30:00Z", (i % 28) + 1),
"KeyManager": "CUSTOMER",
"KeyUsage": "ENCRYPT_DECRYPT",
"Origin": "AWS_KMS",
"MultiRegion": i % 4 == 0,
"Description": format!("KMS key #{} for data encryption", i),
"Tags": [
{"Key": "Environment", "Value": if i % 2 == 0 { "Production" } else { "Staging" }},
{"Key": "Index", "Value": i.to_string()},
],
"KeyPolicy": {
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Principal": {"AWS": "arn:aws:iam::288761761556:root"},
"Action": "kms:*",
"Resource": "*"
}]
}
});
batch_docs.push(doc);
}
let batch_start = Instant::now();
let batch_result = rag.ingest_aws_estate_batch(batch_docs, user_id, collection_name).await?;
let batch_duration = batch_start.elapsed();
println!("โ
Batch ingestion complete!");
println!(" ๐ Total documents: {}", batch_result.total_resources);
println!(" โ
Successfully processed: {}", batch_result.parsed_resources);
println!(" โ Failed: {}", batch_result.failed_resources);
println!(" โก Time taken: {:?}", batch_duration);
println!(" ๐ Throughput: {:.2} docs/sec",
batch_result.parsed_resources as f64 / batch_duration.as_secs_f64());
println!("\n๐ Test 3: ContentStore Statistics");
println!("{}", "-".repeat(60));
let stats = rag.content_store.get_stats().await?;
println!("โ
ContentStore Statistics:");
println!(" ๐ Total entries: {}", stats.total_entries);
println!(" ๐พ Total size: {} bytes ({:.2} MB)",
stats.total_size_bytes,
stats.total_size_bytes as f64 / 1_048_576.0
);
println!(" ๐๏ธ Collections: {}", stats.collection_count);
for collection in &stats.collections {
let count = rag.content_store.count_collection(collection).await?;
let size = rag.content_store.get_collection_size(collection).await?;
println!(" - {}: {} entries, {} bytes ({:.2} KB)",
collection, count, size, size as f64 / 1024.0);
}
println!("\n๐ Test 4: Search with Metadata Combination");
println!("{}", "-".repeat(60));
let search_options = rag_module::SearchOptions {
limit: Some(5),
score_threshold: Some(0.1),
..Default::default()
};
let search_start = Instant::now();
let search_results = rag.search(collection_name, "KMS encryption keys production", user_id, search_options).await?;
let search_duration = search_start.elapsed();
println!("โ
Search complete in {:?}", search_duration);
println!(" ๐ Found {} results", search_results.len());
for (idx, result) in search_results.iter().enumerate() {
if let Some(ref payload) = result.payload {
let has_doc_ref = payload.contains_key("doc_ref");
let has_encrypted_metadata = payload.contains_key("_encrypted_metadata");
let has_small_fields = payload.contains_key("type") || payload.contains_key("region");
println!("\n Result #{} (score: {:.4}):", idx + 1, result.score);
println!(" โ doc_ref: {}", if has_doc_ref { "โ
" } else { "โ" });
println!(" โ _encrypted_metadata: {}", if has_encrypted_metadata { "โ
" } else { "โ" });
println!(" โ small fields (type/region): {}", if has_small_fields { "โ
" } else { "โ" });
if has_doc_ref {
let doc_ref = payload.get("doc_ref")
.and_then(|v| v.as_str())
.unwrap_or("unknown");
println!(" ๐ doc_ref: {}", doc_ref);
}
if let Some(type_val) = payload.get("type") {
println!(" ๐ท๏ธ type: {}", type_val);
}
if let Some(region_val) = payload.get("region") {
println!(" ๐ region: {}", region_val);
}
if has_encrypted_metadata {
let metadata_str = payload.get("_encrypted_metadata")
.and_then(|v| v.as_str())
.unwrap_or("");
println!(" ๐พ _encrypted_metadata size: {} bytes", metadata_str.len());
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(metadata_str) {
if let Some(obj) = parsed.as_object() {
println!(" โ
Valid JSON with {} fields", obj.len());
if let Some(key_id) = obj.get("KeyId") {
println!(" - KeyId: {}", key_id);
}
if let Some(arn) = obj.get("Arn") {
println!(" - Arn: {}", arn);
}
}
} else {
println!(" โ ๏ธ Could not parse _encrypted_metadata as JSON");
}
} else {
println!(" โ MISSING _encrypted_metadata - This is a problem!");
}
}
}
println!("\n๐ Test 5: Data Integrity Verification");
println!("{}", "-".repeat(60));
let total_expected = 1 + 50; let sqlite_count = rag.content_store.count_collection(collection_name).await?;
println!("โ
Data Integrity Check:");
println!(" ๐ Expected documents: {}", total_expected);
println!(" ๐พ SQLite entries: {}", sqlite_count);
if sqlite_count >= total_expected as i64 {
println!(" โ
All documents accounted for in SQLite!");
} else {
println!(" โ ๏ธ Missing {} documents in SQLite", total_expected as i64 - sqlite_count);
}
println!("\n๐ Test 6: Direct ContentStore Query");
println!("{}", "-".repeat(60));
if let Some(first_result) = search_results.first() {
if let Some(payload) = &first_result.payload {
if let Some(doc_ref) = payload.get("doc_ref").and_then(|v| v.as_str()) {
println!("Testing direct ContentStore query for: {}", doc_ref);
match rag.content_store.get_metadata(doc_ref).await? {
Some(metadata) => {
println!("โ
Successfully retrieved from ContentStore!");
println!(" ๐พ Size: {} bytes ({:.2} KB)", metadata.len(), metadata.len() as f64 / 1024.0);
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&metadata) {
if let Some(obj) = parsed.as_object() {
println!(" ๐ Fields in metadata: {}", obj.len());
println!(" ๐ Top-level keys: {:?}",
obj.keys().take(5).collect::<Vec<_>>());
}
}
}
None => {
println!("โ Not found in ContentStore - This is a problem!");
}
}
}
}
}
println!();
println!("{}", "=".repeat(60));
println!("๐ Test Suite Complete!");
println!("{}", "=".repeat(60));
println!("โ
SQLite ContentStore is working correctly:");
println!(" โ Single document ingestion");
println!(" โ Batch document ingestion (50 docs)");
println!(" โ Metadata storage in SQLite");
println!(" โ Search with metadata combination");
println!(" โ Data integrity verified");
println!(" โ Direct ContentStore queries");
println!();
println!("๐ Test data location: {}", test_dir);
println!(" ๐พ SQLite database: {}/content_store.db", test_dir);
println!(" ๐๏ธ Qdrant data: {}/qdrant-data/", test_dir);
Ok(())
}