//! rag-module 0.6.7
//!
//! Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
use rag_module::{RagModule, SearchOptions};
use serde_json::json;
use std::path::PathBuf;
use anyhow::Result;

/// Returns the marker shown in front of a summary line: a check mark when the
/// corresponding check passed, a cross otherwise.
fn status_mark(passed: bool) -> &'static str {
    if passed {
        "✅"
    } else {
        "❌"
    }
}

/// Prints a test heading followed by a 50-character horizontal rule.
fn print_section(title: &str) {
    println!("\n📋 {}", title);
    println!("{}", "-".repeat(50));
}

/// Prints `heading` and then the file name of every readable entry directly
/// under `dir`.
///
/// Listing is best-effort: if the directory cannot be read nothing is printed,
/// and individual entries that fail to load are skipped.
fn print_dir_entries(dir: &std::path::Path, heading: &str) {
    if let Ok(entries) = std::fs::read_dir(dir) {
        println!("{}", heading);
        for entry in entries.flatten() {
            println!("   - {}", entry.file_name().to_string_lossy());
        }
    }
}

/// Exercises AWS estate ingestion against a `RagModule` rooted at
/// `./test_dedup_storage` and reports whether deduplication behaves as expected.
///
/// The run ingests one RDS record, ingests it again unchanged, ingests it with
/// modified metadata fields (the `content` field is left untouched), and
/// finally ingests a different RDS record, checking the collection's document
/// count after each step. It then issues three searches and inspects the
/// storage directories under the base path.
///
/// Every check outcome is recorded, and the closing summary reflects those
/// outcomes rather than reporting success unconditionally.
///
/// # Errors
///
/// Returns an error if initialisation, setting the user context, ingestion,
/// counting, or searching fails. A failed check is only reported on stdout;
/// the function still returns `Ok(())` in that case.
#[tokio::main]
async fn main() -> Result<()> {
    println!("🧪 Real AWS Estate Deduplication Test");
    println!("====================================");

    // Use a real test directory that will create qdrant-data
    let base_path = PathBuf::from("./test_dedup_storage");
    println!("📁 Test storage path: {}", base_path.display());

    // Initialize RAG module - this should create the qdrant-data directory
    let rag = RagModule::new(base_path.clone()).await?;

    let user_id = "test_user_real";
    let collection_name = "aws_real_estate";

    // Set user context
    rag.set_user_context(user_id).await?;
    println!("✅ RAG module initialized and user context set");

    // Your actual RDS data
    let rds_data_v1 = json!({
        "type": "rds",
        "keywords": [
            "rds",
            "database",
            "relational"
        ],
        "profile": "default",
        "accountId": "288761761556",
        "region": "us-east-1",
        "permissions": {
            "accessLevel": "FullAccess",
            "hasRead": true,
            "hasWrite": true,
            "hasFullAccess": true
        },
        "content": "RDS instance dev-eshop-mysql-rds, id dev-eshop-mysql-rds, profile default, region us-east-1, engine mysql 8.0.42, class db.t3.micro, state stopped, storage 20GB, created 2025-07-07T10:02:58.453000+00:00",
        "dbInstanceIdentifier": "dev-eshop-mysql-rds",
        "dbInstanceClass": "db.t3.micro",
        "engine": "mysql",
        "dbInstanceStatus": "stopped",
        "automaticRestartTime": "2025-12-08T11:37:25.225000+00:00",
        "masterUsername": "mysql_admin",
        "endpoint": {
            "address": "dev-eshop-mysql-rds.cuxmiwm0ulok.us-east-1.rds.amazonaws.com",
            "port": 3306,
            "hostedZoneId": "Z2R2ITUGPM61AM"
        },
        "allocatedStorage": 20,
        "instanceCreateTime": "2025-07-07T10:02:58.453000+00:00",
        "preferredBackupWindow": "10:27-10:57",
        "backupRetentionPeriod": 1,
        "dbSecurityGroups": [],
        "vpcSecurityGroups": [
            {
                "vpcSecurityGroupId": "sg-0a017fbb383b24395",
                "status": "active"
            }
        ],
        "storageEncrypted": true,
        "kmsKeyId": "arn:aws:kms:us-east-1:288761761556:key/3469cdf7-b2dc-4d99-8dba-816a95e9465a",
        "tagList": [
            {
                "key": "app",
                "value": "e-shopping"
            },
            {
                "key": "environment",
                "value": "dev"
            }
        ]
    });

    print_section("Test 1: Initial RDS Data Ingestion");
    println!("🔄 Ingesting RDS data for the first time...");
    rag.ingest_aws_estate(rds_data_v1.clone(), user_id, collection_name).await?;
    println!("✅ First ingestion completed");

    // Check document count
    let count_after_first = rag.get_document_count(Some(collection_name), None).await?;
    println!("📊 Document count after first ingestion: {}", count_after_first);

    print_section("Test 2: Duplicate RDS Data (Same Content)");
    println!("🔄 Ingesting EXACT SAME RDS data again...");
    rag.ingest_aws_estate(rds_data_v1.clone(), user_id, collection_name).await?;
    println!("✅ Second ingestion completed");

    // Check document count - it must not have grown
    let count_after_duplicate = rag.get_document_count(Some(collection_name), None).await?;
    println!("📊 Document count after duplicate ingestion: {}", count_after_duplicate);

    let dedup_ok = count_after_duplicate == count_after_first;
    if dedup_ok {
        println!("🎉 SUCCESS: Duplicate content was properly deduplicated!");
    } else {
        println!("❌ PROBLEM: Duplicate content created a new document!");
    }

    print_section("Test 3: Same Content with Updated Metadata");

    // Same content but with updated metadata. The original record is not
    // needed after this point, so it is moved instead of cloned.
    let mut rds_data_v2 = rds_data_v1;

    // Update some metadata fields while keeping content the same
    rds_data_v2["dbInstanceStatus"] = json!("running"); // Changed from "stopped"
    rds_data_v2["allocatedStorage"] = json!(30); // Increased from 20GB
    rds_data_v2["automaticRestartTime"] = json!("2025-12-25T10:00:00.000Z"); // New restart time

    // Add new metadata field
    rds_data_v2["lastModified"] = json!("2025-12-24T12:00:00.000Z");

    // Update tags
    if let Some(tag_list) = rds_data_v2["tagList"].as_array_mut() {
        tag_list.push(json!({
            "key": "updated",
            "value": "true"
        }));
    }

    println!("🔄 Ingesting RDS data with UPDATED metadata (same content)...");
    rag.ingest_aws_estate(rds_data_v2, user_id, collection_name).await?;
    println!("✅ Metadata update ingestion completed");

    // Check document count - should still be 1 (updated, not new)
    let count_after_update = rag.get_document_count(Some(collection_name), None).await?;
    println!("📊 Document count after metadata update: {}", count_after_update);

    let update_ok = count_after_update == 1;
    if update_ok {
        println!("🎉 SUCCESS: Metadata update replaced existing document!");
    } else {
        println!("❌ PROBLEM: Metadata update created a new document instead of updating!");
    }

    print_section("Test 4: Different Content (Different RDS Instance)");

    // Completely different RDS instance with different content
    let different_rds = json!({
        "type": "rds",
        "keywords": ["rds", "database", "postgresql"],
        "profile": "production",
        "accountId": "288761761556",
        "region": "us-west-2",
        "content": "RDS instance prod-app-postgresql-db, id prod-app-postgresql-db, profile production, region us-west-2, engine postgresql 14.9, class db.r5.large, state available, storage 100GB, created 2025-12-01T08:30:00.000Z",
        "dbInstanceIdentifier": "prod-app-postgresql-db",
        "dbInstanceClass": "db.r5.large",
        "engine": "postgresql",
        "dbInstanceStatus": "available",
        "masterUsername": "postgres_admin",
        "allocatedStorage": 100,
        "tagList": [
            {
                "key": "app",
                "value": "production-app"
            },
            {
                "key": "environment",
                "value": "production"
            }
        ]
    });

    println!("🔄 Ingesting DIFFERENT RDS instance...");
    rag.ingest_aws_estate(different_rds, user_id, collection_name).await?;
    println!("✅ Different content ingestion completed");

    // Check document count - should now be 2
    let count_after_different = rag.get_document_count(Some(collection_name), None).await?;
    println!("📊 Document count after different content: {}", count_after_different);

    let different_ok = count_after_different == 2;
    if different_ok {
        println!("🎉 SUCCESS: Different content created new document!");
    } else {
        println!("❌ PROBLEM: Expected 2 documents, got {}", count_after_different);
    }

    print_section("Test 5: Search Verification");

    // Search for the documents to verify they exist and are correct
    let mysql_results = rag.search(collection_name, "mysql dev-eshop", user_id, SearchOptions::default()).await?;
    println!("🔍 Search for 'mysql dev-eshop' found {} results", mysql_results.len());

    let postgresql_results = rag.search(collection_name, "postgresql prod-app", user_id, SearchOptions::default()).await?;
    println!("🔍 Search for 'postgresql prod-app' found {} results", postgresql_results.len());

    let all_rds_results = rag.search(collection_name, "RDS instance", user_id, SearchOptions::default()).await?;
    println!("🔍 Search for 'RDS instance' found {} results", all_rds_results.len());

    // Each query quotes text from an ingested record, so an empty result set
    // means search did not find what was just stored.
    let search_ok = !mysql_results.is_empty()
        && !postgresql_results.is_empty()
        && !all_rds_results.is_empty();

    // Check the storage directory
    println!("\n📁 Storage Directory Check:");
    println!("{}", "-".repeat(30));
    let storage_path = base_path.join("qdrant-data");
    let storage_ok = storage_path.exists();
    if storage_ok {
        println!("✅ qdrant-data directory created at: {}", storage_path.display());
        print_dir_entries(&storage_path, "📂 Contents:");
    } else {
        println!("❌ qdrant-data directory not found at: {}", storage_path.display());
    }

    // Check user-specific data (informational only; absence is not reported)
    let user_path = base_path.join("data").join("users").join(user_id);
    if user_path.exists() {
        println!("✅ User data directory: {}", user_path.display());
        print_dir_entries(&user_path, "📂 User data contents:");
    }

    println!("\n🎉 Deduplication Test Summary:");
    println!("===============================");
    // Document IDs are not inspected directly; an unchanged document count
    // after re-ingesting identical data is the evidence for the first two lines.
    println!("{} Same content creates same document ID", status_mark(dedup_ok));
    println!("{} Duplicate content is properly deduplicated", status_mark(dedup_ok));
    println!("{} Metadata updates replace existing document", status_mark(update_ok));
    println!("{} Different content creates new documents", status_mark(different_ok));
    println!("{} Search functionality works correctly", status_mark(search_ok));
    println!("{} Storage directories are properly created", status_mark(storage_ok));
    println!("\n📊 Final Stats:");
    println!("   Total documents: {}", count_after_different);
    println!("   Expected: 2 (1 MySQL + 1 PostgreSQL)");

    let all_passed = dedup_ok && update_ok && different_ok && search_ok && storage_ok;
    if all_passed {
        println!("\n🏆 ALL TESTS PASSED! Deduplication is working correctly.");
    } else {
        println!("\n⚠️  Some tests may have failed. Please check the output above.");
    }

    Ok(())
}