rag-module 0.6.7

Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn main() {
    println!("๐Ÿงช Demonstrating Content-Based Deduplication Logic");
    println!("==================================================");

    // Simulate the content hashing logic we implemented
    let content1 = "This is an EC2 instance with standard configuration";
    let content2 = "This is an EC2 instance with standard configuration"; // Same content
    let content3 = "This is a completely different RDS database instance"; // Different content

    // Generate hashes like our implementation does
    let hash1 = generate_content_hash(content1);
    let hash2 = generate_content_hash(content2);
    let hash3 = generate_content_hash(content3);

    let doc_id1 = format!("aws_estate-{:x}", hash1);
    let doc_id2 = format!("aws_estate-{:x}", hash2);
    let doc_id3 = format!("aws_estate-{:x}", hash3);

    println!("\n๐Ÿ“„ Content 1: \"{}\"", content1);
    println!("๐Ÿ†” Document ID 1: {}", doc_id1);
    
    println!("\n๐Ÿ“„ Content 2: \"{}\"", content2);
    println!("๐Ÿ†” Document ID 2: {}", doc_id2);
    
    println!("\n๐Ÿ“„ Content 3: \"{}\"", content3);
    println!("๐Ÿ†” Document ID 3: {}", doc_id3);

    println!("\nโœ… **Deduplication Results:**");
    
    if doc_id1 == doc_id2 {
        println!("โœ… Same content produces SAME document ID โ†’ Will be deduplicated!");
        println!("   Content 1 and 2 will share the same document (upsert behavior)");
    } else {
        println!("โŒ Same content produces DIFFERENT document IDs โ†’ Would create duplicates!");
    }

    if doc_id1 != doc_id3 {
        println!("โœ… Different content produces DIFFERENT document ID โ†’ Will create new document!");
        println!("   Content 1 and 3 will be stored as separate documents");
    } else {
        println!("โŒ Different content produces SAME document ID โ†’ Would cause collisions!");
    }

    println!("\n๐Ÿ“Š **Hash Distribution:**");
    println!("Hash 1: {:016x}", hash1);
    println!("Hash 2: {:016x}", hash2);  
    println!("Hash 3: {:016x}", hash3);
    
    println!("\n๐ŸŽ‰ **How This Solves Your Problem:**");
    println!("1. ๐Ÿ”„ Same content โ†’ Same hash โ†’ Same doc ID โ†’ Upsert (delete old + add new)");
    println!("2. ๐Ÿ“ Same content + different metadata โ†’ Same hash โ†’ Updates existing document");
    println!("3. ๐Ÿ†• Different content โ†’ Different hash โ†’ Different doc ID โ†’ Creates new document");
    println!("4. ๐Ÿ“ˆ Scales to 2M+ documents with O(1) hash lookup");
}

/// Returns a 64-bit hash of `content` computed with the standard library's
/// [`DefaultHasher`].
///
/// Equal strings always yield the same value within one build of the program.
/// Note that the standard library does not guarantee `DefaultHasher`'s
/// algorithm stays the same across Rust releases, so values produced here
/// should not be assumed stable once the toolchain changes.
fn generate_content_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    // Feed the string through its `Hash` impl, exactly as `content.hash(..)` would.
    Hash::hash(content, &mut state);
    state.finish()
}