//! rag-module 0.6.7
//!
//! Enterprise RAG module with chat context storage, vector search, session management, and model downloading. Rust implementation with Node.js compatibility.
use anyhow::Result;
use serde_json::json;
use rag_module::RagModule;

/// Example showing how to fetch all documents from your RAG system.
///
/// Lists the `chat_history`, `aws_estate`, and `knowledge_base` collections, prints
/// per-collection counts plus the structure of the first document, and writes a JSON
/// export of everything retrieved to `./rag-data/rag_export_<timestamp>.json`.
///
/// # Errors
///
/// Propagates any error returned by the `RagModule` calls, by JSON serialization of the
/// export, or by writing the export file.
async fn get_all_documents_example() -> Result<()> {
    /// Maximum number of characters shown in the sample content preview.
    const PREVIEW_CHARS: usize = 100;

    // Initialize the RAG module
    let base_path = "./rag-data";
    let rag = RagModule::new(base_path).await?;
    rag.initialize().await?;

    println!("🚀 Fetching all documents from RAG system for learning center...\n");

    // === Method 1: Get all documents from all collections ===

    // 1. Get all documents from chat_history collection
    println!("📚 Fetching chat history documents...");
    let chat_docs = rag.list_documents(Some("chat_history"), None).await?;
    println!("✅ Retrieved {} chat history documents", chat_docs.len());

    // 2. Get all documents from aws_estate collection
    println!("🏗️ Fetching AWS estate documents...");
    let aws_docs = rag.list_documents(Some("aws_estate"), None).await?;
    println!("✅ Retrieved {} AWS estate documents", aws_docs.len());

    // 3. Get all documents from knowledge_base collection
    // NOTE(review): `?` propagates any error from this call; whether a missing collection
    // yields an error or an empty list is not visible from here — confirm.
    println!("📖 Fetching knowledge base documents...");
    let kb_docs = rag.list_documents(Some("knowledge_base"), None).await?;
    println!("✅ Retrieved {} knowledge base documents", kb_docs.len());

    // === Method 2: Get documents with specific limits ===

    // Pass a `limit` option to cap the number of documents requested
    let limited_options = json!({
        "limit": 100
    });
    let limited_docs = rag.list_documents(Some("aws_estate"), Some(limited_options)).await?;
    println!("✅ Retrieved {} documents (limited to 100)", limited_docs.len());

    // === Method 3: Get document counts for summary ===

    // Record the counts before the vectors are moved into the combined list below.
    let chat_count = chat_docs.len();
    let aws_count = aws_docs.len();
    let kb_count = kb_docs.len();

    let mut all_documents = Vec::new();
    all_documents.extend(chat_docs);
    all_documents.extend(aws_docs);
    all_documents.extend(kb_docs);

    println!("\n📊 SUMMARY:");
    println!("═══════════════════════════════════════");
    println!("Total documents retrieved: {}", all_documents.len());

    // === Method 4: Display document details for learning ===

    if let Some(sample_doc) = all_documents.first() {
        println!("\n🔍 Sample document structure:");
        println!("ID: {}", sample_doc.id);
        println!("Vector ID: {}", sample_doc.vector_id);
        // Count characters, not bytes, so the value matches the "chars" label.
        println!("Content length: {} chars", sample_doc.content.chars().count());

        // Truncate on a character boundary: slicing at a fixed byte offset panics when
        // that offset lands inside a multi-byte UTF-8 sequence.
        let preview = match sample_doc.content.char_indices().nth(PREVIEW_CHARS) {
            Some((cut, _)) => format!("{}...", &sample_doc.content[..cut]),
            None => sample_doc.content.clone(),
        };
        println!("Content preview: {}", preview);

        println!("Created at: {}", sample_doc.created_at);
        println!("Updated at: {}", sample_doc.updated_at);
        println!("Metadata fields: {:?}", sample_doc.metadata.keys().collect::<Vec<_>>());

        if let Some(embedding) = &sample_doc.embedding {
            println!("Embedding dimensions: {}", embedding.len());
        } else {
            println!("No embedding available");
        }
    }

    // === Method 5: Export for learning center ===

    println!("\n💾 Exporting documents for learning center...");

    // Take the timestamp once so the value embedded in the export and the one in the
    // file name refer to the same instant.
    let exported_at = chrono::Utc::now();

    // Create a structured export for analysis
    let export_data = json!({
        "export_timestamp": exported_at.to_rfc3339(),
        "total_documents": all_documents.len(),
        "collections": {
            "chat_history": chat_count,
            "aws_estate": aws_count,
            "knowledge_base": kb_count
        },
        "documents": all_documents.iter().map(|doc| {
            json!({
                "id": doc.id,
                "vector_id": doc.vector_id,
                "content": doc.content,
                "metadata": doc.metadata,
                "created_at": doc.created_at.to_rfc3339(),
                "updated_at": doc.updated_at.to_rfc3339(),
                // Only the embedding length is exported, not the vector itself.
                "embedding_dimensions": doc.embedding.as_ref().map(|e| e.len())
            })
        }).collect::<Vec<_>>()
    });

    // Save to file for analysis
    let export_file = format!("{}/rag_export_{}.json", base_path, exported_at.format("%Y%m%d_%H%M%S"));
    tokio::fs::write(&export_file, serde_json::to_string_pretty(&export_data)?).await?;
    println!("✅ Exported all documents to: {}", export_file);

    Ok(())
}

/// Alternative method: list `chat_history` documents after setting a user context.
///
/// Opens the store at `./rag-data`, calls `set_user_context(user_id)`, lists the
/// `chat_history` collection, and prints how many documents came back.
///
/// # Errors
///
/// Propagates any error returned by the `RagModule` calls.
async fn get_documents_with_user_context(user_id: &str) -> Result<()> {
    let store = RagModule::new("./rag-data").await?;
    store.initialize().await?;

    // The context is set before listing so the listing call runs under it.
    // NOTE(review): whether results are actually filtered per user depends on the
    // module's isolation behaviour, which is not visible here.
    store.set_user_context(user_id).await?;

    let retrieved = store.list_documents(Some("chat_history"), None).await?;
    let retrieved_count = retrieved.len();
    println!("👤 Retrieved {} documents for user: {}", retrieved_count, user_id);

    Ok(())
}

/// Simple GET-like helper: list one collection and return its documents as JSON values.
///
/// * `collection_name` — collection to list; `aws_estate` is used when `None`.
/// * `limit` — when `Some(n)`, forwarded to the listing call as `{"limit": n}`;
///   when `None`, no options are passed.
///
/// Each returned value carries the document's `id`, `vector_id`, `content`, `metadata`,
/// `created_at`, `updated_at`, the embedding length (`embedding_dimensions`, `null` when
/// absent) and a `has_embedding` flag.
///
/// # Errors
///
/// Propagates any error returned by the `RagModule` calls.
pub async fn fetch_all_rag_documents(collection_name: Option<&str>, limit: Option<usize>) -> Result<Vec<serde_json::Value>> {
    let store = RagModule::new("./rag-data").await?;
    store.initialize().await?;

    let target = collection_name.unwrap_or("aws_estate");
    let list_options = limit.map(|limit| json!({ "limit": limit }));
    let documents = store.list_documents(Some(target), list_options).await?;

    // Convert documents to JSON for easy consumption
    let mut json_docs: Vec<serde_json::Value> = Vec::with_capacity(documents.len());
    for doc in documents {
        json_docs.push(json!({
            "id": doc.id,
            "vector_id": doc.vector_id,
            "content": doc.content,
            "metadata": doc.metadata,
            "created_at": doc.created_at,
            "updated_at": doc.updated_at,
            "embedding_dimensions": doc.embedding.as_ref().map(|e| e.len()),
            "has_embedding": doc.embedding.is_some()
        }));
    }

    Ok(json_docs)
}

/// Entry point: runs the three examples in order, stopping at the first error.
#[tokio::main]
async fn main() -> Result<()> {
    // Example 1: full walk over every collection, including the JSON export
    get_all_documents_example().await?;

    // Example 2: listing under a specific user context
    get_documents_with_user_context("learning_center_user").await?;

    // Example 3: the simple fetch helper, first without and then with a limit
    let aws_total = fetch_all_rag_documents(Some("aws_estate"), None).await?.len();
    println!("🎯 Fetched {} AWS estate documents via simple interface", aws_total);

    let chat_total = fetch_all_rag_documents(Some("chat_history"), Some(50)).await?.len();
    println!("💬 Fetched {} chat documents (limited to 50)", chat_total);

    Ok(())
}