// enki-runtime 0.1.4

//! RAG (Retrieval Augmented Generation) Example
//!
//! This example demonstrates how to implement a basic RAG pattern using Enki Runtime:
//! 1. Store documents in memory with metadata
//! 2. Retrieve relevant documents based on user queries
//! 3. Augment the LLM prompt with retrieved context
//! 4. Generate grounded responses
//!
//! # Prerequisites
//!
//! 1. Install Ollama from <https://ollama.ai>
//! 2. Pull the gemma3 model: `ollama pull gemma3:latest`
//! 3. Ensure Ollama is running (default: <http://127.0.0.1:11434>)
//!
//! # Running
//!
//! ```bash
//! cargo run --example rag_example
//! ```

use enki_runtime::core::agent::AgentContext;
use enki_runtime::core::error::Result;
use enki_runtime::core::memory::{Memory, MemoryEntry, MemoryQuery};
use enki_runtime::llm::LlmAgent;
use enki_runtime::memory::InMemoryBackend;
use serde_json::json;

/// One knowledge-base entry: a titled piece of text tagged with a category.
struct Document {
    /// Short heading shown when the document is stored or retrieved.
    title: String,
    /// Body text of the document.
    content: String,
    /// Free-form category label (for example `"overview"` or `"llm"`).
    category: String,
}

impl Document {
    /// Builds an owned `Document` by copying the three borrowed strings.
    fn new(title: &str, content: &str, category: &str) -> Self {
        let title = String::from(title);
        let content = String::from(content);
        let category = String::from(category);

        Self {
            title,
            content,
            category,
        }
    }
}

/// A simple RAG system that combines memory retrieval with LLM generation
struct RagSystem {
    memory: InMemoryBackend,
    agent: LlmAgent,
}

impl RagSystem {
    /// Create a new RAG system with an LLM agent
    fn new() -> Result<Self> {
        let memory = InMemoryBackend::new();

        let agent = LlmAgent::builder("rag_agent", "ollama::gemma3:latest")
            .with_system_prompt(
                "You are a helpful assistant that answers questions based on the provided context. \
                 Always ground your answers in the given context. If the context doesn't contain \
                 enough information, say so. Be concise and informative.",
            )
            .with_temperature(0.3) // Lower temperature for more factual responses
            .with_max_tokens(512)
            .build()?;

        Ok(Self { memory, agent })
    }

    /// Add a document to the knowledge base
    async fn add_document(&self, doc: Document) -> Result<String> {
        let entry = MemoryEntry::new(&doc.content)
            .with_metadata("title", json!(doc.title))
            .with_metadata("category", json!(doc.category));

        let id = self.memory.store(entry).await?;
        println!(
            "📄 Stored document: \"{}\" (category: {})",
            doc.title, doc.category
        );
        Ok(id)
    }

    /// Search for relevant documents based on a query
    async fn search(&self, query: &str, limit: usize) -> Result<Vec<MemoryEntry>> {
        let memory_query = MemoryQuery::new()
            .with_semantic_query(query)
            .with_limit(limit);

        self.memory.search(memory_query).await
    }

    /// Perform RAG: retrieve context and generate a response
    async fn query(&mut self, question: &str, ctx: &mut AgentContext) -> Result<String> {
        println!("\n🔍 Query: {}", question);

        // Step 1: Retrieve relevant documents
        let results = self.search(question, 3).await?;

        if results.is_empty() {
            println!("⚠️  No relevant documents found in knowledge base.");
            return Ok("I don't have enough information to answer that question.".to_string());
        }

        // Step 2: Build context from retrieved documents
        println!("\n📚 Retrieved {} relevant document(s):", results.len());
        let mut context = String::new();
        for (i, entry) in results.iter().enumerate() {
            let title = entry
                .metadata
                .get("title")
                .and_then(|v| v.as_str())
                .unwrap_or("Untitled");
            println!("   {}. \"{}\"", i + 1, title);
            context.push_str(&format!(
                "\n--- Document: {} ---\n{}\n",
                title, entry.content
            ));
        }

        // Step 3: Augment the prompt with retrieved context
        let augmented_prompt = format!(
            "Based on the following context, please answer the question.\n\n\
             CONTEXT:\n{}\n\n\
             QUESTION: {}\n\n\
             Please provide a helpful answer based primarily on the context above.",
            context, question
        );

        // Step 4: Generate response using LLM
        println!("\n🤖 Generating response...\n");
        let response = self
            .agent
            .send_message_and_get_response(&augmented_prompt, ctx)
            .await?;

        Ok(response)
    }
}

/// Returns the five hard-coded sample documents about Enki Runtime, in a fixed order.
fn sample_knowledge_base() -> Vec<Document> {
    // Plain `(title, content, category)` rows; each is turned into a `Document` below.
    let rows: [(&str, &str, &str); 5] = [
        (
            "Enki Runtime Overview",
            "Enki Runtime is a Rust-based agent mesh framework for building local and distributed \
             AI agent systems. It provides core abstractions like Agent, Memory, Mesh, and Message \
             for building autonomous AI applications. The framework is modular and split into \
             focused sub-crates: Enki-core, Enki-llm, Enki-local, Enki-memory, and \
             Enki-observability.",
            "overview",
        ),
        (
            "LLM Integration",
            "Enki Runtime supports 13+ LLM providers through a unified interface. Supported \
             providers include OpenAI (GPT-4, GPT-4o), Anthropic (Claude 3, Claude 3.5), \
             Ollama (local models like Llama, Mistral, Gemma), Google (Gemini), DeepSeek, \
             xAI (Grok), Groq, Mistral, Cohere, Phind, and OpenRouter. You can create an \
             LlmAgent using the builder pattern: LlmAgent::builder(name, model).build().",
            "llm",
        ),
        (
            "Memory System",
            "Enki Runtime provides a pluggable memory system with multiple backends: \
             InMemoryBackend (default, fast, no persistence), SqliteBackend (persistent, \
             requires 'sqlite' feature), and RedisBackend (distributed, requires 'redis' feature). \
             All backends implement the Memory trait with methods like store(), get(), search(), \
             delete(), and clear(). The VectorMemory trait extends Memory with embed() and \
             similarity_search() for semantic search capabilities.",
            "memory",
        ),
        (
            "Agent Communication",
            "Agents in Enki Runtime communicate through a Mesh. The LocalMesh provides \
             in-process communication between agents. Messages have a topic, payload, and sender. \
             Agents implement the Agent trait with on_start() and on_message() lifecycle methods. \
             You can send messages to specific agents or broadcast to all agents on the mesh.",
            "mesh",
        ),
        (
            "TOML Configuration",
            "Enki Runtime supports TOML-based agent configuration. Create agents from config \
             files using AgentConfig::from_file() and LlmAgent::from_config(). Configuration \
             includes agent name, model, system prompt, API key, temperature, and max tokens. \
             This enables easy deployment and configuration management without code changes.",
            "config",
        ),
    ];

    rows.iter()
        .map(|&(title, content, category)| Document::new(title, content, category))
        .collect()
}

/// Runs the RAG demo end to end: builds the system, loads the sample knowledge
/// base, then answers a fixed set of questions and prints each response.
///
/// # Errors
///
/// Returns the underlying error if the RAG system cannot be created or if a
/// document cannot be stored. A failure while answering an individual query is
/// only reported on stderr so that the remaining queries still run.
#[tokio::main]
async fn main() -> Result<()> {
    println!("=== Enki Runtime - RAG Example ===\n");
    println!("This example demonstrates Retrieval Augmented Generation (RAG):");
    println!("  1. Store documents in the memory backend");
    println!("  2. Retrieve relevant documents for a query");
    println!("  3. Augment the LLM prompt with context");
    println!("  4. Generate grounded responses\n");

    // Create the RAG system; on failure print setup hints before bailing out.
    let mut rag = match RagSystem::new() {
        Ok(rag) => rag,
        Err(e) => {
            eprintln!("Failed to create RAG system: {}", e);
            eprintln!("Make sure Ollama is running and gemma3:latest is available.");
            eprintln!("Pull the model with: ollama pull gemma3:latest");
            return Err(e);
        }
    };

    println!("✓ RAG system created successfully\n");

    // Build knowledge base
    println!("📖 Building knowledge base...\n");
    let documents = sample_knowledge_base();
    // Take the count from the collection itself so the summary line below stays
    // correct when documents are added to or removed from the sample set.
    let document_count = documents.len();
    for doc in documents {
        rag.add_document(doc).await?;
    }

    println!(
        "\n✓ Knowledge base ready with {} documents\n",
        document_count
    );

    // Create agent context
    let mut ctx = AgentContext::new("rag_demo".to_string(), None);

    // Example queries that demonstrate RAG
    let queries = [
        "What LLM providers does Enki Runtime support?",
        "How do agents communicate with each other?",
        "What are the available memory backends?",
    ];

    for query in queries {
        println!("\n{}", "=".repeat(60));
        // Best effort: report a failed query and carry on with the next one.
        match rag.query(query, &mut ctx).await {
            Ok(response) => {
                println!("💬 Response:\n{}", response);
            }
            Err(e) => {
                eprintln!("❌ Error: {}", e);
            }
        }
        println!("{}\n", "=".repeat(60));
    }

    println!("\n=== RAG Example Complete ===");
    Ok(())
}