// oxide-agent 0.1.0

//! Retrieval-Augmented Generation with `VectorStore`.
//!
//! Demonstrates:
//!   - Adding individual text snippets to the store
//!   - Indexing a file chunked by paragraph
//!   - Querying for the top-k most relevant documents by cosine similarity
//!   - Injecting retrieved context into a chat prompt
//!
//! Run with:
//!   cargo run --example rag

use std::collections::HashMap;
use std::sync::Arc;

use oxide_agent::client::{HttpOllamaClient, OllamaClient};
use oxide_agent::rag::VectorStore;
use oxide_agent::types::{ChatRequest, Message, Role};

/// Runs the end-to-end RAG demo against a local Ollama server.
///
/// Steps, in order:
///   1. Embed a handful of facts into an in-memory `VectorStore`.
///   2. Query the store for the `top_k` entries most relevant to a question.
///   3. Print each hit together with any `source` / `chunk` metadata it carries.
///   4. Send the retrieved snippets as system-prompt context in a chat request
///      and print the model's answer.
///
/// # Errors
///
/// Propagates any error returned by the embedding, query, or chat calls
/// (for example when the Ollama server is unreachable or a model has not
/// been pulled).
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Use a dedicated embedding model — pull it first if you haven't:
    //   ollama pull nomic-embed-text
    const EMBED_MODEL: &str = "nomic-embed-text";
    // Chat model used to produce the final answer.
    const CHAT_MODEL: &str = "llama3";

    // A single client is shared by the vector store (embeddings) and the chat
    // call below, so the endpoint is configured in exactly one place.
    let client = Arc::new(HttpOllamaClient::new("http://localhost:11434"));

    let mut store = VectorStore::new(Arc::clone(&client), EMBED_MODEL);

    // ── Add individual facts ──────────────────────────────────────────────────
    let facts = [
        "Rust ownership means exactly one binding owns a value at any time.",
        "Borrowing lets you hold a reference to data without taking ownership.",
        "Lifetimes annotate how long references are valid to prevent dangling pointers.",
        "The borrow checker enforces ownership and borrowing rules at compile time.",
        "Clone duplicates heap data; Copy is an implicit bit-for-bit copy for stack types.",
    ];

    for fact in facts {
        // These ad-hoc facts carry no metadata, hence the empty map.
        store.add_text(fact, HashMap::new()).await?;
    }

    println!("Indexed {} facts", store.len());

    // ── Optionally index a local text file ────────────────────────────────────
    // Each double-newline-separated paragraph becomes a separate document.
    // Uncomment if you have a file to index (the path is fully qualified so no
    // extra `use` line is required):
    //
    // let n = store.add_file(std::path::Path::new("docs/rust_book_chapter4.txt")).await?;
    // println!("Indexed {n} chunks from file");

    // ── Query for relevant context ────────────────────────────────────────────
    let question = "What prevents dangling references in Rust?";
    let top_k = 2;

    let hits = store.query(question, top_k).await?;

    println!("\nTop {top_k} results for: \"{question}\"");
    for (i, hit) in hits.iter().enumerate() {
        println!("  [{i}] score={:.4}  {}", hit.score, hit.content);
        // Provenance is printed only for hits that carry a "source" entry;
        // a missing "chunk" entry is shown as "?".
        if let Some(src) = hit.metadata.get("source") {
            let chunk = hit.metadata.get("chunk").map_or("?", |s| s.as_str());
            println!("       source={src}, chunk={chunk}");
        }
    }

    // ── Inject retrieved context into a chat prompt (RAG pattern) ─────────────
    // Number the snippets from 1 so the context reads as "[1] …", "[2] …".
    let context_block = hits
        .iter()
        .enumerate()
        .map(|(i, h)| format!("[{}] {}", i + 1, h.content))
        .collect::<Vec<_>>()
        .join("\n");

    let system_prompt = format!(
        "You are a Rust expert. Use the following context to answer the question.\n\nContext:\n{context_block}"
    );

    let req = ChatRequest {
        model: CHAT_MODEL.into(),
        messages: vec![
            Message { role: Role::System, content: system_prompt, tool_calls: None },
            Message { role: Role::User, content: question.into(), tool_calls: None },
        ],
        tools: None,
        stream: false,
    };

    // Reuse the shared client rather than constructing a second one for the
    // same endpoint.
    let resp = client.chat(req).await?;
    println!("\nRAG answer:\n{}", resp.message.content);

    Ok(())
}