rag_advanced/
rag_advanced.rs

1//! # Example: Advanced RAG Usage
2//!
3//! This example demonstrates advanced RAG features including:
4//! - Custom metadata
5//! - Document management (add, search, delete, count)
6//! - Direct RAG system usage (without agent)
7//! - Batch operations
8//!
9//! ## Prerequisites
10//!
11//! - OpenAI API Key: `export OPENAI_API_KEY=your-key`
12
13use helios_engine::{InMemoryVectorStore, OpenAIEmbeddings, RAGSystem, SearchResult};
14use std::collections::HashMap;
15
/// Builds a `HashMap` literal from `key => value` pairs.
///
/// Accepts zero or more comma-separated pairs, with an optional trailing
/// comma. `HashMap` is resolved at the call site, so it must be in scope there.
macro_rules! hashmap {
    ($($k:expr => $v:expr),* $(,)?) => {
        // Gather the pairs into an array and let the standard conversion build the map.
        HashMap::from([$(($k, $v)),*])
    };
}
24
25#[tokio::main]
26async fn main() -> helios_engine::Result<()> {
27    println!("šŸš€ Helios Engine - Advanced RAG Features");
28    println!("========================================\n");
29
30    let api_key = std::env::var("OPENAI_API_KEY").unwrap_or_else(|_| {
31        println!("⚠ Warning: OPENAI_API_KEY not set. Using placeholder.");
32        "your-api-key-here".to_string()
33    });
34
35    // Create RAG system directly (without agent)
36    let embeddings = OpenAIEmbeddings::new("https://api.openai.com/v1/embeddings", api_key);
37    let vector_store = InMemoryVectorStore::new();
38    let rag_system = RAGSystem::new(Box::new(embeddings), Box::new(vector_store));
39
40    println!("āœ“ RAG system created\n");
41
42    // --- Example 1: Adding documents with metadata ---
43    println!("Example 1: Documents with Metadata");
44    println!("===================================\n");
45
46    let documents = vec![
47        (
48            "Rust is a systems programming language focused on safety and performance.",
49            hashmap! {
50                "category" => "programming",
51                "language" => "rust",
52                "year" => "2010",
53                "difficulty" => "intermediate",
54            },
55        ),
56        (
57            "Python is known for its simplicity and extensive library ecosystem.",
58            hashmap! {
59                "category" => "programming",
60                "language" => "python",
61                "year" => "1991",
62                "difficulty" => "beginner",
63            },
64        ),
65        (
66            "Machine learning is a subset of AI that enables systems to learn from data.",
67            hashmap! {
68                "category" => "ai",
69                "topic" => "machine-learning",
70                "difficulty" => "advanced",
71            },
72        ),
73        (
74            "Docker is a platform for developing, shipping, and running applications in containers.",
75            hashmap! {
76                "category" => "devops",
77                "tool" => "docker",
78                "year" => "2013",
79            },
80        ),
81    ];
82
83    let mut doc_ids = Vec::new();
84    for (text, meta) in documents.iter() {
85        let metadata: HashMap<String, serde_json::Value> = meta
86            .iter()
87            .map(|(k, v)| (k.to_string(), serde_json::json!(v)))
88            .collect();
89
90        let id = rag_system.add_document(text, Some(metadata)).await?;
91        println!(
92            "Added document: {} (ID: {})",
93            &text[..50.min(text.len())],
94            id
95        );
96        doc_ids.push(id);
97    }
98    println!();
99
100    // --- Example 2: Semantic search ---
101    println!("Example 2: Semantic Search");
102    println!("==========================\n");
103
104    let queries = vec![
105        ("programming language safety", 3),
106        ("containerization technology", 2),
107        ("artificial intelligence", 2),
108    ];
109
110    for (query, limit) in queries {
111        println!("Query: '{}' (limit: {})", query, limit);
112        let results = rag_system.search(query, limit).await?;
113        print_results(&results);
114        println!();
115    }
116
117    // --- Example 3: Document count ---
118    println!("Example 3: Document Management");
119    println!("===============================\n");
120
121    let count = rag_system.count().await?;
122    println!("Total documents: {}\n", count);
123
124    // --- Example 4: Delete a document ---
125    if let Some(first_id) = doc_ids.first() {
126        println!("Deleting document: {}", first_id);
127        rag_system.delete_document(first_id).await?;
128        let new_count = rag_system.count().await?;
129        println!("Documents after deletion: {}\n", new_count);
130    }
131
132    // --- Example 5: Search after deletion ---
133    println!("Example 5: Search After Deletion");
134    println!("=================================\n");
135
136    let results = rag_system.search("programming languages", 5).await?;
137    println!("Results for 'programming languages':");
138    print_results(&results);
139    println!();
140
141    // --- Example 6: Clear all documents ---
142    println!("Example 6: Clear All Documents");
143    println!("===============================\n");
144
145    rag_system.clear().await?;
146    let final_count = rag_system.count().await?;
147    println!("Documents after clear: {}\n", final_count);
148
149    println!("āœ… Example completed successfully!");
150    println!("\nšŸ’” Key Features Demonstrated:");
151    println!("  • Direct RAG system usage (no agent required)");
152    println!("  • Documents with custom metadata");
153    println!("  • Semantic search with configurable limits");
154    println!("  • Document management (add, delete, count, clear)");
155    println!("  • Batch operations");
156
157    println!("\nšŸ“ Advanced Use Cases:");
158    println!("  • Building custom RAG pipelines");
159    println!("  • Document management systems");
160    println!("  • Knowledge base applications");
161    println!("  • Semantic search engines");
162
163    Ok(())
164}
165
166fn print_results(results: &[SearchResult]) {
167    if results.is_empty() {
168        println!("  No results found");
169        return;
170    }
171
172    for (i, result) in results.iter().enumerate() {
173        let preview = if result.text.len() > 80 {
174            format!("{}...", &result.text[..80])
175        } else {
176            result.text.clone()
177        };
178        println!("  {}. [Score: {:.4}] {}", i + 1, result.score, preview);
179        if let Some(metadata) = &result.metadata {
180            let meta_str: Vec<String> = metadata
181                .iter()
182                .filter(|(k, _)| k.as_str() != "timestamp")
183                .map(|(k, v)| format!("{}={}", k, v.as_str().unwrap_or("?")))
184                .collect();
185            if !meta_str.is_empty() {
186                println!("     Metadata: {}", meta_str.join(", "));
187            }
188        }
189    }
190}