1use std::fs::{self};
2use std::path::{Path, PathBuf};
3use std::time::Instant;
4
5use anyhow::{Context, Result};
6use llm_brain::LLMBrain;
7use pdf_extract::extract_text;
8use serde_json::json;
9
10const DEFAULT_CHUNK_SIZE: usize = 500; fn ensure_config_exists() -> Result<()> {
14 let config_dir = PathBuf::from("./config");
15 let default_config_path = config_dir.join("default.toml");
16 if default_config_path.exists() {
17 return Ok(());
18 }
19 if !config_dir.exists() {
20 fs::create_dir_all(&config_dir)?;
21 }
22 let default_toml_content = r#"
23[database]
24path = "./llm_brain_file_rag_db" # DB path for this example
25namespace = "file_rag_ns"
26database = "file_rag_db"
27
28[llm]
29# Provide necessary LLM config
30"#;
31 fs::write(&default_config_path, default_toml_content)?;
32 println!("Created default config: {}", default_config_path.display());
33 Ok(())
34}
/// Split `text` into chunks of at most `chunk_size` whitespace-separated
/// words, each chunk re-joined with single spaces.
///
/// Returns an empty vector for empty/whitespace-only input, and also when
/// `chunk_size` is 0 — the previous implementation panicked in that case
/// because `slice::chunks` requires a non-zero chunk size.
fn chunk_text_by_words(text: &str, chunk_size: usize) -> Vec<String> {
    if chunk_size == 0 {
        // Guard: `chunks(0)` panics; treat a zero size as "no chunks".
        return Vec::new();
    }
    let words: Vec<&str> = text.split_whitespace().collect();
    words
        .chunks(chunk_size)
        .map(|chunk_words| chunk_words.join(" "))
        .collect()
}
46
47async fn ingest_pdf(llm_brain: &LLMBrain, pdf_path: &Path) -> Result<Vec<String>> {
48 let start_time = Instant::now();
49 println!("Processing PDF: {}", pdf_path.display());
50
51 let text = extract_text(pdf_path).context("Failed to extract text from PDF")?;
53 println!("Extracted {} characters.", text.len());
54
55 let chunks = chunk_text_by_words(&text, DEFAULT_CHUNK_SIZE);
57 println!("Split into {} chunks.", chunks.len());
58
59 let filename = pdf_path
61 .file_name()
62 .unwrap_or_default()
63 .to_string_lossy()
64 .to_string();
65 let file_stem = pdf_path
66 .file_stem()
67 .unwrap_or_default()
68 .to_string_lossy()
69 .to_string();
70
71 let mut chunk_ids = Vec::new();
72
73 for (i, chunk_content) in chunks.into_iter().enumerate() {
74 let chunk_num = i + 1;
75 let metadata = json!({
76 "memory_type": "DocumentChunk",
77 "source_file": filename,
78 "chunk_number": chunk_num,
79 "entity_name": format!("{}_chunk_{}", file_stem, chunk_num)
81 });
82
83 match llm_brain.add_memory(chunk_content, metadata).await {
85 Ok(id) => {
86 chunk_ids.push(id.to_string());
87 if chunk_num % 10 == 0 {
88 println!("Added chunk {chunk_num}...");
89 }
90 }
91 Err(e) => {
92 eprintln!("Failed to add chunk {chunk_num}: {e}");
93 }
95 }
96 }
97
98 let doc_content = format!(
100 "Metadata for document: {}. Contains {} chunks.",
101 filename,
102 chunk_ids.len()
103 );
104 let doc_metadata = json!({
105 "memory_type": "DocumentMeta",
106 "entity_name": file_stem,
107 "properties": {
108 "file_path": pdf_path.to_string_lossy(),
109 "file_name": filename,
110 "chunk_count": chunk_ids.len()
111 },
112 "relationships": {
113 "contains_chunks": chunk_ids }
115 });
116 llm_brain.add_memory(doc_content, doc_metadata).await?;
117 println!("Added document metadata entry.");
118
119 println!("Ingestion completed in {:?}.", start_time.elapsed());
120 Ok(chunk_ids) }
122
123async fn run_queries(llm_brain: &LLMBrain, queries: &[&str]) -> Result<()> {
124 println!("\n--- Running Queries ---");
125 for query in queries {
126 println!("\nQuery: {query}");
127 let start_time = Instant::now();
128 match llm_brain.recall(query, 3).await {
129 Ok(results) => {
131 println!(
132 "Found {} relevant chunks in {:?}:",
133 results.len(),
134 start_time.elapsed()
135 );
136 if results.is_empty() {
137 println!(" (No relevant chunks found)");
138 } else {
139 for (fragment, score) in results {
140 println!(
141 " - Score: {:.4}, Source: {:?}, Chunk: {:?}\n Content: {:.150}...",
142 score,
143 fragment.metadata.get("source_file"),
144 fragment.metadata.get("chunk_number"),
145 fragment.content.replace('\n', " ") );
147 }
148 }
149 }
150 Err(e) => {
151 eprintln!(" Error during recall: {e}");
152 }
153 }
154 }
155 Ok(())
156}
157
158#[tokio::main]
159async fn main() -> Result<()> {
160 println!("--- Starting File RAG Example ---");
161 ensure_config_exists()?;
162 let llm_brain = LLMBrain::launch().await?;
163
164 let pdf_path = PathBuf::from("tests/document.pdf");
168
169 if !pdf_path.exists() {
170 eprintln!("Error: PDF file not found at {}", pdf_path.display());
171 eprintln!("Please place a PDF file at that location or update the path in the code.");
172 return Ok(()); }
174
175 if let Err(e) = ingest_pdf(&llm_brain, &pdf_path).await {
177 eprintln!("Error during PDF ingestion: {e}");
178 }
180
181 let test_queries = [
183 "What is the main topic of the document?",
184 "Summarize the key points from the document.",
185 "What are the main conclusions drawn in the document?",
186 "what is silha center",
187 "who is Charlotte Higgins",
188 "Explain the lawsuits",
189 "Explain OpenAI's Involvement",
190 "who is Mike Masnick",
191 "content moderation liability shield", ];
193
194 if let Err(e) = run_queries(&llm_brain, &test_queries).await {
195 eprintln!("Error during querying: {e}");
196 }
197
198 println!("\n--- File RAG Example Finished ---");
199 println!("Note: Database is at ./llm_brain_file_rag_db");
200
201 Ok(())
202}