1use super::content::docid_from_hash;
4use super::Database;
5use crate::config::virtual_path::{is_virtual_path, parse_virtual_path};
6use crate::error::Result;
7use rusqlite::params;
8use std::collections::HashMap;
9use std::path::PathBuf;
10
/// One row of the `documents` table, as loaded by
/// [`Database::find_active_document`].
#[derive(Debug, Clone)]
pub struct Document {
    /// Value of the `id` column.
    pub id: i64,
    /// Name of the collection the document belongs to.
    pub collection: String,
    /// Value of the `path` column; combined with `collection` when building
    /// `agentroot://` paths elsewhere in this file.
    pub path: String,
    /// Value of the `title` column.
    pub title: String,
    /// Hash used to join against the `content` table.
    pub hash: String,
    /// Value of the `created_at` column (stored as text).
    pub created_at: String,
    /// Value of the `modified_at` column (stored as text).
    pub modified_at: String,
    /// `true` when the integer `active` column equals `1`.
    pub active: bool,
    /// Value of the `source_type` column.
    pub source_type: String,
    /// Value of the nullable `source_uri` column.
    pub source_uri: Option<String>,
}
25
/// A document as returned by the lookup and search methods on [`Database`].
#[derive(Debug, Clone)]
pub struct DocumentResult {
    /// Virtual path of the form `agentroot://{collection}/{path}`.
    pub filepath: String,
    /// Shorter path of the form `{collection}/{path}`.
    pub display_path: String,
    /// Title of the document.
    pub title: String,
    /// Optional context string; every constructor in this file sets it to `None`.
    pub context: Option<String>,
    /// Content hash of the document.
    pub hash: String,
    /// Identifier produced by `docid_from_hash` from `hash`.
    pub docid: String,
    /// Name of the collection the document belongs to.
    pub collection_name: String,
    /// Value of the `modified_at` column.
    pub modified_at: String,
    /// Length of the body: SQL `LENGTH(c.doc)` on the query paths, or the
    /// in-memory string length when built by `document_to_result`.
    pub body_length: usize,
    /// Document body, when it could be loaded.
    pub body: Option<String>,
}
40
41impl Database {
42 pub fn insert_doc(&self, doc: &DocumentInsert) -> Result<i64> {
44 self.conn.execute(
45 "INSERT INTO documents (
46 collection, path, title, hash, created_at, modified_at, active, source_type, source_uri,
47 llm_summary, llm_title, llm_keywords, llm_category, llm_intent, llm_concepts,
48 llm_difficulty, llm_queries, llm_metadata_generated_at, llm_model
49 )
50 VALUES (?1, ?2, ?3, ?4, ?5, ?6, 1, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18)",
51 params![
52 doc.collection,
53 doc.path,
54 doc.title,
55 doc.hash,
56 doc.created_at,
57 doc.modified_at,
58 doc.source_type,
59 doc.source_uri,
60 doc.llm_summary,
61 doc.llm_title,
62 doc.llm_keywords,
63 doc.llm_category,
64 doc.llm_intent,
65 doc.llm_concepts,
66 doc.llm_difficulty,
67 doc.llm_queries,
68 doc.llm_metadata_generated_at,
69 doc.llm_model,
70 ],
71 )?;
72 Ok(self.conn.last_insert_rowid())
73 }
74
75 #[allow(clippy::too_many_arguments)]
77 pub fn insert_document(
78 &self,
79 collection: &str,
80 path: &str,
81 title: &str,
82 hash: &str,
83 created_at: &str,
84 modified_at: &str,
85 source_type: &str,
86 source_uri: Option<&str>,
87 ) -> Result<i64> {
88 let doc = DocumentInsert {
89 collection,
90 path,
91 title,
92 hash,
93 created_at,
94 modified_at,
95 source_type,
96 source_uri,
97 llm_summary: None,
98 llm_title: None,
99 llm_keywords: None,
100 llm_category: None,
101 llm_intent: None,
102 llm_concepts: None,
103 llm_difficulty: None,
104 llm_queries: None,
105 llm_metadata_generated_at: None,
106 llm_model: None,
107 };
108 self.insert_doc(&doc)
109 }
110
111 pub fn update_document(
113 &self,
114 id: i64,
115 title: &str,
116 hash: &str,
117 modified_at: &str,
118 ) -> Result<()> {
119 self.conn.execute(
120 "UPDATE documents SET title = ?2, hash = ?3, modified_at = ?4 WHERE id = ?1",
121 params![id, title, hash, modified_at],
122 )?;
123 Ok(())
124 }
125
126 pub fn update_document_title(&self, id: i64, title: &str, modified_at: &str) -> Result<()> {
128 self.conn.execute(
129 "UPDATE documents SET title = ?2, modified_at = ?3 WHERE id = ?1",
130 params![id, title, modified_at],
131 )?;
132 Ok(())
133 }
134
135 pub fn deactivate_document(&self, collection: &str, path: &str) -> Result<bool> {
137 let rows = self.conn.execute(
138 "UPDATE documents SET active = 0 WHERE collection = ?1 AND path = ?2",
139 params![collection, path],
140 )?;
141 Ok(rows > 0)
142 }
143
144 pub fn find_active_document(&self, collection: &str, path: &str) -> Result<Option<Document>> {
146 let result = self.conn.query_row(
147 "SELECT id, collection, path, title, hash, created_at, modified_at, active, source_type, source_uri
148 FROM documents WHERE collection = ?1 AND path = ?2 AND active = 1",
149 params![collection, path],
150 |row| {
151 Ok(Document {
152 id: row.get(0)?,
153 collection: row.get(1)?,
154 path: row.get(2)?,
155 title: row.get(3)?,
156 hash: row.get(4)?,
157 created_at: row.get(5)?,
158 modified_at: row.get(6)?,
159 active: row.get::<_, i32>(7)? == 1,
160 source_type: row.get(8)?,
161 source_uri: row.get(9)?,
162 })
163 },
164 );
165 match result {
166 Ok(doc) => Ok(Some(doc)),
167 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
168 Err(e) => Err(e.into()),
169 }
170 }
171
172 pub fn get_active_document_paths(&self, collection: &str) -> Result<Vec<String>> {
174 let mut stmt = self
175 .conn
176 .prepare("SELECT path FROM documents WHERE collection = ?1 AND active = 1")?;
177 let paths = stmt
178 .query_map(params![collection], |row| row.get(0))?
179 .collect::<std::result::Result<Vec<_>, _>>()?;
180 Ok(paths)
181 }
182
183 pub fn find_by_docid(&self, docid: &str) -> Result<Option<DocumentResult>> {
185 let docid = docid.trim_start_matches('#');
186 let result = self.conn.query_row(
187 "SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at,
188 c.doc, LENGTH(c.doc)
189 FROM documents d
190 JOIN content c ON c.hash = d.hash
191 WHERE d.hash LIKE ?1 || '%' AND d.active = 1
192 LIMIT 1",
193 params![docid],
194 |row| {
195 Ok(DocumentResult {
196 filepath: format!(
197 "agentroot://{}/{}",
198 row.get::<_, String>(1)?,
199 row.get::<_, String>(2)?
200 ),
201 display_path: format!(
202 "{}/{}",
203 row.get::<_, String>(1)?,
204 row.get::<_, String>(2)?
205 ),
206 title: row.get(3)?,
207 context: None,
208 hash: row.get(4)?,
209 docid: docid_from_hash(&row.get::<_, String>(4)?),
210 collection_name: row.get(1)?,
211 modified_at: row.get(5)?,
212 body: Some(row.get(6)?),
213 body_length: row.get(7)?,
214 })
215 },
216 );
217 match result {
218 Ok(doc) => Ok(Some(doc)),
219 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
220 Err(e) => Err(e.into()),
221 }
222 }
223
224 pub fn delete_inactive_documents(&self) -> Result<usize> {
226 let rows = self
227 .conn
228 .execute("DELETE FROM documents WHERE active = 0", [])?;
229 Ok(rows)
230 }
231
232 pub fn lookup_document(
234 &self,
235 query: &str,
236 collections: &HashMap<String, PathBuf>,
237 ) -> Result<Option<DocumentResult>> {
238 let query = query.trim();
239
240 if query.starts_with('#')
242 || (query.len() == 6 && query.chars().all(|c| c.is_ascii_hexdigit()))
243 {
244 if let Some(doc) = self.find_by_docid(query)? {
245 return Ok(Some(doc));
246 }
247 }
248
249 if is_virtual_path(query) {
251 if let Ok((collection, path)) = parse_virtual_path(query) {
252 if let Some(doc) = self.find_active_document(&collection, &path)? {
253 return Ok(Some(self.document_to_result(&doc)?));
254 }
255 }
256 }
257
258 let expanded = if query.starts_with("~/") {
260 dirs::home_dir()
261 .map(|home| home.join(&query[2..]).to_string_lossy().to_string())
262 .unwrap_or_else(|| query.to_string())
263 } else {
264 query.to_string()
265 };
266
267 let abs_path = std::path::Path::new(&expanded);
268 if abs_path.is_absolute() {
269 for (coll_name, coll_path) in collections {
270 if let Ok(rel_path) = abs_path.strip_prefix(coll_path) {
271 let path = rel_path.to_string_lossy().to_string();
272 if let Some(doc) = self.find_active_document(coll_name, &path)? {
273 return Ok(Some(self.document_to_result(&doc)?));
274 }
275 }
276 }
277 }
278
279 let candidates = self.fuzzy_find_documents(query, 1)?;
281 Ok(candidates.into_iter().next())
282 }
283
284 pub fn fuzzy_find_documents(&self, query: &str, limit: usize) -> Result<Vec<DocumentResult>> {
286 let query_lower = query.to_lowercase();
287 let mut stmt = self.conn.prepare(
288 "SELECT d.collection, d.path, d.title, d.hash, d.modified_at, c.doc, LENGTH(c.doc)
289 FROM documents d
290 JOIN content c ON c.hash = d.hash
291 WHERE d.active = 1 AND (LOWER(d.path) LIKE '%' || ?1 || '%' OR LOWER(d.title) LIKE '%' || ?1 || '%')
292 ORDER BY LENGTH(d.path)
293 LIMIT ?2"
294 )?;
295
296 let results = stmt
297 .query_map(params![query_lower, limit as i64], |row| {
298 Ok(DocumentResult {
299 filepath: format!(
300 "agentroot://{}/{}",
301 row.get::<_, String>(0)?,
302 row.get::<_, String>(1)?
303 ),
304 display_path: format!(
305 "{}/{}",
306 row.get::<_, String>(0)?,
307 row.get::<_, String>(1)?
308 ),
309 title: row.get(2)?,
310 context: None,
311 hash: row.get(3)?,
312 docid: docid_from_hash(&row.get::<_, String>(3)?),
313 collection_name: row.get(0)?,
314 modified_at: row.get(4)?,
315 body: Some(row.get(5)?),
316 body_length: row.get(6)?,
317 })
318 })?
319 .collect::<std::result::Result<Vec<_>, _>>()?;
320
321 Ok(results)
322 }
323
324 fn document_to_result(&self, doc: &Document) -> Result<DocumentResult> {
325 let body = self.get_content(&doc.hash)?;
326 let body_length = body.as_ref().map(|b| b.len()).unwrap_or(0);
327
328 Ok(DocumentResult {
329 filepath: format!("agentroot://{}/{}", doc.collection, doc.path),
330 display_path: format!("{}/{}", doc.collection, doc.path),
331 title: doc.title.clone(),
332 context: None,
333 hash: doc.hash.clone(),
334 docid: docid_from_hash(&doc.hash),
335 collection_name: doc.collection.clone(),
336 modified_at: doc.modified_at.clone(),
337 body_length,
338 body,
339 })
340 }
341
342 pub fn get_document(&self, query: &str) -> Result<String> {
344 let query = query.trim();
345
346 if query.starts_with('#')
348 || (query.len() == 6 && query.chars().all(|c| c.is_ascii_hexdigit()))
349 {
350 if let Some(doc) = self.find_by_docid(query)? {
351 return doc.body.ok_or_else(|| {
352 crate::error::AgentRootError::DocumentNotFound(query.to_string())
353 });
354 }
355 }
356
357 if is_virtual_path(query) {
359 if let Ok((collection, path)) = parse_virtual_path(query) {
360 if let Some(doc) = self.find_active_document(&collection, &path)? {
361 if let Some(content) = self.get_content(&doc.hash)? {
362 return Ok(content);
363 }
364 }
365 }
366 }
367
368 if query.contains('/') {
370 let parts: Vec<&str> = query.splitn(2, '/').collect();
371 if parts.len() == 2 {
372 if let Some(doc) = self.find_active_document(parts[0], parts[1])? {
373 if let Some(content) = self.get_content(&doc.hash)? {
374 return Ok(content);
375 }
376 }
377 }
378 }
379
380 Err(crate::error::AgentRootError::DocumentNotFound(
381 query.to_string(),
382 ))
383 }
384
385 pub fn list_documents_by_prefix(&self, prefix: &str) -> Result<Vec<DocumentListItem>> {
387 let prefix = prefix.trim_start_matches("agentroot://");
388 let like_pattern = format!("{}%", prefix);
389
390 let mut stmt = self.conn.prepare(
391 "SELECT d.collection, d.path, d.title, d.hash
392 FROM documents d
393 WHERE d.active = 1 AND (d.collection || '/' || d.path) LIKE ?1
394 ORDER BY d.collection, d.path",
395 )?;
396
397 let results = stmt
398 .query_map(params![like_pattern], |row| {
399 Ok(DocumentListItem {
400 path: format!("{}/{}", row.get::<_, String>(0)?, row.get::<_, String>(1)?),
401 title: row.get(2)?,
402 docid: docid_from_hash(&row.get::<_, String>(3)?),
403 })
404 })?
405 .collect::<std::result::Result<Vec<_>, _>>()?;
406
407 Ok(results)
408 }
409
410 pub fn get_documents_by_pattern(&self, pattern: &str) -> Result<Vec<DocumentContent>> {
412 if pattern.contains(',') {
414 let mut results = Vec::new();
415 for part in pattern.split(',') {
416 let part = part.trim();
417 if let Ok(content) = self.get_document(part) {
418 results.push(DocumentContent {
419 path: part.to_string(),
420 content,
421 });
422 }
423 }
424 return Ok(results);
425 }
426
427 let pattern = glob::Pattern::new(pattern)?;
429 let mut stmt = self.conn.prepare(
430 "SELECT d.collection, d.path, c.doc
431 FROM documents d
432 JOIN content c ON c.hash = d.hash
433 WHERE d.active = 1",
434 )?;
435
436 let results = stmt
437 .query_map([], |row| {
438 let path = format!("{}/{}", row.get::<_, String>(0)?, row.get::<_, String>(1)?);
439 Ok((path, row.get::<_, String>(2)?))
440 })?
441 .filter_map(|r| r.ok())
442 .filter(|(path, _)| pattern.matches(path))
443 .map(|(path, content)| DocumentContent { path, content })
444 .collect();
445
446 Ok(results)
447 }
448}
449
/// Summary entry produced by [`Database::list_documents_by_prefix`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct DocumentListItem {
    /// Path of the form `{collection}/{path}`.
    pub path: String,
    /// Title of the document.
    pub title: String,
    /// Identifier produced by `docid_from_hash` from the document hash.
    pub docid: String,
}
457
/// A document path paired with its content, as returned by
/// [`Database::get_documents_by_pattern`].
#[derive(Debug, Clone)]
pub struct DocumentContent {
    /// In list mode, the trimmed query entry as supplied by the caller; in
    /// glob mode, `{collection}/{path}`.
    pub path: String,
    /// Full content of the document.
    pub content: String,
}
464
/// Borrowed field values for one new `documents` row, consumed by
/// [`Database::insert_doc`].
///
/// Each field is written to the column of the same name; `None` values are
/// bound as SQL parameters and therefore stored as `NULL`.
#[derive(Debug, Clone)]
pub struct DocumentInsert<'a> {
    /// Name of the collection the document belongs to.
    pub collection: &'a str,
    /// Path of the document within its collection.
    pub path: &'a str,
    /// Title of the document.
    pub title: &'a str,
    /// Hash of the document content.
    pub hash: &'a str,
    /// Creation timestamp, stored as text.
    pub created_at: &'a str,
    /// Modification timestamp, stored as text.
    pub modified_at: &'a str,
    /// Kind of source; `DocumentInsert::new` defaults this to `"file"`.
    pub source_type: &'a str,
    /// Optional URI of the source.
    pub source_uri: Option<&'a str>,
    /// Optional value for the `llm_summary` column.
    pub llm_summary: Option<&'a str>,
    /// Optional value for the `llm_title` column.
    pub llm_title: Option<&'a str>,
    /// Optional value for the `llm_keywords` column.
    pub llm_keywords: Option<&'a str>,
    /// Optional value for the `llm_category` column.
    pub llm_category: Option<&'a str>,
    /// Optional value for the `llm_intent` column.
    pub llm_intent: Option<&'a str>,
    /// Optional value for the `llm_concepts` column.
    pub llm_concepts: Option<&'a str>,
    /// Optional value for the `llm_difficulty` column.
    pub llm_difficulty: Option<&'a str>,
    /// Optional value for the `llm_queries` column.
    pub llm_queries: Option<&'a str>,
    /// Optional value for the `llm_metadata_generated_at` column.
    pub llm_metadata_generated_at: Option<&'a str>,
    /// Optional value for the `llm_model` column.
    pub llm_model: Option<&'a str>,
}
487
488impl<'a> DocumentInsert<'a> {
489 pub fn new(
491 collection: &'a str,
492 path: &'a str,
493 title: &'a str,
494 hash: &'a str,
495 created_at: &'a str,
496 modified_at: &'a str,
497 ) -> Self {
498 Self {
499 collection,
500 path,
501 title,
502 hash,
503 created_at,
504 modified_at,
505 source_type: "file",
506 source_uri: None,
507 llm_summary: None,
508 llm_title: None,
509 llm_keywords: None,
510 llm_category: None,
511 llm_intent: None,
512 llm_concepts: None,
513 llm_difficulty: None,
514 llm_queries: None,
515 llm_metadata_generated_at: None,
516 llm_model: None,
517 }
518 }
519
520 pub fn with_source_type(mut self, source_type: &'a str) -> Self {
522 self.source_type = source_type;
523 self
524 }
525
526 pub fn with_source_uri(mut self, source_uri: &'a str) -> Self {
528 self.source_uri = Some(source_uri);
529 self
530 }
531
532 pub fn with_llm_metadata(
534 mut self,
535 metadata: &'a crate::llm::DocumentMetadata,
536 _metadata_json: &'a str,
537 model_name: &'a str,
538 generated_at: &'a str,
539 ) -> Self {
540 self.llm_summary = Some(&metadata.summary);
541 self.llm_title = Some(&metadata.semantic_title);
542 self.llm_category = Some(&metadata.category);
543 self.llm_intent = Some(&metadata.intent);
544 self.llm_difficulty = Some(&metadata.difficulty);
545 self.llm_metadata_generated_at = Some(generated_at);
546 self.llm_model = Some(model_name);
547 self
548 }
549
550 pub fn with_llm_metadata_strings(
552 mut self,
553 summary: &'a str,
554 title: &'a str,
555 keywords: &'a str,
556 category: &'a str,
557 intent: &'a str,
558 concepts: &'a str,
559 difficulty: &'a str,
560 queries: &'a str,
561 model_name: &'a str,
562 generated_at: &'a str,
563 ) -> Self {
564 self.llm_summary = Some(summary);
565 self.llm_title = Some(title);
566 self.llm_keywords = Some(keywords);
567 self.llm_category = Some(category);
568 self.llm_intent = Some(intent);
569 self.llm_concepts = Some(concepts);
570 self.llm_difficulty = Some(difficulty);
571 self.llm_queries = Some(queries);
572 self.llm_metadata_generated_at = Some(generated_at);
573 self.llm_model = Some(model_name);
574 self
575 }
576}