Skip to main content

cognis_rag/
docstore.rs

1//! `Docstore` — keyed [`Document`] storage by stable id.
2//!
3//! Used by the parent-document retriever pattern: small chunks live in a
4//! `VectorStore` (similarity search), full parent docs live in a
5//! `Docstore` (id lookup).
6
7use std::collections::HashMap;
8use std::sync::Mutex;
9
10use async_trait::async_trait;
11
12use cognis_core::Result;
13
14use crate::document::Document;
15
/// Pluggable id-keyed document store.
///
/// Each entry associates a caller-chosen `String` id with a [`Document`].
/// Every method takes `&self` and the trait is bounded by `Send + Sync`, so
/// an implementation that mutates state has to do so behind some form of
/// interior mutability.
///
/// All methods return `cognis_core::Result`; which failures are possible is
/// up to the implementation.
#[async_trait]
pub trait Docstore: Send + Sync {
    /// Persist `(id, doc)` pairs. Replaces existing entries.
    ///
    /// `items` is consumed, so the documents are moved into the store.
    async fn put(&self, items: Vec<(String, Document)>) -> Result<()>;
    /// Look up by id; missing ids are simply absent from the result.
    ///
    /// Because misses are dropped rather than reported, the returned vector
    /// can be shorter than `ids`, and an element's position does not tell the
    /// caller which id it was found under.
    async fn get(&self, ids: &[String]) -> Result<Vec<Document>>;
    /// Delete by id; missing ids are silently ignored.
    async fn delete(&self, ids: &[String]) -> Result<()>;
}
26
/// In-process [`Docstore`] backed by a `Mutex<HashMap>`.
///
/// The derived `Default` produces a store with an empty map.
#[derive(Default)]
pub struct InMemoryDocstore {
    /// Id -> document map. The mutex is what lets the `&self` methods of
    /// [`Docstore`] insert and remove entries.
    inner: Mutex<HashMap<String, Document>>,
}
32
33impl InMemoryDocstore {
34    /// Empty store.
35    pub fn new() -> Self {
36        Self::default()
37    }
38}
39
40#[async_trait]
41impl Docstore for InMemoryDocstore {
42    async fn put(&self, items: Vec<(String, Document)>) -> Result<()> {
43        let mut inner = self
44            .inner
45            .lock()
46            .map_err(|e| cognis_core::CognisError::Internal(format!("docstore mutex: {e}")))?;
47        for (id, d) in items {
48            inner.insert(id, d);
49        }
50        Ok(())
51    }
52    async fn get(&self, ids: &[String]) -> Result<Vec<Document>> {
53        let inner = self
54            .inner
55            .lock()
56            .map_err(|e| cognis_core::CognisError::Internal(format!("docstore mutex: {e}")))?;
57        Ok(ids.iter().filter_map(|id| inner.get(id).cloned()).collect())
58    }
59    async fn delete(&self, ids: &[String]) -> Result<()> {
60        let mut inner = self
61            .inner
62            .lock()
63            .map_err(|e| cognis_core::CognisError::Internal(format!("docstore mutex: {e}")))?;
64        for id in ids {
65            inner.remove(id);
66        }
67        Ok(())
68    }
69}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// Documents that were stored come back by id, and an id that was never
    /// stored contributes nothing to the result.
    #[tokio::test]
    async fn put_then_get() {
        let store = InMemoryDocstore::new();
        let seed = vec![
            (String::from("a"), Document::new("alpha")),
            (String::from("b"), Document::new("beta")),
        ];
        store.put(seed).await.unwrap();

        let wanted = [
            String::from("a"),
            String::from("missing"),
            String::from("b"),
        ];
        let fetched = store.get(&wanted).await.unwrap();

        let mut contents = Vec::new();
        for doc in &fetched {
            contents.push(doc.content.clone());
        }
        assert_eq!(contents.len(), 2);
        assert!(contents.contains(&String::from("alpha")));
        assert!(contents.contains(&String::from("beta")));
    }

    /// After a delete, looking up the same id yields nothing.
    #[tokio::test]
    async fn delete_removes() {
        let store = InMemoryDocstore::new();
        let keys = [String::from("a")];
        store
            .put(vec![(keys[0].clone(), Document::new("alpha"))])
            .await
            .unwrap();

        store.delete(&keys).await.unwrap();

        let remaining = store.get(&keys).await.unwrap();
        assert!(remaining.is_empty());
    }
}