Skip to main content

graphrag_core/storage/
mod.rs

1//! Storage layer for GraphRAG
2//!
3//! This module provides abstractions and implementations for storing
4//! knowledge graph data, vectors, and metadata.
5
6use crate::core::{Document, Entity, Result, TextChunk};
7#[cfg(feature = "async")]
8use crate::core::{traits::Storage, GraphRAGError};
9use std::collections::HashMap;
10
11/// In-memory storage implementation for development and testing
12#[derive(Debug, Default, Clone)]
13pub struct MemoryStorage {
14    documents: HashMap<String, Document>,
15    entities: HashMap<String, Entity>,
16    chunks: HashMap<String, TextChunk>,
17    metadata: HashMap<String, String>,
18}
19
20impl MemoryStorage {
21    /// Create a new memory storage instance
22    pub fn new() -> Self {
23        Self::default()
24    }
25
26    /// Store a document
27    pub fn store_document(&mut self, id: String, document: Document) -> Result<()> {
28        self.documents.insert(id, document);
29        Ok(())
30    }
31
32    /// Retrieve a document by ID
33    pub fn get_document(&self, id: &str) -> Option<&Document> {
34        self.documents.get(id)
35    }
36
37    /// Store an entity
38    pub fn store_entity(&mut self, id: String, entity: Entity) -> Result<()> {
39        self.entities.insert(id, entity);
40        Ok(())
41    }
42
43    /// Retrieve an entity by ID
44    pub fn get_entity(&self, id: &str) -> Option<&Entity> {
45        self.entities.get(id)
46    }
47
48    /// Get all documents
49    pub fn all_documents(&self) -> Vec<&Document> {
50        self.documents.values().collect()
51    }
52
53    /// Get all entities
54    pub fn all_entities(&self) -> Vec<&Entity> {
55        self.entities.values().collect()
56    }
57
58    /// Store a chunk
59    pub fn store_chunk(&mut self, id: String, chunk: TextChunk) -> Result<()> {
60        self.chunks.insert(id, chunk);
61        Ok(())
62    }
63
64    /// Retrieve a chunk by ID
65    pub fn get_chunk(&self, id: &str) -> Option<&TextChunk> {
66        self.chunks.get(id)
67    }
68
69    /// Get all chunks
70    pub fn all_chunks(&self) -> Vec<&TextChunk> {
71        self.chunks.values().collect()
72    }
73
74    /// Clear all data
75    pub fn clear(&mut self) {
76        self.documents.clear();
77        self.entities.clear();
78        self.chunks.clear();
79        self.metadata.clear();
80    }
81
82    /// Get storage statistics
83    pub fn stats(&self) -> StorageStats {
84        StorageStats {
85            document_count: self.documents.len(),
86            entity_count: self.entities.len(),
87            chunk_count: self.chunks.len(),
88            metadata_count: self.metadata.len(),
89        }
90    }
91}
92
/// Snapshot of how many entries a storage backend holds.
///
/// Every field is a plain counter, so the type derives `PartialEq`/`Eq` to
/// let snapshots be compared directly (for example with `assert_eq!`), and
/// `Default` to provide the all-zero snapshot.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StorageStats {
    /// Number of documents stored
    pub document_count: usize,
    /// Number of entities stored
    pub entity_count: usize,
    /// Number of chunks stored
    pub chunk_count: usize,
    /// Number of metadata entries
    pub metadata_count: usize,
}
105
// Implements the `Storage` trait for `MemoryStorage`; compiled only when the
// `async` feature is enabled.
#[cfg(feature = "async")]
impl Storage for MemoryStorage {
    type Entity = Entity;
    type Document = Document;
    type Chunk = TextChunk;
    type Error = GraphRAGError;

    /// Stores `entity` under the string form of its own `id` and returns that key.
    ///
    /// An entity already stored under the same key is replaced.
    fn store_entity(&mut self, entity: Self::Entity) -> Result<String> {
        let id = entity.id.to_string();
        self.entities.insert(id.clone(), entity);
        Ok(id)
    }

    /// Returns a clone of the entity stored under `id`, or `None` if there is none.
    fn retrieve_entity(&self, id: &str) -> Result<Option<Self::Entity>> {
        Ok(self.entities.get(id).cloned())
    }

    /// Stores `document` under the string form of its own `id` and returns that key.
    ///
    /// A document already stored under the same key is replaced.
    fn store_document(&mut self, document: Self::Document) -> Result<String> {
        let id = document.id.to_string();
        self.documents.insert(id.clone(), document);
        Ok(id)
    }

    /// Returns a clone of the document stored under `id`, or `None` if there is none.
    fn retrieve_document(&self, id: &str) -> Result<Option<Self::Document>> {
        Ok(self.documents.get(id).cloned())
    }

    /// Stores `chunk` under the string form of its own `id` and returns that key.
    ///
    /// A chunk already stored under the same key is replaced.
    fn store_chunk(&mut self, chunk: Self::Chunk) -> Result<String> {
        let id = chunk.id.to_string();
        self.chunks.insert(id.clone(), chunk);
        Ok(id)
    }

    /// Returns a clone of the chunk stored under `id`, or `None` if there is none.
    fn retrieve_chunk(&self, id: &str) -> Result<Option<Self::Chunk>> {
        Ok(self.chunks.get(id).cloned())
    }

    /// Lists the keys of all stored entities, in the map's (unspecified)
    /// iteration order.
    fn list_entities(&self) -> Result<Vec<String>> {
        Ok(self.entities.keys().cloned().collect())
    }

    /// Stores every entity in `entities` and returns their keys in input order.
    ///
    /// Keys are derived exactly as in `store_entity`. When several entities in
    /// the batch share an ID, the last one wins in the map while the returned
    /// list still contains one key per input entity.
    fn store_entities_batch(&mut self, entities: Vec<Self::Entity>) -> Result<Vec<String>> {
        // Single pass: format each ID once and reuse it for both the map key
        // and the returned list, instead of stringifying every ID twice.
        let mut ids = Vec::with_capacity(entities.len());
        for entity in entities {
            let id = entity.id.to_string();
            self.entities.insert(id.clone(), entity);
            ids.push(id);
        }
        Ok(ids)
    }
}
156
#[cfg(test)]
mod tests {
    use super::*;
    use indexmap::IndexMap;

    /// Stores one document and checks that it is counted and can be looked up
    /// again by its ID.
    #[test]
    fn test_memory_storage() {
        let document = Document {
            id: crate::core::DocumentId::new(String::from("doc1")),
            title: String::from("Test Document"),
            content: String::from("Test content"),
            metadata: IndexMap::new(),
            chunks: Vec::new(),
        };

        let mut store = MemoryStorage::new();
        store
            .store_document(String::from("doc1"), document)
            .unwrap();

        // Exactly one document must be reported after a single insert.
        let stats = store.stats();
        assert_eq!(stats.document_count, 1);

        // The document must be reachable under the key it was stored with.
        assert!(store.get_document("doc1").is_some());
    }
}