Skip to main content

graphrag_core/persistence/
workspace.rs

1//! Workspace management for GraphRAG persistence
2//!
3//! Provides multi-workspace support with checkpointing and metadata tracking.
4
5use crate::core::{GraphRAGError, KnowledgeGraph, Result};
6use std::fs;
7use std::path::{Path, PathBuf};
8
9/// Workspace metadata
10#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
11pub struct WorkspaceMetadata {
12    /// Workspace name
13    pub name: String,
14    /// Creation timestamp
15    pub created_at: chrono::DateTime<chrono::Utc>,
16    /// Last modified timestamp
17    pub modified_at: chrono::DateTime<chrono::Utc>,
18    /// Number of entities
19    pub entity_count: usize,
20    /// Number of relationships
21    pub relationship_count: usize,
22    /// Number of documents
23    pub document_count: usize,
24    /// Number of chunks
25    pub chunk_count: usize,
26    /// Storage format version
27    pub format_version: String,
28    /// Description (optional)
29    pub description: Option<String>,
30}
31
32impl WorkspaceMetadata {
33    /// Create new workspace metadata
34    pub fn new(name: String) -> Self {
35        let now = chrono::Utc::now();
36        Self {
37            name,
38            created_at: now,
39            modified_at: now,
40            entity_count: 0,
41            relationship_count: 0,
42            document_count: 0,
43            chunk_count: 0,
44            format_version: "1.0".to_string(),
45            description: None,
46        }
47    }
48
49    /// Update counts from knowledge graph
50    pub fn update_from_graph(&mut self, graph: &KnowledgeGraph) {
51        self.entity_count = graph.entity_count();
52        self.relationship_count = graph.relationship_count();
53        self.document_count = graph.document_count();
54        self.chunk_count = graph.chunks().count();
55        self.modified_at = chrono::Utc::now();
56    }
57}
58
59/// Workspace information (lightweight version for listing)
60#[derive(Debug, Clone)]
61pub struct WorkspaceInfo {
62    /// Workspace name
63    pub name: String,
64    /// Path to workspace directory
65    pub path: PathBuf,
66    /// Workspace metadata
67    pub metadata: WorkspaceMetadata,
68    /// Total size in bytes
69    pub size_bytes: u64,
70}
71
/// Entry point for working with several named workspaces.
///
/// Every workspace is a sub-directory of a single base directory.
#[derive(Clone, Debug)]
pub struct WorkspaceManager {
    /// Directory under which all workspace directories are created.
    base_dir: PathBuf,
}
78
79impl WorkspaceManager {
80    /// Create a new workspace manager
81    ///
82    /// # Arguments
83    /// * `base_dir` - Base directory path (e.g., "./workspace")
84    ///
85    /// # Example
86    /// ```no_run
87    /// use graphrag_core::persistence::WorkspaceManager;
88    ///
89    /// let workspace = WorkspaceManager::new("./workspace").unwrap();
90    /// ```
91    pub fn new<P: AsRef<Path>>(base_dir: P) -> Result<Self> {
92        let base_dir = base_dir.as_ref().to_path_buf();
93
94        // Create base directory if it doesn't exist
95        if !base_dir.exists() {
96            fs::create_dir_all(&base_dir)?;
97            #[cfg(feature = "tracing")]
98            tracing::info!("Created workspace base directory: {:?}", base_dir);
99        }
100
101        Ok(Self { base_dir })
102    }
103
104    /// Get workspace directory path
105    pub fn workspace_path(&self, workspace_name: &str) -> PathBuf {
106        self.base_dir.join(workspace_name)
107    }
108
109    /// Check if workspace exists
110    pub fn workspace_exists(&self, workspace_name: &str) -> bool {
111        self.workspace_path(workspace_name).exists()
112    }
113
114    /// Create a new workspace
115    pub fn create_workspace(&self, workspace_name: &str) -> Result<()> {
116        let workspace_path = self.workspace_path(workspace_name);
117
118        if workspace_path.exists() {
119            return Err(GraphRAGError::Config {
120                message: format!("Workspace '{}' already exists", workspace_name),
121            });
122        }
123
124        // Create workspace directory
125        fs::create_dir_all(&workspace_path)?;
126
127        // Create metadata
128        let metadata = WorkspaceMetadata::new(workspace_name.to_string());
129        self.save_metadata(&metadata, workspace_name)?;
130
131        #[cfg(feature = "tracing")]
132        tracing::info!("Created workspace: {}", workspace_name);
133
134        Ok(())
135    }
136
137    /// Delete a workspace
138    pub fn delete_workspace(&self, workspace_name: &str) -> Result<()> {
139        let workspace_path = self.workspace_path(workspace_name);
140
141        if !workspace_path.exists() {
142            return Err(GraphRAGError::Config {
143                message: format!("Workspace '{}' does not exist", workspace_name),
144            });
145        }
146
147        fs::remove_dir_all(&workspace_path)?;
148
149        #[cfg(feature = "tracing")]
150        tracing::info!("Deleted workspace: {}", workspace_name);
151
152        Ok(())
153    }
154
155    /// List all workspaces
156    pub fn list_workspaces(&self) -> Result<Vec<WorkspaceInfo>> {
157        let mut workspaces = Vec::new();
158
159        if !self.base_dir.exists() {
160            return Ok(workspaces);
161        }
162
163        for entry in fs::read_dir(&self.base_dir)? {
164            let entry = entry?;
165            let path = entry.path();
166
167            if path.is_dir() {
168                let workspace_name = path
169                    .file_name()
170                    .and_then(|n| n.to_str())
171                    .unwrap_or("unknown")
172                    .to_string();
173
174                // Load metadata
175                let metadata = self.load_metadata(&workspace_name).unwrap_or_else(|_| {
176                    WorkspaceMetadata::new(workspace_name.clone())
177                });
178
179                // Calculate size
180                let size_bytes = Self::calculate_dir_size(&path).unwrap_or(0);
181
182                workspaces.push(WorkspaceInfo {
183                    name: workspace_name,
184                    path,
185                    metadata,
186                    size_bytes,
187                });
188            }
189        }
190
191        // Sort by modification time (newest first)
192        workspaces.sort_by(|a, b| b.metadata.modified_at.cmp(&a.metadata.modified_at));
193
194        Ok(workspaces)
195    }
196
197    /// Save knowledge graph to workspace
198    pub fn save_graph(&self, graph: &KnowledgeGraph, workspace_name: &str) -> Result<()> {
199        // Create workspace if it doesn't exist
200        if !self.workspace_exists(workspace_name) {
201            self.create_workspace(workspace_name)?;
202        }
203
204        let workspace_path = self.workspace_path(workspace_name);
205
206        // Save to JSON (always available as fallback)
207        let json_path = workspace_path.join("graph.json");
208        graph.save_to_json(json_path.to_str().unwrap())?;
209
210        // Save to Parquet (if feature enabled)
211        #[cfg(feature = "persistent-storage")]
212        {
213            use super::parquet::ParquetPersistence;
214            let parquet = ParquetPersistence::new(workspace_path.clone())?;
215            parquet.save_graph(graph)?;
216        }
217
218        // Update metadata
219        let mut metadata = self.load_metadata(workspace_name).unwrap_or_else(|_| {
220            WorkspaceMetadata::new(workspace_name.to_string())
221        });
222        metadata.update_from_graph(graph);
223        self.save_metadata(&metadata, workspace_name)?;
224
225        #[cfg(feature = "tracing")]
226        tracing::info!("Saved graph to workspace: {}", workspace_name);
227
228        Ok(())
229    }
230
231    /// Load knowledge graph from workspace
232    pub fn load_graph(&self, workspace_name: &str) -> Result<KnowledgeGraph> {
233        if !self.workspace_exists(workspace_name) {
234            return Err(GraphRAGError::Config {
235                message: format!("Workspace '{}' does not exist", workspace_name),
236            });
237        }
238
239        let workspace_path = self.workspace_path(workspace_name);
240
241        // Try loading from Parquet first (if feature enabled)
242        #[cfg(feature = "persistent-storage")]
243        {
244            use super::parquet::ParquetPersistence;
245            let parquet = ParquetPersistence::new(workspace_path.clone())?;
246            if let Ok(graph) = parquet.load_graph() {
247                #[cfg(feature = "tracing")]
248                tracing::info!("Loaded graph from Parquet: {}", workspace_name);
249                return Ok(graph);
250            }
251        }
252
253        // Fallback to JSON
254        let json_path = workspace_path.join("graph.json");
255        if json_path.exists() {
256            #[cfg(feature = "tracing")]
257            tracing::info!("Loading graph from JSON fallback: {}", workspace_name);
258            return KnowledgeGraph::load_from_json(json_path.to_str().unwrap());
259        }
260
261        Err(GraphRAGError::Config {
262            message: format!("No graph data found in workspace '{}'", workspace_name),
263        })
264    }
265
266    /// Save workspace metadata
267    fn save_metadata(&self, metadata: &WorkspaceMetadata, workspace_name: &str) -> Result<()> {
268        let workspace_path = self.workspace_path(workspace_name);
269        let metadata_path = workspace_path.join("metadata.toml");
270
271        let toml_string = toml::to_string_pretty(metadata).map_err(|e| GraphRAGError::Config {
272            message: format!("Failed to serialize metadata: {}", e),
273        })?;
274
275        fs::write(metadata_path, toml_string)?;
276
277        Ok(())
278    }
279
280    /// Load workspace metadata
281    fn load_metadata(&self, workspace_name: &str) -> Result<WorkspaceMetadata> {
282        let workspace_path = self.workspace_path(workspace_name);
283        let metadata_path = workspace_path.join("metadata.toml");
284
285        if !metadata_path.exists() {
286            return Err(GraphRAGError::Config {
287                message: format!("Metadata not found for workspace '{}'", workspace_name),
288            });
289        }
290
291        let toml_string = fs::read_to_string(metadata_path)?;
292        let metadata: WorkspaceMetadata = toml::from_str(&toml_string).map_err(|e| {
293            GraphRAGError::Config {
294                message: format!("Failed to parse metadata: {}", e),
295            }
296        })?;
297
298        Ok(metadata)
299    }
300
301    /// Calculate directory size recursively
302    fn calculate_dir_size(path: &Path) -> Result<u64> {
303        let mut total_size = 0u64;
304
305        if path.is_dir() {
306            for entry in fs::read_dir(path)? {
307                let entry = entry?;
308                let path = entry.path();
309                if path.is_dir() {
310                    total_size += Self::calculate_dir_size(&path)?;
311                } else {
312                    total_size += entry.metadata()?.len();
313                }
314            }
315        } else {
316            total_size = fs::metadata(path)?.len();
317        }
318
319        Ok(total_size)
320    }
321}
322
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a manager rooted in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned as well so the directory stays alive for the
    /// duration of the test.
    fn manager_in_temp_dir() -> (TempDir, WorkspaceManager) {
        let root = TempDir::new().unwrap();
        let manager = WorkspaceManager::new(root.path()).unwrap();
        (root, manager)
    }

    /// Constructing a manager leaves an existing base directory behind.
    #[test]
    fn test_workspace_manager_creation() {
        let (_root, manager) = manager_in_temp_dir();
        assert!(manager.base_dir.exists());
    }

    /// Two created workspaces both show up in the listing.
    #[test]
    fn test_create_and_list_workspaces() {
        let (_root, manager) = manager_in_temp_dir();

        manager.create_workspace("test1").unwrap();
        manager.create_workspace("test2").unwrap();

        let listed = manager.list_workspaces().unwrap();
        assert_eq!(listed.len(), 2);
    }

    /// A workspace exists after creation and no longer exists after deletion.
    #[test]
    fn test_delete_workspace() {
        let (_root, manager) = manager_in_temp_dir();

        manager.create_workspace("test").unwrap();
        assert!(manager.workspace_exists("test"));

        manager.delete_workspace("test").unwrap();
        assert!(!manager.workspace_exists("test"));
    }

    /// An empty graph survives a save/load round trip.
    #[test]
    fn test_save_and_load_graph() {
        let (_root, manager) = manager_in_temp_dir();

        let empty_graph = KnowledgeGraph::new();
        manager.save_graph(&empty_graph, "test").unwrap();

        let restored = manager.load_graph("test").unwrap();
        assert_eq!(restored.entity_count(), 0);
    }
}
370}