graphrag_core/persistence/
workspace.rs1use crate::core::{GraphRAGError, KnowledgeGraph, Result};
6use std::fs;
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
11pub struct WorkspaceMetadata {
12 pub name: String,
14 pub created_at: chrono::DateTime<chrono::Utc>,
16 pub modified_at: chrono::DateTime<chrono::Utc>,
18 pub entity_count: usize,
20 pub relationship_count: usize,
22 pub document_count: usize,
24 pub chunk_count: usize,
26 pub format_version: String,
28 pub description: Option<String>,
30}
31
32impl WorkspaceMetadata {
33 pub fn new(name: String) -> Self {
35 let now = chrono::Utc::now();
36 Self {
37 name,
38 created_at: now,
39 modified_at: now,
40 entity_count: 0,
41 relationship_count: 0,
42 document_count: 0,
43 chunk_count: 0,
44 format_version: "1.0".to_string(),
45 description: None,
46 }
47 }
48
49 pub fn update_from_graph(&mut self, graph: &KnowledgeGraph) {
51 self.entity_count = graph.entity_count();
52 self.relationship_count = graph.relationship_count();
53 self.document_count = graph.document_count();
54 self.chunk_count = graph.chunks().count();
55 self.modified_at = chrono::Utc::now();
56 }
57}
58
59#[derive(Debug, Clone)]
61pub struct WorkspaceInfo {
62 pub name: String,
64 pub path: PathBuf,
66 pub metadata: WorkspaceMetadata,
68 pub size_bytes: u64,
70}
71
/// Manages named workspaces, each stored as a sub-directory of one base
/// directory.
#[derive(Clone, Debug)]
pub struct WorkspaceManager {
    /// Directory under which every workspace directory is placed.
    base_dir: PathBuf,
}
78
79impl WorkspaceManager {
80 pub fn new<P: AsRef<Path>>(base_dir: P) -> Result<Self> {
92 let base_dir = base_dir.as_ref().to_path_buf();
93
94 if !base_dir.exists() {
96 fs::create_dir_all(&base_dir)?;
97 #[cfg(feature = "tracing")]
98 tracing::info!("Created workspace base directory: {:?}", base_dir);
99 }
100
101 Ok(Self { base_dir })
102 }
103
104 pub fn workspace_path(&self, workspace_name: &str) -> PathBuf {
106 self.base_dir.join(workspace_name)
107 }
108
109 pub fn workspace_exists(&self, workspace_name: &str) -> bool {
111 self.workspace_path(workspace_name).exists()
112 }
113
114 pub fn create_workspace(&self, workspace_name: &str) -> Result<()> {
116 let workspace_path = self.workspace_path(workspace_name);
117
118 if workspace_path.exists() {
119 return Err(GraphRAGError::Config {
120 message: format!("Workspace '{}' already exists", workspace_name),
121 });
122 }
123
124 fs::create_dir_all(&workspace_path)?;
126
127 let metadata = WorkspaceMetadata::new(workspace_name.to_string());
129 self.save_metadata(&metadata, workspace_name)?;
130
131 #[cfg(feature = "tracing")]
132 tracing::info!("Created workspace: {}", workspace_name);
133
134 Ok(())
135 }
136
137 pub fn delete_workspace(&self, workspace_name: &str) -> Result<()> {
139 let workspace_path = self.workspace_path(workspace_name);
140
141 if !workspace_path.exists() {
142 return Err(GraphRAGError::Config {
143 message: format!("Workspace '{}' does not exist", workspace_name),
144 });
145 }
146
147 fs::remove_dir_all(&workspace_path)?;
148
149 #[cfg(feature = "tracing")]
150 tracing::info!("Deleted workspace: {}", workspace_name);
151
152 Ok(())
153 }
154
155 pub fn list_workspaces(&self) -> Result<Vec<WorkspaceInfo>> {
157 let mut workspaces = Vec::new();
158
159 if !self.base_dir.exists() {
160 return Ok(workspaces);
161 }
162
163 for entry in fs::read_dir(&self.base_dir)? {
164 let entry = entry?;
165 let path = entry.path();
166
167 if path.is_dir() {
168 let workspace_name = path
169 .file_name()
170 .and_then(|n| n.to_str())
171 .unwrap_or("unknown")
172 .to_string();
173
174 let metadata = self.load_metadata(&workspace_name).unwrap_or_else(|_| {
176 WorkspaceMetadata::new(workspace_name.clone())
177 });
178
179 let size_bytes = Self::calculate_dir_size(&path).unwrap_or(0);
181
182 workspaces.push(WorkspaceInfo {
183 name: workspace_name,
184 path,
185 metadata,
186 size_bytes,
187 });
188 }
189 }
190
191 workspaces.sort_by(|a, b| b.metadata.modified_at.cmp(&a.metadata.modified_at));
193
194 Ok(workspaces)
195 }
196
197 pub fn save_graph(&self, graph: &KnowledgeGraph, workspace_name: &str) -> Result<()> {
199 if !self.workspace_exists(workspace_name) {
201 self.create_workspace(workspace_name)?;
202 }
203
204 let workspace_path = self.workspace_path(workspace_name);
205
206 let json_path = workspace_path.join("graph.json");
208 graph.save_to_json(json_path.to_str().unwrap())?;
209
210 #[cfg(feature = "persistent-storage")]
212 {
213 use super::parquet::ParquetPersistence;
214 let parquet = ParquetPersistence::new(workspace_path.clone())?;
215 parquet.save_graph(graph)?;
216 }
217
218 let mut metadata = self.load_metadata(workspace_name).unwrap_or_else(|_| {
220 WorkspaceMetadata::new(workspace_name.to_string())
221 });
222 metadata.update_from_graph(graph);
223 self.save_metadata(&metadata, workspace_name)?;
224
225 #[cfg(feature = "tracing")]
226 tracing::info!("Saved graph to workspace: {}", workspace_name);
227
228 Ok(())
229 }
230
231 pub fn load_graph(&self, workspace_name: &str) -> Result<KnowledgeGraph> {
233 if !self.workspace_exists(workspace_name) {
234 return Err(GraphRAGError::Config {
235 message: format!("Workspace '{}' does not exist", workspace_name),
236 });
237 }
238
239 let workspace_path = self.workspace_path(workspace_name);
240
241 #[cfg(feature = "persistent-storage")]
243 {
244 use super::parquet::ParquetPersistence;
245 let parquet = ParquetPersistence::new(workspace_path.clone())?;
246 if let Ok(graph) = parquet.load_graph() {
247 #[cfg(feature = "tracing")]
248 tracing::info!("Loaded graph from Parquet: {}", workspace_name);
249 return Ok(graph);
250 }
251 }
252
253 let json_path = workspace_path.join("graph.json");
255 if json_path.exists() {
256 #[cfg(feature = "tracing")]
257 tracing::info!("Loading graph from JSON fallback: {}", workspace_name);
258 return KnowledgeGraph::load_from_json(json_path.to_str().unwrap());
259 }
260
261 Err(GraphRAGError::Config {
262 message: format!("No graph data found in workspace '{}'", workspace_name),
263 })
264 }
265
266 fn save_metadata(&self, metadata: &WorkspaceMetadata, workspace_name: &str) -> Result<()> {
268 let workspace_path = self.workspace_path(workspace_name);
269 let metadata_path = workspace_path.join("metadata.toml");
270
271 let toml_string = toml::to_string_pretty(metadata).map_err(|e| GraphRAGError::Config {
272 message: format!("Failed to serialize metadata: {}", e),
273 })?;
274
275 fs::write(metadata_path, toml_string)?;
276
277 Ok(())
278 }
279
280 fn load_metadata(&self, workspace_name: &str) -> Result<WorkspaceMetadata> {
282 let workspace_path = self.workspace_path(workspace_name);
283 let metadata_path = workspace_path.join("metadata.toml");
284
285 if !metadata_path.exists() {
286 return Err(GraphRAGError::Config {
287 message: format!("Metadata not found for workspace '{}'", workspace_name),
288 });
289 }
290
291 let toml_string = fs::read_to_string(metadata_path)?;
292 let metadata: WorkspaceMetadata = toml::from_str(&toml_string).map_err(|e| {
293 GraphRAGError::Config {
294 message: format!("Failed to parse metadata: {}", e),
295 }
296 })?;
297
298 Ok(metadata)
299 }
300
301 fn calculate_dir_size(path: &Path) -> Result<u64> {
303 let mut total_size = 0u64;
304
305 if path.is_dir() {
306 for entry in fs::read_dir(path)? {
307 let entry = entry?;
308 let path = entry.path();
309 if path.is_dir() {
310 total_size += Self::calculate_dir_size(&path)?;
311 } else {
312 total_size += entry.metadata()?.len();
313 }
314 }
315 } else {
316 total_size = fs::metadata(path)?.len();
317 }
318
319 Ok(total_size)
320 }
321}
322
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a manager rooted in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned too so the directory stays alive for
    /// the duration of the calling test.
    fn manager_in_temp_dir() -> (TempDir, WorkspaceManager) {
        let guard = TempDir::new().unwrap();
        let manager = WorkspaceManager::new(guard.path()).unwrap();
        (guard, manager)
    }

    /// Constructing a manager leaves an existing base directory behind.
    #[test]
    fn test_workspace_manager_creation() {
        let (_guard, manager) = manager_in_temp_dir();
        assert!(manager.base_dir.exists());
    }

    /// Two created workspaces both show up in the listing.
    #[test]
    fn test_create_and_list_workspaces() {
        let (_guard, manager) = manager_in_temp_dir();

        for name in ["test1", "test2"] {
            manager.create_workspace(name).unwrap();
        }

        let listed = manager.list_workspaces().unwrap();
        assert_eq!(listed.len(), 2);
    }

    /// A workspace exists after creation and is gone after deletion.
    #[test]
    fn test_delete_workspace() {
        let (_guard, manager) = manager_in_temp_dir();

        manager.create_workspace("test").unwrap();
        assert!(manager.workspace_exists("test"));

        manager.delete_workspace("test").unwrap();
        assert!(!manager.workspace_exists("test"));
    }

    /// An empty graph survives a save/load round trip.
    #[test]
    fn test_save_and_load_graph() {
        let (_guard, manager) = manager_in_temp_dir();

        let empty_graph = KnowledgeGraph::new();
        manager.save_graph(&empty_graph, "test").unwrap();

        let restored = manager.load_graph("test").unwrap();
        assert_eq!(restored.entity_count(), 0);
    }
}
370}