aurora_semantic/storage/
mod.rs

1//! Storage module for persisting and loading indexes.
2//!
3//! This module provides abstractions for storing workspace indexes,
4//! document metadata, and embeddings to disk.
5
6mod disk;
7mod metadata;
8
9pub use disk::DiskStorage;
10pub use metadata::WorkspaceMetadata;
11
12use std::path::PathBuf;
13
14use crate::error::Result;
15use crate::types::{Chunk, Document, WorkspaceId, WorkspaceStats};
16
17/// Trait for storage backends.
18pub trait Storage: Send + Sync {
19    /// Initialize storage for a workspace.
20    fn init_workspace(&self, workspace_id: &WorkspaceId, root_path: &PathBuf) -> Result<()>;
21
22    /// Check if a workspace exists.
23    fn workspace_exists(&self, workspace_id: &WorkspaceId) -> bool;
24
25    /// Load workspace metadata.
26    fn load_workspace_metadata(&self, workspace_id: &WorkspaceId) -> Result<WorkspaceMetadata>;
27
28    /// Save workspace metadata.
29    fn save_workspace_metadata(
30        &self,
31        workspace_id: &WorkspaceId,
32        metadata: &WorkspaceMetadata,
33    ) -> Result<()>;
34
35    /// Save documents for a workspace.
36    fn save_documents(&self, workspace_id: &WorkspaceId, documents: &[Document]) -> Result<()>;
37
38    /// Load all documents for a workspace.
39    fn load_documents(&self, workspace_id: &WorkspaceId) -> Result<Vec<Document>>;
40
41    /// Save chunks for a workspace.
42    fn save_chunks(&self, workspace_id: &WorkspaceId, chunks: &[Chunk]) -> Result<()>;
43
44    /// Load all chunks for a workspace.
45    fn load_chunks(&self, workspace_id: &WorkspaceId) -> Result<Vec<Chunk>>;
46
47    /// Save embeddings for chunks.
48    fn save_embeddings(
49        &self,
50        workspace_id: &WorkspaceId,
51        embeddings: &[(String, Vec<f32>)], // (chunk_id, embedding)
52    ) -> Result<()>;
53
54    /// Load embeddings for a workspace.
55    fn load_embeddings(&self, workspace_id: &WorkspaceId) -> Result<Vec<(String, Vec<f32>)>>;
56
57    /// Get the path to the tantivy index directory.
58    fn tantivy_index_path(&self, workspace_id: &WorkspaceId) -> PathBuf;
59
60    /// Get the path to the vector index.
61    fn vector_index_path(&self, workspace_id: &WorkspaceId) -> PathBuf;
62
63    /// Delete a workspace and all its data.
64    fn delete_workspace(&self, workspace_id: &WorkspaceId) -> Result<()>;
65
66    /// List all stored workspaces.
67    fn list_workspaces(&self) -> Result<Vec<WorkspaceId>>;
68
69    /// Get workspace statistics.
70    fn get_workspace_stats(&self, workspace_id: &WorkspaceId) -> Result<WorkspaceStats>;
71
72    /// Flush any pending writes to disk.
73    fn flush(&self) -> Result<()>;
74}
75
/// Tunable settings for storage operations.
///
/// A ready-made configuration is available through the `Default` impl.
#[derive(Clone, Debug)]
pub struct StorageOptions {
    /// When `true`, memory-mapped files are used where possible.
    pub use_mmap: bool,
    /// When `true`, data is compressed on disk.
    pub compress: bool,
    /// Size of the buffer used for writes (presumably in bytes; the default
    /// is written as `64 * 1024`).
    pub buffer_size: usize,
}
86
87impl Default for StorageOptions {
88    fn default() -> Self {
89        Self {
90            use_mmap: true,
91            compress: true,
92            buffer_size: 64 * 1024, // 64KB
93        }
94    }
95}