Skip to main content

codemem_storage/
lib.rs

1//! codemem-storage: SQLite persistence layer for Codemem.
2//!
3//! Uses rusqlite with bundled SQLite, WAL mode, and embedded schema.
4
5use codemem_core::{CodememError, MemoryNode, MemoryType};
6use rusqlite::Connection;
7use std::collections::HashMap;
8use std::path::Path;
9use std::sync::Mutex;
10
11mod backend;
12pub mod graph;
13mod graph_persistence;
14mod memory;
15mod migrations;
16mod queries;
17pub mod vector;
18
19pub use graph::GraphEngine;
20pub use graph::RawGraphMetrics;
21pub use vector::HnswIndex;
22
23/// SQLite-backed storage for Codemem memories, embeddings, and graph data.
24///
25/// Wraps `rusqlite::Connection` in a `Mutex` to satisfy `Send + Sync` bounds
26/// required by the `StorageBackend` trait.
27pub struct Storage {
28    conn: Mutex<Connection>,
29}
30
31impl Storage {
32    /// Get a lock on the underlying connection.
33    pub(crate) fn conn(&self) -> Result<std::sync::MutexGuard<'_, Connection>, CodememError> {
34        self.conn
35            .lock()
36            .map_err(|e| CodememError::LockPoisoned(format!("Storage mutex: {e}")))
37    }
38
39    /// Apply standard pragmas to a connection.
40    ///
41    /// `cache_size_mb` and `busy_timeout_secs` override the defaults (64 MB / 5 s)
42    /// when provided — typically sourced from `StorageConfig`.
43    fn apply_pragmas(
44        conn: &Connection,
45        cache_size_mb: Option<u32>,
46        busy_timeout_secs: Option<u64>,
47    ) -> Result<(), CodememError> {
48        // WAL mode for concurrent reads
49        conn.pragma_update(None, "journal_mode", "WAL")
50            .map_err(|e| CodememError::Storage(e.to_string()))?;
51        // Cache size (negative value = KiB in SQLite)
52        let cache_kb = i64::from(cache_size_mb.unwrap_or(64)) * 1000;
53        conn.pragma_update(None, "cache_size", -cache_kb)
54            .map_err(|e| CodememError::Storage(e.to_string()))?;
55        // Foreign keys ON
56        conn.pragma_update(None, "foreign_keys", "ON")
57            .map_err(|e| CodememError::Storage(e.to_string()))?;
58        // NORMAL sync (good balance of safety vs speed)
59        conn.pragma_update(None, "synchronous", "NORMAL")
60            .map_err(|e| CodememError::Storage(e.to_string()))?;
61        // 256MB mmap for faster reads
62        conn.pragma_update(None, "mmap_size", 268435456i64)
63            .map_err(|e| CodememError::Storage(e.to_string()))?;
64        // Temp tables in memory
65        conn.pragma_update(None, "temp_store", "MEMORY")
66            .map_err(|e| CodememError::Storage(e.to_string()))?;
67        // Busy timeout
68        let timeout = busy_timeout_secs.unwrap_or(5);
69        conn.busy_timeout(std::time::Duration::from_secs(timeout))
70            .map_err(|e| CodememError::Storage(e.to_string()))?;
71        Ok(())
72    }
73
74    /// Open (or create) a Codemem database at the given path.
75    pub fn open(path: &Path) -> Result<Self, CodememError> {
76        Self::open_with_config(path, None, None)
77    }
78
79    /// Open a database with explicit storage configuration overrides.
80    pub fn open_with_config(
81        path: &Path,
82        cache_size_mb: Option<u32>,
83        busy_timeout_secs: Option<u64>,
84    ) -> Result<Self, CodememError> {
85        let conn = Connection::open(path).map_err(|e| CodememError::Storage(e.to_string()))?;
86        Self::apply_pragmas(&conn, cache_size_mb, busy_timeout_secs)?;
87        migrations::run_migrations(&conn)?;
88        Ok(Self {
89            conn: Mutex::new(conn),
90        })
91    }
92
93    /// Open an existing database without running migrations.
94    ///
95    /// Use this in lifecycle hooks (context, prompt, summarize) where the
96    /// database has already been migrated by `codemem init` or `codemem serve`,
97    /// to avoid SQLITE_BUSY race conditions with the concurrent MCP server.
98    pub fn open_without_migrations(path: &Path) -> Result<Self, CodememError> {
99        Self::open_without_migrations_with_config(path, None, None)
100    }
101
102    /// Open without migrations, with explicit storage configuration overrides.
103    pub fn open_without_migrations_with_config(
104        path: &Path,
105        cache_size_mb: Option<u32>,
106        busy_timeout_secs: Option<u64>,
107    ) -> Result<Self, CodememError> {
108        let conn = Connection::open(path).map_err(|e| CodememError::Storage(e.to_string()))?;
109        Self::apply_pragmas(&conn, cache_size_mb, busy_timeout_secs)?;
110        Ok(Self {
111            conn: Mutex::new(conn),
112        })
113    }
114
115    /// Open an in-memory database (for testing).
116    pub fn open_in_memory() -> Result<Self, CodememError> {
117        let conn =
118            Connection::open_in_memory().map_err(|e| CodememError::Storage(e.to_string()))?;
119        Self::apply_pragmas(&conn, None, None)?;
120        migrations::run_migrations(&conn)?;
121        Ok(Self {
122            conn: Mutex::new(conn),
123        })
124    }
125
126    /// Compute SHA-256 hash of content for deduplication.
127    pub fn content_hash(content: &str) -> String {
128        codemem_core::content_hash(content)
129    }
130}
131
/// Internal row struct for memory deserialization.
///
/// Holds the raw column values of one stored memory exactly as they come out
/// of SQLite, before they are converted into a `MemoryNode` by
/// `MemoryRow::into_memory_node`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct MemoryRow {
    /// Memory identifier.
    pub(crate) id: String,
    /// Memory text content.
    pub(crate) content: String,
    /// Memory type in string form; parsed into `MemoryType` on conversion.
    pub(crate) memory_type: String,
    /// Importance score.
    pub(crate) importance: f64,
    /// Confidence score.
    pub(crate) confidence: f64,
    /// Access counter as stored (signed 64-bit); narrowed to `u32` on conversion.
    pub(crate) access_count: i64,
    /// Hash of `content`, used for deduplication.
    pub(crate) content_hash: String,
    /// JSON-encoded list of tag strings.
    pub(crate) tags: String,
    /// JSON-encoded object of arbitrary metadata values.
    pub(crate) metadata: String,
    /// Optional namespace the memory belongs to.
    pub(crate) namespace: Option<String>,
    /// Creation time in Unix seconds.
    pub(crate) created_at: i64,
    /// Last-update time in Unix seconds.
    pub(crate) updated_at: i64,
    /// Last-access time in Unix seconds.
    pub(crate) last_accessed_at: i64,
}
148
149impl MemoryRow {
150    pub(crate) fn into_memory_node(self) -> Result<MemoryNode, CodememError> {
151        let memory_type: MemoryType = self.memory_type.parse()?;
152        let tags: Vec<String> = serde_json::from_str(&self.tags).unwrap_or_else(|e| {
153            tracing::warn!(id = %self.id, error = %e, "Malformed tags JSON for memory");
154            Vec::new()
155        });
156        let metadata: HashMap<String, serde_json::Value> = serde_json::from_str(&self.metadata)
157            .unwrap_or_else(|e| {
158                tracing::warn!(id = %self.id, error = %e, "Malformed metadata JSON for memory");
159                HashMap::new()
160            });
161
162        let created_at = chrono::DateTime::from_timestamp(self.created_at, 0)
163            .unwrap_or_else(|| {
164                tracing::warn!(id = %self.id, ts = self.created_at, "Invalid created_at timestamp");
165                chrono::DateTime::<chrono::Utc>::default()
166            })
167            .with_timezone(&chrono::Utc);
168        let updated_at = chrono::DateTime::from_timestamp(self.updated_at, 0)
169            .unwrap_or_else(|| {
170                tracing::warn!(id = %self.id, ts = self.updated_at, "Invalid updated_at timestamp");
171                chrono::DateTime::<chrono::Utc>::default()
172            })
173            .with_timezone(&chrono::Utc);
174        let last_accessed_at = chrono::DateTime::from_timestamp(self.last_accessed_at, 0)
175            .unwrap_or_else(|| {
176                tracing::warn!(id = %self.id, ts = self.last_accessed_at, "Invalid last_accessed_at timestamp");
177                chrono::DateTime::<chrono::Utc>::default()
178            })
179            .with_timezone(&chrono::Utc);
180
181        Ok(MemoryNode {
182            id: self.id,
183            content: self.content,
184            memory_type,
185            importance: self.importance,
186            confidence: self.confidence,
187            access_count: u32::try_from(self.access_count).unwrap_or(u32::MAX),
188            content_hash: self.content_hash,
189            tags,
190            metadata,
191            namespace: self.namespace,
192            created_at,
193            updated_at,
194            last_accessed_at,
195        })
196    }
197}