Skip to main content

codemem_storage/
lib.rs

1//! codemem-storage: SQLite persistence layer for Codemem.
2//!
3//! Uses rusqlite with bundled SQLite, WAL mode, and embedded schema.
4
5use codemem_core::{CodememError, MemoryNode, MemoryType};
6use rusqlite::Connection;
7use sha2::{Digest, Sha256};
8use std::collections::HashMap;
9use std::path::Path;
10use std::sync::Mutex;
11
12mod backend;
13pub mod graph;
14mod graph_persistence;
15mod memory;
16mod migrations;
17mod queries;
18pub mod vector;
19
20pub use graph::GraphEngine;
21pub use graph::RawGraphMetrics;
22pub use vector::HnswIndex;
23
24/// SQLite-backed storage for Codemem memories, embeddings, and graph data.
25///
26/// Wraps `rusqlite::Connection` in a `Mutex` to satisfy `Send + Sync` bounds
27/// required by the `StorageBackend` trait.
28pub struct Storage {
29    conn: Mutex<Connection>,
30}
31
32impl Storage {
33    /// Get a lock on the underlying connection.
34    pub(crate) fn conn(&self) -> Result<std::sync::MutexGuard<'_, Connection>, CodememError> {
35        self.conn
36            .lock()
37            .map_err(|e| CodememError::LockPoisoned(format!("Storage mutex: {e}")))
38    }
39
40    /// Apply standard pragmas to a connection.
41    fn apply_pragmas(conn: &Connection) -> Result<(), CodememError> {
42        // WAL mode for concurrent reads
43        conn.pragma_update(None, "journal_mode", "WAL")
44            .map_err(|e| CodememError::Storage(e.to_string()))?;
45        // 64MB cache
46        conn.pragma_update(None, "cache_size", -64000i64)
47            .map_err(|e| CodememError::Storage(e.to_string()))?;
48        // Foreign keys ON
49        conn.pragma_update(None, "foreign_keys", "ON")
50            .map_err(|e| CodememError::Storage(e.to_string()))?;
51        // NORMAL sync (good balance of safety vs speed)
52        conn.pragma_update(None, "synchronous", "NORMAL")
53            .map_err(|e| CodememError::Storage(e.to_string()))?;
54        // 256MB mmap for faster reads
55        conn.pragma_update(None, "mmap_size", 268435456i64)
56            .map_err(|e| CodememError::Storage(e.to_string()))?;
57        // Temp tables in memory
58        conn.pragma_update(None, "temp_store", "MEMORY")
59            .map_err(|e| CodememError::Storage(e.to_string()))?;
60        // 5s busy timeout
61        conn.busy_timeout(std::time::Duration::from_secs(5))
62            .map_err(|e| CodememError::Storage(e.to_string()))?;
63        Ok(())
64    }
65
66    /// Open (or create) a Codemem database at the given path.
67    pub fn open(path: &Path) -> Result<Self, CodememError> {
68        let conn = Connection::open(path).map_err(|e| CodememError::Storage(e.to_string()))?;
69        Self::apply_pragmas(&conn)?;
70        migrations::run_migrations(&conn)?;
71        Ok(Self {
72            conn: Mutex::new(conn),
73        })
74    }
75
76    /// Open an existing database without running migrations.
77    ///
78    /// Use this in lifecycle hooks (context, prompt, summarize) where the
79    /// database has already been migrated by `codemem init` or `codemem serve`,
80    /// to avoid SQLITE_BUSY race conditions with the concurrent MCP server.
81    pub fn open_without_migrations(path: &Path) -> Result<Self, CodememError> {
82        let conn = Connection::open(path).map_err(|e| CodememError::Storage(e.to_string()))?;
83        Self::apply_pragmas(&conn)?;
84        Ok(Self {
85            conn: Mutex::new(conn),
86        })
87    }
88
89    /// Open an in-memory database (for testing).
90    pub fn open_in_memory() -> Result<Self, CodememError> {
91        let conn =
92            Connection::open_in_memory().map_err(|e| CodememError::Storage(e.to_string()))?;
93        conn.pragma_update(None, "foreign_keys", "ON")
94            .map_err(|e| CodememError::Storage(e.to_string()))?;
95        migrations::run_migrations(&conn)?;
96        Ok(Self {
97            conn: Mutex::new(conn),
98        })
99    }
100
101    /// Compute SHA-256 hash of content for deduplication.
102    // TODO: Delegate to `codemem_core::content_hash()` once it is added to codemem-core.
103    pub fn content_hash(content: &str) -> String {
104        let mut hasher = Sha256::new();
105        hasher.update(content.as_bytes());
106        format!("{:x}", hasher.finalize())
107    }
108}
109
/// Raw, untyped form of one memory record, used as the intermediate step
/// when deserializing a memory; `into_memory_node` turns it into a `MemoryNode`.
pub(crate) struct MemoryRow {
    /// Identifier of the memory.
    pub(crate) id: String,
    /// The memory's content text.
    pub(crate) content: String,
    /// Textual memory type; parsed into a `MemoryType` during conversion.
    pub(crate) memory_type: String,
    /// Importance score, passed through unchanged.
    pub(crate) importance: f64,
    /// Confidence score, passed through unchanged.
    pub(crate) confidence: f64,
    /// Access counter as a signed integer; converted to `u32` during conversion.
    pub(crate) access_count: i64,
    /// Hash of the content, passed through unchanged.
    pub(crate) content_hash: String,
    /// Tags encoded as a JSON array of strings.
    pub(crate) tags: String,
    /// Metadata encoded as a JSON object.
    pub(crate) metadata: String,
    /// Optional namespace of the memory.
    pub(crate) namespace: Option<String>,
    /// Creation time as whole seconds since the Unix epoch.
    pub(crate) created_at: i64,
    /// Last-update time as whole seconds since the Unix epoch.
    pub(crate) updated_at: i64,
    /// Last-access time as whole seconds since the Unix epoch.
    pub(crate) last_accessed_at: i64,
}
126
127impl MemoryRow {
128    pub(crate) fn into_memory_node(self) -> Result<MemoryNode, CodememError> {
129        let memory_type: MemoryType = self.memory_type.parse()?;
130        let tags: Vec<String> = serde_json::from_str(&self.tags).unwrap_or_else(|e| {
131            tracing::warn!(id = %self.id, error = %e, "Malformed tags JSON for memory");
132            Vec::new()
133        });
134        let metadata: HashMap<String, serde_json::Value> = serde_json::from_str(&self.metadata)
135            .unwrap_or_else(|e| {
136                tracing::warn!(id = %self.id, error = %e, "Malformed metadata JSON for memory");
137                HashMap::new()
138            });
139
140        let created_at = chrono::DateTime::from_timestamp(self.created_at, 0)
141            .unwrap_or_else(|| {
142                tracing::warn!(id = %self.id, ts = self.created_at, "Invalid created_at timestamp");
143                chrono::DateTime::<chrono::Utc>::default()
144            })
145            .with_timezone(&chrono::Utc);
146        let updated_at = chrono::DateTime::from_timestamp(self.updated_at, 0)
147            .unwrap_or_else(|| {
148                tracing::warn!(id = %self.id, ts = self.updated_at, "Invalid updated_at timestamp");
149                chrono::DateTime::<chrono::Utc>::default()
150            })
151            .with_timezone(&chrono::Utc);
152        let last_accessed_at = chrono::DateTime::from_timestamp(self.last_accessed_at, 0)
153            .unwrap_or_else(|| {
154                tracing::warn!(id = %self.id, ts = self.last_accessed_at, "Invalid last_accessed_at timestamp");
155                chrono::DateTime::<chrono::Utc>::default()
156            })
157            .with_timezone(&chrono::Utc);
158
159        Ok(MemoryNode {
160            id: self.id,
161            content: self.content,
162            memory_type,
163            importance: self.importance,
164            confidence: self.confidence,
165            access_count: u32::try_from(self.access_count).unwrap_or(u32::MAX),
166            content_hash: self.content_hash,
167            tags,
168            metadata,
169            namespace: self.namespace,
170            created_at,
171            updated_at,
172            last_accessed_at,
173        })
174    }
175}