1use codemem_core::{
14 CodememConfig, CodememError, ScoringWeights, StorageBackend, VectorBackend, VectorConfig,
15};
16use codemem_storage::graph::GraphEngine;
17use codemem_storage::HnswIndex;
18use codemem_storage::Storage;
19use std::path::{Path, PathBuf};
20use std::sync::atomic::AtomicBool;
21#[cfg(test)]
22use std::sync::atomic::Ordering;
23use std::sync::{Arc, Mutex, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30mod enrichment_text;
31mod file_indexing;
32mod graph_linking;
33pub mod hooks;
34pub mod index;
35mod memory_ops;
36pub mod metrics;
37pub mod patterns;
38pub mod persistence;
39pub mod recall;
40pub mod scoring;
41pub mod search;
42pub mod watch;
43
44#[cfg(test)]
45#[path = "tests/engine_integration_tests.rs"]
46mod integration_tests;
47
48#[cfg(test)]
49#[path = "tests/enrichment_tests.rs"]
50mod enrichment_tests;
51
52#[cfg(test)]
53#[path = "tests/recall_tests.rs"]
54mod recall_tests;
55
56#[cfg(test)]
57#[path = "tests/search_tests.rs"]
58mod search_tests;
59
60#[cfg(test)]
61#[path = "tests/consolidation_tests.rs"]
62mod consolidation_tests;
63
64#[cfg(test)]
65#[path = "tests/analysis_tests.rs"]
66mod analysis_tests;
67
68#[cfg(test)]
69#[path = "tests/persistence_tests.rs"]
70mod persistence_tests;
71
72pub use index::{
74 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
75 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
76 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
77};
78
79pub use bm25::Bm25Index;
81pub use metrics::InMemoryMetrics;
82
83pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
85
86pub use persistence::{edge_weight_for, IndexPersistResult};
88
89pub use recall::{ExpandedResult, NamespaceStats};
91
92pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
94
95pub use analysis::{
97 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
98};
99
/// A piece of text together with optional tags and an optional importance value.
///
/// Nothing in this file constructs or consumes the type; presumably it describes
/// one segment produced when a memory is split — confirm against the callers.
///
/// `PartialEq` is derived so parts can be compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq)]
pub struct SplitPart {
    /// Text content of this part.
    pub content: String,
    /// Optional tags for this part; `None` when no tags were supplied.
    pub tags: Option<Vec<String>>,
    /// Optional importance value; `None` when none was supplied.
    pub importance: Option<f64>,
}
107
/// Symbols and chunks kept together with the root path they belong to.
///
/// `CodememEngine` stores at most one of these in its `index_cache` field
/// (`Mutex<Option<IndexCache>>`). Where the cache is filled is not visible in
/// this file.
pub struct IndexCache {
    /// Cached symbols.
    pub symbols: Vec<Symbol>,
    /// Cached code chunks.
    pub chunks: Vec<CodeChunk>,
    /// Root path associated with the cached data, stored as a string.
    pub root_path: String,
}
116
/// Engine object bundling the storage backend, the vector / graph / BM25
/// indexes, the optional embedding provider, configuration and metrics.
///
/// Mutable parts are wrapped in `Mutex` / `RwLock`; the `lock_*` and
/// `scoring_weights*` methods acquire them and report poisoning as
/// `CodememError::LockPoisoned`.
pub struct CodememEngine {
    /// Storage backend, held as a boxed trait object.
    pub(crate) storage: Box<dyn StorageBackend>,
    /// HNSW vector index; acquire through `lock_vector`.
    pub(crate) vector: Mutex<HnswIndex>,
    /// Graph engine; acquire through `lock_graph`.
    pub(crate) graph: Mutex<GraphEngine>,
    /// Embedding provider, or `None` when the engine was built without one.
    pub(crate) embeddings: Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>,
    /// Database path. Set by `from_db_path`; the other constructors in this
    /// file leave it `None`.
    pub(crate) db_path: Option<PathBuf>,
    /// Optional cache of index data; starts out as `None`.
    pub(crate) index_cache: Mutex<Option<IndexCache>>,
    /// Scoring weights, initialised from `config.scoring` at construction.
    pub(crate) scoring_weights: RwLock<ScoringWeights>,
    /// BM25 index; acquire through `lock_bm25`.
    pub(crate) bm25_index: Mutex<Bm25Index>,
    /// Configuration the engine was built with.
    pub(crate) config: CodememConfig,
    /// Shared in-memory metrics collector.
    pub(crate) metrics: Arc<InMemoryMetrics>,
    /// Flag initialised to `false` by every constructor. In this file it is
    /// only read by the test-only `is_dirty`; presumably it is set elsewhere in
    /// the crate when state needs persisting — confirm.
    dirty: AtomicBool,
    /// Identifier of the active session, if any; see `set_active_session`.
    active_session_id: RwLock<Option<String>>,
}
156
157impl CodememEngine {
158 pub fn new(
160 storage: Box<dyn StorageBackend>,
161 vector: HnswIndex,
162 graph: GraphEngine,
163 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
164 ) -> Self {
165 let config = CodememConfig::load_or_default();
166 Self::new_with_config(storage, vector, graph, embeddings, config)
167 }
168
169 pub fn new_with_config(
171 storage: Box<dyn StorageBackend>,
172 vector: HnswIndex,
173 graph: GraphEngine,
174 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
175 config: CodememConfig,
176 ) -> Self {
177 Self {
178 storage,
179 vector: Mutex::new(vector),
180 graph: Mutex::new(graph),
181 embeddings: embeddings.map(Mutex::new),
182 db_path: None,
183 index_cache: Mutex::new(None),
184 scoring_weights: RwLock::new(config.scoring.clone()),
185 bm25_index: Mutex::new(Bm25Index::new()),
186 config,
187 metrics: Arc::new(InMemoryMetrics::new()),
188 dirty: AtomicBool::new(false),
189 active_session_id: RwLock::new(None),
190 }
191 }
192
193 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
195 if let Some(parent) = db_path.parent() {
197 if !parent.exists() {
198 std::fs::create_dir_all(parent).map_err(|e| {
199 CodememError::Storage(format!(
200 "Failed to create database directory {}: {e}",
201 parent.display()
202 ))
203 })?;
204 }
205 }
206
207 let config = CodememConfig::load_or_default();
208
209 let storage = Storage::open_with_config(
211 db_path,
212 Some(config.storage.cache_size_mb),
213 Some(config.storage.busy_timeout_secs),
214 )?;
215 let vector_config = VectorConfig {
216 dimensions: config.vector.dimensions,
217 ..VectorConfig::default()
218 };
219 let mut vector = HnswIndex::new(vector_config.clone())?;
220
221 let index_path = db_path.with_extension("idx");
223 if index_path.exists() {
224 vector.load(&index_path)?;
225 }
226
227 let vector_count = vector.stats().count;
230 let db_stats = storage.stats()?;
231 let db_embed_count = db_stats.embedding_count;
232 if vector_count != db_embed_count {
233 tracing::warn!(
234 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
235 );
236 let mut fresh_vector = HnswIndex::new(vector_config)?;
238 if let Ok(embeddings) = storage.list_all_embeddings() {
239 for (id, embedding) in &embeddings {
240 if let Err(e) = fresh_vector.insert(id, embedding) {
241 tracing::warn!("Failed to re-insert embedding {id}: {e}");
242 }
243 }
244 }
245 vector = fresh_vector;
246 if let Err(e) = vector.save(&index_path) {
248 tracing::warn!("Failed to save rebuilt vector index: {e}");
249 }
250 }
251
252 let graph = GraphEngine::from_storage(&storage)?;
254
255 let embeddings = codemem_embeddings::from_env(Some(&config.embedding)).ok();
257
258 let mut engine =
259 Self::new_with_config(Box::new(storage), vector, graph, embeddings, config);
260 engine.db_path = Some(db_path.to_path_buf());
261
262 engine
265 .lock_graph()?
266 .recompute_centrality_with_options(false);
267
268 let bm25_path = db_path.with_extension("bm25");
270 let mut bm25_loaded = false;
271 if bm25_path.exists() {
272 match std::fs::read(&bm25_path) {
273 Ok(data) => match Bm25Index::deserialize(&data) {
274 Ok(index) => {
275 let mut bm25 = engine.lock_bm25()?;
276 *bm25 = index;
277 bm25_loaded = true;
278 tracing::info!(
279 "Loaded BM25 index from disk ({} documents)",
280 bm25.doc_count
281 );
282 }
283 Err(e) => {
284 tracing::warn!("Failed to deserialize BM25 index, rebuilding: {e}");
285 }
286 },
287 Err(e) => {
288 tracing::warn!("Failed to read BM25 index file, rebuilding: {e}");
289 }
290 }
291 }
292
293 if !bm25_loaded {
294 if let Ok(ids) = engine.storage.list_memory_ids() {
296 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
297 if let Ok(memories) = engine.storage.get_memories_batch(&id_refs) {
298 let mut bm25 = engine.lock_bm25()?;
299 for memory in &memories {
300 bm25.add_document(&memory.id, &memory.content);
301 }
302 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
303 }
304 }
305 }
306
307 Ok(engine)
308 }
309
310 pub fn for_testing() -> Self {
312 let storage = Storage::open_in_memory().unwrap();
313 let vector = HnswIndex::with_defaults().unwrap();
314 let graph = GraphEngine::new();
315 let config = CodememConfig::default();
316 Self {
317 storage: Box::new(storage),
318 vector: Mutex::new(vector),
319 graph: Mutex::new(graph),
320 embeddings: None,
321 db_path: None,
322 index_cache: Mutex::new(None),
323 scoring_weights: RwLock::new(config.scoring.clone()),
324 bm25_index: Mutex::new(Bm25Index::new()),
325 config,
326 metrics: Arc::new(InMemoryMetrics::new()),
327 dirty: AtomicBool::new(false),
328 active_session_id: RwLock::new(None),
329 }
330 }
331
332 pub fn lock_vector(&self) -> Result<std::sync::MutexGuard<'_, HnswIndex>, CodememError> {
335 self.vector
336 .lock()
337 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
338 }
339
340 pub fn lock_graph(&self) -> Result<std::sync::MutexGuard<'_, GraphEngine>, CodememError> {
341 self.graph
342 .lock()
343 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
344 }
345
346 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
347 self.bm25_index
348 .lock()
349 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
350 }
351
352 pub fn lock_embeddings(
353 &self,
354 ) -> Result<
355 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
356 CodememError,
357 > {
358 match &self.embeddings {
359 Some(m) => Ok(Some(m.lock().map_err(|e| {
360 CodememError::LockPoisoned(format!("embeddings: {e}"))
361 })?)),
362 None => Ok(None),
363 }
364 }
365
366 pub fn lock_index_cache(
367 &self,
368 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
369 self.index_cache
370 .lock()
371 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
372 }
373
374 pub fn scoring_weights(
375 &self,
376 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
377 self.scoring_weights
378 .read()
379 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
380 }
381
382 pub fn scoring_weights_mut(
383 &self,
384 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
385 self.scoring_weights
386 .write()
387 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
388 }
389
390 pub fn set_active_session(&self, id: Option<String>) {
394 match self.active_session_id.write() {
395 Ok(mut guard) => *guard = id,
396 Err(e) => *e.into_inner() = id,
397 }
398 }
399
400 pub fn active_session_id(&self) -> Option<String> {
402 match self.active_session_id.read() {
403 Ok(guard) => guard.clone(),
404 Err(e) => e.into_inner().clone(),
405 }
406 }
407
408 pub fn storage(&self) -> &dyn StorageBackend {
412 &*self.storage
413 }
414
415 pub fn has_embeddings(&self) -> bool {
417 self.embeddings.is_some()
418 }
419
420 pub fn db_path(&self) -> Option<&Path> {
422 self.db_path.as_deref()
423 }
424
425 pub fn config(&self) -> &CodememConfig {
427 &self.config
428 }
429
430 pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
432 &self.metrics
433 }
434
435 pub fn graph_mutex(&self) -> &Mutex<GraphEngine> {
437 &self.graph
438 }
439
440 pub fn vector_mutex(&self) -> &Mutex<HnswIndex> {
442 &self.vector
443 }
444
445 pub fn bm25_mutex(&self) -> &Mutex<Bm25Index> {
447 &self.bm25_index
448 }
449
450 pub fn embeddings_mutex(
452 &self,
453 ) -> Option<&Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
454 self.embeddings.as_ref()
455 }
456
457 #[cfg(test)]
459 pub(crate) fn is_dirty(&self) -> bool {
460 self.dirty.load(Ordering::Acquire)
461 }
462}
463
464pub use file_indexing::{IndexEnrichResult, SessionContext};