1use codemem_core::{
14 CodememConfig, CodememError, ScoringWeights, StorageBackend, VectorBackend, VectorConfig,
15};
16use codemem_storage::graph::GraphEngine;
17use codemem_storage::HnswIndex;
18use codemem_storage::Storage;
19use std::path::{Path, PathBuf};
20use std::sync::atomic::AtomicBool;
21#[cfg(test)]
22use std::sync::atomic::Ordering;
23use std::sync::{Arc, Mutex, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30mod enrichment_text;
31mod file_indexing;
32mod graph_linking;
33pub mod hooks;
34pub mod index;
35mod memory_ops;
36pub mod metrics;
37pub mod patterns;
38pub mod persistence;
39pub mod recall;
40pub mod scoring;
41pub mod search;
42pub mod watch;
43
44#[cfg(test)]
45#[path = "tests/engine_integration_tests.rs"]
46mod integration_tests;
47
48#[cfg(test)]
49#[path = "tests/enrichment_tests.rs"]
50mod enrichment_tests;
51
52#[cfg(test)]
53#[path = "tests/recall_tests.rs"]
54mod recall_tests;
55
56#[cfg(test)]
57#[path = "tests/search_tests.rs"]
58mod search_tests;
59
60#[cfg(test)]
61#[path = "tests/consolidation_tests.rs"]
62mod consolidation_tests;
63
64#[cfg(test)]
65#[path = "tests/analysis_tests.rs"]
66mod analysis_tests;
67
68#[cfg(test)]
69#[path = "tests/persistence_tests.rs"]
70mod persistence_tests;
71
72pub use index::{
74 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
75 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
76 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
77};
78
79pub use bm25::Bm25Index;
81pub use metrics::InMemoryMetrics;
82
83pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
85
86pub use persistence::{edge_weight_for, IndexPersistResult};
88
89pub use recall::{ExpandedResult, NamespaceStats};
91
92pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
94
95pub use analysis::{
97 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
98};
99
/// One part produced by a split: a piece of content plus optional per-part metadata.
///
/// NOTE(review): presumably describes one piece of a memory that is being split into
/// several; the consumers are not visible in this file — confirm against callers.
#[derive(Debug, Clone)]
pub struct SplitPart {
    /// Text content of this part.
    pub content: String,
    /// Optional tags for this part. How `None` is interpreted is decided by the
    /// consumer (TODO confirm).
    pub tags: Option<Vec<String>>,
    /// Optional importance value for this part. The valid range and the meaning of
    /// `None` are decided by the consumer (TODO confirm).
    pub importance: Option<f64>,
}
107
/// Symbols and chunks bundled with a root path.
///
/// [`CodememEngine`] keeps at most one of these in its `index_cache` slot, which
/// starts out empty.
///
/// NOTE(review): presumably the output of the most recent indexing run, kept so later
/// steps can reuse it — the code that fills the cache is not in this file; confirm.
pub struct IndexCache {
    /// Cached symbols.
    pub symbols: Vec<Symbol>,
    /// Cached code chunks.
    pub chunks: Vec<CodeChunk>,
    /// Root path associated with the cached data (presumably the indexed directory —
    /// TODO confirm).
    pub root_path: String,
}
116
/// Central engine object bundling the storage backend, vector index, graph,
/// optional embedding provider, BM25 index, configuration and metrics.
///
/// Components that are mutated after construction sit behind a `Mutex`, `RwLock` or
/// atomic so they can be reached through `&self`.
pub struct CodememEngine {
    /// Storage backend, held as a boxed trait object.
    pub(crate) storage: Box<dyn StorageBackend>,
    /// Vector index, guarded by a mutex.
    pub(crate) vector: Mutex<HnswIndex>,
    /// Graph engine, guarded by a mutex.
    pub(crate) graph: Mutex<GraphEngine>,
    /// Embedding provider behind a mutex; `None` when the engine was built without one.
    pub(crate) embeddings: Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>,
    /// Path of the backing database file. Among the constructors in this file only
    /// `from_db_path` sets it; the others leave it as `None`.
    pub(crate) db_path: Option<PathBuf>,
    /// Slot for an [`IndexCache`]; every constructor in this file starts it as `None`.
    pub(crate) index_cache: Mutex<Option<IndexCache>>,
    /// Scoring weights, initialised from `config.scoring` and guarded by a
    /// reader-writer lock.
    pub(crate) scoring_weights: RwLock<ScoringWeights>,
    /// BM25 index, guarded by a mutex; constructors start it empty (`Bm25Index::new()`).
    pub(crate) bm25_index: Mutex<Bm25Index>,
    /// Configuration the engine was constructed with.
    pub(crate) config: CodememConfig,
    /// Shared in-memory metrics collector.
    pub(crate) metrics: Arc<InMemoryMetrics>,
    /// Flag initialised to `false` by the constructors in this file and read by the
    /// test-only `is_dirty` accessor. The code that sets it is not visible here.
    dirty: AtomicBool,
}
154
155impl CodememEngine {
156 pub fn new(
158 storage: Box<dyn StorageBackend>,
159 vector: HnswIndex,
160 graph: GraphEngine,
161 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
162 ) -> Self {
163 let config = CodememConfig::load_or_default();
164 Self::new_with_config(storage, vector, graph, embeddings, config)
165 }
166
167 pub fn new_with_config(
169 storage: Box<dyn StorageBackend>,
170 vector: HnswIndex,
171 graph: GraphEngine,
172 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
173 config: CodememConfig,
174 ) -> Self {
175 Self {
176 storage,
177 vector: Mutex::new(vector),
178 graph: Mutex::new(graph),
179 embeddings: embeddings.map(Mutex::new),
180 db_path: None,
181 index_cache: Mutex::new(None),
182 scoring_weights: RwLock::new(config.scoring.clone()),
183 bm25_index: Mutex::new(Bm25Index::new()),
184 config,
185 metrics: Arc::new(InMemoryMetrics::new()),
186 dirty: AtomicBool::new(false),
187 }
188 }
189
190 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
192 if let Some(parent) = db_path.parent() {
194 if !parent.exists() {
195 std::fs::create_dir_all(parent).map_err(|e| {
196 CodememError::Storage(format!(
197 "Failed to create database directory {}: {e}",
198 parent.display()
199 ))
200 })?;
201 }
202 }
203
204 let config = CodememConfig::load_or_default();
205
206 let storage = Storage::open_with_config(
208 db_path,
209 Some(config.storage.cache_size_mb),
210 Some(config.storage.busy_timeout_secs),
211 )?;
212 let vector_config = VectorConfig {
213 dimensions: config.vector.dimensions,
214 ..VectorConfig::default()
215 };
216 let mut vector = HnswIndex::new(vector_config.clone())?;
217
218 let index_path = db_path.with_extension("idx");
220 if index_path.exists() {
221 vector.load(&index_path)?;
222 }
223
224 let vector_count = vector.stats().count;
227 let db_stats = storage.stats()?;
228 let db_embed_count = db_stats.embedding_count;
229 if vector_count != db_embed_count {
230 tracing::warn!(
231 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
232 );
233 let mut fresh_vector = HnswIndex::new(vector_config)?;
235 if let Ok(embeddings) = storage.list_all_embeddings() {
236 for (id, embedding) in &embeddings {
237 if let Err(e) = fresh_vector.insert(id, embedding) {
238 tracing::warn!("Failed to re-insert embedding {id}: {e}");
239 }
240 }
241 }
242 vector = fresh_vector;
243 if let Err(e) = vector.save(&index_path) {
245 tracing::warn!("Failed to save rebuilt vector index: {e}");
246 }
247 }
248
249 let graph = GraphEngine::from_storage(&storage)?;
251
252 let embeddings = codemem_embeddings::from_env(Some(&config.embedding)).ok();
254
255 let mut engine =
256 Self::new_with_config(Box::new(storage), vector, graph, embeddings, config);
257 engine.db_path = Some(db_path.to_path_buf());
258
259 engine
262 .lock_graph()?
263 .recompute_centrality_with_options(false);
264
265 let bm25_path = db_path.with_extension("bm25");
267 let mut bm25_loaded = false;
268 if bm25_path.exists() {
269 match std::fs::read(&bm25_path) {
270 Ok(data) => match Bm25Index::deserialize(&data) {
271 Ok(index) => {
272 let mut bm25 = engine.lock_bm25()?;
273 *bm25 = index;
274 bm25_loaded = true;
275 tracing::info!(
276 "Loaded BM25 index from disk ({} documents)",
277 bm25.doc_count
278 );
279 }
280 Err(e) => {
281 tracing::warn!("Failed to deserialize BM25 index, rebuilding: {e}");
282 }
283 },
284 Err(e) => {
285 tracing::warn!("Failed to read BM25 index file, rebuilding: {e}");
286 }
287 }
288 }
289
290 if !bm25_loaded {
291 if let Ok(ids) = engine.storage.list_memory_ids() {
293 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
294 if let Ok(memories) = engine.storage.get_memories_batch(&id_refs) {
295 let mut bm25 = engine.lock_bm25()?;
296 for memory in &memories {
297 bm25.add_document(&memory.id, &memory.content);
298 }
299 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
300 }
301 }
302 }
303
304 Ok(engine)
305 }
306
307 pub fn for_testing() -> Self {
309 let storage = Storage::open_in_memory().unwrap();
310 let vector = HnswIndex::with_defaults().unwrap();
311 let graph = GraphEngine::new();
312 let config = CodememConfig::default();
313 Self {
314 storage: Box::new(storage),
315 vector: Mutex::new(vector),
316 graph: Mutex::new(graph),
317 embeddings: None,
318 db_path: None,
319 index_cache: Mutex::new(None),
320 scoring_weights: RwLock::new(config.scoring.clone()),
321 bm25_index: Mutex::new(Bm25Index::new()),
322 config,
323 metrics: Arc::new(InMemoryMetrics::new()),
324 dirty: AtomicBool::new(false),
325 }
326 }
327
328 pub fn lock_vector(&self) -> Result<std::sync::MutexGuard<'_, HnswIndex>, CodememError> {
331 self.vector
332 .lock()
333 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
334 }
335
336 pub fn lock_graph(&self) -> Result<std::sync::MutexGuard<'_, GraphEngine>, CodememError> {
337 self.graph
338 .lock()
339 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
340 }
341
342 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
343 self.bm25_index
344 .lock()
345 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
346 }
347
348 pub fn lock_embeddings(
349 &self,
350 ) -> Result<
351 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
352 CodememError,
353 > {
354 match &self.embeddings {
355 Some(m) => Ok(Some(m.lock().map_err(|e| {
356 CodememError::LockPoisoned(format!("embeddings: {e}"))
357 })?)),
358 None => Ok(None),
359 }
360 }
361
362 pub fn lock_index_cache(
363 &self,
364 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
365 self.index_cache
366 .lock()
367 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
368 }
369
370 pub fn scoring_weights(
371 &self,
372 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
373 self.scoring_weights
374 .read()
375 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
376 }
377
378 pub fn scoring_weights_mut(
379 &self,
380 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
381 self.scoring_weights
382 .write()
383 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
384 }
385
386 pub fn storage(&self) -> &dyn StorageBackend {
390 &*self.storage
391 }
392
393 pub fn has_embeddings(&self) -> bool {
395 self.embeddings.is_some()
396 }
397
398 pub fn db_path(&self) -> Option<&Path> {
400 self.db_path.as_deref()
401 }
402
403 pub fn config(&self) -> &CodememConfig {
405 &self.config
406 }
407
408 pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
410 &self.metrics
411 }
412
413 pub fn graph_mutex(&self) -> &Mutex<GraphEngine> {
415 &self.graph
416 }
417
418 pub fn vector_mutex(&self) -> &Mutex<HnswIndex> {
420 &self.vector
421 }
422
423 pub fn bm25_mutex(&self) -> &Mutex<Bm25Index> {
425 &self.bm25_index
426 }
427
428 pub fn embeddings_mutex(
430 &self,
431 ) -> Option<&Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
432 self.embeddings.as_ref()
433 }
434
435 #[cfg(test)]
437 pub(crate) fn is_dirty(&self) -> bool {
438 self.dirty.load(Ordering::Acquire)
439 }
440}
441
442pub use file_indexing::{IndexEnrichResult, SessionContext};