1use codemem_core::{
14 CodememConfig, CodememError, ScoringWeights, StorageBackend, VectorBackend, VectorConfig,
15};
16pub use codemem_storage::graph::GraphEngine;
17pub use codemem_storage::HnswIndex;
18pub use codemem_storage::Storage;
19use std::path::{Path, PathBuf};
20use std::sync::atomic::AtomicBool;
21#[cfg(test)]
22use std::sync::atomic::Ordering;
23use std::sync::{Arc, Mutex, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30mod enrichment_text;
31mod file_indexing;
32mod graph_linking;
33pub mod graph_ops;
34pub mod hooks;
35pub mod index;
36pub mod insights;
37mod memory_ops;
38pub mod metrics;
39pub mod patterns;
40pub mod pca;
41pub mod persistence;
42pub mod recall;
43pub mod scoring;
44pub mod search;
45pub mod watch;
46
47#[cfg(test)]
48#[path = "tests/engine_integration_tests.rs"]
49mod integration_tests;
50
51#[cfg(test)]
52#[path = "tests/enrichment_tests.rs"]
53mod enrichment_tests;
54
55#[cfg(test)]
56#[path = "tests/recall_tests.rs"]
57mod recall_tests;
58
59#[cfg(test)]
60#[path = "tests/search_tests.rs"]
61mod search_tests;
62
63#[cfg(test)]
64#[path = "tests/consolidation_tests.rs"]
65mod consolidation_tests;
66
67#[cfg(test)]
68#[path = "tests/analysis_tests.rs"]
69mod analysis_tests;
70
71#[cfg(test)]
72#[path = "tests/persistence_tests.rs"]
73mod persistence_tests;
74
75pub use index::{
77 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
78 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
79 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
80};
81
82pub use bm25::Bm25Index;
84pub use metrics::InMemoryMetrics;
85
86pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
88
89pub use persistence::{edge_weight_for, IndexPersistResult};
91
92pub use recall::{ExpandedResult, NamespaceStats, RecallQuery};
94
95pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
97
98pub use analysis::{
100 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
101};
102
/// A single part carrying its own text plus optional metadata.
///
/// NOTE(review): presumably one segment produced when a memory is split into
/// several pieces — confirm against the callers that construct it.
#[derive(Debug, Clone)]
pub struct SplitPart {
    /// Text content of this part.
    pub content: String,
    /// Optional tags attached to this part.
    pub tags: Option<Vec<String>>,
    /// Optional importance value for this part.
    pub importance: Option<f64>,
}
110
111pub struct IndexCache {
115 pub symbols: Vec<Symbol>,
116 pub chunks: Vec<CodeChunk>,
117 pub root_path: String,
118}
119
120pub struct CodememEngine {
136 pub(crate) storage: Box<dyn StorageBackend>,
137 pub(crate) vector: Mutex<HnswIndex>,
138 pub(crate) graph: Mutex<GraphEngine>,
139 pub(crate) embeddings: Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>,
141 pub(crate) db_path: Option<PathBuf>,
143 pub(crate) index_cache: Mutex<Option<IndexCache>>,
145 pub(crate) scoring_weights: RwLock<ScoringWeights>,
147 pub(crate) bm25_index: Mutex<Bm25Index>,
149 pub(crate) config: CodememConfig,
151 pub(crate) metrics: Arc<InMemoryMetrics>,
153 dirty: AtomicBool,
156 active_session_id: RwLock<Option<String>>,
158}
159
160impl CodememEngine {
161 pub fn new(
163 storage: Box<dyn StorageBackend>,
164 vector: HnswIndex,
165 graph: GraphEngine,
166 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
167 ) -> Self {
168 let config = CodememConfig::load_or_default();
169 Self::new_with_config(storage, vector, graph, embeddings, config)
170 }
171
172 pub fn new_with_config(
174 storage: Box<dyn StorageBackend>,
175 vector: HnswIndex,
176 graph: GraphEngine,
177 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
178 config: CodememConfig,
179 ) -> Self {
180 Self {
181 storage,
182 vector: Mutex::new(vector),
183 graph: Mutex::new(graph),
184 embeddings: embeddings.map(Mutex::new),
185 db_path: None,
186 index_cache: Mutex::new(None),
187 scoring_weights: RwLock::new(config.scoring.clone()),
188 bm25_index: Mutex::new(Bm25Index::new()),
189 config,
190 metrics: Arc::new(InMemoryMetrics::new()),
191 dirty: AtomicBool::new(false),
192 active_session_id: RwLock::new(None),
193 }
194 }
195
196 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
198 if let Some(parent) = db_path.parent() {
200 if !parent.exists() {
201 std::fs::create_dir_all(parent).map_err(|e| {
202 CodememError::Storage(format!(
203 "Failed to create database directory {}: {e}",
204 parent.display()
205 ))
206 })?;
207 }
208 }
209
210 let config = CodememConfig::load_or_default();
211
212 let storage = Storage::open_with_config(
214 db_path,
215 Some(config.storage.cache_size_mb),
216 Some(config.storage.busy_timeout_secs),
217 )?;
218 let vector_config = VectorConfig {
219 dimensions: config.vector.dimensions,
220 ..VectorConfig::default()
221 };
222 let mut vector = HnswIndex::new(vector_config.clone())?;
223
224 let index_path = db_path.with_extension("idx");
226 if index_path.exists() {
227 if let Err(e) = vector.load(&index_path) {
228 tracing::warn!("Stale or corrupt vector index, will rebuild: {e}");
229 }
230 }
231
232 let vector_count = vector.stats().count;
235 let db_stats = storage.stats()?;
236 let db_embed_count = db_stats.embedding_count;
237 if vector_count != db_embed_count {
238 tracing::warn!(
239 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
240 );
241 let mut fresh_vector = HnswIndex::new(vector_config)?;
243 if let Ok(embeddings) = storage.list_all_embeddings() {
244 for (id, embedding) in &embeddings {
245 if let Err(e) = fresh_vector.insert(id, embedding) {
246 tracing::warn!("Failed to re-insert embedding {id}: {e}");
247 }
248 }
249 }
250 vector = fresh_vector;
251 if let Err(e) = vector.save(&index_path) {
253 tracing::warn!("Failed to save rebuilt vector index: {e}");
254 }
255 }
256
257 let graph = GraphEngine::from_storage(&storage)?;
259
260 let embeddings = codemem_embeddings::from_env(Some(&config.embedding)).ok();
262
263 let mut engine =
264 Self::new_with_config(Box::new(storage), vector, graph, embeddings, config);
265 engine.db_path = Some(db_path.to_path_buf());
266
267 engine
270 .lock_graph()?
271 .recompute_centrality_with_options(false);
272
273 let bm25_path = db_path.with_extension("bm25");
275 let mut bm25_loaded = false;
276 if bm25_path.exists() {
277 match std::fs::read(&bm25_path) {
278 Ok(data) => match Bm25Index::deserialize(&data) {
279 Ok(index) => {
280 let mut bm25 = engine.lock_bm25()?;
281 *bm25 = index;
282 bm25_loaded = true;
283 tracing::info!(
284 "Loaded BM25 index from disk ({} documents)",
285 bm25.doc_count
286 );
287 }
288 Err(e) => {
289 tracing::warn!("Failed to deserialize BM25 index, rebuilding: {e}");
290 }
291 },
292 Err(e) => {
293 tracing::warn!("Failed to read BM25 index file, rebuilding: {e}");
294 }
295 }
296 }
297
298 if !bm25_loaded {
299 if let Ok(ids) = engine.storage.list_memory_ids() {
301 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
302 if let Ok(memories) = engine.storage.get_memories_batch(&id_refs) {
303 let mut bm25 = engine.lock_bm25()?;
304 for memory in &memories {
305 bm25.add_document(&memory.id, &memory.content);
306 }
307 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
308 }
309 }
310 }
311
312 Ok(engine)
313 }
314
315 pub fn for_testing() -> Self {
317 let storage = Storage::open_in_memory().unwrap();
318 let vector = HnswIndex::with_defaults().unwrap();
319 let graph = GraphEngine::new();
320 let config = CodememConfig::default();
321 Self {
322 storage: Box::new(storage),
323 vector: Mutex::new(vector),
324 graph: Mutex::new(graph),
325 embeddings: None,
326 db_path: None,
327 index_cache: Mutex::new(None),
328 scoring_weights: RwLock::new(config.scoring.clone()),
329 bm25_index: Mutex::new(Bm25Index::new()),
330 config,
331 metrics: Arc::new(InMemoryMetrics::new()),
332 dirty: AtomicBool::new(false),
333 active_session_id: RwLock::new(None),
334 }
335 }
336
337 pub fn lock_vector(&self) -> Result<std::sync::MutexGuard<'_, HnswIndex>, CodememError> {
340 self.vector
341 .lock()
342 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
343 }
344
345 pub fn lock_graph(&self) -> Result<std::sync::MutexGuard<'_, GraphEngine>, CodememError> {
346 self.graph
347 .lock()
348 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
349 }
350
351 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
352 self.bm25_index
353 .lock()
354 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
355 }
356
357 pub fn lock_embeddings(
358 &self,
359 ) -> Result<
360 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
361 CodememError,
362 > {
363 match &self.embeddings {
364 Some(m) => Ok(Some(m.lock().map_err(|e| {
365 CodememError::LockPoisoned(format!("embeddings: {e}"))
366 })?)),
367 None => Ok(None),
368 }
369 }
370
371 pub fn lock_index_cache(
372 &self,
373 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
374 self.index_cache
375 .lock()
376 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
377 }
378
379 pub fn scoring_weights(
380 &self,
381 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
382 self.scoring_weights
383 .read()
384 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
385 }
386
387 pub fn scoring_weights_mut(
388 &self,
389 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
390 self.scoring_weights
391 .write()
392 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
393 }
394
395 pub fn set_active_session(&self, id: Option<String>) {
399 match self.active_session_id.write() {
400 Ok(mut guard) => *guard = id,
401 Err(e) => *e.into_inner() = id,
402 }
403 }
404
405 pub fn active_session_id(&self) -> Option<String> {
407 match self.active_session_id.read() {
408 Ok(guard) => guard.clone(),
409 Err(e) => e.into_inner().clone(),
410 }
411 }
412
413 pub fn storage(&self) -> &dyn StorageBackend {
417 &*self.storage
418 }
419
420 pub fn has_embeddings(&self) -> bool {
422 self.embeddings.is_some()
423 }
424
425 pub fn db_path(&self) -> Option<&Path> {
427 self.db_path.as_deref()
428 }
429
430 pub fn config(&self) -> &CodememConfig {
432 &self.config
433 }
434
435 pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
437 &self.metrics
438 }
439
440 pub fn with_graph<F, R>(&self, f: F) -> Result<R, CodememError>
445 where
446 F: FnOnce(&GraphEngine) -> R,
447 {
448 let guard = self.lock_graph()?;
449 Ok(f(&guard))
450 }
451
452 pub fn with_vector<F, R>(&self, f: F) -> Result<R, CodememError>
455 where
456 F: FnOnce(&HnswIndex) -> R,
457 {
458 let guard = self.lock_vector()?;
459 Ok(f(&guard))
460 }
461
462 #[cfg(test)]
464 pub(crate) fn is_dirty(&self) -> bool {
465 self.dirty.load(Ordering::Acquire)
466 }
467
468 pub fn list_repos(&self) -> Result<Vec<codemem_core::Repository>, CodememError> {
472 self.storage.list_repos()
473 }
474
475 pub fn add_repo(&self, repo: &codemem_core::Repository) -> Result<(), CodememError> {
477 self.storage.add_repo(repo)
478 }
479
480 pub fn get_repo(&self, id: &str) -> Result<Option<codemem_core::Repository>, CodememError> {
482 self.storage.get_repo(id)
483 }
484
485 pub fn remove_repo(&self, id: &str) -> Result<bool, CodememError> {
487 self.storage.remove_repo(id)
488 }
489
490 pub fn update_repo_status(
492 &self,
493 id: &str,
494 status: &str,
495 indexed_at: Option<&str>,
496 ) -> Result<(), CodememError> {
497 self.storage.update_repo_status(id, status, indexed_at)
498 }
499}
500
501pub use file_indexing::{AnalyzeOptions, AnalyzeProgress, AnalyzeResult, SessionContext};
503
504pub use codemem_embeddings::from_env as embeddings_from_env;
507pub use codemem_embeddings::{EmbeddingProvider, EmbeddingService};