1use codemem_core::{
14 CodememConfig, CodememError, GraphBackend, ScoringWeights, StorageBackend, VectorBackend,
15};
16pub use codemem_storage::graph::GraphEngine;
17pub use codemem_storage::HnswIndex;
18pub use codemem_storage::Storage;
19use std::path::{Path, PathBuf};
20#[cfg(test)]
21use std::sync::atomic::Ordering;
22use std::sync::atomic::{AtomicBool, AtomicI64};
23use std::sync::{Arc, Mutex, OnceLock, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30mod enrichment_text;
31mod file_indexing;
32mod graph_linking;
33pub mod graph_ops;
34pub mod hooks;
35pub mod index;
36pub mod insights;
37mod memory_ops;
38pub mod metrics;
39pub mod patterns;
40pub mod pca;
41pub mod persistence;
42pub mod recall;
43pub mod review;
44pub mod scoring;
45pub mod search;
46pub mod watch;
47
48#[cfg(test)]
49#[path = "tests/engine_integration_tests.rs"]
50mod integration_tests;
51
52#[cfg(test)]
53#[path = "tests/enrichment_tests.rs"]
54mod enrichment_tests;
55
56#[cfg(test)]
57#[path = "tests/recall_tests.rs"]
58mod recall_tests;
59
60#[cfg(test)]
61#[path = "tests/search_tests.rs"]
62mod search_tests;
63
64#[cfg(test)]
65#[path = "tests/consolidation_tests.rs"]
66mod consolidation_tests;
67
68#[cfg(test)]
69#[path = "tests/analysis_tests.rs"]
70mod analysis_tests;
71
72#[cfg(test)]
73#[path = "tests/persistence_tests.rs"]
74mod persistence_tests;
75
76#[cfg(test)]
77#[path = "tests/memory_expiry_tests.rs"]
78mod memory_expiry_tests;
79
80#[cfg(test)]
81#[path = "tests/scope_tests.rs"]
82mod scope_tests;
83
84pub use index::{
86 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
87 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
88 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
89};
90
91pub use bm25::Bm25Index;
93pub use metrics::InMemoryMetrics;
94pub use review::{BlastRadiusReport, DiffSymbolMapping};
95
96pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
98
99pub use persistence::{edge_weight_for, CrossRepoPersistResult, IndexPersistResult};
101
102pub use recall::{ExpandedResult, NamespaceStats, RecallQuery};
104
105pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
107
108pub use analysis::{
110 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
111};
112
/// One part of a split: a piece of content with optional per-part metadata.
///
/// NOTE(review): presumably the input unit of a memory-split operation
/// defined elsewhere in the crate — confirm against the consumer.
///
/// `PartialEq` is derived so parts can be compared directly (for example
/// with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct SplitPart {
    /// Text carried by this part.
    pub content: String,
    /// Optional tags for this part; `None` means no tags were supplied.
    pub tags: Option<Vec<String>>,
    /// Optional importance value for this part; `None` means unspecified.
    pub importance: Option<f64>,
}
120
121pub struct IndexCache {
125 pub symbols: Vec<Symbol>,
126 pub chunks: Vec<CodeChunk>,
127 pub root_path: String,
128}
129
130pub struct CodememEngine {
144 pub(crate) storage: Box<dyn StorageBackend>,
145 pub(crate) vector: OnceLock<Mutex<Box<dyn VectorBackend>>>,
147 pub(crate) graph: Mutex<Box<dyn GraphBackend>>,
148 pub(crate) embeddings: OnceLock<Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>>,
150 pub(crate) db_path: Option<PathBuf>,
152 pub(crate) index_cache: Mutex<Option<IndexCache>>,
154 pub(crate) scoring_weights: RwLock<ScoringWeights>,
156 pub(crate) bm25_index: OnceLock<Mutex<Bm25Index>>,
158 pub(crate) config: CodememConfig,
160 pub(crate) metrics: Arc<InMemoryMetrics>,
162 dirty: AtomicBool,
165 active_session_id: RwLock<Option<String>>,
167 scope: RwLock<Option<codemem_core::ScopeContext>>,
169 change_detector: Mutex<Option<index::incremental::ChangeDetector>>,
172 last_expiry_sweep: AtomicI64,
175}
176
177impl CodememEngine {
178 pub fn new(
180 storage: Box<dyn StorageBackend>,
181 vector: Box<dyn VectorBackend>,
182 graph: Box<dyn GraphBackend>,
183 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
184 ) -> Self {
185 let config = CodememConfig::load_or_default();
186 Self::new_with_config(storage, vector, graph, embeddings, config)
187 }
188
189 pub fn new_with_config(
191 storage: Box<dyn StorageBackend>,
192 vector: Box<dyn VectorBackend>,
193 graph: Box<dyn GraphBackend>,
194 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
195 config: CodememConfig,
196 ) -> Self {
197 let vector_lock = OnceLock::new();
198 let _ = vector_lock.set(Mutex::new(vector));
199 let embeddings_lock = OnceLock::new();
200 let _ = embeddings_lock.set(embeddings.map(Mutex::new));
201 let bm25_lock = OnceLock::new();
202 let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
203 Self {
204 storage,
205 vector: vector_lock,
206 graph: Mutex::new(graph),
207 embeddings: embeddings_lock,
208 db_path: None,
209 index_cache: Mutex::new(None),
210 scoring_weights: RwLock::new(config.scoring.clone()),
211 bm25_index: bm25_lock,
212 config,
213 metrics: Arc::new(InMemoryMetrics::new()),
214 dirty: AtomicBool::new(false),
215 active_session_id: RwLock::new(None),
216 scope: RwLock::new(None),
217 change_detector: Mutex::new(None),
218 last_expiry_sweep: AtomicI64::new(0),
219 }
220 }
221
222 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
230 if let Some(parent) = db_path.parent() {
232 if !parent.exists() {
233 std::fs::create_dir_all(parent).map_err(|e| {
234 CodememError::Storage(format!(
235 "Failed to create database directory {}: {e}",
236 parent.display()
237 ))
238 })?;
239 }
240 }
241
242 let config = CodememConfig::load_or_default();
243
244 if !config.storage.backend.eq_ignore_ascii_case("sqlite") {
247 return Err(CodememError::Config(format!(
248 "Unsupported storage backend '{}'. Only 'sqlite' is available in this build.",
249 config.storage.backend
250 )));
251 }
252 if !config.vector.backend.eq_ignore_ascii_case("hnsw") {
253 return Err(CodememError::Config(format!(
254 "Unsupported vector backend '{}'. Only 'hnsw' is available in this build.",
255 config.vector.backend
256 )));
257 }
258 if !config.graph.backend.eq_ignore_ascii_case("petgraph") {
259 return Err(CodememError::Config(format!(
260 "Unsupported graph backend '{}'. Only 'petgraph' is available in this build.",
261 config.graph.backend
262 )));
263 }
264
265 let storage = Storage::open_with_config(
267 db_path,
268 Some(config.storage.cache_size_mb),
269 Some(config.storage.busy_timeout_secs),
270 )?;
271
272 let graph = GraphEngine::from_storage(&storage)?;
274
275 let engine = Self {
276 storage: Box::new(storage),
277 vector: OnceLock::new(),
278 graph: Mutex::new(Box::new(graph)),
279 embeddings: OnceLock::new(),
280 db_path: Some(db_path.to_path_buf()),
281 index_cache: Mutex::new(None),
282 scoring_weights: RwLock::new(config.scoring.clone()),
283 bm25_index: OnceLock::new(),
284 config,
285 metrics: Arc::new(InMemoryMetrics::new()),
286 dirty: AtomicBool::new(false),
287 active_session_id: RwLock::new(None),
288 scope: RwLock::new(None),
289 change_detector: Mutex::new(None),
290 last_expiry_sweep: AtomicI64::new(0),
291 };
292
293 engine
296 .lock_graph()?
297 .recompute_centrality_with_options(false);
298
299 Ok(engine)
300 }
301
302 pub fn for_testing() -> Self {
304 let storage = Storage::open_in_memory().unwrap();
305 let graph = GraphEngine::new();
306 let config = CodememConfig::default();
307 let vector_lock = OnceLock::new();
308 let _ = vector_lock.set(Mutex::new(
309 Box::new(HnswIndex::with_defaults().unwrap()) as Box<dyn VectorBackend>
310 ));
311 let embeddings_lock = OnceLock::new();
312 let _ = embeddings_lock.set(None);
313 let bm25_lock = OnceLock::new();
314 let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
315 Self {
316 storage: Box::new(storage),
317 vector: vector_lock,
318 graph: Mutex::new(Box::new(graph)),
319 embeddings: embeddings_lock,
320 db_path: None,
321 index_cache: Mutex::new(None),
322 scoring_weights: RwLock::new(config.scoring.clone()),
323 bm25_index: bm25_lock,
324 config,
325 metrics: Arc::new(InMemoryMetrics::new()),
326 dirty: AtomicBool::new(false),
327 active_session_id: RwLock::new(None),
328 scope: RwLock::new(None),
329 change_detector: Mutex::new(None),
330 last_expiry_sweep: AtomicI64::new(0),
331 }
332 }
333
334 pub fn lock_vector(
337 &self,
338 ) -> Result<std::sync::MutexGuard<'_, Box<dyn VectorBackend>>, CodememError> {
339 self.vector
340 .get_or_init(|| self.init_vector())
341 .lock()
342 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
343 }
344
345 pub fn lock_graph(
346 &self,
347 ) -> Result<std::sync::MutexGuard<'_, Box<dyn GraphBackend>>, CodememError> {
348 self.graph
349 .lock()
350 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
351 }
352
353 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
354 self.bm25_index
355 .get_or_init(|| self.init_bm25())
356 .lock()
357 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
358 }
359
360 pub fn lock_embeddings(
364 &self,
365 ) -> Result<
366 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
367 CodememError,
368 > {
369 match self.embeddings.get_or_init(|| self.init_embeddings()) {
370 Some(m) => Ok(Some(m.lock().map_err(|e| {
371 CodememError::LockPoisoned(format!("embeddings: {e}"))
372 })?)),
373 None => Ok(None),
374 }
375 }
376
377 fn embeddings_ready(&self) -> bool {
379 self.embeddings.get().is_some_and(|opt| opt.is_some())
380 }
381
382 fn vector_ready(&self) -> bool {
384 self.vector.get().is_some()
385 }
386
387 fn bm25_ready(&self) -> bool {
389 self.bm25_index.get().is_some()
390 }
391
392 pub fn lock_index_cache(
393 &self,
394 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
395 self.index_cache
396 .lock()
397 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
398 }
399
400 pub fn scoring_weights(
401 &self,
402 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
403 self.scoring_weights
404 .read()
405 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
406 }
407
408 pub fn scoring_weights_mut(
409 &self,
410 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
411 self.scoring_weights
412 .write()
413 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
414 }
415
416 fn init_vector(&self) -> Mutex<Box<dyn VectorBackend>> {
420 let vector_config = self.config.vector.clone();
421 let mut vector = HnswIndex::new(vector_config.clone())
422 .unwrap_or_else(|_| HnswIndex::with_defaults().expect("default vector index"));
423
424 if let Some(ref db_path) = self.db_path {
425 let index_path = db_path.with_extension("idx");
426 if index_path.exists() {
427 if let Err(e) = vector.load(&index_path) {
428 tracing::warn!("Stale or corrupt vector index, will rebuild: {e}");
429 }
430 }
431
432 let vector_count = vector.stats().count;
434 if let Ok(db_stats) = self.storage.stats() {
435 let db_embed_count = db_stats.embedding_count;
436 if vector_count != db_embed_count {
437 tracing::warn!(
438 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
439 );
440 if let Ok(mut fresh) = HnswIndex::new(vector_config) {
441 if let Ok(embeddings) = self.storage.list_all_embeddings() {
442 for (id, emb) in &embeddings {
443 if let Err(e) = fresh.insert(id, emb) {
444 tracing::warn!("Failed to re-insert embedding {id}: {e}");
445 }
446 }
447 }
448 vector = fresh;
449 if let Err(e) = vector.save(&index_path) {
450 tracing::warn!("Failed to save rebuilt vector index: {e}");
451 }
452 }
453 }
454 }
455 }
456
457 Mutex::new(Box::new(vector))
458 }
459
460 fn init_bm25(&self) -> Mutex<Bm25Index> {
462 let mut bm25 = Bm25Index::new();
463
464 if let Some(ref db_path) = self.db_path {
465 let bm25_path = db_path.with_extension("bm25");
466 let mut loaded = false;
467 if bm25_path.exists() {
468 if let Ok(data) = std::fs::read(&bm25_path) {
469 if let Ok(index) = Bm25Index::deserialize(&data) {
470 tracing::info!(
471 "Loaded BM25 index from disk ({} documents)",
472 index.doc_count
473 );
474 bm25 = index;
475 loaded = true;
476 }
477 }
478 }
479 if !loaded {
480 if let Ok(ids) = self.storage.list_memory_ids() {
481 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
482 if let Ok(memories) = self.storage.get_memories_batch(&id_refs) {
483 for m in &memories {
484 bm25.add_document(&m.id, &m.content);
485 }
486 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
487 }
488 }
489 }
490 }
491
492 Mutex::new(bm25)
493 }
494
495 fn init_embeddings(&self) -> Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
500 let provider = match codemem_embeddings::from_env(Some(&self.config.embedding)) {
501 Ok(p) => p,
502 Err(e) => {
503 tracing::warn!("Failed to initialize embedding provider: {e}");
504 return None;
505 }
506 };
507
508 self.backfill_embeddings(&*provider);
510
511 Some(Mutex::new(provider))
512 }
513
514 fn backfill_embeddings(&self, provider: &dyn codemem_embeddings::EmbeddingProvider) {
519 let ids = match self.storage.list_memory_ids() {
520 Ok(ids) => ids,
521 Err(_) => return,
522 };
523
524 let mut to_embed: Vec<(String, String)> = Vec::new();
525 for id in &ids {
526 if self.storage.get_embedding(id).ok().flatten().is_none() {
527 if let Ok(Some(mem)) = self.storage.get_memory_no_touch(id) {
528 let text = self.enrich_memory_text(
529 &mem.content,
530 mem.memory_type,
531 &mem.tags,
532 mem.namespace.as_deref(),
533 Some(&mem.id),
534 );
535 to_embed.push((id.clone(), text));
536 }
537 }
538 }
539
540 if to_embed.is_empty() {
541 return;
542 }
543
544 tracing::info!("Backfilling {} un-embedded memories", to_embed.len());
545 let text_refs: Vec<&str> = to_embed.iter().map(|(_, t)| t.as_str()).collect();
546 match provider.embed_batch(&text_refs) {
547 Ok(embeddings) => {
548 for ((id, _), emb) in to_embed.iter().zip(embeddings.iter()) {
549 let _ = self.storage.store_embedding(id, emb);
550 if let Some(vi_mutex) = self.vector.get() {
552 if let Ok(mut vi) = vi_mutex.lock().map_err(|e| {
553 tracing::warn!("Vector lock failed during backfill: {e}");
554 e
555 }) {
556 let _ = vi.insert(id, emb);
557 }
558 }
559 }
560 tracing::info!("Backfilled {} embeddings", to_embed.len());
561 }
562 Err(e) => tracing::warn!("Backfill embedding failed: {e}"),
563 }
564 }
565
566 pub fn set_active_session(&self, id: Option<String>) {
570 match self.active_session_id.write() {
571 Ok(mut guard) => *guard = id,
572 Err(e) => *e.into_inner() = id,
573 }
574 }
575
576 pub fn active_session_id(&self) -> Option<String> {
578 match self.active_session_id.read() {
579 Ok(guard) => guard.clone(),
580 Err(e) => e.into_inner().clone(),
581 }
582 }
583
584 pub fn set_scope(&self, scope: Option<codemem_core::ScopeContext>) {
588 match self.scope.write() {
589 Ok(mut guard) => *guard = scope,
590 Err(e) => *e.into_inner() = scope,
591 }
592 }
593
594 pub fn scope(&self) -> Option<codemem_core::ScopeContext> {
596 match self.scope.read() {
597 Ok(guard) => guard.clone(),
598 Err(e) => e.into_inner().clone(),
599 }
600 }
601
602 pub fn scope_namespace(&self) -> Option<String> {
604 self.scope().map(|s| s.namespace().to_string())
605 }
606
607 pub fn storage(&self) -> &dyn StorageBackend {
611 &*self.storage
612 }
613
614 pub fn has_embeddings(&self) -> bool {
619 match self.embeddings.get() {
620 Some(opt) => opt.is_some(),
621 None => !self.config.embedding.provider.is_empty(),
622 }
623 }
624
625 pub fn db_path(&self) -> Option<&Path> {
627 self.db_path.as_deref()
628 }
629
630 pub fn config(&self) -> &CodememConfig {
632 &self.config
633 }
634
635 pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
637 &self.metrics
638 }
639
640 pub fn with_graph<F, R>(&self, f: F) -> Result<R, CodememError>
645 where
646 F: FnOnce(&dyn GraphBackend) -> R,
647 {
648 let guard = self.lock_graph()?;
649 Ok(f(&**guard))
650 }
651
652 pub fn with_vector<F, R>(&self, f: F) -> Result<R, CodememError>
655 where
656 F: FnOnce(&dyn VectorBackend) -> R,
657 {
658 let guard = self.lock_vector()?;
659 Ok(f(&**guard))
660 }
661
/// Test-only accessor for the `dirty` flag (loaded with `Acquire` ordering).
#[cfg(test)]
pub(crate) fn is_dirty(&self) -> bool {
    let flag = &self.dirty;
    flag.load(Ordering::Acquire)
}
667
668 pub fn list_repos(&self) -> Result<Vec<codemem_core::Repository>, CodememError> {
672 self.storage.list_repos()
673 }
674
675 pub fn add_repo(&self, repo: &codemem_core::Repository) -> Result<(), CodememError> {
677 self.storage.add_repo(repo)
678 }
679
680 pub fn get_repo(&self, id: &str) -> Result<Option<codemem_core::Repository>, CodememError> {
682 self.storage.get_repo(id)
683 }
684
685 pub fn remove_repo(&self, id: &str) -> Result<bool, CodememError> {
687 self.storage.remove_repo(id)
688 }
689
690 pub fn update_repo_status(
692 &self,
693 id: &str,
694 status: &str,
695 indexed_at: Option<&str>,
696 ) -> Result<(), CodememError> {
697 self.storage.update_repo_status(id, status, indexed_at)
698 }
699}
700
701pub use file_indexing::{AnalyzeOptions, AnalyzeProgress, AnalyzeResult, SessionContext};
703
704pub use codemem_embeddings::from_env as embeddings_from_env;
707pub use codemem_embeddings::{EmbeddingProvider, EmbeddingService};