1use codemem_core::{
14 CodememConfig, CodememError, GraphBackend, ScoringWeights, StorageBackend, VectorBackend,
15};
16pub use codemem_storage::graph::GraphEngine;
17pub use codemem_storage::HnswIndex;
18pub use codemem_storage::Storage;
19use std::path::{Path, PathBuf};
20#[cfg(test)]
21use std::sync::atomic::Ordering;
22use std::sync::atomic::{AtomicBool, AtomicI64};
23use std::sync::{Arc, Mutex, OnceLock, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30mod enrichment_text;
31mod file_indexing;
32mod graph_linking;
33pub mod graph_ops;
34pub mod hooks;
35pub mod index;
36pub mod insights;
37mod memory_ops;
38pub mod metrics;
39pub mod patterns;
40pub mod pca;
41pub mod persistence;
42pub mod recall;
43pub mod review;
44pub mod scoring;
45pub mod search;
46pub mod watch;
47
48#[cfg(test)]
49#[path = "tests/engine_integration_tests.rs"]
50mod integration_tests;
51
52#[cfg(test)]
53#[path = "tests/enrichment_tests.rs"]
54mod enrichment_tests;
55
56#[cfg(test)]
57#[path = "tests/recall_tests.rs"]
58mod recall_tests;
59
60#[cfg(test)]
61#[path = "tests/search_tests.rs"]
62mod search_tests;
63
64#[cfg(test)]
65#[path = "tests/consolidation_tests.rs"]
66mod consolidation_tests;
67
68#[cfg(test)]
69#[path = "tests/analysis_tests.rs"]
70mod analysis_tests;
71
72#[cfg(test)]
73#[path = "tests/persistence_tests.rs"]
74mod persistence_tests;
75
76#[cfg(test)]
77#[path = "tests/memory_expiry_tests.rs"]
78mod memory_expiry_tests;
79
80#[cfg(test)]
81#[path = "tests/scope_tests.rs"]
82mod scope_tests;
83
84#[cfg(test)]
85#[path = "tests/graph_ops_tests.rs"]
86mod graph_ops_tests;
87
88pub use index::{
90 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
91 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
92 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
93};
94
95pub use bm25::Bm25Index;
97pub use metrics::InMemoryMetrics;
98pub use review::{BlastRadiusReport, DiffSymbolMapping};
99
100pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
102
103pub use persistence::{edge_weight_for, CrossRepoPersistResult, IndexPersistResult};
105
106pub use recall::{ExpandedResult, NamespaceStats, RecallQuery};
108
109pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
111
112pub use analysis::{
114 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
115};
116
/// One piece of content together with optional tags and an optional importance score.
///
/// NOTE(review): presumably describes a single part produced when a memory is split;
/// the consumer is not visible in this file — confirm against the caller.
#[derive(Debug, Clone)]
pub struct SplitPart {
    /// Text content of this part.
    pub content: String,
    /// Optional tags for this part; `None` when no tags are supplied.
    pub tags: Option<Vec<String>>,
    /// Optional importance value; `None` when no importance is supplied.
    pub importance: Option<f64>,
}
124
/// Cached indexing output, stored inside [`CodememEngine`] behind
/// `Mutex<Option<IndexCache>>` (see `CodememEngine::lock_index_cache`).
pub struct IndexCache {
    /// Symbols held in the cache.
    pub symbols: Vec<Symbol>,
    /// Code chunks held in the cache.
    pub chunks: Vec<CodeChunk>,
    /// Root path associated with the cached data.
    /// NOTE(review): presumably the root of the indexed source tree — confirm.
    pub root_path: String,
}
133
/// Engine object bundling the storage, vector, graph, embedding and BM25 components
/// together with configuration, metrics and session/scope state.
///
/// The `vector`, `embeddings` and `bm25_index` slots are `OnceLock`s: the
/// `new_with_config` and `for_testing` constructors fill them immediately, while
/// `from_db_path` leaves them empty so they are populated on first use by the
/// matching `lock_*` accessor.
pub struct CodememEngine {
    /// Storage backend; exposed read-only through `storage()`.
    pub(crate) storage: Box<dyn StorageBackend>,
    /// Vector index slot; initialised on demand by `lock_vector` via `init_vector`.
    pub(crate) vector: OnceLock<Mutex<Box<dyn VectorBackend>>>,
    /// Graph backend guarded by a mutex; access through `lock_graph` / `with_graph`.
    pub(crate) graph: Mutex<Box<dyn GraphBackend>>,
    /// Embedding provider slot. The inner `Option` is `None` when no provider is
    /// available; initialised on demand by `lock_embeddings` via `init_embeddings`.
    pub(crate) embeddings: OnceLock<Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>>,
    /// Database path; `Some` only for engines created with `from_db_path`.
    pub(crate) db_path: Option<PathBuf>,
    /// Optional cached indexing output; starts as `None`.
    pub(crate) index_cache: Mutex<Option<IndexCache>>,
    /// Scoring weights, seeded from `config.scoring` at construction.
    pub(crate) scoring_weights: RwLock<ScoringWeights>,
    /// BM25 index slot; initialised on demand by `lock_bm25` via `init_bm25`.
    pub(crate) bm25_index: OnceLock<Mutex<Bm25Index>>,
    /// Configuration the engine was built with.
    pub(crate) config: CodememConfig,
    /// Shared in-memory metrics collector.
    pub(crate) metrics: Arc<InMemoryMetrics>,
    /// Dirty flag, initialised to `false`; readable in tests through `is_dirty`.
    /// NOTE(review): the code that sets it is outside this file.
    dirty: AtomicBool,
    /// Identifier of the active session, if any (see `set_active_session`).
    active_session_id: RwLock<Option<String>>,
    /// Current scope context, if any (see `set_scope`).
    scope: RwLock<Option<codemem_core::ScopeContext>>,
    /// Optional incremental change detector; starts as `None`.
    /// NOTE(review): populated elsewhere — not visible in this file.
    change_detector: Mutex<Option<index::incremental::ChangeDetector>>,
    /// Initialised to `0`.
    /// NOTE(review): presumably the time of the last expiry sweep — confirm the unit
    /// against the code that writes it.
    last_expiry_sweep: AtomicI64,
}
180
181impl CodememEngine {
182 pub fn new(
184 storage: Box<dyn StorageBackend>,
185 vector: Box<dyn VectorBackend>,
186 graph: Box<dyn GraphBackend>,
187 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
188 ) -> Self {
189 let config = CodememConfig::load_or_default();
190 Self::new_with_config(storage, vector, graph, embeddings, config)
191 }
192
193 pub fn new_with_config(
195 storage: Box<dyn StorageBackend>,
196 vector: Box<dyn VectorBackend>,
197 graph: Box<dyn GraphBackend>,
198 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
199 config: CodememConfig,
200 ) -> Self {
201 let vector_lock = OnceLock::new();
202 let _ = vector_lock.set(Mutex::new(vector));
203 let embeddings_lock = OnceLock::new();
204 let _ = embeddings_lock.set(embeddings.map(Mutex::new));
205 let bm25_lock = OnceLock::new();
206 let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
207 Self {
208 storage,
209 vector: vector_lock,
210 graph: Mutex::new(graph),
211 embeddings: embeddings_lock,
212 db_path: None,
213 index_cache: Mutex::new(None),
214 scoring_weights: RwLock::new(config.scoring.clone()),
215 bm25_index: bm25_lock,
216 config,
217 metrics: Arc::new(InMemoryMetrics::new()),
218 dirty: AtomicBool::new(false),
219 active_session_id: RwLock::new(None),
220 scope: RwLock::new(None),
221 change_detector: Mutex::new(None),
222 last_expiry_sweep: AtomicI64::new(0),
223 }
224 }
225
226 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
234 if let Some(parent) = db_path.parent() {
236 if !parent.exists() {
237 std::fs::create_dir_all(parent).map_err(|e| {
238 CodememError::Storage(format!(
239 "Failed to create database directory {}: {e}",
240 parent.display()
241 ))
242 })?;
243 }
244 }
245
246 let config = CodememConfig::load_or_default();
247
248 if !config.storage.backend.eq_ignore_ascii_case("sqlite") {
251 return Err(CodememError::Config(format!(
252 "Unsupported storage backend '{}'. Only 'sqlite' is available in this build.",
253 config.storage.backend
254 )));
255 }
256 if !config.vector.backend.eq_ignore_ascii_case("hnsw") {
257 return Err(CodememError::Config(format!(
258 "Unsupported vector backend '{}'. Only 'hnsw' is available in this build.",
259 config.vector.backend
260 )));
261 }
262 if !config.graph.backend.eq_ignore_ascii_case("petgraph") {
263 return Err(CodememError::Config(format!(
264 "Unsupported graph backend '{}'. Only 'petgraph' is available in this build.",
265 config.graph.backend
266 )));
267 }
268
269 let storage = Storage::open_with_config(
271 db_path,
272 Some(config.storage.cache_size_mb),
273 Some(config.storage.busy_timeout_secs),
274 )?;
275
276 let graph = GraphEngine::from_storage(&storage)?;
278
279 let engine = Self {
280 storage: Box::new(storage),
281 vector: OnceLock::new(),
282 graph: Mutex::new(Box::new(graph)),
283 embeddings: OnceLock::new(),
284 db_path: Some(db_path.to_path_buf()),
285 index_cache: Mutex::new(None),
286 scoring_weights: RwLock::new(config.scoring.clone()),
287 bm25_index: OnceLock::new(),
288 config,
289 metrics: Arc::new(InMemoryMetrics::new()),
290 dirty: AtomicBool::new(false),
291 active_session_id: RwLock::new(None),
292 scope: RwLock::new(None),
293 change_detector: Mutex::new(None),
294 last_expiry_sweep: AtomicI64::new(0),
295 };
296
297 engine
300 .lock_graph()?
301 .recompute_centrality_with_options(false);
302
303 Ok(engine)
304 }
305
306 pub fn for_testing() -> Self {
308 let storage = Storage::open_in_memory().unwrap();
309 let graph = GraphEngine::new();
310 let config = CodememConfig::default();
311 let vector_lock = OnceLock::new();
312 let _ = vector_lock.set(Mutex::new(
313 Box::new(HnswIndex::with_defaults().unwrap()) as Box<dyn VectorBackend>
314 ));
315 let embeddings_lock = OnceLock::new();
316 let _ = embeddings_lock.set(None);
317 let bm25_lock = OnceLock::new();
318 let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
319 Self {
320 storage: Box::new(storage),
321 vector: vector_lock,
322 graph: Mutex::new(Box::new(graph)),
323 embeddings: embeddings_lock,
324 db_path: None,
325 index_cache: Mutex::new(None),
326 scoring_weights: RwLock::new(config.scoring.clone()),
327 bm25_index: bm25_lock,
328 config,
329 metrics: Arc::new(InMemoryMetrics::new()),
330 dirty: AtomicBool::new(false),
331 active_session_id: RwLock::new(None),
332 scope: RwLock::new(None),
333 change_detector: Mutex::new(None),
334 last_expiry_sweep: AtomicI64::new(0),
335 }
336 }
337
338 pub fn lock_vector(
341 &self,
342 ) -> Result<std::sync::MutexGuard<'_, Box<dyn VectorBackend>>, CodememError> {
343 self.vector
344 .get_or_init(|| self.init_vector())
345 .lock()
346 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
347 }
348
349 pub fn lock_graph(
350 &self,
351 ) -> Result<std::sync::MutexGuard<'_, Box<dyn GraphBackend>>, CodememError> {
352 self.graph
353 .lock()
354 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
355 }
356
357 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
358 self.bm25_index
359 .get_or_init(|| self.init_bm25())
360 .lock()
361 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
362 }
363
364 pub fn lock_embeddings(
368 &self,
369 ) -> Result<
370 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
371 CodememError,
372 > {
373 match self.embeddings.get_or_init(|| self.init_embeddings()) {
374 Some(m) => Ok(Some(m.lock().map_err(|e| {
375 CodememError::LockPoisoned(format!("embeddings: {e}"))
376 })?)),
377 None => Ok(None),
378 }
379 }
380
381 fn embeddings_ready(&self) -> bool {
383 self.embeddings.get().is_some_and(|opt| opt.is_some())
384 }
385
386 fn vector_ready(&self) -> bool {
388 self.vector.get().is_some()
389 }
390
391 fn bm25_ready(&self) -> bool {
393 self.bm25_index.get().is_some()
394 }
395
396 pub fn lock_index_cache(
397 &self,
398 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
399 self.index_cache
400 .lock()
401 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
402 }
403
404 pub fn scoring_weights(
405 &self,
406 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
407 self.scoring_weights
408 .read()
409 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
410 }
411
412 pub fn scoring_weights_mut(
413 &self,
414 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
415 self.scoring_weights
416 .write()
417 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
418 }
419
420 fn init_vector(&self) -> Mutex<Box<dyn VectorBackend>> {
424 let vector_config = self.config.vector.clone();
425 let mut vector = HnswIndex::new(vector_config.clone())
426 .unwrap_or_else(|_| HnswIndex::with_defaults().expect("default vector index"));
427
428 if let Some(ref db_path) = self.db_path {
429 let index_path = db_path.with_extension("idx");
430 if index_path.exists() {
431 if let Err(e) = vector.load(&index_path) {
432 tracing::warn!("Stale or corrupt vector index, will rebuild: {e}");
433 }
434 }
435
436 let vector_count = vector.stats().count;
438 if let Ok(db_stats) = self.storage.stats() {
439 let db_embed_count = db_stats.embedding_count;
440 if vector_count != db_embed_count {
441 tracing::warn!(
442 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
443 );
444 if let Ok(mut fresh) = HnswIndex::new(vector_config) {
445 if let Ok(embeddings) = self.storage.list_all_embeddings() {
446 for (id, emb) in &embeddings {
447 if let Err(e) = fresh.insert(id, emb) {
448 tracing::warn!("Failed to re-insert embedding {id}: {e}");
449 }
450 }
451 }
452 vector = fresh;
453 if let Err(e) = vector.save(&index_path) {
454 tracing::warn!("Failed to save rebuilt vector index: {e}");
455 }
456 }
457 }
458 }
459 }
460
461 Mutex::new(Box::new(vector))
462 }
463
464 fn init_bm25(&self) -> Mutex<Bm25Index> {
466 let mut bm25 = Bm25Index::new();
467
468 if let Some(ref db_path) = self.db_path {
469 let bm25_path = db_path.with_extension("bm25");
470 let mut loaded = false;
471 if bm25_path.exists() {
472 if let Ok(data) = std::fs::read(&bm25_path) {
473 if let Ok(index) = Bm25Index::deserialize(&data) {
474 tracing::info!(
475 "Loaded BM25 index from disk ({} documents)",
476 index.doc_count
477 );
478 bm25 = index;
479 loaded = true;
480 }
481 }
482 }
483 if !loaded {
484 if let Ok(ids) = self.storage.list_memory_ids() {
485 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
486 if let Ok(memories) = self.storage.get_memories_batch(&id_refs) {
487 for m in &memories {
488 bm25.add_document(&m.id, &m.content);
489 }
490 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
491 }
492 }
493 }
494 }
495
496 Mutex::new(bm25)
497 }
498
499 fn init_embeddings(&self) -> Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
504 let provider = match codemem_embeddings::from_env(Some(&self.config.embedding)) {
505 Ok(p) => p,
506 Err(e) => {
507 tracing::warn!("Failed to initialize embedding provider: {e}");
508 return None;
509 }
510 };
511
512 self.backfill_embeddings(&*provider);
514
515 Some(Mutex::new(provider))
516 }
517
518 fn backfill_embeddings(&self, provider: &dyn codemem_embeddings::EmbeddingProvider) {
523 let ids = match self.storage.list_memory_ids() {
524 Ok(ids) => ids,
525 Err(_) => return,
526 };
527
528 let mut to_embed: Vec<(String, String)> = Vec::new();
529 for id in &ids {
530 if self.storage.get_embedding(id).ok().flatten().is_none() {
531 if let Ok(Some(mem)) = self.storage.get_memory_no_touch(id) {
532 let text = self.enrich_memory_text(
533 &mem.content,
534 mem.memory_type,
535 &mem.tags,
536 mem.namespace.as_deref(),
537 Some(&mem.id),
538 );
539 to_embed.push((id.clone(), text));
540 }
541 }
542 }
543
544 if to_embed.is_empty() {
545 return;
546 }
547
548 tracing::info!("Backfilling {} un-embedded memories", to_embed.len());
549 let text_refs: Vec<&str> = to_embed.iter().map(|(_, t)| t.as_str()).collect();
550 match provider.embed_batch(&text_refs) {
551 Ok(embeddings) => {
552 for ((id, _), emb) in to_embed.iter().zip(embeddings.iter()) {
553 let _ = self.storage.store_embedding(id, emb);
554 if let Some(vi_mutex) = self.vector.get() {
556 if let Ok(mut vi) = vi_mutex.lock().map_err(|e| {
557 tracing::warn!("Vector lock failed during backfill: {e}");
558 e
559 }) {
560 let _ = vi.insert(id, emb);
561 }
562 }
563 }
564 tracing::info!("Backfilled {} embeddings", to_embed.len());
565 }
566 Err(e) => tracing::warn!("Backfill embedding failed: {e}"),
567 }
568 }
569
570 pub fn set_active_session(&self, id: Option<String>) {
574 match self.active_session_id.write() {
575 Ok(mut guard) => *guard = id,
576 Err(e) => *e.into_inner() = id,
577 }
578 }
579
580 pub fn active_session_id(&self) -> Option<String> {
582 match self.active_session_id.read() {
583 Ok(guard) => guard.clone(),
584 Err(e) => e.into_inner().clone(),
585 }
586 }
587
588 pub fn set_scope(&self, scope: Option<codemem_core::ScopeContext>) {
592 match self.scope.write() {
593 Ok(mut guard) => *guard = scope,
594 Err(e) => *e.into_inner() = scope,
595 }
596 }
597
598 pub fn scope(&self) -> Option<codemem_core::ScopeContext> {
600 match self.scope.read() {
601 Ok(guard) => guard.clone(),
602 Err(e) => e.into_inner().clone(),
603 }
604 }
605
606 pub fn scope_namespace(&self) -> Option<String> {
608 self.scope().map(|s| s.namespace().to_string())
609 }
610
611 pub fn storage(&self) -> &dyn StorageBackend {
615 &*self.storage
616 }
617
618 pub fn has_embeddings(&self) -> bool {
623 match self.embeddings.get() {
624 Some(opt) => opt.is_some(),
625 None => !self.config.embedding.provider.is_empty(),
626 }
627 }
628
629 pub fn db_path(&self) -> Option<&Path> {
631 self.db_path.as_deref()
632 }
633
/// Borrows the configuration this engine was constructed with.
pub fn config(&self) -> &CodememConfig {
    &self.config
}
638
/// Borrows the shared metrics collector. Clone the returned `Arc` to keep a
/// handle beyond the borrow.
pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
    &self.metrics
}
643
644 pub fn with_graph<F, R>(&self, f: F) -> Result<R, CodememError>
649 where
650 F: FnOnce(&dyn GraphBackend) -> R,
651 {
652 let guard = self.lock_graph()?;
653 Ok(f(&**guard))
654 }
655
656 pub fn with_vector<F, R>(&self, f: F) -> Result<R, CodememError>
659 where
660 F: FnOnce(&dyn VectorBackend) -> R,
661 {
662 let guard = self.lock_vector()?;
663 Ok(f(&**guard))
664 }
665
/// Test-only: reads the `dirty` flag with `Acquire` ordering.
#[cfg(test)]
pub(crate) fn is_dirty(&self) -> bool {
    self.dirty.load(Ordering::Acquire)
}
671
672 pub fn list_repos(&self) -> Result<Vec<codemem_core::Repository>, CodememError> {
676 self.storage.list_repos()
677 }
678
679 pub fn add_repo(&self, repo: &codemem_core::Repository) -> Result<(), CodememError> {
681 self.storage.add_repo(repo)
682 }
683
684 pub fn get_repo(&self, id: &str) -> Result<Option<codemem_core::Repository>, CodememError> {
686 self.storage.get_repo(id)
687 }
688
689 pub fn remove_repo(&self, id: &str) -> Result<bool, CodememError> {
691 self.storage.remove_repo(id)
692 }
693
694 pub fn update_repo_status(
696 &self,
697 id: &str,
698 status: &str,
699 indexed_at: Option<&str>,
700 ) -> Result<(), CodememError> {
701 self.storage.update_repo_status(id, status, indexed_at)
702 }
703}
704
705pub use file_indexing::{AnalyzeOptions, AnalyzeProgress, AnalyzeResult, SessionContext};
707
708pub use codemem_embeddings::from_env as embeddings_from_env;
711pub use codemem_embeddings::{EmbeddingProvider, EmbeddingService};