1use codemem_core::{
14 CodememConfig, CodememError, ScoringWeights, StorageBackend, VectorBackend, VectorConfig,
15};
16pub use codemem_storage::graph::GraphEngine;
17pub use codemem_storage::HnswIndex;
18pub use codemem_storage::Storage;
19use std::path::{Path, PathBuf};
20use std::sync::atomic::AtomicBool;
21#[cfg(test)]
22use std::sync::atomic::Ordering;
23use std::sync::{Arc, Mutex, OnceLock, RwLock};
24
25pub mod analysis;
26pub mod bm25;
27pub mod compress;
28pub mod consolidation;
29pub mod enrichment;
30mod enrichment_text;
31mod file_indexing;
32mod graph_linking;
33pub mod graph_ops;
34pub mod hooks;
35pub mod index;
36pub mod insights;
37mod memory_ops;
38pub mod metrics;
39pub mod patterns;
40pub mod pca;
41pub mod persistence;
42pub mod recall;
43pub mod scoring;
44pub mod search;
45pub mod watch;
46
47#[cfg(test)]
48#[path = "tests/engine_integration_tests.rs"]
49mod integration_tests;
50
51#[cfg(test)]
52#[path = "tests/enrichment_tests.rs"]
53mod enrichment_tests;
54
55#[cfg(test)]
56#[path = "tests/recall_tests.rs"]
57mod recall_tests;
58
59#[cfg(test)]
60#[path = "tests/search_tests.rs"]
61mod search_tests;
62
63#[cfg(test)]
64#[path = "tests/consolidation_tests.rs"]
65mod consolidation_tests;
66
67#[cfg(test)]
68#[path = "tests/analysis_tests.rs"]
69mod analysis_tests;
70
71#[cfg(test)]
72#[path = "tests/persistence_tests.rs"]
73mod persistence_tests;
74
75pub use index::{
77 ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
78 IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
79 ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
80};
81
82pub use bm25::Bm25Index;
84pub use metrics::InMemoryMetrics;
85
86pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
88
89pub use persistence::{edge_weight_for, IndexPersistResult};
91
92pub use recall::{ExpandedResult, NamespaceStats, RecallQuery};
94
95pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
97
98pub use analysis::{
100 DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
101};
102
/// One piece of content together with optional per-part metadata.
///
/// NOTE(review): presumably describes a single part when a memory is split
/// into several pieces; the consumers live outside this file — confirm.
///
/// `PartialEq` is derived (in addition to `Debug` and `Clone`) so parts can be
/// compared directly, e.g. in assertions.
#[derive(Debug, Clone, PartialEq)]
pub struct SplitPart {
    /// Text of this part.
    pub content: String,
    /// Tags for this part, if any were supplied.
    pub tags: Option<Vec<String>>,
    /// Importance value for this part, if one was supplied.
    pub importance: Option<f64>,
}
110
111pub struct IndexCache {
115 pub symbols: Vec<Symbol>,
116 pub chunks: Vec<CodeChunk>,
117 pub root_path: String,
118}
119
120pub struct CodememEngine {
136 pub(crate) storage: Box<dyn StorageBackend>,
137 pub(crate) vector: OnceLock<Mutex<HnswIndex>>,
139 pub(crate) graph: Mutex<GraphEngine>,
140 pub(crate) embeddings: OnceLock<Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>>,
142 pub(crate) db_path: Option<PathBuf>,
144 pub(crate) index_cache: Mutex<Option<IndexCache>>,
146 pub(crate) scoring_weights: RwLock<ScoringWeights>,
148 pub(crate) bm25_index: OnceLock<Mutex<Bm25Index>>,
150 pub(crate) config: CodememConfig,
152 pub(crate) metrics: Arc<InMemoryMetrics>,
154 dirty: AtomicBool,
157 active_session_id: RwLock<Option<String>>,
159 change_detector: Mutex<Option<index::incremental::ChangeDetector>>,
162}
163
164impl CodememEngine {
165 pub fn new(
167 storage: Box<dyn StorageBackend>,
168 vector: HnswIndex,
169 graph: GraphEngine,
170 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
171 ) -> Self {
172 let config = CodememConfig::load_or_default();
173 Self::new_with_config(storage, vector, graph, embeddings, config)
174 }
175
176 pub fn new_with_config(
178 storage: Box<dyn StorageBackend>,
179 vector: HnswIndex,
180 graph: GraphEngine,
181 embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
182 config: CodememConfig,
183 ) -> Self {
184 let vector_lock = OnceLock::new();
185 let _ = vector_lock.set(Mutex::new(vector));
186 let embeddings_lock = OnceLock::new();
187 let _ = embeddings_lock.set(embeddings.map(Mutex::new));
188 let bm25_lock = OnceLock::new();
189 let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
190 Self {
191 storage,
192 vector: vector_lock,
193 graph: Mutex::new(graph),
194 embeddings: embeddings_lock,
195 db_path: None,
196 index_cache: Mutex::new(None),
197 scoring_weights: RwLock::new(config.scoring.clone()),
198 bm25_index: bm25_lock,
199 config,
200 metrics: Arc::new(InMemoryMetrics::new()),
201 dirty: AtomicBool::new(false),
202 active_session_id: RwLock::new(None),
203 change_detector: Mutex::new(None),
204 }
205 }
206
207 pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
215 if let Some(parent) = db_path.parent() {
217 if !parent.exists() {
218 std::fs::create_dir_all(parent).map_err(|e| {
219 CodememError::Storage(format!(
220 "Failed to create database directory {}: {e}",
221 parent.display()
222 ))
223 })?;
224 }
225 }
226
227 let config = CodememConfig::load_or_default();
228
229 let storage = Storage::open_with_config(
231 db_path,
232 Some(config.storage.cache_size_mb),
233 Some(config.storage.busy_timeout_secs),
234 )?;
235
236 let graph = GraphEngine::from_storage(&storage)?;
238
239 let engine = Self {
240 storage: Box::new(storage),
241 vector: OnceLock::new(),
242 graph: Mutex::new(graph),
243 embeddings: OnceLock::new(),
244 db_path: Some(db_path.to_path_buf()),
245 index_cache: Mutex::new(None),
246 scoring_weights: RwLock::new(config.scoring.clone()),
247 bm25_index: OnceLock::new(),
248 config,
249 metrics: Arc::new(InMemoryMetrics::new()),
250 dirty: AtomicBool::new(false),
251 active_session_id: RwLock::new(None),
252 change_detector: Mutex::new(None),
253 };
254
255 engine
258 .lock_graph()?
259 .recompute_centrality_with_options(false);
260
261 Ok(engine)
262 }
263
264 pub fn for_testing() -> Self {
266 let storage = Storage::open_in_memory().unwrap();
267 let graph = GraphEngine::new();
268 let config = CodememConfig::default();
269 let vector_lock = OnceLock::new();
270 let _ = vector_lock.set(Mutex::new(HnswIndex::with_defaults().unwrap()));
271 let embeddings_lock = OnceLock::new();
272 let _ = embeddings_lock.set(None);
273 let bm25_lock = OnceLock::new();
274 let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
275 Self {
276 storage: Box::new(storage),
277 vector: vector_lock,
278 graph: Mutex::new(graph),
279 embeddings: embeddings_lock,
280 db_path: None,
281 index_cache: Mutex::new(None),
282 scoring_weights: RwLock::new(config.scoring.clone()),
283 bm25_index: bm25_lock,
284 config,
285 metrics: Arc::new(InMemoryMetrics::new()),
286 dirty: AtomicBool::new(false),
287 active_session_id: RwLock::new(None),
288 change_detector: Mutex::new(None),
289 }
290 }
291
292 pub fn lock_vector(&self) -> Result<std::sync::MutexGuard<'_, HnswIndex>, CodememError> {
295 self.vector
296 .get_or_init(|| self.init_vector())
297 .lock()
298 .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
299 }
300
301 pub fn lock_graph(&self) -> Result<std::sync::MutexGuard<'_, GraphEngine>, CodememError> {
302 self.graph
303 .lock()
304 .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
305 }
306
307 pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
308 self.bm25_index
309 .get_or_init(|| self.init_bm25())
310 .lock()
311 .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
312 }
313
314 pub fn lock_embeddings(
318 &self,
319 ) -> Result<
320 Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
321 CodememError,
322 > {
323 match self.embeddings.get_or_init(|| self.init_embeddings()) {
324 Some(m) => Ok(Some(m.lock().map_err(|e| {
325 CodememError::LockPoisoned(format!("embeddings: {e}"))
326 })?)),
327 None => Ok(None),
328 }
329 }
330
331 fn embeddings_ready(&self) -> bool {
333 self.embeddings.get().is_some_and(|opt| opt.is_some())
334 }
335
336 fn vector_ready(&self) -> bool {
338 self.vector.get().is_some()
339 }
340
341 fn bm25_ready(&self) -> bool {
343 self.bm25_index.get().is_some()
344 }
345
346 pub fn lock_index_cache(
347 &self,
348 ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
349 self.index_cache
350 .lock()
351 .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
352 }
353
354 pub fn scoring_weights(
355 &self,
356 ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
357 self.scoring_weights
358 .read()
359 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
360 }
361
362 pub fn scoring_weights_mut(
363 &self,
364 ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
365 self.scoring_weights
366 .write()
367 .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
368 }
369
370 fn init_vector(&self) -> Mutex<HnswIndex> {
374 let vector_config = VectorConfig {
375 dimensions: self.config.vector.dimensions,
376 ..VectorConfig::default()
377 };
378 let mut vector = HnswIndex::new(vector_config.clone())
379 .unwrap_or_else(|_| HnswIndex::with_defaults().expect("default vector index"));
380
381 if let Some(ref db_path) = self.db_path {
382 let index_path = db_path.with_extension("idx");
383 if index_path.exists() {
384 if let Err(e) = vector.load(&index_path) {
385 tracing::warn!("Stale or corrupt vector index, will rebuild: {e}");
386 }
387 }
388
389 let vector_count = vector.stats().count;
391 if let Ok(db_stats) = self.storage.stats() {
392 let db_embed_count = db_stats.embedding_count;
393 if vector_count != db_embed_count {
394 tracing::warn!(
395 "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
396 );
397 if let Ok(mut fresh) = HnswIndex::new(vector_config) {
398 if let Ok(embeddings) = self.storage.list_all_embeddings() {
399 for (id, emb) in &embeddings {
400 if let Err(e) = fresh.insert(id, emb) {
401 tracing::warn!("Failed to re-insert embedding {id}: {e}");
402 }
403 }
404 }
405 vector = fresh;
406 if let Err(e) = vector.save(&index_path) {
407 tracing::warn!("Failed to save rebuilt vector index: {e}");
408 }
409 }
410 }
411 }
412 }
413
414 Mutex::new(vector)
415 }
416
417 fn init_bm25(&self) -> Mutex<Bm25Index> {
419 let mut bm25 = Bm25Index::new();
420
421 if let Some(ref db_path) = self.db_path {
422 let bm25_path = db_path.with_extension("bm25");
423 let mut loaded = false;
424 if bm25_path.exists() {
425 if let Ok(data) = std::fs::read(&bm25_path) {
426 if let Ok(index) = Bm25Index::deserialize(&data) {
427 tracing::info!(
428 "Loaded BM25 index from disk ({} documents)",
429 index.doc_count
430 );
431 bm25 = index;
432 loaded = true;
433 }
434 }
435 }
436 if !loaded {
437 if let Ok(ids) = self.storage.list_memory_ids() {
438 let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
439 if let Ok(memories) = self.storage.get_memories_batch(&id_refs) {
440 for m in &memories {
441 bm25.add_document(&m.id, &m.content);
442 }
443 tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
444 }
445 }
446 }
447 }
448
449 Mutex::new(bm25)
450 }
451
452 fn init_embeddings(&self) -> Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
457 let provider = match codemem_embeddings::from_env(Some(&self.config.embedding)) {
458 Ok(p) => p,
459 Err(e) => {
460 tracing::warn!("Failed to initialize embedding provider: {e}");
461 return None;
462 }
463 };
464
465 self.backfill_embeddings(&*provider);
467
468 Some(Mutex::new(provider))
469 }
470
471 fn backfill_embeddings(&self, provider: &dyn codemem_embeddings::EmbeddingProvider) {
476 let ids = match self.storage.list_memory_ids() {
477 Ok(ids) => ids,
478 Err(_) => return,
479 };
480
481 let mut to_embed: Vec<(String, String)> = Vec::new();
482 for id in &ids {
483 if self.storage.get_embedding(id).ok().flatten().is_none() {
484 if let Ok(Some(mem)) = self.storage.get_memory_no_touch(id) {
485 let text = self.enrich_memory_text(
486 &mem.content,
487 mem.memory_type,
488 &mem.tags,
489 mem.namespace.as_deref(),
490 Some(&mem.id),
491 );
492 to_embed.push((id.clone(), text));
493 }
494 }
495 }
496
497 if to_embed.is_empty() {
498 return;
499 }
500
501 tracing::info!("Backfilling {} un-embedded memories", to_embed.len());
502 let text_refs: Vec<&str> = to_embed.iter().map(|(_, t)| t.as_str()).collect();
503 match provider.embed_batch(&text_refs) {
504 Ok(embeddings) => {
505 for ((id, _), emb) in to_embed.iter().zip(embeddings.iter()) {
506 let _ = self.storage.store_embedding(id, emb);
507 if let Some(vi_mutex) = self.vector.get() {
509 if let Ok(mut vi) = vi_mutex.lock().map_err(|e| {
510 tracing::warn!("Vector lock failed during backfill: {e}");
511 e
512 }) {
513 let _ = vi.insert(id, emb);
514 }
515 }
516 }
517 tracing::info!("Backfilled {} embeddings", to_embed.len());
518 }
519 Err(e) => tracing::warn!("Backfill embedding failed: {e}"),
520 }
521 }
522
523 pub fn set_active_session(&self, id: Option<String>) {
527 match self.active_session_id.write() {
528 Ok(mut guard) => *guard = id,
529 Err(e) => *e.into_inner() = id,
530 }
531 }
532
533 pub fn active_session_id(&self) -> Option<String> {
535 match self.active_session_id.read() {
536 Ok(guard) => guard.clone(),
537 Err(e) => e.into_inner().clone(),
538 }
539 }
540
541 pub fn storage(&self) -> &dyn StorageBackend {
545 &*self.storage
546 }
547
548 pub fn has_embeddings(&self) -> bool {
553 match self.embeddings.get() {
554 Some(opt) => opt.is_some(),
555 None => !self.config.embedding.provider.is_empty(),
556 }
557 }
558
559 pub fn db_path(&self) -> Option<&Path> {
561 self.db_path.as_deref()
562 }
563
564 pub fn config(&self) -> &CodememConfig {
566 &self.config
567 }
568
569 pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
571 &self.metrics
572 }
573
574 pub fn with_graph<F, R>(&self, f: F) -> Result<R, CodememError>
579 where
580 F: FnOnce(&GraphEngine) -> R,
581 {
582 let guard = self.lock_graph()?;
583 Ok(f(&guard))
584 }
585
586 pub fn with_vector<F, R>(&self, f: F) -> Result<R, CodememError>
589 where
590 F: FnOnce(&HnswIndex) -> R,
591 {
592 let guard = self.lock_vector()?;
593 Ok(f(&guard))
594 }
595
596 #[cfg(test)]
598 pub(crate) fn is_dirty(&self) -> bool {
599 self.dirty.load(Ordering::Acquire)
600 }
601
602 pub fn list_repos(&self) -> Result<Vec<codemem_core::Repository>, CodememError> {
606 self.storage.list_repos()
607 }
608
609 pub fn add_repo(&self, repo: &codemem_core::Repository) -> Result<(), CodememError> {
611 self.storage.add_repo(repo)
612 }
613
614 pub fn get_repo(&self, id: &str) -> Result<Option<codemem_core::Repository>, CodememError> {
616 self.storage.get_repo(id)
617 }
618
619 pub fn remove_repo(&self, id: &str) -> Result<bool, CodememError> {
621 self.storage.remove_repo(id)
622 }
623
624 pub fn update_repo_status(
626 &self,
627 id: &str,
628 status: &str,
629 indexed_at: Option<&str>,
630 ) -> Result<(), CodememError> {
631 self.storage.update_repo_status(id, status, indexed_at)
632 }
633}
634
635pub use file_indexing::{AnalyzeOptions, AnalyzeProgress, AnalyzeResult, SessionContext};
637
638pub use codemem_embeddings::from_env as embeddings_from_env;
641pub use codemem_embeddings::{EmbeddingProvider, EmbeddingService};