use codemem_core::{
CodememConfig, CodememError, GraphBackend, ScoringWeights, StorageBackend, VectorBackend,
};
pub use codemem_storage::graph::GraphEngine;
pub use codemem_storage::HnswIndex;
pub use codemem_storage::Storage;
use std::path::{Path, PathBuf};
#[cfg(test)]
use std::sync::atomic::Ordering;
use std::sync::atomic::{AtomicBool, AtomicI64};
use std::sync::{Arc, Mutex, OnceLock, RwLock};
pub mod analysis;
pub mod bm25;
pub mod compress;
pub mod consolidation;
pub mod enrichment;
mod enrichment_text;
mod file_indexing;
mod graph_linking;
pub mod graph_ops;
pub mod hooks;
pub mod index;
pub mod insights;
mod memory_ops;
pub mod metrics;
pub mod patterns;
pub mod pca;
pub mod persistence;
pub mod recall;
pub mod review;
pub mod scoring;
pub mod search;
pub mod watch;
#[cfg(test)]
#[path = "tests/engine_integration_tests.rs"]
mod integration_tests;
#[cfg(test)]
#[path = "tests/enrichment_tests.rs"]
mod enrichment_tests;
#[cfg(test)]
#[path = "tests/recall_tests.rs"]
mod recall_tests;
#[cfg(test)]
#[path = "tests/search_tests.rs"]
mod search_tests;
#[cfg(test)]
#[path = "tests/consolidation_tests.rs"]
mod consolidation_tests;
#[cfg(test)]
#[path = "tests/analysis_tests.rs"]
mod analysis_tests;
#[cfg(test)]
#[path = "tests/persistence_tests.rs"]
mod persistence_tests;
#[cfg(test)]
#[path = "tests/memory_expiry_tests.rs"]
mod memory_expiry_tests;
#[cfg(test)]
#[path = "tests/scope_tests.rs"]
mod scope_tests;
#[cfg(test)]
#[path = "tests/graph_ops_tests.rs"]
mod graph_ops_tests;
pub use index::{
ChunkConfig, CodeChunk, CodeParser, Dependency, IndexAndResolveResult, IndexProgress,
IndexResult, Indexer, ManifestResult, ParseResult, Reference, ReferenceKind, ReferenceResolver,
ResolvedEdge, Symbol, SymbolKind, Visibility, Workspace,
};
pub use bm25::Bm25Index;
pub use metrics::InMemoryMetrics;
pub use review::{BlastRadiusReport, DiffSymbolMapping, MissingCoChange};
pub use enrichment::{EnrichResult, EnrichmentPipelineResult};
pub use persistence::{edge_weight_for, CrossRepoPersistResult, IndexPersistResult};
pub use recall::{ExpandedResult, NamespaceStats, RecallQuery};
pub use search::{CodeSearchResult, SummaryTreeNode, SymbolSearchResult};
pub use analysis::{
DecisionChain, DecisionConnection, DecisionEntry, ImpactResult, SessionCheckpointReport,
};
/// One piece of a memory's content after it has been split into parts,
/// carrying optional per-part metadata overrides.
#[derive(Debug, Clone)]
pub struct SplitPart {
    /// Text content of this part.
    pub content: String,
    /// Tags applying to this part only, if any.
    pub tags: Option<Vec<String>>,
    /// Importance score for this part, if overridden.
    pub importance: Option<f64>,
}
/// Cached result of a code-indexing pass (symbols and chunks for one
/// workspace root), held in memory so later operations can reuse it.
// NOTE(review): consumers of this cache live in sibling modules
// (e.g. `index`/`search`) — not visible in this file.
pub struct IndexCache {
    /// Symbols extracted from the indexed workspace.
    pub symbols: Vec<Symbol>,
    /// Code chunks produced by the chunker for the same workspace.
    pub chunks: Vec<CodeChunk>,
    /// Root path of the indexed workspace, stored as a string.
    pub root_path: String,
}
/// Central engine tying together the storage, vector, graph, embedding,
/// and BM25 backends behind trait objects, plus per-process runtime state
/// (active session, scope, metrics, dirty flag).
///
/// Heavy components (`vector`, `embeddings`, `bm25_index`) sit behind
/// `OnceLock` so that engines built via [`CodememEngine::from_db_path`]
/// can defer their initialization until first use; the other constructors
/// pre-fill the slots eagerly.
pub struct CodememEngine {
    /// Durable storage backend (SQLite in this build; see `from_db_path`).
    pub(crate) storage: Box<dyn StorageBackend>,
    /// Vector index, lazily built by `init_vector` when not pre-seeded.
    pub(crate) vector: OnceLock<Mutex<Box<dyn VectorBackend>>>,
    /// Graph backend guarding all graph reads/writes behind one mutex.
    pub(crate) graph: Mutex<Box<dyn GraphBackend>>,
    /// Optional embedding provider; `None` inside the `OnceLock` means
    /// initialization was attempted and no provider is available.
    pub(crate) embeddings: OnceLock<Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>>>,
    /// Path of the backing database file; `None` for in-memory engines.
    /// Sidecar files (`.idx`, `.bm25`) are derived from this path.
    pub(crate) db_path: Option<PathBuf>,
    /// Cache of the most recent indexing pass, if any.
    pub(crate) index_cache: Mutex<Option<IndexCache>>,
    /// Current scoring weights; seeded from `config.scoring` and mutable
    /// at runtime via `scoring_weights_mut`.
    pub(crate) scoring_weights: RwLock<ScoringWeights>,
    /// BM25 lexical index, lazily loaded/rebuilt by `init_bm25`.
    pub(crate) bm25_index: OnceLock<Mutex<Bm25Index>>,
    /// Immutable engine configuration captured at construction time.
    pub(crate) config: CodememConfig,
    /// Shared in-memory metrics sink.
    pub(crate) metrics: Arc<InMemoryMetrics>,
    // Unsaved-changes flag. NOTE(review): only the test-only reader
    // `is_dirty` is visible in this file; writers live in other modules.
    dirty: AtomicBool,
    // Identifier of the currently active session, if one has been set.
    active_session_id: RwLock<Option<String>>,
    // Current scope context (namespace etc.), if one has been set.
    scope: RwLock<Option<codemem_core::ScopeContext>>,
    // Incremental-indexing change detector; populated elsewhere in crate.
    change_detector: Mutex<Option<index::incremental::ChangeDetector>>,
    // Timestamp of the last memory-expiry sweep (0 = never swept).
    last_expiry_sweep: AtomicI64,
}
impl CodememEngine {
    /// Builds an engine from fully-constructed backends, loading the
    /// configuration from the environment / config file (or defaults).
    pub fn new(
        storage: Box<dyn StorageBackend>,
        vector: Box<dyn VectorBackend>,
        graph: Box<dyn GraphBackend>,
        embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
    ) -> Self {
        let config = CodememConfig::load_or_default();
        Self::new_with_config(storage, vector, graph, embeddings, config)
    }

    /// Like [`CodememEngine::new`], but with an explicit configuration.
    ///
    /// All `OnceLock` slots are pre-seeded here, so the lazy `init_*`
    /// paths never run for engines built this way.
    pub fn new_with_config(
        storage: Box<dyn StorageBackend>,
        vector: Box<dyn VectorBackend>,
        graph: Box<dyn GraphBackend>,
        embeddings: Option<Box<dyn codemem_embeddings::EmbeddingProvider>>,
        config: CodememConfig,
    ) -> Self {
        // Pre-fill the lazy slots; `set` cannot fail on a fresh OnceLock,
        // so the results are deliberately ignored.
        let vector_lock = OnceLock::new();
        let _ = vector_lock.set(Mutex::new(vector));
        let embeddings_lock = OnceLock::new();
        let _ = embeddings_lock.set(embeddings.map(Mutex::new));
        let bm25_lock = OnceLock::new();
        let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
        Self {
            storage,
            vector: vector_lock,
            graph: Mutex::new(graph),
            embeddings: embeddings_lock,
            db_path: None,
            index_cache: Mutex::new(None),
            scoring_weights: RwLock::new(config.scoring.clone()),
            bm25_index: bm25_lock,
            config,
            metrics: Arc::new(InMemoryMetrics::new()),
            dirty: AtomicBool::new(false),
            active_session_id: RwLock::new(None),
            scope: RwLock::new(None),
            change_detector: Mutex::new(None),
            last_expiry_sweep: AtomicI64::new(0),
        }
    }

    /// Opens (or creates) an engine backed by the SQLite database at
    /// `db_path`, creating the parent directory if needed.
    ///
    /// Vector, embedding, and BM25 components are left uninitialized and
    /// are built lazily on first access (see `init_vector`, `init_bm25`,
    /// `init_embeddings`).
    ///
    /// # Errors
    /// Returns `CodememError::Config` if the configured backend names are
    /// not the ones compiled into this build (`sqlite`/`hnsw`/`petgraph`),
    /// or `CodememError::Storage` if the directory or database cannot be
    /// created/opened.
    pub fn from_db_path(db_path: &Path) -> Result<Self, CodememError> {
        if let Some(parent) = db_path.parent() {
            if !parent.exists() {
                std::fs::create_dir_all(parent).map_err(|e| {
                    CodememError::Storage(format!(
                        "Failed to create database directory {}: {e}",
                        parent.display()
                    ))
                })?;
            }
        }
        let config = CodememConfig::load_or_default();
        // This build only links one implementation per backend kind;
        // reject any other configured name up front with a clear error.
        if !config.storage.backend.eq_ignore_ascii_case("sqlite") {
            return Err(CodememError::Config(format!(
                "Unsupported storage backend '{}'. Only 'sqlite' is available in this build.",
                config.storage.backend
            )));
        }
        if !config.vector.backend.eq_ignore_ascii_case("hnsw") {
            return Err(CodememError::Config(format!(
                "Unsupported vector backend '{}'. Only 'hnsw' is available in this build.",
                config.vector.backend
            )));
        }
        if !config.graph.backend.eq_ignore_ascii_case("petgraph") {
            return Err(CodememError::Config(format!(
                "Unsupported graph backend '{}'. Only 'petgraph' is available in this build.",
                config.graph.backend
            )));
        }
        let storage = Storage::open_with_config(
            db_path,
            Some(config.storage.cache_size_mb),
            Some(config.storage.busy_timeout_secs),
        )?;
        // The graph is rehydrated from storage eagerly; only the vector,
        // embedding, and BM25 slots stay lazy.
        let graph = GraphEngine::from_storage(&storage)?;
        let engine = Self {
            storage: Box::new(storage),
            vector: OnceLock::new(),
            graph: Mutex::new(Box::new(graph)),
            embeddings: OnceLock::new(),
            db_path: Some(db_path.to_path_buf()),
            index_cache: Mutex::new(None),
            scoring_weights: RwLock::new(config.scoring.clone()),
            bm25_index: OnceLock::new(),
            config,
            metrics: Arc::new(InMemoryMetrics::new()),
            dirty: AtomicBool::new(false),
            active_session_id: RwLock::new(None),
            scope: RwLock::new(None),
            change_detector: Mutex::new(None),
            last_expiry_sweep: AtomicI64::new(0),
        };
        Ok(engine)
    }

    /// Builds a fully in-memory engine for tests: in-memory SQLite, fresh
    /// graph, default HNSW index, no embedding provider, empty BM25.
    ///
    /// # Panics
    /// Unwraps storage/vector construction — acceptable in test setup.
    pub fn for_testing() -> Self {
        let storage = Storage::open_in_memory().unwrap();
        let graph = GraphEngine::new();
        let config = CodememConfig::default();
        let vector_lock = OnceLock::new();
        let _ = vector_lock.set(Mutex::new(
            Box::new(HnswIndex::with_defaults().unwrap()) as Box<dyn VectorBackend>
        ));
        let embeddings_lock = OnceLock::new();
        // `None` marks "initialization done, no provider available".
        let _ = embeddings_lock.set(None);
        let bm25_lock = OnceLock::new();
        let _ = bm25_lock.set(Mutex::new(Bm25Index::new()));
        Self {
            storage: Box::new(storage),
            vector: vector_lock,
            graph: Mutex::new(Box::new(graph)),
            embeddings: embeddings_lock,
            db_path: None,
            index_cache: Mutex::new(None),
            scoring_weights: RwLock::new(config.scoring.clone()),
            bm25_index: bm25_lock,
            config,
            metrics: Arc::new(InMemoryMetrics::new()),
            dirty: AtomicBool::new(false),
            active_session_id: RwLock::new(None),
            scope: RwLock::new(None),
            change_detector: Mutex::new(None),
            last_expiry_sweep: AtomicI64::new(0),
        }
    }

    /// Locks the vector index, lazily initializing it on first access.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the mutex is poisoned.
    pub fn lock_vector(
        &self,
    ) -> Result<std::sync::MutexGuard<'_, Box<dyn VectorBackend>>, CodememError> {
        self.vector
            .get_or_init(|| self.init_vector())
            .lock()
            .map_err(|e| CodememError::LockPoisoned(format!("vector: {e}")))
    }

    /// Locks the graph backend.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the mutex is poisoned.
    pub fn lock_graph(
        &self,
    ) -> Result<std::sync::MutexGuard<'_, Box<dyn GraphBackend>>, CodememError> {
        self.graph
            .lock()
            .map_err(|e| CodememError::LockPoisoned(format!("graph: {e}")))
    }

    /// Locks the BM25 index, lazily loading/rebuilding it on first access.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the mutex is poisoned.
    pub fn lock_bm25(&self) -> Result<std::sync::MutexGuard<'_, Bm25Index>, CodememError> {
        self.bm25_index
            .get_or_init(|| self.init_bm25())
            .lock()
            .map_err(|e| CodememError::LockPoisoned(format!("bm25: {e}")))
    }

    /// Locks the embedding provider if one is (or can be) configured.
    ///
    /// Returns `Ok(None)` when no provider is available; lazy
    /// initialization happens on the first call.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the provider mutex is poisoned.
    pub fn lock_embeddings(
        &self,
    ) -> Result<
        Option<std::sync::MutexGuard<'_, Box<dyn codemem_embeddings::EmbeddingProvider>>>,
        CodememError,
    > {
        match self.embeddings.get_or_init(|| self.init_embeddings()) {
            Some(m) => Ok(Some(m.lock().map_err(|e| {
                CodememError::LockPoisoned(format!("embeddings: {e}"))
            })?)),
            None => Ok(None),
        }
    }

    // True only if embedding init has already run AND produced a provider.
    // Does not trigger lazy initialization.
    // NOTE(review): no caller is visible in this file; presumably used by
    // sibling modules via crate-internal visibility.
    fn embeddings_ready(&self) -> bool {
        self.embeddings.get().is_some_and(|opt| opt.is_some())
    }

    // True if the vector index has been initialized (eagerly or lazily).
    fn vector_ready(&self) -> bool {
        self.vector.get().is_some()
    }

    // True if the BM25 index has been initialized (eagerly or lazily).
    fn bm25_ready(&self) -> bool {
        self.bm25_index.get().is_some()
    }

    /// Locks the index cache slot.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the mutex is poisoned.
    pub fn lock_index_cache(
        &self,
    ) -> Result<std::sync::MutexGuard<'_, Option<IndexCache>>, CodememError> {
        self.index_cache
            .lock()
            .map_err(|e| CodememError::LockPoisoned(format!("index_cache: {e}")))
    }

    /// Read access to the current scoring weights.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the RwLock is poisoned.
    pub fn scoring_weights(
        &self,
    ) -> Result<std::sync::RwLockReadGuard<'_, ScoringWeights>, CodememError> {
        self.scoring_weights
            .read()
            .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights read: {e}")))
    }

    /// Write access to the current scoring weights.
    ///
    /// # Errors
    /// `CodememError::LockPoisoned` if the RwLock is poisoned.
    pub fn scoring_weights_mut(
        &self,
    ) -> Result<std::sync::RwLockWriteGuard<'_, ScoringWeights>, CodememError> {
        self.scoring_weights
            .write()
            .map_err(|e| CodememError::LockPoisoned(format!("scoring_weights write: {e}")))
    }

    // Lazy vector-index initializer (used by `lock_vector` when the slot
    // was not pre-seeded, i.e. for engines built via `from_db_path`).
    //
    // Steps:
    //  1. Build an index from config, falling back to defaults on error.
    //  2. If a `.idx` sidecar exists next to the DB, try to load it.
    //  3. Compare the loaded count against the DB's embedding count; on
    //     mismatch, rebuild the index from all stored embeddings and save
    //     the rebuilt sidecar. All failures along the way are logged and
    //     tolerated (best effort) rather than propagated.
    fn init_vector(&self) -> Mutex<Box<dyn VectorBackend>> {
        let vector_config = self.config.vector.clone();
        let mut vector = HnswIndex::new(vector_config.clone())
            .unwrap_or_else(|_| HnswIndex::with_defaults().expect("default vector index"));
        if let Some(ref db_path) = self.db_path {
            let index_path = db_path.with_extension("idx");
            if index_path.exists() {
                if let Err(e) = vector.load(&index_path) {
                    tracing::warn!("Stale or corrupt vector index, will rebuild: {e}");
                }
            }
            let vector_count = vector.stats().count;
            if let Ok(db_stats) = self.storage.stats() {
                let db_embed_count = db_stats.embedding_count;
                if vector_count != db_embed_count {
                    tracing::warn!(
                        "Vector index ({vector_count}) out of sync with DB ({db_embed_count}), rebuilding..."
                    );
                    // NOTE(review): if `HnswIndex::new` failed above (we
                    // fell back to defaults), this rebuild attempt with the
                    // same config will likely fail again and be skipped.
                    if let Ok(mut fresh) = HnswIndex::new(vector_config) {
                        if let Ok(embeddings) = self.storage.list_all_embeddings() {
                            for (id, emb) in &embeddings {
                                if let Err(e) = fresh.insert(id, emb) {
                                    tracing::warn!("Failed to re-insert embedding {id}: {e}");
                                }
                            }
                        }
                        vector = fresh;
                        if let Err(e) = vector.save(&index_path) {
                            tracing::warn!("Failed to save rebuilt vector index: {e}");
                        }
                    }
                }
            }
        }
        Mutex::new(Box::new(vector))
    }

    // Lazy BM25 initializer: load the `.bm25` sidecar if present and
    // parseable; otherwise rebuild the lexical index from every memory in
    // storage. In-memory engines (no `db_path`) start with an empty index.
    fn init_bm25(&self) -> Mutex<Bm25Index> {
        let mut bm25 = Bm25Index::new();
        if let Some(ref db_path) = self.db_path {
            let bm25_path = db_path.with_extension("bm25");
            let mut loaded = false;
            if bm25_path.exists() {
                if let Ok(data) = std::fs::read(&bm25_path) {
                    if let Ok(index) = Bm25Index::deserialize(&data) {
                        tracing::info!(
                            "Loaded BM25 index from disk ({} documents)",
                            index.doc_count
                        );
                        bm25 = index;
                        loaded = true;
                    }
                }
            }
            if !loaded {
                // Sidecar missing or corrupt: rebuild from storage.
                // NOTE(review): fetches every memory in one batch — fine
                // for typical sizes, but unbounded; confirm acceptable.
                if let Ok(ids) = self.storage.list_memory_ids() {
                    let id_refs: Vec<&str> = ids.iter().map(|s| s.as_str()).collect();
                    if let Ok(memories) = self.storage.get_memories_batch(&id_refs) {
                        for m in &memories {
                            bm25.add_document(&m.id, &m.content);
                        }
                        tracing::info!("Rebuilt BM25 index from {} memories", bm25.doc_count);
                    }
                }
            }
        }
        Mutex::new(bm25)
    }

    // Lazy embedding-provider initializer. On success, synchronously
    // backfills embeddings for any memories that lack one before the
    // provider is handed out; on failure, logs and returns `None` so the
    // engine degrades to non-semantic search.
    fn init_embeddings(&self) -> Option<Mutex<Box<dyn codemem_embeddings::EmbeddingProvider>>> {
        let provider = match codemem_embeddings::from_env(Some(&self.config.embedding)) {
            Ok(p) => p,
            Err(e) => {
                tracing::warn!("Failed to initialize embedding provider: {e}");
                return None;
            }
        };
        self.backfill_embeddings(&*provider);
        Some(Mutex::new(provider))
    }

    // Best-effort pass that embeds every stored memory which has no
    // embedding yet, persisting each result and inserting it into the
    // vector index if that index is already initialized. All storage and
    // lock errors are logged/ignored rather than propagated.
    fn backfill_embeddings(&self, provider: &dyn codemem_embeddings::EmbeddingProvider) {
        let ids = match self.storage.list_memory_ids() {
            Ok(ids) => ids,
            Err(_) => return,
        };
        let mut to_embed: Vec<(String, String)> = Vec::new();
        for id in &ids {
            if self.storage.get_embedding(id).ok().flatten().is_none() {
                // `get_memory_no_touch` avoids bumping access metadata
                // during this maintenance pass.
                if let Ok(Some(mem)) = self.storage.get_memory_no_touch(id) {
                    let text = self.enrich_memory_text(
                        &mem.content,
                        mem.memory_type,
                        &mem.tags,
                        mem.namespace.as_deref(),
                        Some(&mem.id),
                    );
                    to_embed.push((id.clone(), text));
                }
            }
        }
        if to_embed.is_empty() {
            return;
        }
        tracing::info!("Backfilling {} un-embedded memories", to_embed.len());
        let text_refs: Vec<&str> = to_embed.iter().map(|(_, t)| t.as_str()).collect();
        match provider.embed_batch(&text_refs) {
            Ok(embeddings) => {
                // NOTE(review): `zip` silently drops the tail if the
                // provider returns fewer embeddings than requested —
                // confirm providers guarantee a 1:1 response.
                for ((id, _), emb) in to_embed.iter().zip(embeddings.iter()) {
                    let _ = self.storage.store_embedding(id, emb);
                    if let Some(vi_mutex) = self.vector.get() {
                        if let Ok(mut vi) = vi_mutex.lock().map_err(|e| {
                            tracing::warn!("Vector lock failed during backfill: {e}");
                            e
                        }) {
                            let _ = vi.insert(id, emb);
                        }
                    }
                }
                tracing::info!("Backfilled {} embeddings", to_embed.len());
            }
            Err(e) => tracing::warn!("Backfill embedding failed: {e}"),
        }
    }

    /// Sets (or clears) the active session id. Recovers from a poisoned
    /// lock by writing through the poisoned guard instead of failing.
    pub fn set_active_session(&self, id: Option<String>) {
        match self.active_session_id.write() {
            Ok(mut guard) => *guard = id,
            Err(e) => *e.into_inner() = id,
        }
    }

    /// Returns a clone of the active session id, if one has been set.
    /// Tolerates a poisoned lock by reading through the poisoned guard.
    pub fn active_session_id(&self) -> Option<String> {
        match self.active_session_id.read() {
            Ok(guard) => guard.clone(),
            Err(e) => e.into_inner().clone(),
        }
    }

    /// Sets (or clears) the scope context, tolerating lock poisoning.
    pub fn set_scope(&self, scope: Option<codemem_core::ScopeContext>) {
        match self.scope.write() {
            Ok(mut guard) => *guard = scope,
            Err(e) => *e.into_inner() = scope,
        }
    }

    /// Returns a clone of the current scope context, if any, tolerating
    /// lock poisoning.
    pub fn scope(&self) -> Option<codemem_core::ScopeContext> {
        match self.scope.read() {
            Ok(guard) => guard.clone(),
            Err(e) => e.into_inner().clone(),
        }
    }

    /// Convenience accessor: the namespace of the current scope, if set.
    pub fn scope_namespace(&self) -> Option<String> {
        self.scope().map(|s| s.namespace().to_string())
    }

    /// Borrow the storage backend as a trait object.
    pub fn storage(&self) -> &dyn StorageBackend {
        &*self.storage
    }

    /// Whether embeddings are (or are expected to become) available.
    ///
    /// If the provider has not been initialized yet, this answers
    /// optimistically based on whether a provider name is configured —
    /// actual initialization may still fail later.
    pub fn has_embeddings(&self) -> bool {
        match self.embeddings.get() {
            Some(opt) => opt.is_some(),
            None => !self.config.embedding.provider.is_empty(),
        }
    }

    /// Path of the backing database file (`None` for in-memory engines).
    pub fn db_path(&self) -> Option<&Path> {
        self.db_path.as_deref()
    }

    /// Borrow the engine configuration.
    pub fn config(&self) -> &CodememConfig {
        &self.config
    }

    /// Borrow the shared metrics handle.
    pub fn metrics(&self) -> &Arc<InMemoryMetrics> {
        &self.metrics
    }

    /// Runs `f` with the graph backend locked, returning its result.
    ///
    /// # Errors
    /// Propagates `CodememError::LockPoisoned` from `lock_graph`.
    pub fn with_graph<F, R>(&self, f: F) -> Result<R, CodememError>
    where
        F: FnOnce(&dyn GraphBackend) -> R,
    {
        let guard = self.lock_graph()?;
        Ok(f(&**guard))
    }

    /// Runs `f` with the vector index locked (initializing it lazily if
    /// needed), returning its result.
    ///
    /// # Errors
    /// Propagates `CodememError::LockPoisoned` from `lock_vector`.
    pub fn with_vector<F, R>(&self, f: F) -> Result<R, CodememError>
    where
        F: FnOnce(&dyn VectorBackend) -> R,
    {
        let guard = self.lock_vector()?;
        Ok(f(&**guard))
    }

    // Test-only probe of the dirty flag; Acquire pairs with whatever
    // Release store sets it (writers not visible in this file).
    #[cfg(test)]
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty.load(Ordering::Acquire)
    }

    /// Lists all registered repositories (delegates to storage).
    pub fn list_repos(&self) -> Result<Vec<codemem_core::Repository>, CodememError> {
        self.storage.list_repos()
    }

    /// Registers a repository (delegates to storage).
    pub fn add_repo(&self, repo: &codemem_core::Repository) -> Result<(), CodememError> {
        self.storage.add_repo(repo)
    }

    /// Fetches a repository by id (delegates to storage).
    pub fn get_repo(&self, id: &str) -> Result<Option<codemem_core::Repository>, CodememError> {
        self.storage.get_repo(id)
    }

    /// Removes a repository by id; returns whether one was removed
    /// (delegates to storage).
    pub fn remove_repo(&self, id: &str) -> Result<bool, CodememError> {
        self.storage.remove_repo(id)
    }

    /// Updates a repository's indexing status and optional indexed-at
    /// timestamp (delegates to storage).
    pub fn update_repo_status(
        &self,
        id: &str,
        status: &str,
        indexed_at: Option<&str>,
    ) -> Result<(), CodememError> {
        self.storage.update_repo_status(id, status, indexed_at)
    }
}
pub use file_indexing::{AnalyzeOptions, AnalyzeProgress, AnalyzeResult, SessionContext};
pub use codemem_embeddings::from_env as embeddings_from_env;
pub use codemem_embeddings::{resolve_model_id, EmbeddingProvider, EmbeddingService};