mcp_memory/code_registry.rs
1//! Per-project registry of code-index database handles.
2//!
3//! Code search is physically partitioned one SQLite file per project
4//! (`<memory_file>.code/<project>.code.db`). This keeps each project's FTS
5//! index small and independent, makes dropping/re-indexing a project trivial,
6//! and guarantees the regular knowledge-graph tools (which only ever touch the
7//! main memory database) can never see or mutate code-symbol data.
8//!
9//! [`init`] is called once at server startup; [`resolve`] lazily opens (and
10//! caches) the handle for a given project. A project handle owns an in-memory
11//! entity cache, so to keep cache coherence there must be **at most one live
12//! [`GraphHandle`] per project file** in the process. That invariant is upheld
13//! by tracking each handle with a `Weak`: as long as any caller (e.g. a running
14//! watcher) holds a strong reference, `resolve` hands back that same instance.
15//! A small LRU of strong references keeps recently-used, otherwise-idle handles
16//! warm to avoid reopen churn.
17
18#![cfg(feature = "code")]
19
20use std::collections::HashMap;
21use std::num::NonZeroUsize;
22use std::path::PathBuf;
23use std::sync::{Arc, OnceLock, Weak};
24
25use lru::LruCache;
26use parking_lot::Mutex;
27
28use crate::config::{Durability, SqliteTuning};
29use crate::errors::{MCSError, Result};
30use crate::kg::GraphHandle;
31
/// Project name used when a caller does not supply a `project` argument.
pub const DEFAULT_PROJECT: &str = "default";
/// Longest accepted project name, in bytes. The name is embedded in the
/// database filename, so this also bounds the length of that filename.
const MAX_PROJECT_LEN: usize = 64;
/// Capacity of the warm LRU: the number of recently-used handles the registry
/// itself keeps open. A handle that some caller (e.g. a watcher) still holds
/// strongly stays open even after it is evicted from the LRU.
const MAX_WARM_HANDLES: usize = 16;
39
40/// Construction parameters captured at startup so [`resolve`] can build a
41/// [`GraphHandle`] per project on demand.
42struct RegistryConfig {
43 base: PathBuf,
44 durability: Durability,
45 tuning: SqliteTuning,
46 lru_cache: NonZeroUsize,
47 read_pool_size: usize,
48}
49
50struct Inner {
51 /// Canonical instance per project — `Weak` so a handle is dropped (and the
52 /// SQLite connections closed) once no caller and no warm slot hold it.
53 live: HashMap<String, Weak<GraphHandle>>,
54 /// Recently-used handles kept alive to avoid reopen churn.
55 warm: LruCache<String, Arc<GraphHandle>>,
56}
57
58static CONFIG: OnceLock<RegistryConfig> = OnceLock::new();
59static INNER: OnceLock<Mutex<Inner>> = OnceLock::new();
60
61/// Initialize the registry. Idempotent; safe to call once at startup. `base` is
62/// the directory under which per-project databases are created.
63pub fn init(
64 base: PathBuf,
65 durability: Durability,
66 tuning: SqliteTuning,
67 lru_cache: NonZeroUsize,
68 read_pool_size: usize,
69) {
70 // Best-effort: a failure here surfaces later as an open error from `resolve`.
71 let _ = std::fs::create_dir_all(&base);
72 let _ = CONFIG.set(RegistryConfig {
73 base,
74 durability,
75 tuning,
76 lru_cache,
77 read_pool_size,
78 });
79 let warm = LruCache::new(NonZeroUsize::new(MAX_WARM_HANDLES).expect("MAX_WARM_HANDLES > 0"));
80 let _ = INNER.set(Mutex::new(Inner {
81 live: HashMap::new(),
82 warm,
83 }));
84}
85
86/// Validate a project identifier. It is used verbatim as a filename component,
87/// so restrict it to a safe, traversal-free character set.
88pub fn validate_project(project: &str) -> Result<()> {
89 let ok = !project.is_empty()
90 && project.len() <= MAX_PROJECT_LEN
91 && project
92 .bytes()
93 .all(|b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-');
94 if ok {
95 Ok(())
96 } else {
97 Err(MCSError::InvalidParams(format!(
98 "invalid project '{project}': use 1-{MAX_PROJECT_LEN} chars of [A-Za-z0-9_-]"
99 )))
100 }
101}
102
103/// Resolve the (lazily opened) database handle for `project`, opening it if
104/// necessary. Returns the single canonical instance for that project so callers
105/// share one entity cache.
106pub fn resolve(project: &str) -> Result<Arc<GraphHandle>> {
107 validate_project(project)?;
108 let cfg = CONFIG.get().ok_or_else(|| {
109 MCSError::InvalidParams("code registry not initialized (start the server with --code)".into())
110 })?;
111 let inner = INNER.get().expect("registry inner set alongside config");
112
113 let mut g = inner.lock();
114 // Reuse the canonical instance if it is still alive anywhere.
115 if let Some(existing) = g.live.get(project).and_then(Weak::upgrade) {
116 g.warm.put(project.to_string(), Arc::clone(&existing));
117 return Ok(existing);
118 }
119
120 // Cold path (rare): opening a project. Drop any `Weak`s whose handles have
121 // been closed so `live` stays bounded by the live project count, not by the
122 // number of projects ever touched.
123 g.live.retain(|_, w| w.strong_count() > 0);
124
125 let path = cfg.base.join(format!("{project}.code.db"));
126 let handle = Arc::new(GraphHandle::new(
127 &path,
128 cfg.durability,
129 cfg.tuning,
130 cfg.lru_cache,
131 cfg.read_pool_size,
132 )?);
133 g.live.insert(project.to_string(), Arc::downgrade(&handle));
134 g.warm.put(project.to_string(), Arc::clone(&handle));
135 Ok(handle)
136}