pub(crate) mod cursor;
mod helpers;
mod helpers_admin;
mod helpers_calls;
#[cfg(feature = "documents")]
mod helpers_documents;
mod helpers_graph;
mod helpers_grep;
mod helpers_impls;
#[cfg(feature = "crawl")]
mod helpers_web;
#[cfg(any(feature = "memory", feature = "documents"))]
mod memory;
mod savings;
mod telemetry;
mod tools;
mod tools_admin;
mod tools_git;
mod tools_memory;
#[cfg(feature = "crawl")]
mod tools_web;
mod types;
mod types_admin;
mod types_documents;
mod types_graph;
mod types_impls;
use std::collections::BTreeMap;
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use arc_swap::ArcSwap;
use lru::LruCache;
use rmcp::ServerHandler;
use rmcp::handler::server::tool::ToolRouter;
use rmcp::model::{ServerCapabilities, ServerInfo};
use rmcp::tool_handler;
use tokio::sync::RwLock;
use crate::extract::{FileMapL1, Import};
use crate::lang::LangId;
use crate::store::Store;
/// Single import point for the `*Params` types and the `Parameters` wrapper.
///
/// Re-exports rmcp's `Parameters` wrapper together with the parameter structs declared in
/// the sibling `types`, `types_admin`, `types_graph` and `types_impls` modules. The web
/// parameter types (`WebCrawlParams`, `WebMapParams`, `WebScrapeParams`) are re-exported
/// only when the `crawl` feature is enabled.
pub mod params {
pub use rmcp::handler::server::wrapper::Parameters;
pub use super::types::{
BlameFileParams, BlameSymbolParams, CommitsTouchingParams, DependentsParams,
DiffFileParams, DiffOutlineParams, FindCallersParams, FindCommitsByPathParams,
FindReferencesParams, HotFilesParams, ListFilesParams, MemoryDeleteParams, MemoryGetParams,
MemoryListParams, MemoryPutParams, MemorySearchParams, OutlineParams, RecentChangesParams,
RepoInfoParams, RescanParams, SearchDocumentsParams, SearchSymbolsParams, StatusParams,
SymbolHistoryParams, TelemetrySummaryParams, WorkingTreeStatusParams, WorkspaceGrepParams,
};
#[cfg(feature = "crawl")]
pub use super::types::{WebCrawlParams, WebMapParams, WebScrapeParams};
pub use super::types_admin::{CacheClearParams, CacheGcParams, CacheStatsParams};
pub use super::types_graph::CallGraphParams;
pub use super::types_impls::FindImplementationsParams;
}
// Also expose the `Parameters` wrapper directly at this module's root.
pub use params::Parameters;
/// Capacity of the outline LRU cache; `BasemindServer::new_with_options` passes it to
/// `LruCache::new` when creating the cache.
pub(crate) const OUTLINE_CACHE_CAP: usize = 512;
/// Value stored in the outline cache: an L1 file map paired with a byte buffer.
pub(crate) struct OutlineEntry {
/// Shared L1 map for the cached file.
pub map: Arc<FileMapL1>,
/// Shared byte buffer; presumably the source bytes the map was extracted from — confirm
/// at the insertion site (not visible in this file section).
pub source: Arc<Vec<u8>>,
}
/// Mutex-guarded LRU cache of `OutlineEntry` values keyed by `(gix::ObjectId, LangId)`.
pub(crate) type OutlineCache = Mutex<LruCache<(gix::ObjectId, LangId), Arc<OutlineEntry>>>;
/// MCP server handle pairing the shared `ServerState` with the combined tool router.
///
/// `Clone` is derived; clones share one `ServerState` through the `Arc`.
#[derive(Clone)]
pub struct BasemindServer {
/// State shared between clones of the server and the background workers spawned at
/// construction.
pub(crate) state: Arc<ServerState>,
// Read via `self.tool_router.clone()` in the `#[tool_handler]` attribute on the
// `ServerHandler` impl. NOTE(review): the `dead_code` allowance is presumably needed
// because that read only appears inside macro input — confirm.
#[allow(dead_code)]
tool_router: ToolRouter<Self>,
}
/// State shared by the server, its clones, and the background tasks/threads spawned in
/// this file. Built once in `BasemindServer::new_with_options`.
pub(crate) struct ServerState {
/// The opened store, behind an async read/write lock.
pub(crate) store: RwLock<Store>,
/// Root path passed to the constructor. The serve watcher watches it and the view
/// watcher reopens the store from it.
pub(crate) root: PathBuf,
/// Current `MapCache` snapshot; the view watcher replaces it via `ArcSwap::store` after
/// rebuilding from a refreshed index.
pub(crate) cache: ArcSwap<MapCache>,
/// Git repository handle, when one was supplied to the constructor.
pub(crate) repo: Option<Arc<crate::git::Repo>>,
/// Shared git cache supplied to the constructor.
pub(crate) git_cache: Arc<crate::git_cache::GitCache>,
/// Outline LRU cache, created with capacity `OUTLINE_CACHE_CAP`.
pub(crate) outline_cache: Arc<OutlineCache>,
/// Shared configuration supplied to the constructor.
pub(crate) config: Arc<crate::config::Config>,
/// Telemetry handle, created from the store's `basemind_dir`.
pub(crate) telemetry: Arc<telemetry::Telemetry>,
/// Initialised to the sum of `size_bytes` over the indexed files at construction time.
pub(crate) corpus_bytes: std::sync::atomic::AtomicU64,
/// Starts at 1; the view watcher increments it after swapping in a rebuilt `MapCache`.
pub(crate) cache_generation: std::sync::atomic::AtomicU32,
/// `crate::git::scope_key` of the repo when one is present, otherwise `path:<root>`.
#[allow(dead_code)] pub(crate) scope: String,
/// Lance store handle; created empty at construction and filled in elsewhere (the
/// initialisation site is not visible in this file section).
#[cfg(any(feature = "memory", feature = "documents"))]
pub(crate) lance: tokio::sync::OnceCell<Arc<crate::lance::LanceStore>>,
/// Shared embedder; created empty at construction and filled in elsewhere.
#[cfg(feature = "intelligence")]
pub(crate) embedder: tokio::sync::OnceCell<Arc<crate::embeddings::SharedEmbedder>>,
/// Crawl engine handle; `None` when `crate::web::build_engine` failed at construction.
#[cfg(feature = "crawl")]
pub(crate) crawl_engine: Option<kreuzcrawl::CrawlEngineHandle>,
}
/// In-memory snapshot of the L1 file maps read from a [`Store`].
pub(crate) struct MapCache {
    /// L1 map for every indexed path whose blob could be read, ordered by path.
    pub(crate) by_path: BTreeMap<crate::path::RelPath, FileMapL1>,
    /// `(path, imports)` pairs derived from `by_path` when the cache is built.
    pub(crate) imports_index: Vec<(PathBuf, Vec<Import>)>,
}

impl MapCache {
    /// Reads the L1 map of every file listed in `store.index.files` and assembles the cache.
    ///
    /// Entries whose L1 read returns `Ok(None)` or an error are left out rather than
    /// failing the whole build.
    fn build(store: &Store) -> Self {
        let mut by_path = BTreeMap::new();
        for (rel_path, file_entry) in &store.index.files {
            // Best effort: only successfully loaded maps make it into the cache.
            if let Ok(Some(l1_map)) = store.read_l1_by_hex(&file_entry.hash_hex) {
                by_path.insert(rel_path.clone(), l1_map);
            }
        }

        // Mirror the import lists into a flat vector, in the same path order as `by_path`.
        let mut imports_index: Vec<(PathBuf, Vec<Import>)> = Vec::with_capacity(by_path.len());
        for (rel_path, l1_map) in &by_path {
            imports_index.push((rel_path.to_path_buf(), l1_map.imports.clone()));
        }

        Self {
            by_path,
            imports_index,
        }
    }
}
/// Switches controlling what `BasemindServer::new_with_options` starts besides the server
/// itself.
#[derive(Debug, Clone, Copy)]
pub struct ServerOptions {
    /// When `true`, the constructor spawns a watcher thread and the background
    /// scan / blob-GC task; when `false`, none of that work is started.
    pub background: bool,
    /// When `true` (and the store is on the working view), the live serve watcher is used;
    /// otherwise the view watcher is used. Only consulted when `background` is `true`.
    pub watch: bool,
}

impl Default for ServerOptions {
    /// Both switches are on by default.
    fn default() -> Self {
        ServerOptions {
            background: true,
            watch: true,
        }
    }
}
impl BasemindServer {
pub fn new(
store: Store,
root: PathBuf,
config: Arc<crate::config::Config>,
repo: Option<Arc<crate::git::Repo>>,
git_cache: Arc<crate::git_cache::GitCache>,
) -> Self {
Self::new_with_options(
store,
root,
config,
repo,
git_cache,
ServerOptions::default(),
)
}
pub fn new_oneshot(
store: Store,
root: PathBuf,
config: Arc<crate::config::Config>,
repo: Option<Arc<crate::git::Repo>>,
git_cache: Arc<crate::git_cache::GitCache>,
) -> Self {
Self::new_with_options(
store,
root,
config,
repo,
git_cache,
ServerOptions {
background: false,
watch: false,
},
)
}
pub fn new_with_options(
store: Store,
root: PathBuf,
config: Arc<crate::config::Config>,
repo: Option<Arc<crate::git::Repo>>,
git_cache: Arc<crate::git_cache::GitCache>,
options: ServerOptions,
) -> Self {
let scope = repo
.as_ref()
.map(|r| crate::git::scope_key(r))
.unwrap_or_else(|| format!("path:{}", root.display()));
let cache = Arc::new(MapCache::build(&store));
let corpus_bytes: u64 = store.index.files.values().map(|e| e.size_bytes).sum();
let needs_initial_scan =
store.view == crate::store::VIEW_WORKING && cache.by_path.is_empty();
tracing::info!(
files = cache.by_path.len(),
corpus_bytes,
git = repo.is_some(),
scope = %scope,
"preloaded code map into RAM for MCP server"
);
let outline_cache: Arc<OutlineCache> = Arc::new(Mutex::new(LruCache::new(
NonZeroUsize::new(OUTLINE_CACHE_CAP).expect("OUTLINE_CACHE_CAP > 0"),
)));
let telemetry_handle = Arc::new(telemetry::Telemetry::new(&store.basemind_dir));
#[cfg(feature = "crawl")]
let crawl_engine = match crate::web::build_engine(&config.crawl) {
Ok(e) => Some(e),
Err(error) => {
tracing::warn!(
?error,
"crawl engine init failed; web_* tools will report errors"
);
None
}
};
let state = Arc::new(ServerState {
store: RwLock::new(store),
root,
cache: ArcSwap::from(cache),
repo,
git_cache,
outline_cache,
config,
telemetry: telemetry_handle,
corpus_bytes: std::sync::atomic::AtomicU64::new(corpus_bytes),
cache_generation: std::sync::atomic::AtomicU32::new(1),
scope,
#[cfg(any(feature = "memory", feature = "documents"))]
lance: tokio::sync::OnceCell::new(),
#[cfg(feature = "intelligence")]
embedder: tokio::sync::OnceCell::new(),
#[cfg(feature = "crawl")]
crawl_engine,
});
if options.background {
let view_is_working = {
match state.store.try_read() {
Ok(g) => g.view == crate::store::VIEW_WORKING,
Err(_) => false,
}
};
if options.watch && view_is_working {
spawn_serve_watcher(Arc::clone(&state));
} else {
spawn_view_watcher(Arc::clone(&state));
}
if needs_initial_scan {
let scan_state = Arc::clone(&state);
tracing::info!("empty index on startup; running initial scan in background");
tokio::spawn(async move {
match helpers::scan_and_refresh(Arc::clone(&scan_state), None).await {
Ok(report) => tracing::info!(
scanned = report.stats.scanned,
updated = report.stats.updated,
"initial background scan complete"
),
Err(error) => {
tracing::warn!(%error, "initial background scan failed");
}
}
run_background_gc(scan_state).await;
});
} else {
let gc_state = Arc::clone(&state);
tokio::spawn(async move {
run_background_gc(gc_state).await;
});
}
}
#[allow(unused_mut)]
let mut router = Self::tool_router_core()
+ Self::tool_router_git()
+ Self::tool_router_memory()
+ Self::tool_router_admin();
#[cfg(feature = "crawl")]
{
router += Self::tool_router_web();
}
Self {
state,
tool_router: router,
}
}
}
/// Runs one blob garbage-collection pass on the blocking thread pool and logs the outcome.
///
/// The pass takes a blocking read guard on the store for its whole duration, collects the
/// referenced hashes under the store's `basemind_dir`, and hands them to
/// `crate::store_gc::gc_blobs`. Nothing is returned: success, failure and a failed join
/// are each only logged.
async fn run_background_gc(state: Arc<ServerState>) {
    let gc_task = tokio::task::spawn_blocking(move || {
        let store = state.store.blocking_read();
        let referenced = crate::store_gc::collect_referenced_hashes(&store.basemind_dir)?;
        crate::store_gc::gc_blobs(&store.basemind_dir, &referenced)
    });

    match gc_task.await {
        // The blocking task itself did not complete.
        Err(error) => tracing::warn!(%error, "background blob GC task panicked"),
        // The task ran but the GC pass reported an error.
        Ok(Err(error)) => tracing::warn!(%error, "background blob GC failed"),
        Ok(Ok(report)) => {
            if report.removed > 0 {
                tracing::info!(
                    removed = report.removed,
                    bytes_freed = report.bytes_freed,
                    "background blob GC reclaimed orphaned blobs"
                )
            } else {
                tracing::debug!("background blob GC: nothing to reclaim")
            }
        }
    }
}
fn spawn_serve_watcher(state: Arc<ServerState>) {
let root = state.root.clone();
let config = Arc::clone(&state.config);
let handle = tokio::runtime::Handle::current();
let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();
std::thread::Builder::new()
.name("basemind-mcp-serve-watcher".to_string())
.spawn(move || {
let _keep_sender_alive = _shutdown_tx;
tracing::info!(root = %root.display(), "serve watcher armed (live incremental rescan)");
let result =
crate::watcher::watch_paths(&root, &config, shutdown_rx, |paths, _kind| {
let refresh_state = Arc::clone(&state);
match handle.block_on(helpers::scan_and_refresh(refresh_state, Some(paths))) {
Ok(report) => tracing::debug!(
scanned = report.stats.scanned,
updated = report.stats.updated,
removed = report.stats.removed,
"serve watcher: incremental rescan complete"
),
Err(error) => tracing::warn!(
%error,
"serve watcher: incremental rescan failed (watcher continues)"
),
}
});
if let Err(error) = result {
tracing::warn!(%error, "serve watcher exited with error");
}
tracing::info!("serve watcher: exiting");
})
.ok();
}
fn spawn_view_watcher(state: Arc<ServerState>) {
let (basemind_dir, view) = {
let store = match state.store.try_read() {
Ok(g) => g,
Err(_) => return,
};
(store.basemind_dir.clone(), store.view.clone())
};
let view_dir = basemind_dir.join(crate::store::VIEWS_DIR).join(&view);
let target = view_dir.join(crate::store::INDEX_FILE);
std::thread::Builder::new()
.name("basemind-mcp-view-watcher".to_string())
.spawn(move || {
use notify_debouncer_full::new_debouncer;
use std::time::Duration;
let (tx, rx) = std::sync::mpsc::channel();
let mut debouncer = match new_debouncer(Duration::from_millis(150), None, tx) {
Ok(d) => d,
Err(e) => {
tracing::warn!(error = %e, "view watcher: failed to start debouncer");
return;
}
};
if let Err(e) = debouncer.watch(&view_dir, notify::RecursiveMode::NonRecursive) {
tracing::warn!(error = %e, dir = %view_dir.display(), "view watcher: failed to watch");
return;
}
tracing::info!(target = %target.display(), "view watcher armed");
while let Ok(result) = rx.recv() {
let events = match result {
Ok(e) => e,
Err(_) => continue,
};
let touches_index = events
.iter()
.any(|de| de.event.paths.iter().any(|p| p == &target));
if !touches_index {
continue;
}
let new_store = match crate::store::Store::open_read_only(
state.root.as_path(),
&state
.store
.try_read()
.map(|g| g.view.clone())
.unwrap_or_default(),
) {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = %e, "view watcher: store reopen failed");
continue;
}
};
let new_cache = Arc::new(MapCache::build(&new_store));
tracing::info!(
files = new_cache.by_path.len(),
"view watcher: rebuilt MapCache from refreshed index"
);
state.cache.store(new_cache);
state
.cache_generation
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
tracing::info!("view watcher: channel closed; exiting");
})
.ok();
}
#[tool_handler(router = self.tool_router.clone())]
impl ServerHandler for BasemindServer {
    /// Describes the server to clients: tool support is enabled and a fixed block of
    /// usage instructions is attached.
    fn get_info(&self) -> ServerInfo {
        // Client-facing guidance text. Each source line ends in a `\` continuation, so
        // the indentation below is not part of the string.
        const INSTRUCTIONS: &str = "basemind is the indexed context layer for this repository, served over MCP: a \
            tree-sitter code map across 300+ languages (symbols, references, callers, call \
            graphs, implementations), git history + blame at symbol resolution, full-text + \
            semantic search, document RAG over 90+ file formats, and shared cross-session \
            memory. Prefer these tools over reading files when navigating large or unfamiliar \
            codebases.\n\
            Context economy — these tools return paths, line numbers, and signatures, not \
            file bodies, so they cost a fraction of the tokens of reading source. Default to \
            them: `outline` a file before you open it (then read only the span you need); \
            `search_symbols` instead of grep for a definition; `find_references` / \
            `find_callers` instead of grepping call sites; `workspace_grep` instead of \
            shelling out to ripgrep; `rescan` after edits instead of reconnecting. Do not \
            re-read a file basemind already mapped.\n\
            Routing: \
            \"where is X defined?\" → `search_symbols`; \
            \"what calls X?\" → `find_references` (any name) or `find_callers` (specific def); \
            \"shape of this file?\" → `outline` (add `l2: true` for calls + docs); \
            \"what changed recently?\" → `recent_changes`, `commits_touching`, `symbol_history`; \
            \"who last touched this?\" → `blame_file` / `blame_symbol`; \
            \"where's the churn?\" → `hot_files`; \
            \"semantic search across PDFs/docs in the repo?\" → `search_documents`; \
            \"recall something the agent remembered earlier?\" → `memory_get` / `memory_list` / \
            `memory_search`; \
            \"remember this for later sessions?\" → `memory_put` (delete with `memory_delete`); \
            \"refresh the index after editing code?\" → `rescan` (or `rescan { paths: [...] }` \
            to limit to changed files).\n\
            \"got a truncated result? fetch the next page?\" → pass `next_cursor` from the prior \
            response back as `cursor`.\n\
            \"need regex over file contents?\" → `workspace_grep`.\n\
            Code-map tools: `outline`, `search_symbols`, `find_references`, `find_callers`, \
            `list_files`, `workspace_grep`, `dependents`, `status`, `repo_info`, \
            `symbol_history`. \
            Git tools (inside a repo): `working_tree_status`, `recent_changes`, `commits_touching`, \
            `find_commits_by_path`, `hot_files`, `diff_outline`, `diff_file`, `blame_file`, \
            `blame_symbol`. \
            Intelligence tools (require build with `--features documents,memory`): \
            `search_documents`, `memory_put`, `memory_get`, `memory_list`, `memory_search`, \
            `memory_delete`. \
            Web tools (require build with `--features crawl`): `web_scrape` (one URL), \
            `web_crawl` (follow links from a seed URL), `web_map` (sitemap-only discovery). \
            Crawled pages land in the same LanceDB documents table as on-disk docs, scoped \
            under `web:<host>` — find them later with `search_documents`. \
            All paths are repository-relative with forward-slash separators. \
            If a tool reports \"no indexed files\", run `basemind scan` in the repo first.";

        let capabilities = ServerCapabilities::builder().enable_tools().build();
        ServerInfo::new(capabilities).with_instructions(INSTRUCTIONS)
    }
}