pub(crate) mod cursor;
mod helpers;
mod helpers_admin;
mod helpers_calls;
#[cfg(feature = "comms")]
mod helpers_comms;
#[cfg(feature = "documents")]
mod helpers_documents;
mod helpers_graph;
mod helpers_grep;
mod helpers_impls;
#[cfg(feature = "crawl")]
mod helpers_web;
#[cfg(any(feature = "memory", feature = "documents"))]
mod memory;
mod savings;
mod telemetry;
mod tools;
mod tools_admin;
#[cfg(feature = "comms")]
mod tools_comms;
mod tools_git;
mod tools_memory;
#[cfg(feature = "crawl")]
mod tools_web;
mod types;
mod types_admin;
#[cfg(feature = "comms")]
mod types_comms;
mod types_documents;
mod types_graph;
mod types_impls;
mod types_memory;
use std::collections::BTreeMap;
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use arc_swap::ArcSwap;
use lru::LruCache;
use rmcp::ServerHandler;
use rmcp::handler::server::tool::ToolRouter;
use rmcp::model::{ServerCapabilities, ServerInfo};
use rmcp::tool_handler;
use tokio::sync::RwLock;
use crate::extract::{FileMapL1, Import};
use crate::lang::LangId;
use crate::store::Store;
/// Re-exports the tool parameter types from the sibling `types*` modules, together with
/// rmcp's `Parameters` wrapper, under a single module path.
pub mod params {
pub use rmcp::handler::server::wrapper::Parameters;
// Parameter types declared in the `types` module.
pub use super::types::{
BlameFileParams, BlameSymbolParams, CommitsTouchingParams, DependentsParams,
DiffFileParams, DiffOutlineParams, FindCallersParams, FindCommitsByPathParams,
FindReferencesParams, HotFilesParams, ListFilesParams, OutlineParams, RecentChangesParams,
RepoInfoParams, RescanParams, SearchDocumentsParams, SearchSymbolsParams, StatusParams,
SymbolHistoryParams, TelemetrySummaryParams, WorkingTreeStatusParams, WorkspaceGrepParams,
};
// The web parameter types are only re-exported when the `crawl` feature is enabled.
#[cfg(feature = "crawl")]
pub use super::types::{WebCrawlParams, WebMapParams, WebScrapeParams};
pub use super::types_admin::{CacheClearParams, CacheGcParams, CacheStatsParams};
pub use super::types_graph::CallGraphParams;
pub use super::types_impls::FindImplementationsParams;
pub use super::types_memory::{
MemoryDeleteParams, MemoryGetParams, MemoryListParams, MemoryPutParams, MemorySearchParams,
Visibility,
};
}
pub use params::Parameters;
/// Capacity, in entries, of the LRU created for [`OutlineCache`] when the server is built.
/// Must stay non-zero: it is converted with `NonZeroUsize::new(..).expect(..)`.
pub(crate) const OUTLINE_CACHE_CAP: usize = 512;
/// Value type stored in [`OutlineCache`]: an extracted file map paired with a byte buffer.
pub(crate) struct OutlineEntry {
/// Extracted `FileMapL1`, shared through an `Arc`.
pub map: Arc<FileMapL1>,
/// Raw bytes, shared through an `Arc`.
// NOTE(review): presumably the source text `map` was extracted from — confirm against the
// code that fills the cache.
pub source: Arc<Vec<u8>>,
}
/// Mutex-guarded LRU of shared [`OutlineEntry`] values keyed by `(gix::ObjectId, LangId)`.
pub(crate) type OutlineCache = Mutex<LruCache<(gix::ObjectId, LangId), Arc<OutlineEntry>>>;
/// MCP server handle. Clones share one [`ServerState`] through the `Arc` in `state`.
#[derive(Clone)]
pub struct BasemindServer {
/// State shared by every clone of this handle and by the background tasks/threads.
pub(crate) state: Arc<ServerState>,
// NOTE(review): this field is referenced from the `#[tool_handler(router = ...)]` attribute
// on the `ServerHandler` impl; the allow presumably silences a lint that cannot see that
// macro-level use — confirm before removing.
#[allow(dead_code)]
tool_router: ToolRouter<Self>,
}
/// State shared between [`BasemindServer`] clones and the background watcher / GC work.
pub(crate) struct ServerState {
/// The opened store, behind an async `RwLock`.
pub(crate) store: RwLock<Store>,
/// Root path the server was created for; also the directory the serve watcher observes.
pub(crate) root: PathBuf,
/// Current in-memory code map. Swapped wholesale (not mutated) when it is rebuilt.
pub(crate) cache: ArcSwap<MapCache>,
/// Git repository handle, or `None` when the server was built without one.
pub(crate) repo: Option<Arc<crate::git::Repo>>,
/// Shared git cache handed in by the caller.
pub(crate) git_cache: Arc<crate::git_cache::GitCache>,
/// LRU of outline entries, created with capacity `OUTLINE_CACHE_CAP`.
pub(crate) outline_cache: Arc<OutlineCache>,
/// Configuration handed in by the caller.
pub(crate) config: Arc<crate::config::Config>,
/// Telemetry handle, created from the store's basemind directory.
pub(crate) telemetry: Arc<telemetry::Telemetry>,
/// Initialised to the sum of `size_bytes` over the indexed files at construction time.
pub(crate) corpus_bytes: std::sync::atomic::AtomicU64,
/// Starts at 1 and is incremented each time the view watcher swaps in a rebuilt `cache`.
pub(crate) cache_generation: std::sync::atomic::AtomicU32,
/// Git scope key when a repo is present, otherwise `path:<root>`.
// NOTE(review): the dead_code allow suggests this is unused under some feature sets — confirm.
#[allow(dead_code)] pub(crate) scope: String,
/// Agent id chosen by `resolve_agent_id` (environment, config, persisted file, or `"anon"`).
// NOTE(review): the dead_code allow suggests this is unused under some feature sets — confirm.
#[allow(dead_code)] pub(crate) agent_id: String,
/// Empty at construction; holds the LanceDB store once something initialises it.
#[cfg(any(feature = "memory", feature = "documents"))]
pub(crate) lance: tokio::sync::OnceCell<Arc<crate::lance::LanceStore>>,
/// Empty at construction; holds the shared embedder once something initialises it.
#[cfg(feature = "intelligence")]
pub(crate) embedder: tokio::sync::OnceCell<Arc<crate::embeddings::SharedEmbedder>>,
/// Crawl engine, or `None` when building it failed at construction time.
#[cfg(feature = "crawl")]
pub(crate) crawl_engine: Option<kreuzcrawl::CrawlEngineHandle>,
/// Comms client slot; starts as `None`.
#[cfg(feature = "comms")]
pub(crate) comms_client: tokio::sync::Mutex<Option<crate::comms::client::CommsClient>>,
}
/// In-memory snapshot of the indexed files' L1 maps, produced by `MapCache::build`.
pub(crate) struct MapCache {
/// L1 map for each indexed file that could be read from the store, keyed by `RelPath`.
pub(crate) by_path: BTreeMap<crate::path::RelPath, FileMapL1>,
/// One `(path, imports)` pair per entry of `by_path`, in `by_path` iteration order.
pub(crate) imports_index: Vec<(PathBuf, Vec<Import>)>,
}
impl MapCache {
fn build(store: &Store) -> Self {
let mut by_path = BTreeMap::new();
for (path, entry) in &store.index.files {
match store.read_l1_by_hex(&entry.hash_hex) {
Ok(Some(l1)) => {
by_path.insert(path.clone(), l1);
}
Ok(None) | Err(_) => continue,
}
}
let imports_index: Vec<(PathBuf, Vec<Import>)> = by_path
.iter()
.map(|(p, l1)| (p.to_path_buf(), l1.imports.clone()))
.collect();
Self {
by_path,
imports_index,
}
}
}
/// Switches controlling which background work `BasemindServer::new_with_options` starts.
#[derive(Debug, Clone, Copy)]
pub struct ServerOptions {
    /// When `false`, no watcher thread, initial scan, or background GC task is started.
    pub background: bool,
    /// When `true` (and the store is on the working view), the live file watcher is started
    /// instead of the index-file watcher. Only consulted when `background` is `true`.
    pub watch: bool,
}

impl Default for ServerOptions {
    /// Both switches default to enabled.
    fn default() -> Self {
        let (background, watch) = (true, true);
        Self { background, watch }
    }
}
impl BasemindServer {
pub fn new(
store: Store,
root: PathBuf,
config: Arc<crate::config::Config>,
repo: Option<Arc<crate::git::Repo>>,
git_cache: Arc<crate::git_cache::GitCache>,
) -> Self {
Self::new_with_options(
store,
root,
config,
repo,
git_cache,
ServerOptions::default(),
)
}
pub fn new_oneshot(
store: Store,
root: PathBuf,
config: Arc<crate::config::Config>,
repo: Option<Arc<crate::git::Repo>>,
git_cache: Arc<crate::git_cache::GitCache>,
) -> Self {
Self::new_with_options(
store,
root,
config,
repo,
git_cache,
ServerOptions {
background: false,
watch: false,
},
)
}
pub fn new_with_options(
store: Store,
root: PathBuf,
config: Arc<crate::config::Config>,
repo: Option<Arc<crate::git::Repo>>,
git_cache: Arc<crate::git_cache::GitCache>,
options: ServerOptions,
) -> Self {
let scope = repo
.as_ref()
.map(|r| crate::git::scope_key(r))
.unwrap_or_else(|| format!("path:{}", root.display()));
let agent_id = resolve_agent_id(&config, &store);
let cache = Arc::new(MapCache::build(&store));
let corpus_bytes: u64 = store.index.files.values().map(|e| e.size_bytes).sum();
let view_is_working = store.view == crate::store::VIEW_WORKING;
let fjall_index_empty = store
.index_db
.as_ref()
.map(|db| db.symbols_index_is_empty())
.unwrap_or(false);
let needs_initial_scan = view_is_working && (cache.by_path.is_empty() || fjall_index_empty);
tracing::info!(
files = cache.by_path.len(),
corpus_bytes,
git = repo.is_some(),
scope = %scope,
"preloaded code map into RAM for MCP server"
);
let outline_cache: Arc<OutlineCache> = Arc::new(Mutex::new(LruCache::new(
NonZeroUsize::new(OUTLINE_CACHE_CAP).expect("OUTLINE_CACHE_CAP > 0"),
)));
let telemetry_handle = Arc::new(telemetry::Telemetry::new(&store.basemind_dir));
#[cfg(feature = "crawl")]
let crawl_engine = match crate::web::build_engine(&config.crawl) {
Ok(e) => Some(e),
Err(error) => {
tracing::warn!(
?error,
"crawl engine init failed; web_* tools will report errors"
);
None
}
};
let state = Arc::new(ServerState {
store: RwLock::new(store),
root,
cache: ArcSwap::from(cache),
repo,
git_cache,
outline_cache,
config,
telemetry: telemetry_handle,
corpus_bytes: std::sync::atomic::AtomicU64::new(corpus_bytes),
cache_generation: std::sync::atomic::AtomicU32::new(1),
scope,
agent_id,
#[cfg(any(feature = "memory", feature = "documents"))]
lance: tokio::sync::OnceCell::new(),
#[cfg(feature = "intelligence")]
embedder: tokio::sync::OnceCell::new(),
#[cfg(feature = "crawl")]
crawl_engine,
#[cfg(feature = "comms")]
comms_client: tokio::sync::Mutex::new(None),
});
if options.background {
let view_is_working = {
match state.store.try_read() {
Ok(g) => g.view == crate::store::VIEW_WORKING,
Err(_) => false,
}
};
if options.watch && view_is_working {
spawn_serve_watcher(Arc::clone(&state));
} else {
spawn_view_watcher(Arc::clone(&state));
}
if needs_initial_scan {
let scan_state = Arc::clone(&state);
tracing::info!("empty index on startup; running initial scan in background");
tokio::spawn(async move {
match helpers::scan_and_refresh(Arc::clone(&scan_state), None).await {
Ok(report) => tracing::info!(
scanned = report.stats.scanned,
updated = report.stats.updated,
"initial background scan complete"
),
Err(error) => {
tracing::warn!(%error, "initial background scan failed");
}
}
run_background_gc(scan_state).await;
});
} else {
let gc_state = Arc::clone(&state);
tokio::spawn(async move {
run_background_gc(gc_state).await;
});
}
}
#[allow(unused_mut)]
let mut router = Self::tool_router_core()
+ Self::tool_router_git()
+ Self::tool_router_memory()
+ Self::tool_router_admin();
#[cfg(feature = "crawl")]
{
router += Self::tool_router_web();
}
#[cfg(feature = "comms")]
{
router += Self::tool_router_comms();
}
Self {
state,
tool_router: router,
}
}
}
/// Name of the file, inside the basemind directory, where a generated agent id is persisted.
const AGENT_ID_FILE: &str = "agent-id";
/// Picks the agent id for this server instance.
///
/// Candidates are tried in order and the first one accepted by `AgentId::parse` wins:
/// 1. the `BASEMIND_AGENT_ID` environment variable,
/// 2. `config.comms.agent_id`,
/// 3. the id persisted in (or newly written to) the store's basemind directory.
///
/// Falls back to `"anon"` when none of them parses. Later candidates are only evaluated
/// when the earlier ones were rejected, so the persisted file is not touched if the
/// environment or the config already supplies a valid id.
fn resolve_agent_id(config: &crate::config::Config, store: &Store) -> String {
    // Keeps a candidate only if it parses as an agent id.
    let validated = |candidate: Option<String>| -> Option<String> {
        let raw = candidate?;
        let parsed = crate::comms::ids::AgentId::parse(raw).ok()?;
        Some(parsed.into_string())
    };

    validated(std::env::var("BASEMIND_AGENT_ID").ok())
        .or_else(|| validated(config.comms.agent_id.clone()))
        .or_else(|| validated(load_or_create_persisted_agent_id(&store.basemind_dir)))
        .unwrap_or_else(|| "anon".to_string())
}
/// Returns the agent id stored under `basemind_dir`, generating one if none is usable.
///
/// A stored id is used when the file can be read and is non-empty after trimming.
/// Otherwise a token of the form `session-<pid hex>-<unix nanos hex>` is generated and
/// written to the file on a best-effort basis: a failed write is ignored and the token is
/// still returned. As written, the function always returns `Some`.
fn load_or_create_persisted_agent_id(basemind_dir: &std::path::Path) -> Option<String> {
    let id_path = basemind_dir.join(AGENT_ID_FILE);

    let persisted = std::fs::read_to_string(&id_path)
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|id| !id.is_empty());
    if persisted.is_some() {
        return persisted;
    }

    // A clock set before the Unix epoch degrades to 0 rather than failing.
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    let nanos = match since_epoch {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let token = format!("session-{:x}-{:x}", std::process::id(), nanos);
    // Best effort: the id is still usable for this process if it cannot be persisted.
    let _ = std::fs::write(&id_path, &token);
    Some(token)
}
/// Runs one blob garbage-collection pass on the blocking thread pool and logs the outcome.
///
/// The pass takes a blocking read lock on the store, collects the referenced hashes under
/// the store's basemind directory, and hands them to `gc_blobs`. Nothing is returned;
/// success, failure, and a failed join are reported through `tracing` only.
async fn run_background_gc(state: Arc<ServerState>) {
    let sweep = move || {
        let store = state.store.blocking_read();
        let dir = &store.basemind_dir;
        let referenced = crate::store_gc::collect_referenced_hashes(dir)?;
        crate::store_gc::gc_blobs(dir, &referenced)
    };
    let outcome = tokio::task::spawn_blocking(sweep).await;

    match outcome {
        Err(error) => tracing::warn!(%error, "background blob GC task panicked"),
        Ok(Err(error)) => tracing::warn!(%error, "background blob GC failed"),
        Ok(Ok(report)) => {
            if report.removed > 0 {
                tracing::info!(
                    removed = report.removed,
                    bytes_freed = report.bytes_freed,
                    "background blob GC reclaimed orphaned blobs"
                );
            } else {
                tracing::debug!("background blob GC: nothing to reclaim");
            }
        }
    }
}
fn spawn_serve_watcher(state: Arc<ServerState>) {
let root = state.root.clone();
let config = Arc::clone(&state.config);
let handle = tokio::runtime::Handle::current();
let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();
std::thread::Builder::new()
.name("basemind-mcp-serve-watcher".to_string())
.spawn(move || {
let _keep_sender_alive = _shutdown_tx;
tracing::info!(root = %root.display(), "serve watcher armed (live incremental rescan)");
let result =
crate::watcher::watch_paths(&root, &config, shutdown_rx, |paths, _kind| {
let refresh_state = Arc::clone(&state);
match handle.block_on(helpers::scan_and_refresh(refresh_state, Some(paths))) {
Ok(report) => tracing::debug!(
scanned = report.stats.scanned,
updated = report.stats.updated,
removed = report.stats.removed,
"serve watcher: incremental rescan complete"
),
Err(error) => tracing::warn!(
%error,
"serve watcher: incremental rescan failed (watcher continues)"
),
}
});
if let Err(error) = result {
tracing::warn!(%error, "serve watcher exited with error");
}
tracing::info!("serve watcher: exiting");
})
.ok();
}
fn spawn_view_watcher(state: Arc<ServerState>) {
let (basemind_dir, view) = {
let store = match state.store.try_read() {
Ok(g) => g,
Err(_) => return,
};
(store.basemind_dir.clone(), store.view.clone())
};
let view_dir = basemind_dir.join(crate::store::VIEWS_DIR).join(&view);
let target = view_dir.join(crate::store::INDEX_FILE);
std::thread::Builder::new()
.name("basemind-mcp-view-watcher".to_string())
.spawn(move || {
use notify_debouncer_full::new_debouncer;
use std::time::Duration;
let (tx, rx) = std::sync::mpsc::channel();
let mut debouncer = match new_debouncer(Duration::from_millis(150), None, tx) {
Ok(d) => d,
Err(e) => {
tracing::warn!(error = %e, "view watcher: failed to start debouncer");
return;
}
};
if let Err(e) = debouncer.watch(&view_dir, notify::RecursiveMode::NonRecursive) {
tracing::warn!(error = %e, dir = %view_dir.display(), "view watcher: failed to watch");
return;
}
tracing::info!(target = %target.display(), "view watcher armed");
while let Ok(result) = rx.recv() {
let events = match result {
Ok(e) => e,
Err(_) => continue,
};
let touches_index = events
.iter()
.any(|de| de.event.paths.iter().any(|p| p == &target));
if !touches_index {
continue;
}
let new_store = match crate::store::Store::open_read_only(
state.root.as_path(),
&state
.store
.try_read()
.map(|g| g.view.clone())
.unwrap_or_default(),
) {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = %e, "view watcher: store reopen failed");
continue;
}
};
let new_cache = Arc::new(MapCache::build(&new_store));
tracing::info!(
files = new_cache.by_path.len(),
"view watcher: rebuilt MapCache from refreshed index"
);
state.cache.store(new_cache);
state
.cache_generation
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
tracing::info!("view watcher: channel closed; exiting");
})
.ok();
}
// MCP `ServerHandler` implementation. The `tool_handler` attribute is given a clone of the
// server's tool router.
#[tool_handler(router = self.tool_router.clone())]
impl ServerHandler for BasemindServer {
/// Returns the server info: capabilities with tools enabled, plus the instruction text
/// below. The text is a single runtime string sent to clients, so any edit to it changes
/// what connected agents are told.
fn get_info(&self) -> ServerInfo {
ServerInfo::new(ServerCapabilities::builder().enable_tools().build()).with_instructions(
"basemind is the indexed context layer for this repository, served over MCP: a \
tree-sitter code map across 300+ languages (symbols, references, callers, call \
graphs, implementations), git history + blame at symbol resolution, full-text + \
semantic search, document RAG over 90+ file formats, and shared cross-session \
memory. basemind first, shell/grep/git fallback: prefer these tools over reading \
files, over grep, and over naked `git` — and use them for document extraction, web \
crawling, and code parsing too. You may be one of several agents in this repo: on \
start, check the comms room and post status as you work (see Agent comms below).\n\
Context economy — these tools return paths, line numbers, and signatures, not \
file bodies, so they cost a fraction of the tokens of reading source. Default to \
them: `outline` a file before you open it (then read only the span you need); \
`search_symbols` instead of grep for a definition; `find_references` / \
`find_callers` instead of grepping call sites; `workspace_grep` instead of \
shelling out to ripgrep; `rescan` after edits instead of reconnecting. Do not \
re-read a file basemind already mapped. Same discipline beyond code: use the git \
tools (`recent_changes` / `blame_*` / `diff_*` / `commits_touching`) instead of \
shelling out to `git log`/`git blame`; `search_documents` and the documents \
pipeline for extraction, RAG, keyword + entity (NER), and summary instead of \
opening files; `web_scrape` / `web_crawl` / `web_map` for scraping, crawling, and \
sitemaps.\n\
Routing: \
\"where is X defined?\" → `search_symbols`; \
\"what calls X?\" → `find_references` (any name) or `find_callers` (specific def); \
\"shape of this file?\" → `outline` (add `l2: true` for calls + docs); \
\"what changed recently?\" → `recent_changes`, `commits_touching`, `symbol_history`; \
\"who last touched this?\" → `blame_file` / `blame_symbol`; \
\"where's the churn?\" → `hot_files`; \
\"semantic search across PDFs/docs in the repo?\" → `search_documents`; \
\"recall something the agent remembered earlier?\" → `memory_get` / `memory_list` / \
`memory_search`; \
\"remember this for later sessions?\" → `memory_put` (delete with `memory_delete`); \
\"refresh the index after editing code?\" → `rescan` (or `rescan { paths: [...] }` \
to limit to changed files); \
\"any other agents working here / leave a note for the next session?\" → \
`room_list` / `inbox_read` / `room_post`.\n\
\"got a truncated result? fetch the next page?\" → pass `next_cursor` from the prior \
response back as `cursor`.\n\
\"need regex over file contents?\" → `workspace_grep`.\n\
Code-map tools: `outline`, `search_symbols`, `find_references`, `find_callers`, \
`list_files`, `workspace_grep`, `dependents`, `status`, `repo_info`, \
`symbol_history`. \
Git tools (inside a repo): `working_tree_status`, `recent_changes`, `commits_touching`, \
`find_commits_by_path`, `hot_files`, `diff_outline`, `diff_file`, `blame_file`, \
`blame_symbol`. \
Intelligence tools (require build with `--features documents,memory`): \
`search_documents`, `memory_put`, `memory_get`, `memory_list`, `memory_search`, \
`memory_delete`. \
Web tools (require build with `--features crawl`): `web_scrape` (one URL), \
`web_crawl` (follow links from a seed URL), `web_map` (sitemap-only discovery). \
Crawled pages land in the same LanceDB documents table as on-disk docs, scoped \
under `web:<host>` — find them later with `search_documents`. \
Agent comms (require build with `--features comms`): you may share this repo's \
rooms with other agents working alongside you. On start, check `room_list` + \
`inbox_read` (and recent `room_history`) for what's been said; `room_history` and \
`inbox_read` return front-matter only (subject / from / id) — call `message_get` \
with an id for a body. Post a concise `room_post {room, subject, body, reply_to?}` \
when you begin, finish, or hit a decision, and reply (`reply_to`) to messages \
about your work — do not stay silent when collaborating. Tools: `room_list`, \
`room_join`, `room_post`, `room_history`, `inbox_read`, `message_get`, \
`room_create`, `room_leave`, `agent_register`, `agent_list`. \
All paths are repository-relative with forward-slash separators. \
If a tool reports \"no indexed files\", run `basemind scan` in the repo first.",
)
}
}