basemind 0.4.0-rc.3

Full AI context layer over MCP — tree-sitter code-map, document RAG (PDF/Office/HTML/email + OCR + reranker), shared agent memory, on-demand web crawl, git history + blame + per-symbol diff. 300+ languages, 8 coding-agent harnesses, content-addressed Fjall + LanceDB.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
//! MCP server exposing the basemind code map + git context to AI agents.
//!
//! The server opens the store writably and is the canonical Fjall owner: it holds the exclusive
//! lock so the in-process `rescan` tool (and the background watcher) can refresh the index. While
//! a server is running, standalone `basemind scan` / `basemind watch` against the same repo fail
//! fast with a lock error rather than racing it. Tools return JSON so the agent can navigate by
//! file path + line numbers without opening source files.
//!
//! Transport: stdio (the canonical MCP transport). Spawn via `basemind serve`.

pub(crate) mod cursor;
mod helpers;
mod helpers_admin;
mod helpers_calls;
#[cfg(feature = "documents")]
mod helpers_documents;
mod helpers_graph;
mod helpers_grep;
mod helpers_impls;
#[cfg(feature = "crawl")]
mod helpers_web;
#[cfg(any(feature = "memory", feature = "documents"))]
mod memory;
mod savings;
mod telemetry;
mod tools;
mod tools_admin;
mod tools_git;
mod tools_memory;
#[cfg(feature = "crawl")]
mod tools_web;
mod types;
mod types_admin;
mod types_documents;
mod types_graph;
mod types_impls;

use std::collections::BTreeMap;
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use arc_swap::ArcSwap;
use lru::LruCache;
use rmcp::ServerHandler;
use rmcp::handler::server::tool::ToolRouter;
use rmcp::model::{ServerCapabilities, ServerInfo};
use rmcp::tool_handler;
use tokio::sync::RwLock;

use crate::extract::{FileMapL1, Import};
use crate::lang::LangId;
use crate::store::Store;

/// Public re-export of every tool `*Params` type plus the `Parameters` wrapper, so the
/// in-process CLI (`src/cli/`) can build tool arguments and call the `#[tool]` methods
/// directly. This is the parity-by-construction surface: the CLI runs the identical tool
/// code an MCP client would dispatch.
pub mod params {
    pub use rmcp::handler::server::wrapper::Parameters;

    pub use super::types::{
        BlameFileParams, BlameSymbolParams, CommitsTouchingParams, DependentsParams,
        DiffFileParams, DiffOutlineParams, FindCallersParams, FindCommitsByPathParams,
        FindReferencesParams, HotFilesParams, ListFilesParams, MemoryDeleteParams, MemoryGetParams,
        MemoryListParams, MemoryPutParams, MemorySearchParams, OutlineParams, RecentChangesParams,
        RepoInfoParams, RescanParams, SearchDocumentsParams, SearchSymbolsParams, StatusParams,
        SymbolHistoryParams, TelemetrySummaryParams, WorkingTreeStatusParams, WorkspaceGrepParams,
    };
    #[cfg(feature = "crawl")]
    pub use super::types::{WebCrawlParams, WebMapParams, WebScrapeParams};
    pub use super::types_admin::{CacheClearParams, CacheGcParams, CacheStatsParams};
    pub use super::types_graph::CallGraphParams;
    pub use super::types_impls::FindImplementationsParams;
}

pub use params::Parameters;

/// In-memory cache for `symbol_history`-style workflows: given a blob's git OID and the
/// language we'd extract with, hold onto the parsed `FileMapL1` and the source bytes so
/// repeated visits to the same blob (across commits, modes, or tool calls) skip the
/// tree-sitter parse entirely. Memory-only — blob OIDs are content-addressed and immutable,
/// so cache invalidation is implicit (a new blob = a new key).
///
/// Cap chosen to bound steady-state memory at a few MB for typical repositories: 512
/// entries × ~few KiB per `FileMapL1` + Arc'd source = on the order of 1–10 MiB.
pub(crate) const OUTLINE_CACHE_CAP: usize = 512;

pub(crate) struct OutlineEntry {
    pub map: Arc<FileMapL1>,
    pub source: Arc<Vec<u8>>,
}

pub(crate) type OutlineCache = Mutex<LruCache<(gix::ObjectId, LangId), Arc<OutlineEntry>>>;

/// Shared MCP server state. `ToolRouter<Self>` is Clone (Arc inside), so we hold it directly
/// on the struct as the `#[tool_handler]` macro expects.
#[derive(Clone)]
pub struct BasemindServer {
    pub(crate) state: Arc<ServerState>,
    // Touched by macro-generated dispatch; dead_code can't see that.
    #[allow(dead_code)]
    tool_router: ToolRouter<Self>,
}

pub(crate) struct ServerState {
    pub(crate) store: RwLock<Store>,
    pub(crate) root: PathBuf,
    /// In-RAM mirror of every indexed file's L1 blob.
    ///
    /// Cross-file queries (`search_symbols`, `dependents`) otherwise re-read 1 blob per file
    /// per call — for a 39k-file repo that's seconds. With the preload they're pure-RAM scans.
    /// Wrapped in `ArcSwap` so the filesystem watcher can publish a new snapshot without
    /// blocking readers. Read-path tools do `.load_full()` once at the top to take a stable
    /// `Arc<MapCache>` for the duration of the call.
    pub(crate) cache: ArcSwap<MapCache>,
    /// Discovered git repository, or `None` when serving against a non-git directory.
    /// All git-aware tools (`working_tree_status`, `recent_changes`, …) check this and
    /// return an MCP error if `None`.
    pub(crate) repo: Option<Arc<crate::git::Repo>>,
    /// Sha-keyed cache for commit-files diffs, log walks, and blame results.
    pub(crate) git_cache: Arc<crate::git_cache::GitCache>,
    /// `(blob_oid, lang) -> Arc<OutlineEntry>` cache that keeps `symbol_history` fast on
    /// hot files even when the symbol's source blob shows up in many adjacent commits.
    pub(crate) outline_cache: Arc<OutlineCache>,
    /// Scanner config (include / exclude globs, eager_l2, document tier knobs, …).
    /// Held on the server so the `rescan` MCP tool can re-run a scan in-process
    /// without re-reading `.basemind/basemind.toml`.
    pub(crate) config: Arc<crate::config::Config>,
    /// Per-tool-call telemetry writer; appends to `.basemind/telemetry.jsonl`.
    /// Always present (best-effort writes); the dashboard surfaces / statusline
    /// read from the same file.
    pub(crate) telemetry: Arc<telemetry::Telemetry>,
    /// Sum of `size_bytes` across every indexed file. Captured at boot and
    /// after each `rescan`. Feeds the corpus-baseline cost in
    /// [`super::savings::estimate`].
    pub(crate) corpus_bytes: std::sync::atomic::AtomicU64,
    /// Monotonic counter bumped every time `cache` is swapped (boot, rescan, view watcher).
    /// In-memory pagination cursors embed this value as a snapshot id so a resume call
    /// against a stale generation can be detected and reported back as
    /// `cursor_invalidated = true`.
    pub(crate) cache_generation: std::sync::atomic::AtomicU32,
    /// Per-repo scope key for LanceDB tables and `memory_by_key` Fjall keyspace.
    /// Computed once at boot. Do NOT recompute per-call.
    #[allow(dead_code)] // used by memory / documents feature tools
    pub(crate) scope: String,
    /// LanceDB vector store. Lazy-init on first memory/document call.
    #[cfg(any(feature = "memory", feature = "documents"))]
    pub(crate) lance: tokio::sync::OnceCell<Arc<crate::lance::LanceStore>>,
    /// Shared embedding engine. Lazy-init on first embed call.
    #[cfg(feature = "intelligence")]
    pub(crate) embedder: tokio::sync::OnceCell<Arc<crate::embeddings::SharedEmbedder>>,
    /// Shared kreuzcrawl engine. Initialised at server boot from the `[crawl]`
    /// config section; `None` if engine construction failed (the web_* tools
    /// will return an MCP error rather than crash).
    #[cfg(feature = "crawl")]
    pub(crate) crawl_engine: Option<kreuzcrawl::CrawlEngineHandle>,
}

pub(crate) struct MapCache {
    /// path → L1 (kept sorted by path; iteration order matches `list_files`)
    pub(crate) by_path: BTreeMap<crate::path::RelPath, FileMapL1>,
    /// Pre-flattened `(path, imports)` view used by the `dependents` tool. Without this,
    /// every `dependents` call rebuilds the same `HashMap<PathBuf, Vec<Import>>` from
    /// scratch. Precomputing once at server boot drops that to pure pointer-chase.
    pub(crate) imports_index: Vec<(PathBuf, Vec<Import>)>,
}

impl MapCache {
    fn build(store: &Store) -> Self {
        let mut by_path = BTreeMap::new();
        for (path, entry) in &store.index.files {
            match store.read_l1_by_hex(&entry.hash_hex) {
                Ok(Some(l1)) => {
                    by_path.insert(path.clone(), l1);
                }
                Ok(None) | Err(_) => continue,
            }
        }
        let imports_index: Vec<(PathBuf, Vec<Import>)> = by_path
            .iter()
            .map(|(p, l1)| (p.to_path_buf(), l1.imports.clone()))
            .collect();
        Self {
            by_path,
            imports_index,
        }
    }
}

/// Construction-time switches for [`BasemindServer`].
///
/// `serve` wants every background facility running; a one-shot CLI query wants
/// none of them (no auto-scan, no view watcher, no background GC) so the process
/// exits the instant the single tool call returns.
///
/// NOTE: this is intentionally a struct of named bools rather than a bare flag —
/// a future workstream (the live FS watcher / `--no-watch`) will extend it with
/// finer-grained switches (e.g. `auto_scan` vs `watch` decoupled). Keep new knobs
/// additive and defaulted so callers that only care about `background` stay terse.
#[derive(Debug, Clone, Copy)]
pub struct ServerOptions {
    /// When true, spawn the empty-index auto-scan, the view watcher thread, and
    /// the background blob GC. When false, the server is a pure one-shot query
    /// handle: it preloads the in-RAM map cache and nothing else.
    pub background: bool,
    /// When true (and `background` is on, and the served view is the working
    /// view), spawn a live filesystem watcher that funnels changed paths into
    /// `scan_and_refresh` so the in-RAM map stays current as the agent edits.
    /// When false, fall back to the passive view watcher (which only reacts to
    /// external scans writing `index.msgpack`). Disabled for one-shot queries.
    ///
    /// `--no-watch` on `basemind serve` flips this off — useful for very large
    /// repos (e.g. the ~81k-file TypeScript tree) or CI, where the continuous
    /// incremental re-scan is not worth the cost.
    pub watch: bool,
}

impl Default for ServerOptions {
    fn default() -> Self {
        // Default mirrors `serve`: everything on.
        Self {
            background: true,
            watch: true,
        }
    }
}

impl BasemindServer {
    /// Construct a server with all background facilities running (the `serve` path).
    pub fn new(
        store: Store,
        root: PathBuf,
        config: Arc<crate::config::Config>,
        repo: Option<Arc<crate::git::Repo>>,
        git_cache: Arc<crate::git_cache::GitCache>,
    ) -> Self {
        Self::new_with_options(
            store,
            root,
            config,
            repo,
            git_cache,
            ServerOptions::default(),
        )
    }

    /// Construct a one-shot server with every background facility disabled.
    ///
    /// Used by the `basemind` CLI to run a single MCP tool in-process and exit —
    /// no auto-scan, no view watcher, no background GC. The in-RAM map cache is
    /// still preloaded so the tool sees the same data an MCP client would.
    pub fn new_oneshot(
        store: Store,
        root: PathBuf,
        config: Arc<crate::config::Config>,
        repo: Option<Arc<crate::git::Repo>>,
        git_cache: Arc<crate::git_cache::GitCache>,
    ) -> Self {
        Self::new_with_options(
            store,
            root,
            config,
            repo,
            git_cache,
            ServerOptions {
                background: false,
                watch: false,
            },
        )
    }

    /// Shared constructor honoring [`ServerOptions`]. `new` / `new_oneshot` are
    /// the public entry points; this threads the `background` switch through the
    /// three spawn sites + the initial auto-scan.
    pub fn new_with_options(
        store: Store,
        root: PathBuf,
        config: Arc<crate::config::Config>,
        repo: Option<Arc<crate::git::Repo>>,
        git_cache: Arc<crate::git_cache::GitCache>,
        options: ServerOptions,
    ) -> Self {
        let scope = repo
            .as_ref()
            .map(|r| crate::git::scope_key(r))
            .unwrap_or_else(|| format!("path:{}", root.display()));
        let cache = Arc::new(MapCache::build(&store));
        let corpus_bytes: u64 = store.index.files.values().map(|e| e.size_bytes).sum();
        // A fresh repo has no index yet. Auto-scan on startup (working view only)
        // so the agent never has to run `basemind scan` by hand — the scan runs
        // in-process below, after the server is up, so it never contends for the
        // Fjall lock this `serve` already holds.
        let needs_initial_scan =
            store.view == crate::store::VIEW_WORKING && cache.by_path.is_empty();
        tracing::info!(
            files = cache.by_path.len(),
            corpus_bytes,
            git = repo.is_some(),
            scope = %scope,
            "preloaded code map into RAM for MCP server"
        );
        let outline_cache: Arc<OutlineCache> = Arc::new(Mutex::new(LruCache::new(
            NonZeroUsize::new(OUTLINE_CACHE_CAP).expect("OUTLINE_CACHE_CAP > 0"),
        )));
        let telemetry_handle = Arc::new(telemetry::Telemetry::new(&store.basemind_dir));
        #[cfg(feature = "crawl")]
        let crawl_engine = match crate::web::build_engine(&config.crawl) {
            Ok(e) => Some(e),
            Err(error) => {
                tracing::warn!(
                    ?error,
                    "crawl engine init failed; web_* tools will report errors"
                );
                None
            }
        };
        let state = Arc::new(ServerState {
            store: RwLock::new(store),
            root,
            cache: ArcSwap::from(cache),
            repo,
            git_cache,
            outline_cache,
            config,
            telemetry: telemetry_handle,
            corpus_bytes: std::sync::atomic::AtomicU64::new(corpus_bytes),
            cache_generation: std::sync::atomic::AtomicU32::new(1),
            scope,
            #[cfg(any(feature = "memory", feature = "documents"))]
            lance: tokio::sync::OnceCell::new(),
            #[cfg(feature = "intelligence")]
            embedder: tokio::sync::OnceCell::new(),
            #[cfg(feature = "crawl")]
            crawl_engine,
        });
        // One-shot CLI queries skip ALL background facilities: no view watcher,
        // no auto-scan, no background GC. They preload the map cache (above) and
        // return immediately so the process can exit after a single tool call.
        if options.background {
            // Live FS watcher vs. passive view watcher are mutually exclusive for
            // the working view: the active watcher already triggers
            // `scan_and_refresh`, which writes `index.msgpack` — the exact event
            // the passive watcher reacts to. Running both would double-refresh.
            //
            // Non-working views (staged / rev-<sha>) are immutable snapshots, so
            // the active watcher is meaningless there; they always get the passive
            // watcher (which still picks up an external re-scan of that view).
            let view_is_working = {
                match state.store.try_read() {
                    Ok(g) => g.view == crate::store::VIEW_WORKING,
                    // Unable to read the view at boot is unexpected; fall back to the
                    // passive watcher rather than risk watching the wrong tree.
                    Err(_) => false,
                }
            };
            if options.watch && view_is_working {
                spawn_serve_watcher(Arc::clone(&state));
            } else {
                spawn_view_watcher(Arc::clone(&state));
            }
            // Background blob GC: reclaim orphaned blobs left behind by prior scans /
            // branch switches. Detached so it never blocks serve startup, and it never
            // crashes serve (all errors are warned + swallowed).
            if needs_initial_scan {
                // A fresh scan is what *creates* reclaimable orphans, so chain GC after it.
                let scan_state = Arc::clone(&state);
                tracing::info!("empty index on startup; running initial scan in background");
                tokio::spawn(async move {
                    match helpers::scan_and_refresh(Arc::clone(&scan_state), None).await {
                        Ok(report) => tracing::info!(
                            scanned = report.stats.scanned,
                            updated = report.stats.updated,
                            "initial background scan complete"
                        ),
                        Err(error) => {
                            tracing::warn!(%error, "initial background scan failed");
                        }
                    }
                    // Run GC after the scan settles, regardless of scan outcome.
                    run_background_gc(scan_state).await;
                });
            } else {
                // No initial scan — run GC shortly after startup to reclaim any
                // orphans from earlier sessions.
                let gc_state = Arc::clone(&state);
                tokio::spawn(async move {
                    run_background_gc(gc_state).await;
                });
            }
        }
        #[allow(unused_mut)]
        let mut router = Self::tool_router_core()
            + Self::tool_router_git()
            + Self::tool_router_memory()
            + Self::tool_router_admin();
        #[cfg(feature = "crawl")]
        {
            router += Self::tool_router_web();
        }
        Self {
            state,
            tool_router: router,
        }
    }
}

/// Run an in-process blob GC once, logging the outcome and swallowing any error.
///
/// Uses the UNLOCKED `store_gc` primitives (`collect_referenced_hashes` + `gc_blobs`)
/// under a `blocking_read()` store guard — NEVER `store_gc::run_gc`, which re-acquires
/// the `.basemind/.lock` flock that `serve` already holds (that would deadlock). The
/// held read guard blocks the only in-process writer (`scan_and_refresh`) for the
/// mark+sweep; cross-process scans are impossible because serve holds the flock.
async fn run_background_gc(state: Arc<ServerState>) {
    let result = tokio::task::spawn_blocking(move || {
        let store = state.store.blocking_read();
        let referenced = crate::store_gc::collect_referenced_hashes(&store.basemind_dir)?;
        crate::store_gc::gc_blobs(&store.basemind_dir, &referenced)
    })
    .await;
    match result {
        Ok(Ok(report)) if report.removed > 0 => tracing::info!(
            removed = report.removed,
            bytes_freed = report.bytes_freed,
            "background blob GC reclaimed orphaned blobs"
        ),
        Ok(Ok(_)) => tracing::debug!("background blob GC: nothing to reclaim"),
        Ok(Err(error)) => tracing::warn!(%error, "background blob GC failed"),
        Err(error) => tracing::warn!(%error, "background blob GC task panicked"),
    }
}

/// Active filesystem watcher embedded in `serve` for the working view.
///
/// Unlike [`spawn_view_watcher`] (which is passive — it only reacts to an
/// external process writing `index.msgpack`), this watches the working tree
/// directly and funnels every debounced batch of changed paths into the
/// canonical in-process refresh, [`helpers::scan_and_refresh`]. That re-scans
/// under serve's already-open `Store` (its `RwLock`), so we never open a second
/// `.basemind/.lock` flock — the reason we cannot reuse `watcher::watch`, which
/// owns its own `Store`.
///
/// Threading bridge: `watcher::watch_paths` runs the debouncer on a blocking std
/// thread, but `scan_and_refresh` is async. We capture the current tokio runtime
/// `Handle` at spawn time and `handle.block_on(...)` the refresh inside the
/// callback. `block_on` is safe here because the callback runs on a plain OS
/// thread with no tokio runtime entered (it's `std::thread`, not a worker), so
/// the "cannot block the current thread from within a runtime" guard never trips.
///
/// Lifetime: the thread is detached and runs for the process lifetime, mirroring
/// `spawn_view_watcher`. The `shutdown` oneshot sender is dropped immediately, so
/// `watch_paths`'s `shutdown.try_recv()` returns `Disconnected` only if the loop
/// ever polls it after the sender drops — in practice the loop exits when the
/// process tears down stdio and the debouncer channel closes. A failed
/// incremental refresh is logged and swallowed so a transient scan error never
/// kills the watcher.
fn spawn_serve_watcher(state: Arc<ServerState>) {
    let root = state.root.clone();
    let config = Arc::clone(&state.config);
    let handle = tokio::runtime::Handle::current();
    // Keep the sender alive for the process lifetime by leaking it into the
    // detached closure-free slot: we never signal shutdown explicitly (the
    // process exit tears the watcher down), so hold the receiver and drop the
    // sender at the end of `serve`'s life via the thread owning it.
    let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();
    std::thread::Builder::new()
        .name("basemind-mcp-serve-watcher".to_string())
        .spawn(move || {
            // Hold the sender for the whole watcher lifetime so the receiver never
            // sees `Disconnected` early; the watcher exits when the debouncer
            // channel closes at process teardown.
            let _keep_sender_alive = _shutdown_tx;
            tracing::info!(root = %root.display(), "serve watcher armed (live incremental rescan)");
            let result =
                crate::watcher::watch_paths(&root, &config, shutdown_rx, |paths, _kind| {
                    let refresh_state = Arc::clone(&state);
                    // Bridge the blocking watcher thread into the async refresh.
                    match handle.block_on(helpers::scan_and_refresh(refresh_state, Some(paths))) {
                        Ok(report) => tracing::debug!(
                            scanned = report.stats.scanned,
                            updated = report.stats.updated,
                            removed = report.stats.removed,
                            "serve watcher: incremental rescan complete"
                        ),
                        Err(error) => tracing::warn!(
                            %error,
                            "serve watcher: incremental rescan failed (watcher continues)"
                        ),
                    }
                });
            if let Err(error) = result {
                tracing::warn!(%error, "serve watcher exited with error");
            }
            tracing::info!("serve watcher: exiting");
        })
        .ok();
}

fn spawn_view_watcher(state: Arc<ServerState>) {
    let (basemind_dir, view) = {
        let store = match state.store.try_read() {
            Ok(g) => g,
            Err(_) => return,
        };
        (store.basemind_dir.clone(), store.view.clone())
    };
    let view_dir = basemind_dir.join(crate::store::VIEWS_DIR).join(&view);
    let target = view_dir.join(crate::store::INDEX_FILE);

    std::thread::Builder::new()
        .name("basemind-mcp-view-watcher".to_string())
        .spawn(move || {
            use notify_debouncer_full::new_debouncer;
            use std::time::Duration;

            let (tx, rx) = std::sync::mpsc::channel();
            let mut debouncer = match new_debouncer(Duration::from_millis(150), None, tx) {
                Ok(d) => d,
                Err(e) => {
                    tracing::warn!(error = %e, "view watcher: failed to start debouncer");
                    return;
                }
            };
            if let Err(e) = debouncer.watch(&view_dir, notify::RecursiveMode::NonRecursive) {
                tracing::warn!(error = %e, dir = %view_dir.display(), "view watcher: failed to watch");
                return;
            }
            tracing::info!(target = %target.display(), "view watcher armed");

            while let Ok(result) = rx.recv() {
                let events = match result {
                    Ok(e) => e,
                    Err(_) => continue,
                };
                let touches_index = events
                    .iter()
                    .any(|de| de.event.paths.iter().any(|p| p == &target));
                if !touches_index {
                    continue;
                }
                let new_store = match crate::store::Store::open_read_only(
                    state.root.as_path(),
                    &state
                        .store
                        .try_read()
                        .map(|g| g.view.clone())
                        .unwrap_or_default(),
                ) {
                    Ok(s) => s,
                    Err(e) => {
                        tracing::warn!(error = %e, "view watcher: store reopen failed");
                        continue;
                    }
                };
                let new_cache = Arc::new(MapCache::build(&new_store));
                tracing::info!(
                    files = new_cache.by_path.len(),
                    "view watcher: rebuilt MapCache from refreshed index"
                );
                state.cache.store(new_cache);
                state
                    .cache_generation
                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            }
            tracing::info!("view watcher: channel closed; exiting");
        })
        .ok();
}

#[tool_handler(router = self.tool_router.clone())]
impl ServerHandler for BasemindServer {
    fn get_info(&self) -> ServerInfo {
        ServerInfo::new(ServerCapabilities::builder().enable_tools().build()).with_instructions(
            "basemind is the indexed context layer for this repository, served over MCP: a \
             tree-sitter code map across 300+ languages (symbols, references, callers, call \
             graphs, implementations), git history + blame at symbol resolution, full-text + \
             semantic search, document RAG over 90+ file formats, and shared cross-session \
             memory. Prefer these tools over reading files when navigating large or unfamiliar \
             codebases.\n\
             Context economy — these tools return paths, line numbers, and signatures, not \
             file bodies, so they cost a fraction of the tokens of reading source. Default to \
             them: `outline` a file before you open it (then read only the span you need); \
             `search_symbols` instead of grep for a definition; `find_references` / \
             `find_callers` instead of grepping call sites; `workspace_grep` instead of \
             shelling out to ripgrep; `rescan` after edits instead of reconnecting. Do not \
             re-read a file basemind already mapped.\n\
             Routing: \
             \"where is X defined?\" → `search_symbols`; \
             \"what calls X?\" → `find_references` (any name) or `find_callers` (specific def); \
             \"shape of this file?\" → `outline` (add `l2: true` for calls + docs); \
             \"what changed recently?\" → `recent_changes`, `commits_touching`, `symbol_history`; \
             \"who last touched this?\" → `blame_file` / `blame_symbol`; \
             \"where's the churn?\" → `hot_files`; \
             \"semantic search across PDFs/docs in the repo?\" → `search_documents`; \
             \"recall something the agent remembered earlier?\" → `memory_get` / `memory_list` / \
             `memory_search`; \
             \"remember this for later sessions?\" → `memory_put` (delete with `memory_delete`); \
             \"refresh the index after editing code?\" → `rescan` (or `rescan { paths: [...] }` \
             to limit to changed files).\n\
             \"got a truncated result? fetch the next page?\" → pass `next_cursor` from the prior \
             response back as `cursor`.\n\
             \"need regex over file contents?\" → `workspace_grep`.\n\
             Code-map tools: `outline`, `search_symbols`, `find_references`, `find_callers`, \
             `list_files`, `workspace_grep`, `dependents`, `status`, `repo_info`, \
             `symbol_history`. \
             Git tools (inside a repo): `working_tree_status`, `recent_changes`, `commits_touching`, \
             `find_commits_by_path`, `hot_files`, `diff_outline`, `diff_file`, `blame_file`, \
             `blame_symbol`. \
             Intelligence tools (require build with `--features documents,memory`): \
             `search_documents`, `memory_put`, `memory_get`, `memory_list`, `memory_search`, \
             `memory_delete`. \
             Web tools (require build with `--features crawl`): `web_scrape` (one URL), \
             `web_crawl` (follow links from a seed URL), `web_map` (sitemap-only discovery). \
             Crawled pages land in the same LanceDB documents table as on-disk docs, scoped \
             under `web:<host>` — find them later with `search_documents`. \
             All paths are repository-relative with forward-slash separators. \
             If a tool reports \"no indexed files\", run `basemind scan` in the repo first.",
        )
    }
}