Skip to main content

mcp_memory/actions/
code.rs

1//! MCP tool handlers for the tree-sitter code knowledge graph.
2//!
3//! These map parsed code symbols (see [`crate::code`]) onto a knowledge graph
4//! so the regular search/traversal primitives work on code, and expose
5//! code-focused tools: `code_index`, `code_watch`, `code_outline`,
6//! `code_search`, `code_get_symbol`.
7//!
8//! Symbols are stored as entities named `{relpath}::{symbol}` with type
9//! `code:<kind>`; metadata (file, line range, signature, doc) lives in
10//! observations. Edges: `defines` (file→symbol), `calls`/`references`
11//! (caller→callee, resolved only when the callee name is unambiguous).
12//!
13//! Multiple independent projects are supported by **physical partitioning**:
14//! each `code_index`/`code_watch` call takes an optional `project` identifier
15//! (default `"default"`) that selects a dedicated SQLite database, opened via
16//! [`crate::code_registry`]. Projects therefore never collide and are fully
17//! isolated from the main memory graph and from the knowledge-graph tools.
18
19use std::collections::{HashMap, HashSet};
20use std::path::Path;
21use std::sync::atomic::{AtomicUsize, Ordering};
22use std::time::{SystemTime, UNIX_EPOCH};
23
24use serde_json::{Value, json};
25
26use crate::code::{self, Def, MAX_SYMBOLS_PER_FILE};
27use crate::errors::{MCSError, Result};
28use crate::kg::GraphHandle;
29use crate::types::{Entity, Relation};
30
/// Upper bound on how many files a single `code_index` call will process.
const MAX_INDEX_FILES: usize = 100_000;
/// Upper bound on symbols summed over every file of one call (guards against
/// running out of memory on very large repositories).
const MAX_TOTAL_SYMBOLS: usize = 5_000_000;
/// Buffer size at which pending graph writes are flushed, keeping each write
/// transaction bounded.
const WRITE_BATCH: usize = 1_000;
/// Number of rows `code_search` returns when the caller gives no `limit`.
const DEFAULT_SEARCH_LIMIT: usize = 20;
/// Largest `limit` that `code_search` honors.
const MAX_SEARCH_LIMIT: usize = 500;
/// Most callers (and, separately, callees) `code_get_symbol` returns per symbol.
const MAX_EDGES_RETURNED: usize = 500;
42
/// Wrap a text payload in the tool-result shape
/// `{"content": [{"type": "text", "text": <payload>}]}`.
macro_rules! text_content {
    ($payload:expr) => {
        json!({ "content": [{ "type": "text", "text": $payload }] })
    };
}
48
49fn to_json(v: &impl serde::Serialize) -> Result<Value> {
50    let text = serde_json::to_string(v).map_err(MCSError::JsonError)?;
51    Ok(text_content!(text))
52}
53
54/// Read + validate the optional `project` argument, defaulting to
55/// [`crate::code_registry::DEFAULT_PROJECT`]. Each project maps to its own DB.
56fn project_of(params: &Value) -> Result<String> {
57    let p = params
58        .get("project")
59        .and_then(|v| v.as_str())
60        .filter(|s| !s.is_empty())
61        .unwrap_or(crate::code_registry::DEFAULT_PROJECT);
62    crate::code_registry::validate_project(p)?;
63    Ok(p.to_string())
64}
65
/// Build the forward-slash path string used as a `code:file` entity name.
///
/// An absolute `p` located under `base` has the `base` prefix removed; any
/// other path (relative, or absolute but outside `base`) is kept as given.
/// Backslashes in the result are rewritten to `/`.
fn rel_path(p: &Path, base: &Path) -> String {
    let relative = match p.strip_prefix(base) {
        Ok(stripped) if p.is_absolute() => stripped,
        _ => p,
    };
    relative.to_string_lossy().replace('\\', "/")
}
75
76/// Read a single-valued `key: value` observation off an entity.
77fn obs_val<'a>(entity: &'a Entity, key: &str) -> Option<&'a str> {
78    let prefix = format!("{key}: ");
79    entity
80        .observations
81        .iter()
82        .find_map(|o| o.strip_prefix(&prefix))
83}
84
85/// Strip the `code:` prefix from an entity type for display.
86fn kind_of(entity: &Entity) -> &str {
87    entity.entity_type.strip_prefix("code:").unwrap_or(&entity.entity_type)
88}
89
90fn is_code_entity(entity: &Entity) -> bool {
91    entity.entity_type.starts_with("code:")
92}
93
94/// Compact, location-focused view of a code symbol entity.
95fn symbol_row(entity: &Entity) -> Value {
96    json!({
97        "name": entity.name,
98        "kind": kind_of(entity),
99        "file": obs_val(entity, "file"),
100        "lines": obs_val(entity, "lines"),
101        "lang": obs_val(entity, "lang"),
102        "signature": obs_val(entity, "signature"),
103        "doc": obs_val(entity, "doc"),
104    })
105}
106
107// ---------------------------------------------------------------------------
108// code_index
109// ---------------------------------------------------------------------------
110
111/// Parsed symbols for one file, with qualified names already assigned.
112struct FileWork {
113    rel: String,
114    lang: &'static str,
115    hash: String,
116    /// Whether a `code:file` entity already existed (drives purge-skip).
117    existed: bool,
118    named: Vec<(Def, String)>,
119    refs: Vec<code::Ref>,
120}
121
122/// Outcome of processing a single path during the parallel parse phase.
123enum Outcome {
124    Indexed(Box<FileWork>),
125    Skipped,
126    Failed,
127    Unsupported,
128}
129
130/// Read + hash + (incrementally) parse one file. CPU-bound and independent per
131/// file, so this runs on the parse thread pool. Reads use the graph's
132/// concurrent read pool; no writes happen here.
133fn parse_one(kg: &GraphHandle, path: &Path, base: &Path,
134             force: bool, total_symbols: &AtomicUsize) -> Outcome {
135    let Some(lang) = code::detect(path) else {
136        return Outcome::Unsupported;
137    };
138    let rel = rel_path(path, base);
139    let Ok(bytes) = std::fs::read(path) else {
140        return Outcome::Failed;
141    };
142    let hash = code::hash_bytes(&bytes);
143
144    // Project isolation is physical (one DB per project), so the file entity is
145    // just the repo-relative path — no project prefix needed.
146    let existing = kg.get_entity(&rel).ok().flatten();
147    let existed = existing.is_some();
148    // Incremental: skip unchanged files (matching stored hash).
149    if !force
150        && let Some(e) = &existing
151        && obs_val(e, "hash") == Some(hash.as_str())
152    {
153        return Outcome::Skipped;
154    }
155
156    let parsed = code::parse_source(lang, &bytes);
157    let mut seen: HashSet<String> = HashSet::new();
158    let mut named: Vec<(Def, String)> = Vec::with_capacity(parsed.defs.len());
159    for d in parsed.defs.into_iter().take(MAX_SYMBOLS_PER_FILE) {
160        let mut q = format!("{rel}::{}", d.name);
161        if !seen.insert(q.clone()) {
162            q = format!("{q}::L{}", d.line_start);
163            seen.insert(q.clone());
164        }
165        named.push((d, q));
166    }
167
168    // Accumulate towards the total symbol cap.
169    let prev = total_symbols.fetch_add(named.len(), Ordering::Relaxed);
170    if prev + named.len() > MAX_TOTAL_SYMBOLS {
171        // Undo — we overshot. Non-atomic for correctness: the caller's cap check
172        // stops new files from being accepted; any surplus is simply ignored in
173        // the merge phase below.
174        return Outcome::Skipped;
175    }
176
177    Outcome::Indexed(Box::new(FileWork {
178        rel,
179        lang: lang.name(),
180        hash,
181        existed,
182        named,
183        refs: parsed.refs,
184    }))
185}
186
187pub fn handle_code_index(args: Option<&Value>) -> Result<Value> {
188    let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
189    let path = params
190        .get("path")
191        .and_then(|v| v.as_str())
192        .ok_or_else(|| MCSError::InvalidParams("Missing 'path' parameter".into()))?;
193    let project = project_of(params)?;
194    let kg = crate::code_registry::resolve(&project)?;
195    let kg = kg.as_ref();
196    let force = params.get("force").and_then(|v| v.as_bool()).unwrap_or(false);
197
198    let root = Path::new(path);
199    if !root.exists() {
200        return Err(MCSError::InvalidParams(format!("Path not found: {path}")));
201    }
202    // Canonicalize so entity names are stable regardless of how the path is
203    // spelled (symlinks, `.`, `..`) — critical for matching the symlink-resolved
204    // paths the watcher receives from the OS. Falls back to the raw path.
205    let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
206    let base = canonical_base();
207    let files = code::walk(&root, code::MAX_FILE_BYTES);
208    index_paths(kg, files, &base, force)
209}
210
/// Base directory that repo-relative entity names are computed against: the
/// canonicalized current working directory, or `"."` when the working
/// directory cannot be determined or canonicalized.
///
/// The indexer and the watcher both call this so they derive identical names.
pub(crate) fn canonical_base() -> std::path::PathBuf {
    match std::env::current_dir().and_then(|cwd| cwd.canonicalize()) {
        Ok(dir) => dir,
        Err(_) => std::path::PathBuf::from("."),
    }
}
219
220/// Repo-relative entity name for a path under `base` (matches [`parse_one`]).
221/// Exposed for the watcher to purge symbols of deleted files.
222pub(crate) fn file_entity_name(path: &Path, base: &Path) -> String {
223    rel_path(path, base)
224}
225
226/// Map a caller-supplied file path to its stored entity name. Relative paths
227/// are assumed already repo-relative; absolute paths are canonicalized and
228/// based the same way [`handle_code_index`] stores them.
229fn lookup_file_name(file: &str) -> String {
230    let p = Path::new(file);
231    if p.is_absolute() {
232        let c = p.canonicalize().unwrap_or_else(|_| p.to_path_buf());
233        rel_path(&c, &canonical_base())
234    } else {
235        file.to_string()
236    }
237}
238
239/// Parse + write a known set of `files` into `kg`. Shared by the `code_index`
240/// tool (after walking a path) and the watcher (a debounced batch of changed
241/// files). `base` anchors repo-relative entity names; the same `base` must be
242/// used across calls for a project so re-indexing updates rather than
243/// duplicates. Batching a whole change set through one call keeps the parse
244/// pool and write transactions amortized instead of per-file.
245pub(crate) fn index_paths(
246    kg: &GraphHandle,
247    mut files: Vec<std::path::PathBuf>,
248    base: &Path,
249    force: bool,
250) -> Result<Value> {
251    files.truncate(MAX_INDEX_FILES);
252
253    let now = SystemTime::now()
254        .duration_since(UNIX_EPOCH)
255        .map(|d| d.as_secs())
256        .unwrap_or(0);
257
258    // Parse phase (parallel): read + hash + parse each file across the CPU
259    // cores. Files are independent and parsing is the dominant cost; reads use
260    // the concurrent read pool. The single-writer graph mutations stay serial
261    // in the merge phase below.
262    let n = files.len();
263    let n_threads = std::thread::available_parallelism()
264        .map(|t| t.get())
265        .unwrap_or(4)
266        .min(n.max(1));
267    let next = AtomicUsize::new(0);
268    let total_symbols = AtomicUsize::new(0);
269    let buckets: Vec<Vec<Outcome>> = std::thread::scope(|scope| {
270        let handles: Vec<_> = (0..n_threads)
271            .map(|_| {
272                scope.spawn(|| {
273                    let mut local = Vec::new();
274                    loop {
275                        let i = next.fetch_add(1, Ordering::Relaxed);
276                        if i >= n {
277                            break;
278                        }
279                        // Pre-check total symbol cap to avoid unnecessary work.
280                        if total_symbols.load(Ordering::Relaxed) >= MAX_TOTAL_SYMBOLS {
281                            continue;
282                        }
283                        local.push(parse_one(kg, &files[i], base, force, &total_symbols));
284                    }
285                    local
286                })
287            })
288            .collect();
289        handles.into_iter().map(|h| h.join().unwrap()).collect()
290    });
291
292    // Merge phase (serial): tally outcomes and build the global symbol index
293    // (bare name -> qualified names) used to resolve unambiguous call edges.
294    let mut work: Vec<FileWork> = Vec::new();
295    let mut def_index: HashMap<String, Vec<String>> = HashMap::new();
296    let mut files_indexed = 0usize;
297    let mut files_skipped = 0usize;
298    let mut files_failed = 0usize;
299    for outcome in buckets.into_iter().flatten() {
300        match outcome {
301            Outcome::Indexed(fw) => {
302                for (d, q) in &fw.named {
303                    def_index.entry(d.name.clone()).or_default().push(q.clone());
304                }
305                work.push(*fw);
306                files_indexed += 1;
307            }
308            Outcome::Skipped => files_skipped += 1,
309            Outcome::Failed => files_failed += 1,
310            Outcome::Unsupported => {}
311        }
312    }
313
314    // Write phase (serial, single writer). Streamed in `WRITE_BATCH` chunks so
315    // the transient entity/relation buffers stay bounded regardless of repo
316    // size; the parsed `work` is the only large allocation. Entities are written
317    // in full *before* any relation, since relations resolve their endpoints by
318    // name and would silently drop against a not-yet-written entity.
319
320    // Pass 1: purge changed files and write all entities.
321    let mut ebuf: Vec<Entity> = Vec::with_capacity(WRITE_BATCH);
322    let mut symbols = 0usize;
323    for fw in &work {
324        if fw.existed {
325            kg.code_purge_file(&fw.rel)?;
326        }
327        ebuf.push(Entity {
328            name: fw.rel.clone(),
329            entity_type: "code:file".into(),
330            observations: vec![
331                format!("lang: {}", fw.lang),
332                format!("hash: {}", fw.hash),
333                format!("symbols: {}", fw.named.len()),
334                format!("indexed_at: {now}"),
335            ],
336        });
337        for (d, q) in &fw.named {
338            let mut obs = vec![
339                format!("kind: {}", d.kind),
340                format!("lang: {}", fw.lang),
341                format!("file: {}", fw.rel),
342                format!("lines: {}-{}", d.line_start, d.line_end),
343                format!("signature: {}", d.signature),
344            ];
345            if let Some(doc) = &d.doc {
346                obs.push(format!("doc: {doc}"));
347            }
348            ebuf.push(Entity {
349                name: q.clone(),
350                entity_type: format!("code:{}", d.kind),
351                observations: obs,
352            });
353            symbols += 1;
354        }
355        if ebuf.len() >= WRITE_BATCH {
356            kg.upsert_entities(&ebuf)?;
357            ebuf.clear();
358        }
359    }
360    if !ebuf.is_empty() {
361        kg.upsert_entities(&ebuf)?;
362    }
363
364    // Pass 2: write `defines` edges and unambiguously-resolved call edges.
365    let mut rbuf: Vec<Relation> = Vec::with_capacity(WRITE_BATCH);
366    let mut rel_seen: HashSet<(String, String, &'static str)> = HashSet::new();
367    let mut relation_count = 0usize;
368    for fw in &work {
369        let file_entity = &fw.rel;
370        for (_, q) in &fw.named {
371            rbuf.push(Relation {
372                from: file_entity.clone(),
373                to: q.clone(),
374                relation_type: "defines".into(),
375            });
376            relation_count += 1;
377        }
378        for r in &fw.refs {
379            let Some(targets) = def_index.get(&r.name) else { continue };
380            if targets.len() != 1 {
381                continue; // ambiguous or unresolved — drop (no false edges)
382            }
383            let callee = &targets[0];
384            let caller = enclosing(&fw.named, r.line)
385                .map(|q| q.to_string())
386                .unwrap_or_else(|| file_entity.clone());
387            if &caller == callee {
388                continue;
389            }
390            let rtype: &'static str = if r.kind == "call" { "calls" } else { "references" };
391            if !rel_seen.insert((caller.clone(), callee.clone(), rtype)) {
392                continue;
393            }
394            rbuf.push(Relation {
395                from: caller,
396                to: callee.clone(),
397                relation_type: rtype.into(),
398            });
399            relation_count += 1;
400        }
401        if rbuf.len() >= WRITE_BATCH {
402            kg.create_relations(&rbuf)?;
403            rbuf.clear();
404        }
405    }
406    if !rbuf.is_empty() {
407        kg.create_relations(&rbuf)?;
408    }
409
410    to_json(&json!({
411        "files_indexed": files_indexed,
412        "files_skipped": files_skipped,
413        "files_failed": files_failed,
414        "symbols": symbols,
415        "relations": relation_count,
416    }))
417}
418
419/// Smallest-span definition whose line range encloses `line`, if any.
420fn enclosing(named: &[(Def, String)], line: usize) -> Option<&str> {
421    named
422        .iter()
423        .filter(|(d, _)| d.line_start <= line && line <= d.line_end)
424        .min_by_key(|(d, _)| d.line_end - d.line_start)
425        .map(|(_, q)| q.as_str())
426}
427
428// ---------------------------------------------------------------------------
429// code_outline
430// ---------------------------------------------------------------------------
431
432pub fn handle_code_outline(args: Option<&Value>) -> Result<Value> {
433    let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
434    let file = params
435        .get("file")
436        .and_then(|v| v.as_str())
437        .ok_or_else(|| MCSError::InvalidParams("Missing 'file' parameter".into()))?;
438    let file = file.replace('\\', "/");
439    let project = project_of(params)?;
440    let kg = crate::code_registry::resolve(&project)?;
441    let kg = kg.as_ref();
442
443    // Map the caller's path to the stored entity name. A relative path is
444    // already repo-relative (matches the stored name); an absolute path is
445    // canonicalized + based exactly as the indexer does.
446    let lookup = lookup_file_name(&file);
447    let defines = kg.search_relations(Some(&lookup), None, Some("defines"), Some(MAX_SYMBOLS_PER_FILE));
448    let names: Vec<String> = defines.into_iter().map(|r| r.to).collect();
449    if names.is_empty() {
450        return to_json(&json!({
451            "file": file,
452            "symbols": [],
453            "note": "no symbols indexed for this file; run code_index first",
454        }));
455    }
456    let mut rows: Vec<Value> = kg
457        .batch_get_entities(&names)
458        .into_iter()
459        .flatten()
460        .map(|e| symbol_row(&e))
461        .collect();
462    // Order by starting line for a readable outline.
463    rows.sort_by_key(|r| {
464        r.get("lines")
465            .and_then(|v| v.as_str())
466            .and_then(|s| s.split('-').next())
467            .and_then(|s| s.parse::<u64>().ok())
468            .unwrap_or(0)
469    });
470
471    to_json(&json!({ "file": file, "symbols": rows }))
472}
473
474// ---------------------------------------------------------------------------
475// code_search
476// ---------------------------------------------------------------------------
477
478pub fn handle_code_search(args: Option<&Value>) -> Result<Value> {
479    let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
480    let query = params
481        .get("query")
482        .and_then(|v| v.as_str())
483        .ok_or_else(|| MCSError::InvalidParams("Missing 'query' parameter".into()))?;
484    let kind = params.get("kind").and_then(|v| v.as_str()).filter(|s| !s.is_empty());
485    let lang = params.get("lang").and_then(|v| v.as_str()).filter(|s| !s.is_empty());
486    let project = project_of(params)?;
487    let kg = crate::code_registry::resolve(&project)?;
488    let kg = kg.as_ref();
489    let limit = params
490        .get("limit")
491        .and_then(|v| v.as_u64())
492        .map(|n| n as usize)
493        .unwrap_or(DEFAULT_SEARCH_LIMIT)
494        .clamp(1, MAX_SEARCH_LIMIT);
495
496    // Over-fetch then drop file entities / apply kind+lang filters (search has a
497    // single-type filter). Project scoping is implicit in the per-project DB.
498    let raw = kg.search_nodes_filtered(query, None, 0, limit.saturating_mul(5).min(1000));
499    let rows: Vec<Value> = raw
500        .into_iter()
501        .filter(|e| e.entity_type != "code:file")
502        .filter(|e| kind.is_none_or(|k| kind_of(e) == k))
503        .filter(|e| lang.is_none_or(|l| obs_val(e, "lang") == Some(l)))
504        .take(limit)
505        .map(|e| symbol_row(&e))
506        .collect();
507
508    to_json(&json!({ "results": rows }))
509}
510
511// ---------------------------------------------------------------------------
512// code_get_symbol
513// ---------------------------------------------------------------------------
514
515pub fn handle_code_get_symbol(args: Option<&Value>) -> Result<Value> {
516    let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
517    let name = params
518        .get("name")
519        .and_then(|v| v.as_str())
520        .ok_or_else(|| MCSError::InvalidParams("Missing 'name' parameter".into()))?;
521    let project = project_of(params)?;
522    let kg = crate::code_registry::resolve(&project)?;
523    let kg = kg.as_ref();
524
525    // Resolve within the project DB: exact (fully-qualified) name first, else
526    // fuzzy by bare name suffix.
527    let mut matches: Vec<Entity> = Vec::new();
528    if let Ok(Some(e)) = kg.get_entity(name)
529        && is_code_entity(&e)
530    {
531        matches.push(e);
532    }
533    if matches.is_empty() {
534        let suffix = format!("::{name}");
535        matches = kg
536            .search_nodes_filtered(name, None, 0, 200)
537            .into_iter()
538            .filter(is_code_entity)
539            .filter(|e| e.name.ends_with(&suffix))
540            .take(10)
541            .collect();
542    }
543    if matches.is_empty() {
544        return Err(MCSError::InvalidParams(format!(
545            "No code symbol matching '{name}' (run code_index first?)"
546        )));
547    }
548
549    let edge_types = ["calls", "references"];
550    let results: Vec<Value> = matches
551        .iter()
552        .map(|e| {
553            let mut callers: Vec<String> = Vec::new();
554            let mut callees: Vec<String> = Vec::new();
555            for t in edge_types {
556                for r in kg.search_relations(None, Some(&e.name), Some(t), Some(MAX_EDGES_RETURNED)) {
557                    callers.push(r.from);
558                }
559                for r in kg.search_relations(Some(&e.name), None, Some(t), Some(MAX_EDGES_RETURNED)) {
560                    callees.push(r.to);
561                }
562            }
563            callers.truncate(MAX_EDGES_RETURNED);
564            callees.truncate(MAX_EDGES_RETURNED);
565            let mut row = symbol_row(e);
566            row["callers"] = json!(callers);
567            row["callees"] = json!(callees);
568            row
569        })
570        .collect();
571
572    if results.len() == 1 {
573        to_json(&results.into_iter().next().unwrap())
574    } else {
575        to_json(&json!({ "matches": results }))
576    }
577}
578
579/// Start watching a project directory for file changes and re-index on
580/// modification. Spawns a background thread that monitors the directory
581/// tree with a debounced file-watcher. The initial index runs synchronously
582/// before returning.
583///
584/// The background thread holds the project's `Arc<GraphHandle>` (resolved from
585/// [`crate::code_registry`]) for its lifetime, pinning the canonical instance
586/// so re-index calls share one entity cache.
587pub fn handle_code_watch(args: Option<&Value>) -> Result<Value> {
588    let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
589    let path = params
590        .get("path")
591        .and_then(|v| v.as_str())
592        .ok_or_else(|| MCSError::InvalidParams("Missing 'path' parameter".into()))?;
593    let project = project_of(params)?;
594    let force = params.get("force").and_then(|v| v.as_bool()).unwrap_or(false);
595
596    let root = std::path::PathBuf::from(path);
597    if !root.exists() {
598        return Err(MCSError::InvalidParams(format!("Path not found: {path}")));
599    }
600    // Watch the canonicalized root so OS events carry the same (symlink-resolved)
601    // paths the indexer stored, keeping incremental updates and deletes aligned.
602    let root = root.canonicalize().unwrap_or(root);
603    let watch_path = root.to_string_lossy().to_string();
604
605    // Initial index immediately (also opens/warms the project DB).
606    let index_args = json!({
607        "path": &watch_path,
608        "project": project,
609        "force": force,
610    });
611    let _ = handle_code_index(Some(&index_args))?;
612
613    // Pin the canonical handle and spawn the background watcher.
614    let kg_arc = crate::code_registry::resolve(&project)?;
615    crate::watcher::spawn_watcher(kg_arc, watch_path.clone(), &project);
616
617    to_json(&json!({
618        "status": "watching",
619        "project": project,
620        "path": watch_path,
621    }))
622}