mnem-mcp 0.1.3

Model Context Protocol server for mnem - the AI-native, local-first memory substrate for agents.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
//! Static MCP tool description table.
//!
//! Extracted from `tools.rs` in R3. `all_tools(allow_labels)` is the
//! single entry point the MCP server calls to advertise its tool list.

use serde_json::json;

use crate::protocol::ToolDef;

/// Build the tool list.
///
/// The advertised schemas are **stable**: they do NOT mutate based on
/// `allow_labels` / `MNEM_BENCH`. This is a post-audit guarantee - a
/// public API surface that changes shape based on a runtime env var
/// is not a public API. Every schema always exposes the full set of
/// fields (including `label` / `ntype`). The `MNEM_BENCH` gate is
/// still enforced at the **handler** layer (see `handlers/*.rs`): when
/// the gate is off, caller-supplied `label` / `ntype` is silently
/// coerced to `Node::DEFAULT_NTYPE`. Schema introspection therefore
/// always shows the full surface; the handler side is the boundary.
///
/// The `allow_labels` parameter is kept in the `pub fn` signature for
/// source + binary compat with callers that already thread it through;
/// its value is ignored at schema-build time.
pub fn all_tools(allow_labels: bool) -> Vec<ToolDef> {
    // Retained to preserve the public signature post-audit; handlers
    // are where the label/ntype gate is enforced.
    let _ = allow_labels;

    let search_schema = json!({
        "type": "object",
        "properties": {
            "label":         { "type": "string", "description": "Node label (e.g. 'Person'). Honoured by default. Set MNEM_LABELS=0 (or legacy MNEM_BENCH=0) at server launch to force every label to Node::DEFAULT_NTYPE." },
            "where":         { "type": "object", "description": "Optional prop-equality filter, e.g. {\"name\": \"Alice\"}. Single property only in this version." },
            "with_outgoing": { "type": "array", "items": { "type": "string" }, "description": "Edge labels to include on each hit." },
            "limit":         { "type": "integer", "minimum": 1, "maximum": 500, "default": 10 }
        },
        "additionalProperties": false
    });

    let commit_nodes_item_schema = json!({
        "type": "object",
        "properties": {
            "ntype":   { "type": "string", "description": "Node type / label. Honoured by default. Set MNEM_LABELS=0 (or legacy MNEM_BENCH=0) at server launch to force the handler to substitute Node::DEFAULT_NTYPE." },
            "summary": { "type": "string", "description": "Short LLM-facing summary. Indexed by text + retrieve." },
            "props":   { "type": "object" },
            "content": { "type": "string", "description": "Optional text/markdown body (UTF-8)." }
        },
        "additionalProperties": false
    });

    let list_nodes_schema = json!({
        "type": "object",
        "properties": {
            "label":  { "type": "string", "description": "Optional label (ntype) filter. Honoured by default. Set MNEM_LABELS=0 (or legacy MNEM_BENCH=0) at server launch to force the filter to be silently dropped." },
            "limit":  { "type": "integer", "minimum": 1, "maximum": 1000, "default": 50 },
            "offset": { "type": "integer", "minimum": 0, "default": 0 }
        },
        "additionalProperties": false
    });

    // `mnem_resolve_or_create`: `label` is load-bearing for the tool's
    // semantics ("find-or-create by (label, prop_name) == value"). We
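    // always advertise it. The handler substitutes Node::DEFAULT_NTYPE
    // for a caller-supplied label only when labels are disabled at
    // server launch (MNEM_LABELS=0, or legacy MNEM_BENCH=0), matching
    // the `label` description advertised in the schema below.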
    // audit-2026-04-25 C3-10: accept a friendly `{name, kind}`
    // shape as an alias for `{prop_name: "name", value: <name>,
    // label: <kind>}`. Most agent callers think in (entity-name,
    // entity-type) terms; the canonical (label, prop_name, value)
    // shape stays available for callers that anchor on a different
    // property (e.g. `email`, `slug`). `agent_id` defaults to
    // "mnem mcp" so the alias path is callable end-to-end without
    // extra fields.
    let resolve_or_create_schema = json!({
        "type": "object",
        "properties": {
            "label":     { "type": "string", "description": "Node label / kind. Honoured by default. Set MNEM_LABELS=0 (or legacy MNEM_BENCH=0) at server launch to force the handler to substitute Node::DEFAULT_NTYPE." },
            "kind":      { "type": "string", "description": "Alias for `label`. Pick one." },
            "prop_name": { "type": "string", "description": "Property to anchor the find-or-create on. Defaults to `name` when the `name` alias is used." },
            "name":      { "type": "string", "description": "Alias for the natural-language entity name. When set, `prop_name` defaults to \"name\" and `value` defaults to this string." },
            "value":     { "description": "String, number, bool, or JSON object/array. Canonicalised before indexing." },
            "agent_id":  { "type": "string", "description": "Commit author. Defaults to 'mnem mcp' when absent." },
            "task_id":   { "type": "string" },
            "extra_props": { "type": "object", "description": "Additional properties to set if the node has to be created." },
            "global": { "type": "boolean", "description": "When true, also resolve-or-create the same entity in the global graph (~/.mnemglobal/.mnem/) and stamp its UUID as `_global_anchor` on the local node. Best-effort: silently skipped if the global graph has not been initialised." }
        },
        "additionalProperties": false
    });

    let retrieve_schema = json!({
        "type": "object",
        "properties": {
            "label":        { "type": "string", "description": "Label filter. Honoured by default. Set MNEM_LABELS=0 (or legacy MNEM_BENCH=0) at server launch to force the filter to be silently dropped." },
            "where":        { "type": "object", "description": "Optional single-property equality gate, e.g. {\"team\": \"eng\"}." },
            "text":         { "type": "string", "description": "Query text. Retained so a reranker can read (query, candidate) pairs jointly. For retrieval proper, pass a `vector` in the matching embed model or configure the sparse lane separately." },
            "vector":       {
                "type": "object",
                "properties": {
                    "model":  { "type": "string", "minLength": 1 },
                    "values": { "type": "array", "items": { "type": "number" }, "minItems": 1 }
                },
                "required": ["model", "values"],
                "additionalProperties": false
            },
            "token_budget": { "type": "integer", "minimum": 0, "description": "Max rendered-text tokens to return. Default: unlimited." },
            "limit":        { "type": "integer", "minimum": 1, "description": "Max items to return, independent of the token budget. No hard ceiling; callers own back-pressure." },
            "vector_cap":   { "type": "integer", "minimum": 1, "description": "Override the per-lane cap on vector candidates (default: retriever-built-in). Raising it lets rerank / graph-expand see more of the long tail." },
            "rerank_top_k": { "type": "integer", "minimum": 1, "description": "If a reranker is wired in via the host config, how many fused candidates to rerank. Has no effect without a reranker." },
            "fusion":       { "type": "string", "enum": ["convex_min_max", "rrf"], "description": "Rank-fusion strategy over the lane outputs. `convex_min_max` (default) per Bruch 2023; `rrf` for the classic Reciprocal Rank Fusion baseline." },
            "graph_expand": { "type": "integer", "minimum": 1, "description": "Enable graph-expand: after hybrid fusion produces a top-K, traverse authored edges up to this many frontier nodes. Disables when absent." },
            "graph_decay":  { "type": "number", "minimum": 0.0, "maximum": 1.0, "description": "Score decay applied per hop during graph-expand. Default preserves retriever built-in." },
            "graph_depth":  { "type": "integer", "minimum": 1, "maximum": 4, "description": "Multi-hop traversal depth. 1 = single-hop; 2+ for MuSiQue-style compositional queries. Clamped to [1, 4]." },
            "graph_etype":  { "type": "array", "items": { "type": "string" }, "description": "Edge-type allowlist for graph-expand. Empty / absent means all edge types." },
            "graph_max_per_seed": { "type": "integer", "minimum": 1, "description": "Per-seed outgoing-edge cap: prevents a hot-seed node from starving siblings in the global graph_expand budget." },
            "graph_mode":   { "type": "string", "enum": ["decay", "ppr"], "description": "Graph-expand strategy. `decay` (default) = historical BFS with decay^depth scoring; `ppr` = personalised PageRank over the hybrid adjacency index (E2+). PPR falls through to decay when no adjacency index is wired." },
            "ppr_damping":  { "type": "number", "minimum": 0.0, "maximum": 0.999, "description": "PPR damping factor. Default 0.85. Ignored unless graph_mode = \"ppr\"." },
            "ppr_iter":     { "type": "integer", "minimum": 1, "description": "PPR power-iteration cap. Default 15. Ignored unless graph_mode = \"ppr\"." }
        },
        "additionalProperties": false
    });

    #[cfg_attr(not(feature = "summarize"), allow(unused_mut))]
    let mut tools: Vec<ToolDef> = vec![
        ToolDef {
            name: "mnem_stats",
            description: "Repository overview: op-head, head commit, ref summary, known labels. \
                          Cheap; call this first to discover what a repo contains.",
            input_schema: json!({
                "type": "object",
                "properties": {},
                "required": [],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_schema",
            description: "List every node label and edge label present in the current commit, \
                          along with the property names the IndexSet has built for each label. \
                          Agents use this to write well-scoped queries.",
            input_schema: json!({
                "type": "object",
                "properties": {},
                "required": [],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_search",
            description: "Search for nodes. Uses the indexed path when a label + exact property \
                          match is specified; falls back to label-scoped scan or full scan \
                          otherwise. Optionally include each hit's outgoing edges of named \
                          labels.",
            input_schema: search_schema,
        },
        ToolDef {
            name: "mnem_get_node",
            description: "Fetch a single node by UUID (as returned by mnem_search / mnem_commit). \
                          Returns full props + content size + outgoing edge count.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "id": { "type": "string", "description": "Node UUID (hyphenated form)." }
                },
                "required": ["id"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_traverse",
            description: "From a start node, list outgoing neighbours reachable via specified \
                          edge labels. One-hop only in this version; deeper traversal lands in a future version.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "start":       { "type": "string", "description": "Start node UUID." },
                    "edge_labels": { "type": "array", "items": { "type": "string" }, "description": "Edge labels to follow." },
                    "limit":       { "type": "integer", "minimum": 1, "maximum": 200, "default": 25 }
                },
                "required": ["start"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_commit",
            description: "Add nodes and/or edges as a single commit. `agent_id` (required) is \
                          stored as the Commit author. `task_id` is accepted and reserved for \
                          future Operation.task_id plumbing (tracked in ); today it is \
                          not persisted. Returns the new op-id, commit CID, and created node UUIDs.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "agent_id": { "type": "string", "description": "Required. Stored as the Commit author." },
                    "task_id":  { "type": "string", "description": "Reserved. Accepted but not yet persisted ." },
                    "message":  { "type": "string", "default": "" },
                    "nodes":    {
                        "type": "array",
                        "items": commit_nodes_item_schema
                    },
                    "edges": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "etype": { "type": "string" },
                                "src":   { "type": "string", "description": "Source node UUID." },
                                "dst":   { "type": "string", "description": "Destination node UUID." },
                                "props": { "type": "object" }
                            },
                            "required": ["etype", "src", "dst"],
                            "additionalProperties": false
                        }
                    }
                },
                "required": ["agent_id"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_commit_relation",
            description: "Compound write: resolve-or-create a subject node, resolve-or-create an \
                          object node, and connect them with a typed edge - all in one commit. \
                          Audit fix G6 (2026-04-25): collapses the 3-tool dance \
                          (resolve_or_create + resolve_or_create + commit-edge) that an LLM under \
                          no specific instruction was unlikely to perform fully, leaving the graph \
                          flat. Anchor property defaults to `name`; pass `anchor` to switch to \
                          `email` / `slug` / `id`. Typical call: \
                          {\"subject\": \"Alice\", \"subject_kind\": \"Entity:Person\", \
                          \"predicate\": \"works_at\", \"object\": \"Globex\", \
                          \"object_kind\": \"Entity:Organization\"}.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "subject":      { "type": "string", "description": "Subject entity natural-language name (the value of the anchor property)." },
                    "subject_kind": { "type": "string", "description": "Subject ntype (e.g. 'Entity:Person'). Honoured when labels are enabled (default); otherwise the handler substitutes Node::DEFAULT_NTYPE." },
                    "predicate":    { "type": "string", "description": "Edge type (e.g. 'works_at', 'lives_in', 'has_preference')." },
                    "object":       { "type": "string", "description": "Object entity natural-language name (the value of the anchor property)." },
                    "object_kind":  { "type": "string", "description": "Object ntype (e.g. 'Entity:Organization'). Honoured when labels are enabled (default); otherwise the handler substitutes Node::DEFAULT_NTYPE." },
                    "anchor":       { "type": "string", "default": "name", "description": "Property name to anchor the resolve_or_create on. Defaults to `name`." },
                    "subject_props":{ "type": "object", "description": "Optional extra props to set on the subject node." },
                    "object_props": { "type": "object", "description": "Optional extra props to set on the object node." },
                    "edge_props":   { "type": "object", "description": "Optional props to set on the edge." },
                    "agent_id":     { "type": "string", "description": "Commit author. Defaults to 'mnem mcp' when absent." },
                    "message":      { "type": "string", "default": "mnem_mcp commit_relation" }
                },
                "required": ["subject", "predicate", "object"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_delete_node",
            description: "Remove a node from the current head. Commits a new op with the removal. \
                          The node is no longer reachable from the new commit's node tree, but its \
                          prior CID and any prior commits that referenced it remain addressable \
                          (mnem's history is append-only). Edges incident to the node are NOT \
                          auto-removed; delete them explicitly or via a future cascade flag.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "id":        { "type": "string", "description": "Node UUID to remove." },
                    "agent_id":  { "type": "string", "description": "Required. Stored as the Commit author." },
                    "message":   { "type": "string", "default": "mnem_mcp delete" }
                },
                "required": ["id", "agent_id"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_tombstone_node",
            description: "Logically \"forget\" a node without deleting its content. Unlike \
                          mnem_delete_node this does NOT remove the node from the node tree - the \
                          node's CID stays stable and any prior edges / commits that reference \
                          it remain intact. What changes is that subsequent retrieves filter the \
                          node out by default (agent can no longer see the memory). Use this when \
                          a user says \"forget X\" or revokes consent; use mnem_delete_node only \
                          when the goal is to free storage, not memory hygiene. Errors if the \
                          node does not exist or has already been tombstoned.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "id":        { "type": "string", "description": "Node UUID to tombstone." },
                    "reason":    { "type": "string", "description": "Free-form reason recorded on the tombstone (e.g. the user's own phrasing)." },
                    "agent_id":  { "type": "string", "description": "Required. Stored as the Commit author." },
                    "message":   { "type": "string", "default": "mnem_mcp tombstone" }
                },
                "required": ["id", "agent_id"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_list_nodes",
            description: "Enumerate nodes at the current head, optionally filtered by label. \
                          Returns UUID + label + optional summary per node. Cheap discovery tool \
                          an agent can call before composing a retrieval: lets it see what's in \
                          the repo without a text-search guess.",
            input_schema: list_nodes_schema,
        },
        ToolDef {
            name: "mnem_resolve_or_create",
            description: "Find-or-create a node by a primary-key property. Accepts EITHER the \
                          friendly `{name: \"Alice\", kind: \"Person\"}` shape (anchors on the \
                          `name` property) OR the canonical \
                          `{prop_name: \"email\", value: \"a@x\", label: \"Person\"}` shape \
                          (anchors on whatever property you choose). If a node with the same \
                          (label, anchor-property) == value already exists, its UUID is \
                          returned; otherwise a new node is committed. Prevents the duplicate-\
                          entity problem agents hit when the same fact is re-asserted across \
                          tool calls. audit-2026-04-25 C3-10: `name`/`kind` aliases added.",
            input_schema: resolve_or_create_schema,
        },
        ToolDef {
            name: "mnem_recent",
            description: "Walk the op-log from the current head backwards. Returns the last N \
                          operations with time, author, agent_id, task_id, and one-line message.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "limit": { "type": "integer", "minimum": 1, "maximum": 100, "default": 10 }
                },
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_vector_search",
            description: "Cosine-similarity nearest-neighbour search over stored node embeddings. \
                          Pass the embedding-model identifier and a query vector; receive the \
                          top-k matches. Nodes whose embedding.model differs from the query are \
                          silently skipped - each index binds to one (model, dim).",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "model":  { "type": "string", "minLength": 1 },
                    "vector": { "type": "array", "items": { "type": "number" }, "minItems": 1 },
                    "k":      { "type": "integer", "minimum": 1, "maximum": 500, "default": 10 }
                },
                "required": ["model", "vector"],
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_retrieve",
            description: "Composite retrieval: combines label + prop-eq filters with cosine \
                          vector search and (optionally) the learned-sparse lane, fuses ranked \
                          lists via min-max convex combination (Bruch 2023) or RRF, optionally \
                          runs multi-hop graph expansion over the authored edges, and greedily \
                          packs rendered nodes under a token budget. Use this as the default \
                          tool when assembling LLM context: it returns nodes pre-rendered to \
                          text plus tokens_used / dropped / candidates_seen metadata so you \
                          know whether the budget was tight. All retrieval knobs exposed by \
                          POST /v1/retrieve are available here so MCP callers reach parity \
                          with the HTTP surface.",
            input_schema: retrieve_schema,
        },
        ToolDef {
            name: "mnem_ingest",
            description: "Ingest a source as a Doc + Chunk + Entity subgraph. Accepts EITHER \
                          {path: \"<file>\"} (server reads the file from disk) OR \
                          {text: \"...\", source?: \"label\"} (caller has already buffered the \
                          document). Runs parse + chunk + rule-based-NER and commits in one \
                          transaction. Chunker choice: 'auto' (picks per source kind), \
                          'paragraph' (blank-line split, best for markdown), 'recursive' \
                          (token-budgeted sliding window, best for PDFs), 'session' (groups \
                          conversation messages). Typical calls: \
                          {\"path\": \"notes.md\"}, \
                          {\"path\": \"book.pdf\", \"chunker\": \"recursive\", \"max_tokens\": 1024}, \
                          {\"text\": \"Alice met Bob.\", \"source\": \"convo-2026-04-25\"}. \
                          File / text size is capped at 32 MiB and max_tokens at 8192 for DoS \
                          resistance. Returns commit_cid plus per-run node / chunk / entity / \
                          relation counts. audit-2026-04-25 C3-8: schema accepts both shapes.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "path":       { "type": "string", "description": "Absolute or relative path to the source file on the MCP server's filesystem. Mutually exclusive with `text`." },
                    "text":       { "type": "string", "description": "Inline document body. Use this when the caller already has the bytes; mutually exclusive with `path`." },
                    "source":     { "type": "string", "description": "Cosmetic label rendered as the `path:` field in the output when ingesting via `text`. Defaults to 'inline-text'." },
                    "ntype":      { "type": "string", "description": "Root Doc node label (default 'Doc').", "default": "Doc" },
                    "chunker":    { "type": "string", "enum": ["auto", "paragraph", "recursive", "session"], "default": "auto" },
                    "max_tokens": { "type": "integer", "minimum": 1, "maximum": 8192, "default": 512 },
                    "overlap":    { "type": "integer", "minimum": 0, "maximum": 8192, "default": 32 },
                    "agent_id":   { "type": "string", "description": "Commit author. Defaults to 'mnem mcp' when absent." },
                    "message":    { "type": "string", "default": "mnem_mcp ingest" }
                },
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_global_retrieve",
            description: "Semantic search on the global anchor graph (~/.mnemglobal/.mnem/) only. \
                          Always targets the global graph regardless of which repo the MCP server \
                          is pointed at. Use this when you explicitly want to read from the shared \
                          cross-session memory store. Use mnem_retrieve for the current local repo.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "text":         { "type": "string", "description": "Query text. Passed as BM25/rerank input and optionally auto-embedded when an embedder is configured." },
                    "vector":       {
                        "type": "object",
                        "description": "Pre-computed query vector.",
                        "properties": {
                            "model":  { "type": "string" },
                            "values": { "type": "array", "items": { "type": "number" } }
                        },
                        "required": ["model", "values"],
                        "additionalProperties": false
                    },
                    "limit":        { "type": "integer", "minimum": 1, "maximum": 1000, "default": 10, "description": "Max results to return." },
                    "token_budget": { "type": "integer", "minimum": 1, "description": "Soft token cap on total rendered output." }
                },
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_global_ingest",
            description: "Ingest a source as a Doc + Chunk + Entity subgraph directly into the \
                          global anchor graph (~/.mnemglobal/.mnem/). Always targets the global \
                          graph regardless of which repo the MCP server is pointed at. Accepts \
                          EITHER {path: \"<file>\"} (server reads the file from disk) OR \
                          {text: \"...\", source?: \"label\"} (caller has already buffered the \
                          document). Same chunker options as mnem_ingest. Use this for documents \
                          that should be queryable across all sessions and projects.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "path":       { "type": "string", "description": "Absolute or relative path to the source file on the MCP server's filesystem. Mutually exclusive with `text`." },
                    "text":       { "type": "string", "description": "Inline document body. Mutually exclusive with `path`." },
                    "source":     { "type": "string", "description": "Cosmetic label for the `path:` field when ingesting via `text`. Defaults to 'inline-text'." },
                    "ntype":      { "type": "string", "description": "Root Doc node label (default 'Doc').", "default": "Doc" },
                    "chunker":    { "type": "string", "enum": ["auto", "paragraph", "recursive", "session"], "default": "auto" },
                    "max_tokens": { "type": "integer", "minimum": 1, "maximum": 8192, "default": 512 },
                    "overlap":    { "type": "integer", "minimum": 0, "maximum": 8192, "default": 32 },
                    "agent_id":   { "type": "string", "description": "Commit author. Defaults to 'mnem mcp' when absent." },
                    "message":    { "type": "string", "default": "mnem_mcp global_ingest" }
                },
                "additionalProperties": false
            }),
        },
        ToolDef {
            name: "mnem_global_add",
            description: "Write nodes and/or edges directly to the global graph \
                          (~/.mnemglobal/.mnem/). Use this when an entity or fact should \
                          belong to the shared cross-repo graph rather than (or in addition \
                          to) the current local repo. Typical use: named entities \
                          (people, orgs, places) that appear across multiple projects.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "nodes": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "ntype":   { "type": "string", "description": "Node label (e.g. 'Entity:Person'). Defaults to Node::DEFAULT_NTYPE." },
                                "summary": { "type": "string", "description": "Human-readable summary sentence." },
                                "props":   { "type": "object", "description": "Arbitrary key/value props." }
                            },
                            "additionalProperties": false
                        }
                    },
                    "edges": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "src":       { "type": "string", "description": "Source node UUID." },
                                "predicate": { "type": "string", "description": "Edge label (e.g. 'works_at')." },
                                "dst":       { "type": "string", "description": "Destination node UUID." }
                            },
                            "required": ["src", "predicate", "dst"],
                            "additionalProperties": false
                        }
                    },
                    "agent_id": { "type": "string", "description": "Commit author. Defaults to 'mnem mcp' when absent." },
                    "message":  { "type": "string", "default": "mnem_mcp global_add" }
                },
                "additionalProperties": false
            }),
        },
    ];
    // C3 FIX-5: `mnem_community_summarize` is the only tool that pulls the
    // embed-providers tree into the MCP binary. It is registered only when
    // the `summarize` feature is enabled, so default builds stay lean
    // (~2.3 MiB saving).
    #[cfg(feature = "summarize")]
    {
        // E4 T2: extractive community summarizer. No LLM, no BM25;
        // reuses the embedder the server already uses for retrieve.
        tools.push(ToolDef {
            name: "mnem_community_summarize",
            description: "Extractive Centroid + MMR summarizer over a caller-supplied set of node \
                          UUIDs. Looks up each node's `summary` field, embeds the collected \
                          sentences through the server's configured embedder (MNEM_EMBED_* env \
                          vars or `[embed]` in <repo>/config.toml), and picks `k` sentences \
                          balancing proximity to the community centroid against MMR diversity. \
                          No LLM call, no rewrite: the returned sentences are verbatim slices \
                          from the input summaries. Optional `query` biases selection toward \
                          query-relevant sentences. This is the MCP mirror of POST /v1/retrieve \
                          with `summarize: true`, except you choose the node set directly \
                          (typical callers: a Leiden-community node list, or a hand-curated \
                          subgraph). Degree-centrality fallback is uniform today; PPR slots in \
                          unchanged once E2 lands.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "node_ids":   {
                        "type": "array",
                        "items": { "type": "string" },
                        "minItems": 1,
                        "maxItems": 10000,
                        "description": "Node UUIDs (as produced by other tool outputs)."
                    },
                    "query":      {
                        "type": "string",
                        "description": "Optional query text. When set, biases sentence selection toward query-relevance (beta=0.3 in the Centroid+MMR weighting)."
                    },
                    "k":          {
                        "type": "integer",
                        "minimum": 0,
                        "maximum": 1000,
                        "default": 3,
                        "description": "Max number of sentences to return. Clamped to min(k, sentences)."
                    },
                    "mmr_lambda": {
                        "type": "number",
                        "minimum": 0.0,
                        "maximum": 1.0,
                        "default": 0.5,
                        "description": "MMR diversity weight. 0 = pure relevance, 1 = pure diversity."
                    }
                },
                "required": ["node_ids"],
                "additionalProperties": false
            }),
        });
    }
    tools
}