Skip to main content

mcp_methods/server/
manifest.rs

1//! YAML manifest schema + loader.
2//!
3//! A manifest is a YAML file declaring the tools, source roots, custom
4//! embedder, and trust gates the server should apply. The loader parses,
5//! validates, and returns a [`Manifest`]; consumers (CLI wiring, tool
6//! registration) operate on the validated structure.
7//!
8//! Path strings (`source_root`, `python:` tool paths, embedder module)
9//! are kept as the raw user input — relative-to-yaml resolution happens
10//! at the use site so the data stays pure and testable.
11//!
12//! Validation is fail-fast and user-facing: the caller surfaces
13//! [`ManifestError`] messages directly to the operator.
14//!
15//! Schema mirrors the Python `kglite.mcp_server.manifest` module 1:1 so
16//! a manifest written for the Python server boots unchanged on the new
17//! Rust server.
18
19// A handful of fields/helpers are exposed for downstream consumers
20// (e.g. kglite-mcp-server reads `CypherTool::cypher` directly when
21// registering manifest-declared tools) and so look unused from this
22// crate's perspective. Silence dead-code warnings rather than chase
23// every cross-crate use.
24#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest. Any other key is rejected
/// by the unknown-key check, and this list is echoed in the error message.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];

/// Keys accepted inside the `workspace:` mapping.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];

/// Spellings accepted for `workspace.kind`; quoted in validation errors.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];

/// Keys accepted inside the `trust:` mapping.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];

/// Keys accepted on each entry of the `tools:` list, across all tool kinds.
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
];

/// Keys accepted inside the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];

/// Keys accepted inside the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];

/// Spellings accepted for `builtins.temp_cleanup`.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
67
68#[derive(Debug, Error)]
69#[error("{path}: {message}")]
70pub struct ManifestError {
71    pub path: String,
72    pub message: String,
73}
74
75impl ManifestError {
76    pub fn at(path: &Path, message: impl Into<String>) -> Self {
77        Self {
78            path: path.display().to_string(),
79            message: message.into(),
80        }
81    }
82
83    pub fn bare(message: impl Into<String>) -> Self {
84        Self {
85            path: "<manifest>".to_string(),
86            message: message.into(),
87        }
88    }
89}
90
/// Opt-in gates from the manifest's `trust:` mapping. Every gate defaults
/// to `false`.
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// Value of `trust.allow_python_tools`.
    pub allow_python_tools: bool,
    /// Value of `trust.allow_embedder`.
    pub allow_embedder: bool,
    /// Advisory gate for an extension-defined query preprocessor hook.
    ///
    /// The framework neither parses nor runs the preprocessor — it lives
    /// in the opaque `extensions:` passthrough. Downstream consumers
    /// (e.g. kglite-mcp-server) read this flag and refuse to boot the
    /// hook when it is `false`, following the same pattern as
    /// `allow_embedder`.
    pub allow_query_preprocessor: bool,
}
103
104#[derive(Debug, Clone)]
105pub enum ToolSpec {
106    Cypher(CypherTool),
107    Python(PythonTool),
108    /// Override the agent-facing surface of a bundled tool (one the
109    /// downstream binary provides natively — `cypher_query`,
110    /// `graph_overview`, `read_source`, etc.). The framework parses
111    /// the override but does not enforce that the named tool exists;
112    /// the downstream consumer (e.g. `kglite-mcp-server`) is
113    /// responsible for validating the name against its bundled
114    /// catalogue at boot time and applying the override when
115    /// emitting `tools/list`.
116    ///
117    /// Pre-0.3.31 the only customisation path for the bundled tool
118    /// surface was the manifest's global `instructions:` block —
119    /// useful for first-message orientation but not attached to
120    /// individual tools. Bundled overrides let operators rewrite a
121    /// specific tool's `description` (what the agent sees in
122    /// `tools/list`) or `hidden`-flag it out entirely.
123    Bundled(BundledOverride),
124}
125
126impl ToolSpec {
127    pub fn name(&self) -> &str {
128        match self {
129            ToolSpec::Cypher(t) => &t.name,
130            ToolSpec::Python(t) => &t.name,
131            ToolSpec::Bundled(t) => &t.name,
132        }
133    }
134}
135
136#[derive(Debug, Clone)]
137pub struct CypherTool {
138    pub name: String,
139    pub cypher: String,
140    pub description: Option<String>,
141    pub parameters: Option<serde_json::Value>,
142}
143
144#[derive(Debug, Clone)]
145pub struct PythonTool {
146    pub name: String,
147    pub python: String,
148    pub function: String,
149    pub description: Option<String>,
150    pub parameters: Option<serde_json::Value>,
151}
152
/// Operator-supplied customisation of a tool the downstream binary ships.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Which bundled tool to override (e.g. `cypher_query`,
    /// `repo_management`). Only the shape is checked here; the consumer
    /// validates the name against its real catalogue at boot time.
    pub name: String,
    /// Replacement for the bundled tool's default agent-facing
    /// description. `None` keeps the default.
    pub description: Option<String>,
    /// When `true`, the downstream consumer should both omit the tool
    /// from `tools/list` and reject calls to it. Defaults to `false`
    /// (visible).
    pub hidden: bool,
}
169
170#[derive(Debug, Clone)]
171pub struct EmbedderConfig {
172    pub module: String,
173    pub class: String,
174    pub kwargs: serde_json::Map<String, serde_json::Value>,
175}
176
177#[derive(Debug, Default, Clone)]
178pub struct BuiltinsConfig {
179    pub save_graph: bool,
180    pub temp_cleanup: TempCleanup,
181}
182
/// Policy selected by `builtins.temp_cleanup`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Spelled `never` in the manifest; the default.
    #[default]
    Never,
    /// Spelled `on_overview` in the manifest.
    OnOverview,
}

impl TempCleanup {
    /// The manifest spelling of this policy.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::OnOverview => "on_overview",
            Self::Never => "never",
        }
    }
}
198
/// Workspace flavour selected by `workspace.kind`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Clone-and-track GitHub repos. This is the default when no
    /// `workspace:` block is set and the operator passed
    /// `--workspace DIR`.
    #[default]
    Github,
    /// Bind a fixed local directory as the active source root. Nothing
    /// is cloned; `set_root_dir(path)` swaps the active root.
    Local,
}

impl WorkspaceKind {
    /// The manifest spelling of this kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            Self::Github => "github",
        }
    }
}
218
219#[derive(Debug, Clone, Default)]
220pub struct WorkspaceConfig {
221    pub kind: WorkspaceKind,
222    /// Local-mode only: path to the directory to bind as the source
223    /// root. Relative paths resolve against the YAML's parent dir.
224    pub root: Option<String>,
225    /// Local-mode only: wire the framework's file watcher to `root`
226    /// (debounced rebuild trigger via the post-activate hook).
227    pub watch: bool,
228    /// Optional opt-in for the [`find_workspace_manifest`] parent-walk
229    /// fallback. When set, this manifest is auto-discovered by
230    /// ``mcp-server --workspace DIR`` (and similar callers) only when
231    /// ``DIR`` canonicalises to ``applies_to`` resolved against the
232    /// manifest's parent directory. When unset, the parent-walk
233    /// fallback NEVER fires for this manifest — operators must pass
234    /// ``--mcp-config`` explicitly.
235    ///
236    /// Eliminates the accidental-discovery footgun where a workspace
237    /// manifest is auto-picked-up by an unrelated sibling dir. The
238    /// manifest's own declaration is the opt-in.
239    pub applies_to: Option<String>,
240}
241
242#[derive(Debug, Clone)]
243pub struct Manifest {
244    pub yaml_path: PathBuf,
245    pub name: Option<String>,
246    pub instructions: Option<String>,
247    pub overview_prefix: Option<String>,
248    pub source_roots: Vec<String>,
249    pub trust: TrustConfig,
250    pub tools: Vec<ToolSpec>,
251    pub embedder: Option<EmbedderConfig>,
252    pub builtins: BuiltinsConfig,
253    /// Optional explicit `.env` path (relative to the YAML or absolute).
254    /// When unset, the runtime walks upward from the start directory
255    /// looking for a `.env` file.
256    pub env_file: Option<String>,
257    /// Optional explicit workspace declaration. When set, this wins
258    /// over CLI `--workspace`/`--source-root` flags interpretation
259    /// (manifest is the source of truth — same rule as `source_root:`).
260    pub workspace: Option<WorkspaceConfig>,
261    /// Raw passthrough for downstream-binary-specific manifest keys.
262    /// The framework accepts any mapping under `extensions:` and stores
263    /// it here without validating the inner keys; downstream consumers
264    /// (e.g. kglite-mcp-server) read whatever they need from this map.
265    ///
266    /// This keeps the framework's strict-unknown-key validation strong
267    /// for the surfaces it owns (`builtins`, `workspace`, …) while
268    /// letting consumers add their own configuration namespace without
269    /// per-key framework round-trips.
270    pub extensions: serde_json::Map<String, serde_json::Value>,
271}
272
273impl Manifest {
274    /// JSON-friendly representation of the validated manifest for
275    /// FFI / RPC exposure (pyo3 wrappers, JSON-RPC bridges, etc.).
276    ///
277    /// The shape is stable across patch releases: fields can be added
278    /// non-breaking, but key renames or removals are breaking changes.
279    /// When adding a new field to `Manifest`, extend this method too —
280    /// the `to_json_shape_is_stable` test will fail until you do.
281    /// The `extensions` map is passed through unchanged; downstream
282    /// consumers parse their own namespace from it.
283    pub fn to_json(&self) -> serde_json::Value {
284        serde_json::json!({
285            "yaml_path": self.yaml_path.display().to_string(),
286            "name": self.name,
287            "instructions": self.instructions,
288            "overview_prefix": self.overview_prefix,
289            "source_roots": self.source_roots,
290            "trust": {
291                "allow_python_tools": self.trust.allow_python_tools,
292                "allow_embedder": self.trust.allow_embedder,
293                "allow_query_preprocessor": self.trust.allow_query_preprocessor,
294            },
295            "tools": self.tools.iter().map(|t| match t {
296                ToolSpec::Cypher(c) => serde_json::json!({
297                    "kind": "cypher",
298                    "name": c.name,
299                    "cypher": c.cypher,
300                    "description": c.description,
301                    "parameters": c.parameters,
302                }),
303                ToolSpec::Python(p) => serde_json::json!({
304                    "kind": "python",
305                    "name": p.name,
306                    "python": p.python,
307                    "function": p.function,
308                    "description": p.description,
309                    "parameters": p.parameters,
310                }),
311                ToolSpec::Bundled(b) => serde_json::json!({
312                    "kind": "bundled",
313                    "name": b.name,
314                    "description": b.description,
315                    "hidden": b.hidden,
316                }),
317            }).collect::<Vec<_>>(),
318            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
319                "module": e.module,
320                "class": e.class,
321                "kwargs": e.kwargs,
322            })),
323            "builtins": {
324                "save_graph": self.builtins.save_graph,
325                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
326            },
327            "env_file": self.env_file,
328            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
329                "kind": w.kind.as_str(),
330                "root": w.root,
331                "watch": w.watch,
332                "applies_to": w.applies_to,
333            })),
334            "extensions": self.extensions,
335        })
336    }
337}
338
/// Auto-detect ``<basename>_mcp.yaml`` next to a graph file.
///
/// The candidate is the graph file's stem with `_mcp.yaml` appended,
/// inside the graph file's parent directory. Returns `None` when
/// `graph_path` has no stem or no parent, or when the candidate is not
/// an existing regular file.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let basename = graph_path.file_stem()?.to_string_lossy();
    let directory = graph_path.parent()?;
    let candidate = directory.join(format!("{}_mcp.yaml", basename));
    candidate.is_file().then_some(candidate)
}
350
351/// Auto-detect ``workspace_mcp.yaml`` for a workspace directory.
352///
353/// Checks two locations in strict priority order:
354///
355/// 1. **Primary** — ``<workspace_dir>/workspace_mcp.yaml``. The
356///    documented and recommended location. If this exists, it is
357///    returned unconditionally; the parent-walk fallback is NOT
358///    consulted even if a parent manifest also exists. No opt-in
359///    declaration required — the manifest sitting inside the
360///    workspace dir is itself the operator's intent.
361/// 2. **Parent-walk fallback** —
362///    ``<workspace_dir>/../workspace_mcp.yaml``. Triggered only when
363///    the primary is absent AND the parent manifest *declares* it
364///    applies to this specific workspace dir via the
365///    ``workspace.applies_to:`` field:
366///
367///    ```yaml
368///    # open_source/workspace_mcp.yaml
369///    workspace:
370///      kind: github
371///      applies_to: ./repos     # required for parent-walk discovery
372///    ```
373///
374///    The framework loads the parent manifest, canonicalises
375///    ``manifest.workspace.applies_to`` against the manifest's parent
376///    directory, and compares it to the actual ``workspace_dir``.
377///    Match → manifest is returned. No declaration or path mismatch
378///    → discovery returns ``None`` (operator must pass
379///    ``--mcp-config`` explicitly).
380///
381///    The natural layout for github-clone-tracker workspaces is:
382///
383///    ```text
384///    open_source/
385///    ├── workspace_mcp.yaml     # config sits beside the sandbox; declares
386///    │                          # workspace.applies_to: ./repos
387///    └── repos/                 # --workspace points here
388///    ```
389///
390///    The ``applies_to`` opt-in eliminates the accidental-discovery
391///    footgun where a manifest in a project root would auto-attach to
392///    any unrelated sibling dir. Operators who didn't author the
393///    manifest get the safe default (no auto-detection); operators
394///    who did get the ergonomic UX (no ``--mcp-config`` boilerplate).
395///
396/// Bounded to one level up; will not walk past the filesystem root.
397/// Symlink-safe via canonicalisation. Added per kglite operator
398/// feedback after the 0.6.x → 0.9.x migration audit.
399pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
400    let primary = workspace_dir.join("workspace_mcp.yaml");
401    if primary.is_file() {
402        return Some(primary);
403    }
404    // Parent-walk fallback. Compare against canonicalised paths to
405    // handle "/" (where parent == self) and symlinks consistently.
406    let parent = workspace_dir.parent()?;
407    let workspace_resolved = workspace_dir.canonicalize().ok()?;
408    let parent_resolved = parent.canonicalize().ok()?;
409    if parent_resolved == workspace_resolved {
410        // No real parent (filesystem root).
411        return None;
412    }
413    let fallback = parent.join("workspace_mcp.yaml");
414    if !fallback.is_file() {
415        return None;
416    }
417
418    // The fallback manifest must declare workspace.applies_to and
419    // that declaration must canonicalise to the actual workspace_dir.
420    // Otherwise the discovery is unsafe (could be accidental).
421    let manifest = match load(&fallback) {
422        Ok(m) => m,
423        Err(e) => {
424            tracing::warn!(
425                manifest = %fallback.display(),
426                error = %e,
427                "parent-walk manifest exists but failed to parse; ignoring"
428            );
429            return None;
430        }
431    };
432    let declared = manifest
433        .workspace
434        .as_ref()
435        .and_then(|w| w.applies_to.as_ref());
436    let Some(declared_path) = declared else {
437        tracing::info!(
438            manifest = %fallback.display(),
439            "parent-walk manifest does not declare workspace.applies_to; \
440             ignoring (set workspace.applies_to: <relative path> to opt in)"
441        );
442        return None;
443    };
444    let manifest_dir = fallback.parent()?;
445    let declared_abs = match manifest_dir.join(declared_path).canonicalize() {
446        Ok(p) => p,
447        Err(e) => {
448            tracing::warn!(
449                manifest = %fallback.display(),
450                applies_to = %declared_path,
451                error = %e,
452                "parent-walk manifest's workspace.applies_to cannot be resolved; ignoring"
453            );
454            return None;
455        }
456    };
457    if declared_abs == workspace_resolved {
458        tracing::info!(
459            workspace_dir = %workspace_dir.display(),
460            manifest = %fallback.display(),
461            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
462        );
463        Some(fallback)
464    } else {
465        tracing::info!(
466            workspace_dir = %workspace_resolved.display(),
467            manifest = %fallback.display(),
468            declared = %declared_abs.display(),
469            "parent-walk manifest's workspace.applies_to does not match \
470             this workspace_dir; ignoring"
471        );
472        None
473    }
474}
475
476/// Parse and validate a manifest YAML file.
477pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
478    let text = fs::read_to_string(yaml_path)
479        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
480    let raw: serde_yaml::Value = serde_yaml::from_str(&text)
481        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
482    let raw = match raw {
483        serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
484        v => v,
485    };
486    let map = raw
487        .as_mapping()
488        .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
489    build(map, yaml_path)
490}
491
492fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
493    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
494
495    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
496        return Err(ManifestError::at(
497            yaml_path,
498            "specify either source_root (str) or source_roots (list), not both",
499        ));
500    }
501
502    let mut source_roots: Vec<String> = Vec::new();
503    if let Some(v) = raw.get("source_root") {
504        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
505            ManifestError::at(yaml_path, "source_root must be a non-empty string")
506        })?;
507        source_roots.push(s.to_string());
508    } else if let Some(v) = raw.get("source_roots") {
509        let seq = v.as_sequence().ok_or_else(|| {
510            ManifestError::at(
511                yaml_path,
512                "source_roots must be a list of non-empty strings",
513            )
514        })?;
515        if seq.is_empty() {
516            return Err(ManifestError::at(
517                yaml_path,
518                "source_roots must be non-empty when set",
519            ));
520        }
521        for item in seq {
522            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
523                ManifestError::at(
524                    yaml_path,
525                    "source_roots must be a list of non-empty strings",
526                )
527            })?;
528            source_roots.push(s.to_string());
529        }
530    }
531
532    let trust = build_trust(raw.get("trust"), yaml_path)?;
533    let tools = build_tools(raw.get("tools"), yaml_path)?;
534    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
535    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
536    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
537    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
538
539    Ok(Manifest {
540        yaml_path: yaml_path.to_path_buf(),
541        name: optional_str(raw, "name", yaml_path)?,
542        instructions: optional_str(raw, "instructions", yaml_path)?,
543        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
544        source_roots,
545        trust,
546        tools,
547        embedder,
548        builtins,
549        env_file: optional_str(raw, "env_file", yaml_path)?,
550        workspace,
551        extensions,
552    })
553}
554
555fn build_extensions(
556    raw: Option<&serde_yaml::Value>,
557    yaml_path: &Path,
558) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
559    let Some(raw) = raw else {
560        return Ok(serde_json::Map::new());
561    };
562    if matches!(raw, serde_yaml::Value::Null) {
563        return Ok(serde_json::Map::new());
564    }
565    if !raw.is_mapping() {
566        return Err(ManifestError::at(
567            yaml_path,
568            "extensions must be a mapping (downstream-binary-specific keys)",
569        ));
570    }
571    match yaml_to_json(raw.clone())? {
572        serde_json::Value::Object(o) => Ok(o),
573        _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
574    }
575}
576
577fn build_workspace(
578    raw: Option<&serde_yaml::Value>,
579    yaml_path: &Path,
580) -> Result<Option<WorkspaceConfig>, ManifestError> {
581    let Some(raw) = raw else { return Ok(None) };
582    if matches!(raw, serde_yaml::Value::Null) {
583        return Ok(None);
584    }
585    let map = raw
586        .as_mapping()
587        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
588    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
589    let kind = match map.get("kind") {
590        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
591        Some(serde_yaml::Value::String(s)) => match s.as_str() {
592            "github" => WorkspaceKind::Github,
593            "local" => WorkspaceKind::Local,
594            other => {
595                return Err(ManifestError::at(
596                    yaml_path,
597                    format!(
598                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
599                    ),
600                ));
601            }
602        },
603        Some(_) => {
604            return Err(ManifestError::at(
605                yaml_path,
606                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
607            ))
608        }
609    };
610    let root = match map.get("root") {
611        None | Some(serde_yaml::Value::Null) => None,
612        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
613        _ => {
614            return Err(ManifestError::at(
615                yaml_path,
616                "workspace.root must be a non-empty string",
617            ))
618        }
619    };
620    let watch = match map.get("watch") {
621        None | Some(serde_yaml::Value::Null) => false,
622        Some(serde_yaml::Value::Bool(b)) => *b,
623        Some(_) => {
624            return Err(ManifestError::at(
625                yaml_path,
626                "workspace.watch must be a bool",
627            ))
628        }
629    };
630    let applies_to = match map.get("applies_to") {
631        None | Some(serde_yaml::Value::Null) => None,
632        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
633        _ => {
634            return Err(ManifestError::at(
635                yaml_path,
636                "workspace.applies_to must be a non-empty string (a relative path)",
637            ))
638        }
639    };
640    if kind == WorkspaceKind::Local && root.is_none() {
641        return Err(ManifestError::at(
642            yaml_path,
643            "workspace.kind: local requires workspace.root to be set",
644        ));
645    }
646    if kind == WorkspaceKind::Github && watch {
647        return Err(ManifestError::at(
648            yaml_path,
649            "workspace.watch is only valid with workspace.kind: local",
650        ));
651    }
652    Ok(Some(WorkspaceConfig {
653        kind,
654        root,
655        watch,
656        applies_to,
657    }))
658}
659
660fn check_keys(
661    map: &serde_yaml::Mapping,
662    allowed: &[&str],
663    label: &str,
664    yaml_path: &Path,
665) -> Result<(), ManifestError> {
666    let mut unknown: Vec<String> = Vec::new();
667    for (k, _) in map {
668        let key = k.as_str().unwrap_or("<non-string-key>");
669        if !allowed.contains(&key) {
670            unknown.push(key.to_string());
671        }
672    }
673    if !unknown.is_empty() {
674        unknown.sort();
675        return Err(ManifestError::at(
676            yaml_path,
677            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
678        ));
679    }
680    Ok(())
681}
682
683fn optional_str(
684    raw: &serde_yaml::Mapping,
685    key: &str,
686    yaml_path: &Path,
687) -> Result<Option<String>, ManifestError> {
688    match raw.get(key) {
689        None | Some(serde_yaml::Value::Null) => Ok(None),
690        Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
691        Some(_) => Err(ManifestError::at(
692            yaml_path,
693            format!("{key} must be a string"),
694        )),
695    }
696}
697
698fn build_trust(
699    raw: Option<&serde_yaml::Value>,
700    yaml_path: &Path,
701) -> Result<TrustConfig, ManifestError> {
702    let Some(raw) = raw else {
703        return Ok(TrustConfig::default());
704    };
705    let map = raw
706        .as_mapping()
707        .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
708    check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
709    let mut cfg = TrustConfig::default();
710    if let Some(v) = map.get("allow_python_tools") {
711        cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
712            ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
713        })?;
714    }
715    if let Some(v) = map.get("allow_embedder") {
716        cfg.allow_embedder = v
717            .as_bool()
718            .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
719    }
720    if let Some(v) = map.get("allow_query_preprocessor") {
721        cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
722            ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
723        })?;
724    }
725    Ok(cfg)
726}
727
728fn build_tools(
729    raw: Option<&serde_yaml::Value>,
730    yaml_path: &Path,
731) -> Result<Vec<ToolSpec>, ManifestError> {
732    let Some(raw) = raw else {
733        return Ok(Vec::new());
734    };
735    let seq = raw
736        .as_sequence()
737        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
738    let mut tools: Vec<ToolSpec> = Vec::new();
739    let mut seen: BTreeMap<String, ()> = BTreeMap::new();
740    for (i, entry) in seq.iter().enumerate() {
741        let tool = build_tool(entry, i, yaml_path)?;
742        let name = tool.name().to_string();
743        if seen.insert(name.clone(), ()).is_some() {
744            return Err(ManifestError::at(
745                yaml_path,
746                format!("duplicate tool name: {name:?}"),
747            ));
748        }
749        tools.push(tool);
750    }
751    Ok(tools)
752}
753
754fn build_tool(
755    entry: &serde_yaml::Value,
756    idx: usize,
757    yaml_path: &Path,
758) -> Result<ToolSpec, ManifestError> {
759    let map = entry
760        .as_mapping()
761        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
762    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
763
764    // Kind detection. `cypher` and `python` are tool-creation kinds
765    // (operator declares a new named tool); `bundled` is a tool-
766    // override kind (operator picks a bundled tool name and customises
767    // its agent-facing surface). Exactly one must be present.
768    let has_cypher = map.contains_key("cypher");
769    let has_python = map.contains_key("python");
770    let has_bundled = map.contains_key("bundled");
771    let kinds_present: Vec<&str> = [
772        ("cypher", has_cypher),
773        ("python", has_python),
774        ("bundled", has_bundled),
775    ]
776    .into_iter()
777    .filter(|(_, p)| *p)
778    .map(|(k, _)| k)
779    .collect();
780    if kinds_present.is_empty() {
781        return Err(ManifestError::at(
782            yaml_path,
783            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
784        ));
785    }
786    if kinds_present.len() > 1 {
787        return Err(ManifestError::at(
788            yaml_path,
789            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
790        ));
791    }
792
793    // The `bundled` kind takes its name from the `bundled:` value
794    // itself (e.g. `bundled: cypher_query`) and forbids the
795    // tool-creation fields. Branch early so we don't run the
796    // tool-creation `name:` requirement against an override entry.
797    if has_bundled {
798        return build_bundled_override(map, idx, yaml_path);
799    }
800
801    let name = map
802        .get("name")
803        .and_then(|v| v.as_str())
804        .filter(|s| valid_identifier(s))
805        .ok_or_else(|| {
806            ManifestError::at(
807                yaml_path,
808                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
809            )
810        })?
811        .to_string();
812
813    // `hidden:` is only valid on bundled overrides (`hidden:`-flagging
814    // a tool you're declaring inline doesn't make sense — just don't
815    // declare it). Reject early so the operator gets a clear error.
816    if map.contains_key("hidden") {
817        return Err(ManifestError::at(
818            yaml_path,
819            format!(
820                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
821            ),
822        ));
823    }
824
825    let description = match map.get("description") {
826        None | Some(serde_yaml::Value::Null) => None,
827        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
828        Some(_) => {
829            return Err(ManifestError::at(
830                yaml_path,
831                format!("tools[{idx}] ({name:?}).description must be a string"),
832            ))
833        }
834    };
835
836    let parameters = match map.get("parameters") {
837        None | Some(serde_yaml::Value::Null) => None,
838        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
839        Some(_) => {
840            return Err(ManifestError::at(
841                yaml_path,
842                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
843            ))
844        }
845    };
846
847    if has_cypher {
848        let cypher = map
849            .get("cypher")
850            .and_then(|v| v.as_str())
851            .filter(|s| !s.trim().is_empty())
852            .ok_or_else(|| {
853                ManifestError::at(
854                    yaml_path,
855                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
856                )
857            })?
858            .to_string();
859        return Ok(ToolSpec::Cypher(CypherTool {
860            name,
861            cypher,
862            description,
863            parameters,
864        }));
865    }
866
867    // python tool
868    let python = map
869        .get("python")
870        .and_then(|v| v.as_str())
871        .filter(|s| !s.is_empty())
872        .ok_or_else(|| {
873            ManifestError::at(
874                yaml_path,
875                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
876            )
877        })?
878        .to_string();
879    let function = map
880        .get("function")
881        .and_then(|v| v.as_str())
882        .filter(|s| valid_identifier(s))
883        .ok_or_else(|| {
884            ManifestError::at(
885                yaml_path,
886                format!(
887                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
888                ),
889            )
890        })?
891        .to_string();
892    Ok(ToolSpec::Python(PythonTool {
893        name,
894        python,
895        function,
896        description,
897        parameters,
898    }))
899}
900
901/// Parse a `bundled:` override entry from `tools[idx]`. The caller
902/// (`build_tool`) has already established that the entry has
903/// `bundled:` set as the kind discriminator.
904fn build_bundled_override(
905    map: &serde_yaml::Mapping,
906    idx: usize,
907    yaml_path: &Path,
908) -> Result<ToolSpec, ManifestError> {
909    let name = map
910        .get("bundled")
911        .and_then(|v| v.as_str())
912        .filter(|s| valid_identifier(s))
913        .ok_or_else(|| {
914            ManifestError::at(
915                yaml_path,
916                format!(
917                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
918                     (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
919                ),
920            )
921        })?
922        .to_string();
923
924    // Tool-creation fields are forbidden on override entries — the
925    // override only customises an existing bundled tool's surface,
926    // it doesn't declare a new tool. Catch these at parse time so
927    // operators get a clear error rather than silent confusion.
928    for forbidden in ["name", "parameters", "function"] {
929        if map.contains_key(forbidden) {
930            return Err(ManifestError::at(
931                yaml_path,
932                format!(
933                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
934                     (only `description:` and `hidden:` are permitted on overrides)"
935                ),
936            ));
937        }
938    }
939
940    let description = match map.get("description") {
941        None | Some(serde_yaml::Value::Null) => None,
942        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
943        Some(_) => {
944            return Err(ManifestError::at(
945                yaml_path,
946                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
947            ))
948        }
949    };
950
951    let hidden = match map.get("hidden") {
952        None | Some(serde_yaml::Value::Null) => false,
953        Some(serde_yaml::Value::Bool(b)) => *b,
954        Some(_) => {
955            return Err(ManifestError::at(
956                yaml_path,
957                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
958            ))
959        }
960    };
961
962    Ok(ToolSpec::Bundled(BundledOverride {
963        name,
964        description,
965        hidden,
966    }))
967}
968
969fn build_embedder(
970    raw: Option<&serde_yaml::Value>,
971    yaml_path: &Path,
972) -> Result<Option<EmbedderConfig>, ManifestError> {
973    let Some(raw) = raw else { return Ok(None) };
974    if matches!(raw, serde_yaml::Value::Null) {
975        return Ok(None);
976    }
977    let map = raw
978        .as_mapping()
979        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
980    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
981    let module = map
982        .get("module")
983        .and_then(|v| v.as_str())
984        .filter(|s| !s.is_empty())
985        .ok_or_else(|| {
986            ManifestError::at(
987                yaml_path,
988                "embedder.module must be a non-empty string (path or dotted name)",
989            )
990        })?
991        .to_string();
992    let class = map
993        .get("class")
994        .and_then(|v| v.as_str())
995        .filter(|s| valid_identifier(s))
996        .ok_or_else(|| {
997            ManifestError::at(
998                yaml_path,
999                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1000            )
1001        })?
1002        .to_string();
1003    let kwargs = match map.get("kwargs") {
1004        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1005        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1006            serde_json::Value::Object(o) => o,
1007            _ => {
1008                return Err(ManifestError::at(
1009                    yaml_path,
1010                    "embedder.kwargs must be a mapping",
1011                ))
1012            }
1013        },
1014        Some(_) => {
1015            return Err(ManifestError::at(
1016                yaml_path,
1017                "embedder.kwargs must be a mapping",
1018            ))
1019        }
1020    };
1021    Ok(Some(EmbedderConfig {
1022        module,
1023        class,
1024        kwargs,
1025    }))
1026}
1027
1028fn build_builtins(
1029    raw: Option<&serde_yaml::Value>,
1030    yaml_path: &Path,
1031) -> Result<BuiltinsConfig, ManifestError> {
1032    let Some(raw) = raw else {
1033        return Ok(BuiltinsConfig::default());
1034    };
1035    if matches!(raw, serde_yaml::Value::Null) {
1036        return Ok(BuiltinsConfig::default());
1037    }
1038    let map = raw
1039        .as_mapping()
1040        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1041    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1042    let mut cfg = BuiltinsConfig::default();
1043    if let Some(v) = map.get("save_graph") {
1044        cfg.save_graph = v
1045            .as_bool()
1046            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1047    }
1048    if let Some(v) = map.get("temp_cleanup") {
1049        let s = v.as_str().ok_or_else(|| {
1050            ManifestError::at(
1051                yaml_path,
1052                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1053            )
1054        })?;
1055        cfg.temp_cleanup = match s {
1056            "never" => TempCleanup::Never,
1057            "on_overview" => TempCleanup::OnOverview,
1058            other => {
1059                return Err(ManifestError::at(
1060                    yaml_path,
1061                    format!(
1062                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1063                    ),
1064                ))
1065            }
1066        };
1067    }
1068    Ok(cfg)
1069}
1070
/// Returns `true` when `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// The check is done on bytes: every byte of a multi-byte UTF-8 sequence is
/// outside the ASCII range, so any non-ASCII character fails the test just
/// as it would in a per-`char` scan.
fn valid_identifier(s: &str) -> bool {
    fn is_word_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_'
    }

    match s.as_bytes().split_first() {
        // Empty string: there is no leading character to accept.
        None => false,
        Some((&head, tail)) => {
            let head_ok = head.is_ascii_alphabetic() || head == b'_';
            head_ok && tail.iter().all(|&b| is_word_byte(b))
        }
    }
}
1079
1080fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1081    serde_json::to_value(&v)
1082        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1083}
1084
/// Unit placeholder type deriving `Debug` and `Deserialize`.
// NOTE(review): nothing in the visible part of this file constructs or
// deserialises this type; presumably it exists only to keep the
// `serde::Deserialize` import in use — confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
1087
#[cfg(test)]
mod tests {
    // Loader and discovery tests. Most cases write a YAML snippet to a
    // temporary file, run it through `load`, and then check either the
    // parsed fields or a substring of the resulting error message.
    use super::*;

    /// Write `text` to a fresh named temporary file and hand the file back
    /// so the caller keeps it alive while loading from its path.
    fn write_tmp(text: &str) -> tempfile::NamedTempFile {
        use std::io::Write;

        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(text.as_bytes()).unwrap();
        file
    }

    #[test]
    fn loads_minimal_empty_manifest() {
        // An empty file is a valid manifest with every default applied.
        let file = write_tmp("");
        let manifest = load(file.path()).unwrap();
        assert_eq!(manifest.tools.len(), 0);
        assert_eq!(manifest.source_roots.len(), 0);
        assert!(!manifest.trust.allow_python_tools);
        assert!(!manifest.trust.allow_embedder);
        assert_eq!(manifest.builtins.temp_cleanup, TempCleanup::Never);
    }

    #[test]
    fn loads_name_and_instructions() {
        let file = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
        let manifest = load(file.path()).unwrap();
        assert_eq!(manifest.name.as_deref(), Some("Demo"));
        let instructions = manifest.instructions.unwrap();
        assert!(instructions.contains("multi-line"));
    }

    #[test]
    fn rejects_unknown_top_key() {
        let file = write_tmp("bogus: 1\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("unknown top-level"));
    }

    #[test]
    fn source_root_string_normalises_to_list() {
        let file = write_tmp("source_root: ./data\n");
        let manifest = load(file.path()).unwrap();
        let expected = vec!["./data".to_string()];
        assert_eq!(manifest.source_roots, expected);
    }

    #[test]
    fn source_roots_list_preserved() {
        let file = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
        let manifest = load(file.path()).unwrap();
        let expected = vec!["./a".to_string(), "./b".to_string()];
        assert_eq!(manifest.source_roots, expected);
    }

    #[test]
    fn rejects_both_source_root_and_source_roots() {
        let file = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("not both"));
    }

    #[test]
    fn cypher_tool_parses() {
        let file = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
        let manifest = load(file.path()).unwrap();
        assert_eq!(manifest.tools.len(), 1);
        let ToolSpec::Cypher(tool) = &manifest.tools[0] else {
            panic!("expected cypher tool")
        };
        assert_eq!(tool.name, "lookup");
        assert!(tool.cypher.contains("MATCH"));
    }

    #[test]
    fn python_tool_parses() {
        let file =
            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
        let manifest = load(file.path()).unwrap();
        let ToolSpec::Python(tool) = &manifest.tools[0] else {
            panic!("expected python tool")
        };
        assert_eq!(tool.python, "./tools.py");
        assert_eq!(tool.function, "detail");
    }

    #[test]
    fn rejects_tool_with_both_kinds() {
        let file = write_tmp(
            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
        );
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("multiple kinds"));
    }

    #[test]
    fn rejects_tool_with_no_kind() {
        let file = write_tmp("tools:\n  - name: x\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("needs exactly one"));
    }

    #[test]
    fn rejects_duplicate_tool_names() {
        let file = write_tmp(
            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
        );
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("duplicate"));
    }

    // ─── Bundled override shape (0.3.31) ────────────────────────

    #[test]
    fn bundled_override_with_description_parses() {
        let file =
            write_tmp("tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n");
        let manifest = load(file.path()).unwrap();
        assert_eq!(manifest.tools.len(), 1);
        let ToolSpec::Bundled(over) = &manifest.tools[0] else {
            panic!("expected bundled override")
        };
        assert_eq!(over.name, "repo_management");
        assert_eq!(over.description.as_deref(), Some("FIRST STEP"));
        assert!(!over.hidden);
    }

    #[test]
    fn bundled_override_with_hidden_parses() {
        let file = write_tmp("tools:\n  - bundled: ping\n    hidden: true\n");
        let manifest = load(file.path()).unwrap();
        let ToolSpec::Bundled(over) = &manifest.tools[0] else {
            panic!("expected bundled override")
        };
        assert_eq!(over.name, "ping");
        assert!(over.hidden);
        assert!(over.description.is_none());
    }

    #[test]
    fn bundled_override_alongside_cypher_tools_parses() {
        // One override entry followed by one inline cypher tool; order in
        // the parsed list follows the YAML.
        let file = write_tmp(
            "tools:\n\
             \x20\x20- bundled: cypher_query\n\
             \x20\x20\x20\x20description: \"Custom server description\"\n\
             \x20\x20- name: lookup\n\
             \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
        );
        let manifest = load(file.path()).unwrap();
        assert_eq!(manifest.tools.len(), 2);
        assert!(matches!(manifest.tools[0], ToolSpec::Bundled(_)));
        assert!(matches!(manifest.tools[1], ToolSpec::Cypher(_)));
    }

    #[test]
    fn rejects_bundled_with_cypher_kind() {
        let file =
            write_tmp("tools:\n  - bundled: cypher_query\n    cypher: \"MATCH (n) RETURN n\"\n");
        let error = load(file.path()).unwrap_err();
        let matched = error.message.contains("multiple kinds");
        assert!(matched, "got: {}", error.message);
    }

    #[test]
    fn rejects_bundled_with_name_field() {
        let file = write_tmp("tools:\n  - bundled: ping\n    name: ping\n");
        let error = load(file.path()).unwrap_err();
        let matched = error.message.contains("cannot set `name:`");
        assert!(matched, "got: {}", error.message);
    }

    #[test]
    fn rejects_bundled_with_parameters_field() {
        let file =
            write_tmp("tools:\n  - bundled: cypher_query\n    parameters:\n      type: object\n");
        let error = load(file.path()).unwrap_err();
        let matched = error.message.contains("cannot set `parameters:`");
        assert!(matched, "got: {}", error.message);
    }

    #[test]
    fn rejects_bundled_with_non_bool_hidden() {
        let file = write_tmp("tools:\n  - bundled: ping\n    hidden: yes-please\n");
        let error = load(file.path()).unwrap_err();
        let matched = error.message.contains("hidden must be a bool");
        assert!(matched, "got: {}", error.message);
    }

    #[test]
    fn rejects_hidden_on_cypher_tool() {
        let file = write_tmp(
            "tools:\n  - name: lookup\n    cypher: \"MATCH (n) RETURN n\"\n    hidden: true\n",
        );
        let error = load(file.path()).unwrap_err();
        let matched = error
            .message
            .contains("`hidden:` is only valid on `bundled:` override entries");
        assert!(matched, "got: {}", error.message);
    }

    #[test]
    fn rejects_duplicate_bundled_overrides() {
        // Two `bundled: ping` entries resolve to the same tool name, so
        // the duplicate-name check must reject them just as it does for
        // duplicate cypher tools.
        let file = write_tmp(
            "tools:\n  - bundled: ping\n    hidden: true\n  - bundled: ping\n    description: \"x\"\n",
        );
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("duplicate"));
    }

    #[test]
    fn rejects_bundled_with_invalid_identifier() {
        let file = write_tmp("tools:\n  - bundled: \"123-bad\"\n    hidden: true\n");
        let error = load(file.path()).unwrap_err();
        let matched = error.message.contains("must be a string");
        assert!(matched, "got: {}", error.message);
    }

    #[test]
    fn bundled_override_to_json_shape() {
        let file = write_tmp(
            "tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n    hidden: false\n",
        );
        let manifest = load(file.path()).unwrap();
        let json = manifest.to_json();
        let first = &json["tools"][0];
        assert_eq!(first["kind"], "bundled");
        assert_eq!(first["name"], "repo_management");
        assert_eq!(first["description"], "FIRST STEP");
        assert_eq!(first["hidden"], false);
    }

    #[test]
    fn embedder_parses() {
        let file = write_tmp(
            "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n",
        );
        let manifest = load(file.path()).unwrap();
        let embedder = manifest.embedder.unwrap();
        assert_eq!(embedder.module, "./e.py");
        assert_eq!(embedder.class, "GraphEmbedder");
        let cooldown = embedder.kwargs.get("cooldown").unwrap();
        assert_eq!(cooldown.as_i64(), Some(900));
    }

    #[test]
    fn builtins_parses_temp_cleanup() {
        let file = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
        let manifest = load(file.path()).unwrap();
        assert!(manifest.builtins.save_graph);
        assert_eq!(manifest.builtins.temp_cleanup, TempCleanup::OnOverview);
    }

    #[test]
    fn rejects_invalid_temp_cleanup() {
        let file = write_tmp("builtins:\n  temp_cleanup: nuke\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("temp_cleanup"));
    }

    #[test]
    fn allow_embedder_trust_parses() {
        let file = write_tmp("trust:\n  allow_embedder: true\n");
        let manifest = load(file.path()).unwrap();
        assert!(manifest.trust.allow_embedder);
    }

    #[test]
    fn allow_query_preprocessor_trust_parses() {
        // Setting one trust flag must leave the other two at false.
        let file = write_tmp("trust:\n  allow_query_preprocessor: true\n");
        let manifest = load(file.path()).unwrap();
        let trust = &manifest.trust;
        assert!(trust.allow_query_preprocessor);
        assert!(!trust.allow_embedder);
        assert!(!trust.allow_python_tools);
    }

    #[test]
    fn allow_query_preprocessor_rejects_non_bool() {
        let file = write_tmp("trust:\n  allow_query_preprocessor: \"yes\"\n");
        let error = load(file.path()).unwrap_err();
        let matched = error
            .message
            .contains("allow_query_preprocessor must be a bool");
        assert!(matched);
    }

    #[test]
    fn find_sibling_works() {
        // `demo.kgl` next to `demo_mcp.yaml` in the same directory.
        let tmp = tempfile::tempdir().unwrap();
        let graph_path = tmp.path().join("demo.kgl");
        std::fs::write(&graph_path, b"\x00").unwrap();
        let sibling_path = tmp.path().join("demo_mcp.yaml");
        std::fs::write(&sibling_path, "name: x\n").unwrap();
        assert_eq!(find_sibling_manifest(&graph_path), Some(sibling_path));
    }

    #[test]
    fn workspace_local_parses() {
        let file = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
        let manifest = load(file.path()).unwrap();
        let workspace = manifest.workspace.unwrap();
        assert_eq!(workspace.kind, WorkspaceKind::Local);
        assert_eq!(workspace.root.as_deref(), Some("./src"));
        assert!(workspace.watch);
    }

    #[test]
    fn workspace_github_default_kind() {
        // An empty `workspace:` mapping falls back to the github kind.
        let file = write_tmp("workspace: {}\n");
        let manifest = load(file.path()).unwrap();
        let workspace = manifest.workspace.unwrap();
        assert_eq!(workspace.kind, WorkspaceKind::Github);
        assert!(workspace.root.is_none());
        assert!(!workspace.watch);
    }

    #[test]
    fn workspace_local_without_root_errors() {
        let file = write_tmp("workspace:\n  kind: local\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("requires workspace.root"));
    }

    #[test]
    fn workspace_unknown_key_rejected() {
        let file = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("unknown workspace keys"));
    }

    #[test]
    fn workspace_invalid_kind_rejected() {
        let file = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("workspace.kind"));
    }

    #[test]
    fn workspace_watch_invalid_for_github() {
        let file = write_tmp("workspace:\n  kind: github\n  watch: true\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("watch is only valid"));
    }

    #[test]
    fn extensions_passthrough_parses() {
        let file = write_tmp(
            "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
        );
        let manifest = load(file.path()).unwrap();
        let extensions = &manifest.extensions;

        let server_flag = extensions
            .get("csv_http_server")
            .and_then(|value| value.as_bool());
        assert_eq!(server_flag, Some(true));

        let server_dir = extensions
            .get("csv_http_server_dir")
            .and_then(|value| value.as_str());
        assert_eq!(server_dir, Some("temp/"));

        // Values nested below the top level come through untouched.
        let nested = extensions
            .get("arbitrary")
            .and_then(|value| value.get("nested"))
            .and_then(|value| value.as_i64());
        assert_eq!(nested, Some(1));
    }

    #[test]
    fn extensions_absent_defaults_to_empty() {
        let file = write_tmp("name: x\n");
        let manifest = load(file.path()).unwrap();
        assert!(manifest.extensions.is_empty());
    }

    #[test]
    fn extensions_inner_keys_unvalidated() {
        // Keys inside `extensions:` belong to downstream binaries and are
        // deliberately not validated here; any YAML mapping must load.
        let file = write_tmp(
            "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
        );
        load(file.path()).unwrap();
    }

    #[test]
    fn extensions_must_be_a_mapping() {
        let file = write_tmp("extensions: not-a-mapping\n");
        let error = load(file.path()).unwrap_err();
        assert!(error.message.contains("extensions must be a mapping"));
    }

    #[test]
    fn env_file_key_parses() {
        let file = write_tmp("env_file: ../.env\n");
        let manifest = load(file.path()).unwrap();
        assert_eq!(manifest.env_file.as_deref(), Some("../.env"));
    }

    #[test]
    fn env_file_unset_is_none() {
        let file = write_tmp("name: Demo\n");
        let manifest = load(file.path()).unwrap();
        assert!(manifest.env_file.is_none());
    }

    #[test]
    fn find_workspace_works() {
        let tmp = tempfile::tempdir().unwrap();
        let manifest_path = tmp.path().join("workspace_mcp.yaml");
        std::fs::write(&manifest_path, "name: ws\n").unwrap();
        assert_eq!(find_workspace_manifest(tmp.path()), Some(manifest_path));
    }

    #[test]
    fn find_workspace_walks_one_level_up_with_applies_to() {
        // Layout:
        //   <tmp>/parent/workspace_mcp.yaml  (workspace.applies_to: ./repos)
        //   <tmp>/parent/repos/
        // Starting discovery in `repos/` should climb one level and pick
        // up the parent's manifest because its applies_to matches.
        let tmp = tempfile::tempdir().unwrap();
        let parent_dir = tmp.path().join("parent");
        std::fs::create_dir(&parent_dir).unwrap();
        let manifest_path = parent_dir.join("workspace_mcp.yaml");
        std::fs::write(
            &manifest_path,
            "workspace:\n  kind: github\n  applies_to: ./repos\n",
        )
        .unwrap();
        let repos_dir = parent_dir.join("repos");
        std::fs::create_dir(&repos_dir).unwrap();

        // Looking directly in the parent still finds it.
        assert_eq!(
            find_workspace_manifest(&parent_dir),
            Some(manifest_path.clone())
        );

        // The fallback from `repos/` lands on the same file. Paths are
        // canonicalised before comparing so macOS's /private/var vs /var
        // does not cause a spurious mismatch.
        let found = find_workspace_manifest(&repos_dir).expect("parent fallback should fire");
        let found_canonical = found.canonicalize().unwrap();
        let expected_canonical = manifest_path.canonicalize().unwrap();
        assert_eq!(found_canonical, expected_canonical);
    }

    #[test]
    fn find_workspace_ignores_parent_without_applies_to() {
        // The parent has a manifest but it does not opt in via
        // workspace.applies_to. The fallback must not attach it, or an
        // unrelated workspace_mcp.yaml could silently bind to whatever
        // --workspace path the operator passes.
        let tmp = tempfile::tempdir().unwrap();
        let parent_dir = tmp.path().join("parent");
        std::fs::create_dir(&parent_dir).unwrap();
        let manifest_path = parent_dir.join("workspace_mcp.yaml");
        std::fs::write(&manifest_path, "name: not for repos\n").unwrap();
        let repos_dir = parent_dir.join("repos");
        std::fs::create_dir(&repos_dir).unwrap();

        let found = find_workspace_manifest(&repos_dir);
        assert_eq!(
            found, None,
            "parent manifest without workspace.applies_to must NOT auto-attach"
        );
    }

    #[test]
    fn find_workspace_ignores_parent_with_mismatched_applies_to() {
        // The parent opts in for ./repos, but discovery starts from
        // ./other_dir; the mismatch must suppress auto-detection.
        let tmp = tempfile::tempdir().unwrap();
        let parent_dir = tmp.path().join("parent");
        std::fs::create_dir(&parent_dir).unwrap();
        let manifest_path = parent_dir.join("workspace_mcp.yaml");
        std::fs::write(
            &manifest_path,
            "workspace:\n  kind: github\n  applies_to: ./repos\n",
        )
        .unwrap();
        let other_dir = parent_dir.join("other_dir");
        std::fs::create_dir(&other_dir).unwrap();

        let found = find_workspace_manifest(&other_dir);
        assert_eq!(
            found, None,
            "applies_to: ./repos must NOT match --workspace ./other_dir"
        );
    }

    #[test]
    fn find_workspace_returns_none_when_missing_everywhere() {
        let tmp = tempfile::tempdir().unwrap();
        let child_dir = tmp.path().join("child");
        std::fs::create_dir(&child_dir).unwrap();
        // Neither `child/` nor its parent (the tempdir root) has a manifest.
        assert_eq!(find_workspace_manifest(&child_dir), None);
    }

    #[test]
    fn find_workspace_primary_wins_over_parent_fallback() {
        // Both candidates exist: the child has its own manifest, and the
        // parent's manifest declares applies_to matching the child, so it
        // would be a valid fallback. The child's own manifest must take
        // precedence; a refactor that swaps the order should fail here.
        let tmp = tempfile::tempdir().unwrap();
        let parent_manifest = tmp.path().join("workspace_mcp.yaml");
        std::fs::write(
            &parent_manifest,
            "workspace:\n  kind: github\n  applies_to: ./repos\n",
        )
        .unwrap();
        let child_dir = tmp.path().join("repos");
        std::fs::create_dir(&child_dir).unwrap();
        let child_manifest = child_dir.join("workspace_mcp.yaml");
        std::fs::write(&child_manifest, "name: child\n").unwrap();

        // Canonicalise both sides so the macOS /private/var vs /var
        // symlink does not affect the comparison.
        let found = find_workspace_manifest(&child_dir).expect("primary should resolve");
        let found_canonical = found.canonicalize().unwrap();
        let expected_canonical = child_manifest.canonicalize().unwrap();
        assert_eq!(
            found_canonical, expected_canonical,
            "primary location must win when both primary and parent fallback exist"
        );
    }

    #[test]
    fn to_json_shape_is_stable() {
        // Pins the full JSON projection, including every defaulted field.
        let file = write_tmp(
            r#"
name: KGLite Codebase
source_roots: [src, lib]
trust:
  allow_embedder: true
embedder:
  module: kglite.embed
  class: SentenceTransformerEmbedder
builtins:
  save_graph: true
  temp_cleanup: on_overview
"#,
        );
        let manifest = load(file.path()).unwrap();
        let expected = serde_json::json!({
            "yaml_path": file.path().display().to_string(),
            "name": "KGLite Codebase",
            "instructions": null,
            "overview_prefix": null,
            "source_roots": ["src", "lib"],
            "trust": {
                "allow_python_tools": false,
                "allow_embedder": true,
                "allow_query_preprocessor": false,
            },
            "tools": [],
            "embedder": {
                "module": "kglite.embed",
                "class": "SentenceTransformerEmbedder",
                "kwargs": {},
            },
            "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
            "env_file": null,
            "workspace": null,
            "extensions": {},
        });
        let actual = manifest.to_json();
        assert_eq!(actual, expected);
    }

    #[test]
    fn to_json_round_trips_tools_and_workspace() {
        let file = write_tmp(
            r#"
name: Full Surface
source_root: ./src
trust:
  allow_python_tools: true
tools:
  - name: nodes_for
    cypher: "MATCH (n {name: $name}) RETURN n"
    description: "fetch nodes by name"
  - name: run_query
    python: tools.py
    function: run
workspace:
  kind: local
  root: /tmp/ws
  watch: true
builtins:
  save_graph: false
env_file: .env.local
extensions:
  kglite:
    flavour: standard
"#,
        );
        let manifest = load(file.path()).unwrap();
        let json = manifest.to_json();

        assert_eq!(json["name"], "Full Surface");
        assert_eq!(json["trust"]["allow_python_tools"], true);

        let workspace = &json["workspace"];
        assert_eq!(workspace["kind"], "local");
        assert_eq!(workspace["root"], "/tmp/ws");
        assert_eq!(workspace["watch"], true);

        assert_eq!(json["env_file"], ".env.local");

        let cypher_tool = &json["tools"][0];
        assert_eq!(cypher_tool["kind"], "cypher");
        assert_eq!(cypher_tool["name"], "nodes_for");

        let python_tool = &json["tools"][1];
        assert_eq!(python_tool["kind"], "python");
        assert_eq!(python_tool["name"], "run_query");
        assert_eq!(python_tool["python"], "tools.py");
        assert_eq!(python_tool["function"], "run");

        assert_eq!(json["extensions"]["kglite"]["flavour"], "standard");
    }
}