Skip to main content

mcp_methods/server/
manifest.rs

1//! YAML manifest schema + loader.
2//!
3//! A manifest is a YAML file declaring the tools, source roots, custom
4//! embedder, and trust gates the server should apply. The loader parses,
5//! validates, and returns a [`Manifest`]; consumers (CLI wiring, tool
6//! registration) operate on the validated structure.
7//!
8//! Path strings (`source_root`, `python:` tool paths, embedder module)
9//! are kept as the raw user input — relative-to-yaml resolution happens
10//! at the use site so the data stays pure and testable.
11//!
12//! Validation is fail-fast and user-facing: the caller surfaces
13//! [`ManifestError`] messages directly to the operator.
14//!
15//! Schema mirrors the Python `kglite.mcp_server.manifest` module 1:1 so
16//! a manifest written for the Python server boots unchanged on the new
17//! Rust server.
18
19// A handful of fields/helpers are exposed for downstream consumers
20// (e.g. kglite-mcp-server reads `CypherTool::cypher` directly when
21// registering manifest-declared tools) and so look unused from this
22// crate's perspective. Silence dead-code warnings rather than chase
23// every cross-crate use.
24#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
33const ALLOWED_TOP_KEYS: &[&str] = &[
34    "name",
35    "instructions",
36    "overview_prefix",
37    "source_root",
38    "source_roots",
39    "trust",
40    "tools",
41    "embedder",
42    "builtins",
43    "env_file",
44    "workspace",
45    "extensions",
46    "skills",
47];
48const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
49const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
50const ALLOWED_TRUST_KEYS: &[&str] = &["allow_python_tools", "allow_embedder"];
51const ALLOWED_TOOL_KEYS: &[&str] = &[
52    "name",
53    "description",
54    "parameters",
55    "cypher",
56    "python",
57    "function",
58    "bundled",
59    "hidden",
60    // 0.3.34: per-deployment rename for bundled tools (the bundled
61    // override block already covers `description` and `hidden`; this
62    // adds the third axis — what the agent sees in `tools/list`).
63    "rename",
64];
65const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
66const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
67const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
68
69#[derive(Debug, Error)]
70#[error("{path}: {message}")]
71pub struct ManifestError {
72    pub path: String,
73    pub message: String,
74}
75
76impl ManifestError {
77    pub fn at(path: &Path, message: impl Into<String>) -> Self {
78        Self {
79            path: path.display().to_string(),
80            message: message.into(),
81        }
82    }
83
84    pub fn bare(message: impl Into<String>) -> Self {
85        Self {
86            path: "<manifest>".to_string(),
87            message: message.into(),
88        }
89    }
90}
91
92#[derive(Debug, Default, Clone)]
93pub struct TrustConfig {
94    pub allow_python_tools: bool,
95    pub allow_embedder: bool,
96}
97
98#[derive(Debug, Clone)]
99pub enum ToolSpec {
100    Cypher(CypherTool),
101    Python(PythonTool),
102    /// Override the agent-facing surface of a bundled tool (one the
103    /// downstream binary provides natively — `cypher_query`,
104    /// `graph_overview`, `read_source`, etc.). The framework parses
105    /// the override but does not enforce that the named tool exists;
106    /// the downstream consumer (e.g. `kglite-mcp-server`) is
107    /// responsible for validating the name against its bundled
108    /// catalogue at boot time and applying the override when
109    /// emitting `tools/list`.
110    ///
111    /// Pre-0.3.31 the only customisation path for the bundled tool
112    /// surface was the manifest's global `instructions:` block —
113    /// useful for first-message orientation but not attached to
114    /// individual tools. Bundled overrides let operators rewrite a
115    /// specific tool's `description` (what the agent sees in
116    /// `tools/list`) or `hidden`-flag it out entirely.
117    Bundled(BundledOverride),
118}
119
120impl ToolSpec {
121    pub fn name(&self) -> &str {
122        match self {
123            ToolSpec::Cypher(t) => &t.name,
124            ToolSpec::Python(t) => &t.name,
125            ToolSpec::Bundled(t) => &t.name,
126        }
127    }
128}
129
130#[derive(Debug, Clone)]
131pub struct CypherTool {
132    pub name: String,
133    pub cypher: String,
134    pub description: Option<String>,
135    pub parameters: Option<serde_json::Value>,
136}
137
138#[derive(Debug, Clone)]
139pub struct PythonTool {
140    pub name: String,
141    pub python: String,
142    pub function: String,
143    pub description: Option<String>,
144    pub parameters: Option<serde_json::Value>,
145}
146
147#[derive(Debug, Clone)]
148pub struct BundledOverride {
149    /// Name of the bundled tool to override (e.g. `cypher_query`,
150    /// `repo_management`). Validation against the downstream
151    /// binary's actual catalogue happens at the consumer's boot
152    /// time — the framework only checks shape here.
153    pub name: String,
154    /// New agent-facing description that replaces the bundled
155    /// tool's default. `None` means "do not override; keep the
156    /// default."
157    pub description: Option<String>,
158    /// When true, the downstream consumer should omit this tool
159    /// from `tools/list` AND reject calls to it. Defaults to
160    /// false (visible).
161    pub hidden: bool,
162    /// Per-deployment rename: expose the bundled tool to the agent
163    /// under this name instead of its canonical name. `None` keeps
164    /// the canonical name. Lets operators running multiple kglite
165    /// servers (each backed by a different graph) disambiguate
166    /// otherwise-identical tool surfaces — without rename, an agent
167    /// running three servers sees three copies of `cypher_query`,
168    /// each indistinguishable in ToolSearch results. With rename,
169    /// the same servers can expose `legal_cypher_query`,
170    /// `prospect_cypher_query`, `open_source_cypher_query`.
171    /// Must be a valid identifier (`^[a-zA-Z_][a-zA-Z0-9_]*$`);
172    /// validation against duplicates across the manifest's tools is
173    /// the downstream consumer's responsibility.
174    pub rename: Option<String>,
175}
176
177#[derive(Debug, Clone)]
178pub struct EmbedderConfig {
179    pub module: String,
180    pub class: String,
181    pub kwargs: serde_json::Map<String, serde_json::Value>,
182}
183
184#[derive(Debug, Default, Clone)]
185pub struct BuiltinsConfig {
186    pub save_graph: bool,
187    pub temp_cleanup: TempCleanup,
188}
189
190#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
191pub enum TempCleanup {
192    #[default]
193    Never,
194    OnOverview,
195}
196
197impl TempCleanup {
198    pub fn as_str(&self) -> &'static str {
199        match self {
200            TempCleanup::Never => "never",
201            TempCleanup::OnOverview => "on_overview",
202        }
203    }
204}
205
206#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
207pub enum WorkspaceKind {
208    /// Clone-and-track GitHub repos. The default when no `workspace:`
209    /// block is set and the operator passed `--workspace DIR`.
210    #[default]
211    Github,
212    /// Bind a fixed local directory as the active source root. No
213    /// cloning happens; `set_root_dir(path)` swaps the active root.
214    Local,
215}
216
217impl WorkspaceKind {
218    pub fn as_str(&self) -> &'static str {
219        match self {
220            WorkspaceKind::Github => "github",
221            WorkspaceKind::Local => "local",
222        }
223    }
224}
225
226#[derive(Debug, Clone, Default)]
227pub struct WorkspaceConfig {
228    pub kind: WorkspaceKind,
229    /// Local-mode only: path to the directory to bind as the source
230    /// root. Relative paths resolve against the YAML's parent dir.
231    pub root: Option<String>,
232    /// Local-mode only: wire the framework's file watcher to `root`
233    /// (debounced rebuild trigger via the post-activate hook).
234    pub watch: bool,
235    /// Optional opt-in for the [`find_workspace_manifest`] parent-walk
236    /// fallback. When set, this manifest is auto-discovered by
237    /// ``mcp-server --workspace DIR`` (and similar callers) only when
238    /// the operator's ``DIR`` matches the declaration here. When
239    /// unset, the parent-walk fallback NEVER fires for this manifest
240    /// — operators must pass ``--mcp-config`` explicitly.
241    ///
242    /// Values are glob patterns matching the workspace dir's basename
243    /// (single-segment match — parent-walk is always single-level).
244    /// Three forms:
245    ///
246    /// - **Single pattern** (`./repos`, `repos`, `*`, `a*`, `prod-?`):
247    ///   match against the workspace dir's basename. Literal strings
248    ///   like `repos` match only `repos`; glob patterns like `*` or
249    ///   `prod-*` match any name fitting the pattern.
250    /// - **List of patterns** (`[./repos, ./clones]`, `[prod-*, test-*]`):
251    ///   match if any pattern matches. Useful for curated subsets or
252    ///   multiple naming conventions in one manifest.
253    ///
254    /// Leading `./` is optional and stripped at parse time. Patterns
255    /// must be single-segment — `./a/b` is rejected. Invalid glob
256    /// syntax is rejected at parse time.
257    ///
258    /// Eliminates the accidental-discovery footgun where a workspace
259    /// manifest is auto-picked-up by an unrelated sibling dir. The
260    /// manifest's own declaration is the opt-in.
261    pub applies_to: Option<AppliesTo>,
262}
263
264/// Declaration of which workspace dirs the manifest applies to for
265/// the [`find_workspace_manifest`] parent-walk fallback. See
266/// [`WorkspaceConfig::applies_to`] for the full semantics. Each
267/// entry is a glob pattern (literal or with `*` / `?` / `[abc]`)
268/// matched against the workspace dir's basename.
269#[derive(Debug, Clone, PartialEq, Eq)]
270pub enum AppliesTo {
271    /// Single glob pattern. Matches if the workspace dir's basename
272    /// satisfies the pattern. Literal names (`repos`) match only
273    /// that name; `*` matches anything; `prod-*` matches anything
274    /// starting with `prod-`.
275    Pattern(String),
276    /// Multiple patterns. Matches if any pattern in the list matches.
277    Patterns(Vec<String>),
278}
279
280/// One source of skills declared by the manifest. Either the magic
281/// "library bundled" token (rendered as the YAML boolean `true`), or
282/// a filesystem path resolved against the manifest's parent dir.
283///
284/// Path conventions match the rest of the manifest:
285/// - `./foo` or `foo` — relative to the manifest's parent dir
286/// - `~/foo` — home-relative (POSIX `$HOME` expansion)
287/// - `/foo` — absolute
288#[derive(Debug, Clone, PartialEq, Eq)]
289pub enum SkillSource {
290    /// The compile-time bundled skills shipped with `mcp-methods` plus
291    /// any added by the downstream binary at registry-build time.
292    /// In YAML: a bare `true` token in the `skills:` list.
293    Bundled,
294    /// A filesystem path containing `*.md` skill files. Walked at
295    /// boot. Path resolution happens at registry-build time, not parse
296    /// time — `SkillSource::Path` stores the raw operator-declared
297    /// string for round-tripping through `Manifest::to_json()`.
298    Path(String),
299}
300
301/// The parsed value of the `skills:` field in the manifest.
302///
303/// Skills are opt-in. `SkillsSource::Disabled` is the default and
304/// matches verbatim-current MCP behavior: no `prompts/list`, no
305/// methodology surface, identical context cost to pre-skills
306/// deployments. Existing kglite manifests work unchanged.
307///
308/// When enabled, the [`crate::server::skills::Registry`] walks each
309/// source in declaration order, layering them against the
310/// project-local `<basename>.skills/` directory which is always
311/// auto-detected as the top-priority layer.
312#[derive(Debug, Clone, Default, PartialEq, Eq)]
313pub enum SkillsSource {
314    /// `skills: false` or no declaration. Skills disabled entirely.
315    #[default]
316    Disabled,
317    /// One or more sources, walked in declaration order at registry
318    /// build time. First-match-per-skill-name wins across the root
319    /// layer; the auto-detected project layer (`<basename>.skills/`
320    /// adjacent to the YAML) preempts the entire root layer.
321    Sources(Vec<SkillSource>),
322}
323
324#[derive(Debug, Clone)]
325pub struct Manifest {
326    pub yaml_path: PathBuf,
327    pub name: Option<String>,
328    pub instructions: Option<String>,
329    pub overview_prefix: Option<String>,
330    pub source_roots: Vec<String>,
331    pub trust: TrustConfig,
332    pub tools: Vec<ToolSpec>,
333    pub embedder: Option<EmbedderConfig>,
334    pub builtins: BuiltinsConfig,
335    /// Optional explicit `.env` path (relative to the YAML or absolute).
336    /// When unset, the runtime walks upward from the start directory
337    /// looking for a `.env` file.
338    pub env_file: Option<String>,
339    /// Optional explicit workspace declaration. When set, this wins
340    /// over CLI `--workspace`/`--source-root` flags interpretation
341    /// (manifest is the source of truth — same rule as `source_root:`).
342    pub workspace: Option<WorkspaceConfig>,
343    /// Raw passthrough for downstream-binary-specific manifest keys.
344    /// The framework accepts any mapping under `extensions:` and stores
345    /// it here without validating the inner keys; downstream consumers
346    /// (e.g. kglite-mcp-server) read whatever they need from this map.
347    ///
348    /// This keeps the framework's strict-unknown-key validation strong
349    /// for the surfaces it owns (`builtins`, `workspace`, …) while
350    /// letting consumers add their own configuration namespace without
351    /// per-key framework round-trips.
352    pub extensions: serde_json::Map<String, serde_json::Value>,
353    /// Opt-in skills declaration. `SkillsSource::Disabled` is the
354    /// default and preserves current MCP behavior (no `prompts/`
355    /// surface). When set to any non-`Disabled` value, downstream
356    /// binaries pass this to [`crate::server::skills::Registry`] for
357    /// loading + composition; the framework then exposes the
358    /// resulting skill set via `prompts/list` and `prompts/get`.
359    ///
360    /// Three-layer composition: the operator-declared sources here
361    /// form the root layer; the project-local `<basename>.skills/`
362    /// directory (auto-detected) preempts them. See
363    /// `dev-documentation/skills-aware-mcp.md` for the full design.
364    pub skills: SkillsSource,
365}
366
367impl Manifest {
368    /// JSON-friendly representation of the validated manifest for
369    /// FFI / RPC exposure (pyo3 wrappers, JSON-RPC bridges, etc.).
370    ///
371    /// The shape is stable across patch releases: fields can be added
372    /// non-breaking, but key renames or removals are breaking changes.
373    /// When adding a new field to `Manifest`, extend this method too —
374    /// the `to_json_shape_is_stable` test will fail until you do.
375    /// The `extensions` map is passed through unchanged; downstream
376    /// consumers parse their own namespace from it.
377    pub fn to_json(&self) -> serde_json::Value {
378        serde_json::json!({
379            "yaml_path": self.yaml_path.display().to_string(),
380            "name": self.name,
381            "instructions": self.instructions,
382            "overview_prefix": self.overview_prefix,
383            "source_roots": self.source_roots,
384            "trust": {
385                "allow_python_tools": self.trust.allow_python_tools,
386                "allow_embedder": self.trust.allow_embedder,
387            },
388            "tools": self.tools.iter().map(|t| match t {
389                ToolSpec::Cypher(c) => serde_json::json!({
390                    "kind": "cypher",
391                    "name": c.name,
392                    "cypher": c.cypher,
393                    "description": c.description,
394                    "parameters": c.parameters,
395                }),
396                ToolSpec::Python(p) => serde_json::json!({
397                    "kind": "python",
398                    "name": p.name,
399                    "python": p.python,
400                    "function": p.function,
401                    "description": p.description,
402                    "parameters": p.parameters,
403                }),
404                ToolSpec::Bundled(b) => serde_json::json!({
405                    "kind": "bundled",
406                    "name": b.name,
407                    "description": b.description,
408                    "hidden": b.hidden,
409                    "rename": b.rename,
410                }),
411            }).collect::<Vec<_>>(),
412            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
413                "module": e.module,
414                "class": e.class,
415                "kwargs": e.kwargs,
416            })),
417            "builtins": {
418                "save_graph": self.builtins.save_graph,
419                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
420            },
421            "env_file": self.env_file,
422            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
423                "kind": w.kind.as_str(),
424                "root": w.root,
425                "watch": w.watch,
426                "applies_to": w.applies_to.as_ref().map(|a| match a {
427                    AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
428                    AppliesTo::Patterns(ps) => serde_json::Value::Array(
429                        ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
430                    ),
431                }),
432            })),
433            "extensions": self.extensions,
434            "skills": self.skills_to_json(),
435        })
436    }
437
438    /// JSON shape for the parsed `skills:` field. Emits the operator-
439    /// declared shape unchanged (modulo normalisation), suitable for
440    /// downstream pyo3 wrappers that need to introspect what the
441    /// manifest declared without re-running the parser.
442    ///
443    /// Phase 1a (this file) emits the raw declaration only. Phase 1b
444    /// adds a separate accessor on the resolved registry that exposes
445    /// the *post-resolution* skill list with provenance — that's the
446    /// per-skill `{path, origin, frontmatter}` shape kglite asked for
447    /// in their feedback. The two surfaces are intentionally
448    /// distinct: this method describes the manifest, the
449    /// registry method describes the runtime resolution.
450    fn skills_to_json(&self) -> serde_json::Value {
451        match &self.skills {
452            SkillsSource::Disabled => serde_json::Value::Bool(false),
453            SkillsSource::Sources(sources) => {
454                let arr: Vec<serde_json::Value> = sources
455                    .iter()
456                    .map(|s| match s {
457                        SkillSource::Bundled => serde_json::Value::Bool(true),
458                        SkillSource::Path(p) => serde_json::Value::String(p.clone()),
459                    })
460                    .collect();
461                serde_json::Value::Array(arr)
462            }
463        }
464    }
465}
466
467/// Auto-detect ``<basename>_mcp.yaml`` next to a graph file.
468pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
469    let stem = graph_path.file_stem()?;
470    let parent = graph_path.parent()?;
471    let candidate = parent.join(format!("{}_mcp.yaml", stem.to_string_lossy()));
472    if candidate.is_file() {
473        Some(candidate)
474    } else {
475        None
476    }
477}
478
479/// Auto-detect ``workspace_mcp.yaml`` for a workspace directory.
480///
481/// Checks two locations in strict priority order:
482///
483/// 1. **Primary** — ``<workspace_dir>/workspace_mcp.yaml``. The
484///    documented and recommended location. If this exists, it is
485///    returned unconditionally; the parent-walk fallback is NOT
486///    consulted even if a parent manifest also exists. No opt-in
487///    declaration required — the manifest sitting inside the
488///    workspace dir is itself the operator's intent.
489/// 2. **Parent-walk fallback** —
490///    ``<workspace_dir>/../workspace_mcp.yaml``. Triggered only when
491///    the primary is absent AND the parent manifest *declares* it
492///    applies to this specific workspace dir via the
493///    ``workspace.applies_to:`` field:
494///
495///    ```yaml
496///    # open_source/workspace_mcp.yaml
497///    workspace:
498///      kind: github
499///      applies_to: ./repos     # required for parent-walk discovery
500///    ```
501///
502///    The framework loads the parent manifest, canonicalises
503///    ``manifest.workspace.applies_to`` against the manifest's parent
504///    directory, and compares it to the actual ``workspace_dir``.
505///    Match → manifest is returned. No declaration or path mismatch
506///    → discovery returns ``None`` (operator must pass
507///    ``--mcp-config`` explicitly).
508///
509///    The natural layout for github-clone-tracker workspaces is:
510///
511///    ```text
512///    open_source/
513///    ├── workspace_mcp.yaml     # config sits beside the sandbox; declares
514///    │                          # workspace.applies_to: ./repos
515///    └── repos/                 # --workspace points here
516///    ```
517///
518///    The ``applies_to`` opt-in eliminates the accidental-discovery
519///    footgun where a manifest in a project root would auto-attach to
520///    any unrelated sibling dir. Operators who didn't author the
521///    manifest get the safe default (no auto-detection); operators
522///    who did get the ergonomic UX (no ``--mcp-config`` boilerplate).
523///
524/// Bounded to one level up; will not walk past the filesystem root.
525/// Symlink-safe via canonicalisation. Added per kglite operator
526/// feedback after the 0.6.x → 0.9.x migration audit.
527pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
528    let primary = workspace_dir.join("workspace_mcp.yaml");
529    if primary.is_file() {
530        return Some(primary);
531    }
532    // Parent-walk fallback. Compare against canonicalised paths to
533    // handle "/" (where parent == self) and symlinks consistently.
534    let parent = workspace_dir.parent()?;
535    let workspace_resolved = workspace_dir.canonicalize().ok()?;
536    let parent_resolved = parent.canonicalize().ok()?;
537    if parent_resolved == workspace_resolved {
538        // No real parent (filesystem root).
539        return None;
540    }
541    let fallback = parent.join("workspace_mcp.yaml");
542    if !fallback.is_file() {
543        return None;
544    }
545
546    // The fallback manifest must declare workspace.applies_to and
547    // that declaration must canonicalise to the actual workspace_dir.
548    // Otherwise the discovery is unsafe (could be accidental).
549    let manifest = match load(&fallback) {
550        Ok(m) => m,
551        Err(e) => {
552            tracing::warn!(
553                manifest = %fallback.display(),
554                error = %e,
555                "parent-walk manifest exists but failed to parse; ignoring"
556            );
557            return None;
558        }
559    };
560    let declared = manifest
561        .workspace
562        .as_ref()
563        .and_then(|w| w.applies_to.as_ref());
564    let Some(declared_applies_to) = declared else {
565        tracing::info!(
566            manifest = %fallback.display(),
567            "parent-walk manifest does not declare workspace.applies_to; \
568             ignoring (set workspace.applies_to: <pattern> to opt in)"
569        );
570        return None;
571    };
572    // Match the workspace dir's basename against the declared pattern(s).
573    // The parent-walk guarantee (workspace_dir.parent() == manifest_dir)
574    // is already established above — only the basename match is left.
575    let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
576        return None; // path with no usable basename, defensive
577    };
578    let patterns: Vec<&str> = match declared_applies_to {
579        AppliesTo::Pattern(p) => vec![p.as_str()],
580        AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
581    };
582    let matched = patterns.iter().any(|pat| {
583        match globset::Glob::new(pat) {
584            Ok(g) => g.compile_matcher().is_match(basename),
585            Err(_) => {
586                // Should not happen — patterns were validated at parse
587                // time. Defensive: treat as non-match.
588                false
589            }
590        }
591    });
592    if matched {
593        tracing::info!(
594            workspace_dir = %workspace_dir.display(),
595            manifest = %fallback.display(),
596            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
597        );
598        Some(fallback)
599    } else {
600        tracing::info!(
601            workspace_dir = %workspace_resolved.display(),
602            manifest = %fallback.display(),
603            basename = %basename,
604            patterns = ?patterns,
605            "parent-walk manifest's workspace.applies_to does not match \
606             this workspace_dir's basename; ignoring"
607        );
608        None
609    }
610}
611
612/// Parse and validate a manifest YAML file.
613pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
614    let text = fs::read_to_string(yaml_path)
615        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
616    let raw: serde_yaml::Value = serde_yaml::from_str(&text)
617        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
618    let raw = match raw {
619        serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
620        v => v,
621    };
622    let map = raw
623        .as_mapping()
624        .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
625    build(map, yaml_path)
626}
627
628fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
629    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
630
631    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
632        return Err(ManifestError::at(
633            yaml_path,
634            "specify either source_root (str) or source_roots (list), not both",
635        ));
636    }
637
638    let mut source_roots: Vec<String> = Vec::new();
639    if let Some(v) = raw.get("source_root") {
640        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
641            ManifestError::at(yaml_path, "source_root must be a non-empty string")
642        })?;
643        source_roots.push(s.to_string());
644    } else if let Some(v) = raw.get("source_roots") {
645        let seq = v.as_sequence().ok_or_else(|| {
646            ManifestError::at(
647                yaml_path,
648                "source_roots must be a list of non-empty strings",
649            )
650        })?;
651        if seq.is_empty() {
652            return Err(ManifestError::at(
653                yaml_path,
654                "source_roots must be non-empty when set",
655            ));
656        }
657        for item in seq {
658            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
659                ManifestError::at(
660                    yaml_path,
661                    "source_roots must be a list of non-empty strings",
662                )
663            })?;
664            source_roots.push(s.to_string());
665        }
666    }
667
668    let trust = build_trust(raw.get("trust"), yaml_path)?;
669    let tools = build_tools(raw.get("tools"), yaml_path)?;
670    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
671    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
672    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
673    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
674    let skills = build_skills(raw.get("skills"), yaml_path)?;
675
676    Ok(Manifest {
677        yaml_path: yaml_path.to_path_buf(),
678        name: optional_str(raw, "name", yaml_path)?,
679        instructions: optional_str(raw, "instructions", yaml_path)?,
680        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
681        source_roots,
682        trust,
683        tools,
684        embedder,
685        builtins,
686        env_file: optional_str(raw, "env_file", yaml_path)?,
687        workspace,
688        extensions,
689        skills,
690    })
691}
692
693/// Parse the polymorphic `skills:` field. Accepts:
694///
695/// - **Absent or `false`** → [`SkillsSource::Disabled`]. Pure-current
696///   MCP behavior. This is the default and what existing deployments
697///   resolve to without any YAML change.
698/// - **`skills: true`** → single bundled source. Sugar for
699///   `skills: [true]`.
700/// - **`skills: <path-string>`** → single path source. Sugar for
701///   `skills: [<path>]`.
702/// - **`skills: [bool, string, ...]`** → ordered list. Booleans MUST
703///   be `true` (the bundled marker); `false` is rejected at parse
704///   time as nonsense in list context. Each path is stored verbatim
705///   as the operator wrote it; resolution against the manifest's
706///   parent dir happens at registry-build time, not here.
707///
708/// Empty lists are accepted and parsed as `SkillsSource::Sources(vec![])`;
709/// the registry treats them as "skills opted in but no root layer,"
710/// meaning the project-local `<basename>.skills/` auto-detection
711/// still fires while the bundled + custom-path layers stay empty.
712/// Useful for operators who want to rely solely on adjacent project
713/// skills.
714fn build_skills(
715    raw: Option<&serde_yaml::Value>,
716    yaml_path: &Path,
717) -> Result<SkillsSource, ManifestError> {
718    use serde_yaml::Value;
719
720    match raw {
721        None | Some(Value::Null) | Some(Value::Bool(false)) => Ok(SkillsSource::Disabled),
722        Some(Value::Bool(true)) => Ok(SkillsSource::Sources(vec![SkillSource::Bundled])),
723        Some(Value::String(s)) => {
724            if s.is_empty() {
725                return Err(ManifestError::at(
726                    yaml_path,
727                    "skills: path must be a non-empty string",
728                ));
729            }
730            Ok(SkillsSource::Sources(vec![SkillSource::Path(s.clone())]))
731        }
732        Some(Value::Sequence(seq)) => {
733            let mut sources = Vec::with_capacity(seq.len());
734            for (idx, item) in seq.iter().enumerate() {
735                match item {
736                    Value::Bool(true) => sources.push(SkillSource::Bundled),
737                    Value::Bool(false) => {
738                        return Err(ManifestError::at(
739                            yaml_path,
740                            format!(
741                                "skills[{idx}]: `false` is not a valid entry in a `skills:` \
742                                 list (only `true` for bundled, or a path string)"
743                            ),
744                        ));
745                    }
746                    Value::String(s) => {
747                        if s.is_empty() {
748                            return Err(ManifestError::at(
749                                yaml_path,
750                                format!("skills[{idx}]: path must be a non-empty string"),
751                            ));
752                        }
753                        sources.push(SkillSource::Path(s.clone()));
754                    }
755                    _ => {
756                        return Err(ManifestError::at(
757                            yaml_path,
758                            format!(
759                                "skills[{idx}]: each entry must be `true` (for bundled) or a \
760                                 path string"
761                            ),
762                        ));
763                    }
764                }
765            }
766            Ok(SkillsSource::Sources(sources))
767        }
768        Some(_) => Err(ManifestError::at(
769            yaml_path,
770            "skills must be `false`, `true`, a path string, or a list of \
771             (true | path string) entries",
772        )),
773    }
774}
775
776fn build_extensions(
777    raw: Option<&serde_yaml::Value>,
778    yaml_path: &Path,
779) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
780    let Some(raw) = raw else {
781        return Ok(serde_json::Map::new());
782    };
783    if matches!(raw, serde_yaml::Value::Null) {
784        return Ok(serde_json::Map::new());
785    }
786    if !raw.is_mapping() {
787        return Err(ManifestError::at(
788            yaml_path,
789            "extensions must be a mapping (downstream-binary-specific keys)",
790        ));
791    }
792    match yaml_to_json(raw.clone())? {
793        serde_json::Value::Object(o) => Ok(o),
794        _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
795    }
796}
797
798fn build_workspace(
799    raw: Option<&serde_yaml::Value>,
800    yaml_path: &Path,
801) -> Result<Option<WorkspaceConfig>, ManifestError> {
802    let Some(raw) = raw else { return Ok(None) };
803    if matches!(raw, serde_yaml::Value::Null) {
804        return Ok(None);
805    }
806    let map = raw
807        .as_mapping()
808        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
809    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
810    let kind = match map.get("kind") {
811        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
812        Some(serde_yaml::Value::String(s)) => match s.as_str() {
813            "github" => WorkspaceKind::Github,
814            "local" => WorkspaceKind::Local,
815            other => {
816                return Err(ManifestError::at(
817                    yaml_path,
818                    format!(
819                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
820                    ),
821                ));
822            }
823        },
824        Some(_) => {
825            return Err(ManifestError::at(
826                yaml_path,
827                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
828            ))
829        }
830    };
831    let root = match map.get("root") {
832        None | Some(serde_yaml::Value::Null) => None,
833        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
834        _ => {
835            return Err(ManifestError::at(
836                yaml_path,
837                "workspace.root must be a non-empty string",
838            ))
839        }
840    };
841    let watch = match map.get("watch") {
842        None | Some(serde_yaml::Value::Null) => false,
843        Some(serde_yaml::Value::Bool(b)) => *b,
844        Some(_) => {
845            return Err(ManifestError::at(
846                yaml_path,
847                "workspace.watch must be a bool",
848            ))
849        }
850    };
851    let applies_to =
852        match map.get("applies_to") {
853            None | Some(serde_yaml::Value::Null) => None,
854            Some(serde_yaml::Value::String(s)) => {
855                Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
856            }
857            Some(serde_yaml::Value::Sequence(seq)) => {
858                if seq.is_empty() {
859                    return Err(ManifestError::at(
860                        yaml_path,
861                        "workspace.applies_to: list must contain at least one pattern",
862                    ));
863                }
864                let mut patterns = Vec::with_capacity(seq.len());
865                for (i, item) in seq.iter().enumerate() {
866                    let s = item.as_str().ok_or_else(|| {
867                        ManifestError::at(
868                            yaml_path,
869                            format!("workspace.applies_to[{i}] must be a string"),
870                        )
871                    })?;
872                    let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
873                        ManifestError::at(
874                            yaml_path,
875                            format!("workspace.applies_to[{i}]: {}", e.message),
876                        )
877                    })?;
878                    patterns.push(cleaned);
879                }
880                Some(AppliesTo::Patterns(patterns))
881            }
882            _ => return Err(ManifestError::at(
883                yaml_path,
884                "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
885            )),
886        };
887    if kind == WorkspaceKind::Local && root.is_none() {
888        return Err(ManifestError::at(
889            yaml_path,
890            "workspace.kind: local requires workspace.root to be set",
891        ));
892    }
893    if kind == WorkspaceKind::Github && watch {
894        return Err(ManifestError::at(
895            yaml_path,
896            "workspace.watch is only valid with workspace.kind: local",
897        ));
898    }
899    Ok(Some(WorkspaceConfig {
900        kind,
901        root,
902        watch,
903        applies_to,
904    }))
905}
906
907/// Parse + validate a single ``workspace.applies_to`` entry. Accepts
908/// any glob pattern matching a single path segment (no embedded
909/// slashes, no `..`). The leading ``./`` is optional and stripped.
910/// Validates glob syntax via `globset::Glob::new` so invalid patterns
911/// surface clear errors at boot.
912///
913/// Returns the cleaned pattern string (without `./` prefix) on
914/// success.
915fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
916    let trimmed = raw.trim();
917    if trimmed.is_empty() {
918        return Err(ManifestError::at(
919            yaml_path,
920            "workspace.applies_to: pattern must not be empty",
921        ));
922    }
923    // Strip a single leading `./` for ergonomic equivalence between
924    // `./repos` and `repos`. Both forms commonly appear in operator
925    // muscle memory; normalise so storage + glob matching is uniform.
926    let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
927    if stripped.is_empty() {
928        return Err(ManifestError::at(
929            yaml_path,
930            "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
931        ));
932    }
933    if stripped.contains('/') {
934        return Err(ManifestError::at(
935            yaml_path,
936            format!(
937                "workspace.applies_to: pattern {raw:?} must be a single path segment \
938                 (no embedded `/`) — parent-walk discovery is bounded to one level"
939            ),
940        ));
941    }
942    if stripped == ".." || stripped.starts_with("../") {
943        return Err(ManifestError::at(
944            yaml_path,
945            format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
946        ));
947    }
948    if Path::new(stripped).is_absolute() {
949        return Err(ManifestError::at(
950            yaml_path,
951            format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
952        ));
953    }
954    // Validate glob syntax. Construct a Glob to surface any syntax
955    // errors immediately — we don't keep the compiled form (cheap to
956    // re-compile at match time, keeps `WorkspaceConfig` Clone-cheap).
957    globset::Glob::new(stripped).map_err(|e| {
958        ManifestError::at(
959            yaml_path,
960            format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
961        )
962    })?;
963    Ok(stripped.to_string())
964}
965
966fn check_keys(
967    map: &serde_yaml::Mapping,
968    allowed: &[&str],
969    label: &str,
970    yaml_path: &Path,
971) -> Result<(), ManifestError> {
972    let mut unknown: Vec<String> = Vec::new();
973    for (k, _) in map {
974        let key = k.as_str().unwrap_or("<non-string-key>");
975        if !allowed.contains(&key) {
976            unknown.push(key.to_string());
977        }
978    }
979    if !unknown.is_empty() {
980        unknown.sort();
981        return Err(ManifestError::at(
982            yaml_path,
983            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
984        ));
985    }
986    Ok(())
987}
988
989fn optional_str(
990    raw: &serde_yaml::Mapping,
991    key: &str,
992    yaml_path: &Path,
993) -> Result<Option<String>, ManifestError> {
994    match raw.get(key) {
995        None | Some(serde_yaml::Value::Null) => Ok(None),
996        Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
997        Some(_) => Err(ManifestError::at(
998            yaml_path,
999            format!("{key} must be a string"),
1000        )),
1001    }
1002}
1003
1004fn build_trust(
1005    raw: Option<&serde_yaml::Value>,
1006    yaml_path: &Path,
1007) -> Result<TrustConfig, ManifestError> {
1008    let Some(raw) = raw else {
1009        return Ok(TrustConfig::default());
1010    };
1011    let map = raw
1012        .as_mapping()
1013        .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
1014    check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
1015    let mut cfg = TrustConfig::default();
1016    if let Some(v) = map.get("allow_python_tools") {
1017        cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
1018            ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
1019        })?;
1020    }
1021    if let Some(v) = map.get("allow_embedder") {
1022        cfg.allow_embedder = v
1023            .as_bool()
1024            .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
1025    }
1026    Ok(cfg)
1027}
1028
1029fn build_tools(
1030    raw: Option<&serde_yaml::Value>,
1031    yaml_path: &Path,
1032) -> Result<Vec<ToolSpec>, ManifestError> {
1033    let Some(raw) = raw else {
1034        return Ok(Vec::new());
1035    };
1036    let seq = raw
1037        .as_sequence()
1038        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
1039    let mut tools: Vec<ToolSpec> = Vec::new();
1040    let mut seen: BTreeMap<String, ()> = BTreeMap::new();
1041    for (i, entry) in seq.iter().enumerate() {
1042        let tool = build_tool(entry, i, yaml_path)?;
1043        let name = tool.name().to_string();
1044        if seen.insert(name.clone(), ()).is_some() {
1045            return Err(ManifestError::at(
1046                yaml_path,
1047                format!("duplicate tool name: {name:?}"),
1048            ));
1049        }
1050        tools.push(tool);
1051    }
1052    Ok(tools)
1053}
1054
1055fn build_tool(
1056    entry: &serde_yaml::Value,
1057    idx: usize,
1058    yaml_path: &Path,
1059) -> Result<ToolSpec, ManifestError> {
1060    let map = entry
1061        .as_mapping()
1062        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
1063    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
1064
1065    // Kind detection. `cypher` and `python` are tool-creation kinds
1066    // (operator declares a new named tool); `bundled` is a tool-
1067    // override kind (operator picks a bundled tool name and customises
1068    // its agent-facing surface). Exactly one must be present.
1069    let has_cypher = map.contains_key("cypher");
1070    let has_python = map.contains_key("python");
1071    let has_bundled = map.contains_key("bundled");
1072    let kinds_present: Vec<&str> = [
1073        ("cypher", has_cypher),
1074        ("python", has_python),
1075        ("bundled", has_bundled),
1076    ]
1077    .into_iter()
1078    .filter(|(_, p)| *p)
1079    .map(|(k, _)| k)
1080    .collect();
1081    if kinds_present.is_empty() {
1082        return Err(ManifestError::at(
1083            yaml_path,
1084            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
1085        ));
1086    }
1087    if kinds_present.len() > 1 {
1088        return Err(ManifestError::at(
1089            yaml_path,
1090            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
1091        ));
1092    }
1093
1094    // The `bundled` kind takes its name from the `bundled:` value
1095    // itself (e.g. `bundled: cypher_query`) and forbids the
1096    // tool-creation fields. Branch early so we don't run the
1097    // tool-creation `name:` requirement against an override entry.
1098    if has_bundled {
1099        return build_bundled_override(map, idx, yaml_path);
1100    }
1101
1102    let name = map
1103        .get("name")
1104        .and_then(|v| v.as_str())
1105        .filter(|s| valid_identifier(s))
1106        .ok_or_else(|| {
1107            ManifestError::at(
1108                yaml_path,
1109                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
1110            )
1111        })?
1112        .to_string();
1113
1114    // `hidden:` is only valid on bundled overrides (`hidden:`-flagging
1115    // a tool you're declaring inline doesn't make sense — just don't
1116    // declare it). Reject early so the operator gets a clear error.
1117    if map.contains_key("hidden") {
1118        return Err(ManifestError::at(
1119            yaml_path,
1120            format!(
1121                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
1122            ),
1123        ));
1124    }
1125
1126    let description = match map.get("description") {
1127        None | Some(serde_yaml::Value::Null) => None,
1128        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1129        Some(_) => {
1130            return Err(ManifestError::at(
1131                yaml_path,
1132                format!("tools[{idx}] ({name:?}).description must be a string"),
1133            ))
1134        }
1135    };
1136
1137    let parameters = match map.get("parameters") {
1138        None | Some(serde_yaml::Value::Null) => None,
1139        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
1140        Some(_) => {
1141            return Err(ManifestError::at(
1142                yaml_path,
1143                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
1144            ))
1145        }
1146    };
1147
1148    if has_cypher {
1149        let cypher = map
1150            .get("cypher")
1151            .and_then(|v| v.as_str())
1152            .filter(|s| !s.trim().is_empty())
1153            .ok_or_else(|| {
1154                ManifestError::at(
1155                    yaml_path,
1156                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
1157                )
1158            })?
1159            .to_string();
1160        return Ok(ToolSpec::Cypher(CypherTool {
1161            name,
1162            cypher,
1163            description,
1164            parameters,
1165        }));
1166    }
1167
1168    // python tool
1169    let python = map
1170        .get("python")
1171        .and_then(|v| v.as_str())
1172        .filter(|s| !s.is_empty())
1173        .ok_or_else(|| {
1174            ManifestError::at(
1175                yaml_path,
1176                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1177            )
1178        })?
1179        .to_string();
1180    let function = map
1181        .get("function")
1182        .and_then(|v| v.as_str())
1183        .filter(|s| valid_identifier(s))
1184        .ok_or_else(|| {
1185            ManifestError::at(
1186                yaml_path,
1187                format!(
1188                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1189                ),
1190            )
1191        })?
1192        .to_string();
1193    Ok(ToolSpec::Python(PythonTool {
1194        name,
1195        python,
1196        function,
1197        description,
1198        parameters,
1199    }))
1200}
1201
1202/// Parse a `bundled:` override entry from `tools[idx]`. The caller
1203/// (`build_tool`) has already established that the entry has
1204/// `bundled:` set as the kind discriminator.
1205fn build_bundled_override(
1206    map: &serde_yaml::Mapping,
1207    idx: usize,
1208    yaml_path: &Path,
1209) -> Result<ToolSpec, ManifestError> {
1210    let name = map
1211        .get("bundled")
1212        .and_then(|v| v.as_str())
1213        .filter(|s| valid_identifier(s))
1214        .ok_or_else(|| {
1215            ManifestError::at(
1216                yaml_path,
1217                format!(
1218                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1219                     (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1220                ),
1221            )
1222        })?
1223        .to_string();
1224
1225    // Tool-creation fields are forbidden on override entries — the
1226    // override only customises an existing bundled tool's surface,
1227    // it doesn't declare a new tool. Catch these at parse time so
1228    // operators get a clear error rather than silent confusion.
1229    for forbidden in ["name", "parameters", "function"] {
1230        if map.contains_key(forbidden) {
1231            return Err(ManifestError::at(
1232                yaml_path,
1233                format!(
1234                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1235                     (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
1236                ),
1237            ));
1238        }
1239    }
1240
1241    let description = match map.get("description") {
1242        None | Some(serde_yaml::Value::Null) => None,
1243        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1244        Some(_) => {
1245            return Err(ManifestError::at(
1246                yaml_path,
1247                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1248            ))
1249        }
1250    };
1251
1252    let hidden = match map.get("hidden") {
1253        None | Some(serde_yaml::Value::Null) => false,
1254        Some(serde_yaml::Value::Bool(b)) => *b,
1255        Some(_) => {
1256            return Err(ManifestError::at(
1257                yaml_path,
1258                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1259            ))
1260        }
1261    };
1262
1263    // 0.3.34: optional per-deployment rename. Validated as an
1264    // identifier here; cross-tool collision check is the consumer's
1265    // job (it knows what other names — bundled, cypher, python — it
1266    // has in scope).
1267    let rename = match map.get("rename") {
1268        None | Some(serde_yaml::Value::Null) => None,
1269        Some(serde_yaml::Value::String(s)) => {
1270            if !valid_identifier(s) {
1271                return Err(ManifestError::at(
1272                    yaml_path,
1273                    format!(
1274                        "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
1275                         (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
1276                    ),
1277                ));
1278            }
1279            Some(s.clone())
1280        }
1281        Some(_) => {
1282            return Err(ManifestError::at(
1283                yaml_path,
1284                format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
1285            ))
1286        }
1287    };
1288
1289    Ok(ToolSpec::Bundled(BundledOverride {
1290        name,
1291        description,
1292        hidden,
1293        rename,
1294    }))
1295}
1296
1297fn build_embedder(
1298    raw: Option<&serde_yaml::Value>,
1299    yaml_path: &Path,
1300) -> Result<Option<EmbedderConfig>, ManifestError> {
1301    let Some(raw) = raw else { return Ok(None) };
1302    if matches!(raw, serde_yaml::Value::Null) {
1303        return Ok(None);
1304    }
1305    let map = raw
1306        .as_mapping()
1307        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1308    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1309    let module = map
1310        .get("module")
1311        .and_then(|v| v.as_str())
1312        .filter(|s| !s.is_empty())
1313        .ok_or_else(|| {
1314            ManifestError::at(
1315                yaml_path,
1316                "embedder.module must be a non-empty string (path or dotted name)",
1317            )
1318        })?
1319        .to_string();
1320    let class = map
1321        .get("class")
1322        .and_then(|v| v.as_str())
1323        .filter(|s| valid_identifier(s))
1324        .ok_or_else(|| {
1325            ManifestError::at(
1326                yaml_path,
1327                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1328            )
1329        })?
1330        .to_string();
1331    let kwargs = match map.get("kwargs") {
1332        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1333        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1334            serde_json::Value::Object(o) => o,
1335            _ => {
1336                return Err(ManifestError::at(
1337                    yaml_path,
1338                    "embedder.kwargs must be a mapping",
1339                ))
1340            }
1341        },
1342        Some(_) => {
1343            return Err(ManifestError::at(
1344                yaml_path,
1345                "embedder.kwargs must be a mapping",
1346            ))
1347        }
1348    };
1349    Ok(Some(EmbedderConfig {
1350        module,
1351        class,
1352        kwargs,
1353    }))
1354}
1355
1356fn build_builtins(
1357    raw: Option<&serde_yaml::Value>,
1358    yaml_path: &Path,
1359) -> Result<BuiltinsConfig, ManifestError> {
1360    let Some(raw) = raw else {
1361        return Ok(BuiltinsConfig::default());
1362    };
1363    if matches!(raw, serde_yaml::Value::Null) {
1364        return Ok(BuiltinsConfig::default());
1365    }
1366    let map = raw
1367        .as_mapping()
1368        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1369    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1370    let mut cfg = BuiltinsConfig::default();
1371    if let Some(v) = map.get("save_graph") {
1372        cfg.save_graph = v
1373            .as_bool()
1374            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1375    }
1376    if let Some(v) = map.get("temp_cleanup") {
1377        let s = v.as_str().ok_or_else(|| {
1378            ManifestError::at(
1379                yaml_path,
1380                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1381            )
1382        })?;
1383        cfg.temp_cleanup = match s {
1384            "never" => TempCleanup::Never,
1385            "on_overview" => TempCleanup::OnOverview,
1386            other => {
1387                return Err(ManifestError::at(
1388                    yaml_path,
1389                    format!(
1390                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1391                    ),
1392                ))
1393            }
1394        };
1395    }
1396    Ok(cfg)
1397}
1398
1399fn valid_identifier(s: &str) -> bool {
1400    let mut chars = s.chars();
1401    match chars.next() {
1402        Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
1403        _ => return false,
1404    }
1405    chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
1406}
1407
1408fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1409    serde_json::to_value(&v)
1410        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1411}
1412
1413#[derive(Debug, Deserialize)]
1414struct _Reserved;
1415
1416#[cfg(test)]
1417mod tests {
1418    use super::*;
1419
1420    fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1421        let mut f = tempfile::NamedTempFile::new().unwrap();
1422        std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1423        f
1424    }
1425
1426    #[test]
1427    fn loads_minimal_empty_manifest() {
1428        let f = write_tmp("");
1429        let m = load(f.path()).unwrap();
1430        assert_eq!(m.tools.len(), 0);
1431        assert_eq!(m.source_roots.len(), 0);
1432        assert!(!m.trust.allow_python_tools);
1433        assert!(!m.trust.allow_embedder);
1434        assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1435    }
1436
1437    #[test]
1438    fn loads_name_and_instructions() {
1439        let f = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
1440        let m = load(f.path()).unwrap();
1441        assert_eq!(m.name.as_deref(), Some("Demo"));
1442        assert!(m.instructions.unwrap().contains("multi-line"));
1443    }
1444
1445    #[test]
1446    fn rejects_unknown_top_key() {
1447        let f = write_tmp("bogus: 1\n");
1448        let err = load(f.path()).unwrap_err();
1449        assert!(err.message.contains("unknown top-level"));
1450    }
1451
1452    #[test]
1453    fn source_root_string_normalises_to_list() {
1454        let f = write_tmp("source_root: ./data\n");
1455        let m = load(f.path()).unwrap();
1456        assert_eq!(m.source_roots, vec!["./data".to_string()]);
1457    }
1458
1459    #[test]
1460    fn source_roots_list_preserved() {
1461        let f = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
1462        let m = load(f.path()).unwrap();
1463        assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1464    }
1465
1466    #[test]
1467    fn rejects_both_source_root_and_source_roots() {
1468        let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1469        assert!(load(f.path()).unwrap_err().message.contains("not both"));
1470    }
1471
1472    #[test]
1473    fn cypher_tool_parses() {
1474        let f = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
1475        let m = load(f.path()).unwrap();
1476        assert_eq!(m.tools.len(), 1);
1477        match &m.tools[0] {
1478            ToolSpec::Cypher(t) => {
1479                assert_eq!(t.name, "lookup");
1480                assert!(t.cypher.contains("MATCH"));
1481            }
1482            _ => panic!("expected cypher tool"),
1483        }
1484    }
1485
1486    #[test]
1487    fn python_tool_parses() {
1488        let f =
1489            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
1490        let m = load(f.path()).unwrap();
1491        match &m.tools[0] {
1492            ToolSpec::Python(t) => {
1493                assert_eq!(t.python, "./tools.py");
1494                assert_eq!(t.function, "detail");
1495            }
1496            _ => panic!("expected python tool"),
1497        }
1498    }
1499
1500    #[test]
1501    fn rejects_tool_with_both_kinds() {
1502        let f = write_tmp(
1503            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
1504        );
1505        assert!(load(f.path())
1506            .unwrap_err()
1507            .message
1508            .contains("multiple kinds"));
1509    }
1510
1511    #[test]
1512    fn rejects_tool_with_no_kind() {
1513        let f = write_tmp("tools:\n  - name: x\n");
1514        assert!(load(f.path())
1515            .unwrap_err()
1516            .message
1517            .contains("needs exactly one"));
1518    }
1519
1520    #[test]
1521    fn rejects_duplicate_tool_names() {
1522        let f = write_tmp(
1523            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
1524        );
1525        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1526    }
1527
1528    // ─── Bundled override shape (0.3.31) ────────────────────────
1529
1530    #[test]
1531    fn bundled_override_with_description_parses() {
1532        let f =
1533            write_tmp("tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n");
1534        let m = load(f.path()).unwrap();
1535        assert_eq!(m.tools.len(), 1);
1536        match &m.tools[0] {
1537            ToolSpec::Bundled(b) => {
1538                assert_eq!(b.name, "repo_management");
1539                assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1540                assert!(!b.hidden);
1541            }
1542            _ => panic!("expected bundled override"),
1543        }
1544    }
1545
1546    #[test]
1547    fn bundled_override_with_hidden_parses() {
1548        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: true\n");
1549        let m = load(f.path()).unwrap();
1550        match &m.tools[0] {
1551            ToolSpec::Bundled(b) => {
1552                assert_eq!(b.name, "ping");
1553                assert!(b.hidden);
1554                assert!(b.description.is_none());
1555            }
1556            _ => panic!("expected bundled override"),
1557        }
1558    }
1559
1560    #[test]
1561    fn bundled_override_alongside_cypher_tools_parses() {
1562        let f = write_tmp(
1563            "tools:\n\
1564             \x20\x20- bundled: cypher_query\n\
1565             \x20\x20\x20\x20description: \"Custom server description\"\n\
1566             \x20\x20- name: lookup\n\
1567             \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1568        );
1569        let m = load(f.path()).unwrap();
1570        assert_eq!(m.tools.len(), 2);
1571        assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1572        assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1573    }
1574
1575    #[test]
1576    fn rejects_bundled_with_cypher_kind() {
1577        let f =
1578            write_tmp("tools:\n  - bundled: cypher_query\n    cypher: \"MATCH (n) RETURN n\"\n");
1579        let err = load(f.path()).unwrap_err();
1580        assert!(
1581            err.message.contains("multiple kinds"),
1582            "got: {}",
1583            err.message
1584        );
1585    }
1586
1587    #[test]
1588    fn rejects_bundled_with_name_field() {
1589        let f = write_tmp("tools:\n  - bundled: ping\n    name: ping\n");
1590        let err = load(f.path()).unwrap_err();
1591        assert!(
1592            err.message.contains("cannot set `name:`"),
1593            "got: {}",
1594            err.message
1595        );
1596    }
1597
1598    #[test]
1599    fn rejects_bundled_with_parameters_field() {
1600        let f =
1601            write_tmp("tools:\n  - bundled: cypher_query\n    parameters:\n      type: object\n");
1602        let err = load(f.path()).unwrap_err();
1603        assert!(
1604            err.message.contains("cannot set `parameters:`"),
1605            "got: {}",
1606            err.message
1607        );
1608    }
1609
1610    #[test]
1611    fn rejects_bundled_with_non_bool_hidden() {
1612        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: yes-please\n");
1613        let err = load(f.path()).unwrap_err();
1614        assert!(
1615            err.message.contains("hidden must be a bool"),
1616            "got: {}",
1617            err.message
1618        );
1619    }
1620
1621    #[test]
1622    fn rejects_hidden_on_cypher_tool() {
1623        let f = write_tmp(
1624            "tools:\n  - name: lookup\n    cypher: \"MATCH (n) RETURN n\"\n    hidden: true\n",
1625        );
1626        let err = load(f.path()).unwrap_err();
1627        assert!(
1628            err.message
1629                .contains("`hidden:` is only valid on `bundled:` override entries"),
1630            "got: {}",
1631            err.message
1632        );
1633    }
1634
1635    #[test]
1636    fn rejects_duplicate_bundled_overrides() {
1637        // The dedup check is on tool name; two `bundled: ping` entries
1638        // share the same name and should be rejected the same way
1639        // duplicate cypher tools are.
1640        let f = write_tmp(
1641            "tools:\n  - bundled: ping\n    hidden: true\n  - bundled: ping\n    description: \"x\"\n",
1642        );
1643        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1644    }
1645
1646    #[test]
1647    fn rejects_bundled_with_invalid_identifier() {
1648        let f = write_tmp("tools:\n  - bundled: \"123-bad\"\n    hidden: true\n");
1649        let err = load(f.path()).unwrap_err();
1650        assert!(
1651            err.message.contains("must be a string"),
1652            "got: {}",
1653            err.message
1654        );
1655    }
1656
1657    // 0.3.34 — `tools[].bundled: rename:` per-deployment override
1658    #[test]
1659    fn bundled_rename_parses_when_valid_identifier() {
1660        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
1661        let m = load(f.path()).unwrap();
1662        match &m.tools[0] {
1663            ToolSpec::Bundled(b) => {
1664                assert_eq!(b.name, "cypher_query");
1665                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1666                assert!(!b.hidden);
1667                assert!(b.description.is_none());
1668            }
1669            _ => panic!("expected bundled override"),
1670        }
1671    }
1672
1673    #[test]
1674    fn bundled_rename_alongside_description_parses() {
1675        let f = write_tmp(
1676            "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n    description: \"Legal-corpus cypher\"\n",
1677        );
1678        let m = load(f.path()).unwrap();
1679        match &m.tools[0] {
1680            ToolSpec::Bundled(b) => {
1681                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1682                assert_eq!(b.description.as_deref(), Some("Legal-corpus cypher"));
1683            }
1684            _ => panic!("expected bundled override"),
1685        }
1686    }
1687
1688    #[test]
1689    fn bundled_rename_defaults_to_none() {
1690        let f = write_tmp("tools:\n  - bundled: cypher_query\n    description: \"x\"\n");
1691        let m = load(f.path()).unwrap();
1692        match &m.tools[0] {
1693            ToolSpec::Bundled(b) => assert!(b.rename.is_none()),
1694            _ => panic!("expected bundled override"),
1695        }
1696    }
1697
1698    #[test]
1699    fn rejects_bundled_rename_with_invalid_identifier() {
1700        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: \"123-bad\"\n");
1701        let err = load(f.path()).unwrap_err();
1702        assert!(
1703            err.message.contains("rename must be a valid identifier"),
1704            "got: {}",
1705            err.message
1706        );
1707    }
1708
1709    #[test]
1710    fn rejects_bundled_rename_with_non_string_value() {
1711        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: 42\n");
1712        let err = load(f.path()).unwrap_err();
1713        assert!(
1714            err.message.contains("rename must be a string"),
1715            "got: {}",
1716            err.message
1717        );
1718    }
1719
1720    #[test]
1721    fn bundled_rename_serialises_to_json() {
1722        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
1723        let m = load(f.path()).unwrap();
1724        let json = m.to_json();
1725        let tools = json.get("tools").and_then(|t| t.as_array()).unwrap();
1726        let entry = &tools[0];
1727        assert_eq!(entry.get("kind").and_then(|v| v.as_str()), Some("bundled"));
1728        assert_eq!(
1729            entry.get("name").and_then(|v| v.as_str()),
1730            Some("cypher_query")
1731        );
1732        assert_eq!(
1733            entry.get("rename").and_then(|v| v.as_str()),
1734            Some("legal_cypher_query")
1735        );
1736    }
1737
1738    #[test]
1739    fn bundled_override_to_json_shape() {
1740        let f = write_tmp(
1741            "tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n    hidden: false\n",
1742        );
1743        let m = load(f.path()).unwrap();
1744        let v = m.to_json();
1745        assert_eq!(v["tools"][0]["kind"], "bundled");
1746        assert_eq!(v["tools"][0]["name"], "repo_management");
1747        assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1748        assert_eq!(v["tools"][0]["hidden"], false);
1749    }
1750
1751    #[test]
1752    fn embedder_parses() {
1753        let f = write_tmp(
1754            "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n",
1755        );
1756        let m = load(f.path()).unwrap();
1757        let e = m.embedder.unwrap();
1758        assert_eq!(e.module, "./e.py");
1759        assert_eq!(e.class, "GraphEmbedder");
1760        assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1761    }
1762
1763    #[test]
1764    fn builtins_parses_temp_cleanup() {
1765        let f = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
1766        let m = load(f.path()).unwrap();
1767        assert!(m.builtins.save_graph);
1768        assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1769    }
1770
1771    #[test]
1772    fn rejects_invalid_temp_cleanup() {
1773        let f = write_tmp("builtins:\n  temp_cleanup: nuke\n");
1774        assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1775    }
1776
1777    #[test]
1778    fn allow_embedder_trust_parses() {
1779        let f = write_tmp("trust:\n  allow_embedder: true\n");
1780        let m = load(f.path()).unwrap();
1781        assert!(m.trust.allow_embedder);
1782    }
1783
1784    #[test]
1785    fn retired_allow_query_preprocessor_is_rejected_as_unknown() {
1786        // Retired in 0.3.43: the gate's sole consumer (kglite) removed the
1787        // preprocessor extension, so the strict validator now treats the key
1788        // as any other unknown trust key rather than carrying dead surface.
1789        let f = write_tmp("trust:\n  allow_query_preprocessor: true\n");
1790        let err = load(f.path()).unwrap_err();
1791        assert!(err.message.contains("trust keys"));
1792        assert!(err.message.contains("allow_query_preprocessor"));
1793    }
1794
1795    #[test]
1796    fn find_sibling_works() {
1797        let dir = tempfile::tempdir().unwrap();
1798        let graph = dir.path().join("demo.kgl");
1799        std::fs::write(&graph, b"\x00").unwrap();
1800        let sibling = dir.path().join("demo_mcp.yaml");
1801        std::fs::write(&sibling, "name: x\n").unwrap();
1802        assert_eq!(find_sibling_manifest(&graph), Some(sibling));
1803    }
1804
1805    #[test]
1806    fn workspace_local_parses() {
1807        let f = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
1808        let m = load(f.path()).unwrap();
1809        let w = m.workspace.unwrap();
1810        assert_eq!(w.kind, WorkspaceKind::Local);
1811        assert_eq!(w.root.as_deref(), Some("./src"));
1812        assert!(w.watch);
1813    }
1814
1815    #[test]
1816    fn workspace_github_default_kind() {
1817        let f = write_tmp("workspace: {}\n");
1818        let m = load(f.path()).unwrap();
1819        let w = m.workspace.unwrap();
1820        assert_eq!(w.kind, WorkspaceKind::Github);
1821        assert!(w.root.is_none());
1822        assert!(!w.watch);
1823    }
1824
1825    #[test]
1826    fn workspace_local_without_root_errors() {
1827        let f = write_tmp("workspace:\n  kind: local\n");
1828        let err = load(f.path()).unwrap_err();
1829        assert!(err.message.contains("requires workspace.root"));
1830    }
1831
1832    #[test]
1833    fn workspace_unknown_key_rejected() {
1834        let f = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
1835        let err = load(f.path()).unwrap_err();
1836        assert!(err.message.contains("unknown workspace keys"));
1837    }
1838
1839    #[test]
1840    fn workspace_invalid_kind_rejected() {
1841        let f = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
1842        let err = load(f.path()).unwrap_err();
1843        assert!(err.message.contains("workspace.kind"));
1844    }
1845
1846    #[test]
1847    fn workspace_watch_invalid_for_github() {
1848        let f = write_tmp("workspace:\n  kind: github\n  watch: true\n");
1849        let err = load(f.path()).unwrap_err();
1850        assert!(err.message.contains("watch is only valid"));
1851    }
1852
1853    #[test]
1854    fn extensions_passthrough_parses() {
1855        let f = write_tmp(
1856            "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
1857        );
1858        let m = load(f.path()).unwrap();
1859        assert_eq!(
1860            m.extensions
1861                .get("csv_http_server")
1862                .and_then(|v| v.as_bool()),
1863            Some(true)
1864        );
1865        assert_eq!(
1866            m.extensions
1867                .get("csv_http_server_dir")
1868                .and_then(|v| v.as_str()),
1869            Some("temp/")
1870        );
1871        // Nested values pass through unchanged.
1872        assert_eq!(
1873            m.extensions
1874                .get("arbitrary")
1875                .and_then(|v| v.get("nested"))
1876                .and_then(|v| v.as_i64()),
1877            Some(1)
1878        );
1879    }
1880
1881    #[test]
1882    fn extensions_absent_defaults_to_empty() {
1883        let f = write_tmp("name: x\n");
1884        let m = load(f.path()).unwrap();
1885        assert!(m.extensions.is_empty());
1886    }
1887
1888    #[test]
1889    fn extensions_inner_keys_unvalidated() {
1890        // The framework intentionally does NOT validate keys inside
1891        // `extensions:` — they're downstream-binary concerns. Any shape
1892        // that's a YAML mapping must round-trip.
1893        let f = write_tmp(
1894            "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
1895        );
1896        load(f.path()).unwrap();
1897    }
1898
1899    #[test]
1900    fn extensions_must_be_a_mapping() {
1901        let f = write_tmp("extensions: not-a-mapping\n");
1902        let err = load(f.path()).unwrap_err();
1903        assert!(err.message.contains("extensions must be a mapping"));
1904    }
1905
1906    #[test]
1907    fn env_file_key_parses() {
1908        let f = write_tmp("env_file: ../.env\n");
1909        let m = load(f.path()).unwrap();
1910        assert_eq!(m.env_file.as_deref(), Some("../.env"));
1911    }
1912
1913    #[test]
1914    fn env_file_unset_is_none() {
1915        let f = write_tmp("name: Demo\n");
1916        let m = load(f.path()).unwrap();
1917        assert!(m.env_file.is_none());
1918    }
1919
1920    #[test]
1921    fn find_workspace_works() {
1922        let dir = tempfile::tempdir().unwrap();
1923        let manifest = dir.path().join("workspace_mcp.yaml");
1924        std::fs::write(&manifest, "name: ws\n").unwrap();
1925        assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1926    }
1927
1928    #[test]
1929    fn find_workspace_walks_one_level_up_with_applies_to() {
1930        // Layout: <tmp>/parent/workspace_mcp.yaml (declares
1931        // workspace.applies_to: ./repos) + <tmp>/parent/repos/.
1932        // Discovery from <tmp>/parent/repos/ should walk up one level
1933        // and find the sibling manifest because applies_to matches.
1934        let dir = tempfile::tempdir().unwrap();
1935        let parent = dir.path().join("parent");
1936        std::fs::create_dir(&parent).unwrap();
1937        let manifest = parent.join("workspace_mcp.yaml");
1938        std::fs::write(
1939            &manifest,
1940            "workspace:\n  kind: github\n  applies_to: ./repos\n",
1941        )
1942        .unwrap();
1943        let repos = parent.join("repos");
1944        std::fs::create_dir(&repos).unwrap();
1945
1946        // Primary location still works.
1947        assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
1948
1949        // Parent-walk fallback resolves to the same manifest. Compare
1950        // canonicalised paths to handle macOS /private/var vs /var.
1951        let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
1952        assert_eq!(
1953            found.canonicalize().unwrap(),
1954            manifest.canonicalize().unwrap()
1955        );
1956    }
1957
1958    #[test]
1959    fn find_workspace_ignores_parent_without_applies_to() {
1960        // Parent manifest exists but does NOT declare workspace.applies_to.
1961        // The parent-walk fallback must refuse to auto-detect it —
1962        // otherwise an unrelated workspace_mcp.yaml in a sibling dir
1963        // could surprise-attach to whatever --workspace path the
1964        // operator passes. Safe default: require the opt-in.
1965        let dir = tempfile::tempdir().unwrap();
1966        let parent = dir.path().join("parent");
1967        std::fs::create_dir(&parent).unwrap();
1968        let manifest = parent.join("workspace_mcp.yaml");
1969        std::fs::write(&manifest, "name: not for repos\n").unwrap();
1970        let repos = parent.join("repos");
1971        std::fs::create_dir(&repos).unwrap();
1972
1973        assert_eq!(
1974            find_workspace_manifest(&repos),
1975            None,
1976            "parent manifest without workspace.applies_to must NOT auto-attach"
1977        );
1978    }
1979
1980    #[test]
1981    fn find_workspace_ignores_parent_with_mismatched_applies_to() {
1982        // Parent manifest declares applies_to: ./repos but the
1983        // actual --workspace path is ./other_dir. The mismatch must
1984        // suppress auto-detection.
1985        let dir = tempfile::tempdir().unwrap();
1986        let parent = dir.path().join("parent");
1987        std::fs::create_dir(&parent).unwrap();
1988        let manifest = parent.join("workspace_mcp.yaml");
1989        std::fs::write(
1990            &manifest,
1991            "workspace:\n  kind: github\n  applies_to: ./repos\n",
1992        )
1993        .unwrap();
1994        let other = parent.join("other_dir");
1995        std::fs::create_dir(&other).unwrap();
1996
1997        assert_eq!(
1998            find_workspace_manifest(&other),
1999            None,
2000            "applies_to: ./repos must NOT match --workspace ./other_dir"
2001        );
2002    }
2003
2004    #[test]
2005    fn find_workspace_applies_to_wildcard_matches_any_child() {
2006        // applies_to: '*' (or './*') means "any direct child of the
2007        // manifest's parent dir." Three different child names should
2008        // all auto-detect the manifest.
2009        let dir = tempfile::tempdir().unwrap();
2010        let parent = dir.path().join("parent");
2011        std::fs::create_dir(&parent).unwrap();
2012        let manifest = parent.join("workspace_mcp.yaml");
2013        std::fs::write(&manifest, "workspace:\n  kind: github\n  applies_to: '*'\n").unwrap();
2014        for child_name in ["repos", "clones", "totally-different-name"] {
2015            let child = parent.join(child_name);
2016            std::fs::create_dir(&child).unwrap();
2017            let found =
2018                find_workspace_manifest(&child).expect("wildcard should match any direct child");
2019            assert_eq!(
2020                found.canonicalize().unwrap(),
2021                manifest.canonicalize().unwrap(),
2022                "wildcard should match child {child_name:?}"
2023            );
2024        }
2025    }
2026
2027    #[test]
2028    fn find_workspace_applies_to_glob_matches_prefix() {
2029        // applies_to: './prod-*' should match any direct child whose
2030        // basename starts with "prod-".
2031        let dir = tempfile::tempdir().unwrap();
2032        let parent = dir.path().join("parent");
2033        std::fs::create_dir(&parent).unwrap();
2034        let manifest = parent.join("workspace_mcp.yaml");
2035        std::fs::write(
2036            &manifest,
2037            "workspace:\n  kind: github\n  applies_to: ./prod-*\n",
2038        )
2039        .unwrap();
2040        // Match cases.
2041        for child_name in ["prod-api", "prod-web", "prod-"] {
2042            let child = parent.join(child_name);
2043            std::fs::create_dir(&child).unwrap();
2044            assert!(
2045                find_workspace_manifest(&child).is_some(),
2046                "prod-* should match {child_name:?}"
2047            );
2048        }
2049        // Non-match cases.
2050        for child_name in ["test-api", "stage-web", "random"] {
2051            let child = parent.join(child_name);
2052            std::fs::create_dir(&child).unwrap();
2053            assert_eq!(
2054                find_workspace_manifest(&child),
2055                None,
2056                "prod-* should NOT match {child_name:?}"
2057            );
2058        }
2059    }
2060
2061    #[test]
2062    fn find_workspace_applies_to_list_matches_any_entry() {
2063        // applies_to: [./repos, ./clones] should match either name
2064        // but reject anything else.
2065        let dir = tempfile::tempdir().unwrap();
2066        let parent = dir.path().join("parent");
2067        std::fs::create_dir(&parent).unwrap();
2068        let manifest = parent.join("workspace_mcp.yaml");
2069        std::fs::write(
2070            &manifest,
2071            "workspace:\n  kind: github\n  applies_to:\n    - ./repos\n    - ./clones\n",
2072        )
2073        .unwrap();
2074        for matching in ["repos", "clones"] {
2075            let child = parent.join(matching);
2076            std::fs::create_dir(&child).unwrap();
2077            assert!(
2078                find_workspace_manifest(&child).is_some(),
2079                "list should match {matching:?}"
2080            );
2081        }
2082        let other = parent.join("scratch");
2083        std::fs::create_dir(&other).unwrap();
2084        assert_eq!(
2085            find_workspace_manifest(&other),
2086            None,
2087            "list with [repos, clones] must NOT match scratch"
2088        );
2089    }
2090
2091    #[test]
2092    fn applies_to_rejects_deep_path_at_parse_time() {
2093        let f = write_tmp("workspace:\n  kind: github\n  applies_to: ./too/deep/path\n");
2094        let err = load(f.path()).unwrap_err();
2095        assert!(
2096            err.message.contains("must be a single path segment"),
2097            "got: {}",
2098            err.message
2099        );
2100    }
2101
2102    #[test]
2103    fn applies_to_rejects_invalid_glob_at_parse_time() {
2104        // globset rejects unterminated character class.
2105        let f = write_tmp("workspace:\n  kind: github\n  applies_to: './[unterminated'\n");
2106        let err = load(f.path()).unwrap_err();
2107        assert!(
2108            err.message.contains("invalid glob pattern"),
2109            "got: {}",
2110            err.message
2111        );
2112    }
2113
2114    #[test]
2115    fn applies_to_rejects_parent_relative() {
2116        // Bare `..` is caught by the `..` rejection branch. The
2117        // multi-segment form `../foo` is caught earlier by the
2118        // single-segment check; either is rejected.
2119        let f = write_tmp("workspace:\n  kind: github\n  applies_to: '..'\n");
2120        let err = load(f.path()).unwrap_err();
2121        assert!(err.message.contains("must not contain `..`"));
2122
2123        let f2 = write_tmp("workspace:\n  kind: github\n  applies_to: '../up'\n");
2124        let err2 = load(f2.path()).unwrap_err();
2125        assert!(err2.message.contains("must be a single path segment"));
2126    }
2127
2128    #[test]
2129    fn find_workspace_returns_none_when_missing_everywhere() {
2130        let dir = tempfile::tempdir().unwrap();
2131        let child = dir.path().join("child");
2132        std::fs::create_dir(&child).unwrap();
2133        // No manifest in either child or its parent (tmpdir root).
2134        assert_eq!(find_workspace_manifest(&child), None);
2135    }
2136
2137    #[test]
2138    fn find_workspace_primary_wins_over_parent_fallback() {
2139        // Both primary AND parent-fallback exist. The primary must
2140        // win — this anchors the precedence rule documented on
2141        // `find_workspace_manifest`. The parent declares applies_to
2142        // matching the child dir, so it WOULD be a valid fallback —
2143        // but the primary preempts it. If a future refactor swaps
2144        // the order, this test fails loudly.
2145        let dir = tempfile::tempdir().unwrap();
2146        let parent_manifest = dir.path().join("workspace_mcp.yaml");
2147        std::fs::write(
2148            &parent_manifest,
2149            "workspace:\n  kind: github\n  applies_to: ./repos\n",
2150        )
2151        .unwrap();
2152        let child = dir.path().join("repos");
2153        std::fs::create_dir(&child).unwrap();
2154        let child_manifest = child.join("workspace_mcp.yaml");
2155        std::fs::write(&child_manifest, "name: child\n").unwrap();
2156
2157        // Discovery from `child` should return the child manifest,
2158        // NOT the parent's. Compare canonicalised to handle the
2159        // macOS /private/var vs /var symlink consistently.
2160        let found = find_workspace_manifest(&child).expect("primary should resolve");
2161        assert_eq!(
2162            found.canonicalize().unwrap(),
2163            child_manifest.canonicalize().unwrap(),
2164            "primary location must win when both primary and parent fallback exist"
2165        );
2166    }
2167
2168    #[test]
2169    fn to_json_shape_is_stable() {
2170        let f = write_tmp(
2171            r#"
2172name: KGLite Codebase
2173source_roots: [src, lib]
2174trust:
2175  allow_embedder: true
2176embedder:
2177  module: kglite.embed
2178  class: SentenceTransformerEmbedder
2179builtins:
2180  save_graph: true
2181  temp_cleanup: on_overview
2182"#,
2183        );
2184        let m = load(f.path()).unwrap();
2185        let actual = m.to_json();
2186        let expected = serde_json::json!({
2187            "yaml_path": f.path().display().to_string(),
2188            "name": "KGLite Codebase",
2189            "instructions": null,
2190            "overview_prefix": null,
2191            "source_roots": ["src", "lib"],
2192            "trust": {
2193                "allow_python_tools": false,
2194                "allow_embedder": true,
2195            },
2196            "tools": [],
2197            "embedder": {
2198                "module": "kglite.embed",
2199                "class": "SentenceTransformerEmbedder",
2200                "kwargs": {},
2201            },
2202            "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
2203            "env_file": null,
2204            "workspace": null,
2205            "extensions": {},
2206            "skills": false,
2207        });
2208        assert_eq!(actual, expected);
2209    }
2210
2211    #[test]
2212    fn to_json_round_trips_tools_and_workspace() {
2213        let f = write_tmp(
2214            r#"
2215name: Full Surface
2216source_root: ./src
2217trust:
2218  allow_python_tools: true
2219tools:
2220  - name: nodes_for
2221    cypher: "MATCH (n {name: $name}) RETURN n"
2222    description: "fetch nodes by name"
2223  - name: run_query
2224    python: tools.py
2225    function: run
2226workspace:
2227  kind: local
2228  root: /tmp/ws
2229  watch: true
2230builtins:
2231  save_graph: false
2232env_file: .env.local
2233extensions:
2234  kglite:
2235    flavour: standard
2236"#,
2237        );
2238        let m = load(f.path()).unwrap();
2239        let v = m.to_json();
2240        assert_eq!(v["name"], "Full Surface");
2241        assert_eq!(v["trust"]["allow_python_tools"], true);
2242        assert_eq!(v["workspace"]["kind"], "local");
2243        assert_eq!(v["workspace"]["root"], "/tmp/ws");
2244        assert_eq!(v["workspace"]["watch"], true);
2245        assert_eq!(v["env_file"], ".env.local");
2246        assert_eq!(v["tools"][0]["kind"], "cypher");
2247        assert_eq!(v["tools"][0]["name"], "nodes_for");
2248        assert_eq!(v["tools"][1]["kind"], "python");
2249        assert_eq!(v["tools"][1]["name"], "run_query");
2250        assert_eq!(v["tools"][1]["python"], "tools.py");
2251        assert_eq!(v["tools"][1]["function"], "run");
2252        assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
2253    }
2254
2255    // ─── Skills schema (Phase 1a — manifest-level only) ───────────
2256
2257    #[test]
2258    fn skills_disabled_by_default() {
2259        let f = write_tmp("name: x\n");
2260        let m = load(f.path()).unwrap();
2261        assert_eq!(m.skills, SkillsSource::Disabled);
2262        assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
2263    }
2264
2265    #[test]
2266    fn skills_explicit_false_disabled() {
2267        let f = write_tmp("name: x\nskills: false\n");
2268        let m = load(f.path()).unwrap();
2269        assert_eq!(m.skills, SkillsSource::Disabled);
2270    }
2271
2272    #[test]
2273    fn skills_bool_true_parses_to_single_bundled() {
2274        let f = write_tmp("name: x\nskills: true\n");
2275        let m = load(f.path()).unwrap();
2276        assert_eq!(m.skills, SkillsSource::Sources(vec![SkillSource::Bundled]));
2277        // JSON shape: list with one boolean true.
2278        let v = m.to_json();
2279        assert_eq!(v["skills"], serde_json::json!([true]));
2280    }
2281
2282    #[test]
2283    fn skills_path_string_parses_to_single_path() {
2284        let f = write_tmp("name: x\nskills: ./local-skills/\n");
2285        let m = load(f.path()).unwrap();
2286        assert_eq!(
2287            m.skills,
2288            SkillsSource::Sources(vec![SkillSource::Path("./local-skills/".into())])
2289        );
2290        // JSON round-trip preserves the operator-declared path verbatim.
2291        let v = m.to_json();
2292        assert_eq!(v["skills"], serde_json::json!(["./local-skills/"]));
2293    }
2294
2295    #[test]
2296    fn skills_list_polymorphic_parses() {
2297        let f =
2298            write_tmp("name: x\nskills:\n  - true\n  - ./local-overrides/\n  - ~/shared-skills/\n");
2299        let m = load(f.path()).unwrap();
2300        assert_eq!(
2301            m.skills,
2302            SkillsSource::Sources(vec![
2303                SkillSource::Bundled,
2304                SkillSource::Path("./local-overrides/".into()),
2305                SkillSource::Path("~/shared-skills/".into()),
2306            ])
2307        );
2308        // JSON preserves entry types: bool for bundled, string for paths.
2309        let v = m.to_json();
2310        assert_eq!(
2311            v["skills"],
2312            serde_json::json!([true, "./local-overrides/", "~/shared-skills/"])
2313        );
2314    }
2315
2316    #[test]
2317    fn skills_empty_list_parses_as_opt_in_with_no_root_sources() {
2318        // Empty list means "opt in but only the auto-detected project
2319        // layer fires." The registry treats this as `Sources(vec![])`,
2320        // not `Disabled`. Operators relying solely on
2321        // `<basename>.skills/` adjacent to the YAML use this form.
2322        let f = write_tmp("name: x\nskills: []\n");
2323        let m = load(f.path()).unwrap();
2324        assert_eq!(m.skills, SkillsSource::Sources(vec![]));
2325    }
2326
2327    #[test]
2328    fn skills_false_in_list_rejected() {
2329        let f = write_tmp("name: x\nskills:\n  - false\n");
2330        let err = load(f.path()).unwrap_err();
2331        assert!(
2332            err.message.contains("skills[0]")
2333                && err.message.contains("`false` is not a valid entry"),
2334            "unexpected: {}",
2335            err.message
2336        );
2337    }
2338
2339    #[test]
2340    fn skills_invalid_type_rejected() {
2341        let f = write_tmp("name: x\nskills: 42\n");
2342        let err = load(f.path()).unwrap_err();
2343        assert!(
2344            err.message.contains("skills must be"),
2345            "unexpected: {}",
2346            err.message
2347        );
2348    }
2349
2350    #[test]
2351    fn skills_empty_path_string_rejected() {
2352        let f = write_tmp("name: x\nskills: \"\"\n");
2353        let err = load(f.path()).unwrap_err();
2354        assert!(
2355            err.message.contains("non-empty string"),
2356            "unexpected: {}",
2357            err.message
2358        );
2359    }
2360
2361    #[test]
2362    fn skills_field_is_purely_additive_on_existing_manifests() {
2363        // A manifest written before the skills field existed (i.e. no
2364        // `skills:` declaration) must still parse cleanly with
2365        // SkillsSource::Disabled. This is the "no impact on existing
2366        // MCP servers" guarantee at the schema level.
2367        let f = write_tmp(
2368            r#"
2369name: legacy
2370source_roots: [src]
2371trust:
2372  allow_python_tools: true
2373workspace:
2374  kind: github
2375"#,
2376        );
2377        let m = load(f.path()).unwrap();
2378        assert_eq!(m.skills, SkillsSource::Disabled);
2379        assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
2380    }
2381}