Skip to main content

mcp_methods/server/
manifest.rs

1//! YAML manifest schema + loader.
2//!
3//! A manifest is a YAML file declaring the tools, source roots, custom
4//! embedder, and trust gates the server should apply. The loader parses,
5//! validates, and returns a [`Manifest`]; consumers (CLI wiring, tool
6//! registration) operate on the validated structure.
7//!
8//! Path strings (`source_root`, `python:` tool paths, embedder module)
9//! are kept as the raw user input — relative-to-yaml resolution happens
10//! at the use site so the data stays pure and testable.
11//!
12//! Validation is fail-fast and user-facing: the caller surfaces
13//! [`ManifestError`] messages directly to the operator.
14//!
15//! Schema mirrors the Python `kglite.mcp_server.manifest` module 1:1 so
16//! a manifest written for the Python server boots unchanged on the new
17//! Rust server.
18
19// A handful of fields/helpers are exposed for downstream consumers
20// (e.g. kglite-mcp-server reads `CypherTool::cypher` directly when
21// registering manifest-declared tools) and so look unused from this
22// crate's perspective. Silence dead-code warnings rather than chase
23// every cross-crate use.
24#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest. `build` hands this
/// list to `check_keys` before reading any individual field.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
    "skills",
];
/// Key names for the `workspace:` block — one per field of
/// [`WorkspaceConfig`].
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
/// String spellings of [`WorkspaceKind`], matching what
/// `WorkspaceKind::as_str` returns.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
/// Key names for the `trust:` block — one per field of [`TrustConfig`].
const ALLOWED_TRUST_KEYS: &[&str] = &["allow_python_tools", "allow_embedder"];
/// Key names for a single tool entry.
/// NOTE(review): presumably the `tools:` builder (outside this view)
/// picks the [`ToolSpec`] variant from which of `cypher` / `python` /
/// `bundled` is present — confirm there.
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
    // 0.3.34: per-deployment rename for bundled tools (the bundled
    // override block already covers `description` and `hidden`; this
    // adds the third axis — what the agent sees in `tools/list`).
    "rename",
];
/// Key names for the `embedder:` block — one per field of
/// [`EmbedderConfig`].
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
/// Key names for the `builtins:` block — one per field of
/// [`BuiltinsConfig`].
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup", "screen_stargazers"];
/// String spellings of [`TempCleanup`], matching what
/// `TempCleanup::as_str` returns.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
68
/// User-facing manifest loading / validation error.
///
/// Displays as `{path}: {message}`. `path` is the textual form of the
/// manifest path, or the placeholder `<manifest>` when the error was
/// built with [`ManifestError::bare`].
#[derive(Debug, Error)]
#[error("{path}: {message}")]
pub struct ManifestError {
    /// Display form of the manifest path this error refers to.
    pub path: String,
    /// Human-readable description of the problem.
    pub message: String,
}
75
76impl ManifestError {
77    pub fn at(path: &Path, message: impl Into<String>) -> Self {
78        Self {
79            path: path.display().to_string(),
80            message: message.into(),
81        }
82    }
83
84    pub fn bare(message: impl Into<String>) -> Self {
85        Self {
86            path: "<manifest>".to_string(),
87            message: message.into(),
88        }
89    }
90}
91
/// Trust gates parsed from the manifest's `trust:` block.
///
/// Both flags are `false` in the derived `Default`.
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// Value of `trust.allow_python_tools`.
    /// NOTE(review): presumably gates loading of `python:` tools — the
    /// enforcement site is not in this file section; confirm there.
    pub allow_python_tools: bool,
    /// Value of `trust.allow_embedder`.
    /// NOTE(review): presumably gates loading of the custom embedder —
    /// confirm at the use site.
    pub allow_embedder: bool,
}
97
/// One entry of the manifest's `tools:` list.
#[derive(Debug, Clone)]
pub enum ToolSpec {
    /// Tool backed by a Cypher query string (see [`CypherTool`]).
    Cypher(CypherTool),
    /// Tool backed by a function in a Python file (see [`PythonTool`]).
    Python(PythonTool),
    /// Override the agent-facing surface of a bundled tool (one the
    /// downstream binary provides natively — `cypher_query`,
    /// `graph_overview`, `read_source`, etc.). The framework parses
    /// the override but does not enforce that the named tool exists;
    /// the downstream consumer (e.g. `kglite-mcp-server`) is
    /// responsible for validating the name against its bundled
    /// catalogue at boot time and applying the override when
    /// emitting `tools/list`.
    ///
    /// Pre-0.3.31 the only customisation path for the bundled tool
    /// surface was the manifest's global `instructions:` block —
    /// useful for first-message orientation but not attached to
    /// individual tools. Bundled overrides let operators rewrite a
    /// specific tool's `description` (what the agent sees in
    /// `tools/list`), `hidden`-flag it out entirely, or (since
    /// 0.3.34) expose it under a different name via `rename`.
    Bundled(BundledOverride),
}
119
120impl ToolSpec {
121    pub fn name(&self) -> &str {
122        match self {
123            ToolSpec::Cypher(t) => &t.name,
124            ToolSpec::Python(t) => &t.name,
125            ToolSpec::Bundled(t) => &t.name,
126        }
127    }
128}
129
/// A manifest-declared tool backed by a Cypher query.
#[derive(Debug, Clone)]
pub struct CypherTool {
    /// Tool name as declared in the manifest.
    pub name: String,
    /// The Cypher query text. Downstream consumers (e.g.
    /// kglite-mcp-server) read this field directly when registering
    /// manifest-declared tools.
    pub cypher: String,
    /// Optional tool description.
    pub description: Option<String>,
    /// Optional `parameters` value, kept as raw JSON.
    /// NOTE(review): presumably a JSON-Schema-style description of the
    /// tool's arguments — the shape is not validated in this view.
    pub parameters: Option<serde_json::Value>,
}
137
/// A manifest-declared tool backed by a Python file.
#[derive(Debug, Clone)]
pub struct PythonTool {
    /// Tool name as declared in the manifest.
    pub name: String,
    /// Value of the `python:` key. Kept as the raw user input;
    /// relative-to-YAML resolution happens at the use site.
    pub python: String,
    /// Value of the `function:` key.
    /// NOTE(review): presumably the callable to invoke inside the
    /// Python file — confirm against the tool builder / consumer.
    pub function: String,
    /// Optional tool description.
    pub description: Option<String>,
    /// Optional `parameters` value, kept as raw JSON.
    /// NOTE(review): presumably a JSON-Schema-style description of the
    /// tool's arguments — the shape is not validated in this view.
    pub parameters: Option<serde_json::Value>,
}
146
/// Operator-declared override for a single bundled tool: an optional
/// replacement description, a hide flag, and an optional rename.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Name of the bundled tool to override (e.g. `cypher_query`,
    /// `repo_management`). Validation against the downstream
    /// binary's actual catalogue happens at the consumer's boot
    /// time — the framework only checks shape here.
    pub name: String,
    /// New agent-facing description that replaces the bundled
    /// tool's default. `None` means "do not override; keep the
    /// default."
    pub description: Option<String>,
    /// When true, the downstream consumer should omit this tool
    /// from `tools/list` AND reject calls to it. Defaults to
    /// false (visible).
    pub hidden: bool,
    /// Per-deployment rename: expose the bundled tool to the agent
    /// under this name instead of its canonical name. `None` keeps
    /// the canonical name. Lets operators running multiple kglite
    /// servers (each backed by a different graph) disambiguate
    /// otherwise-identical tool surfaces — without rename, an agent
    /// running three servers sees three copies of `cypher_query`,
    /// each indistinguishable in ToolSearch results. With rename,
    /// the same servers can expose `legal_cypher_query`,
    /// `prospect_cypher_query`, `open_source_cypher_query`.
    /// Must be a valid identifier (`^[a-zA-Z_][a-zA-Z0-9_]*$`);
    /// validation against duplicates across the manifest's tools is
    /// the downstream consumer's responsibility.
    pub rename: Option<String>,
}
176
/// Custom embedder declaration from the manifest's `embedder:` block.
#[derive(Debug, Clone)]
pub struct EmbedderConfig {
    /// Value of `embedder.module`. Kept as the raw user input;
    /// relative-to-YAML resolution happens at the use site.
    pub module: String,
    /// Value of `embedder.class`.
    /// NOTE(review): presumably the class to instantiate from
    /// `module` — confirm at the use site.
    pub class: String,
    /// Value of `embedder.kwargs` as a JSON object.
    /// NOTE(review): presumably forwarded to the embedder's
    /// constructor — confirm at the use site.
    pub kwargs: serde_json::Map<String, serde_json::Value>,
}
183
/// Settings from the manifest's `builtins:` block. See the `Default`
/// impl for the values used when a key is not set.
#[derive(Debug, Clone)]
pub struct BuiltinsConfig {
    /// Value of `builtins.save_graph`. Off by default.
    /// NOTE(review): presumably toggles a built-in graph-saving tool —
    /// confirm at the use site.
    pub save_graph: bool,
    /// Value of `builtins.temp_cleanup` (`never` or `on_overview`).
    pub temp_cleanup: TempCleanup,
    /// Register the `screen_stargazers` GitHub tool. Default on; set
    /// `builtins.screen_stargazers: false` to keep the other GitHub tools
    /// (`github_issues` / `github_api`) but drop stargazer screening.
    pub screen_stargazers: bool,
}
193
194impl Default for BuiltinsConfig {
195    fn default() -> Self {
196        Self {
197            save_graph: false,
198            temp_cleanup: TempCleanup::default(),
199            screen_stargazers: true,
200        }
201    }
202}
203
/// Allowed values of `builtins.temp_cleanup`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Spelled `never` in the manifest. This is the default.
    #[default]
    Never,
    /// Spelled `on_overview` in the manifest.
    OnOverview,
}

impl TempCleanup {
    /// The manifest / JSON spelling of this variant.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Never => "never",
            Self::OnOverview => "on_overview",
        }
    }
}
219
/// Allowed values of `workspace.kind`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Clone-and-track GitHub repos. The default when no `workspace:`
    /// block is set and the operator passed `--workspace DIR`.
    #[default]
    Github,
    /// Bind a fixed local directory as the active source root. No
    /// cloning happens; `set_root_dir(path)` swaps the active root.
    Local,
}

impl WorkspaceKind {
    /// The manifest / JSON spelling of this variant.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Github => "github",
            Self::Local => "local",
        }
    }
}
239
/// Parsed form of the manifest's `workspace:` block.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceConfig {
    /// Workspace mode; `github` unless declared otherwise.
    pub kind: WorkspaceKind,
    /// Local-mode only: path to the directory to bind as the source
    /// root. Relative paths resolve against the YAML's parent dir.
    pub root: Option<String>,
    /// Local-mode only: wire the framework's file watcher to `root`
    /// (debounced rebuild trigger via the post-activate hook).
    pub watch: bool,
    /// Optional opt-in for the [`find_workspace_manifest`] parent-walk
    /// fallback. When set, this manifest is auto-discovered by
    /// `mcp-server --workspace DIR` (and similar callers) only when
    /// the operator's `DIR` matches the declaration here. When
    /// unset, the parent-walk fallback NEVER fires for this manifest
    /// — operators must pass `--mcp-config` explicitly.
    ///
    /// Values are glob patterns matching the workspace dir's basename
    /// (single-segment match — parent-walk is always single-level).
    /// Two forms:
    ///
    /// - **Single pattern** (`./repos`, `repos`, `*`, `a*`, `prod-?`):
    ///   match against the workspace dir's basename. Literal strings
    ///   like `repos` match only `repos`; glob patterns like `*` or
    ///   `prod-*` match any name fitting the pattern.
    /// - **List of patterns** (`[./repos, ./clones]`, `[prod-*, test-*]`):
    ///   match if any pattern matches. Useful for curated subsets or
    ///   multiple naming conventions in one manifest.
    ///
    /// Leading `./` is optional and stripped at parse time. Patterns
    /// must be single-segment — `./a/b` is rejected. Invalid glob
    /// syntax is rejected at parse time.
    ///
    /// Eliminates the accidental-discovery footgun where a workspace
    /// manifest is auto-picked-up by an unrelated sibling dir. The
    /// manifest's own declaration is the opt-in.
    pub applies_to: Option<AppliesTo>,
}
277
/// Declaration of which workspace dirs the manifest applies to for
/// the [`find_workspace_manifest`] parent-walk fallback. See
/// [`WorkspaceConfig::applies_to`] for the full semantics. Each
/// entry is a glob pattern (literal or with `*` / `?` / `[abc]`)
/// matched against the workspace dir's basename.
///
/// In `Manifest::to_json` a `Pattern` is emitted as a JSON string and
/// `Patterns` as a JSON array of strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AppliesTo {
    /// Single glob pattern. Matches if the workspace dir's basename
    /// satisfies the pattern. Literal names (`repos`) match only
    /// that name; `*` matches anything; `prod-*` matches anything
    /// starting with `prod-`.
    Pattern(String),
    /// Multiple patterns. Matches if any pattern in the list matches.
    Patterns(Vec<String>),
}
293
/// One source of skills declared by the manifest. Either the magic
/// "library bundled" token (rendered as the YAML boolean `true`), or
/// a filesystem path resolved against the manifest's parent dir.
///
/// Path conventions match the rest of the manifest:
/// - `./foo` or `foo` — relative to the manifest's parent dir
/// - `~/foo` — home-relative (POSIX `$HOME` expansion)
/// - `/foo` — absolute
///
/// In `Manifest::to_json`, `Bundled` is emitted as JSON `true` and
/// `Path` as a JSON string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkillSource {
    /// The compile-time bundled skills shipped with `mcp-methods` plus
    /// any added by the downstream binary at registry-build time.
    /// In YAML: a bare `true` token in the `skills:` list.
    Bundled,
    /// A filesystem path containing `*.md` skill files. Walked at
    /// boot. Path resolution happens at registry-build time, not parse
    /// time — `SkillSource::Path` stores the raw operator-declared
    /// string for round-tripping through `Manifest::to_json()`.
    Path(String),
}
314
/// The parsed value of the `skills:` field in the manifest.
///
/// Skills are opt-in. `SkillsSource::Disabled` is the default and
/// leaves MCP behavior exactly as it was before skills existed: no
/// `prompts/list`, no methodology surface, identical context cost to
/// pre-skills deployments. Existing kglite manifests work unchanged.
///
/// When enabled, the [`crate::server::skills::Registry`] walks each
/// source in declaration order, layering them against the
/// project-local `<basename>.skills/` directory which is always
/// auto-detected as the top-priority layer.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum SkillsSource {
    /// `skills: false` or no declaration. Skills disabled entirely.
    #[default]
    Disabled,
    /// One or more sources, walked in declaration order at registry
    /// build time. First-match-per-skill-name wins across the root
    /// layer; the auto-detected project layer (`<basename>.skills/`
    /// adjacent to the YAML) preempts the entire root layer.
    ///
    /// The list may be empty (`skills: []`): skills are opted in but
    /// no root-layer source is declared.
    Sources(Vec<SkillSource>),
}
337
/// A parsed and validated manifest, as returned by [`load`].
///
/// Path-like strings are stored as the operator wrote them;
/// resolution relative to `yaml_path` happens at the use site.
#[derive(Debug, Clone)]
pub struct Manifest {
    /// Path of the YAML file this manifest was loaded from.
    pub yaml_path: PathBuf,
    /// Optional top-level `name:`.
    pub name: Option<String>,
    /// Optional top-level `instructions:` block.
    pub instructions: Option<String>,
    /// Optional top-level `overview_prefix:`.
    pub overview_prefix: Option<String>,
    /// Source roots from either `source_root:` (single string) or
    /// `source_roots:` (list) — the two keys are mutually exclusive.
    /// Empty when neither key is present.
    pub source_roots: Vec<String>,
    /// Parsed `trust:` block.
    pub trust: TrustConfig,
    /// Parsed `tools:` entries.
    pub tools: Vec<ToolSpec>,
    /// Parsed `embedder:` block, if declared.
    pub embedder: Option<EmbedderConfig>,
    /// Parsed `builtins:` block.
    pub builtins: BuiltinsConfig,
    /// Optional explicit `.env` path (relative to the YAML or absolute).
    /// When unset, the runtime walks upward from the start directory
    /// looking for a `.env` file.
    pub env_file: Option<String>,
    /// Optional explicit workspace declaration. When set, this wins
    /// over the CLI `--workspace` / `--source-root` flags (the
    /// manifest is the source of truth — same rule as `source_root:`).
    pub workspace: Option<WorkspaceConfig>,
    /// Raw passthrough for downstream-binary-specific manifest keys.
    /// The framework accepts any mapping under `extensions:` and stores
    /// it here without validating the inner keys; downstream consumers
    /// (e.g. kglite-mcp-server) read whatever they need from this map.
    ///
    /// This keeps the framework's strict-unknown-key validation strong
    /// for the surfaces it owns (`builtins`, `workspace`, …) while
    /// letting consumers add their own configuration namespace without
    /// per-key framework round-trips.
    pub extensions: serde_json::Map<String, serde_json::Value>,
    /// Opt-in skills declaration. `SkillsSource::Disabled` is the
    /// default and preserves current MCP behavior (no `prompts/`
    /// surface). When set to any non-`Disabled` value, downstream
    /// binaries pass this to [`crate::server::skills::Registry`] for
    /// loading + composition; the framework then exposes the
    /// resulting skill set via `prompts/list` and `prompts/get`.
    ///
    /// Three-layer composition: the operator-declared sources here
    /// form the root layer; the project-local `<basename>.skills/`
    /// directory (auto-detected) preempts them. See
    /// `dev-documentation/skills-aware-mcp.md` for the full design.
    pub skills: SkillsSource,
}
380
381impl Manifest {
382    /// JSON-friendly representation of the validated manifest for
383    /// FFI / RPC exposure (pyo3 wrappers, JSON-RPC bridges, etc.).
384    ///
385    /// The shape is stable across patch releases: fields can be added
386    /// non-breaking, but key renames or removals are breaking changes.
387    /// When adding a new field to `Manifest`, extend this method too —
388    /// the `to_json_shape_is_stable` test will fail until you do.
389    /// The `extensions` map is passed through unchanged; downstream
390    /// consumers parse their own namespace from it.
391    pub fn to_json(&self) -> serde_json::Value {
392        serde_json::json!({
393            "yaml_path": self.yaml_path.display().to_string(),
394            "name": self.name,
395            "instructions": self.instructions,
396            "overview_prefix": self.overview_prefix,
397            "source_roots": self.source_roots,
398            "trust": {
399                "allow_python_tools": self.trust.allow_python_tools,
400                "allow_embedder": self.trust.allow_embedder,
401            },
402            "tools": self.tools.iter().map(|t| match t {
403                ToolSpec::Cypher(c) => serde_json::json!({
404                    "kind": "cypher",
405                    "name": c.name,
406                    "cypher": c.cypher,
407                    "description": c.description,
408                    "parameters": c.parameters,
409                }),
410                ToolSpec::Python(p) => serde_json::json!({
411                    "kind": "python",
412                    "name": p.name,
413                    "python": p.python,
414                    "function": p.function,
415                    "description": p.description,
416                    "parameters": p.parameters,
417                }),
418                ToolSpec::Bundled(b) => serde_json::json!({
419                    "kind": "bundled",
420                    "name": b.name,
421                    "description": b.description,
422                    "hidden": b.hidden,
423                    "rename": b.rename,
424                }),
425            }).collect::<Vec<_>>(),
426            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
427                "module": e.module,
428                "class": e.class,
429                "kwargs": e.kwargs,
430            })),
431            "builtins": {
432                "save_graph": self.builtins.save_graph,
433                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
434                "screen_stargazers": self.builtins.screen_stargazers,
435            },
436            "env_file": self.env_file,
437            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
438                "kind": w.kind.as_str(),
439                "root": w.root,
440                "watch": w.watch,
441                "applies_to": w.applies_to.as_ref().map(|a| match a {
442                    AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
443                    AppliesTo::Patterns(ps) => serde_json::Value::Array(
444                        ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
445                    ),
446                }),
447            })),
448            "extensions": self.extensions,
449            "skills": self.skills_to_json(),
450        })
451    }
452
453    /// JSON shape for the parsed `skills:` field. Emits the operator-
454    /// declared shape unchanged (modulo normalisation), suitable for
455    /// downstream pyo3 wrappers that need to introspect what the
456    /// manifest declared without re-running the parser.
457    ///
458    /// Phase 1a (this file) emits the raw declaration only. Phase 1b
459    /// adds a separate accessor on the resolved registry that exposes
460    /// the *post-resolution* skill list with provenance — that's the
461    /// per-skill `{path, origin, frontmatter}` shape kglite asked for
462    /// in their feedback. The two surfaces are intentionally
463    /// distinct: this method describes the manifest, the
464    /// registry method describes the runtime resolution.
465    fn skills_to_json(&self) -> serde_json::Value {
466        match &self.skills {
467            SkillsSource::Disabled => serde_json::Value::Bool(false),
468            SkillsSource::Sources(sources) => {
469                let arr: Vec<serde_json::Value> = sources
470                    .iter()
471                    .map(|s| match s {
472                        SkillSource::Bundled => serde_json::Value::Bool(true),
473                        SkillSource::Path(p) => serde_json::Value::String(p.clone()),
474                    })
475                    .collect();
476                serde_json::Value::Array(arr)
477            }
478        }
479    }
480}
481
/// Look for a `<stem>_mcp.yaml` file in the same directory as
/// `graph_path`, where `<stem>` is the graph file's name without its
/// final extension.
///
/// Returns the candidate path when it exists as a regular file, and
/// `None` otherwise — including when `graph_path` has no file stem or
/// no parent component.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let mut manifest_name = graph_path.file_stem()?.to_string_lossy().into_owned();
    manifest_name.push_str("_mcp.yaml");
    let candidate = graph_path.parent()?.join(manifest_name);
    candidate.is_file().then_some(candidate)
}
493
494/// Auto-detect ``workspace_mcp.yaml`` for a workspace directory.
495///
496/// Checks two locations in strict priority order:
497///
498/// 1. **Primary** — ``<workspace_dir>/workspace_mcp.yaml``. The
499///    documented and recommended location. If this exists, it is
500///    returned unconditionally; the parent-walk fallback is NOT
501///    consulted even if a parent manifest also exists. No opt-in
502///    declaration required — the manifest sitting inside the
503///    workspace dir is itself the operator's intent.
504/// 2. **Parent-walk fallback** —
505///    ``<workspace_dir>/../workspace_mcp.yaml``. Triggered only when
506///    the primary is absent AND the parent manifest *declares* it
507///    applies to this specific workspace dir via the
508///    ``workspace.applies_to:`` field:
509///
510///    ```yaml
511///    # open_source/workspace_mcp.yaml
512///    workspace:
513///      kind: github
514///      applies_to: ./repos     # required for parent-walk discovery
515///    ```
516///
517///    The framework loads the parent manifest, canonicalises
518///    ``manifest.workspace.applies_to`` against the manifest's parent
519///    directory, and compares it to the actual ``workspace_dir``.
520///    Match → manifest is returned. No declaration or path mismatch
521///    → discovery returns ``None`` (operator must pass
522///    ``--mcp-config`` explicitly).
523///
524///    The natural layout for github-clone-tracker workspaces is:
525///
526///    ```text
527///    open_source/
528///    ├── workspace_mcp.yaml     # config sits beside the sandbox; declares
529///    │                          # workspace.applies_to: ./repos
530///    └── repos/                 # --workspace points here
531///    ```
532///
533///    The ``applies_to`` opt-in eliminates the accidental-discovery
534///    footgun where a manifest in a project root would auto-attach to
535///    any unrelated sibling dir. Operators who didn't author the
536///    manifest get the safe default (no auto-detection); operators
537///    who did get the ergonomic UX (no ``--mcp-config`` boilerplate).
538///
539/// Bounded to one level up; will not walk past the filesystem root.
540/// Symlink-safe via canonicalisation. Added per kglite operator
541/// feedback after the 0.6.x → 0.9.x migration audit.
542pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
543    let primary = workspace_dir.join("workspace_mcp.yaml");
544    if primary.is_file() {
545        return Some(primary);
546    }
547    // Parent-walk fallback. Compare against canonicalised paths to
548    // handle "/" (where parent == self) and symlinks consistently.
549    let parent = workspace_dir.parent()?;
550    let workspace_resolved = workspace_dir.canonicalize().ok()?;
551    let parent_resolved = parent.canonicalize().ok()?;
552    if parent_resolved == workspace_resolved {
553        // No real parent (filesystem root).
554        return None;
555    }
556    let fallback = parent.join("workspace_mcp.yaml");
557    if !fallback.is_file() {
558        return None;
559    }
560
561    // The fallback manifest must declare workspace.applies_to and
562    // that declaration must canonicalise to the actual workspace_dir.
563    // Otherwise the discovery is unsafe (could be accidental).
564    let manifest = match load(&fallback) {
565        Ok(m) => m,
566        Err(e) => {
567            tracing::warn!(
568                manifest = %fallback.display(),
569                error = %e,
570                "parent-walk manifest exists but failed to parse; ignoring"
571            );
572            return None;
573        }
574    };
575    let declared = manifest
576        .workspace
577        .as_ref()
578        .and_then(|w| w.applies_to.as_ref());
579    let Some(declared_applies_to) = declared else {
580        tracing::info!(
581            manifest = %fallback.display(),
582            "parent-walk manifest does not declare workspace.applies_to; \
583             ignoring (set workspace.applies_to: <pattern> to opt in)"
584        );
585        return None;
586    };
587    // Match the workspace dir's basename against the declared pattern(s).
588    // The parent-walk guarantee (workspace_dir.parent() == manifest_dir)
589    // is already established above — only the basename match is left.
590    let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
591        return None; // path with no usable basename, defensive
592    };
593    let patterns: Vec<&str> = match declared_applies_to {
594        AppliesTo::Pattern(p) => vec![p.as_str()],
595        AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
596    };
597    let matched = patterns.iter().any(|pat| {
598        match globset::Glob::new(pat) {
599            Ok(g) => g.compile_matcher().is_match(basename),
600            Err(_) => {
601                // Should not happen — patterns were validated at parse
602                // time. Defensive: treat as non-match.
603                false
604            }
605        }
606    });
607    if matched {
608        tracing::info!(
609            workspace_dir = %workspace_dir.display(),
610            manifest = %fallback.display(),
611            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
612        );
613        Some(fallback)
614    } else {
615        tracing::info!(
616            workspace_dir = %workspace_resolved.display(),
617            manifest = %fallback.display(),
618            basename = %basename,
619            patterns = ?patterns,
620            "parent-walk manifest's workspace.applies_to does not match \
621             this workspace_dir's basename; ignoring"
622        );
623        None
624    }
625}
626
627/// Parse and validate a manifest YAML file.
628pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
629    let text = fs::read_to_string(yaml_path)
630        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
631    let raw: serde_yaml::Value = serde_yaml::from_str(&text)
632        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
633    let raw = match raw {
634        serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
635        v => v,
636    };
637    let map = raw
638        .as_mapping()
639        .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
640    build(map, yaml_path)
641}
642
643fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
644    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
645
646    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
647        return Err(ManifestError::at(
648            yaml_path,
649            "specify either source_root (str) or source_roots (list), not both",
650        ));
651    }
652
653    let mut source_roots: Vec<String> = Vec::new();
654    if let Some(v) = raw.get("source_root") {
655        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
656            ManifestError::at(yaml_path, "source_root must be a non-empty string")
657        })?;
658        source_roots.push(s.to_string());
659    } else if let Some(v) = raw.get("source_roots") {
660        let seq = v.as_sequence().ok_or_else(|| {
661            ManifestError::at(
662                yaml_path,
663                "source_roots must be a list of non-empty strings",
664            )
665        })?;
666        if seq.is_empty() {
667            return Err(ManifestError::at(
668                yaml_path,
669                "source_roots must be non-empty when set",
670            ));
671        }
672        for item in seq {
673            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
674                ManifestError::at(
675                    yaml_path,
676                    "source_roots must be a list of non-empty strings",
677                )
678            })?;
679            source_roots.push(s.to_string());
680        }
681    }
682
683    let trust = build_trust(raw.get("trust"), yaml_path)?;
684    let tools = build_tools(raw.get("tools"), yaml_path)?;
685    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
686    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
687    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
688    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
689    let skills = build_skills(raw.get("skills"), yaml_path)?;
690
691    Ok(Manifest {
692        yaml_path: yaml_path.to_path_buf(),
693        name: optional_str(raw, "name", yaml_path)?,
694        instructions: optional_str(raw, "instructions", yaml_path)?,
695        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
696        source_roots,
697        trust,
698        tools,
699        embedder,
700        builtins,
701        env_file: optional_str(raw, "env_file", yaml_path)?,
702        workspace,
703        extensions,
704        skills,
705    })
706}
707
708/// Parse the polymorphic `skills:` field. Accepts:
709///
710/// - **Absent or `false`** → [`SkillsSource::Disabled`]. Pure-current
711///   MCP behavior. This is the default and what existing deployments
712///   resolve to without any YAML change.
713/// - **`skills: true`** → single bundled source. Sugar for
714///   `skills: [true]`.
715/// - **`skills: <path-string>`** → single path source. Sugar for
716///   `skills: [<path>]`.
717/// - **`skills: [bool, string, ...]`** → ordered list. Booleans MUST
718///   be `true` (the bundled marker); `false` is rejected at parse
719///   time as nonsense in list context. Each path is stored verbatim
720///   as the operator wrote it; resolution against the manifest's
721///   parent dir happens at registry-build time, not here.
722///
723/// Empty lists are accepted and parsed as `SkillsSource::Sources(vec![])`;
724/// the registry treats them as "skills opted in but no root layer,"
725/// meaning the project-local `<basename>.skills/` auto-detection
726/// still fires while the bundled + custom-path layers stay empty.
727/// Useful for operators who want to rely solely on adjacent project
728/// skills.
729fn build_skills(
730    raw: Option<&serde_yaml::Value>,
731    yaml_path: &Path,
732) -> Result<SkillsSource, ManifestError> {
733    use serde_yaml::Value;
734
735    match raw {
736        None | Some(Value::Null) | Some(Value::Bool(false)) => Ok(SkillsSource::Disabled),
737        Some(Value::Bool(true)) => Ok(SkillsSource::Sources(vec![SkillSource::Bundled])),
738        Some(Value::String(s)) => {
739            if s.is_empty() {
740                return Err(ManifestError::at(
741                    yaml_path,
742                    "skills: path must be a non-empty string",
743                ));
744            }
745            Ok(SkillsSource::Sources(vec![SkillSource::Path(s.clone())]))
746        }
747        Some(Value::Sequence(seq)) => {
748            let mut sources = Vec::with_capacity(seq.len());
749            for (idx, item) in seq.iter().enumerate() {
750                match item {
751                    Value::Bool(true) => sources.push(SkillSource::Bundled),
752                    Value::Bool(false) => {
753                        return Err(ManifestError::at(
754                            yaml_path,
755                            format!(
756                                "skills[{idx}]: `false` is not a valid entry in a `skills:` \
757                                 list (only `true` for bundled, or a path string)"
758                            ),
759                        ));
760                    }
761                    Value::String(s) => {
762                        if s.is_empty() {
763                            return Err(ManifestError::at(
764                                yaml_path,
765                                format!("skills[{idx}]: path must be a non-empty string"),
766                            ));
767                        }
768                        sources.push(SkillSource::Path(s.clone()));
769                    }
770                    _ => {
771                        return Err(ManifestError::at(
772                            yaml_path,
773                            format!(
774                                "skills[{idx}]: each entry must be `true` (for bundled) or a \
775                                 path string"
776                            ),
777                        ));
778                    }
779                }
780            }
781            Ok(SkillsSource::Sources(sources))
782        }
783        Some(_) => Err(ManifestError::at(
784            yaml_path,
785            "skills must be `false`, `true`, a path string, or a list of \
786             (true | path string) entries",
787        )),
788    }
789}
790
791fn build_extensions(
792    raw: Option<&serde_yaml::Value>,
793    yaml_path: &Path,
794) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
795    let Some(raw) = raw else {
796        return Ok(serde_json::Map::new());
797    };
798    if matches!(raw, serde_yaml::Value::Null) {
799        return Ok(serde_json::Map::new());
800    }
801    if !raw.is_mapping() {
802        return Err(ManifestError::at(
803            yaml_path,
804            "extensions must be a mapping (downstream-binary-specific keys)",
805        ));
806    }
807    match yaml_to_json(raw.clone())? {
808        serde_json::Value::Object(o) => Ok(o),
809        _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
810    }
811}
812
813fn build_workspace(
814    raw: Option<&serde_yaml::Value>,
815    yaml_path: &Path,
816) -> Result<Option<WorkspaceConfig>, ManifestError> {
817    let Some(raw) = raw else { return Ok(None) };
818    if matches!(raw, serde_yaml::Value::Null) {
819        return Ok(None);
820    }
821    let map = raw
822        .as_mapping()
823        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
824    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
825    let kind = match map.get("kind") {
826        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
827        Some(serde_yaml::Value::String(s)) => match s.as_str() {
828            "github" => WorkspaceKind::Github,
829            "local" => WorkspaceKind::Local,
830            other => {
831                return Err(ManifestError::at(
832                    yaml_path,
833                    format!(
834                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
835                    ),
836                ));
837            }
838        },
839        Some(_) => {
840            return Err(ManifestError::at(
841                yaml_path,
842                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
843            ))
844        }
845    };
846    let root = match map.get("root") {
847        None | Some(serde_yaml::Value::Null) => None,
848        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
849        _ => {
850            return Err(ManifestError::at(
851                yaml_path,
852                "workspace.root must be a non-empty string",
853            ))
854        }
855    };
856    let watch = match map.get("watch") {
857        None | Some(serde_yaml::Value::Null) => false,
858        Some(serde_yaml::Value::Bool(b)) => *b,
859        Some(_) => {
860            return Err(ManifestError::at(
861                yaml_path,
862                "workspace.watch must be a bool",
863            ))
864        }
865    };
866    let applies_to =
867        match map.get("applies_to") {
868            None | Some(serde_yaml::Value::Null) => None,
869            Some(serde_yaml::Value::String(s)) => {
870                Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
871            }
872            Some(serde_yaml::Value::Sequence(seq)) => {
873                if seq.is_empty() {
874                    return Err(ManifestError::at(
875                        yaml_path,
876                        "workspace.applies_to: list must contain at least one pattern",
877                    ));
878                }
879                let mut patterns = Vec::with_capacity(seq.len());
880                for (i, item) in seq.iter().enumerate() {
881                    let s = item.as_str().ok_or_else(|| {
882                        ManifestError::at(
883                            yaml_path,
884                            format!("workspace.applies_to[{i}] must be a string"),
885                        )
886                    })?;
887                    let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
888                        ManifestError::at(
889                            yaml_path,
890                            format!("workspace.applies_to[{i}]: {}", e.message),
891                        )
892                    })?;
893                    patterns.push(cleaned);
894                }
895                Some(AppliesTo::Patterns(patterns))
896            }
897            _ => return Err(ManifestError::at(
898                yaml_path,
899                "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
900            )),
901        };
902    if kind == WorkspaceKind::Local && root.is_none() {
903        return Err(ManifestError::at(
904            yaml_path,
905            "workspace.kind: local requires workspace.root to be set",
906        ));
907    }
908    if kind == WorkspaceKind::Github && watch {
909        return Err(ManifestError::at(
910            yaml_path,
911            "workspace.watch is only valid with workspace.kind: local",
912        ));
913    }
914    Ok(Some(WorkspaceConfig {
915        kind,
916        root,
917        watch,
918        applies_to,
919    }))
920}
921
922/// Parse + validate a single ``workspace.applies_to`` entry. Accepts
923/// any glob pattern matching a single path segment (no embedded
924/// slashes, no `..`). The leading ``./`` is optional and stripped.
925/// Validates glob syntax via `globset::Glob::new` so invalid patterns
926/// surface clear errors at boot.
927///
928/// Returns the cleaned pattern string (without `./` prefix) on
929/// success.
930fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
931    let trimmed = raw.trim();
932    if trimmed.is_empty() {
933        return Err(ManifestError::at(
934            yaml_path,
935            "workspace.applies_to: pattern must not be empty",
936        ));
937    }
938    // Strip a single leading `./` for ergonomic equivalence between
939    // `./repos` and `repos`. Both forms commonly appear in operator
940    // muscle memory; normalise so storage + glob matching is uniform.
941    let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
942    if stripped.is_empty() {
943        return Err(ManifestError::at(
944            yaml_path,
945            "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
946        ));
947    }
948    if stripped.contains('/') {
949        return Err(ManifestError::at(
950            yaml_path,
951            format!(
952                "workspace.applies_to: pattern {raw:?} must be a single path segment \
953                 (no embedded `/`) — parent-walk discovery is bounded to one level"
954            ),
955        ));
956    }
957    if stripped == ".." || stripped.starts_with("../") {
958        return Err(ManifestError::at(
959            yaml_path,
960            format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
961        ));
962    }
963    if Path::new(stripped).is_absolute() {
964        return Err(ManifestError::at(
965            yaml_path,
966            format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
967        ));
968    }
969    // Validate glob syntax. Construct a Glob to surface any syntax
970    // errors immediately — we don't keep the compiled form (cheap to
971    // re-compile at match time, keeps `WorkspaceConfig` Clone-cheap).
972    globset::Glob::new(stripped).map_err(|e| {
973        ManifestError::at(
974            yaml_path,
975            format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
976        )
977    })?;
978    Ok(stripped.to_string())
979}
980
981fn check_keys(
982    map: &serde_yaml::Mapping,
983    allowed: &[&str],
984    label: &str,
985    yaml_path: &Path,
986) -> Result<(), ManifestError> {
987    let mut unknown: Vec<String> = Vec::new();
988    for (k, _) in map {
989        let key = k.as_str().unwrap_or("<non-string-key>");
990        if !allowed.contains(&key) {
991            unknown.push(key.to_string());
992        }
993    }
994    if !unknown.is_empty() {
995        unknown.sort();
996        return Err(ManifestError::at(
997            yaml_path,
998            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
999        ));
1000    }
1001    Ok(())
1002}
1003
1004fn optional_str(
1005    raw: &serde_yaml::Mapping,
1006    key: &str,
1007    yaml_path: &Path,
1008) -> Result<Option<String>, ManifestError> {
1009    match raw.get(key) {
1010        None | Some(serde_yaml::Value::Null) => Ok(None),
1011        Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
1012        Some(_) => Err(ManifestError::at(
1013            yaml_path,
1014            format!("{key} must be a string"),
1015        )),
1016    }
1017}
1018
1019fn build_trust(
1020    raw: Option<&serde_yaml::Value>,
1021    yaml_path: &Path,
1022) -> Result<TrustConfig, ManifestError> {
1023    let Some(raw) = raw else {
1024        return Ok(TrustConfig::default());
1025    };
1026    let map = raw
1027        .as_mapping()
1028        .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
1029    check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
1030    let mut cfg = TrustConfig::default();
1031    if let Some(v) = map.get("allow_python_tools") {
1032        cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
1033            ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
1034        })?;
1035    }
1036    if let Some(v) = map.get("allow_embedder") {
1037        cfg.allow_embedder = v
1038            .as_bool()
1039            .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
1040    }
1041    Ok(cfg)
1042}
1043
1044fn build_tools(
1045    raw: Option<&serde_yaml::Value>,
1046    yaml_path: &Path,
1047) -> Result<Vec<ToolSpec>, ManifestError> {
1048    let Some(raw) = raw else {
1049        return Ok(Vec::new());
1050    };
1051    let seq = raw
1052        .as_sequence()
1053        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
1054    let mut tools: Vec<ToolSpec> = Vec::new();
1055    let mut seen: BTreeMap<String, ()> = BTreeMap::new();
1056    for (i, entry) in seq.iter().enumerate() {
1057        let tool = build_tool(entry, i, yaml_path)?;
1058        let name = tool.name().to_string();
1059        if seen.insert(name.clone(), ()).is_some() {
1060            return Err(ManifestError::at(
1061                yaml_path,
1062                format!("duplicate tool name: {name:?}"),
1063            ));
1064        }
1065        tools.push(tool);
1066    }
1067    Ok(tools)
1068}
1069
1070fn build_tool(
1071    entry: &serde_yaml::Value,
1072    idx: usize,
1073    yaml_path: &Path,
1074) -> Result<ToolSpec, ManifestError> {
1075    let map = entry
1076        .as_mapping()
1077        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
1078    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
1079
1080    // Kind detection. `cypher` and `python` are tool-creation kinds
1081    // (operator declares a new named tool); `bundled` is a tool-
1082    // override kind (operator picks a bundled tool name and customises
1083    // its agent-facing surface). Exactly one must be present.
1084    let has_cypher = map.contains_key("cypher");
1085    let has_python = map.contains_key("python");
1086    let has_bundled = map.contains_key("bundled");
1087    let kinds_present: Vec<&str> = [
1088        ("cypher", has_cypher),
1089        ("python", has_python),
1090        ("bundled", has_bundled),
1091    ]
1092    .into_iter()
1093    .filter(|(_, p)| *p)
1094    .map(|(k, _)| k)
1095    .collect();
1096    if kinds_present.is_empty() {
1097        return Err(ManifestError::at(
1098            yaml_path,
1099            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
1100        ));
1101    }
1102    if kinds_present.len() > 1 {
1103        return Err(ManifestError::at(
1104            yaml_path,
1105            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
1106        ));
1107    }
1108
1109    // The `bundled` kind takes its name from the `bundled:` value
1110    // itself (e.g. `bundled: cypher_query`) and forbids the
1111    // tool-creation fields. Branch early so we don't run the
1112    // tool-creation `name:` requirement against an override entry.
1113    if has_bundled {
1114        return build_bundled_override(map, idx, yaml_path);
1115    }
1116
1117    let name = map
1118        .get("name")
1119        .and_then(|v| v.as_str())
1120        .filter(|s| valid_identifier(s))
1121        .ok_or_else(|| {
1122            ManifestError::at(
1123                yaml_path,
1124                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
1125            )
1126        })?
1127        .to_string();
1128
1129    // `hidden:` is only valid on bundled overrides (`hidden:`-flagging
1130    // a tool you're declaring inline doesn't make sense — just don't
1131    // declare it). Reject early so the operator gets a clear error.
1132    if map.contains_key("hidden") {
1133        return Err(ManifestError::at(
1134            yaml_path,
1135            format!(
1136                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
1137            ),
1138        ));
1139    }
1140
1141    let description = match map.get("description") {
1142        None | Some(serde_yaml::Value::Null) => None,
1143        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1144        Some(_) => {
1145            return Err(ManifestError::at(
1146                yaml_path,
1147                format!("tools[{idx}] ({name:?}).description must be a string"),
1148            ))
1149        }
1150    };
1151
1152    let parameters = match map.get("parameters") {
1153        None | Some(serde_yaml::Value::Null) => None,
1154        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
1155        Some(_) => {
1156            return Err(ManifestError::at(
1157                yaml_path,
1158                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
1159            ))
1160        }
1161    };
1162
1163    if has_cypher {
1164        let cypher = map
1165            .get("cypher")
1166            .and_then(|v| v.as_str())
1167            .filter(|s| !s.trim().is_empty())
1168            .ok_or_else(|| {
1169                ManifestError::at(
1170                    yaml_path,
1171                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
1172                )
1173            })?
1174            .to_string();
1175        return Ok(ToolSpec::Cypher(CypherTool {
1176            name,
1177            cypher,
1178            description,
1179            parameters,
1180        }));
1181    }
1182
1183    // python tool
1184    let python = map
1185        .get("python")
1186        .and_then(|v| v.as_str())
1187        .filter(|s| !s.is_empty())
1188        .ok_or_else(|| {
1189            ManifestError::at(
1190                yaml_path,
1191                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1192            )
1193        })?
1194        .to_string();
1195    let function = map
1196        .get("function")
1197        .and_then(|v| v.as_str())
1198        .filter(|s| valid_identifier(s))
1199        .ok_or_else(|| {
1200            ManifestError::at(
1201                yaml_path,
1202                format!(
1203                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1204                ),
1205            )
1206        })?
1207        .to_string();
1208    Ok(ToolSpec::Python(PythonTool {
1209        name,
1210        python,
1211        function,
1212        description,
1213        parameters,
1214    }))
1215}
1216
1217/// Parse a `bundled:` override entry from `tools[idx]`. The caller
1218/// (`build_tool`) has already established that the entry has
1219/// `bundled:` set as the kind discriminator.
1220fn build_bundled_override(
1221    map: &serde_yaml::Mapping,
1222    idx: usize,
1223    yaml_path: &Path,
1224) -> Result<ToolSpec, ManifestError> {
1225    let name = map
1226        .get("bundled")
1227        .and_then(|v| v.as_str())
1228        .filter(|s| valid_identifier(s))
1229        .ok_or_else(|| {
1230            ManifestError::at(
1231                yaml_path,
1232                format!(
1233                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1234                     (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1235                ),
1236            )
1237        })?
1238        .to_string();
1239
1240    // Tool-creation fields are forbidden on override entries — the
1241    // override only customises an existing bundled tool's surface,
1242    // it doesn't declare a new tool. Catch these at parse time so
1243    // operators get a clear error rather than silent confusion.
1244    for forbidden in ["name", "parameters", "function"] {
1245        if map.contains_key(forbidden) {
1246            return Err(ManifestError::at(
1247                yaml_path,
1248                format!(
1249                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1250                     (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
1251                ),
1252            ));
1253        }
1254    }
1255
1256    let description = match map.get("description") {
1257        None | Some(serde_yaml::Value::Null) => None,
1258        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1259        Some(_) => {
1260            return Err(ManifestError::at(
1261                yaml_path,
1262                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1263            ))
1264        }
1265    };
1266
1267    let hidden = match map.get("hidden") {
1268        None | Some(serde_yaml::Value::Null) => false,
1269        Some(serde_yaml::Value::Bool(b)) => *b,
1270        Some(_) => {
1271            return Err(ManifestError::at(
1272                yaml_path,
1273                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1274            ))
1275        }
1276    };
1277
1278    // 0.3.34: optional per-deployment rename. Validated as an
1279    // identifier here; cross-tool collision check is the consumer's
1280    // job (it knows what other names — bundled, cypher, python — it
1281    // has in scope).
1282    let rename = match map.get("rename") {
1283        None | Some(serde_yaml::Value::Null) => None,
1284        Some(serde_yaml::Value::String(s)) => {
1285            if !valid_identifier(s) {
1286                return Err(ManifestError::at(
1287                    yaml_path,
1288                    format!(
1289                        "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
1290                         (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
1291                    ),
1292                ));
1293            }
1294            Some(s.clone())
1295        }
1296        Some(_) => {
1297            return Err(ManifestError::at(
1298                yaml_path,
1299                format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
1300            ))
1301        }
1302    };
1303
1304    Ok(ToolSpec::Bundled(BundledOverride {
1305        name,
1306        description,
1307        hidden,
1308        rename,
1309    }))
1310}
1311
1312fn build_embedder(
1313    raw: Option<&serde_yaml::Value>,
1314    yaml_path: &Path,
1315) -> Result<Option<EmbedderConfig>, ManifestError> {
1316    let Some(raw) = raw else { return Ok(None) };
1317    if matches!(raw, serde_yaml::Value::Null) {
1318        return Ok(None);
1319    }
1320    let map = raw
1321        .as_mapping()
1322        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1323    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1324    let module = map
1325        .get("module")
1326        .and_then(|v| v.as_str())
1327        .filter(|s| !s.is_empty())
1328        .ok_or_else(|| {
1329            ManifestError::at(
1330                yaml_path,
1331                "embedder.module must be a non-empty string (path or dotted name)",
1332            )
1333        })?
1334        .to_string();
1335    let class = map
1336        .get("class")
1337        .and_then(|v| v.as_str())
1338        .filter(|s| valid_identifier(s))
1339        .ok_or_else(|| {
1340            ManifestError::at(
1341                yaml_path,
1342                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1343            )
1344        })?
1345        .to_string();
1346    let kwargs = match map.get("kwargs") {
1347        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1348        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1349            serde_json::Value::Object(o) => o,
1350            _ => {
1351                return Err(ManifestError::at(
1352                    yaml_path,
1353                    "embedder.kwargs must be a mapping",
1354                ))
1355            }
1356        },
1357        Some(_) => {
1358            return Err(ManifestError::at(
1359                yaml_path,
1360                "embedder.kwargs must be a mapping",
1361            ))
1362        }
1363    };
1364    Ok(Some(EmbedderConfig {
1365        module,
1366        class,
1367        kwargs,
1368    }))
1369}
1370
1371fn build_builtins(
1372    raw: Option<&serde_yaml::Value>,
1373    yaml_path: &Path,
1374) -> Result<BuiltinsConfig, ManifestError> {
1375    let Some(raw) = raw else {
1376        return Ok(BuiltinsConfig::default());
1377    };
1378    if matches!(raw, serde_yaml::Value::Null) {
1379        return Ok(BuiltinsConfig::default());
1380    }
1381    let map = raw
1382        .as_mapping()
1383        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1384    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1385    let mut cfg = BuiltinsConfig::default();
1386    if let Some(v) = map.get("save_graph") {
1387        cfg.save_graph = v
1388            .as_bool()
1389            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1390    }
1391    if let Some(v) = map.get("screen_stargazers") {
1392        cfg.screen_stargazers = v.as_bool().ok_or_else(|| {
1393            ManifestError::at(yaml_path, "builtins.screen_stargazers must be a bool")
1394        })?;
1395    }
1396    if let Some(v) = map.get("temp_cleanup") {
1397        let s = v.as_str().ok_or_else(|| {
1398            ManifestError::at(
1399                yaml_path,
1400                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1401            )
1402        })?;
1403        cfg.temp_cleanup = match s {
1404            "never" => TempCleanup::Never,
1405            "on_overview" => TempCleanup::OnOverview,
1406            other => {
1407                return Err(ManifestError::at(
1408                    yaml_path,
1409                    format!(
1410                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1411                    ),
1412                ))
1413            }
1414        };
1415    }
1416    Ok(cfg)
1417}
1418
/// Report whether `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// The empty string is not an identifier. The check runs on bytes:
/// every byte of a non-ASCII character fails the ASCII tests, so such
/// input is rejected.
fn valid_identifier(s: &str) -> bool {
    match s.as_bytes().split_first() {
        None => false,
        Some((&head, tail)) => {
            let head_ok = head == b'_' || head.is_ascii_alphabetic();
            head_ok
                && tail
                    .iter()
                    .all(|&b| b == b'_' || b.is_ascii_alphanumeric())
        }
    }
}
1427
1428fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1429    serde_json::to_value(&v)
1430        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1431}
1432
// Private unit struct deriving `Debug` and `Deserialize`.
// NOTE(review): no use of this type is visible in this part of the
// file; presumably a placeholder that keeps the `serde::Deserialize`
// import referenced — confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
1435
1436#[cfg(test)]
1437mod tests {
1438    use super::*;
1439
1440    fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1441        let mut f = tempfile::NamedTempFile::new().unwrap();
1442        std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1443        f
1444    }
1445
1446    #[test]
1447    fn loads_minimal_empty_manifest() {
1448        let f = write_tmp("");
1449        let m = load(f.path()).unwrap();
1450        assert_eq!(m.tools.len(), 0);
1451        assert_eq!(m.source_roots.len(), 0);
1452        assert!(!m.trust.allow_python_tools);
1453        assert!(!m.trust.allow_embedder);
1454        assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1455    }
1456
1457    #[test]
1458    fn loads_name_and_instructions() {
1459        let f = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
1460        let m = load(f.path()).unwrap();
1461        assert_eq!(m.name.as_deref(), Some("Demo"));
1462        assert!(m.instructions.unwrap().contains("multi-line"));
1463    }
1464
1465    #[test]
1466    fn rejects_unknown_top_key() {
1467        let f = write_tmp("bogus: 1\n");
1468        let err = load(f.path()).unwrap_err();
1469        assert!(err.message.contains("unknown top-level"));
1470    }
1471
1472    #[test]
1473    fn source_root_string_normalises_to_list() {
1474        let f = write_tmp("source_root: ./data\n");
1475        let m = load(f.path()).unwrap();
1476        assert_eq!(m.source_roots, vec!["./data".to_string()]);
1477    }
1478
1479    #[test]
1480    fn source_roots_list_preserved() {
1481        let f = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
1482        let m = load(f.path()).unwrap();
1483        assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1484    }
1485
1486    #[test]
1487    fn rejects_both_source_root_and_source_roots() {
1488        let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1489        assert!(load(f.path()).unwrap_err().message.contains("not both"));
1490    }
1491
1492    #[test]
1493    fn cypher_tool_parses() {
1494        let f = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
1495        let m = load(f.path()).unwrap();
1496        assert_eq!(m.tools.len(), 1);
1497        match &m.tools[0] {
1498            ToolSpec::Cypher(t) => {
1499                assert_eq!(t.name, "lookup");
1500                assert!(t.cypher.contains("MATCH"));
1501            }
1502            _ => panic!("expected cypher tool"),
1503        }
1504    }
1505
1506    #[test]
1507    fn python_tool_parses() {
1508        let f =
1509            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
1510        let m = load(f.path()).unwrap();
1511        match &m.tools[0] {
1512            ToolSpec::Python(t) => {
1513                assert_eq!(t.python, "./tools.py");
1514                assert_eq!(t.function, "detail");
1515            }
1516            _ => panic!("expected python tool"),
1517        }
1518    }
1519
1520    #[test]
1521    fn rejects_tool_with_both_kinds() {
1522        let f = write_tmp(
1523            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
1524        );
1525        assert!(load(f.path())
1526            .unwrap_err()
1527            .message
1528            .contains("multiple kinds"));
1529    }
1530
1531    #[test]
1532    fn rejects_tool_with_no_kind() {
1533        let f = write_tmp("tools:\n  - name: x\n");
1534        assert!(load(f.path())
1535            .unwrap_err()
1536            .message
1537            .contains("needs exactly one"));
1538    }
1539
1540    #[test]
1541    fn rejects_duplicate_tool_names() {
1542        let f = write_tmp(
1543            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
1544        );
1545        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1546    }
1547
1548    // ─── Bundled override shape (0.3.31) ────────────────────────
1549
1550    #[test]
1551    fn bundled_override_with_description_parses() {
1552        let f =
1553            write_tmp("tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n");
1554        let m = load(f.path()).unwrap();
1555        assert_eq!(m.tools.len(), 1);
1556        match &m.tools[0] {
1557            ToolSpec::Bundled(b) => {
1558                assert_eq!(b.name, "repo_management");
1559                assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1560                assert!(!b.hidden);
1561            }
1562            _ => panic!("expected bundled override"),
1563        }
1564    }
1565
1566    #[test]
1567    fn bundled_override_with_hidden_parses() {
1568        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: true\n");
1569        let m = load(f.path()).unwrap();
1570        match &m.tools[0] {
1571            ToolSpec::Bundled(b) => {
1572                assert_eq!(b.name, "ping");
1573                assert!(b.hidden);
1574                assert!(b.description.is_none());
1575            }
1576            _ => panic!("expected bundled override"),
1577        }
1578    }
1579
1580    #[test]
1581    fn bundled_override_alongside_cypher_tools_parses() {
1582        let f = write_tmp(
1583            "tools:\n\
1584             \x20\x20- bundled: cypher_query\n\
1585             \x20\x20\x20\x20description: \"Custom server description\"\n\
1586             \x20\x20- name: lookup\n\
1587             \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1588        );
1589        let m = load(f.path()).unwrap();
1590        assert_eq!(m.tools.len(), 2);
1591        assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1592        assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1593    }
1594
1595    #[test]
1596    fn rejects_bundled_with_cypher_kind() {
1597        let f =
1598            write_tmp("tools:\n  - bundled: cypher_query\n    cypher: \"MATCH (n) RETURN n\"\n");
1599        let err = load(f.path()).unwrap_err();
1600        assert!(
1601            err.message.contains("multiple kinds"),
1602            "got: {}",
1603            err.message
1604        );
1605    }
1606
1607    #[test]
1608    fn rejects_bundled_with_name_field() {
1609        let f = write_tmp("tools:\n  - bundled: ping\n    name: ping\n");
1610        let err = load(f.path()).unwrap_err();
1611        assert!(
1612            err.message.contains("cannot set `name:`"),
1613            "got: {}",
1614            err.message
1615        );
1616    }
1617
1618    #[test]
1619    fn rejects_bundled_with_parameters_field() {
1620        let f =
1621            write_tmp("tools:\n  - bundled: cypher_query\n    parameters:\n      type: object\n");
1622        let err = load(f.path()).unwrap_err();
1623        assert!(
1624            err.message.contains("cannot set `parameters:`"),
1625            "got: {}",
1626            err.message
1627        );
1628    }
1629
1630    #[test]
1631    fn rejects_bundled_with_non_bool_hidden() {
1632        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: yes-please\n");
1633        let err = load(f.path()).unwrap_err();
1634        assert!(
1635            err.message.contains("hidden must be a bool"),
1636            "got: {}",
1637            err.message
1638        );
1639    }
1640
1641    #[test]
1642    fn rejects_hidden_on_cypher_tool() {
1643        let f = write_tmp(
1644            "tools:\n  - name: lookup\n    cypher: \"MATCH (n) RETURN n\"\n    hidden: true\n",
1645        );
1646        let err = load(f.path()).unwrap_err();
1647        assert!(
1648            err.message
1649                .contains("`hidden:` is only valid on `bundled:` override entries"),
1650            "got: {}",
1651            err.message
1652        );
1653    }
1654
1655    #[test]
1656    fn rejects_duplicate_bundled_overrides() {
1657        // The dedup check is on tool name; two `bundled: ping` entries
1658        // share the same name and should be rejected the same way
1659        // duplicate cypher tools are.
1660        let f = write_tmp(
1661            "tools:\n  - bundled: ping\n    hidden: true\n  - bundled: ping\n    description: \"x\"\n",
1662        );
1663        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1664    }
1665
1666    #[test]
1667    fn rejects_bundled_with_invalid_identifier() {
1668        let f = write_tmp("tools:\n  - bundled: \"123-bad\"\n    hidden: true\n");
1669        let err = load(f.path()).unwrap_err();
1670        assert!(
1671            err.message.contains("must be a string"),
1672            "got: {}",
1673            err.message
1674        );
1675    }
1676
1677    // 0.3.34 — `tools[].bundled: rename:` per-deployment override
1678    #[test]
1679    fn bundled_rename_parses_when_valid_identifier() {
1680        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
1681        let m = load(f.path()).unwrap();
1682        match &m.tools[0] {
1683            ToolSpec::Bundled(b) => {
1684                assert_eq!(b.name, "cypher_query");
1685                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1686                assert!(!b.hidden);
1687                assert!(b.description.is_none());
1688            }
1689            _ => panic!("expected bundled override"),
1690        }
1691    }
1692
1693    #[test]
1694    fn bundled_rename_alongside_description_parses() {
1695        let f = write_tmp(
1696            "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n    description: \"Legal-corpus cypher\"\n",
1697        );
1698        let m = load(f.path()).unwrap();
1699        match &m.tools[0] {
1700            ToolSpec::Bundled(b) => {
1701                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1702                assert_eq!(b.description.as_deref(), Some("Legal-corpus cypher"));
1703            }
1704            _ => panic!("expected bundled override"),
1705        }
1706    }
1707
1708    #[test]
1709    fn bundled_rename_defaults_to_none() {
1710        let f = write_tmp("tools:\n  - bundled: cypher_query\n    description: \"x\"\n");
1711        let m = load(f.path()).unwrap();
1712        match &m.tools[0] {
1713            ToolSpec::Bundled(b) => assert!(b.rename.is_none()),
1714            _ => panic!("expected bundled override"),
1715        }
1716    }
1717
1718    #[test]
1719    fn rejects_bundled_rename_with_invalid_identifier() {
1720        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: \"123-bad\"\n");
1721        let err = load(f.path()).unwrap_err();
1722        assert!(
1723            err.message.contains("rename must be a valid identifier"),
1724            "got: {}",
1725            err.message
1726        );
1727    }
1728
1729    #[test]
1730    fn rejects_bundled_rename_with_non_string_value() {
1731        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: 42\n");
1732        let err = load(f.path()).unwrap_err();
1733        assert!(
1734            err.message.contains("rename must be a string"),
1735            "got: {}",
1736            err.message
1737        );
1738    }
1739
1740    #[test]
1741    fn bundled_rename_serialises_to_json() {
1742        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
1743        let m = load(f.path()).unwrap();
1744        let json = m.to_json();
1745        let tools = json.get("tools").and_then(|t| t.as_array()).unwrap();
1746        let entry = &tools[0];
1747        assert_eq!(entry.get("kind").and_then(|v| v.as_str()), Some("bundled"));
1748        assert_eq!(
1749            entry.get("name").and_then(|v| v.as_str()),
1750            Some("cypher_query")
1751        );
1752        assert_eq!(
1753            entry.get("rename").and_then(|v| v.as_str()),
1754            Some("legal_cypher_query")
1755        );
1756    }
1757
1758    #[test]
1759    fn bundled_override_to_json_shape() {
1760        let f = write_tmp(
1761            "tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n    hidden: false\n",
1762        );
1763        let m = load(f.path()).unwrap();
1764        let v = m.to_json();
1765        assert_eq!(v["tools"][0]["kind"], "bundled");
1766        assert_eq!(v["tools"][0]["name"], "repo_management");
1767        assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1768        assert_eq!(v["tools"][0]["hidden"], false);
1769    }
1770
1771    #[test]
1772    fn embedder_parses() {
1773        let f = write_tmp(
1774            "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n",
1775        );
1776        let m = load(f.path()).unwrap();
1777        let e = m.embedder.unwrap();
1778        assert_eq!(e.module, "./e.py");
1779        assert_eq!(e.class, "GraphEmbedder");
1780        assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1781    }
1782
1783    #[test]
1784    fn builtins_parses_temp_cleanup() {
1785        let f = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
1786        let m = load(f.path()).unwrap();
1787        assert!(m.builtins.save_graph);
1788        assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1789    }
1790
1791    #[test]
1792    fn rejects_invalid_temp_cleanup() {
1793        let f = write_tmp("builtins:\n  temp_cleanup: nuke\n");
1794        assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1795    }
1796
1797    #[test]
1798    fn allow_embedder_trust_parses() {
1799        let f = write_tmp("trust:\n  allow_embedder: true\n");
1800        let m = load(f.path()).unwrap();
1801        assert!(m.trust.allow_embedder);
1802    }
1803
1804    #[test]
1805    fn retired_allow_query_preprocessor_is_rejected_as_unknown() {
1806        // Retired in 0.3.43: the gate's sole consumer (kglite) removed the
1807        // preprocessor extension, so the strict validator now treats the key
1808        // as any other unknown trust key rather than carrying dead surface.
1809        let f = write_tmp("trust:\n  allow_query_preprocessor: true\n");
1810        let err = load(f.path()).unwrap_err();
1811        assert!(err.message.contains("trust keys"));
1812        assert!(err.message.contains("allow_query_preprocessor"));
1813    }
1814
1815    #[test]
1816    fn find_sibling_works() {
1817        let dir = tempfile::tempdir().unwrap();
1818        let graph = dir.path().join("demo.kgl");
1819        std::fs::write(&graph, b"\x00").unwrap();
1820        let sibling = dir.path().join("demo_mcp.yaml");
1821        std::fs::write(&sibling, "name: x\n").unwrap();
1822        assert_eq!(find_sibling_manifest(&graph), Some(sibling));
1823    }
1824
1825    #[test]
1826    fn workspace_local_parses() {
1827        let f = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
1828        let m = load(f.path()).unwrap();
1829        let w = m.workspace.unwrap();
1830        assert_eq!(w.kind, WorkspaceKind::Local);
1831        assert_eq!(w.root.as_deref(), Some("./src"));
1832        assert!(w.watch);
1833    }
1834
1835    #[test]
1836    fn workspace_github_default_kind() {
1837        let f = write_tmp("workspace: {}\n");
1838        let m = load(f.path()).unwrap();
1839        let w = m.workspace.unwrap();
1840        assert_eq!(w.kind, WorkspaceKind::Github);
1841        assert!(w.root.is_none());
1842        assert!(!w.watch);
1843    }
1844
1845    #[test]
1846    fn workspace_local_without_root_errors() {
1847        let f = write_tmp("workspace:\n  kind: local\n");
1848        let err = load(f.path()).unwrap_err();
1849        assert!(err.message.contains("requires workspace.root"));
1850    }
1851
1852    #[test]
1853    fn workspace_unknown_key_rejected() {
1854        let f = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
1855        let err = load(f.path()).unwrap_err();
1856        assert!(err.message.contains("unknown workspace keys"));
1857    }
1858
1859    #[test]
1860    fn workspace_invalid_kind_rejected() {
1861        let f = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
1862        let err = load(f.path()).unwrap_err();
1863        assert!(err.message.contains("workspace.kind"));
1864    }
1865
1866    #[test]
1867    fn workspace_watch_invalid_for_github() {
1868        let f = write_tmp("workspace:\n  kind: github\n  watch: true\n");
1869        let err = load(f.path()).unwrap_err();
1870        assert!(err.message.contains("watch is only valid"));
1871    }
1872
1873    #[test]
1874    fn extensions_passthrough_parses() {
1875        let f = write_tmp(
1876            "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
1877        );
1878        let m = load(f.path()).unwrap();
1879        assert_eq!(
1880            m.extensions
1881                .get("csv_http_server")
1882                .and_then(|v| v.as_bool()),
1883            Some(true)
1884        );
1885        assert_eq!(
1886            m.extensions
1887                .get("csv_http_server_dir")
1888                .and_then(|v| v.as_str()),
1889            Some("temp/")
1890        );
1891        // Nested values pass through unchanged.
1892        assert_eq!(
1893            m.extensions
1894                .get("arbitrary")
1895                .and_then(|v| v.get("nested"))
1896                .and_then(|v| v.as_i64()),
1897            Some(1)
1898        );
1899    }
1900
1901    #[test]
1902    fn extensions_absent_defaults_to_empty() {
1903        let f = write_tmp("name: x\n");
1904        let m = load(f.path()).unwrap();
1905        assert!(m.extensions.is_empty());
1906    }
1907
1908    #[test]
1909    fn extensions_inner_keys_unvalidated() {
1910        // The framework intentionally does NOT validate keys inside
1911        // `extensions:` — they're downstream-binary concerns. Any shape
1912        // that's a YAML mapping must round-trip.
1913        let f = write_tmp(
1914            "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
1915        );
1916        load(f.path()).unwrap();
1917    }
1918
1919    #[test]
1920    fn extensions_must_be_a_mapping() {
1921        let f = write_tmp("extensions: not-a-mapping\n");
1922        let err = load(f.path()).unwrap_err();
1923        assert!(err.message.contains("extensions must be a mapping"));
1924    }
1925
1926    #[test]
1927    fn env_file_key_parses() {
1928        let f = write_tmp("env_file: ../.env\n");
1929        let m = load(f.path()).unwrap();
1930        assert_eq!(m.env_file.as_deref(), Some("../.env"));
1931    }
1932
1933    #[test]
1934    fn env_file_unset_is_none() {
1935        let f = write_tmp("name: Demo\n");
1936        let m = load(f.path()).unwrap();
1937        assert!(m.env_file.is_none());
1938    }
1939
1940    #[test]
1941    fn find_workspace_works() {
1942        let dir = tempfile::tempdir().unwrap();
1943        let manifest = dir.path().join("workspace_mcp.yaml");
1944        std::fs::write(&manifest, "name: ws\n").unwrap();
1945        assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1946    }
1947
1948    #[test]
1949    fn find_workspace_walks_one_level_up_with_applies_to() {
1950        // Layout: <tmp>/parent/workspace_mcp.yaml (declares
1951        // workspace.applies_to: ./repos) + <tmp>/parent/repos/.
1952        // Discovery from <tmp>/parent/repos/ should walk up one level
1953        // and find the sibling manifest because applies_to matches.
1954        let dir = tempfile::tempdir().unwrap();
1955        let parent = dir.path().join("parent");
1956        std::fs::create_dir(&parent).unwrap();
1957        let manifest = parent.join("workspace_mcp.yaml");
1958        std::fs::write(
1959            &manifest,
1960            "workspace:\n  kind: github\n  applies_to: ./repos\n",
1961        )
1962        .unwrap();
1963        let repos = parent.join("repos");
1964        std::fs::create_dir(&repos).unwrap();
1965
1966        // Primary location still works.
1967        assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
1968
1969        // Parent-walk fallback resolves to the same manifest. Compare
1970        // canonicalised paths to handle macOS /private/var vs /var.
1971        let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
1972        assert_eq!(
1973            found.canonicalize().unwrap(),
1974            manifest.canonicalize().unwrap()
1975        );
1976    }
1977
1978    #[test]
1979    fn find_workspace_ignores_parent_without_applies_to() {
1980        // Parent manifest exists but does NOT declare workspace.applies_to.
1981        // The parent-walk fallback must refuse to auto-detect it —
1982        // otherwise an unrelated workspace_mcp.yaml in a sibling dir
1983        // could surprise-attach to whatever --workspace path the
1984        // operator passes. Safe default: require the opt-in.
1985        let dir = tempfile::tempdir().unwrap();
1986        let parent = dir.path().join("parent");
1987        std::fs::create_dir(&parent).unwrap();
1988        let manifest = parent.join("workspace_mcp.yaml");
1989        std::fs::write(&manifest, "name: not for repos\n").unwrap();
1990        let repos = parent.join("repos");
1991        std::fs::create_dir(&repos).unwrap();
1992
1993        assert_eq!(
1994            find_workspace_manifest(&repos),
1995            None,
1996            "parent manifest without workspace.applies_to must NOT auto-attach"
1997        );
1998    }
1999
2000    #[test]
2001    fn find_workspace_ignores_parent_with_mismatched_applies_to() {
2002        // Parent manifest declares applies_to: ./repos but the
2003        // actual --workspace path is ./other_dir. The mismatch must
2004        // suppress auto-detection.
2005        let dir = tempfile::tempdir().unwrap();
2006        let parent = dir.path().join("parent");
2007        std::fs::create_dir(&parent).unwrap();
2008        let manifest = parent.join("workspace_mcp.yaml");
2009        std::fs::write(
2010            &manifest,
2011            "workspace:\n  kind: github\n  applies_to: ./repos\n",
2012        )
2013        .unwrap();
2014        let other = parent.join("other_dir");
2015        std::fs::create_dir(&other).unwrap();
2016
2017        assert_eq!(
2018            find_workspace_manifest(&other),
2019            None,
2020            "applies_to: ./repos must NOT match --workspace ./other_dir"
2021        );
2022    }
2023
2024    #[test]
2025    fn find_workspace_applies_to_wildcard_matches_any_child() {
2026        // applies_to: '*' (or './*') means "any direct child of the
2027        // manifest's parent dir." Three different child names should
2028        // all auto-detect the manifest.
2029        let dir = tempfile::tempdir().unwrap();
2030        let parent = dir.path().join("parent");
2031        std::fs::create_dir(&parent).unwrap();
2032        let manifest = parent.join("workspace_mcp.yaml");
2033        std::fs::write(&manifest, "workspace:\n  kind: github\n  applies_to: '*'\n").unwrap();
2034        for child_name in ["repos", "clones", "totally-different-name"] {
2035            let child = parent.join(child_name);
2036            std::fs::create_dir(&child).unwrap();
2037            let found =
2038                find_workspace_manifest(&child).expect("wildcard should match any direct child");
2039            assert_eq!(
2040                found.canonicalize().unwrap(),
2041                manifest.canonicalize().unwrap(),
2042                "wildcard should match child {child_name:?}"
2043            );
2044        }
2045    }
2046
2047    #[test]
2048    fn find_workspace_applies_to_glob_matches_prefix() {
2049        // applies_to: './prod-*' should match any direct child whose
2050        // basename starts with "prod-".
2051        let dir = tempfile::tempdir().unwrap();
2052        let parent = dir.path().join("parent");
2053        std::fs::create_dir(&parent).unwrap();
2054        let manifest = parent.join("workspace_mcp.yaml");
2055        std::fs::write(
2056            &manifest,
2057            "workspace:\n  kind: github\n  applies_to: ./prod-*\n",
2058        )
2059        .unwrap();
2060        // Match cases.
2061        for child_name in ["prod-api", "prod-web", "prod-"] {
2062            let child = parent.join(child_name);
2063            std::fs::create_dir(&child).unwrap();
2064            assert!(
2065                find_workspace_manifest(&child).is_some(),
2066                "prod-* should match {child_name:?}"
2067            );
2068        }
2069        // Non-match cases.
2070        for child_name in ["test-api", "stage-web", "random"] {
2071            let child = parent.join(child_name);
2072            std::fs::create_dir(&child).unwrap();
2073            assert_eq!(
2074                find_workspace_manifest(&child),
2075                None,
2076                "prod-* should NOT match {child_name:?}"
2077            );
2078        }
2079    }
2080
2081    #[test]
2082    fn find_workspace_applies_to_list_matches_any_entry() {
2083        // applies_to: [./repos, ./clones] should match either name
2084        // but reject anything else.
2085        let dir = tempfile::tempdir().unwrap();
2086        let parent = dir.path().join("parent");
2087        std::fs::create_dir(&parent).unwrap();
2088        let manifest = parent.join("workspace_mcp.yaml");
2089        std::fs::write(
2090            &manifest,
2091            "workspace:\n  kind: github\n  applies_to:\n    - ./repos\n    - ./clones\n",
2092        )
2093        .unwrap();
2094        for matching in ["repos", "clones"] {
2095            let child = parent.join(matching);
2096            std::fs::create_dir(&child).unwrap();
2097            assert!(
2098                find_workspace_manifest(&child).is_some(),
2099                "list should match {matching:?}"
2100            );
2101        }
2102        let other = parent.join("scratch");
2103        std::fs::create_dir(&other).unwrap();
2104        assert_eq!(
2105            find_workspace_manifest(&other),
2106            None,
2107            "list with [repos, clones] must NOT match scratch"
2108        );
2109    }
2110
2111    #[test]
2112    fn applies_to_rejects_deep_path_at_parse_time() {
2113        let f = write_tmp("workspace:\n  kind: github\n  applies_to: ./too/deep/path\n");
2114        let err = load(f.path()).unwrap_err();
2115        assert!(
2116            err.message.contains("must be a single path segment"),
2117            "got: {}",
2118            err.message
2119        );
2120    }
2121
2122    #[test]
2123    fn applies_to_rejects_invalid_glob_at_parse_time() {
2124        // globset rejects unterminated character class.
2125        let f = write_tmp("workspace:\n  kind: github\n  applies_to: './[unterminated'\n");
2126        let err = load(f.path()).unwrap_err();
2127        assert!(
2128            err.message.contains("invalid glob pattern"),
2129            "got: {}",
2130            err.message
2131        );
2132    }
2133
2134    #[test]
2135    fn applies_to_rejects_parent_relative() {
2136        // Bare `..` is caught by the `..` rejection branch. The
2137        // multi-segment form `../foo` is caught earlier by the
2138        // single-segment check; either is rejected.
2139        let f = write_tmp("workspace:\n  kind: github\n  applies_to: '..'\n");
2140        let err = load(f.path()).unwrap_err();
2141        assert!(err.message.contains("must not contain `..`"));
2142
2143        let f2 = write_tmp("workspace:\n  kind: github\n  applies_to: '../up'\n");
2144        let err2 = load(f2.path()).unwrap_err();
2145        assert!(err2.message.contains("must be a single path segment"));
2146    }
2147
2148    #[test]
2149    fn find_workspace_returns_none_when_missing_everywhere() {
2150        let dir = tempfile::tempdir().unwrap();
2151        let child = dir.path().join("child");
2152        std::fs::create_dir(&child).unwrap();
2153        // No manifest in either child or its parent (tmpdir root).
2154        assert_eq!(find_workspace_manifest(&child), None);
2155    }
2156
2157    #[test]
2158    fn find_workspace_primary_wins_over_parent_fallback() {
2159        // Both primary AND parent-fallback exist. The primary must
2160        // win — this anchors the precedence rule documented on
2161        // `find_workspace_manifest`. The parent declares applies_to
2162        // matching the child dir, so it WOULD be a valid fallback —
2163        // but the primary preempts it. If a future refactor swaps
2164        // the order, this test fails loudly.
2165        let dir = tempfile::tempdir().unwrap();
2166        let parent_manifest = dir.path().join("workspace_mcp.yaml");
2167        std::fs::write(
2168            &parent_manifest,
2169            "workspace:\n  kind: github\n  applies_to: ./repos\n",
2170        )
2171        .unwrap();
2172        let child = dir.path().join("repos");
2173        std::fs::create_dir(&child).unwrap();
2174        let child_manifest = child.join("workspace_mcp.yaml");
2175        std::fs::write(&child_manifest, "name: child\n").unwrap();
2176
2177        // Discovery from `child` should return the child manifest,
2178        // NOT the parent's. Compare canonicalised to handle the
2179        // macOS /private/var vs /var symlink consistently.
2180        let found = find_workspace_manifest(&child).expect("primary should resolve");
2181        assert_eq!(
2182            found.canonicalize().unwrap(),
2183            child_manifest.canonicalize().unwrap(),
2184            "primary location must win when both primary and parent fallback exist"
2185        );
2186    }
2187
2188    #[test]
2189    fn to_json_shape_is_stable() {
2190        let f = write_tmp(
2191            r#"
2192name: KGLite Codebase
2193source_roots: [src, lib]
2194trust:
2195  allow_embedder: true
2196embedder:
2197  module: kglite.embed
2198  class: SentenceTransformerEmbedder
2199builtins:
2200  save_graph: true
2201  temp_cleanup: on_overview
2202"#,
2203        );
2204        let m = load(f.path()).unwrap();
2205        let actual = m.to_json();
2206        let expected = serde_json::json!({
2207            "yaml_path": f.path().display().to_string(),
2208            "name": "KGLite Codebase",
2209            "instructions": null,
2210            "overview_prefix": null,
2211            "source_roots": ["src", "lib"],
2212            "trust": {
2213                "allow_python_tools": false,
2214                "allow_embedder": true,
2215            },
2216            "tools": [],
2217            "embedder": {
2218                "module": "kglite.embed",
2219                "class": "SentenceTransformerEmbedder",
2220                "kwargs": {},
2221            },
2222            "builtins": { "save_graph": true, "temp_cleanup": "on_overview", "screen_stargazers": true },
2223            "env_file": null,
2224            "workspace": null,
2225            "extensions": {},
2226            "skills": false,
2227        });
2228        assert_eq!(actual, expected);
2229    }
2230
2231    #[test]
2232    fn to_json_round_trips_tools_and_workspace() {
2233        let f = write_tmp(
2234            r#"
2235name: Full Surface
2236source_root: ./src
2237trust:
2238  allow_python_tools: true
2239tools:
2240  - name: nodes_for
2241    cypher: "MATCH (n {name: $name}) RETURN n"
2242    description: "fetch nodes by name"
2243  - name: run_query
2244    python: tools.py
2245    function: run
2246workspace:
2247  kind: local
2248  root: /tmp/ws
2249  watch: true
2250builtins:
2251  save_graph: false
2252env_file: .env.local
2253extensions:
2254  kglite:
2255    flavour: standard
2256"#,
2257        );
2258        let m = load(f.path()).unwrap();
2259        let v = m.to_json();
2260        assert_eq!(v["name"], "Full Surface");
2261        assert_eq!(v["trust"]["allow_python_tools"], true);
2262        assert_eq!(v["workspace"]["kind"], "local");
2263        assert_eq!(v["workspace"]["root"], "/tmp/ws");
2264        assert_eq!(v["workspace"]["watch"], true);
2265        assert_eq!(v["env_file"], ".env.local");
2266        assert_eq!(v["tools"][0]["kind"], "cypher");
2267        assert_eq!(v["tools"][0]["name"], "nodes_for");
2268        assert_eq!(v["tools"][1]["kind"], "python");
2269        assert_eq!(v["tools"][1]["name"], "run_query");
2270        assert_eq!(v["tools"][1]["python"], "tools.py");
2271        assert_eq!(v["tools"][1]["function"], "run");
2272        assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
2273    }
2274
2275    // ─── Skills schema (Phase 1a — manifest-level only) ───────────
2276
2277    #[test]
2278    fn skills_disabled_by_default() {
2279        let f = write_tmp("name: x\n");
2280        let m = load(f.path()).unwrap();
2281        assert_eq!(m.skills, SkillsSource::Disabled);
2282        assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
2283    }
2284
2285    #[test]
2286    fn skills_explicit_false_disabled() {
2287        let f = write_tmp("name: x\nskills: false\n");
2288        let m = load(f.path()).unwrap();
2289        assert_eq!(m.skills, SkillsSource::Disabled);
2290    }
2291
2292    #[test]
2293    fn skills_bool_true_parses_to_single_bundled() {
2294        let f = write_tmp("name: x\nskills: true\n");
2295        let m = load(f.path()).unwrap();
2296        assert_eq!(m.skills, SkillsSource::Sources(vec![SkillSource::Bundled]));
2297        // JSON shape: list with one boolean true.
2298        let v = m.to_json();
2299        assert_eq!(v["skills"], serde_json::json!([true]));
2300    }
2301
2302    #[test]
2303    fn skills_path_string_parses_to_single_path() {
2304        let f = write_tmp("name: x\nskills: ./local-skills/\n");
2305        let m = load(f.path()).unwrap();
2306        assert_eq!(
2307            m.skills,
2308            SkillsSource::Sources(vec![SkillSource::Path("./local-skills/".into())])
2309        );
2310        // JSON round-trip preserves the operator-declared path verbatim.
2311        let v = m.to_json();
2312        assert_eq!(v["skills"], serde_json::json!(["./local-skills/"]));
2313    }
2314
2315    #[test]
2316    fn skills_list_polymorphic_parses() {
2317        let f =
2318            write_tmp("name: x\nskills:\n  - true\n  - ./local-overrides/\n  - ~/shared-skills/\n");
2319        let m = load(f.path()).unwrap();
2320        assert_eq!(
2321            m.skills,
2322            SkillsSource::Sources(vec![
2323                SkillSource::Bundled,
2324                SkillSource::Path("./local-overrides/".into()),
2325                SkillSource::Path("~/shared-skills/".into()),
2326            ])
2327        );
2328        // JSON preserves entry types: bool for bundled, string for paths.
2329        let v = m.to_json();
2330        assert_eq!(
2331            v["skills"],
2332            serde_json::json!([true, "./local-overrides/", "~/shared-skills/"])
2333        );
2334    }
2335
2336    #[test]
2337    fn skills_empty_list_parses_as_opt_in_with_no_root_sources() {
2338        // Empty list means "opt in but only the auto-detected project
2339        // layer fires." The registry treats this as `Sources(vec![])`,
2340        // not `Disabled`. Operators relying solely on
2341        // `<basename>.skills/` adjacent to the YAML use this form.
2342        let f = write_tmp("name: x\nskills: []\n");
2343        let m = load(f.path()).unwrap();
2344        assert_eq!(m.skills, SkillsSource::Sources(vec![]));
2345    }
2346
2347    #[test]
2348    fn skills_false_in_list_rejected() {
2349        let f = write_tmp("name: x\nskills:\n  - false\n");
2350        let err = load(f.path()).unwrap_err();
2351        assert!(
2352            err.message.contains("skills[0]")
2353                && err.message.contains("`false` is not a valid entry"),
2354            "unexpected: {}",
2355            err.message
2356        );
2357    }
2358
2359    #[test]
2360    fn skills_invalid_type_rejected() {
2361        let f = write_tmp("name: x\nskills: 42\n");
2362        let err = load(f.path()).unwrap_err();
2363        assert!(
2364            err.message.contains("skills must be"),
2365            "unexpected: {}",
2366            err.message
2367        );
2368    }
2369
2370    #[test]
2371    fn skills_empty_path_string_rejected() {
2372        let f = write_tmp("name: x\nskills: \"\"\n");
2373        let err = load(f.path()).unwrap_err();
2374        assert!(
2375            err.message.contains("non-empty string"),
2376            "unexpected: {}",
2377            err.message
2378        );
2379    }
2380
2381    #[test]
2382    fn skills_field_is_purely_additive_on_existing_manifests() {
2383        // A manifest written before the skills field existed (i.e. no
2384        // `skills:` declaration) must still parse cleanly with
2385        // SkillsSource::Disabled. This is the "no impact on existing
2386        // MCP servers" guarantee at the schema level.
2387        let f = write_tmp(
2388            r#"
2389name: legacy
2390source_roots: [src]
2391trust:
2392  allow_python_tools: true
2393workspace:
2394  kind: github
2395"#,
2396        );
2397        let m = load(f.path()).unwrap();
2398        assert_eq!(m.skills, SkillsSource::Disabled);
2399        assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
2400    }
2401}