Skip to main content

mcp_methods/server/
manifest.rs

1//! YAML manifest schema + loader.
2//!
3//! A manifest is a YAML file declaring the tools, source roots, custom
4//! embedder, and trust gates the server should apply. The loader parses,
5//! validates, and returns a [`Manifest`]; consumers (CLI wiring, tool
6//! registration) operate on the validated structure.
7//!
8//! Path strings (`source_root`, `python:` tool paths, embedder module)
9//! are kept as the raw user input — relative-to-yaml resolution happens
10//! at the use site so the data stays pure and testable.
11//!
12//! Validation is fail-fast and user-facing: the caller surfaces
13//! [`ManifestError`] messages directly to the operator.
14//!
15//! Schema mirrors the Python `kglite.mcp_server.manifest` module 1:1 so
16//! a manifest written for the Python server boots unchanged on the new
17//! Rust server.
18
19// A handful of fields/helpers are exposed for downstream consumers
20// (e.g. kglite-mcp-server reads `CypherTool::cypher` directly when
21// registering manifest-declared tools) and so look unused from this
22// crate's perspective. Silence dead-code warnings rather than chase
23// every cross-crate use.
24#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
// Allow-lists and enumerated values backing the strict unknown-key
// validation. `build` checks the top-level mapping against
// `ALLOWED_TOP_KEYS` via `check_keys`; the per-section lists are
// presumably consumed the same way by the matching `build_*` helpers
// (not all of them are visible from here — confirm at the use sites).

/// Keys accepted at the top level of the manifest mapping.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
    "skills",
];
/// Keys of the `workspace:` block; mirrors the fields of `WorkspaceConfig`.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
/// Spellings of `workspace.kind`; mirrors `WorkspaceKind::as_str`.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
/// Keys of the `trust:` block; mirrors the fields of `TrustConfig`.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];
/// Keys of a single `tools:` entry, across all tool flavours
/// (cypher, python, and bundled override).
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
    // 0.3.34: per-deployment rename for bundled tools (the bundled
    // override block already covers `description` and `hidden`; this
    // adds the third axis — what the agent sees in `tools/list`).
    "rename",
];
/// Keys of the `embedder:` block; mirrors the fields of `EmbedderConfig`.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
/// Keys of the `builtins:` block; mirrors the fields of `BuiltinsConfig`.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
/// Spellings of `builtins.temp_cleanup`; mirrors `TempCleanup::as_str`.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
72
73#[derive(Debug, Error)]
74#[error("{path}: {message}")]
75pub struct ManifestError {
76    pub path: String,
77    pub message: String,
78}
79
80impl ManifestError {
81    pub fn at(path: &Path, message: impl Into<String>) -> Self {
82        Self {
83            path: path.display().to_string(),
84            message: message.into(),
85        }
86    }
87
88    pub fn bare(message: impl Into<String>) -> Self {
89        Self {
90            path: "<manifest>".to_string(),
91            message: message.into(),
92        }
93    }
94}
95
/// Trust gates parsed from the manifest's `trust:` block.
///
/// The derived `Default` leaves every gate switched off.
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// NOTE(review): presumably gates registration of `python:` tools —
    /// confirm against the consumer that reads this flag.
    pub allow_python_tools: bool,
    /// NOTE(review): presumably gates loading of the custom embedder
    /// declared under `embedder:` — confirm against the consumer.
    pub allow_embedder: bool,
    /// Advisory gate: the manifest declares that an extension-defined
    /// query preprocessor hook is permitted to run. The framework does
    /// not parse or execute the preprocessor itself — it lives in the
    /// opaque `extensions:` passthrough — but downstream consumers
    /// (e.g. kglite-mcp-server) read this flag and refuse to boot the
    /// hook when it is false. Same pattern as `allow_embedder`.
    pub allow_query_preprocessor: bool,
}
108
109#[derive(Debug, Clone)]
110pub enum ToolSpec {
111    Cypher(CypherTool),
112    Python(PythonTool),
113    /// Override the agent-facing surface of a bundled tool (one the
114    /// downstream binary provides natively — `cypher_query`,
115    /// `graph_overview`, `read_source`, etc.). The framework parses
116    /// the override but does not enforce that the named tool exists;
117    /// the downstream consumer (e.g. `kglite-mcp-server`) is
118    /// responsible for validating the name against its bundled
119    /// catalogue at boot time and applying the override when
120    /// emitting `tools/list`.
121    ///
122    /// Pre-0.3.31 the only customisation path for the bundled tool
123    /// surface was the manifest's global `instructions:` block —
124    /// useful for first-message orientation but not attached to
125    /// individual tools. Bundled overrides let operators rewrite a
126    /// specific tool's `description` (what the agent sees in
127    /// `tools/list`) or `hidden`-flag it out entirely.
128    Bundled(BundledOverride),
129}
130
131impl ToolSpec {
132    pub fn name(&self) -> &str {
133        match self {
134            ToolSpec::Cypher(t) => &t.name,
135            ToolSpec::Python(t) => &t.name,
136            ToolSpec::Bundled(t) => &t.name,
137        }
138    }
139}
140
/// A manifest-declared tool backed by a Cypher query.
#[derive(Debug, Clone)]
pub struct CypherTool {
    /// Tool name as declared in the manifest.
    pub name: String,
    /// The Cypher text. Downstream consumers (e.g. kglite-mcp-server)
    /// read this field directly when registering the tool.
    pub cypher: String,
    /// Optional agent-facing description.
    pub description: Option<String>,
    /// Optional parameter declaration, kept as raw JSON.
    /// NOTE(review): presumably a JSON-Schema-like object — confirm
    /// against the tool parser and the registering consumer.
    pub parameters: Option<serde_json::Value>,
}
148
/// A manifest-declared tool backed by a function in a Python file.
#[derive(Debug, Clone)]
pub struct PythonTool {
    /// Tool name as declared in the manifest.
    pub name: String,
    /// Raw `python:` path string exactly as the operator wrote it;
    /// relative-to-YAML resolution happens at the use site, not here.
    pub python: String,
    /// NOTE(review): presumably the name of the callable to invoke
    /// inside the `python` file — confirm against the consumer.
    pub function: String,
    /// Optional agent-facing description.
    pub description: Option<String>,
    /// Optional parameter declaration, kept as raw JSON.
    /// NOTE(review): presumably a JSON-Schema-like object — confirm.
    pub parameters: Option<serde_json::Value>,
}
157
/// Per-deployment override of a tool the downstream binary ships
/// natively: replace its description, hide it, and/or rename it.
/// Only the shape is checked by the framework; see the field docs for
/// what is left to the downstream consumer.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Name of the bundled tool to override (e.g. `cypher_query`,
    /// `repo_management`). Validation against the downstream
    /// binary's actual catalogue happens at the consumer's boot
    /// time — the framework only checks shape here.
    pub name: String,
    /// New agent-facing description that replaces the bundled
    /// tool's default. `None` means "do not override; keep the
    /// default."
    pub description: Option<String>,
    /// When true, the downstream consumer should omit this tool
    /// from `tools/list` AND reject calls to it. Defaults to
    /// false (visible).
    pub hidden: bool,
    /// Per-deployment rename: expose the bundled tool to the agent
    /// under this name instead of its canonical name. `None` keeps
    /// the canonical name. Lets operators running multiple kglite
    /// servers (each backed by a different graph) disambiguate
    /// otherwise-identical tool surfaces — without rename, an agent
    /// running three servers sees three copies of `cypher_query`,
    /// each indistinguishable in ToolSearch results. With rename,
    /// the same servers can expose `legal_cypher_query`,
    /// `prospect_cypher_query`, `open_source_cypher_query`.
    /// Must be a valid identifier (`^[a-zA-Z_][a-zA-Z0-9_]*$`);
    /// validation against duplicates across the manifest's tools is
    /// the downstream consumer's responsibility.
    pub rename: Option<String>,
}
187
/// Custom embedder declaration parsed from the manifest's `embedder:`
/// block.
#[derive(Debug, Clone)]
pub struct EmbedderConfig {
    /// Raw embedder module string exactly as the operator wrote it;
    /// relative-to-YAML resolution happens at the use site.
    pub module: String,
    /// NOTE(review): presumably the class name to instantiate from
    /// `module` — confirm against the consumer.
    pub class: String,
    /// Free-form keyword arguments, kept as a raw JSON object.
    /// NOTE(review): presumably forwarded to the embedder's
    /// constructor — confirm against the consumer.
    pub kwargs: serde_json::Map<String, serde_json::Value>,
}
194
/// Settings parsed from the manifest's `builtins:` block.
///
/// The derived `Default` is `save_graph = false` and
/// `temp_cleanup = TempCleanup::Never`.
#[derive(Debug, Default, Clone)]
pub struct BuiltinsConfig {
    /// NOTE(review): presumably enables a built-in "save graph" tool —
    /// confirm against the consumer that reads this flag.
    pub save_graph: bool,
    /// Temp-file cleanup policy; see [`TempCleanup`].
    pub temp_cleanup: TempCleanup,
}
200
/// Value of `builtins.temp_cleanup`. The default is [`TempCleanup::Never`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Spelled `never` in the manifest.
    #[default]
    Never,
    /// Spelled `on_overview` in the manifest.
    OnOverview,
}

impl TempCleanup {
    /// The manifest / JSON spelling of this variant.
    pub fn as_str(&self) -> &'static str {
        let label = match *self {
            Self::Never => "never",
            Self::OnOverview => "on_overview",
        };
        label
    }
}
216
/// Value of `workspace.kind`. The default is [`WorkspaceKind::Github`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Clone-and-track GitHub repos. The default when no `workspace:`
    /// block is set and the operator passed `--workspace DIR`.
    #[default]
    Github,
    /// Bind a fixed local directory as the active source root. No
    /// cloning happens; `set_root_dir(path)` swaps the active root.
    Local,
}

impl WorkspaceKind {
    /// The manifest / JSON spelling of this variant.
    pub fn as_str(&self) -> &'static str {
        let label = match *self {
            Self::Github => "github",
            Self::Local => "local",
        };
        label
    }
}
236
/// Explicit workspace declaration parsed from the manifest's
/// `workspace:` block.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceConfig {
    /// Workspace flavour; see [`WorkspaceKind`].
    pub kind: WorkspaceKind,
    /// Local-mode only: path to the directory to bind as the source
    /// root. Relative paths resolve against the YAML's parent dir.
    pub root: Option<String>,
    /// Local-mode only: wire the framework's file watcher to `root`
    /// (debounced rebuild trigger via the post-activate hook).
    pub watch: bool,
    /// Optional opt-in for the [`find_workspace_manifest`] parent-walk
    /// fallback. When set, this manifest is auto-discovered by
    /// `mcp-server --workspace DIR` (and similar callers) only when
    /// the operator's `DIR` matches the declaration here. When
    /// unset, the parent-walk fallback NEVER fires for this manifest
    /// — operators must pass `--mcp-config` explicitly.
    ///
    /// Values are glob patterns matching the workspace dir's basename
    /// (single-segment match — parent-walk is always single-level).
    /// Two forms:
    ///
    /// - **Single pattern** (`./repos`, `repos`, `*`, `a*`, `prod-?`):
    ///   match against the workspace dir's basename. Literal strings
    ///   like `repos` match only `repos`; glob patterns like `*` or
    ///   `prod-*` match any name fitting the pattern.
    /// - **List of patterns** (`[./repos, ./clones]`, `[prod-*, test-*]`):
    ///   match if any pattern matches. Useful for curated subsets or
    ///   multiple naming conventions in one manifest.
    ///
    /// Leading `./` is optional and stripped at parse time. Patterns
    /// must be single-segment — `./a/b` is rejected. Invalid glob
    /// syntax is rejected at parse time.
    ///
    /// Eliminates the accidental-discovery footgun where a workspace
    /// manifest is auto-picked-up by an unrelated sibling dir. The
    /// manifest's own declaration is the opt-in.
    pub applies_to: Option<AppliesTo>,
}
274
/// Declaration of which workspace dirs the manifest applies to for
/// the [`find_workspace_manifest`] parent-walk fallback. See
/// [`WorkspaceConfig::applies_to`] for the full semantics. Each
/// entry is a glob pattern (literal or with `*` / `?` / `[abc]`)
/// matched against the workspace dir's basename.
///
/// `Manifest::to_json` renders `Pattern` as a JSON string and
/// `Patterns` as a JSON array of strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AppliesTo {
    /// Single glob pattern. Matches if the workspace dir's basename
    /// satisfies the pattern. Literal names (`repos`) match only
    /// that name; `*` matches anything; `prod-*` matches anything
    /// starting with `prod-`.
    Pattern(String),
    /// Multiple patterns. Matches if any pattern in the list matches.
    Patterns(Vec<String>),
}
290
/// One source of skills declared by the manifest. Either the magic
/// "library bundled" token (rendered as the YAML boolean `true`), or
/// a filesystem path resolved against the manifest's parent dir.
///
/// Path conventions match the rest of the manifest:
/// - `./foo` or `foo` — relative to the manifest's parent dir
/// - `~/foo` — home-relative (POSIX `$HOME` expansion)
/// - `/foo` — absolute
///
/// The parser in this file only rejects empty path strings; it does
/// not touch the filesystem.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkillSource {
    /// The compile-time bundled skills shipped with `mcp-methods` plus
    /// any added by the downstream binary at registry-build time.
    /// In YAML: a bare `true` token in the `skills:` list.
    Bundled,
    /// A filesystem path containing `*.md` skill files. Walked at
    /// boot. Path resolution happens at registry-build time, not parse
    /// time — `SkillSource::Path` stores the raw operator-declared
    /// string for round-tripping through `Manifest::to_json()`.
    Path(String),
}
311
/// The parsed value of the `skills:` field in the manifest.
///
/// Skills are opt-in. `SkillsSource::Disabled` is the default and
/// matches verbatim-current MCP behavior: no `prompts/list`, no
/// methodology surface, identical context cost to pre-skills
/// deployments. Existing kglite manifests work unchanged.
///
/// When enabled, the [`crate::server::skills::Registry`] walks each
/// source in declaration order, layering them against the
/// project-local `<basename>.skills/` directory which is always
/// auto-detected as the top-priority layer.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum SkillsSource {
    /// `skills: false`, an explicit null value, or no declaration.
    /// Skills disabled entirely.
    #[default]
    Disabled,
    /// One or more sources, walked in declaration order at registry
    /// build time. First-match-per-skill-name wins across the root
    /// layer; the auto-detected project layer (`<basename>.skills/`
    /// adjacent to the YAML) preempts the entire root layer.
    ///
    /// The list may be empty (`skills: []`): skills are opted in but
    /// no root-layer source is declared.
    Sources(Vec<SkillSource>),
}
334
/// A parsed and validated manifest, as returned by [`load`].
///
/// Path-like strings are stored exactly as the operator wrote them;
/// resolution relative to the YAML file happens at the use site.
#[derive(Debug, Clone)]
pub struct Manifest {
    /// Path of the YAML file this manifest was built from, as passed
    /// to the loader.
    pub yaml_path: PathBuf,
    /// Optional `name:` string.
    pub name: Option<String>,
    /// Optional global `instructions:` block.
    pub instructions: Option<String>,
    /// Optional `overview_prefix:` string.
    /// NOTE(review): presumably prepended to the overview output —
    /// confirm against the consumer.
    pub overview_prefix: Option<String>,
    /// Source roots, normalised to a list: a single `source_root:`
    /// string becomes a one-element list, `source_roots:` is copied
    /// in declaration order, and neither key yields an empty list.
    pub source_roots: Vec<String>,
    /// Trust gates from the `trust:` block.
    pub trust: TrustConfig,
    /// Tool declarations from the `tools:` list.
    pub tools: Vec<ToolSpec>,
    /// Optional custom embedder from the `embedder:` block.
    pub embedder: Option<EmbedderConfig>,
    /// Settings from the `builtins:` block.
    pub builtins: BuiltinsConfig,
    /// Optional explicit `.env` path (relative to the YAML or absolute).
    /// When unset, the runtime walks upward from the start directory
    /// looking for a `.env` file.
    pub env_file: Option<String>,
    /// Optional explicit workspace declaration. When set, it takes
    /// precedence over the CLI `--workspace` / `--source-root` flags
    /// (manifest is the source of truth — same rule as `source_root:`).
    pub workspace: Option<WorkspaceConfig>,
    /// Raw passthrough for downstream-binary-specific manifest keys.
    /// The framework accepts any mapping under `extensions:` and stores
    /// it here without validating the inner keys; downstream consumers
    /// (e.g. kglite-mcp-server) read whatever they need from this map.
    ///
    /// This keeps the framework's strict-unknown-key validation strong
    /// for the surfaces it owns (`builtins`, `workspace`, …) while
    /// letting consumers add their own configuration namespace without
    /// per-key framework round-trips.
    pub extensions: serde_json::Map<String, serde_json::Value>,
    /// Opt-in skills declaration. `SkillsSource::Disabled` is the
    /// default and preserves current MCP behavior (no `prompts/`
    /// surface). When set to any non-`Disabled` value, downstream
    /// binaries pass this to [`crate::server::skills::Registry`] for
    /// loading + composition; the framework then exposes the
    /// resulting skill set via `prompts/list` and `prompts/get`.
    ///
    /// Three-layer composition: the operator-declared sources here
    /// form the root layer; the project-local `<basename>.skills/`
    /// directory (auto-detected) preempts them. See
    /// `dev-documentation/skills-aware-mcp.md` for the full design.
    pub skills: SkillsSource,
}
377
378impl Manifest {
379    /// JSON-friendly representation of the validated manifest for
380    /// FFI / RPC exposure (pyo3 wrappers, JSON-RPC bridges, etc.).
381    ///
382    /// The shape is stable across patch releases: fields can be added
383    /// non-breaking, but key renames or removals are breaking changes.
384    /// When adding a new field to `Manifest`, extend this method too —
385    /// the `to_json_shape_is_stable` test will fail until you do.
386    /// The `extensions` map is passed through unchanged; downstream
387    /// consumers parse their own namespace from it.
388    pub fn to_json(&self) -> serde_json::Value {
389        serde_json::json!({
390            "yaml_path": self.yaml_path.display().to_string(),
391            "name": self.name,
392            "instructions": self.instructions,
393            "overview_prefix": self.overview_prefix,
394            "source_roots": self.source_roots,
395            "trust": {
396                "allow_python_tools": self.trust.allow_python_tools,
397                "allow_embedder": self.trust.allow_embedder,
398                "allow_query_preprocessor": self.trust.allow_query_preprocessor,
399            },
400            "tools": self.tools.iter().map(|t| match t {
401                ToolSpec::Cypher(c) => serde_json::json!({
402                    "kind": "cypher",
403                    "name": c.name,
404                    "cypher": c.cypher,
405                    "description": c.description,
406                    "parameters": c.parameters,
407                }),
408                ToolSpec::Python(p) => serde_json::json!({
409                    "kind": "python",
410                    "name": p.name,
411                    "python": p.python,
412                    "function": p.function,
413                    "description": p.description,
414                    "parameters": p.parameters,
415                }),
416                ToolSpec::Bundled(b) => serde_json::json!({
417                    "kind": "bundled",
418                    "name": b.name,
419                    "description": b.description,
420                    "hidden": b.hidden,
421                    "rename": b.rename,
422                }),
423            }).collect::<Vec<_>>(),
424            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
425                "module": e.module,
426                "class": e.class,
427                "kwargs": e.kwargs,
428            })),
429            "builtins": {
430                "save_graph": self.builtins.save_graph,
431                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
432            },
433            "env_file": self.env_file,
434            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
435                "kind": w.kind.as_str(),
436                "root": w.root,
437                "watch": w.watch,
438                "applies_to": w.applies_to.as_ref().map(|a| match a {
439                    AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
440                    AppliesTo::Patterns(ps) => serde_json::Value::Array(
441                        ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
442                    ),
443                }),
444            })),
445            "extensions": self.extensions,
446            "skills": self.skills_to_json(),
447        })
448    }
449
450    /// JSON shape for the parsed `skills:` field. Emits the operator-
451    /// declared shape unchanged (modulo normalisation), suitable for
452    /// downstream pyo3 wrappers that need to introspect what the
453    /// manifest declared without re-running the parser.
454    ///
455    /// Phase 1a (this file) emits the raw declaration only. Phase 1b
456    /// adds a separate accessor on the resolved registry that exposes
457    /// the *post-resolution* skill list with provenance — that's the
458    /// per-skill `{path, origin, frontmatter}` shape kglite asked for
459    /// in their feedback. The two surfaces are intentionally
460    /// distinct: this method describes the manifest, the
461    /// registry method describes the runtime resolution.
462    fn skills_to_json(&self) -> serde_json::Value {
463        match &self.skills {
464            SkillsSource::Disabled => serde_json::Value::Bool(false),
465            SkillsSource::Sources(sources) => {
466                let arr: Vec<serde_json::Value> = sources
467                    .iter()
468                    .map(|s| match s {
469                        SkillSource::Bundled => serde_json::Value::Bool(true),
470                        SkillSource::Path(p) => serde_json::Value::String(p.clone()),
471                    })
472                    .collect();
473                serde_json::Value::Array(arr)
474            }
475        }
476    }
477}
478
/// Looks for a `<stem>_mcp.yaml` manifest sitting next to a graph file.
///
/// `<stem>` is the file stem of `graph_path` (lossily converted to
/// UTF-8). Returns the candidate path only if it exists and is a
/// regular file; returns `None` otherwise, including when
/// `graph_path` has no file stem or no parent component.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let file_name = format!("{}_mcp.yaml", graph_path.file_stem()?.to_string_lossy());
    let sibling = graph_path.parent()?.join(file_name);
    sibling.is_file().then_some(sibling)
}
490
491/// Auto-detect ``workspace_mcp.yaml`` for a workspace directory.
492///
493/// Checks two locations in strict priority order:
494///
495/// 1. **Primary** — ``<workspace_dir>/workspace_mcp.yaml``. The
496///    documented and recommended location. If this exists, it is
497///    returned unconditionally; the parent-walk fallback is NOT
498///    consulted even if a parent manifest also exists. No opt-in
499///    declaration required — the manifest sitting inside the
500///    workspace dir is itself the operator's intent.
501/// 2. **Parent-walk fallback** —
502///    ``<workspace_dir>/../workspace_mcp.yaml``. Triggered only when
503///    the primary is absent AND the parent manifest *declares* it
504///    applies to this specific workspace dir via the
505///    ``workspace.applies_to:`` field:
506///
507///    ```yaml
508///    # open_source/workspace_mcp.yaml
509///    workspace:
510///      kind: github
511///      applies_to: ./repos     # required for parent-walk discovery
512///    ```
513///
514///    The framework loads the parent manifest, canonicalises
515///    ``manifest.workspace.applies_to`` against the manifest's parent
516///    directory, and compares it to the actual ``workspace_dir``.
517///    Match → manifest is returned. No declaration or path mismatch
518///    → discovery returns ``None`` (operator must pass
519///    ``--mcp-config`` explicitly).
520///
521///    The natural layout for github-clone-tracker workspaces is:
522///
523///    ```text
524///    open_source/
525///    ├── workspace_mcp.yaml     # config sits beside the sandbox; declares
526///    │                          # workspace.applies_to: ./repos
527///    └── repos/                 # --workspace points here
528///    ```
529///
530///    The ``applies_to`` opt-in eliminates the accidental-discovery
531///    footgun where a manifest in a project root would auto-attach to
532///    any unrelated sibling dir. Operators who didn't author the
533///    manifest get the safe default (no auto-detection); operators
534///    who did get the ergonomic UX (no ``--mcp-config`` boilerplate).
535///
536/// Bounded to one level up; will not walk past the filesystem root.
537/// Symlink-safe via canonicalisation. Added per kglite operator
538/// feedback after the 0.6.x → 0.9.x migration audit.
539pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
540    let primary = workspace_dir.join("workspace_mcp.yaml");
541    if primary.is_file() {
542        return Some(primary);
543    }
544    // Parent-walk fallback. Compare against canonicalised paths to
545    // handle "/" (where parent == self) and symlinks consistently.
546    let parent = workspace_dir.parent()?;
547    let workspace_resolved = workspace_dir.canonicalize().ok()?;
548    let parent_resolved = parent.canonicalize().ok()?;
549    if parent_resolved == workspace_resolved {
550        // No real parent (filesystem root).
551        return None;
552    }
553    let fallback = parent.join("workspace_mcp.yaml");
554    if !fallback.is_file() {
555        return None;
556    }
557
558    // The fallback manifest must declare workspace.applies_to and
559    // that declaration must canonicalise to the actual workspace_dir.
560    // Otherwise the discovery is unsafe (could be accidental).
561    let manifest = match load(&fallback) {
562        Ok(m) => m,
563        Err(e) => {
564            tracing::warn!(
565                manifest = %fallback.display(),
566                error = %e,
567                "parent-walk manifest exists but failed to parse; ignoring"
568            );
569            return None;
570        }
571    };
572    let declared = manifest
573        .workspace
574        .as_ref()
575        .and_then(|w| w.applies_to.as_ref());
576    let Some(declared_applies_to) = declared else {
577        tracing::info!(
578            manifest = %fallback.display(),
579            "parent-walk manifest does not declare workspace.applies_to; \
580             ignoring (set workspace.applies_to: <pattern> to opt in)"
581        );
582        return None;
583    };
584    // Match the workspace dir's basename against the declared pattern(s).
585    // The parent-walk guarantee (workspace_dir.parent() == manifest_dir)
586    // is already established above — only the basename match is left.
587    let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
588        return None; // path with no usable basename, defensive
589    };
590    let patterns: Vec<&str> = match declared_applies_to {
591        AppliesTo::Pattern(p) => vec![p.as_str()],
592        AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
593    };
594    let matched = patterns.iter().any(|pat| {
595        match globset::Glob::new(pat) {
596            Ok(g) => g.compile_matcher().is_match(basename),
597            Err(_) => {
598                // Should not happen — patterns were validated at parse
599                // time. Defensive: treat as non-match.
600                false
601            }
602        }
603    });
604    if matched {
605        tracing::info!(
606            workspace_dir = %workspace_dir.display(),
607            manifest = %fallback.display(),
608            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
609        );
610        Some(fallback)
611    } else {
612        tracing::info!(
613            workspace_dir = %workspace_resolved.display(),
614            manifest = %fallback.display(),
615            basename = %basename,
616            patterns = ?patterns,
617            "parent-walk manifest's workspace.applies_to does not match \
618             this workspace_dir's basename; ignoring"
619        );
620        None
621    }
622}
623
624/// Parse and validate a manifest YAML file.
625pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
626    let text = fs::read_to_string(yaml_path)
627        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
628    let raw: serde_yaml::Value = serde_yaml::from_str(&text)
629        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
630    let raw = match raw {
631        serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
632        v => v,
633    };
634    let map = raw
635        .as_mapping()
636        .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
637    build(map, yaml_path)
638}
639
640fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
641    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
642
643    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
644        return Err(ManifestError::at(
645            yaml_path,
646            "specify either source_root (str) or source_roots (list), not both",
647        ));
648    }
649
650    let mut source_roots: Vec<String> = Vec::new();
651    if let Some(v) = raw.get("source_root") {
652        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
653            ManifestError::at(yaml_path, "source_root must be a non-empty string")
654        })?;
655        source_roots.push(s.to_string());
656    } else if let Some(v) = raw.get("source_roots") {
657        let seq = v.as_sequence().ok_or_else(|| {
658            ManifestError::at(
659                yaml_path,
660                "source_roots must be a list of non-empty strings",
661            )
662        })?;
663        if seq.is_empty() {
664            return Err(ManifestError::at(
665                yaml_path,
666                "source_roots must be non-empty when set",
667            ));
668        }
669        for item in seq {
670            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
671                ManifestError::at(
672                    yaml_path,
673                    "source_roots must be a list of non-empty strings",
674                )
675            })?;
676            source_roots.push(s.to_string());
677        }
678    }
679
680    let trust = build_trust(raw.get("trust"), yaml_path)?;
681    let tools = build_tools(raw.get("tools"), yaml_path)?;
682    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
683    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
684    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
685    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
686    let skills = build_skills(raw.get("skills"), yaml_path)?;
687
688    Ok(Manifest {
689        yaml_path: yaml_path.to_path_buf(),
690        name: optional_str(raw, "name", yaml_path)?,
691        instructions: optional_str(raw, "instructions", yaml_path)?,
692        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
693        source_roots,
694        trust,
695        tools,
696        embedder,
697        builtins,
698        env_file: optional_str(raw, "env_file", yaml_path)?,
699        workspace,
700        extensions,
701        skills,
702    })
703}
704
705/// Parse the polymorphic `skills:` field. Accepts:
706///
707/// - **Absent or `false`** → [`SkillsSource::Disabled`]. Pure-current
708///   MCP behavior. This is the default and what existing deployments
709///   resolve to without any YAML change.
710/// - **`skills: true`** → single bundled source. Sugar for
711///   `skills: [true]`.
712/// - **`skills: <path-string>`** → single path source. Sugar for
713///   `skills: [<path>]`.
714/// - **`skills: [bool, string, ...]`** → ordered list. Booleans MUST
715///   be `true` (the bundled marker); `false` is rejected at parse
716///   time as nonsense in list context. Each path is stored verbatim
717///   as the operator wrote it; resolution against the manifest's
718///   parent dir happens at registry-build time, not here.
719///
720/// Empty lists are accepted and parsed as `SkillsSource::Sources(vec![])`;
721/// the registry treats them as "skills opted in but no root layer,"
722/// meaning the project-local `<basename>.skills/` auto-detection
723/// still fires while the bundled + custom-path layers stay empty.
724/// Useful for operators who want to rely solely on adjacent project
725/// skills.
726fn build_skills(
727    raw: Option<&serde_yaml::Value>,
728    yaml_path: &Path,
729) -> Result<SkillsSource, ManifestError> {
730    use serde_yaml::Value;
731
732    match raw {
733        None | Some(Value::Null) | Some(Value::Bool(false)) => Ok(SkillsSource::Disabled),
734        Some(Value::Bool(true)) => Ok(SkillsSource::Sources(vec![SkillSource::Bundled])),
735        Some(Value::String(s)) => {
736            if s.is_empty() {
737                return Err(ManifestError::at(
738                    yaml_path,
739                    "skills: path must be a non-empty string",
740                ));
741            }
742            Ok(SkillsSource::Sources(vec![SkillSource::Path(s.clone())]))
743        }
744        Some(Value::Sequence(seq)) => {
745            let mut sources = Vec::with_capacity(seq.len());
746            for (idx, item) in seq.iter().enumerate() {
747                match item {
748                    Value::Bool(true) => sources.push(SkillSource::Bundled),
749                    Value::Bool(false) => {
750                        return Err(ManifestError::at(
751                            yaml_path,
752                            format!(
753                                "skills[{idx}]: `false` is not a valid entry in a `skills:` \
754                                 list (only `true` for bundled, or a path string)"
755                            ),
756                        ));
757                    }
758                    Value::String(s) => {
759                        if s.is_empty() {
760                            return Err(ManifestError::at(
761                                yaml_path,
762                                format!("skills[{idx}]: path must be a non-empty string"),
763                            ));
764                        }
765                        sources.push(SkillSource::Path(s.clone()));
766                    }
767                    _ => {
768                        return Err(ManifestError::at(
769                            yaml_path,
770                            format!(
771                                "skills[{idx}]: each entry must be `true` (for bundled) or a \
772                                 path string"
773                            ),
774                        ));
775                    }
776                }
777            }
778            Ok(SkillsSource::Sources(sources))
779        }
780        Some(_) => Err(ManifestError::at(
781            yaml_path,
782            "skills must be `false`, `true`, a path string, or a list of \
783             (true | path string) entries",
784        )),
785    }
786}
787
788fn build_extensions(
789    raw: Option<&serde_yaml::Value>,
790    yaml_path: &Path,
791) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
792    let Some(raw) = raw else {
793        return Ok(serde_json::Map::new());
794    };
795    if matches!(raw, serde_yaml::Value::Null) {
796        return Ok(serde_json::Map::new());
797    }
798    if !raw.is_mapping() {
799        return Err(ManifestError::at(
800            yaml_path,
801            "extensions must be a mapping (downstream-binary-specific keys)",
802        ));
803    }
804    match yaml_to_json(raw.clone())? {
805        serde_json::Value::Object(o) => Ok(o),
806        _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
807    }
808}
809
810fn build_workspace(
811    raw: Option<&serde_yaml::Value>,
812    yaml_path: &Path,
813) -> Result<Option<WorkspaceConfig>, ManifestError> {
814    let Some(raw) = raw else { return Ok(None) };
815    if matches!(raw, serde_yaml::Value::Null) {
816        return Ok(None);
817    }
818    let map = raw
819        .as_mapping()
820        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
821    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
822    let kind = match map.get("kind") {
823        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
824        Some(serde_yaml::Value::String(s)) => match s.as_str() {
825            "github" => WorkspaceKind::Github,
826            "local" => WorkspaceKind::Local,
827            other => {
828                return Err(ManifestError::at(
829                    yaml_path,
830                    format!(
831                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
832                    ),
833                ));
834            }
835        },
836        Some(_) => {
837            return Err(ManifestError::at(
838                yaml_path,
839                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
840            ))
841        }
842    };
843    let root = match map.get("root") {
844        None | Some(serde_yaml::Value::Null) => None,
845        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
846        _ => {
847            return Err(ManifestError::at(
848                yaml_path,
849                "workspace.root must be a non-empty string",
850            ))
851        }
852    };
853    let watch = match map.get("watch") {
854        None | Some(serde_yaml::Value::Null) => false,
855        Some(serde_yaml::Value::Bool(b)) => *b,
856        Some(_) => {
857            return Err(ManifestError::at(
858                yaml_path,
859                "workspace.watch must be a bool",
860            ))
861        }
862    };
863    let applies_to =
864        match map.get("applies_to") {
865            None | Some(serde_yaml::Value::Null) => None,
866            Some(serde_yaml::Value::String(s)) => {
867                Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
868            }
869            Some(serde_yaml::Value::Sequence(seq)) => {
870                if seq.is_empty() {
871                    return Err(ManifestError::at(
872                        yaml_path,
873                        "workspace.applies_to: list must contain at least one pattern",
874                    ));
875                }
876                let mut patterns = Vec::with_capacity(seq.len());
877                for (i, item) in seq.iter().enumerate() {
878                    let s = item.as_str().ok_or_else(|| {
879                        ManifestError::at(
880                            yaml_path,
881                            format!("workspace.applies_to[{i}] must be a string"),
882                        )
883                    })?;
884                    let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
885                        ManifestError::at(
886                            yaml_path,
887                            format!("workspace.applies_to[{i}]: {}", e.message),
888                        )
889                    })?;
890                    patterns.push(cleaned);
891                }
892                Some(AppliesTo::Patterns(patterns))
893            }
894            _ => return Err(ManifestError::at(
895                yaml_path,
896                "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
897            )),
898        };
899    if kind == WorkspaceKind::Local && root.is_none() {
900        return Err(ManifestError::at(
901            yaml_path,
902            "workspace.kind: local requires workspace.root to be set",
903        ));
904    }
905    if kind == WorkspaceKind::Github && watch {
906        return Err(ManifestError::at(
907            yaml_path,
908            "workspace.watch is only valid with workspace.kind: local",
909        ));
910    }
911    Ok(Some(WorkspaceConfig {
912        kind,
913        root,
914        watch,
915        applies_to,
916    }))
917}
918
919/// Parse + validate a single ``workspace.applies_to`` entry. Accepts
920/// any glob pattern matching a single path segment (no embedded
921/// slashes, no `..`). The leading ``./`` is optional and stripped.
922/// Validates glob syntax via `globset::Glob::new` so invalid patterns
923/// surface clear errors at boot.
924///
925/// Returns the cleaned pattern string (without `./` prefix) on
926/// success.
927fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
928    let trimmed = raw.trim();
929    if trimmed.is_empty() {
930        return Err(ManifestError::at(
931            yaml_path,
932            "workspace.applies_to: pattern must not be empty",
933        ));
934    }
935    // Strip a single leading `./` for ergonomic equivalence between
936    // `./repos` and `repos`. Both forms commonly appear in operator
937    // muscle memory; normalise so storage + glob matching is uniform.
938    let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
939    if stripped.is_empty() {
940        return Err(ManifestError::at(
941            yaml_path,
942            "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
943        ));
944    }
945    if stripped.contains('/') {
946        return Err(ManifestError::at(
947            yaml_path,
948            format!(
949                "workspace.applies_to: pattern {raw:?} must be a single path segment \
950                 (no embedded `/`) — parent-walk discovery is bounded to one level"
951            ),
952        ));
953    }
954    if stripped == ".." || stripped.starts_with("../") {
955        return Err(ManifestError::at(
956            yaml_path,
957            format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
958        ));
959    }
960    if Path::new(stripped).is_absolute() {
961        return Err(ManifestError::at(
962            yaml_path,
963            format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
964        ));
965    }
966    // Validate glob syntax. Construct a Glob to surface any syntax
967    // errors immediately — we don't keep the compiled form (cheap to
968    // re-compile at match time, keeps `WorkspaceConfig` Clone-cheap).
969    globset::Glob::new(stripped).map_err(|e| {
970        ManifestError::at(
971            yaml_path,
972            format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
973        )
974    })?;
975    Ok(stripped.to_string())
976}
977
978fn check_keys(
979    map: &serde_yaml::Mapping,
980    allowed: &[&str],
981    label: &str,
982    yaml_path: &Path,
983) -> Result<(), ManifestError> {
984    let mut unknown: Vec<String> = Vec::new();
985    for (k, _) in map {
986        let key = k.as_str().unwrap_or("<non-string-key>");
987        if !allowed.contains(&key) {
988            unknown.push(key.to_string());
989        }
990    }
991    if !unknown.is_empty() {
992        unknown.sort();
993        return Err(ManifestError::at(
994            yaml_path,
995            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
996        ));
997    }
998    Ok(())
999}
1000
1001fn optional_str(
1002    raw: &serde_yaml::Mapping,
1003    key: &str,
1004    yaml_path: &Path,
1005) -> Result<Option<String>, ManifestError> {
1006    match raw.get(key) {
1007        None | Some(serde_yaml::Value::Null) => Ok(None),
1008        Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
1009        Some(_) => Err(ManifestError::at(
1010            yaml_path,
1011            format!("{key} must be a string"),
1012        )),
1013    }
1014}
1015
1016fn build_trust(
1017    raw: Option<&serde_yaml::Value>,
1018    yaml_path: &Path,
1019) -> Result<TrustConfig, ManifestError> {
1020    let Some(raw) = raw else {
1021        return Ok(TrustConfig::default());
1022    };
1023    let map = raw
1024        .as_mapping()
1025        .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
1026    check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
1027    let mut cfg = TrustConfig::default();
1028    if let Some(v) = map.get("allow_python_tools") {
1029        cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
1030            ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
1031        })?;
1032    }
1033    if let Some(v) = map.get("allow_embedder") {
1034        cfg.allow_embedder = v
1035            .as_bool()
1036            .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
1037    }
1038    if let Some(v) = map.get("allow_query_preprocessor") {
1039        cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
1040            ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
1041        })?;
1042    }
1043    Ok(cfg)
1044}
1045
1046fn build_tools(
1047    raw: Option<&serde_yaml::Value>,
1048    yaml_path: &Path,
1049) -> Result<Vec<ToolSpec>, ManifestError> {
1050    let Some(raw) = raw else {
1051        return Ok(Vec::new());
1052    };
1053    let seq = raw
1054        .as_sequence()
1055        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
1056    let mut tools: Vec<ToolSpec> = Vec::new();
1057    let mut seen: BTreeMap<String, ()> = BTreeMap::new();
1058    for (i, entry) in seq.iter().enumerate() {
1059        let tool = build_tool(entry, i, yaml_path)?;
1060        let name = tool.name().to_string();
1061        if seen.insert(name.clone(), ()).is_some() {
1062            return Err(ManifestError::at(
1063                yaml_path,
1064                format!("duplicate tool name: {name:?}"),
1065            ));
1066        }
1067        tools.push(tool);
1068    }
1069    Ok(tools)
1070}
1071
1072fn build_tool(
1073    entry: &serde_yaml::Value,
1074    idx: usize,
1075    yaml_path: &Path,
1076) -> Result<ToolSpec, ManifestError> {
1077    let map = entry
1078        .as_mapping()
1079        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
1080    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
1081
1082    // Kind detection. `cypher` and `python` are tool-creation kinds
1083    // (operator declares a new named tool); `bundled` is a tool-
1084    // override kind (operator picks a bundled tool name and customises
1085    // its agent-facing surface). Exactly one must be present.
1086    let has_cypher = map.contains_key("cypher");
1087    let has_python = map.contains_key("python");
1088    let has_bundled = map.contains_key("bundled");
1089    let kinds_present: Vec<&str> = [
1090        ("cypher", has_cypher),
1091        ("python", has_python),
1092        ("bundled", has_bundled),
1093    ]
1094    .into_iter()
1095    .filter(|(_, p)| *p)
1096    .map(|(k, _)| k)
1097    .collect();
1098    if kinds_present.is_empty() {
1099        return Err(ManifestError::at(
1100            yaml_path,
1101            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
1102        ));
1103    }
1104    if kinds_present.len() > 1 {
1105        return Err(ManifestError::at(
1106            yaml_path,
1107            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
1108        ));
1109    }
1110
1111    // The `bundled` kind takes its name from the `bundled:` value
1112    // itself (e.g. `bundled: cypher_query`) and forbids the
1113    // tool-creation fields. Branch early so we don't run the
1114    // tool-creation `name:` requirement against an override entry.
1115    if has_bundled {
1116        return build_bundled_override(map, idx, yaml_path);
1117    }
1118
1119    let name = map
1120        .get("name")
1121        .and_then(|v| v.as_str())
1122        .filter(|s| valid_identifier(s))
1123        .ok_or_else(|| {
1124            ManifestError::at(
1125                yaml_path,
1126                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
1127            )
1128        })?
1129        .to_string();
1130
1131    // `hidden:` is only valid on bundled overrides (`hidden:`-flagging
1132    // a tool you're declaring inline doesn't make sense — just don't
1133    // declare it). Reject early so the operator gets a clear error.
1134    if map.contains_key("hidden") {
1135        return Err(ManifestError::at(
1136            yaml_path,
1137            format!(
1138                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
1139            ),
1140        ));
1141    }
1142
1143    let description = match map.get("description") {
1144        None | Some(serde_yaml::Value::Null) => None,
1145        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1146        Some(_) => {
1147            return Err(ManifestError::at(
1148                yaml_path,
1149                format!("tools[{idx}] ({name:?}).description must be a string"),
1150            ))
1151        }
1152    };
1153
1154    let parameters = match map.get("parameters") {
1155        None | Some(serde_yaml::Value::Null) => None,
1156        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
1157        Some(_) => {
1158            return Err(ManifestError::at(
1159                yaml_path,
1160                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
1161            ))
1162        }
1163    };
1164
1165    if has_cypher {
1166        let cypher = map
1167            .get("cypher")
1168            .and_then(|v| v.as_str())
1169            .filter(|s| !s.trim().is_empty())
1170            .ok_or_else(|| {
1171                ManifestError::at(
1172                    yaml_path,
1173                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
1174                )
1175            })?
1176            .to_string();
1177        return Ok(ToolSpec::Cypher(CypherTool {
1178            name,
1179            cypher,
1180            description,
1181            parameters,
1182        }));
1183    }
1184
1185    // python tool
1186    let python = map
1187        .get("python")
1188        .and_then(|v| v.as_str())
1189        .filter(|s| !s.is_empty())
1190        .ok_or_else(|| {
1191            ManifestError::at(
1192                yaml_path,
1193                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1194            )
1195        })?
1196        .to_string();
1197    let function = map
1198        .get("function")
1199        .and_then(|v| v.as_str())
1200        .filter(|s| valid_identifier(s))
1201        .ok_or_else(|| {
1202            ManifestError::at(
1203                yaml_path,
1204                format!(
1205                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1206                ),
1207            )
1208        })?
1209        .to_string();
1210    Ok(ToolSpec::Python(PythonTool {
1211        name,
1212        python,
1213        function,
1214        description,
1215        parameters,
1216    }))
1217}
1218
1219/// Parse a `bundled:` override entry from `tools[idx]`. The caller
1220/// (`build_tool`) has already established that the entry has
1221/// `bundled:` set as the kind discriminator.
1222fn build_bundled_override(
1223    map: &serde_yaml::Mapping,
1224    idx: usize,
1225    yaml_path: &Path,
1226) -> Result<ToolSpec, ManifestError> {
1227    let name = map
1228        .get("bundled")
1229        .and_then(|v| v.as_str())
1230        .filter(|s| valid_identifier(s))
1231        .ok_or_else(|| {
1232            ManifestError::at(
1233                yaml_path,
1234                format!(
1235                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1236                     (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1237                ),
1238            )
1239        })?
1240        .to_string();
1241
1242    // Tool-creation fields are forbidden on override entries — the
1243    // override only customises an existing bundled tool's surface,
1244    // it doesn't declare a new tool. Catch these at parse time so
1245    // operators get a clear error rather than silent confusion.
1246    for forbidden in ["name", "parameters", "function"] {
1247        if map.contains_key(forbidden) {
1248            return Err(ManifestError::at(
1249                yaml_path,
1250                format!(
1251                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1252                     (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
1253                ),
1254            ));
1255        }
1256    }
1257
1258    let description = match map.get("description") {
1259        None | Some(serde_yaml::Value::Null) => None,
1260        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1261        Some(_) => {
1262            return Err(ManifestError::at(
1263                yaml_path,
1264                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1265            ))
1266        }
1267    };
1268
1269    let hidden = match map.get("hidden") {
1270        None | Some(serde_yaml::Value::Null) => false,
1271        Some(serde_yaml::Value::Bool(b)) => *b,
1272        Some(_) => {
1273            return Err(ManifestError::at(
1274                yaml_path,
1275                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1276            ))
1277        }
1278    };
1279
1280    // 0.3.34: optional per-deployment rename. Validated as an
1281    // identifier here; cross-tool collision check is the consumer's
1282    // job (it knows what other names — bundled, cypher, python — it
1283    // has in scope).
1284    let rename = match map.get("rename") {
1285        None | Some(serde_yaml::Value::Null) => None,
1286        Some(serde_yaml::Value::String(s)) => {
1287            if !valid_identifier(s) {
1288                return Err(ManifestError::at(
1289                    yaml_path,
1290                    format!(
1291                        "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
1292                         (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
1293                    ),
1294                ));
1295            }
1296            Some(s.clone())
1297        }
1298        Some(_) => {
1299            return Err(ManifestError::at(
1300                yaml_path,
1301                format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
1302            ))
1303        }
1304    };
1305
1306    Ok(ToolSpec::Bundled(BundledOverride {
1307        name,
1308        description,
1309        hidden,
1310        rename,
1311    }))
1312}
1313
1314fn build_embedder(
1315    raw: Option<&serde_yaml::Value>,
1316    yaml_path: &Path,
1317) -> Result<Option<EmbedderConfig>, ManifestError> {
1318    let Some(raw) = raw else { return Ok(None) };
1319    if matches!(raw, serde_yaml::Value::Null) {
1320        return Ok(None);
1321    }
1322    let map = raw
1323        .as_mapping()
1324        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1325    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1326    let module = map
1327        .get("module")
1328        .and_then(|v| v.as_str())
1329        .filter(|s| !s.is_empty())
1330        .ok_or_else(|| {
1331            ManifestError::at(
1332                yaml_path,
1333                "embedder.module must be a non-empty string (path or dotted name)",
1334            )
1335        })?
1336        .to_string();
1337    let class = map
1338        .get("class")
1339        .and_then(|v| v.as_str())
1340        .filter(|s| valid_identifier(s))
1341        .ok_or_else(|| {
1342            ManifestError::at(
1343                yaml_path,
1344                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1345            )
1346        })?
1347        .to_string();
1348    let kwargs = match map.get("kwargs") {
1349        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1350        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1351            serde_json::Value::Object(o) => o,
1352            _ => {
1353                return Err(ManifestError::at(
1354                    yaml_path,
1355                    "embedder.kwargs must be a mapping",
1356                ))
1357            }
1358        },
1359        Some(_) => {
1360            return Err(ManifestError::at(
1361                yaml_path,
1362                "embedder.kwargs must be a mapping",
1363            ))
1364        }
1365    };
1366    Ok(Some(EmbedderConfig {
1367        module,
1368        class,
1369        kwargs,
1370    }))
1371}
1372
1373fn build_builtins(
1374    raw: Option<&serde_yaml::Value>,
1375    yaml_path: &Path,
1376) -> Result<BuiltinsConfig, ManifestError> {
1377    let Some(raw) = raw else {
1378        return Ok(BuiltinsConfig::default());
1379    };
1380    if matches!(raw, serde_yaml::Value::Null) {
1381        return Ok(BuiltinsConfig::default());
1382    }
1383    let map = raw
1384        .as_mapping()
1385        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1386    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1387    let mut cfg = BuiltinsConfig::default();
1388    if let Some(v) = map.get("save_graph") {
1389        cfg.save_graph = v
1390            .as_bool()
1391            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1392    }
1393    if let Some(v) = map.get("temp_cleanup") {
1394        let s = v.as_str().ok_or_else(|| {
1395            ManifestError::at(
1396                yaml_path,
1397                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1398            )
1399        })?;
1400        cfg.temp_cleanup = match s {
1401            "never" => TempCleanup::Never,
1402            "on_overview" => TempCleanup::OnOverview,
1403            other => {
1404                return Err(ManifestError::at(
1405                    yaml_path,
1406                    format!(
1407                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1408                    ),
1409                ))
1410            }
1411        };
1412    }
1413    Ok(cfg)
1414}
1415
/// Returns `true` when `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`: an
/// ASCII letter or underscore followed by any number of ASCII
/// letters, digits, or underscores. The empty string is not valid.
fn valid_identifier(s: &str) -> bool {
    // Byte-wise is sufficient: every byte of a non-ASCII character is
    // >= 0x80 and therefore fails both predicates below.
    match s.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            (head.is_ascii_alphabetic() || *head == b'_')
                && tail.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'_')
        }
    }
}
1424
1425fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1426    serde_json::to_value(&v)
1427        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1428}
1429
// Private unit struct deriving `Debug` and `Deserialize`; nothing in
// the visible part of this file references it.
// NOTE(review): presumably kept so the `serde::Deserialize` import at
// the top of the file stays in use — confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
1432
1433#[cfg(test)]
1434mod tests {
1435    use super::*;
1436
1437    fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1438        let mut f = tempfile::NamedTempFile::new().unwrap();
1439        std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1440        f
1441    }
1442
1443    #[test]
1444    fn loads_minimal_empty_manifest() {
1445        let f = write_tmp("");
1446        let m = load(f.path()).unwrap();
1447        assert_eq!(m.tools.len(), 0);
1448        assert_eq!(m.source_roots.len(), 0);
1449        assert!(!m.trust.allow_python_tools);
1450        assert!(!m.trust.allow_embedder);
1451        assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1452    }
1453
1454    #[test]
1455    fn loads_name_and_instructions() {
1456        let f = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
1457        let m = load(f.path()).unwrap();
1458        assert_eq!(m.name.as_deref(), Some("Demo"));
1459        assert!(m.instructions.unwrap().contains("multi-line"));
1460    }
1461
1462    #[test]
1463    fn rejects_unknown_top_key() {
1464        let f = write_tmp("bogus: 1\n");
1465        let err = load(f.path()).unwrap_err();
1466        assert!(err.message.contains("unknown top-level"));
1467    }
1468
1469    #[test]
1470    fn source_root_string_normalises_to_list() {
1471        let f = write_tmp("source_root: ./data\n");
1472        let m = load(f.path()).unwrap();
1473        assert_eq!(m.source_roots, vec!["./data".to_string()]);
1474    }
1475
1476    #[test]
1477    fn source_roots_list_preserved() {
1478        let f = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
1479        let m = load(f.path()).unwrap();
1480        assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1481    }
1482
1483    #[test]
1484    fn rejects_both_source_root_and_source_roots() {
1485        let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1486        assert!(load(f.path()).unwrap_err().message.contains("not both"));
1487    }
1488
1489    #[test]
1490    fn cypher_tool_parses() {
1491        let f = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
1492        let m = load(f.path()).unwrap();
1493        assert_eq!(m.tools.len(), 1);
1494        match &m.tools[0] {
1495            ToolSpec::Cypher(t) => {
1496                assert_eq!(t.name, "lookup");
1497                assert!(t.cypher.contains("MATCH"));
1498            }
1499            _ => panic!("expected cypher tool"),
1500        }
1501    }
1502
1503    #[test]
1504    fn python_tool_parses() {
1505        let f =
1506            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
1507        let m = load(f.path()).unwrap();
1508        match &m.tools[0] {
1509            ToolSpec::Python(t) => {
1510                assert_eq!(t.python, "./tools.py");
1511                assert_eq!(t.function, "detail");
1512            }
1513            _ => panic!("expected python tool"),
1514        }
1515    }
1516
1517    #[test]
1518    fn rejects_tool_with_both_kinds() {
1519        let f = write_tmp(
1520            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
1521        );
1522        assert!(load(f.path())
1523            .unwrap_err()
1524            .message
1525            .contains("multiple kinds"));
1526    }
1527
1528    #[test]
1529    fn rejects_tool_with_no_kind() {
1530        let f = write_tmp("tools:\n  - name: x\n");
1531        assert!(load(f.path())
1532            .unwrap_err()
1533            .message
1534            .contains("needs exactly one"));
1535    }
1536
1537    #[test]
1538    fn rejects_duplicate_tool_names() {
1539        let f = write_tmp(
1540            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
1541        );
1542        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1543    }
1544
1545    // ─── Bundled override shape (0.3.31) ────────────────────────
1546
1547    #[test]
1548    fn bundled_override_with_description_parses() {
1549        let f =
1550            write_tmp("tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n");
1551        let m = load(f.path()).unwrap();
1552        assert_eq!(m.tools.len(), 1);
1553        match &m.tools[0] {
1554            ToolSpec::Bundled(b) => {
1555                assert_eq!(b.name, "repo_management");
1556                assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1557                assert!(!b.hidden);
1558            }
1559            _ => panic!("expected bundled override"),
1560        }
1561    }
1562
1563    #[test]
1564    fn bundled_override_with_hidden_parses() {
1565        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: true\n");
1566        let m = load(f.path()).unwrap();
1567        match &m.tools[0] {
1568            ToolSpec::Bundled(b) => {
1569                assert_eq!(b.name, "ping");
1570                assert!(b.hidden);
1571                assert!(b.description.is_none());
1572            }
1573            _ => panic!("expected bundled override"),
1574        }
1575    }
1576
1577    #[test]
1578    fn bundled_override_alongside_cypher_tools_parses() {
1579        let f = write_tmp(
1580            "tools:\n\
1581             \x20\x20- bundled: cypher_query\n\
1582             \x20\x20\x20\x20description: \"Custom server description\"\n\
1583             \x20\x20- name: lookup\n\
1584             \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1585        );
1586        let m = load(f.path()).unwrap();
1587        assert_eq!(m.tools.len(), 2);
1588        assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1589        assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1590    }
1591
#[test]
fn rejects_bundled_with_cypher_kind() {
    // One entry carrying both `bundled:` and `cypher:` declares two tool
    // kinds at once and must be refused.
    let yaml = "tools:\n  - bundled: cypher_query\n    cypher: \"MATCH (n) RETURN n\"\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("multiple kinds"), "got: {}", msg);
}
1603
#[test]
fn rejects_bundled_with_name_field() {
    // A bundled override may not also carry a `name:` key.
    let file = write_tmp("tools:\n  - bundled: ping\n    name: ping\n");
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("cannot set `name:`"), "got: {}", msg);
}
1614
#[test]
fn rejects_bundled_with_parameters_field() {
    // A bundled override may not carry a `parameters:` block.
    let yaml = "tools:\n  - bundled: cypher_query\n    parameters:\n      type: object\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("cannot set `parameters:`"), "got: {}", msg);
}
1626
#[test]
fn rejects_bundled_with_non_bool_hidden() {
    // `hidden:` accepts only booleans; a free-form string is an error.
    let file = write_tmp("tools:\n  - bundled: ping\n    hidden: yes-please\n");
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("hidden must be a bool"), "got: {}", msg);
}
1637
#[test]
fn rejects_hidden_on_cypher_tool() {
    // `hidden:` is reserved for bundled overrides; putting it on a
    // cypher tool must fail with a message that says so.
    let yaml = "tools:\n  - name: lookup\n    cypher: \"MATCH (n) RETURN n\"\n    hidden: true\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    let mentions_rule = msg.contains("`hidden:` is only valid on `bundled:` override entries");
    assert!(mentions_rule, "got: {}", msg);
}
1651
#[test]
fn rejects_duplicate_bundled_overrides() {
    // The dedup check is on tool name; two `bundled: ping` entries
    // share the same name and should be rejected the same way
    // duplicate cypher tools are.
    let f = write_tmp(
        "tools:\n  - bundled: ping\n    hidden: true\n  - bundled: ping\n    description: \"x\"\n",
    );
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, as the sibling rejection
    // tests do, so a wording change is diagnosable from the test log.
    assert!(
        err.message.contains("duplicate"),
        "got: {}",
        err.message
    );
}
1662
#[test]
fn rejects_bundled_with_invalid_identifier() {
    // `bundled:` set to something that is not an identifier ("123-bad")
    // must be rejected.
    // NOTE(review): the asserted substring is "must be a string" although
    // the test name talks about an invalid identifier — presumably the
    // loader's message covers both; confirm against the loader.
    let file = write_tmp("tools:\n  - bundled: \"123-bad\"\n    hidden: true\n");
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("must be a string"), "got: {}", msg);
}
1673
1674    // 0.3.34 — `tools[].bundled: rename:` per-deployment override
#[test]
fn bundled_rename_parses_when_valid_identifier() {
    // `rename:` with a valid identifier is stored next to the original
    // bundled name; the other override fields keep their defaults.
    let yaml = "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    if let ToolSpec::Bundled(entry) = &manifest.tools[0] {
        assert_eq!(entry.name, "cypher_query");
        assert_eq!(entry.rename.as_deref(), Some("legal_cypher_query"));
        assert!(!entry.hidden);
        assert!(entry.description.is_none());
    } else {
        panic!("expected bundled override");
    }
}
1689
#[test]
fn bundled_rename_alongside_description_parses() {
    // `rename:` and `description:` can be combined on one override and
    // both values are kept.
    let yaml = "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n    description: \"Legal-corpus cypher\"\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    if let ToolSpec::Bundled(entry) = &manifest.tools[0] {
        assert_eq!(entry.rename.as_deref(), Some("legal_cypher_query"));
        assert_eq!(entry.description.as_deref(), Some("Legal-corpus cypher"));
    } else {
        panic!("expected bundled override");
    }
}
1704
#[test]
fn bundled_rename_defaults_to_none() {
    // Without a `rename:` key the override's rename stays `None`.
    let yaml = "tools:\n  - bundled: cypher_query\n    description: \"x\"\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    if let ToolSpec::Bundled(entry) = &manifest.tools[0] {
        assert!(entry.rename.is_none());
    } else {
        panic!("expected bundled override");
    }
}
1714
#[test]
fn rejects_bundled_rename_with_invalid_identifier() {
    // A `rename:` value that is not a valid identifier is refused.
    let yaml = "tools:\n  - bundled: cypher_query\n    rename: \"123-bad\"\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(
        msg.contains("rename must be a valid identifier"),
        "got: {}",
        msg
    );
}
1725
#[test]
fn rejects_bundled_rename_with_non_string_value() {
    // A numeric `rename:` value is refused with a type error.
    let yaml = "tools:\n  - bundled: cypher_query\n    rename: 42\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("rename must be a string"), "got: {}", msg);
}
1736
#[test]
fn bundled_rename_serialises_to_json() {
    // The JSON view of a renamed bundled override exposes `kind`, the
    // original `name`, and the `rename` target as strings.
    let yaml = "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    let json = manifest.to_json();
    let tools = json["tools"].as_array().unwrap();
    let first = &tools[0];
    assert_eq!(first["kind"].as_str(), Some("bundled"));
    assert_eq!(first["name"].as_str(), Some("cypher_query"));
    assert_eq!(first["rename"].as_str(), Some("legal_cypher_query"));
}
1754
#[test]
fn bundled_override_to_json_shape() {
    // The JSON view of a bundled override carries kind, name,
    // description and the hidden flag.
    let yaml = "tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n    hidden: false\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    let json = manifest.to_json();
    let entry = &json["tools"][0];
    assert_eq!(entry["kind"], "bundled");
    assert_eq!(entry["name"], "repo_management");
    assert_eq!(entry["description"], "FIRST STEP");
    assert_eq!(entry["hidden"], false);
}
1767
#[test]
fn embedder_parses() {
    // An `embedder:` block yields module, class and kwargs as written.
    let yaml =
        "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    let embedder = manifest.embedder.unwrap();
    assert_eq!(embedder.module, "./e.py");
    assert_eq!(embedder.class, "GraphEmbedder");
    let cooldown = embedder.kwargs.get("cooldown").unwrap();
    assert_eq!(cooldown.as_i64(), Some(900));
}
1779
#[test]
fn builtins_parses_temp_cleanup() {
    // `builtins:` maps `save_graph` to a bool and `temp_cleanup` to the
    // matching enum variant.
    let file = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
    let manifest = load(file.path()).unwrap();
    let builtins = &manifest.builtins;
    assert!(builtins.save_graph);
    assert_eq!(builtins.temp_cleanup, TempCleanup::OnOverview);
}
1787
#[test]
fn rejects_invalid_temp_cleanup() {
    // An unrecognised `temp_cleanup` value must fail, and the error has
    // to name the offending key.
    let f = write_tmp("builtins:\n  temp_cleanup: nuke\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message.contains("temp_cleanup"),
        "got: {}",
        err.message
    );
}
1793
#[test]
fn allow_embedder_trust_parses() {
    // `trust.allow_embedder: true` is reflected on the loaded manifest.
    let file = write_tmp("trust:\n  allow_embedder: true\n");
    let manifest = load(file.path()).unwrap();
    let trust = &manifest.trust;
    assert!(trust.allow_embedder);
}
1800
#[test]
fn allow_query_preprocessor_trust_parses() {
    // Enabling only the query-preprocessor gate leaves the other two
    // trust gates off.
    let file = write_tmp("trust:\n  allow_query_preprocessor: true\n");
    let manifest = load(file.path()).unwrap();
    let trust = &manifest.trust;
    assert!(trust.allow_query_preprocessor);
    assert!(!trust.allow_embedder);
    assert!(!trust.allow_python_tools);
}
1809
#[test]
fn allow_query_preprocessor_rejects_non_bool() {
    // A quoted "yes" is a string, not a bool, so the trust gate must
    // refuse it.
    let f = write_tmp("trust:\n  allow_query_preprocessor: \"yes\"\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message
            .contains("allow_query_preprocessor must be a bool"),
        "got: {}",
        err.message
    );
}
1818
#[test]
fn find_sibling_works() {
    // With `demo.kgl` and `demo_mcp.yaml` side by side, sibling
    // discovery from the graph path returns the YAML file.
    let tmp = tempfile::tempdir().unwrap();
    let graph_path = tmp.path().join("demo.kgl");
    let manifest_path = tmp.path().join("demo_mcp.yaml");
    std::fs::write(&graph_path, b"\x00").unwrap();
    std::fs::write(&manifest_path, "name: x\n").unwrap();
    let discovered = find_sibling_manifest(&graph_path);
    assert_eq!(discovered, Some(manifest_path));
}
1828
#[test]
fn workspace_local_parses() {
    // A fully specified local workspace keeps kind, root and watch.
    let file = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
    let manifest = load(file.path()).unwrap();
    let workspace = manifest.workspace.unwrap();
    assert_eq!(workspace.kind, WorkspaceKind::Local);
    assert_eq!(workspace.root.as_deref(), Some("./src"));
    assert!(workspace.watch);
}
1838
#[test]
fn workspace_github_default_kind() {
    // An empty `workspace: {}` mapping defaults to the github kind with
    // no root and watching disabled.
    let file = write_tmp("workspace: {}\n");
    let manifest = load(file.path()).unwrap();
    let workspace = manifest.workspace.unwrap();
    assert_eq!(workspace.kind, WorkspaceKind::Github);
    assert!(workspace.root.is_none());
    assert!(!workspace.watch);
}
1848
#[test]
fn workspace_local_without_root_errors() {
    // `kind: local` without a `root:` must be refused.
    let f = write_tmp("workspace:\n  kind: local\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message.contains("requires workspace.root"),
        "got: {}",
        err.message
    );
}
1855
#[test]
fn workspace_unknown_key_rejected() {
    // Keys outside the allowed workspace set (`bogus` here) are an error.
    let f = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message.contains("unknown workspace keys"),
        "got: {}",
        err.message
    );
}
1862
#[test]
fn workspace_invalid_kind_rejected() {
    // `kind: docker` is not a recognised workspace kind; the error must
    // name `workspace.kind`.
    let f = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message.contains("workspace.kind"),
        "got: {}",
        err.message
    );
}
1869
#[test]
fn workspace_watch_invalid_for_github() {
    // `watch: true` combined with `kind: github` must be refused.
    let f = write_tmp("workspace:\n  kind: github\n  watch: true\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message.contains("watch is only valid"),
        "got: {}",
        err.message
    );
}
1876
#[test]
fn extensions_passthrough_parses() {
    // Scalar and nested values under `extensions:` come back exactly
    // as they were declared.
    let yaml = "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    let extensions = &manifest.extensions;

    let server_flag = extensions.get("csv_http_server").and_then(|v| v.as_bool());
    assert_eq!(server_flag, Some(true));

    let server_dir = extensions
        .get("csv_http_server_dir")
        .and_then(|v| v.as_str());
    assert_eq!(server_dir, Some("temp/"));

    // Nested values pass through unchanged.
    let nested = extensions
        .get("arbitrary")
        .and_then(|v| v.get("nested"))
        .and_then(|v| v.as_i64());
    assert_eq!(nested, Some(1));
}
1904
#[test]
fn extensions_absent_defaults_to_empty() {
    // A manifest with no `extensions:` key loads with an empty map.
    let file = write_tmp("name: x\n");
    let manifest = load(file.path()).unwrap();
    let extensions = &manifest.extensions;
    assert!(extensions.is_empty());
}
1911
#[test]
fn extensions_inner_keys_unvalidated() {
    // The framework intentionally does NOT validate keys inside
    // `extensions:` — they're downstream-binary concerns. Any shape
    // that's a YAML mapping must round-trip.
    let f = write_tmp(
        "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
    );
    let m = load(f.path()).unwrap();

    // Loading without error is not enough to show a round-trip: check
    // that the arbitrary keys and their values are still there.
    assert_eq!(
        m.extensions
            .get("whatever_kglite_wants")
            .and_then(|v| v.as_str()),
        Some("foo")
    );
    let consumer = m.extensions.get("some_other_consumer");
    assert_eq!(
        consumer.and_then(|v| v.get("a")).and_then(|v| v.as_i64()),
        Some(1)
    );
    assert_eq!(
        consumer.and_then(|v| v.get("b")).and_then(|v| v.as_i64()),
        Some(2)
    );
}
1922
#[test]
fn extensions_must_be_a_mapping() {
    // A scalar under `extensions:` is refused; only mappings are allowed.
    let f = write_tmp("extensions: not-a-mapping\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure, matching the diagnostic
    // style of the other rejection tests in this module.
    assert!(
        err.message.contains("extensions must be a mapping"),
        "got: {}",
        err.message
    );
}
1929
#[test]
fn env_file_key_parses() {
    // `env_file:` is stored as the raw string the operator wrote.
    let file = write_tmp("env_file: ../.env\n");
    let manifest = load(file.path()).unwrap();
    let env_file = manifest.env_file.as_deref();
    assert_eq!(env_file, Some("../.env"));
}
1936
#[test]
fn env_file_unset_is_none() {
    // Without an `env_file:` key the field stays `None`.
    let file = write_tmp("name: Demo\n");
    let manifest = load(file.path()).unwrap();
    let env_file = &manifest.env_file;
    assert!(env_file.is_none());
}
1943
#[test]
fn find_workspace_works() {
    // A `workspace_mcp.yaml` directly inside the workspace directory
    // is discovered.
    let tmp = tempfile::tempdir().unwrap();
    let manifest_path = tmp.path().join("workspace_mcp.yaml");
    std::fs::write(&manifest_path, "name: ws\n").unwrap();
    let discovered = find_workspace_manifest(tmp.path());
    assert_eq!(discovered, Some(manifest_path));
}
1951
#[test]
fn find_workspace_walks_one_level_up_with_applies_to() {
    // Directory layout under the temp dir:
    //   parent/workspace_mcp.yaml   (workspace.applies_to: ./repos)
    //   parent/repos/
    // Starting discovery at parent/repos/ has to climb one level and
    // pick up the manifest, because its applies_to names that child.
    let tmp = tempfile::tempdir().unwrap();
    let parent_dir = tmp.path().join("parent");
    std::fs::create_dir(&parent_dir).unwrap();
    let manifest_path = parent_dir.join("workspace_mcp.yaml");
    let manifest_yaml = "workspace:\n  kind: github\n  applies_to: ./repos\n";
    std::fs::write(&manifest_path, manifest_yaml).unwrap();
    let repos_dir = parent_dir.join("repos");
    std::fs::create_dir(&repos_dir).unwrap();

    // Looking directly in the manifest's own directory still works.
    assert_eq!(
        find_workspace_manifest(&parent_dir).as_deref(),
        Some(manifest_path.as_path())
    );

    // The fallback from the child resolves to the same file. Paths are
    // canonicalised before comparing to handle macOS /private/var vs /var.
    let via_child = find_workspace_manifest(&repos_dir).expect("parent fallback should fire");
    let resolved = via_child.canonicalize().unwrap();
    let expected = manifest_path.canonicalize().unwrap();
    assert_eq!(resolved, expected);
}
1981
#[test]
fn find_workspace_ignores_parent_without_applies_to() {
    // The parent directory holds a manifest that never declares
    // workspace.applies_to. The one-level-up fallback must not pick it
    // up: otherwise an unrelated workspace_mcp.yaml could attach itself
    // to whatever --workspace path the operator passes. The opt-in is
    // required.
    let tmp = tempfile::tempdir().unwrap();
    let parent_dir = tmp.path().join("parent");
    std::fs::create_dir(&parent_dir).unwrap();
    let manifest_path = parent_dir.join("workspace_mcp.yaml");
    std::fs::write(&manifest_path, "name: not for repos\n").unwrap();
    let repos_dir = parent_dir.join("repos");
    std::fs::create_dir(&repos_dir).unwrap();

    let discovered = find_workspace_manifest(&repos_dir);
    assert_eq!(
        discovered, None,
        "parent manifest without workspace.applies_to must NOT auto-attach"
    );
}
2003
#[test]
fn find_workspace_ignores_parent_with_mismatched_applies_to() {
    // The parent manifest opts in for ./repos only, while discovery
    // starts from ./other_dir. The names differ, so nothing may be
    // auto-detected.
    let tmp = tempfile::tempdir().unwrap();
    let parent_dir = tmp.path().join("parent");
    std::fs::create_dir(&parent_dir).unwrap();
    let manifest_path = parent_dir.join("workspace_mcp.yaml");
    let manifest_yaml = "workspace:\n  kind: github\n  applies_to: ./repos\n";
    std::fs::write(&manifest_path, manifest_yaml).unwrap();
    let other_dir = parent_dir.join("other_dir");
    std::fs::create_dir(&other_dir).unwrap();

    let discovered = find_workspace_manifest(&other_dir);
    assert_eq!(
        discovered, None,
        "applies_to: ./repos must NOT match --workspace ./other_dir"
    );
}
2027
#[test]
fn find_workspace_applies_to_wildcard_matches_any_child() {
    // `applies_to: '*'` (or './*') stands for "every direct child of
    // the directory holding the manifest". Three unrelated child names
    // must each resolve to that manifest.
    let tmp = tempfile::tempdir().unwrap();
    let parent_dir = tmp.path().join("parent");
    std::fs::create_dir(&parent_dir).unwrap();
    let manifest_path = parent_dir.join("workspace_mcp.yaml");
    let manifest_yaml = "workspace:\n  kind: github\n  applies_to: '*'\n";
    std::fs::write(&manifest_path, manifest_yaml).unwrap();

    for name in ["repos", "clones", "totally-different-name"] {
        let child_dir = parent_dir.join(name);
        std::fs::create_dir(&child_dir).unwrap();
        let discovered =
            find_workspace_manifest(&child_dir).expect("wildcard should match any direct child");
        let resolved = discovered.canonicalize().unwrap();
        let expected = manifest_path.canonicalize().unwrap();
        assert_eq!(resolved, expected, "wildcard should match child {name:?}");
    }
}
2050
#[test]
fn find_workspace_applies_to_glob_matches_prefix() {
    // `applies_to: './prod-*'` covers every direct child whose
    // basename begins with "prod-" and nothing else.
    let tmp = tempfile::tempdir().unwrap();
    let parent_dir = tmp.path().join("parent");
    std::fs::create_dir(&parent_dir).unwrap();
    let manifest_path = parent_dir.join("workspace_mcp.yaml");
    let manifest_yaml = "workspace:\n  kind: github\n  applies_to: ./prod-*\n";
    std::fs::write(&manifest_path, manifest_yaml).unwrap();

    // Each row pairs a child directory name with whether it should
    // match; matching names come first, as in the glob's description.
    let cases = [
        ("prod-api", true),
        ("prod-web", true),
        ("prod-", true),
        ("test-api", false),
        ("stage-web", false),
        ("random", false),
    ];
    for (name, should_match) in cases {
        let child_dir = parent_dir.join(name);
        std::fs::create_dir(&child_dir).unwrap();
        let discovered = find_workspace_manifest(&child_dir);
        if should_match {
            assert!(discovered.is_some(), "prod-* should match {name:?}");
        } else {
            assert_eq!(discovered, None, "prod-* should NOT match {name:?}");
        }
    }
}
2084
#[test]
fn find_workspace_applies_to_list_matches_any_entry() {
    // A list form `applies_to: [./repos, ./clones]` accepts either
    // listed child and refuses every other name.
    let tmp = tempfile::tempdir().unwrap();
    let parent_dir = tmp.path().join("parent");
    std::fs::create_dir(&parent_dir).unwrap();
    let manifest_path = parent_dir.join("workspace_mcp.yaml");
    let manifest_yaml =
        "workspace:\n  kind: github\n  applies_to:\n    - ./repos\n    - ./clones\n";
    std::fs::write(&manifest_path, manifest_yaml).unwrap();

    for name in ["repos", "clones"] {
        let listed_dir = parent_dir.join(name);
        std::fs::create_dir(&listed_dir).unwrap();
        let discovered = find_workspace_manifest(&listed_dir);
        assert!(discovered.is_some(), "list should match {name:?}");
    }

    let unlisted_dir = parent_dir.join("scratch");
    std::fs::create_dir(&unlisted_dir).unwrap();
    let discovered = find_workspace_manifest(&unlisted_dir);
    assert_eq!(
        discovered, None,
        "list with [repos, clones] must NOT match scratch"
    );
}
2114
#[test]
fn applies_to_rejects_deep_path_at_parse_time() {
    // A multi-segment `applies_to` path is refused while loading.
    let yaml = "workspace:\n  kind: github\n  applies_to: ./too/deep/path\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(
        msg.contains("must be a single path segment"),
        "got: {}",
        msg
    );
}
2125
#[test]
fn applies_to_rejects_invalid_glob_at_parse_time() {
    // An unterminated character class is not a valid glob (globset
    // rejects it), so loading must fail.
    let yaml = "workspace:\n  kind: github\n  applies_to: './[unterminated'\n";
    let file = write_tmp(yaml);
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("invalid glob pattern"), "got: {}", msg);
}
2137
#[test]
fn applies_to_rejects_parent_relative() {
    // Bare `..` is caught by the `..` rejection branch. The
    // multi-segment form `../foo` is caught earlier by the
    // single-segment check; either is rejected.
    let f = write_tmp("workspace:\n  kind: github\n  applies_to: '..'\n");
    let err = load(f.path()).unwrap_err();
    // Print the actual message on failure so it is clear which of the
    // two rejection branches produced it.
    assert!(
        err.message.contains("must not contain `..`"),
        "got: {}",
        err.message
    );

    let f2 = write_tmp("workspace:\n  kind: github\n  applies_to: '../up'\n");
    let err2 = load(f2.path()).unwrap_err();
    assert!(
        err2.message.contains("must be a single path segment"),
        "got: {}",
        err2.message
    );
}
2151
#[test]
fn find_workspace_returns_none_when_missing_everywhere() {
    // Neither the child directory nor its parent (the temp dir root)
    // contains a manifest, so discovery finds nothing.
    let tmp = tempfile::tempdir().unwrap();
    let child_dir = tmp.path().join("child");
    std::fs::create_dir(&child_dir).unwrap();
    let discovered = find_workspace_manifest(&child_dir);
    assert_eq!(discovered, None);
}
2160
#[test]
fn find_workspace_primary_wins_over_parent_fallback() {
    // Here a manifest exists both in the workspace directory itself
    // (primary) and one level up (fallback). The parent's applies_to
    // names the child, so it would be a legitimate fallback — yet the
    // primary has to take precedence, as documented on
    // `find_workspace_manifest`. A refactor that flips the lookup
    // order makes this test fail.
    let tmp = tempfile::tempdir().unwrap();
    let parent_manifest = tmp.path().join("workspace_mcp.yaml");
    let parent_yaml = "workspace:\n  kind: github\n  applies_to: ./repos\n";
    std::fs::write(&parent_manifest, parent_yaml).unwrap();
    let child_dir = tmp.path().join("repos");
    std::fs::create_dir(&child_dir).unwrap();
    let child_manifest = child_dir.join("workspace_mcp.yaml");
    std::fs::write(&child_manifest, "name: child\n").unwrap();

    // Starting from the child must yield the child's own manifest and
    // not the parent's. Both sides are canonicalised so the macOS
    // /private/var vs /var symlink does not affect the comparison.
    let discovered = find_workspace_manifest(&child_dir).expect("primary should resolve");
    let resolved = discovered.canonicalize().unwrap();
    let expected = child_manifest.canonicalize().unwrap();
    assert_eq!(
        resolved, expected,
        "primary location must win when both primary and parent fallback exist"
    );
}
2191
#[test]
fn to_json_shape_is_stable() {
    // Pins the complete JSON document produced by `to_json` for a
    // representative manifest, including nulls and defaults for every
    // key the YAML leaves out.
    let file = write_tmp(
        r#"
name: KGLite Codebase
source_roots: [src, lib]
trust:
  allow_embedder: true
embedder:
  module: kglite.embed
  class: SentenceTransformerEmbedder
builtins:
  save_graph: true
  temp_cleanup: on_overview
"#,
    );
    let manifest = load(file.path()).unwrap();
    let yaml_path = file.path().display().to_string();
    let expected = serde_json::json!({
        "yaml_path": yaml_path,
        "name": "KGLite Codebase",
        "instructions": null,
        "overview_prefix": null,
        "source_roots": ["src", "lib"],
        "trust": {
            "allow_python_tools": false,
            "allow_embedder": true,
            "allow_query_preprocessor": false,
        },
        "tools": [],
        "embedder": {
            "module": "kglite.embed",
            "class": "SentenceTransformerEmbedder",
            "kwargs": {},
        },
        "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
        "env_file": null,
        "workspace": null,
        "extensions": {},
        "skills": false,
    });
    let actual = manifest.to_json();
    assert_eq!(actual, expected);
}
2235
#[test]
fn to_json_round_trips_tools_and_workspace() {
    // Loads a manifest that uses tools, workspace, builtins, env_file
    // and extensions together, then spot-checks the JSON view of each.
    let file = write_tmp(
        r#"
name: Full Surface
source_root: ./src
trust:
  allow_python_tools: true
tools:
  - name: nodes_for
    cypher: "MATCH (n {name: $name}) RETURN n"
    description: "fetch nodes by name"
  - name: run_query
    python: tools.py
    function: run
workspace:
  kind: local
  root: /tmp/ws
  watch: true
builtins:
  save_graph: false
env_file: .env.local
extensions:
  kglite:
    flavour: standard
"#,
    );
    let manifest = load(file.path()).unwrap();
    let json = manifest.to_json();

    assert_eq!(json["name"], "Full Surface");
    assert_eq!(json["trust"]["allow_python_tools"], true);

    let workspace = &json["workspace"];
    assert_eq!(workspace["kind"], "local");
    assert_eq!(workspace["root"], "/tmp/ws");
    assert_eq!(workspace["watch"], true);

    assert_eq!(json["env_file"], ".env.local");

    let cypher_tool = &json["tools"][0];
    assert_eq!(cypher_tool["kind"], "cypher");
    assert_eq!(cypher_tool["name"], "nodes_for");

    let python_tool = &json["tools"][1];
    assert_eq!(python_tool["kind"], "python");
    assert_eq!(python_tool["name"], "run_query");
    assert_eq!(python_tool["python"], "tools.py");
    assert_eq!(python_tool["function"], "run");

    assert_eq!(json["extensions"]["kglite"]["flavour"], "standard");
}
2279
2280    // ─── Skills schema (Phase 1a — manifest-level only) ───────────
2281
#[test]
fn skills_disabled_by_default() {
    // No `skills:` key means skills are disabled, and the JSON view
    // shows that as the boolean `false`.
    let file = write_tmp("name: x\n");
    let manifest = load(file.path()).unwrap();
    assert_eq!(manifest.skills, SkillsSource::Disabled);
    let json = manifest.to_json();
    assert_eq!(json["skills"], serde_json::Value::Bool(false));
}
2289
#[test]
fn skills_explicit_false_disabled() {
    // An explicit `skills: false` also loads as `Disabled`.
    let file = write_tmp("name: x\nskills: false\n");
    let manifest = load(file.path()).unwrap();
    let skills = &manifest.skills;
    assert_eq!(*skills, SkillsSource::Disabled);
}
2296
#[test]
fn skills_bool_true_parses_to_single_bundled() {
    // `skills: true` loads as a one-element source list holding the
    // bundled source.
    let file = write_tmp("name: x\nskills: true\n");
    let manifest = load(file.path()).unwrap();
    let expected = SkillsSource::Sources(vec![SkillSource::Bundled]);
    assert_eq!(manifest.skills, expected);
    // In JSON this appears as a list with a single boolean `true`.
    let json = manifest.to_json();
    assert_eq!(json["skills"], serde_json::json!([true]));
}
2306
#[test]
fn skills_path_string_parses_to_single_path() {
    // A bare path string loads as a one-element list with that path.
    let file = write_tmp("name: x\nskills: ./local-skills/\n");
    let manifest = load(file.path()).unwrap();
    let expected = SkillsSource::Sources(vec![SkillSource::Path("./local-skills/".into())]);
    assert_eq!(manifest.skills, expected);
    // The JSON view keeps the operator's path string untouched.
    let json = manifest.to_json();
    assert_eq!(json["skills"], serde_json::json!(["./local-skills/"]));
}
2319
#[test]
fn skills_list_polymorphic_parses() {
    // A list may mix `true` (bundled) with path strings; order and
    // entry kinds are both kept.
    let yaml = "name: x\nskills:\n  - true\n  - ./local-overrides/\n  - ~/shared-skills/\n";
    let file = write_tmp(yaml);
    let manifest = load(file.path()).unwrap();
    let expected = SkillsSource::Sources(vec![
        SkillSource::Bundled,
        SkillSource::Path("./local-overrides/".into()),
        SkillSource::Path("~/shared-skills/".into()),
    ]);
    assert_eq!(manifest.skills, expected);
    // In JSON the bundled entry stays a bool and the paths stay strings.
    let json = manifest.to_json();
    let expected_json = serde_json::json!([true, "./local-overrides/", "~/shared-skills/"]);
    assert_eq!(json["skills"], expected_json);
}
2340
#[test]
fn skills_empty_list_parses_as_opt_in_with_no_root_sources() {
    // `skills: []` is an opt-in with no declared sources: only the
    // auto-detected project layer applies. It must load as
    // `Sources(vec![])` rather than `Disabled`; operators who rely
    // purely on a `<basename>.skills/` directory next to the YAML
    // write it this way.
    let file = write_tmp("name: x\nskills: []\n");
    let manifest = load(file.path()).unwrap();
    let expected = SkillsSource::Sources(vec![]);
    assert_eq!(manifest.skills, expected);
}
2351
#[test]
fn skills_false_in_list_rejected() {
    // `false` inside the list is invalid; the error must point at the
    // offending index and explain why.
    let file = write_tmp("name: x\nskills:\n  - false\n");
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    let names_index = msg.contains("skills[0]");
    let explains_reason = msg.contains("`false` is not a valid entry");
    assert!(names_index && explains_reason, "unexpected: {}", msg);
}
2363
#[test]
fn skills_invalid_type_rejected() {
    // A number is not an accepted shape for `skills:`.
    let file = write_tmp("name: x\nskills: 42\n");
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("skills must be"), "unexpected: {}", msg);
}
2374
#[test]
fn skills_empty_path_string_rejected() {
    // An empty string is not a usable skills path.
    let file = write_tmp("name: x\nskills: \"\"\n");
    let failure = load(file.path()).unwrap_err();
    let msg = &failure.message;
    assert!(msg.contains("non-empty string"), "unexpected: {}", msg);
}
2385
#[test]
fn skills_field_is_purely_additive_on_existing_manifests() {
    // Manifests that predate the skills field have no `skills:` key.
    // They must keep loading and end up with SkillsSource::Disabled —
    // the schema-level form of the "no impact on existing MCP
    // servers" guarantee.
    let file = write_tmp(
        r#"
name: legacy
source_roots: [src]
trust:
  allow_python_tools: true
workspace:
  kind: github
"#,
    );
    let manifest = load(file.path()).unwrap();
    assert_eq!(manifest.skills, SkillsSource::Disabled);
    let json = manifest.to_json();
    assert_eq!(json["skills"], serde_json::Value::Bool(false));
}
2406}