Skip to main content

mcp_methods/server/
manifest.rs

1//! YAML manifest schema + loader.
2//!
3//! A manifest is a YAML file declaring the tools, source roots, custom
4//! embedder, and trust gates the server should apply. The loader parses,
5//! validates, and returns a [`Manifest`]; consumers (CLI wiring, tool
6//! registration) operate on the validated structure.
7//!
8//! Path strings (`source_root`, `python:` tool paths, embedder module)
9//! are kept as the raw user input — relative-to-yaml resolution happens
10//! at the use site so the data stays pure and testable.
11//!
12//! Validation is fail-fast and user-facing: the caller surfaces
13//! [`ManifestError`] messages directly to the operator.
14//!
15//! Schema mirrors the Python `kglite.mcp_server.manifest` module 1:1 so
16//! a manifest written for the Python server boots unchanged on the new
17//! Rust server.
18
19// A handful of fields/helpers are exposed for downstream consumers
20// (e.g. kglite-mcp-server reads `CypherTool::cypher` directly when
21// registering manifest-declared tools) and so look unused from this
22// crate's perspective. Silence dead-code warnings rather than chase
23// every cross-crate use.
24#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest. `build` hands this
/// list to `check_keys`, which rejects any other key.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];

/// Keys accepted inside the `workspace:` mapping (checked by
/// `build_workspace`).
const ALLOWED_WORKSPACE_KEYS: &[&str] = &[
    "kind",
    "root",
    "watch",
    "applies_to",
];

/// Accepted `workspace.kind` strings; quoted in the error raised for
/// an invalid kind.
const VALID_WORKSPACE_KIND: &[&str] = &[
    "github",
    "local",
];

/// Key names mirroring the fields of `TrustConfig` — presumably the
/// allow-list for the `trust:` mapping.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];

/// Key names for a single tool entry — presumably the allow-list for
/// each item under `tools:`.
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
    // 0.3.34: per-deployment rename for bundled tools (the bundled
    // override block already covers `description` and `hidden`; this
    // adds the third axis — what the agent sees in `tools/list`).
    "rename",
];

/// Key names mirroring the fields of `EmbedderConfig` — presumably
/// the allow-list for the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &[
    "module",
    "class",
    "kwargs",
];

/// Key names mirroring the fields of `BuiltinsConfig` — presumably
/// the allow-list for the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &[
    "save_graph",
    "temp_cleanup",
];

/// String forms of the `TempCleanup` variants (see
/// `TempCleanup::as_str`).
const VALID_TEMP_CLEANUP: &[&str] = &[
    "never",
    "on_overview",
];
71
72#[derive(Debug, Error)]
73#[error("{path}: {message}")]
74pub struct ManifestError {
75    pub path: String,
76    pub message: String,
77}
78
79impl ManifestError {
80    pub fn at(path: &Path, message: impl Into<String>) -> Self {
81        Self {
82            path: path.display().to_string(),
83            message: message.into(),
84        }
85    }
86
87    pub fn bare(message: impl Into<String>) -> Self {
88        Self {
89            path: "<manifest>".to_string(),
90            message: message.into(),
91        }
92    }
93}
94
/// Trust gates declared by the manifest. Every gate defaults to
/// `false` via the derived `Default`.
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// Gate for Python-backed tools.
    pub allow_python_tools: bool,
    /// Gate for the custom embedder.
    pub allow_embedder: bool,
    /// Advisory gate for an extension-defined query preprocessor hook.
    ///
    /// The framework neither parses nor runs the preprocessor — it
    /// lives in the opaque `extensions:` passthrough. Downstream
    /// consumers (e.g. kglite-mcp-server) read this flag and refuse to
    /// boot the hook when it is `false`, following the same pattern as
    /// `allow_embedder`.
    pub allow_query_preprocessor: bool,
}
107
108#[derive(Debug, Clone)]
109pub enum ToolSpec {
110    Cypher(CypherTool),
111    Python(PythonTool),
112    /// Override the agent-facing surface of a bundled tool (one the
113    /// downstream binary provides natively — `cypher_query`,
114    /// `graph_overview`, `read_source`, etc.). The framework parses
115    /// the override but does not enforce that the named tool exists;
116    /// the downstream consumer (e.g. `kglite-mcp-server`) is
117    /// responsible for validating the name against its bundled
118    /// catalogue at boot time and applying the override when
119    /// emitting `tools/list`.
120    ///
121    /// Pre-0.3.31 the only customisation path for the bundled tool
122    /// surface was the manifest's global `instructions:` block —
123    /// useful for first-message orientation but not attached to
124    /// individual tools. Bundled overrides let operators rewrite a
125    /// specific tool's `description` (what the agent sees in
126    /// `tools/list`) or `hidden`-flag it out entirely.
127    Bundled(BundledOverride),
128}
129
130impl ToolSpec {
131    pub fn name(&self) -> &str {
132        match self {
133            ToolSpec::Cypher(t) => &t.name,
134            ToolSpec::Python(t) => &t.name,
135            ToolSpec::Bundled(t) => &t.name,
136        }
137    }
138}
139
140#[derive(Debug, Clone)]
141pub struct CypherTool {
142    pub name: String,
143    pub cypher: String,
144    pub description: Option<String>,
145    pub parameters: Option<serde_json::Value>,
146}
147
148#[derive(Debug, Clone)]
149pub struct PythonTool {
150    pub name: String,
151    pub python: String,
152    pub function: String,
153    pub description: Option<String>,
154    pub parameters: Option<serde_json::Value>,
155}
156
/// Per-deployment customisation of one bundled tool's agent-facing
/// surface.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Which bundled tool to override (e.g. `cypher_query`,
    /// `repo_management`). The framework checks shape only; the
    /// consumer validates the name against the downstream binary's
    /// real catalogue at boot time.
    pub name: String,
    /// Replacement for the bundled tool's default agent-facing
    /// description. `None` keeps the default.
    pub description: Option<String>,
    /// `true` asks the downstream consumer to drop the tool from
    /// `tools/list` AND reject calls to it. `false` (the default)
    /// leaves it visible.
    pub hidden: bool,
    /// Name to expose the tool under instead of its canonical name;
    /// `None` keeps the canonical name.
    ///
    /// This exists for operators running several kglite servers, each
    /// backed by a different graph: without a rename, an agent talking
    /// to three servers sees three indistinguishable copies of
    /// `cypher_query` in ToolSearch results. With a rename the servers
    /// can expose `legal_cypher_query`, `prospect_cypher_query`,
    /// `open_source_cypher_query`.
    ///
    /// Must be a valid identifier (`^[a-zA-Z_][a-zA-Z0-9_]*$`).
    /// Detecting duplicate names across the manifest's tools is left
    /// to the downstream consumer.
    pub rename: Option<String>,
}
186
187#[derive(Debug, Clone)]
188pub struct EmbedderConfig {
189    pub module: String,
190    pub class: String,
191    pub kwargs: serde_json::Map<String, serde_json::Value>,
192}
193
194#[derive(Debug, Default, Clone)]
195pub struct BuiltinsConfig {
196    pub save_graph: bool,
197    pub temp_cleanup: TempCleanup,
198}
199
/// Temp-file cleanup policy (`builtins.temp_cleanup`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Spelled `never` in YAML; the default.
    #[default]
    Never,
    /// Spelled `on_overview` in YAML.
    OnOverview,
}

impl TempCleanup {
    /// The YAML / JSON spelling of this policy.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::OnOverview => "on_overview",
            Self::Never => "never",
        }
    }
}
215
/// How the workspace is populated (`workspace.kind`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Clone-and-track GitHub repos. This is the default, used when no
    /// `workspace:` block is set and the operator passed
    /// `--workspace DIR`.
    #[default]
    Github,
    /// Bind a fixed local directory as the active source root. Nothing
    /// is cloned; `set_root_dir(path)` swaps the active root.
    Local,
}

impl WorkspaceKind {
    /// The YAML / JSON spelling of this kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            Self::Github => "github",
        }
    }
}
235
236#[derive(Debug, Clone, Default)]
237pub struct WorkspaceConfig {
238    pub kind: WorkspaceKind,
239    /// Local-mode only: path to the directory to bind as the source
240    /// root. Relative paths resolve against the YAML's parent dir.
241    pub root: Option<String>,
242    /// Local-mode only: wire the framework's file watcher to `root`
243    /// (debounced rebuild trigger via the post-activate hook).
244    pub watch: bool,
245    /// Optional opt-in for the [`find_workspace_manifest`] parent-walk
246    /// fallback. When set, this manifest is auto-discovered by
247    /// ``mcp-server --workspace DIR`` (and similar callers) only when
248    /// the operator's ``DIR`` matches the declaration here. When
249    /// unset, the parent-walk fallback NEVER fires for this manifest
250    /// — operators must pass ``--mcp-config`` explicitly.
251    ///
252    /// Values are glob patterns matching the workspace dir's basename
253    /// (single-segment match — parent-walk is always single-level).
254    /// Three forms:
255    ///
256    /// - **Single pattern** (`./repos`, `repos`, `*`, `a*`, `prod-?`):
257    ///   match against the workspace dir's basename. Literal strings
258    ///   like `repos` match only `repos`; glob patterns like `*` or
259    ///   `prod-*` match any name fitting the pattern.
260    /// - **List of patterns** (`[./repos, ./clones]`, `[prod-*, test-*]`):
261    ///   match if any pattern matches. Useful for curated subsets or
262    ///   multiple naming conventions in one manifest.
263    ///
264    /// Leading `./` is optional and stripped at parse time. Patterns
265    /// must be single-segment — `./a/b` is rejected. Invalid glob
266    /// syntax is rejected at parse time.
267    ///
268    /// Eliminates the accidental-discovery footgun where a workspace
269    /// manifest is auto-picked-up by an unrelated sibling dir. The
270    /// manifest's own declaration is the opt-in.
271    pub applies_to: Option<AppliesTo>,
272}
273
/// Which workspace directories a manifest applies to when it is
/// discovered through the [`find_workspace_manifest`] parent-walk
/// fallback. Full semantics are described on
/// [`WorkspaceConfig::applies_to`].
///
/// Every entry is a glob pattern — a literal, or one using `*` / `?` /
/// `[abc]` — matched against the workspace dir's basename.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AppliesTo {
    /// One glob pattern. A literal name (`repos`) matches only that
    /// name, `*` matches anything, and `prod-*` matches any basename
    /// starting with `prod-`.
    Pattern(String),
    /// Several glob patterns; a match on any one of them counts.
    Patterns(Vec<String>),
}
289
290#[derive(Debug, Clone)]
291pub struct Manifest {
292    pub yaml_path: PathBuf,
293    pub name: Option<String>,
294    pub instructions: Option<String>,
295    pub overview_prefix: Option<String>,
296    pub source_roots: Vec<String>,
297    pub trust: TrustConfig,
298    pub tools: Vec<ToolSpec>,
299    pub embedder: Option<EmbedderConfig>,
300    pub builtins: BuiltinsConfig,
301    /// Optional explicit `.env` path (relative to the YAML or absolute).
302    /// When unset, the runtime walks upward from the start directory
303    /// looking for a `.env` file.
304    pub env_file: Option<String>,
305    /// Optional explicit workspace declaration. When set, this wins
306    /// over CLI `--workspace`/`--source-root` flags interpretation
307    /// (manifest is the source of truth — same rule as `source_root:`).
308    pub workspace: Option<WorkspaceConfig>,
309    /// Raw passthrough for downstream-binary-specific manifest keys.
310    /// The framework accepts any mapping under `extensions:` and stores
311    /// it here without validating the inner keys; downstream consumers
312    /// (e.g. kglite-mcp-server) read whatever they need from this map.
313    ///
314    /// This keeps the framework's strict-unknown-key validation strong
315    /// for the surfaces it owns (`builtins`, `workspace`, …) while
316    /// letting consumers add their own configuration namespace without
317    /// per-key framework round-trips.
318    pub extensions: serde_json::Map<String, serde_json::Value>,
319}
320
321impl Manifest {
322    /// JSON-friendly representation of the validated manifest for
323    /// FFI / RPC exposure (pyo3 wrappers, JSON-RPC bridges, etc.).
324    ///
325    /// The shape is stable across patch releases: fields can be added
326    /// non-breaking, but key renames or removals are breaking changes.
327    /// When adding a new field to `Manifest`, extend this method too —
328    /// the `to_json_shape_is_stable` test will fail until you do.
329    /// The `extensions` map is passed through unchanged; downstream
330    /// consumers parse their own namespace from it.
331    pub fn to_json(&self) -> serde_json::Value {
332        serde_json::json!({
333            "yaml_path": self.yaml_path.display().to_string(),
334            "name": self.name,
335            "instructions": self.instructions,
336            "overview_prefix": self.overview_prefix,
337            "source_roots": self.source_roots,
338            "trust": {
339                "allow_python_tools": self.trust.allow_python_tools,
340                "allow_embedder": self.trust.allow_embedder,
341                "allow_query_preprocessor": self.trust.allow_query_preprocessor,
342            },
343            "tools": self.tools.iter().map(|t| match t {
344                ToolSpec::Cypher(c) => serde_json::json!({
345                    "kind": "cypher",
346                    "name": c.name,
347                    "cypher": c.cypher,
348                    "description": c.description,
349                    "parameters": c.parameters,
350                }),
351                ToolSpec::Python(p) => serde_json::json!({
352                    "kind": "python",
353                    "name": p.name,
354                    "python": p.python,
355                    "function": p.function,
356                    "description": p.description,
357                    "parameters": p.parameters,
358                }),
359                ToolSpec::Bundled(b) => serde_json::json!({
360                    "kind": "bundled",
361                    "name": b.name,
362                    "description": b.description,
363                    "hidden": b.hidden,
364                    "rename": b.rename,
365                }),
366            }).collect::<Vec<_>>(),
367            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
368                "module": e.module,
369                "class": e.class,
370                "kwargs": e.kwargs,
371            })),
372            "builtins": {
373                "save_graph": self.builtins.save_graph,
374                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
375            },
376            "env_file": self.env_file,
377            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
378                "kind": w.kind.as_str(),
379                "root": w.root,
380                "watch": w.watch,
381                "applies_to": w.applies_to.as_ref().map(|a| match a {
382                    AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
383                    AppliesTo::Patterns(ps) => serde_json::Value::Array(
384                        ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
385                    ),
386                }),
387            })),
388            "extensions": self.extensions,
389        })
390    }
391}
392
/// Look for ``<basename>_mcp.yaml`` in the same directory as a graph
/// file.
///
/// `<basename>` is the graph file's stem (file name without its final
/// extension, converted lossily to UTF-8). Returns the candidate path
/// when it exists as a regular file, and `None` otherwise — including
/// when `graph_path` has no stem or no parent.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let dir = graph_path.parent()?;
    let stem = graph_path.file_stem()?.to_string_lossy();
    let file_name = format!("{stem}_mcp.yaml");
    Some(dir.join(file_name)).filter(|candidate| candidate.is_file())
}
404
405/// Auto-detect ``workspace_mcp.yaml`` for a workspace directory.
406///
407/// Checks two locations in strict priority order:
408///
409/// 1. **Primary** — ``<workspace_dir>/workspace_mcp.yaml``. The
410///    documented and recommended location. If this exists, it is
411///    returned unconditionally; the parent-walk fallback is NOT
412///    consulted even if a parent manifest also exists. No opt-in
413///    declaration required — the manifest sitting inside the
414///    workspace dir is itself the operator's intent.
415/// 2. **Parent-walk fallback** —
416///    ``<workspace_dir>/../workspace_mcp.yaml``. Triggered only when
417///    the primary is absent AND the parent manifest *declares* it
418///    applies to this specific workspace dir via the
419///    ``workspace.applies_to:`` field:
420///
421///    ```yaml
422///    # open_source/workspace_mcp.yaml
423///    workspace:
424///      kind: github
425///      applies_to: ./repos     # required for parent-walk discovery
426///    ```
427///
428///    The framework loads the parent manifest, canonicalises
429///    ``manifest.workspace.applies_to`` against the manifest's parent
430///    directory, and compares it to the actual ``workspace_dir``.
431///    Match → manifest is returned. No declaration or path mismatch
432///    → discovery returns ``None`` (operator must pass
433///    ``--mcp-config`` explicitly).
434///
435///    The natural layout for github-clone-tracker workspaces is:
436///
437///    ```text
438///    open_source/
439///    ├── workspace_mcp.yaml     # config sits beside the sandbox; declares
440///    │                          # workspace.applies_to: ./repos
441///    └── repos/                 # --workspace points here
442///    ```
443///
444///    The ``applies_to`` opt-in eliminates the accidental-discovery
445///    footgun where a manifest in a project root would auto-attach to
446///    any unrelated sibling dir. Operators who didn't author the
447///    manifest get the safe default (no auto-detection); operators
448///    who did get the ergonomic UX (no ``--mcp-config`` boilerplate).
449///
450/// Bounded to one level up; will not walk past the filesystem root.
451/// Symlink-safe via canonicalisation. Added per kglite operator
452/// feedback after the 0.6.x → 0.9.x migration audit.
453pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
454    let primary = workspace_dir.join("workspace_mcp.yaml");
455    if primary.is_file() {
456        return Some(primary);
457    }
458    // Parent-walk fallback. Compare against canonicalised paths to
459    // handle "/" (where parent == self) and symlinks consistently.
460    let parent = workspace_dir.parent()?;
461    let workspace_resolved = workspace_dir.canonicalize().ok()?;
462    let parent_resolved = parent.canonicalize().ok()?;
463    if parent_resolved == workspace_resolved {
464        // No real parent (filesystem root).
465        return None;
466    }
467    let fallback = parent.join("workspace_mcp.yaml");
468    if !fallback.is_file() {
469        return None;
470    }
471
472    // The fallback manifest must declare workspace.applies_to and
473    // that declaration must canonicalise to the actual workspace_dir.
474    // Otherwise the discovery is unsafe (could be accidental).
475    let manifest = match load(&fallback) {
476        Ok(m) => m,
477        Err(e) => {
478            tracing::warn!(
479                manifest = %fallback.display(),
480                error = %e,
481                "parent-walk manifest exists but failed to parse; ignoring"
482            );
483            return None;
484        }
485    };
486    let declared = manifest
487        .workspace
488        .as_ref()
489        .and_then(|w| w.applies_to.as_ref());
490    let Some(declared_applies_to) = declared else {
491        tracing::info!(
492            manifest = %fallback.display(),
493            "parent-walk manifest does not declare workspace.applies_to; \
494             ignoring (set workspace.applies_to: <pattern> to opt in)"
495        );
496        return None;
497    };
498    // Match the workspace dir's basename against the declared pattern(s).
499    // The parent-walk guarantee (workspace_dir.parent() == manifest_dir)
500    // is already established above — only the basename match is left.
501    let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
502        return None; // path with no usable basename, defensive
503    };
504    let patterns: Vec<&str> = match declared_applies_to {
505        AppliesTo::Pattern(p) => vec![p.as_str()],
506        AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
507    };
508    let matched = patterns.iter().any(|pat| {
509        match globset::Glob::new(pat) {
510            Ok(g) => g.compile_matcher().is_match(basename),
511            Err(_) => {
512                // Should not happen — patterns were validated at parse
513                // time. Defensive: treat as non-match.
514                false
515            }
516        }
517    });
518    if matched {
519        tracing::info!(
520            workspace_dir = %workspace_dir.display(),
521            manifest = %fallback.display(),
522            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
523        );
524        Some(fallback)
525    } else {
526        tracing::info!(
527            workspace_dir = %workspace_resolved.display(),
528            manifest = %fallback.display(),
529            basename = %basename,
530            patterns = ?patterns,
531            "parent-walk manifest's workspace.applies_to does not match \
532             this workspace_dir's basename; ignoring"
533        );
534        None
535    }
536}
537
538/// Parse and validate a manifest YAML file.
539pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
540    let text = fs::read_to_string(yaml_path)
541        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
542    let raw: serde_yaml::Value = serde_yaml::from_str(&text)
543        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
544    let raw = match raw {
545        serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
546        v => v,
547    };
548    let map = raw
549        .as_mapping()
550        .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
551    build(map, yaml_path)
552}
553
554fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
555    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
556
557    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
558        return Err(ManifestError::at(
559            yaml_path,
560            "specify either source_root (str) or source_roots (list), not both",
561        ));
562    }
563
564    let mut source_roots: Vec<String> = Vec::new();
565    if let Some(v) = raw.get("source_root") {
566        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
567            ManifestError::at(yaml_path, "source_root must be a non-empty string")
568        })?;
569        source_roots.push(s.to_string());
570    } else if let Some(v) = raw.get("source_roots") {
571        let seq = v.as_sequence().ok_or_else(|| {
572            ManifestError::at(
573                yaml_path,
574                "source_roots must be a list of non-empty strings",
575            )
576        })?;
577        if seq.is_empty() {
578            return Err(ManifestError::at(
579                yaml_path,
580                "source_roots must be non-empty when set",
581            ));
582        }
583        for item in seq {
584            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
585                ManifestError::at(
586                    yaml_path,
587                    "source_roots must be a list of non-empty strings",
588                )
589            })?;
590            source_roots.push(s.to_string());
591        }
592    }
593
594    let trust = build_trust(raw.get("trust"), yaml_path)?;
595    let tools = build_tools(raw.get("tools"), yaml_path)?;
596    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
597    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
598    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
599    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
600
601    Ok(Manifest {
602        yaml_path: yaml_path.to_path_buf(),
603        name: optional_str(raw, "name", yaml_path)?,
604        instructions: optional_str(raw, "instructions", yaml_path)?,
605        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
606        source_roots,
607        trust,
608        tools,
609        embedder,
610        builtins,
611        env_file: optional_str(raw, "env_file", yaml_path)?,
612        workspace,
613        extensions,
614    })
615}
616
617fn build_extensions(
618    raw: Option<&serde_yaml::Value>,
619    yaml_path: &Path,
620) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
621    let Some(raw) = raw else {
622        return Ok(serde_json::Map::new());
623    };
624    if matches!(raw, serde_yaml::Value::Null) {
625        return Ok(serde_json::Map::new());
626    }
627    if !raw.is_mapping() {
628        return Err(ManifestError::at(
629            yaml_path,
630            "extensions must be a mapping (downstream-binary-specific keys)",
631        ));
632    }
633    match yaml_to_json(raw.clone())? {
634        serde_json::Value::Object(o) => Ok(o),
635        _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
636    }
637}
638
639fn build_workspace(
640    raw: Option<&serde_yaml::Value>,
641    yaml_path: &Path,
642) -> Result<Option<WorkspaceConfig>, ManifestError> {
643    let Some(raw) = raw else { return Ok(None) };
644    if matches!(raw, serde_yaml::Value::Null) {
645        return Ok(None);
646    }
647    let map = raw
648        .as_mapping()
649        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
650    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
651    let kind = match map.get("kind") {
652        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
653        Some(serde_yaml::Value::String(s)) => match s.as_str() {
654            "github" => WorkspaceKind::Github,
655            "local" => WorkspaceKind::Local,
656            other => {
657                return Err(ManifestError::at(
658                    yaml_path,
659                    format!(
660                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
661                    ),
662                ));
663            }
664        },
665        Some(_) => {
666            return Err(ManifestError::at(
667                yaml_path,
668                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
669            ))
670        }
671    };
672    let root = match map.get("root") {
673        None | Some(serde_yaml::Value::Null) => None,
674        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
675        _ => {
676            return Err(ManifestError::at(
677                yaml_path,
678                "workspace.root must be a non-empty string",
679            ))
680        }
681    };
682    let watch = match map.get("watch") {
683        None | Some(serde_yaml::Value::Null) => false,
684        Some(serde_yaml::Value::Bool(b)) => *b,
685        Some(_) => {
686            return Err(ManifestError::at(
687                yaml_path,
688                "workspace.watch must be a bool",
689            ))
690        }
691    };
692    let applies_to =
693        match map.get("applies_to") {
694            None | Some(serde_yaml::Value::Null) => None,
695            Some(serde_yaml::Value::String(s)) => {
696                Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
697            }
698            Some(serde_yaml::Value::Sequence(seq)) => {
699                if seq.is_empty() {
700                    return Err(ManifestError::at(
701                        yaml_path,
702                        "workspace.applies_to: list must contain at least one pattern",
703                    ));
704                }
705                let mut patterns = Vec::with_capacity(seq.len());
706                for (i, item) in seq.iter().enumerate() {
707                    let s = item.as_str().ok_or_else(|| {
708                        ManifestError::at(
709                            yaml_path,
710                            format!("workspace.applies_to[{i}] must be a string"),
711                        )
712                    })?;
713                    let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
714                        ManifestError::at(
715                            yaml_path,
716                            format!("workspace.applies_to[{i}]: {}", e.message),
717                        )
718                    })?;
719                    patterns.push(cleaned);
720                }
721                Some(AppliesTo::Patterns(patterns))
722            }
723            _ => return Err(ManifestError::at(
724                yaml_path,
725                "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
726            )),
727        };
728    if kind == WorkspaceKind::Local && root.is_none() {
729        return Err(ManifestError::at(
730            yaml_path,
731            "workspace.kind: local requires workspace.root to be set",
732        ));
733    }
734    if kind == WorkspaceKind::Github && watch {
735        return Err(ManifestError::at(
736            yaml_path,
737            "workspace.watch is only valid with workspace.kind: local",
738        ));
739    }
740    Ok(Some(WorkspaceConfig {
741        kind,
742        root,
743        watch,
744        applies_to,
745    }))
746}
747
748/// Parse + validate a single ``workspace.applies_to`` entry. Accepts
749/// any glob pattern matching a single path segment (no embedded
750/// slashes, no `..`). The leading ``./`` is optional and stripped.
751/// Validates glob syntax via `globset::Glob::new` so invalid patterns
752/// surface clear errors at boot.
753///
754/// Returns the cleaned pattern string (without `./` prefix) on
755/// success.
756fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
757    let trimmed = raw.trim();
758    if trimmed.is_empty() {
759        return Err(ManifestError::at(
760            yaml_path,
761            "workspace.applies_to: pattern must not be empty",
762        ));
763    }
764    // Strip a single leading `./` for ergonomic equivalence between
765    // `./repos` and `repos`. Both forms commonly appear in operator
766    // muscle memory; normalise so storage + glob matching is uniform.
767    let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
768    if stripped.is_empty() {
769        return Err(ManifestError::at(
770            yaml_path,
771            "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
772        ));
773    }
774    if stripped.contains('/') {
775        return Err(ManifestError::at(
776            yaml_path,
777            format!(
778                "workspace.applies_to: pattern {raw:?} must be a single path segment \
779                 (no embedded `/`) — parent-walk discovery is bounded to one level"
780            ),
781        ));
782    }
783    if stripped == ".." || stripped.starts_with("../") {
784        return Err(ManifestError::at(
785            yaml_path,
786            format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
787        ));
788    }
789    if Path::new(stripped).is_absolute() {
790        return Err(ManifestError::at(
791            yaml_path,
792            format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
793        ));
794    }
795    // Validate glob syntax. Construct a Glob to surface any syntax
796    // errors immediately — we don't keep the compiled form (cheap to
797    // re-compile at match time, keeps `WorkspaceConfig` Clone-cheap).
798    globset::Glob::new(stripped).map_err(|e| {
799        ManifestError::at(
800            yaml_path,
801            format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
802        )
803    })?;
804    Ok(stripped.to_string())
805}
806
807fn check_keys(
808    map: &serde_yaml::Mapping,
809    allowed: &[&str],
810    label: &str,
811    yaml_path: &Path,
812) -> Result<(), ManifestError> {
813    let mut unknown: Vec<String> = Vec::new();
814    for (k, _) in map {
815        let key = k.as_str().unwrap_or("<non-string-key>");
816        if !allowed.contains(&key) {
817            unknown.push(key.to_string());
818        }
819    }
820    if !unknown.is_empty() {
821        unknown.sort();
822        return Err(ManifestError::at(
823            yaml_path,
824            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
825        ));
826    }
827    Ok(())
828}
829
830fn optional_str(
831    raw: &serde_yaml::Mapping,
832    key: &str,
833    yaml_path: &Path,
834) -> Result<Option<String>, ManifestError> {
835    match raw.get(key) {
836        None | Some(serde_yaml::Value::Null) => Ok(None),
837        Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
838        Some(_) => Err(ManifestError::at(
839            yaml_path,
840            format!("{key} must be a string"),
841        )),
842    }
843}
844
845fn build_trust(
846    raw: Option<&serde_yaml::Value>,
847    yaml_path: &Path,
848) -> Result<TrustConfig, ManifestError> {
849    let Some(raw) = raw else {
850        return Ok(TrustConfig::default());
851    };
852    let map = raw
853        .as_mapping()
854        .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
855    check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
856    let mut cfg = TrustConfig::default();
857    if let Some(v) = map.get("allow_python_tools") {
858        cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
859            ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
860        })?;
861    }
862    if let Some(v) = map.get("allow_embedder") {
863        cfg.allow_embedder = v
864            .as_bool()
865            .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
866    }
867    if let Some(v) = map.get("allow_query_preprocessor") {
868        cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
869            ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
870        })?;
871    }
872    Ok(cfg)
873}
874
875fn build_tools(
876    raw: Option<&serde_yaml::Value>,
877    yaml_path: &Path,
878) -> Result<Vec<ToolSpec>, ManifestError> {
879    let Some(raw) = raw else {
880        return Ok(Vec::new());
881    };
882    let seq = raw
883        .as_sequence()
884        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
885    let mut tools: Vec<ToolSpec> = Vec::new();
886    let mut seen: BTreeMap<String, ()> = BTreeMap::new();
887    for (i, entry) in seq.iter().enumerate() {
888        let tool = build_tool(entry, i, yaml_path)?;
889        let name = tool.name().to_string();
890        if seen.insert(name.clone(), ()).is_some() {
891            return Err(ManifestError::at(
892                yaml_path,
893                format!("duplicate tool name: {name:?}"),
894            ));
895        }
896        tools.push(tool);
897    }
898    Ok(tools)
899}
900
901fn build_tool(
902    entry: &serde_yaml::Value,
903    idx: usize,
904    yaml_path: &Path,
905) -> Result<ToolSpec, ManifestError> {
906    let map = entry
907        .as_mapping()
908        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
909    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
910
911    // Kind detection. `cypher` and `python` are tool-creation kinds
912    // (operator declares a new named tool); `bundled` is a tool-
913    // override kind (operator picks a bundled tool name and customises
914    // its agent-facing surface). Exactly one must be present.
915    let has_cypher = map.contains_key("cypher");
916    let has_python = map.contains_key("python");
917    let has_bundled = map.contains_key("bundled");
918    let kinds_present: Vec<&str> = [
919        ("cypher", has_cypher),
920        ("python", has_python),
921        ("bundled", has_bundled),
922    ]
923    .into_iter()
924    .filter(|(_, p)| *p)
925    .map(|(k, _)| k)
926    .collect();
927    if kinds_present.is_empty() {
928        return Err(ManifestError::at(
929            yaml_path,
930            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
931        ));
932    }
933    if kinds_present.len() > 1 {
934        return Err(ManifestError::at(
935            yaml_path,
936            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
937        ));
938    }
939
940    // The `bundled` kind takes its name from the `bundled:` value
941    // itself (e.g. `bundled: cypher_query`) and forbids the
942    // tool-creation fields. Branch early so we don't run the
943    // tool-creation `name:` requirement against an override entry.
944    if has_bundled {
945        return build_bundled_override(map, idx, yaml_path);
946    }
947
948    let name = map
949        .get("name")
950        .and_then(|v| v.as_str())
951        .filter(|s| valid_identifier(s))
952        .ok_or_else(|| {
953            ManifestError::at(
954                yaml_path,
955                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
956            )
957        })?
958        .to_string();
959
960    // `hidden:` is only valid on bundled overrides (`hidden:`-flagging
961    // a tool you're declaring inline doesn't make sense — just don't
962    // declare it). Reject early so the operator gets a clear error.
963    if map.contains_key("hidden") {
964        return Err(ManifestError::at(
965            yaml_path,
966            format!(
967                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
968            ),
969        ));
970    }
971
972    let description = match map.get("description") {
973        None | Some(serde_yaml::Value::Null) => None,
974        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
975        Some(_) => {
976            return Err(ManifestError::at(
977                yaml_path,
978                format!("tools[{idx}] ({name:?}).description must be a string"),
979            ))
980        }
981    };
982
983    let parameters = match map.get("parameters") {
984        None | Some(serde_yaml::Value::Null) => None,
985        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
986        Some(_) => {
987            return Err(ManifestError::at(
988                yaml_path,
989                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
990            ))
991        }
992    };
993
994    if has_cypher {
995        let cypher = map
996            .get("cypher")
997            .and_then(|v| v.as_str())
998            .filter(|s| !s.trim().is_empty())
999            .ok_or_else(|| {
1000                ManifestError::at(
1001                    yaml_path,
1002                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
1003                )
1004            })?
1005            .to_string();
1006        return Ok(ToolSpec::Cypher(CypherTool {
1007            name,
1008            cypher,
1009            description,
1010            parameters,
1011        }));
1012    }
1013
1014    // python tool
1015    let python = map
1016        .get("python")
1017        .and_then(|v| v.as_str())
1018        .filter(|s| !s.is_empty())
1019        .ok_or_else(|| {
1020            ManifestError::at(
1021                yaml_path,
1022                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1023            )
1024        })?
1025        .to_string();
1026    let function = map
1027        .get("function")
1028        .and_then(|v| v.as_str())
1029        .filter(|s| valid_identifier(s))
1030        .ok_or_else(|| {
1031            ManifestError::at(
1032                yaml_path,
1033                format!(
1034                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1035                ),
1036            )
1037        })?
1038        .to_string();
1039    Ok(ToolSpec::Python(PythonTool {
1040        name,
1041        python,
1042        function,
1043        description,
1044        parameters,
1045    }))
1046}
1047
1048/// Parse a `bundled:` override entry from `tools[idx]`. The caller
1049/// (`build_tool`) has already established that the entry has
1050/// `bundled:` set as the kind discriminator.
1051fn build_bundled_override(
1052    map: &serde_yaml::Mapping,
1053    idx: usize,
1054    yaml_path: &Path,
1055) -> Result<ToolSpec, ManifestError> {
1056    let name = map
1057        .get("bundled")
1058        .and_then(|v| v.as_str())
1059        .filter(|s| valid_identifier(s))
1060        .ok_or_else(|| {
1061            ManifestError::at(
1062                yaml_path,
1063                format!(
1064                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1065                     (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1066                ),
1067            )
1068        })?
1069        .to_string();
1070
1071    // Tool-creation fields are forbidden on override entries — the
1072    // override only customises an existing bundled tool's surface,
1073    // it doesn't declare a new tool. Catch these at parse time so
1074    // operators get a clear error rather than silent confusion.
1075    for forbidden in ["name", "parameters", "function"] {
1076        if map.contains_key(forbidden) {
1077            return Err(ManifestError::at(
1078                yaml_path,
1079                format!(
1080                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1081                     (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
1082                ),
1083            ));
1084        }
1085    }
1086
1087    let description = match map.get("description") {
1088        None | Some(serde_yaml::Value::Null) => None,
1089        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1090        Some(_) => {
1091            return Err(ManifestError::at(
1092                yaml_path,
1093                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1094            ))
1095        }
1096    };
1097
1098    let hidden = match map.get("hidden") {
1099        None | Some(serde_yaml::Value::Null) => false,
1100        Some(serde_yaml::Value::Bool(b)) => *b,
1101        Some(_) => {
1102            return Err(ManifestError::at(
1103                yaml_path,
1104                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1105            ))
1106        }
1107    };
1108
1109    // 0.3.34: optional per-deployment rename. Validated as an
1110    // identifier here; cross-tool collision check is the consumer's
1111    // job (it knows what other names — bundled, cypher, python — it
1112    // has in scope).
1113    let rename = match map.get("rename") {
1114        None | Some(serde_yaml::Value::Null) => None,
1115        Some(serde_yaml::Value::String(s)) => {
1116            if !valid_identifier(s) {
1117                return Err(ManifestError::at(
1118                    yaml_path,
1119                    format!(
1120                        "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
1121                         (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
1122                    ),
1123                ));
1124            }
1125            Some(s.clone())
1126        }
1127        Some(_) => {
1128            return Err(ManifestError::at(
1129                yaml_path,
1130                format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
1131            ))
1132        }
1133    };
1134
1135    Ok(ToolSpec::Bundled(BundledOverride {
1136        name,
1137        description,
1138        hidden,
1139        rename,
1140    }))
1141}
1142
1143fn build_embedder(
1144    raw: Option<&serde_yaml::Value>,
1145    yaml_path: &Path,
1146) -> Result<Option<EmbedderConfig>, ManifestError> {
1147    let Some(raw) = raw else { return Ok(None) };
1148    if matches!(raw, serde_yaml::Value::Null) {
1149        return Ok(None);
1150    }
1151    let map = raw
1152        .as_mapping()
1153        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1154    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1155    let module = map
1156        .get("module")
1157        .and_then(|v| v.as_str())
1158        .filter(|s| !s.is_empty())
1159        .ok_or_else(|| {
1160            ManifestError::at(
1161                yaml_path,
1162                "embedder.module must be a non-empty string (path or dotted name)",
1163            )
1164        })?
1165        .to_string();
1166    let class = map
1167        .get("class")
1168        .and_then(|v| v.as_str())
1169        .filter(|s| valid_identifier(s))
1170        .ok_or_else(|| {
1171            ManifestError::at(
1172                yaml_path,
1173                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1174            )
1175        })?
1176        .to_string();
1177    let kwargs = match map.get("kwargs") {
1178        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1179        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1180            serde_json::Value::Object(o) => o,
1181            _ => {
1182                return Err(ManifestError::at(
1183                    yaml_path,
1184                    "embedder.kwargs must be a mapping",
1185                ))
1186            }
1187        },
1188        Some(_) => {
1189            return Err(ManifestError::at(
1190                yaml_path,
1191                "embedder.kwargs must be a mapping",
1192            ))
1193        }
1194    };
1195    Ok(Some(EmbedderConfig {
1196        module,
1197        class,
1198        kwargs,
1199    }))
1200}
1201
1202fn build_builtins(
1203    raw: Option<&serde_yaml::Value>,
1204    yaml_path: &Path,
1205) -> Result<BuiltinsConfig, ManifestError> {
1206    let Some(raw) = raw else {
1207        return Ok(BuiltinsConfig::default());
1208    };
1209    if matches!(raw, serde_yaml::Value::Null) {
1210        return Ok(BuiltinsConfig::default());
1211    }
1212    let map = raw
1213        .as_mapping()
1214        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1215    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1216    let mut cfg = BuiltinsConfig::default();
1217    if let Some(v) = map.get("save_graph") {
1218        cfg.save_graph = v
1219            .as_bool()
1220            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1221    }
1222    if let Some(v) = map.get("temp_cleanup") {
1223        let s = v.as_str().ok_or_else(|| {
1224            ManifestError::at(
1225                yaml_path,
1226                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1227            )
1228        })?;
1229        cfg.temp_cleanup = match s {
1230            "never" => TempCleanup::Never,
1231            "on_overview" => TempCleanup::OnOverview,
1232            other => {
1233                return Err(ManifestError::at(
1234                    yaml_path,
1235                    format!(
1236                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1237                    ),
1238                ))
1239            }
1240        };
1241    }
1242    Ok(cfg)
1243}
1244
/// Report whether `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// The empty string is rejected. The check works on bytes: every byte of a
/// non-ASCII character is >= 0x80 and therefore fails both ASCII tests, so
/// any non-ASCII input is rejected.
fn valid_identifier(s: &str) -> bool {
    match s.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            (head.is_ascii_alphabetic() || *head == b'_')
                && tail.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'_')
        }
    }
}
1253
1254fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1255    serde_json::to_value(&v)
1256        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1257}
1258
1259#[derive(Debug, Deserialize)]
1260struct _Reserved;
1261
1262#[cfg(test)]
1263mod tests {
1264    use super::*;
1265
1266    fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1267        let mut f = tempfile::NamedTempFile::new().unwrap();
1268        std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1269        f
1270    }
1271
1272    #[test]
1273    fn loads_minimal_empty_manifest() {
1274        let f = write_tmp("");
1275        let m = load(f.path()).unwrap();
1276        assert_eq!(m.tools.len(), 0);
1277        assert_eq!(m.source_roots.len(), 0);
1278        assert!(!m.trust.allow_python_tools);
1279        assert!(!m.trust.allow_embedder);
1280        assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1281    }
1282
1283    #[test]
1284    fn loads_name_and_instructions() {
1285        let f = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
1286        let m = load(f.path()).unwrap();
1287        assert_eq!(m.name.as_deref(), Some("Demo"));
1288        assert!(m.instructions.unwrap().contains("multi-line"));
1289    }
1290
1291    #[test]
1292    fn rejects_unknown_top_key() {
1293        let f = write_tmp("bogus: 1\n");
1294        let err = load(f.path()).unwrap_err();
1295        assert!(err.message.contains("unknown top-level"));
1296    }
1297
1298    #[test]
1299    fn source_root_string_normalises_to_list() {
1300        let f = write_tmp("source_root: ./data\n");
1301        let m = load(f.path()).unwrap();
1302        assert_eq!(m.source_roots, vec!["./data".to_string()]);
1303    }
1304
1305    #[test]
1306    fn source_roots_list_preserved() {
1307        let f = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
1308        let m = load(f.path()).unwrap();
1309        assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1310    }
1311
1312    #[test]
1313    fn rejects_both_source_root_and_source_roots() {
1314        let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1315        assert!(load(f.path()).unwrap_err().message.contains("not both"));
1316    }
1317
1318    #[test]
1319    fn cypher_tool_parses() {
1320        let f = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
1321        let m = load(f.path()).unwrap();
1322        assert_eq!(m.tools.len(), 1);
1323        match &m.tools[0] {
1324            ToolSpec::Cypher(t) => {
1325                assert_eq!(t.name, "lookup");
1326                assert!(t.cypher.contains("MATCH"));
1327            }
1328            _ => panic!("expected cypher tool"),
1329        }
1330    }
1331
1332    #[test]
1333    fn python_tool_parses() {
1334        let f =
1335            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
1336        let m = load(f.path()).unwrap();
1337        match &m.tools[0] {
1338            ToolSpec::Python(t) => {
1339                assert_eq!(t.python, "./tools.py");
1340                assert_eq!(t.function, "detail");
1341            }
1342            _ => panic!("expected python tool"),
1343        }
1344    }
1345
1346    #[test]
1347    fn rejects_tool_with_both_kinds() {
1348        let f = write_tmp(
1349            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
1350        );
1351        assert!(load(f.path())
1352            .unwrap_err()
1353            .message
1354            .contains("multiple kinds"));
1355    }
1356
1357    #[test]
1358    fn rejects_tool_with_no_kind() {
1359        let f = write_tmp("tools:\n  - name: x\n");
1360        assert!(load(f.path())
1361            .unwrap_err()
1362            .message
1363            .contains("needs exactly one"));
1364    }
1365
1366    #[test]
1367    fn rejects_duplicate_tool_names() {
1368        let f = write_tmp(
1369            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
1370        );
1371        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1372    }
1373
1374    // ─── Bundled override shape (0.3.31) ────────────────────────
1375
1376    #[test]
1377    fn bundled_override_with_description_parses() {
1378        let f =
1379            write_tmp("tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n");
1380        let m = load(f.path()).unwrap();
1381        assert_eq!(m.tools.len(), 1);
1382        match &m.tools[0] {
1383            ToolSpec::Bundled(b) => {
1384                assert_eq!(b.name, "repo_management");
1385                assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1386                assert!(!b.hidden);
1387            }
1388            _ => panic!("expected bundled override"),
1389        }
1390    }
1391
1392    #[test]
1393    fn bundled_override_with_hidden_parses() {
1394        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: true\n");
1395        let m = load(f.path()).unwrap();
1396        match &m.tools[0] {
1397            ToolSpec::Bundled(b) => {
1398                assert_eq!(b.name, "ping");
1399                assert!(b.hidden);
1400                assert!(b.description.is_none());
1401            }
1402            _ => panic!("expected bundled override"),
1403        }
1404    }
1405
1406    #[test]
1407    fn bundled_override_alongside_cypher_tools_parses() {
1408        let f = write_tmp(
1409            "tools:\n\
1410             \x20\x20- bundled: cypher_query\n\
1411             \x20\x20\x20\x20description: \"Custom server description\"\n\
1412             \x20\x20- name: lookup\n\
1413             \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1414        );
1415        let m = load(f.path()).unwrap();
1416        assert_eq!(m.tools.len(), 2);
1417        assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1418        assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1419    }
1420
1421    #[test]
1422    fn rejects_bundled_with_cypher_kind() {
1423        let f =
1424            write_tmp("tools:\n  - bundled: cypher_query\n    cypher: \"MATCH (n) RETURN n\"\n");
1425        let err = load(f.path()).unwrap_err();
1426        assert!(
1427            err.message.contains("multiple kinds"),
1428            "got: {}",
1429            err.message
1430        );
1431    }
1432
1433    #[test]
1434    fn rejects_bundled_with_name_field() {
1435        let f = write_tmp("tools:\n  - bundled: ping\n    name: ping\n");
1436        let err = load(f.path()).unwrap_err();
1437        assert!(
1438            err.message.contains("cannot set `name:`"),
1439            "got: {}",
1440            err.message
1441        );
1442    }
1443
1444    #[test]
1445    fn rejects_bundled_with_parameters_field() {
1446        let f =
1447            write_tmp("tools:\n  - bundled: cypher_query\n    parameters:\n      type: object\n");
1448        let err = load(f.path()).unwrap_err();
1449        assert!(
1450            err.message.contains("cannot set `parameters:`"),
1451            "got: {}",
1452            err.message
1453        );
1454    }
1455
1456    #[test]
1457    fn rejects_bundled_with_non_bool_hidden() {
1458        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: yes-please\n");
1459        let err = load(f.path()).unwrap_err();
1460        assert!(
1461            err.message.contains("hidden must be a bool"),
1462            "got: {}",
1463            err.message
1464        );
1465    }
1466
1467    #[test]
1468    fn rejects_hidden_on_cypher_tool() {
1469        let f = write_tmp(
1470            "tools:\n  - name: lookup\n    cypher: \"MATCH (n) RETURN n\"\n    hidden: true\n",
1471        );
1472        let err = load(f.path()).unwrap_err();
1473        assert!(
1474            err.message
1475                .contains("`hidden:` is only valid on `bundled:` override entries"),
1476            "got: {}",
1477            err.message
1478        );
1479    }
1480
1481    #[test]
1482    fn rejects_duplicate_bundled_overrides() {
1483        // The dedup check is on tool name; two `bundled: ping` entries
1484        // share the same name and should be rejected the same way
1485        // duplicate cypher tools are.
1486        let f = write_tmp(
1487            "tools:\n  - bundled: ping\n    hidden: true\n  - bundled: ping\n    description: \"x\"\n",
1488        );
1489        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1490    }
1491
1492    #[test]
1493    fn rejects_bundled_with_invalid_identifier() {
1494        let f = write_tmp("tools:\n  - bundled: \"123-bad\"\n    hidden: true\n");
1495        let err = load(f.path()).unwrap_err();
1496        assert!(
1497            err.message.contains("must be a string"),
1498            "got: {}",
1499            err.message
1500        );
1501    }
1502
1503    // 0.3.34 — `tools[].bundled: rename:` per-deployment override
1504    #[test]
1505    fn bundled_rename_parses_when_valid_identifier() {
1506        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
1507        let m = load(f.path()).unwrap();
1508        match &m.tools[0] {
1509            ToolSpec::Bundled(b) => {
1510                assert_eq!(b.name, "cypher_query");
1511                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1512                assert!(!b.hidden);
1513                assert!(b.description.is_none());
1514            }
1515            _ => panic!("expected bundled override"),
1516        }
1517    }
1518
1519    #[test]
1520    fn bundled_rename_alongside_description_parses() {
1521        let f = write_tmp(
1522            "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n    description: \"Legal-corpus cypher\"\n",
1523        );
1524        let m = load(f.path()).unwrap();
1525        match &m.tools[0] {
1526            ToolSpec::Bundled(b) => {
1527                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1528                assert_eq!(b.description.as_deref(), Some("Legal-corpus cypher"));
1529            }
1530            _ => panic!("expected bundled override"),
1531        }
1532    }
1533
1534    #[test]
1535    fn bundled_rename_defaults_to_none() {
1536        let f = write_tmp("tools:\n  - bundled: cypher_query\n    description: \"x\"\n");
1537        let m = load(f.path()).unwrap();
1538        match &m.tools[0] {
1539            ToolSpec::Bundled(b) => assert!(b.rename.is_none()),
1540            _ => panic!("expected bundled override"),
1541        }
1542    }
1543
1544    #[test]
1545    fn rejects_bundled_rename_with_invalid_identifier() {
1546        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: \"123-bad\"\n");
1547        let err = load(f.path()).unwrap_err();
1548        assert!(
1549            err.message.contains("rename must be a valid identifier"),
1550            "got: {}",
1551            err.message
1552        );
1553    }
1554
1555    #[test]
1556    fn rejects_bundled_rename_with_non_string_value() {
1557        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: 42\n");
1558        let err = load(f.path()).unwrap_err();
1559        assert!(
1560            err.message.contains("rename must be a string"),
1561            "got: {}",
1562            err.message
1563        );
1564    }
1565
1566    #[test]
1567    fn bundled_rename_serialises_to_json() {
1568        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
1569        let m = load(f.path()).unwrap();
1570        let json = m.to_json();
1571        let tools = json.get("tools").and_then(|t| t.as_array()).unwrap();
1572        let entry = &tools[0];
1573        assert_eq!(entry.get("kind").and_then(|v| v.as_str()), Some("bundled"));
1574        assert_eq!(
1575            entry.get("name").and_then(|v| v.as_str()),
1576            Some("cypher_query")
1577        );
1578        assert_eq!(
1579            entry.get("rename").and_then(|v| v.as_str()),
1580            Some("legal_cypher_query")
1581        );
1582    }
1583
1584    #[test]
1585    fn bundled_override_to_json_shape() {
1586        let f = write_tmp(
1587            "tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n    hidden: false\n",
1588        );
1589        let m = load(f.path()).unwrap();
1590        let v = m.to_json();
1591        assert_eq!(v["tools"][0]["kind"], "bundled");
1592        assert_eq!(v["tools"][0]["name"], "repo_management");
1593        assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1594        assert_eq!(v["tools"][0]["hidden"], false);
1595    }
1596
1597    #[test]
1598    fn embedder_parses() {
1599        let f = write_tmp(
1600            "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n",
1601        );
1602        let m = load(f.path()).unwrap();
1603        let e = m.embedder.unwrap();
1604        assert_eq!(e.module, "./e.py");
1605        assert_eq!(e.class, "GraphEmbedder");
1606        assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1607    }
1608
1609    #[test]
1610    fn builtins_parses_temp_cleanup() {
1611        let f = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
1612        let m = load(f.path()).unwrap();
1613        assert!(m.builtins.save_graph);
1614        assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1615    }
1616
1617    #[test]
1618    fn rejects_invalid_temp_cleanup() {
1619        let f = write_tmp("builtins:\n  temp_cleanup: nuke\n");
1620        assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1621    }
1622
1623    #[test]
1624    fn allow_embedder_trust_parses() {
1625        let f = write_tmp("trust:\n  allow_embedder: true\n");
1626        let m = load(f.path()).unwrap();
1627        assert!(m.trust.allow_embedder);
1628    }
1629
1630    #[test]
1631    fn allow_query_preprocessor_trust_parses() {
1632        let f = write_tmp("trust:\n  allow_query_preprocessor: true\n");
1633        let m = load(f.path()).unwrap();
1634        assert!(m.trust.allow_query_preprocessor);
1635        assert!(!m.trust.allow_embedder);
1636        assert!(!m.trust.allow_python_tools);
1637    }
1638
1639    #[test]
1640    fn allow_query_preprocessor_rejects_non_bool() {
1641        let f = write_tmp("trust:\n  allow_query_preprocessor: \"yes\"\n");
1642        let err = load(f.path()).unwrap_err();
1643        assert!(err
1644            .message
1645            .contains("allow_query_preprocessor must be a bool"));
1646    }
1647
1648    #[test]
1649    fn find_sibling_works() {
1650        let dir = tempfile::tempdir().unwrap();
1651        let graph = dir.path().join("demo.kgl");
1652        std::fs::write(&graph, b"\x00").unwrap();
1653        let sibling = dir.path().join("demo_mcp.yaml");
1654        std::fs::write(&sibling, "name: x\n").unwrap();
1655        assert_eq!(find_sibling_manifest(&graph), Some(sibling));
1656    }
1657
1658    #[test]
1659    fn workspace_local_parses() {
1660        let f = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
1661        let m = load(f.path()).unwrap();
1662        let w = m.workspace.unwrap();
1663        assert_eq!(w.kind, WorkspaceKind::Local);
1664        assert_eq!(w.root.as_deref(), Some("./src"));
1665        assert!(w.watch);
1666    }
1667
1668    #[test]
1669    fn workspace_github_default_kind() {
1670        let f = write_tmp("workspace: {}\n");
1671        let m = load(f.path()).unwrap();
1672        let w = m.workspace.unwrap();
1673        assert_eq!(w.kind, WorkspaceKind::Github);
1674        assert!(w.root.is_none());
1675        assert!(!w.watch);
1676    }
1677
1678    #[test]
1679    fn workspace_local_without_root_errors() {
1680        let f = write_tmp("workspace:\n  kind: local\n");
1681        let err = load(f.path()).unwrap_err();
1682        assert!(err.message.contains("requires workspace.root"));
1683    }
1684
1685    #[test]
1686    fn workspace_unknown_key_rejected() {
1687        let f = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
1688        let err = load(f.path()).unwrap_err();
1689        assert!(err.message.contains("unknown workspace keys"));
1690    }
1691
1692    #[test]
1693    fn workspace_invalid_kind_rejected() {
1694        let f = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
1695        let err = load(f.path()).unwrap_err();
1696        assert!(err.message.contains("workspace.kind"));
1697    }
1698
1699    #[test]
1700    fn workspace_watch_invalid_for_github() {
1701        let f = write_tmp("workspace:\n  kind: github\n  watch: true\n");
1702        let err = load(f.path()).unwrap_err();
1703        assert!(err.message.contains("watch is only valid"));
1704    }
1705
1706    #[test]
1707    fn extensions_passthrough_parses() {
1708        let f = write_tmp(
1709            "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
1710        );
1711        let m = load(f.path()).unwrap();
1712        assert_eq!(
1713            m.extensions
1714                .get("csv_http_server")
1715                .and_then(|v| v.as_bool()),
1716            Some(true)
1717        );
1718        assert_eq!(
1719            m.extensions
1720                .get("csv_http_server_dir")
1721                .and_then(|v| v.as_str()),
1722            Some("temp/")
1723        );
1724        // Nested values pass through unchanged.
1725        assert_eq!(
1726            m.extensions
1727                .get("arbitrary")
1728                .and_then(|v| v.get("nested"))
1729                .and_then(|v| v.as_i64()),
1730            Some(1)
1731        );
1732    }
1733
1734    #[test]
1735    fn extensions_absent_defaults_to_empty() {
1736        let f = write_tmp("name: x\n");
1737        let m = load(f.path()).unwrap();
1738        assert!(m.extensions.is_empty());
1739    }
1740
1741    #[test]
1742    fn extensions_inner_keys_unvalidated() {
1743        // The framework intentionally does NOT validate keys inside
1744        // `extensions:` — they're downstream-binary concerns. Any shape
1745        // that's a YAML mapping must round-trip.
1746        let f = write_tmp(
1747            "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
1748        );
1749        load(f.path()).unwrap();
1750    }
1751
1752    #[test]
1753    fn extensions_must_be_a_mapping() {
1754        let f = write_tmp("extensions: not-a-mapping\n");
1755        let err = load(f.path()).unwrap_err();
1756        assert!(err.message.contains("extensions must be a mapping"));
1757    }
1758
1759    #[test]
1760    fn env_file_key_parses() {
1761        let f = write_tmp("env_file: ../.env\n");
1762        let m = load(f.path()).unwrap();
1763        assert_eq!(m.env_file.as_deref(), Some("../.env"));
1764    }
1765
1766    #[test]
1767    fn env_file_unset_is_none() {
1768        let f = write_tmp("name: Demo\n");
1769        let m = load(f.path()).unwrap();
1770        assert!(m.env_file.is_none());
1771    }
1772
1773    #[test]
1774    fn find_workspace_works() {
1775        let dir = tempfile::tempdir().unwrap();
1776        let manifest = dir.path().join("workspace_mcp.yaml");
1777        std::fs::write(&manifest, "name: ws\n").unwrap();
1778        assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1779    }
1780
1781    #[test]
1782    fn find_workspace_walks_one_level_up_with_applies_to() {
1783        // Layout: <tmp>/parent/workspace_mcp.yaml (declares
1784        // workspace.applies_to: ./repos) + <tmp>/parent/repos/.
1785        // Discovery from <tmp>/parent/repos/ should walk up one level
1786        // and find the sibling manifest because applies_to matches.
1787        let dir = tempfile::tempdir().unwrap();
1788        let parent = dir.path().join("parent");
1789        std::fs::create_dir(&parent).unwrap();
1790        let manifest = parent.join("workspace_mcp.yaml");
1791        std::fs::write(
1792            &manifest,
1793            "workspace:\n  kind: github\n  applies_to: ./repos\n",
1794        )
1795        .unwrap();
1796        let repos = parent.join("repos");
1797        std::fs::create_dir(&repos).unwrap();
1798
1799        // Primary location still works.
1800        assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
1801
1802        // Parent-walk fallback resolves to the same manifest. Compare
1803        // canonicalised paths to handle macOS /private/var vs /var.
1804        let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
1805        assert_eq!(
1806            found.canonicalize().unwrap(),
1807            manifest.canonicalize().unwrap()
1808        );
1809    }
1810
1811    #[test]
1812    fn find_workspace_ignores_parent_without_applies_to() {
1813        // Parent manifest exists but does NOT declare workspace.applies_to.
1814        // The parent-walk fallback must refuse to auto-detect it —
1815        // otherwise an unrelated workspace_mcp.yaml in a sibling dir
1816        // could surprise-attach to whatever --workspace path the
1817        // operator passes. Safe default: require the opt-in.
1818        let dir = tempfile::tempdir().unwrap();
1819        let parent = dir.path().join("parent");
1820        std::fs::create_dir(&parent).unwrap();
1821        let manifest = parent.join("workspace_mcp.yaml");
1822        std::fs::write(&manifest, "name: not for repos\n").unwrap();
1823        let repos = parent.join("repos");
1824        std::fs::create_dir(&repos).unwrap();
1825
1826        assert_eq!(
1827            find_workspace_manifest(&repos),
1828            None,
1829            "parent manifest without workspace.applies_to must NOT auto-attach"
1830        );
1831    }
1832
1833    #[test]
1834    fn find_workspace_ignores_parent_with_mismatched_applies_to() {
1835        // Parent manifest declares applies_to: ./repos but the
1836        // actual --workspace path is ./other_dir. The mismatch must
1837        // suppress auto-detection.
1838        let dir = tempfile::tempdir().unwrap();
1839        let parent = dir.path().join("parent");
1840        std::fs::create_dir(&parent).unwrap();
1841        let manifest = parent.join("workspace_mcp.yaml");
1842        std::fs::write(
1843            &manifest,
1844            "workspace:\n  kind: github\n  applies_to: ./repos\n",
1845        )
1846        .unwrap();
1847        let other = parent.join("other_dir");
1848        std::fs::create_dir(&other).unwrap();
1849
1850        assert_eq!(
1851            find_workspace_manifest(&other),
1852            None,
1853            "applies_to: ./repos must NOT match --workspace ./other_dir"
1854        );
1855    }
1856
1857    #[test]
1858    fn find_workspace_applies_to_wildcard_matches_any_child() {
1859        // applies_to: '*' (or './*') means "any direct child of the
1860        // manifest's parent dir." Three different child names should
1861        // all auto-detect the manifest.
1862        let dir = tempfile::tempdir().unwrap();
1863        let parent = dir.path().join("parent");
1864        std::fs::create_dir(&parent).unwrap();
1865        let manifest = parent.join("workspace_mcp.yaml");
1866        std::fs::write(&manifest, "workspace:\n  kind: github\n  applies_to: '*'\n").unwrap();
1867        for child_name in ["repos", "clones", "totally-different-name"] {
1868            let child = parent.join(child_name);
1869            std::fs::create_dir(&child).unwrap();
1870            let found =
1871                find_workspace_manifest(&child).expect("wildcard should match any direct child");
1872            assert_eq!(
1873                found.canonicalize().unwrap(),
1874                manifest.canonicalize().unwrap(),
1875                "wildcard should match child {child_name:?}"
1876            );
1877        }
1878    }
1879
1880    #[test]
1881    fn find_workspace_applies_to_glob_matches_prefix() {
1882        // applies_to: './prod-*' should match any direct child whose
1883        // basename starts with "prod-".
1884        let dir = tempfile::tempdir().unwrap();
1885        let parent = dir.path().join("parent");
1886        std::fs::create_dir(&parent).unwrap();
1887        let manifest = parent.join("workspace_mcp.yaml");
1888        std::fs::write(
1889            &manifest,
1890            "workspace:\n  kind: github\n  applies_to: ./prod-*\n",
1891        )
1892        .unwrap();
1893        // Match cases.
1894        for child_name in ["prod-api", "prod-web", "prod-"] {
1895            let child = parent.join(child_name);
1896            std::fs::create_dir(&child).unwrap();
1897            assert!(
1898                find_workspace_manifest(&child).is_some(),
1899                "prod-* should match {child_name:?}"
1900            );
1901        }
1902        // Non-match cases.
1903        for child_name in ["test-api", "stage-web", "random"] {
1904            let child = parent.join(child_name);
1905            std::fs::create_dir(&child).unwrap();
1906            assert_eq!(
1907                find_workspace_manifest(&child),
1908                None,
1909                "prod-* should NOT match {child_name:?}"
1910            );
1911        }
1912    }
1913
1914    #[test]
1915    fn find_workspace_applies_to_list_matches_any_entry() {
1916        // applies_to: [./repos, ./clones] should match either name
1917        // but reject anything else.
1918        let dir = tempfile::tempdir().unwrap();
1919        let parent = dir.path().join("parent");
1920        std::fs::create_dir(&parent).unwrap();
1921        let manifest = parent.join("workspace_mcp.yaml");
1922        std::fs::write(
1923            &manifest,
1924            "workspace:\n  kind: github\n  applies_to:\n    - ./repos\n    - ./clones\n",
1925        )
1926        .unwrap();
1927        for matching in ["repos", "clones"] {
1928            let child = parent.join(matching);
1929            std::fs::create_dir(&child).unwrap();
1930            assert!(
1931                find_workspace_manifest(&child).is_some(),
1932                "list should match {matching:?}"
1933            );
1934        }
1935        let other = parent.join("scratch");
1936        std::fs::create_dir(&other).unwrap();
1937        assert_eq!(
1938            find_workspace_manifest(&other),
1939            None,
1940            "list with [repos, clones] must NOT match scratch"
1941        );
1942    }
1943
1944    #[test]
1945    fn applies_to_rejects_deep_path_at_parse_time() {
1946        let f = write_tmp("workspace:\n  kind: github\n  applies_to: ./too/deep/path\n");
1947        let err = load(f.path()).unwrap_err();
1948        assert!(
1949            err.message.contains("must be a single path segment"),
1950            "got: {}",
1951            err.message
1952        );
1953    }
1954
1955    #[test]
1956    fn applies_to_rejects_invalid_glob_at_parse_time() {
1957        // globset rejects unterminated character class.
1958        let f = write_tmp("workspace:\n  kind: github\n  applies_to: './[unterminated'\n");
1959        let err = load(f.path()).unwrap_err();
1960        assert!(
1961            err.message.contains("invalid glob pattern"),
1962            "got: {}",
1963            err.message
1964        );
1965    }
1966
1967    #[test]
1968    fn applies_to_rejects_parent_relative() {
1969        // Bare `..` is caught by the `..` rejection branch. The
1970        // multi-segment form `../foo` is caught earlier by the
1971        // single-segment check; either is rejected.
1972        let f = write_tmp("workspace:\n  kind: github\n  applies_to: '..'\n");
1973        let err = load(f.path()).unwrap_err();
1974        assert!(err.message.contains("must not contain `..`"));
1975
1976        let f2 = write_tmp("workspace:\n  kind: github\n  applies_to: '../up'\n");
1977        let err2 = load(f2.path()).unwrap_err();
1978        assert!(err2.message.contains("must be a single path segment"));
1979    }
1980
1981    #[test]
1982    fn find_workspace_returns_none_when_missing_everywhere() {
1983        let dir = tempfile::tempdir().unwrap();
1984        let child = dir.path().join("child");
1985        std::fs::create_dir(&child).unwrap();
1986        // No manifest in either child or its parent (tmpdir root).
1987        assert_eq!(find_workspace_manifest(&child), None);
1988    }
1989
1990    #[test]
1991    fn find_workspace_primary_wins_over_parent_fallback() {
1992        // Both primary AND parent-fallback exist. The primary must
1993        // win — this anchors the precedence rule documented on
1994        // `find_workspace_manifest`. The parent declares applies_to
1995        // matching the child dir, so it WOULD be a valid fallback —
1996        // but the primary preempts it. If a future refactor swaps
1997        // the order, this test fails loudly.
1998        let dir = tempfile::tempdir().unwrap();
1999        let parent_manifest = dir.path().join("workspace_mcp.yaml");
2000        std::fs::write(
2001            &parent_manifest,
2002            "workspace:\n  kind: github\n  applies_to: ./repos\n",
2003        )
2004        .unwrap();
2005        let child = dir.path().join("repos");
2006        std::fs::create_dir(&child).unwrap();
2007        let child_manifest = child.join("workspace_mcp.yaml");
2008        std::fs::write(&child_manifest, "name: child\n").unwrap();
2009
2010        // Discovery from `child` should return the child manifest,
2011        // NOT the parent's. Compare canonicalised to handle the
2012        // macOS /private/var vs /var symlink consistently.
2013        let found = find_workspace_manifest(&child).expect("primary should resolve");
2014        assert_eq!(
2015            found.canonicalize().unwrap(),
2016            child_manifest.canonicalize().unwrap(),
2017            "primary location must win when both primary and parent fallback exist"
2018        );
2019    }
2020
2021    #[test]
2022    fn to_json_shape_is_stable() {
2023        let f = write_tmp(
2024            r#"
2025name: KGLite Codebase
2026source_roots: [src, lib]
2027trust:
2028  allow_embedder: true
2029embedder:
2030  module: kglite.embed
2031  class: SentenceTransformerEmbedder
2032builtins:
2033  save_graph: true
2034  temp_cleanup: on_overview
2035"#,
2036        );
2037        let m = load(f.path()).unwrap();
2038        let actual = m.to_json();
2039        let expected = serde_json::json!({
2040            "yaml_path": f.path().display().to_string(),
2041            "name": "KGLite Codebase",
2042            "instructions": null,
2043            "overview_prefix": null,
2044            "source_roots": ["src", "lib"],
2045            "trust": {
2046                "allow_python_tools": false,
2047                "allow_embedder": true,
2048                "allow_query_preprocessor": false,
2049            },
2050            "tools": [],
2051            "embedder": {
2052                "module": "kglite.embed",
2053                "class": "SentenceTransformerEmbedder",
2054                "kwargs": {},
2055            },
2056            "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
2057            "env_file": null,
2058            "workspace": null,
2059            "extensions": {},
2060        });
2061        assert_eq!(actual, expected);
2062    }
2063
2064    #[test]
2065    fn to_json_round_trips_tools_and_workspace() {
2066        let f = write_tmp(
2067            r#"
2068name: Full Surface
2069source_root: ./src
2070trust:
2071  allow_python_tools: true
2072tools:
2073  - name: nodes_for
2074    cypher: "MATCH (n {name: $name}) RETURN n"
2075    description: "fetch nodes by name"
2076  - name: run_query
2077    python: tools.py
2078    function: run
2079workspace:
2080  kind: local
2081  root: /tmp/ws
2082  watch: true
2083builtins:
2084  save_graph: false
2085env_file: .env.local
2086extensions:
2087  kglite:
2088    flavour: standard
2089"#,
2090        );
2091        let m = load(f.path()).unwrap();
2092        let v = m.to_json();
2093        assert_eq!(v["name"], "Full Surface");
2094        assert_eq!(v["trust"]["allow_python_tools"], true);
2095        assert_eq!(v["workspace"]["kind"], "local");
2096        assert_eq!(v["workspace"]["root"], "/tmp/ws");
2097        assert_eq!(v["workspace"]["watch"], true);
2098        assert_eq!(v["env_file"], ".env.local");
2099        assert_eq!(v["tools"][0]["kind"], "cypher");
2100        assert_eq!(v["tools"][0]["name"], "nodes_for");
2101        assert_eq!(v["tools"][1]["kind"], "python");
2102        assert_eq!(v["tools"][1]["name"], "run_query");
2103        assert_eq!(v["tools"][1]["python"], "tools.py");
2104        assert_eq!(v["tools"][1]["function"], "run");
2105        assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
2106    }
2107}