Skip to main content

mcp_methods/server/
manifest.rs

1//! YAML manifest schema + loader.
2//!
3//! A manifest is a YAML file declaring the tools, source roots, custom
4//! embedder, and trust gates the server should apply. The loader parses,
5//! validates, and returns a [`Manifest`]; consumers (CLI wiring, tool
6//! registration) operate on the validated structure.
7//!
8//! Path strings (`source_root`, `python:` tool paths, embedder module)
9//! are kept as the raw user input — relative-to-yaml resolution happens
10//! at the use site so the data stays pure and testable.
11//!
12//! Validation is fail-fast and user-facing: the caller surfaces
13//! [`ManifestError`] messages directly to the operator.
14//!
15//! Schema mirrors the Python `kglite.mcp_server.manifest` module 1:1 so
16//! a manifest written for the Python server boots unchanged on the new
17//! Rust server.
18
19// A handful of fields/helpers are exposed for downstream consumers
20// (e.g. kglite-mcp-server reads `CypherTool::cypher` directly when
21// registering manifest-declared tools) and so look unused from this
22// crate's perspective. Silence dead-code warnings rather than chase
23// every cross-crate use.
24#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest. `build` passes this list
/// to `check_keys`, which rejects anything not named here.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];

/// Keys accepted inside the `workspace:` mapping.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch"];

/// Accepted `workspace.kind` values; interpolated into error messages.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];

/// Keys accepted inside the `trust:` mapping.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];

/// Keys accepted on each entry of the `tools:` list.
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
];

/// Keys accepted inside the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];

/// Keys accepted inside the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];

/// Accepted `builtins.temp_cleanup` values; interpolated into error messages.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
65
66#[derive(Debug, Error)]
67#[error("{path}: {message}")]
68pub struct ManifestError {
69    pub path: String,
70    pub message: String,
71}
72
73impl ManifestError {
74    pub fn at(path: &Path, message: impl Into<String>) -> Self {
75        Self {
76            path: path.display().to_string(),
77            message: message.into(),
78        }
79    }
80
81    pub fn bare(message: impl Into<String>) -> Self {
82        Self {
83            path: "<manifest>".to_string(),
84            message: message.into(),
85        }
86    }
87}
88
/// Trust gates read from the manifest's `trust:` mapping. Every gate
/// defaults to `false`.
#[derive(Clone, Debug, Default)]
pub struct TrustConfig {
    /// Value of `trust.allow_python_tools`.
    pub allow_python_tools: bool,
    /// Value of `trust.allow_embedder`.
    pub allow_embedder: bool,
    /// Advisory gate for an extension-defined query preprocessor hook.
    ///
    /// The framework neither parses nor runs the preprocessor — its
    /// configuration lives in the opaque `extensions:` passthrough.
    /// Downstream consumers (e.g. kglite-mcp-server) read this flag and
    /// refuse to boot the hook when it is false, following the same
    /// pattern as `allow_embedder`.
    pub allow_query_preprocessor: bool,
}
101
102#[derive(Debug, Clone)]
103pub enum ToolSpec {
104    Cypher(CypherTool),
105    Python(PythonTool),
106}
107
108impl ToolSpec {
109    pub fn name(&self) -> &str {
110        match self {
111            ToolSpec::Cypher(t) => &t.name,
112            ToolSpec::Python(t) => &t.name,
113        }
114    }
115}
116
117#[derive(Debug, Clone)]
118pub struct CypherTool {
119    pub name: String,
120    pub cypher: String,
121    pub description: Option<String>,
122    pub parameters: Option<serde_json::Value>,
123}
124
125#[derive(Debug, Clone)]
126pub struct PythonTool {
127    pub name: String,
128    pub python: String,
129    pub function: String,
130    pub description: Option<String>,
131    pub parameters: Option<serde_json::Value>,
132}
133
134#[derive(Debug, Clone)]
135pub struct EmbedderConfig {
136    pub module: String,
137    pub class: String,
138    pub kwargs: serde_json::Map<String, serde_json::Value>,
139}
140
141#[derive(Debug, Default, Clone)]
142pub struct BuiltinsConfig {
143    pub save_graph: bool,
144    pub temp_cleanup: TempCleanup,
145}
146
/// Parsed form of `builtins.temp_cleanup`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum TempCleanup {
    /// Spelled `never` in YAML; the default.
    #[default]
    Never,
    /// Spelled `on_overview` in YAML.
    OnOverview,
}

impl TempCleanup {
    /// The YAML/JSON spelling of this variant.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Never => "never",
            Self::OnOverview => "on_overview",
        }
    }
}
162
/// Parsed form of `workspace.kind`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum WorkspaceKind {
    /// Clone-and-track GitHub repos. This is the default when no
    /// `workspace:` block is set and the operator passed `--workspace DIR`.
    #[default]
    Github,
    /// Bind a fixed local directory as the active source root. Nothing is
    /// cloned; `set_root_dir(path)` swaps the active root.
    Local,
}

impl WorkspaceKind {
    /// The YAML/JSON spelling of this variant.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Github => "github",
            Self::Local => "local",
        }
    }
}
182
183#[derive(Debug, Clone, Default)]
184pub struct WorkspaceConfig {
185    pub kind: WorkspaceKind,
186    /// Local-mode only: path to the directory to bind as the source
187    /// root. Relative paths resolve against the YAML's parent dir.
188    pub root: Option<String>,
189    /// Local-mode only: wire the framework's file watcher to `root`
190    /// (debounced rebuild trigger via the post-activate hook).
191    pub watch: bool,
192}
193
194#[derive(Debug, Clone)]
195pub struct Manifest {
196    pub yaml_path: PathBuf,
197    pub name: Option<String>,
198    pub instructions: Option<String>,
199    pub overview_prefix: Option<String>,
200    pub source_roots: Vec<String>,
201    pub trust: TrustConfig,
202    pub tools: Vec<ToolSpec>,
203    pub embedder: Option<EmbedderConfig>,
204    pub builtins: BuiltinsConfig,
205    /// Optional explicit `.env` path (relative to the YAML or absolute).
206    /// When unset, the runtime walks upward from the start directory
207    /// looking for a `.env` file.
208    pub env_file: Option<String>,
209    /// Optional explicit workspace declaration. When set, this wins
210    /// over CLI `--workspace`/`--source-root` flags interpretation
211    /// (manifest is the source of truth — same rule as `source_root:`).
212    pub workspace: Option<WorkspaceConfig>,
213    /// Raw passthrough for downstream-binary-specific manifest keys.
214    /// The framework accepts any mapping under `extensions:` and stores
215    /// it here without validating the inner keys; downstream consumers
216    /// (e.g. kglite-mcp-server) read whatever they need from this map.
217    ///
218    /// This keeps the framework's strict-unknown-key validation strong
219    /// for the surfaces it owns (`builtins`, `workspace`, …) while
220    /// letting consumers add their own configuration namespace without
221    /// per-key framework round-trips.
222    pub extensions: serde_json::Map<String, serde_json::Value>,
223}
224
225impl Manifest {
226    /// JSON-friendly representation of the validated manifest for
227    /// FFI / RPC exposure (pyo3 wrappers, JSON-RPC bridges, etc.).
228    ///
229    /// The shape is stable across patch releases: fields can be added
230    /// non-breaking, but key renames or removals are breaking changes.
231    /// When adding a new field to `Manifest`, extend this method too —
232    /// the `to_json_shape_is_stable` test will fail until you do.
233    /// The `extensions` map is passed through unchanged; downstream
234    /// consumers parse their own namespace from it.
235    pub fn to_json(&self) -> serde_json::Value {
236        serde_json::json!({
237            "yaml_path": self.yaml_path.display().to_string(),
238            "name": self.name,
239            "instructions": self.instructions,
240            "overview_prefix": self.overview_prefix,
241            "source_roots": self.source_roots,
242            "trust": {
243                "allow_python_tools": self.trust.allow_python_tools,
244                "allow_embedder": self.trust.allow_embedder,
245                "allow_query_preprocessor": self.trust.allow_query_preprocessor,
246            },
247            "tools": self.tools.iter().map(|t| match t {
248                ToolSpec::Cypher(c) => serde_json::json!({
249                    "kind": "cypher",
250                    "name": c.name,
251                    "cypher": c.cypher,
252                    "description": c.description,
253                    "parameters": c.parameters,
254                }),
255                ToolSpec::Python(p) => serde_json::json!({
256                    "kind": "python",
257                    "name": p.name,
258                    "python": p.python,
259                    "function": p.function,
260                    "description": p.description,
261                    "parameters": p.parameters,
262                }),
263            }).collect::<Vec<_>>(),
264            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
265                "module": e.module,
266                "class": e.class,
267                "kwargs": e.kwargs,
268            })),
269            "builtins": {
270                "save_graph": self.builtins.save_graph,
271                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
272            },
273            "env_file": self.env_file,
274            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
275                "kind": w.kind.as_str(),
276                "root": w.root,
277                "watch": w.watch,
278            })),
279            "extensions": self.extensions,
280        })
281    }
282}
283
/// Look for `<stem>_mcp.yaml` in the same directory as `graph_path`.
///
/// Returns `None` when the path has no file stem or parent, or when the
/// candidate is not an existing regular file.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let stem = graph_path.file_stem()?.to_string_lossy();
    let dir = graph_path.parent()?;
    let candidate = dir.join(format!("{stem}_mcp.yaml"));
    candidate.is_file().then_some(candidate)
}
295
/// Look for `workspace_mcp.yaml` directly inside `workspace_dir`.
///
/// Returns `None` unless that path is an existing regular file.
pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
    let candidate = workspace_dir.join("workspace_mcp.yaml");
    candidate.is_file().then_some(candidate)
}
305
306/// Parse and validate a manifest YAML file.
307pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
308    let text = fs::read_to_string(yaml_path)
309        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
310    let raw: serde_yaml::Value = serde_yaml::from_str(&text)
311        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
312    let raw = match raw {
313        serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
314        v => v,
315    };
316    let map = raw
317        .as_mapping()
318        .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
319    build(map, yaml_path)
320}
321
322fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
323    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
324
325    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
326        return Err(ManifestError::at(
327            yaml_path,
328            "specify either source_root (str) or source_roots (list), not both",
329        ));
330    }
331
332    let mut source_roots: Vec<String> = Vec::new();
333    if let Some(v) = raw.get("source_root") {
334        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
335            ManifestError::at(yaml_path, "source_root must be a non-empty string")
336        })?;
337        source_roots.push(s.to_string());
338    } else if let Some(v) = raw.get("source_roots") {
339        let seq = v.as_sequence().ok_or_else(|| {
340            ManifestError::at(
341                yaml_path,
342                "source_roots must be a list of non-empty strings",
343            )
344        })?;
345        if seq.is_empty() {
346            return Err(ManifestError::at(
347                yaml_path,
348                "source_roots must be non-empty when set",
349            ));
350        }
351        for item in seq {
352            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
353                ManifestError::at(
354                    yaml_path,
355                    "source_roots must be a list of non-empty strings",
356                )
357            })?;
358            source_roots.push(s.to_string());
359        }
360    }
361
362    let trust = build_trust(raw.get("trust"), yaml_path)?;
363    let tools = build_tools(raw.get("tools"), yaml_path)?;
364    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
365    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
366    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
367    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
368
369    Ok(Manifest {
370        yaml_path: yaml_path.to_path_buf(),
371        name: optional_str(raw, "name", yaml_path)?,
372        instructions: optional_str(raw, "instructions", yaml_path)?,
373        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
374        source_roots,
375        trust,
376        tools,
377        embedder,
378        builtins,
379        env_file: optional_str(raw, "env_file", yaml_path)?,
380        workspace,
381        extensions,
382    })
383}
384
385fn build_extensions(
386    raw: Option<&serde_yaml::Value>,
387    yaml_path: &Path,
388) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
389    let Some(raw) = raw else {
390        return Ok(serde_json::Map::new());
391    };
392    if matches!(raw, serde_yaml::Value::Null) {
393        return Ok(serde_json::Map::new());
394    }
395    if !raw.is_mapping() {
396        return Err(ManifestError::at(
397            yaml_path,
398            "extensions must be a mapping (downstream-binary-specific keys)",
399        ));
400    }
401    match yaml_to_json(raw.clone())? {
402        serde_json::Value::Object(o) => Ok(o),
403        _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
404    }
405}
406
407fn build_workspace(
408    raw: Option<&serde_yaml::Value>,
409    yaml_path: &Path,
410) -> Result<Option<WorkspaceConfig>, ManifestError> {
411    let Some(raw) = raw else { return Ok(None) };
412    if matches!(raw, serde_yaml::Value::Null) {
413        return Ok(None);
414    }
415    let map = raw
416        .as_mapping()
417        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
418    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
419    let kind = match map.get("kind") {
420        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
421        Some(serde_yaml::Value::String(s)) => match s.as_str() {
422            "github" => WorkspaceKind::Github,
423            "local" => WorkspaceKind::Local,
424            other => {
425                return Err(ManifestError::at(
426                    yaml_path,
427                    format!(
428                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
429                    ),
430                ));
431            }
432        },
433        Some(_) => {
434            return Err(ManifestError::at(
435                yaml_path,
436                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
437            ))
438        }
439    };
440    let root = match map.get("root") {
441        None | Some(serde_yaml::Value::Null) => None,
442        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
443        _ => {
444            return Err(ManifestError::at(
445                yaml_path,
446                "workspace.root must be a non-empty string",
447            ))
448        }
449    };
450    let watch = match map.get("watch") {
451        None | Some(serde_yaml::Value::Null) => false,
452        Some(serde_yaml::Value::Bool(b)) => *b,
453        Some(_) => {
454            return Err(ManifestError::at(
455                yaml_path,
456                "workspace.watch must be a bool",
457            ))
458        }
459    };
460    if kind == WorkspaceKind::Local && root.is_none() {
461        return Err(ManifestError::at(
462            yaml_path,
463            "workspace.kind: local requires workspace.root to be set",
464        ));
465    }
466    if kind == WorkspaceKind::Github && watch {
467        return Err(ManifestError::at(
468            yaml_path,
469            "workspace.watch is only valid with workspace.kind: local",
470        ));
471    }
472    Ok(Some(WorkspaceConfig { kind, root, watch }))
473}
474
475fn check_keys(
476    map: &serde_yaml::Mapping,
477    allowed: &[&str],
478    label: &str,
479    yaml_path: &Path,
480) -> Result<(), ManifestError> {
481    let mut unknown: Vec<String> = Vec::new();
482    for (k, _) in map {
483        let key = k.as_str().unwrap_or("<non-string-key>");
484        if !allowed.contains(&key) {
485            unknown.push(key.to_string());
486        }
487    }
488    if !unknown.is_empty() {
489        unknown.sort();
490        return Err(ManifestError::at(
491            yaml_path,
492            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
493        ));
494    }
495    Ok(())
496}
497
498fn optional_str(
499    raw: &serde_yaml::Mapping,
500    key: &str,
501    yaml_path: &Path,
502) -> Result<Option<String>, ManifestError> {
503    match raw.get(key) {
504        None | Some(serde_yaml::Value::Null) => Ok(None),
505        Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
506        Some(_) => Err(ManifestError::at(
507            yaml_path,
508            format!("{key} must be a string"),
509        )),
510    }
511}
512
513fn build_trust(
514    raw: Option<&serde_yaml::Value>,
515    yaml_path: &Path,
516) -> Result<TrustConfig, ManifestError> {
517    let Some(raw) = raw else {
518        return Ok(TrustConfig::default());
519    };
520    let map = raw
521        .as_mapping()
522        .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
523    check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
524    let mut cfg = TrustConfig::default();
525    if let Some(v) = map.get("allow_python_tools") {
526        cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
527            ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
528        })?;
529    }
530    if let Some(v) = map.get("allow_embedder") {
531        cfg.allow_embedder = v
532            .as_bool()
533            .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
534    }
535    if let Some(v) = map.get("allow_query_preprocessor") {
536        cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
537            ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
538        })?;
539    }
540    Ok(cfg)
541}
542
543fn build_tools(
544    raw: Option<&serde_yaml::Value>,
545    yaml_path: &Path,
546) -> Result<Vec<ToolSpec>, ManifestError> {
547    let Some(raw) = raw else {
548        return Ok(Vec::new());
549    };
550    let seq = raw
551        .as_sequence()
552        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
553    let mut tools: Vec<ToolSpec> = Vec::new();
554    let mut seen: BTreeMap<String, ()> = BTreeMap::new();
555    for (i, entry) in seq.iter().enumerate() {
556        let tool = build_tool(entry, i, yaml_path)?;
557        let name = tool.name().to_string();
558        if seen.insert(name.clone(), ()).is_some() {
559            return Err(ManifestError::at(
560                yaml_path,
561                format!("duplicate tool name: {name:?}"),
562            ));
563        }
564        tools.push(tool);
565    }
566    Ok(tools)
567}
568
569fn build_tool(
570    entry: &serde_yaml::Value,
571    idx: usize,
572    yaml_path: &Path,
573) -> Result<ToolSpec, ManifestError> {
574    let map = entry
575        .as_mapping()
576        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
577    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
578
579    let name = map
580        .get("name")
581        .and_then(|v| v.as_str())
582        .filter(|s| valid_identifier(s))
583        .ok_or_else(|| {
584            ManifestError::at(
585                yaml_path,
586                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
587            )
588        })?
589        .to_string();
590
591    let has_cypher = map.contains_key("cypher");
592    let has_python = map.contains_key("python");
593    let kinds_present: Vec<&str> = [("cypher", has_cypher), ("python", has_python)]
594        .into_iter()
595        .filter(|(_, p)| *p)
596        .map(|(k, _)| k)
597        .collect();
598    if kinds_present.is_empty() {
599        return Err(ManifestError::at(
600            yaml_path,
601            format!("tools[{idx}] ({name:?}) needs exactly one of: [\"cypher\", \"python\"]"),
602        ));
603    }
604    if kinds_present.len() > 1 {
605        return Err(ManifestError::at(
606            yaml_path,
607            format!("tools[{idx}] ({name:?}) has multiple kinds set ({kinds_present:?}); pick one"),
608        ));
609    }
610
611    let description = match map.get("description") {
612        None | Some(serde_yaml::Value::Null) => None,
613        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
614        Some(_) => {
615            return Err(ManifestError::at(
616                yaml_path,
617                format!("tools[{idx}] ({name:?}).description must be a string"),
618            ))
619        }
620    };
621
622    let parameters = match map.get("parameters") {
623        None | Some(serde_yaml::Value::Null) => None,
624        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
625        Some(_) => {
626            return Err(ManifestError::at(
627                yaml_path,
628                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
629            ))
630        }
631    };
632
633    if has_cypher {
634        let cypher = map
635            .get("cypher")
636            .and_then(|v| v.as_str())
637            .filter(|s| !s.trim().is_empty())
638            .ok_or_else(|| {
639                ManifestError::at(
640                    yaml_path,
641                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
642                )
643            })?
644            .to_string();
645        return Ok(ToolSpec::Cypher(CypherTool {
646            name,
647            cypher,
648            description,
649            parameters,
650        }));
651    }
652
653    // python tool
654    let python = map
655        .get("python")
656        .and_then(|v| v.as_str())
657        .filter(|s| !s.is_empty())
658        .ok_or_else(|| {
659            ManifestError::at(
660                yaml_path,
661                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
662            )
663        })?
664        .to_string();
665    let function = map
666        .get("function")
667        .and_then(|v| v.as_str())
668        .filter(|s| valid_identifier(s))
669        .ok_or_else(|| {
670            ManifestError::at(
671                yaml_path,
672                format!(
673                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
674                ),
675            )
676        })?
677        .to_string();
678    Ok(ToolSpec::Python(PythonTool {
679        name,
680        python,
681        function,
682        description,
683        parameters,
684    }))
685}
686
687fn build_embedder(
688    raw: Option<&serde_yaml::Value>,
689    yaml_path: &Path,
690) -> Result<Option<EmbedderConfig>, ManifestError> {
691    let Some(raw) = raw else { return Ok(None) };
692    if matches!(raw, serde_yaml::Value::Null) {
693        return Ok(None);
694    }
695    let map = raw
696        .as_mapping()
697        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
698    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
699    let module = map
700        .get("module")
701        .and_then(|v| v.as_str())
702        .filter(|s| !s.is_empty())
703        .ok_or_else(|| {
704            ManifestError::at(
705                yaml_path,
706                "embedder.module must be a non-empty string (path or dotted name)",
707            )
708        })?
709        .to_string();
710    let class = map
711        .get("class")
712        .and_then(|v| v.as_str())
713        .filter(|s| valid_identifier(s))
714        .ok_or_else(|| {
715            ManifestError::at(
716                yaml_path,
717                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
718            )
719        })?
720        .to_string();
721    let kwargs = match map.get("kwargs") {
722        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
723        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
724            serde_json::Value::Object(o) => o,
725            _ => {
726                return Err(ManifestError::at(
727                    yaml_path,
728                    "embedder.kwargs must be a mapping",
729                ))
730            }
731        },
732        Some(_) => {
733            return Err(ManifestError::at(
734                yaml_path,
735                "embedder.kwargs must be a mapping",
736            ))
737        }
738    };
739    Ok(Some(EmbedderConfig {
740        module,
741        class,
742        kwargs,
743    }))
744}
745
746fn build_builtins(
747    raw: Option<&serde_yaml::Value>,
748    yaml_path: &Path,
749) -> Result<BuiltinsConfig, ManifestError> {
750    let Some(raw) = raw else {
751        return Ok(BuiltinsConfig::default());
752    };
753    if matches!(raw, serde_yaml::Value::Null) {
754        return Ok(BuiltinsConfig::default());
755    }
756    let map = raw
757        .as_mapping()
758        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
759    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
760    let mut cfg = BuiltinsConfig::default();
761    if let Some(v) = map.get("save_graph") {
762        cfg.save_graph = v
763            .as_bool()
764            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
765    }
766    if let Some(v) = map.get("temp_cleanup") {
767        let s = v.as_str().ok_or_else(|| {
768            ManifestError::at(
769                yaml_path,
770                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
771            )
772        })?;
773        cfg.temp_cleanup = match s {
774            "never" => TempCleanup::Never,
775            "on_overview" => TempCleanup::OnOverview,
776            other => {
777                return Err(ManifestError::at(
778                    yaml_path,
779                    format!(
780                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
781                    ),
782                ))
783            }
784        };
785    }
786    Ok(cfg)
787}
788
/// True when `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// Only ASCII is accepted, so the check can run on raw bytes: every byte
/// of a multi-byte UTF-8 character is >= 0x80 and fails both tests.
fn valid_identifier(s: &str) -> bool {
    match s.as_bytes().split_first() {
        None => false,
        Some((&head, tail)) => {
            (head == b'_' || head.is_ascii_alphabetic())
                && tail
                    .iter()
                    .all(|&byte| byte == b'_' || byte.is_ascii_alphanumeric())
        }
    }
}
797
798fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
799    serde_json::to_value(&v)
800        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
801}
802
803#[derive(Debug, Deserialize)]
804struct _Reserved;
805
806#[cfg(test)]
807mod tests {
808    use super::*;
809
810    fn write_tmp(text: &str) -> tempfile::NamedTempFile {
811        let mut f = tempfile::NamedTempFile::new().unwrap();
812        std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
813        f
814    }
815
816    #[test]
817    fn loads_minimal_empty_manifest() {
818        let f = write_tmp("");
819        let m = load(f.path()).unwrap();
820        assert_eq!(m.tools.len(), 0);
821        assert_eq!(m.source_roots.len(), 0);
822        assert!(!m.trust.allow_python_tools);
823        assert!(!m.trust.allow_embedder);
824        assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
825    }
826
827    #[test]
828    fn loads_name_and_instructions() {
829        let f = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
830        let m = load(f.path()).unwrap();
831        assert_eq!(m.name.as_deref(), Some("Demo"));
832        assert!(m.instructions.unwrap().contains("multi-line"));
833    }
834
835    #[test]
836    fn rejects_unknown_top_key() {
837        let f = write_tmp("bogus: 1\n");
838        let err = load(f.path()).unwrap_err();
839        assert!(err.message.contains("unknown top-level"));
840    }
841
842    #[test]
843    fn source_root_string_normalises_to_list() {
844        let f = write_tmp("source_root: ./data\n");
845        let m = load(f.path()).unwrap();
846        assert_eq!(m.source_roots, vec!["./data".to_string()]);
847    }
848
849    #[test]
850    fn source_roots_list_preserved() {
851        let f = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
852        let m = load(f.path()).unwrap();
853        assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
854    }
855
856    #[test]
857    fn rejects_both_source_root_and_source_roots() {
858        let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
859        assert!(load(f.path()).unwrap_err().message.contains("not both"));
860    }
861
862    #[test]
863    fn cypher_tool_parses() {
864        let f = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
865        let m = load(f.path()).unwrap();
866        assert_eq!(m.tools.len(), 1);
867        match &m.tools[0] {
868            ToolSpec::Cypher(t) => {
869                assert_eq!(t.name, "lookup");
870                assert!(t.cypher.contains("MATCH"));
871            }
872            _ => panic!("expected cypher tool"),
873        }
874    }
875
876    #[test]
877    fn python_tool_parses() {
878        let f =
879            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
880        let m = load(f.path()).unwrap();
881        match &m.tools[0] {
882            ToolSpec::Python(t) => {
883                assert_eq!(t.python, "./tools.py");
884                assert_eq!(t.function, "detail");
885            }
886            _ => panic!("expected python tool"),
887        }
888    }
889
890    #[test]
891    fn rejects_tool_with_both_kinds() {
892        let f = write_tmp(
893            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
894        );
895        assert!(load(f.path())
896            .unwrap_err()
897            .message
898            .contains("multiple kinds"));
899    }
900
901    #[test]
902    fn rejects_tool_with_no_kind() {
903        let f = write_tmp("tools:\n  - name: x\n");
904        assert!(load(f.path())
905            .unwrap_err()
906            .message
907            .contains("needs exactly one"));
908    }
909
910    #[test]
911    fn rejects_duplicate_tool_names() {
912        let f = write_tmp(
913            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
914        );
915        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
916    }
917
918    #[test]
919    fn embedder_parses() {
920        let f = write_tmp(
921            "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n",
922        );
923        let m = load(f.path()).unwrap();
924        let e = m.embedder.unwrap();
925        assert_eq!(e.module, "./e.py");
926        assert_eq!(e.class, "GraphEmbedder");
927        assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
928    }
929
930    #[test]
931    fn builtins_parses_temp_cleanup() {
932        let f = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
933        let m = load(f.path()).unwrap();
934        assert!(m.builtins.save_graph);
935        assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
936    }
937
938    #[test]
939    fn rejects_invalid_temp_cleanup() {
940        let f = write_tmp("builtins:\n  temp_cleanup: nuke\n");
941        assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
942    }
943
944    #[test]
945    fn allow_embedder_trust_parses() {
946        let f = write_tmp("trust:\n  allow_embedder: true\n");
947        let m = load(f.path()).unwrap();
948        assert!(m.trust.allow_embedder);
949    }
950
951    #[test]
952    fn allow_query_preprocessor_trust_parses() {
953        let f = write_tmp("trust:\n  allow_query_preprocessor: true\n");
954        let m = load(f.path()).unwrap();
955        assert!(m.trust.allow_query_preprocessor);
956        assert!(!m.trust.allow_embedder);
957        assert!(!m.trust.allow_python_tools);
958    }
959
960    #[test]
961    fn allow_query_preprocessor_rejects_non_bool() {
962        let f = write_tmp("trust:\n  allow_query_preprocessor: \"yes\"\n");
963        let err = load(f.path()).unwrap_err();
964        assert!(err
965            .message
966            .contains("allow_query_preprocessor must be a bool"));
967    }
968
969    #[test]
970    fn find_sibling_works() {
971        let dir = tempfile::tempdir().unwrap();
972        let graph = dir.path().join("demo.kgl");
973        std::fs::write(&graph, b"\x00").unwrap();
974        let sibling = dir.path().join("demo_mcp.yaml");
975        std::fs::write(&sibling, "name: x\n").unwrap();
976        assert_eq!(find_sibling_manifest(&graph), Some(sibling));
977    }
978
979    #[test]
980    fn workspace_local_parses() {
981        let f = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
982        let m = load(f.path()).unwrap();
983        let w = m.workspace.unwrap();
984        assert_eq!(w.kind, WorkspaceKind::Local);
985        assert_eq!(w.root.as_deref(), Some("./src"));
986        assert!(w.watch);
987    }
988
989    #[test]
990    fn workspace_github_default_kind() {
991        let f = write_tmp("workspace: {}\n");
992        let m = load(f.path()).unwrap();
993        let w = m.workspace.unwrap();
994        assert_eq!(w.kind, WorkspaceKind::Github);
995        assert!(w.root.is_none());
996        assert!(!w.watch);
997    }
998
999    #[test]
1000    fn workspace_local_without_root_errors() {
1001        let f = write_tmp("workspace:\n  kind: local\n");
1002        let err = load(f.path()).unwrap_err();
1003        assert!(err.message.contains("requires workspace.root"));
1004    }
1005
1006    #[test]
1007    fn workspace_unknown_key_rejected() {
1008        let f = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
1009        let err = load(f.path()).unwrap_err();
1010        assert!(err.message.contains("unknown workspace keys"));
1011    }
1012
1013    #[test]
1014    fn workspace_invalid_kind_rejected() {
1015        let f = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
1016        let err = load(f.path()).unwrap_err();
1017        assert!(err.message.contains("workspace.kind"));
1018    }
1019
1020    #[test]
1021    fn workspace_watch_invalid_for_github() {
1022        let f = write_tmp("workspace:\n  kind: github\n  watch: true\n");
1023        let err = load(f.path()).unwrap_err();
1024        assert!(err.message.contains("watch is only valid"));
1025    }
1026
1027    #[test]
1028    fn extensions_passthrough_parses() {
1029        let f = write_tmp(
1030            "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
1031        );
1032        let m = load(f.path()).unwrap();
1033        assert_eq!(
1034            m.extensions
1035                .get("csv_http_server")
1036                .and_then(|v| v.as_bool()),
1037            Some(true)
1038        );
1039        assert_eq!(
1040            m.extensions
1041                .get("csv_http_server_dir")
1042                .and_then(|v| v.as_str()),
1043            Some("temp/")
1044        );
1045        // Nested values pass through unchanged.
1046        assert_eq!(
1047            m.extensions
1048                .get("arbitrary")
1049                .and_then(|v| v.get("nested"))
1050                .and_then(|v| v.as_i64()),
1051            Some(1)
1052        );
1053    }
1054
1055    #[test]
1056    fn extensions_absent_defaults_to_empty() {
1057        let f = write_tmp("name: x\n");
1058        let m = load(f.path()).unwrap();
1059        assert!(m.extensions.is_empty());
1060    }
1061
1062    #[test]
1063    fn extensions_inner_keys_unvalidated() {
1064        // The framework intentionally does NOT validate keys inside
1065        // `extensions:` — they're downstream-binary concerns. Any shape
1066        // that's a YAML mapping must round-trip.
1067        let f = write_tmp(
1068            "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
1069        );
1070        load(f.path()).unwrap();
1071    }
1072
1073    #[test]
1074    fn extensions_must_be_a_mapping() {
1075        let f = write_tmp("extensions: not-a-mapping\n");
1076        let err = load(f.path()).unwrap_err();
1077        assert!(err.message.contains("extensions must be a mapping"));
1078    }
1079
1080    #[test]
1081    fn env_file_key_parses() {
1082        let f = write_tmp("env_file: ../.env\n");
1083        let m = load(f.path()).unwrap();
1084        assert_eq!(m.env_file.as_deref(), Some("../.env"));
1085    }
1086
1087    #[test]
1088    fn env_file_unset_is_none() {
1089        let f = write_tmp("name: Demo\n");
1090        let m = load(f.path()).unwrap();
1091        assert!(m.env_file.is_none());
1092    }
1093
1094    #[test]
1095    fn find_workspace_works() {
1096        let dir = tempfile::tempdir().unwrap();
1097        let manifest = dir.path().join("workspace_mcp.yaml");
1098        std::fs::write(&manifest, "name: ws\n").unwrap();
1099        assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1100    }
1101
1102    #[test]
1103    fn to_json_shape_is_stable() {
1104        let f = write_tmp(
1105            r#"
1106name: KGLite Codebase
1107source_roots: [src, lib]
1108trust:
1109  allow_embedder: true
1110embedder:
1111  module: kglite.embed
1112  class: SentenceTransformerEmbedder
1113builtins:
1114  save_graph: true
1115  temp_cleanup: on_overview
1116"#,
1117        );
1118        let m = load(f.path()).unwrap();
1119        let actual = m.to_json();
1120        let expected = serde_json::json!({
1121            "yaml_path": f.path().display().to_string(),
1122            "name": "KGLite Codebase",
1123            "instructions": null,
1124            "overview_prefix": null,
1125            "source_roots": ["src", "lib"],
1126            "trust": {
1127                "allow_python_tools": false,
1128                "allow_embedder": true,
1129                "allow_query_preprocessor": false,
1130            },
1131            "tools": [],
1132            "embedder": {
1133                "module": "kglite.embed",
1134                "class": "SentenceTransformerEmbedder",
1135                "kwargs": {},
1136            },
1137            "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
1138            "env_file": null,
1139            "workspace": null,
1140            "extensions": {},
1141        });
1142        assert_eq!(actual, expected);
1143    }
1144
1145    #[test]
1146    fn to_json_round_trips_tools_and_workspace() {
1147        let f = write_tmp(
1148            r#"
1149name: Full Surface
1150source_root: ./src
1151trust:
1152  allow_python_tools: true
1153tools:
1154  - name: nodes_for
1155    cypher: "MATCH (n {name: $name}) RETURN n"
1156    description: "fetch nodes by name"
1157  - name: run_query
1158    python: tools.py
1159    function: run
1160workspace:
1161  kind: local
1162  root: /tmp/ws
1163  watch: true
1164builtins:
1165  save_graph: false
1166env_file: .env.local
1167extensions:
1168  kglite:
1169    flavour: standard
1170"#,
1171        );
1172        let m = load(f.path()).unwrap();
1173        let v = m.to_json();
1174        assert_eq!(v["name"], "Full Surface");
1175        assert_eq!(v["trust"]["allow_python_tools"], true);
1176        assert_eq!(v["workspace"]["kind"], "local");
1177        assert_eq!(v["workspace"]["root"], "/tmp/ws");
1178        assert_eq!(v["workspace"]["watch"], true);
1179        assert_eq!(v["env_file"], ".env.local");
1180        assert_eq!(v["tools"][0]["kind"], "cypher");
1181        assert_eq!(v["tools"][0]["name"], "nodes_for");
1182        assert_eq!(v["tools"][1]["kind"], "python");
1183        assert_eq!(v["tools"][1]["name"], "run_query");
1184        assert_eq!(v["tools"][1]["python"], "tools.py");
1185        assert_eq!(v["tools"][1]["function"], "run");
1186        assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
1187    }
1188}