1#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest. `build` passes this list to
/// `check_keys`, so any other top-level key is reported as an error.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];
/// Keys accepted inside the `workspace:` mapping.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch"];
/// Accepted `workspace.kind` spellings. In the visible code this list is only
/// interpolated into error messages; the matching itself is done by
/// `build_workspace`.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
/// Keys accepted inside the `trust:` mapping.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];
/// Keys accepted on any entry of the `tools:` list. Which combinations are
/// legal for a given entry is decided by `build_tool`.
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
];
/// Keys accepted inside the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
/// Keys accepted inside the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
/// Accepted `builtins.temp_cleanup` spellings. In the visible code this list
/// is only interpolated into error messages; the matching itself is done by
/// `build_builtins`.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
67
/// Error produced while reading or validating a manifest.
///
/// Its `Display` output is `"{path}: {message}"`.
#[derive(Debug, Error)]
#[error("{path}: {message}")]
pub struct ManifestError {
    /// Display form of the manifest path, or the placeholder `"<manifest>"`
    /// when the error was created with [`ManifestError::bare`].
    pub path: String,
    /// Human-readable description of the problem.
    pub message: String,
}
74
75impl ManifestError {
76 pub fn at(path: &Path, message: impl Into<String>) -> Self {
77 Self {
78 path: path.display().to_string(),
79 message: message.into(),
80 }
81 }
82
83 pub fn bare(message: impl Into<String>) -> Self {
84 Self {
85 path: "<manifest>".to_string(),
86 message: message.into(),
87 }
88 }
89}
90
/// Opt-in switches read from the manifest's `trust:` mapping.
///
/// Every flag is `false` unless the manifest sets it (derived `Default`).
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// Value of `trust.allow_python_tools`.
    pub allow_python_tools: bool,
    /// Value of `trust.allow_embedder`.
    pub allow_embedder: bool,
    /// Value of `trust.allow_query_preprocessor`.
    pub allow_query_preprocessor: bool,
}
103
/// One validated entry of the manifest's `tools:` list.
///
/// The variant is chosen by which of the `cypher:`, `python:` or `bundled:`
/// keys the entry sets; `build_tool` requires exactly one of them.
#[derive(Debug, Clone)]
pub enum ToolSpec {
    /// Entry that set `cypher:`.
    Cypher(CypherTool),
    /// Entry that set `python:`.
    Python(PythonTool),
    /// Entry that set `bundled:`.
    Bundled(BundledOverride),
}
125
126impl ToolSpec {
127 pub fn name(&self) -> &str {
128 match self {
129 ToolSpec::Cypher(t) => &t.name,
130 ToolSpec::Python(t) => &t.name,
131 ToolSpec::Bundled(t) => &t.name,
132 }
133 }
134}
135
/// A tool entry defined by a `cypher:` string.
#[derive(Debug, Clone)]
pub struct CypherTool {
    /// Tool name; `build_tool` only accepts `^[a-zA-Z_][a-zA-Z0-9_]*$`.
    pub name: String,
    /// The `cypher:` text; rejected at load time if empty or whitespace-only.
    pub cypher: String,
    /// Optional `description:` string.
    pub description: Option<String>,
    /// Optional `parameters:` mapping, converted from YAML to JSON.
    pub parameters: Option<serde_json::Value>,
}
143
/// A tool entry defined by a `python:` path plus a `function:` name.
#[derive(Debug, Clone)]
pub struct PythonTool {
    /// Tool name; `build_tool` only accepts `^[a-zA-Z_][a-zA-Z0-9_]*$`.
    pub name: String,
    /// The `python:` value, a non-empty path string stored as written.
    pub python: String,
    /// The `function:` value; must pass the same identifier check as `name`.
    pub function: String,
    /// Optional `description:` string.
    pub description: Option<String>,
    /// Optional `parameters:` mapping, converted from YAML to JSON.
    pub parameters: Option<serde_json::Value>,
}
152
/// A `bundled:` entry from the `tools:` list.
///
/// Only `description:` and `hidden:` may accompany `bundled:`; the parser
/// rejects `name:`, `parameters:` and `function:` on such entries.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// The value of the `bundled:` key (an identifier).
    pub name: String,
    /// Optional `description:` string.
    pub description: Option<String>,
    /// Value of `hidden:`; `false` when the key is absent or null.
    pub hidden: bool,
}
169
/// Contents of the manifest's `embedder:` mapping.
#[derive(Debug, Clone)]
pub struct EmbedderConfig {
    /// The `module:` value: a non-empty string (the parser's error message
    /// describes it as a "path or dotted name").
    pub module: String,
    /// The `class:` value; must match `^[a-zA-Z_][a-zA-Z0-9_]*$`.
    pub class: String,
    /// The `kwargs:` mapping converted to JSON; empty when absent or null.
    pub kwargs: serde_json::Map<String, serde_json::Value>,
}
176
/// Contents of the manifest's `builtins:` mapping.
///
/// The derived `Default` is `save_graph = false`,
/// `temp_cleanup = TempCleanup::Never`.
#[derive(Debug, Default, Clone)]
pub struct BuiltinsConfig {
    /// Value of `builtins.save_graph`.
    pub save_graph: bool,
    /// Value of `builtins.temp_cleanup`.
    pub temp_cleanup: TempCleanup,
}
182
/// Parsed form of `builtins.temp_cleanup`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Spelled `never` in the manifest; the default.
    #[default]
    Never,
    /// Spelled `on_overview` in the manifest.
    OnOverview,
}

impl TempCleanup {
    /// Returns the manifest spelling of this value.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Never => "never",
            Self::OnOverview => "on_overview",
        }
    }
}
198
/// Parsed form of `workspace.kind`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Spelled `github` in the manifest; the default when `kind` is omitted.
    #[default]
    Github,
    /// Spelled `local` in the manifest.
    Local,
}

impl WorkspaceKind {
    /// Returns the manifest spelling of this kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Github => "github",
            Self::Local => "local",
        }
    }
}
218
/// Contents of the manifest's `workspace:` mapping.
///
/// `build_workspace` enforces two cross-field rules: `kind: local` requires
/// `root`, and `watch: true` is rejected when the kind is `github`.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceConfig {
    /// Value of `workspace.kind` (defaults to `Github`).
    pub kind: WorkspaceKind,
    /// Value of `workspace.root`; when present it is a non-empty string.
    pub root: Option<String>,
    /// Value of `workspace.watch`; `false` when absent or null.
    pub watch: bool,
}
229
/// A fully validated manifest, as produced by `load`.
#[derive(Debug, Clone)]
pub struct Manifest {
    /// Path the manifest was loaded from.
    pub yaml_path: PathBuf,
    /// Optional top-level `name:` string.
    pub name: Option<String>,
    /// Optional top-level `instructions:` string.
    pub instructions: Option<String>,
    /// Optional top-level `overview_prefix:` string.
    pub overview_prefix: Option<String>,
    /// Source roots, normalised to a list: a single `source_root:` string
    /// becomes a one-element list, `source_roots:` is kept in order, and the
    /// list is empty when neither key is present.
    pub source_roots: Vec<String>,
    /// Parsed `trust:` section (all flags `false` when absent).
    pub trust: TrustConfig,
    /// Parsed `tools:` entries, in manifest order, with unique names.
    pub tools: Vec<ToolSpec>,
    /// Parsed `embedder:` section, if any.
    pub embedder: Option<EmbedderConfig>,
    /// Parsed `builtins:` section (defaults when absent).
    pub builtins: BuiltinsConfig,
    /// Optional top-level `env_file:` string.
    pub env_file: Option<String>,
    /// Parsed `workspace:` section, if any.
    pub workspace: Option<WorkspaceConfig>,
    /// The `extensions:` mapping converted to JSON without further
    /// validation; empty when absent or null.
    pub extensions: serde_json::Map<String, serde_json::Value>,
}
260
261impl Manifest {
262 pub fn to_json(&self) -> serde_json::Value {
272 serde_json::json!({
273 "yaml_path": self.yaml_path.display().to_string(),
274 "name": self.name,
275 "instructions": self.instructions,
276 "overview_prefix": self.overview_prefix,
277 "source_roots": self.source_roots,
278 "trust": {
279 "allow_python_tools": self.trust.allow_python_tools,
280 "allow_embedder": self.trust.allow_embedder,
281 "allow_query_preprocessor": self.trust.allow_query_preprocessor,
282 },
283 "tools": self.tools.iter().map(|t| match t {
284 ToolSpec::Cypher(c) => serde_json::json!({
285 "kind": "cypher",
286 "name": c.name,
287 "cypher": c.cypher,
288 "description": c.description,
289 "parameters": c.parameters,
290 }),
291 ToolSpec::Python(p) => serde_json::json!({
292 "kind": "python",
293 "name": p.name,
294 "python": p.python,
295 "function": p.function,
296 "description": p.description,
297 "parameters": p.parameters,
298 }),
299 ToolSpec::Bundled(b) => serde_json::json!({
300 "kind": "bundled",
301 "name": b.name,
302 "description": b.description,
303 "hidden": b.hidden,
304 }),
305 }).collect::<Vec<_>>(),
306 "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
307 "module": e.module,
308 "class": e.class,
309 "kwargs": e.kwargs,
310 })),
311 "builtins": {
312 "save_graph": self.builtins.save_graph,
313 "temp_cleanup": self.builtins.temp_cleanup.as_str(),
314 },
315 "env_file": self.env_file,
316 "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
317 "kind": w.kind.as_str(),
318 "root": w.root,
319 "watch": w.watch,
320 })),
321 "extensions": self.extensions,
322 })
323 }
324}
325
/// Looks for a manifest next to a graph file.
///
/// For a graph at `<dir>/<stem>.<ext>` the candidate is
/// `<dir>/<stem>_mcp.yaml`. Returns the candidate only if it exists and is a
/// regular file; returns `None` when `graph_path` has no file stem or no
/// parent component.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let stem = graph_path.file_stem()?;
    let parent = graph_path.parent()?;
    // Build the file name as an `OsString` so stems that are not valid UTF-8
    // survive byte-for-byte. A lossy string conversion would replace the
    // offending bytes and make the lookup miss the real sibling.
    let mut file_name = stem.to_os_string();
    file_name.push("_mcp.yaml");
    let candidate = parent.join(file_name);
    candidate.is_file().then_some(candidate)
}
337
/// Looks for `workspace_mcp.yaml` directly inside `workspace_dir`.
///
/// Returns the path only if it exists and is a regular file.
pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
    Some(workspace_dir.join("workspace_mcp.yaml")).filter(|candidate| candidate.is_file())
}
347
348pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
350 let text = fs::read_to_string(yaml_path)
351 .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
352 let raw: serde_yaml::Value = serde_yaml::from_str(&text)
353 .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
354 let raw = match raw {
355 serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
356 v => v,
357 };
358 let map = raw
359 .as_mapping()
360 .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
361 build(map, yaml_path)
362}
363
364fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
365 check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
366
367 if raw.contains_key("source_root") && raw.contains_key("source_roots") {
368 return Err(ManifestError::at(
369 yaml_path,
370 "specify either source_root (str) or source_roots (list), not both",
371 ));
372 }
373
374 let mut source_roots: Vec<String> = Vec::new();
375 if let Some(v) = raw.get("source_root") {
376 let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
377 ManifestError::at(yaml_path, "source_root must be a non-empty string")
378 })?;
379 source_roots.push(s.to_string());
380 } else if let Some(v) = raw.get("source_roots") {
381 let seq = v.as_sequence().ok_or_else(|| {
382 ManifestError::at(
383 yaml_path,
384 "source_roots must be a list of non-empty strings",
385 )
386 })?;
387 if seq.is_empty() {
388 return Err(ManifestError::at(
389 yaml_path,
390 "source_roots must be non-empty when set",
391 ));
392 }
393 for item in seq {
394 let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
395 ManifestError::at(
396 yaml_path,
397 "source_roots must be a list of non-empty strings",
398 )
399 })?;
400 source_roots.push(s.to_string());
401 }
402 }
403
404 let trust = build_trust(raw.get("trust"), yaml_path)?;
405 let tools = build_tools(raw.get("tools"), yaml_path)?;
406 let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
407 let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
408 let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
409 let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
410
411 Ok(Manifest {
412 yaml_path: yaml_path.to_path_buf(),
413 name: optional_str(raw, "name", yaml_path)?,
414 instructions: optional_str(raw, "instructions", yaml_path)?,
415 overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
416 source_roots,
417 trust,
418 tools,
419 embedder,
420 builtins,
421 env_file: optional_str(raw, "env_file", yaml_path)?,
422 workspace,
423 extensions,
424 })
425}
426
427fn build_extensions(
428 raw: Option<&serde_yaml::Value>,
429 yaml_path: &Path,
430) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
431 let Some(raw) = raw else {
432 return Ok(serde_json::Map::new());
433 };
434 if matches!(raw, serde_yaml::Value::Null) {
435 return Ok(serde_json::Map::new());
436 }
437 if !raw.is_mapping() {
438 return Err(ManifestError::at(
439 yaml_path,
440 "extensions must be a mapping (downstream-binary-specific keys)",
441 ));
442 }
443 match yaml_to_json(raw.clone())? {
444 serde_json::Value::Object(o) => Ok(o),
445 _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
446 }
447}
448
449fn build_workspace(
450 raw: Option<&serde_yaml::Value>,
451 yaml_path: &Path,
452) -> Result<Option<WorkspaceConfig>, ManifestError> {
453 let Some(raw) = raw else { return Ok(None) };
454 if matches!(raw, serde_yaml::Value::Null) {
455 return Ok(None);
456 }
457 let map = raw
458 .as_mapping()
459 .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
460 check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
461 let kind = match map.get("kind") {
462 None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
463 Some(serde_yaml::Value::String(s)) => match s.as_str() {
464 "github" => WorkspaceKind::Github,
465 "local" => WorkspaceKind::Local,
466 other => {
467 return Err(ManifestError::at(
468 yaml_path,
469 format!(
470 "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
471 ),
472 ));
473 }
474 },
475 Some(_) => {
476 return Err(ManifestError::at(
477 yaml_path,
478 format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
479 ))
480 }
481 };
482 let root = match map.get("root") {
483 None | Some(serde_yaml::Value::Null) => None,
484 Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
485 _ => {
486 return Err(ManifestError::at(
487 yaml_path,
488 "workspace.root must be a non-empty string",
489 ))
490 }
491 };
492 let watch = match map.get("watch") {
493 None | Some(serde_yaml::Value::Null) => false,
494 Some(serde_yaml::Value::Bool(b)) => *b,
495 Some(_) => {
496 return Err(ManifestError::at(
497 yaml_path,
498 "workspace.watch must be a bool",
499 ))
500 }
501 };
502 if kind == WorkspaceKind::Local && root.is_none() {
503 return Err(ManifestError::at(
504 yaml_path,
505 "workspace.kind: local requires workspace.root to be set",
506 ));
507 }
508 if kind == WorkspaceKind::Github && watch {
509 return Err(ManifestError::at(
510 yaml_path,
511 "workspace.watch is only valid with workspace.kind: local",
512 ));
513 }
514 Ok(Some(WorkspaceConfig { kind, root, watch }))
515}
516
517fn check_keys(
518 map: &serde_yaml::Mapping,
519 allowed: &[&str],
520 label: &str,
521 yaml_path: &Path,
522) -> Result<(), ManifestError> {
523 let mut unknown: Vec<String> = Vec::new();
524 for (k, _) in map {
525 let key = k.as_str().unwrap_or("<non-string-key>");
526 if !allowed.contains(&key) {
527 unknown.push(key.to_string());
528 }
529 }
530 if !unknown.is_empty() {
531 unknown.sort();
532 return Err(ManifestError::at(
533 yaml_path,
534 format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
535 ));
536 }
537 Ok(())
538}
539
540fn optional_str(
541 raw: &serde_yaml::Mapping,
542 key: &str,
543 yaml_path: &Path,
544) -> Result<Option<String>, ManifestError> {
545 match raw.get(key) {
546 None | Some(serde_yaml::Value::Null) => Ok(None),
547 Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
548 Some(_) => Err(ManifestError::at(
549 yaml_path,
550 format!("{key} must be a string"),
551 )),
552 }
553}
554
555fn build_trust(
556 raw: Option<&serde_yaml::Value>,
557 yaml_path: &Path,
558) -> Result<TrustConfig, ManifestError> {
559 let Some(raw) = raw else {
560 return Ok(TrustConfig::default());
561 };
562 let map = raw
563 .as_mapping()
564 .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
565 check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
566 let mut cfg = TrustConfig::default();
567 if let Some(v) = map.get("allow_python_tools") {
568 cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
569 ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
570 })?;
571 }
572 if let Some(v) = map.get("allow_embedder") {
573 cfg.allow_embedder = v
574 .as_bool()
575 .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
576 }
577 if let Some(v) = map.get("allow_query_preprocessor") {
578 cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
579 ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
580 })?;
581 }
582 Ok(cfg)
583}
584
585fn build_tools(
586 raw: Option<&serde_yaml::Value>,
587 yaml_path: &Path,
588) -> Result<Vec<ToolSpec>, ManifestError> {
589 let Some(raw) = raw else {
590 return Ok(Vec::new());
591 };
592 let seq = raw
593 .as_sequence()
594 .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
595 let mut tools: Vec<ToolSpec> = Vec::new();
596 let mut seen: BTreeMap<String, ()> = BTreeMap::new();
597 for (i, entry) in seq.iter().enumerate() {
598 let tool = build_tool(entry, i, yaml_path)?;
599 let name = tool.name().to_string();
600 if seen.insert(name.clone(), ()).is_some() {
601 return Err(ManifestError::at(
602 yaml_path,
603 format!("duplicate tool name: {name:?}"),
604 ));
605 }
606 tools.push(tool);
607 }
608 Ok(tools)
609}
610
611fn build_tool(
612 entry: &serde_yaml::Value,
613 idx: usize,
614 yaml_path: &Path,
615) -> Result<ToolSpec, ManifestError> {
616 let map = entry
617 .as_mapping()
618 .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
619 check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
620
621 let has_cypher = map.contains_key("cypher");
626 let has_python = map.contains_key("python");
627 let has_bundled = map.contains_key("bundled");
628 let kinds_present: Vec<&str> = [
629 ("cypher", has_cypher),
630 ("python", has_python),
631 ("bundled", has_bundled),
632 ]
633 .into_iter()
634 .filter(|(_, p)| *p)
635 .map(|(k, _)| k)
636 .collect();
637 if kinds_present.is_empty() {
638 return Err(ManifestError::at(
639 yaml_path,
640 format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
641 ));
642 }
643 if kinds_present.len() > 1 {
644 return Err(ManifestError::at(
645 yaml_path,
646 format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
647 ));
648 }
649
650 if has_bundled {
655 return build_bundled_override(map, idx, yaml_path);
656 }
657
658 let name = map
659 .get("name")
660 .and_then(|v| v.as_str())
661 .filter(|s| valid_identifier(s))
662 .ok_or_else(|| {
663 ManifestError::at(
664 yaml_path,
665 format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
666 )
667 })?
668 .to_string();
669
670 if map.contains_key("hidden") {
674 return Err(ManifestError::at(
675 yaml_path,
676 format!(
677 "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
678 ),
679 ));
680 }
681
682 let description = match map.get("description") {
683 None | Some(serde_yaml::Value::Null) => None,
684 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
685 Some(_) => {
686 return Err(ManifestError::at(
687 yaml_path,
688 format!("tools[{idx}] ({name:?}).description must be a string"),
689 ))
690 }
691 };
692
693 let parameters = match map.get("parameters") {
694 None | Some(serde_yaml::Value::Null) => None,
695 Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
696 Some(_) => {
697 return Err(ManifestError::at(
698 yaml_path,
699 format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
700 ))
701 }
702 };
703
704 if has_cypher {
705 let cypher = map
706 .get("cypher")
707 .and_then(|v| v.as_str())
708 .filter(|s| !s.trim().is_empty())
709 .ok_or_else(|| {
710 ManifestError::at(
711 yaml_path,
712 format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
713 )
714 })?
715 .to_string();
716 return Ok(ToolSpec::Cypher(CypherTool {
717 name,
718 cypher,
719 description,
720 parameters,
721 }));
722 }
723
724 let python = map
726 .get("python")
727 .and_then(|v| v.as_str())
728 .filter(|s| !s.is_empty())
729 .ok_or_else(|| {
730 ManifestError::at(
731 yaml_path,
732 format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
733 )
734 })?
735 .to_string();
736 let function = map
737 .get("function")
738 .and_then(|v| v.as_str())
739 .filter(|s| valid_identifier(s))
740 .ok_or_else(|| {
741 ManifestError::at(
742 yaml_path,
743 format!(
744 "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
745 ),
746 )
747 })?
748 .to_string();
749 Ok(ToolSpec::Python(PythonTool {
750 name,
751 python,
752 function,
753 description,
754 parameters,
755 }))
756}
757
758fn build_bundled_override(
762 map: &serde_yaml::Mapping,
763 idx: usize,
764 yaml_path: &Path,
765) -> Result<ToolSpec, ManifestError> {
766 let name = map
767 .get("bundled")
768 .and_then(|v| v.as_str())
769 .filter(|s| valid_identifier(s))
770 .ok_or_else(|| {
771 ManifestError::at(
772 yaml_path,
773 format!(
774 "tools[{idx}] `bundled:` must be a string naming a bundled tool \
775 (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
776 ),
777 )
778 })?
779 .to_string();
780
781 for forbidden in ["name", "parameters", "function"] {
786 if map.contains_key(forbidden) {
787 return Err(ManifestError::at(
788 yaml_path,
789 format!(
790 "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
791 (only `description:` and `hidden:` are permitted on overrides)"
792 ),
793 ));
794 }
795 }
796
797 let description = match map.get("description") {
798 None | Some(serde_yaml::Value::Null) => None,
799 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
800 Some(_) => {
801 return Err(ManifestError::at(
802 yaml_path,
803 format!("tools[{idx}] bundled override {name:?}.description must be a string"),
804 ))
805 }
806 };
807
808 let hidden = match map.get("hidden") {
809 None | Some(serde_yaml::Value::Null) => false,
810 Some(serde_yaml::Value::Bool(b)) => *b,
811 Some(_) => {
812 return Err(ManifestError::at(
813 yaml_path,
814 format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
815 ))
816 }
817 };
818
819 Ok(ToolSpec::Bundled(BundledOverride {
820 name,
821 description,
822 hidden,
823 }))
824}
825
826fn build_embedder(
827 raw: Option<&serde_yaml::Value>,
828 yaml_path: &Path,
829) -> Result<Option<EmbedderConfig>, ManifestError> {
830 let Some(raw) = raw else { return Ok(None) };
831 if matches!(raw, serde_yaml::Value::Null) {
832 return Ok(None);
833 }
834 let map = raw
835 .as_mapping()
836 .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
837 check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
838 let module = map
839 .get("module")
840 .and_then(|v| v.as_str())
841 .filter(|s| !s.is_empty())
842 .ok_or_else(|| {
843 ManifestError::at(
844 yaml_path,
845 "embedder.module must be a non-empty string (path or dotted name)",
846 )
847 })?
848 .to_string();
849 let class = map
850 .get("class")
851 .and_then(|v| v.as_str())
852 .filter(|s| valid_identifier(s))
853 .ok_or_else(|| {
854 ManifestError::at(
855 yaml_path,
856 "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
857 )
858 })?
859 .to_string();
860 let kwargs = match map.get("kwargs") {
861 None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
862 Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
863 serde_json::Value::Object(o) => o,
864 _ => {
865 return Err(ManifestError::at(
866 yaml_path,
867 "embedder.kwargs must be a mapping",
868 ))
869 }
870 },
871 Some(_) => {
872 return Err(ManifestError::at(
873 yaml_path,
874 "embedder.kwargs must be a mapping",
875 ))
876 }
877 };
878 Ok(Some(EmbedderConfig {
879 module,
880 class,
881 kwargs,
882 }))
883}
884
885fn build_builtins(
886 raw: Option<&serde_yaml::Value>,
887 yaml_path: &Path,
888) -> Result<BuiltinsConfig, ManifestError> {
889 let Some(raw) = raw else {
890 return Ok(BuiltinsConfig::default());
891 };
892 if matches!(raw, serde_yaml::Value::Null) {
893 return Ok(BuiltinsConfig::default());
894 }
895 let map = raw
896 .as_mapping()
897 .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
898 check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
899 let mut cfg = BuiltinsConfig::default();
900 if let Some(v) = map.get("save_graph") {
901 cfg.save_graph = v
902 .as_bool()
903 .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
904 }
905 if let Some(v) = map.get("temp_cleanup") {
906 let s = v.as_str().ok_or_else(|| {
907 ManifestError::at(
908 yaml_path,
909 format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
910 )
911 })?;
912 cfg.temp_cleanup = match s {
913 "never" => TempCleanup::Never,
914 "on_overview" => TempCleanup::OnOverview,
915 other => {
916 return Err(ManifestError::at(
917 yaml_path,
918 format!(
919 "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
920 ),
921 ))
922 }
923 };
924 }
925 Ok(cfg)
926}
927
/// Returns `true` when `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// The empty string and anything containing a non-ASCII character are
/// rejected.
fn valid_identifier(s: &str) -> bool {
    // Working on bytes is equivalent to working on chars here: every byte of
    // a non-ASCII character is >= 0x80 and therefore fails both predicates.
    let is_head = |b: u8| b.is_ascii_alphabetic() || b == b'_';
    let is_tail = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    match s.as_bytes().split_first() {
        Some((&head, rest)) => is_head(head) && rest.iter().all(|&b| is_tail(b)),
        None => false,
    }
}
936
937fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
938 serde_json::to_value(&v)
939 .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
940}
941
/// Unit struct that nothing in the visible code constructs or references.
///
/// NOTE(review): presumably kept so the `serde::Deserialize` import stays in
/// use — confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
944
945#[cfg(test)]
946mod tests {
947 use super::*;
948
949 fn write_tmp(text: &str) -> tempfile::NamedTempFile {
950 let mut f = tempfile::NamedTempFile::new().unwrap();
951 std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
952 f
953 }
954
955 #[test]
956 fn loads_minimal_empty_manifest() {
957 let f = write_tmp("");
958 let m = load(f.path()).unwrap();
959 assert_eq!(m.tools.len(), 0);
960 assert_eq!(m.source_roots.len(), 0);
961 assert!(!m.trust.allow_python_tools);
962 assert!(!m.trust.allow_embedder);
963 assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
964 }
965
966 #[test]
967 fn loads_name_and_instructions() {
968 let f = write_tmp("name: Demo\ninstructions: |\n multi-line\n block\n");
969 let m = load(f.path()).unwrap();
970 assert_eq!(m.name.as_deref(), Some("Demo"));
971 assert!(m.instructions.unwrap().contains("multi-line"));
972 }
973
974 #[test]
975 fn rejects_unknown_top_key() {
976 let f = write_tmp("bogus: 1\n");
977 let err = load(f.path()).unwrap_err();
978 assert!(err.message.contains("unknown top-level"));
979 }
980
981 #[test]
982 fn source_root_string_normalises_to_list() {
983 let f = write_tmp("source_root: ./data\n");
984 let m = load(f.path()).unwrap();
985 assert_eq!(m.source_roots, vec!["./data".to_string()]);
986 }
987
988 #[test]
989 fn source_roots_list_preserved() {
990 let f = write_tmp("source_roots:\n - ./a\n - ./b\n");
991 let m = load(f.path()).unwrap();
992 assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
993 }
994
995 #[test]
996 fn rejects_both_source_root_and_source_roots() {
997 let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
998 assert!(load(f.path()).unwrap_err().message.contains("not both"));
999 }
1000
1001 #[test]
1002 fn cypher_tool_parses() {
1003 let f = write_tmp("tools:\n - name: lookup\n cypher: MATCH (n) RETURN n\n");
1004 let m = load(f.path()).unwrap();
1005 assert_eq!(m.tools.len(), 1);
1006 match &m.tools[0] {
1007 ToolSpec::Cypher(t) => {
1008 assert_eq!(t.name, "lookup");
1009 assert!(t.cypher.contains("MATCH"));
1010 }
1011 _ => panic!("expected cypher tool"),
1012 }
1013 }
1014
1015 #[test]
1016 fn python_tool_parses() {
1017 let f =
1018 write_tmp("tools:\n - name: detail\n python: ./tools.py\n function: detail\n");
1019 let m = load(f.path()).unwrap();
1020 match &m.tools[0] {
1021 ToolSpec::Python(t) => {
1022 assert_eq!(t.python, "./tools.py");
1023 assert_eq!(t.function, "detail");
1024 }
1025 _ => panic!("expected python tool"),
1026 }
1027 }
1028
1029 #[test]
1030 fn rejects_tool_with_both_kinds() {
1031 let f = write_tmp(
1032 "tools:\n - name: x\n cypher: 'MATCH (n) RETURN n'\n python: ./t.py\n function: x\n",
1033 );
1034 assert!(load(f.path())
1035 .unwrap_err()
1036 .message
1037 .contains("multiple kinds"));
1038 }
1039
1040 #[test]
1041 fn rejects_tool_with_no_kind() {
1042 let f = write_tmp("tools:\n - name: x\n");
1043 assert!(load(f.path())
1044 .unwrap_err()
1045 .message
1046 .contains("needs exactly one"));
1047 }
1048
1049 #[test]
1050 fn rejects_duplicate_tool_names() {
1051 let f = write_tmp(
1052 "tools:\n - name: same\n cypher: 'MATCH (n) RETURN n'\n - name: same\n cypher: 'MATCH (m) RETURN m'\n",
1053 );
1054 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1055 }
1056
1057 #[test]
1060 fn bundled_override_with_description_parses() {
1061 let f =
1062 write_tmp("tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n");
1063 let m = load(f.path()).unwrap();
1064 assert_eq!(m.tools.len(), 1);
1065 match &m.tools[0] {
1066 ToolSpec::Bundled(b) => {
1067 assert_eq!(b.name, "repo_management");
1068 assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1069 assert!(!b.hidden);
1070 }
1071 _ => panic!("expected bundled override"),
1072 }
1073 }
1074
1075 #[test]
1076 fn bundled_override_with_hidden_parses() {
1077 let f = write_tmp("tools:\n - bundled: ping\n hidden: true\n");
1078 let m = load(f.path()).unwrap();
1079 match &m.tools[0] {
1080 ToolSpec::Bundled(b) => {
1081 assert_eq!(b.name, "ping");
1082 assert!(b.hidden);
1083 assert!(b.description.is_none());
1084 }
1085 _ => panic!("expected bundled override"),
1086 }
1087 }
1088
1089 #[test]
1090 fn bundled_override_alongside_cypher_tools_parses() {
1091 let f = write_tmp(
1092 "tools:\n\
1093 \x20\x20- bundled: cypher_query\n\
1094 \x20\x20\x20\x20description: \"Custom server description\"\n\
1095 \x20\x20- name: lookup\n\
1096 \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1097 );
1098 let m = load(f.path()).unwrap();
1099 assert_eq!(m.tools.len(), 2);
1100 assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1101 assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1102 }
1103
1104 #[test]
1105 fn rejects_bundled_with_cypher_kind() {
1106 let f =
1107 write_tmp("tools:\n - bundled: cypher_query\n cypher: \"MATCH (n) RETURN n\"\n");
1108 let err = load(f.path()).unwrap_err();
1109 assert!(
1110 err.message.contains("multiple kinds"),
1111 "got: {}",
1112 err.message
1113 );
1114 }
1115
1116 #[test]
1117 fn rejects_bundled_with_name_field() {
1118 let f = write_tmp("tools:\n - bundled: ping\n name: ping\n");
1119 let err = load(f.path()).unwrap_err();
1120 assert!(
1121 err.message.contains("cannot set `name:`"),
1122 "got: {}",
1123 err.message
1124 );
1125 }
1126
1127 #[test]
1128 fn rejects_bundled_with_parameters_field() {
1129 let f =
1130 write_tmp("tools:\n - bundled: cypher_query\n parameters:\n type: object\n");
1131 let err = load(f.path()).unwrap_err();
1132 assert!(
1133 err.message.contains("cannot set `parameters:`"),
1134 "got: {}",
1135 err.message
1136 );
1137 }
1138
1139 #[test]
1140 fn rejects_bundled_with_non_bool_hidden() {
1141 let f = write_tmp("tools:\n - bundled: ping\n hidden: yes-please\n");
1142 let err = load(f.path()).unwrap_err();
1143 assert!(
1144 err.message.contains("hidden must be a bool"),
1145 "got: {}",
1146 err.message
1147 );
1148 }
1149
1150 #[test]
1151 fn rejects_hidden_on_cypher_tool() {
1152 let f = write_tmp(
1153 "tools:\n - name: lookup\n cypher: \"MATCH (n) RETURN n\"\n hidden: true\n",
1154 );
1155 let err = load(f.path()).unwrap_err();
1156 assert!(
1157 err.message
1158 .contains("`hidden:` is only valid on `bundled:` override entries"),
1159 "got: {}",
1160 err.message
1161 );
1162 }
1163
1164 #[test]
1165 fn rejects_duplicate_bundled_overrides() {
1166 let f = write_tmp(
1170 "tools:\n - bundled: ping\n hidden: true\n - bundled: ping\n description: \"x\"\n",
1171 );
1172 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1173 }
1174
1175 #[test]
1176 fn rejects_bundled_with_invalid_identifier() {
1177 let f = write_tmp("tools:\n - bundled: \"123-bad\"\n hidden: true\n");
1178 let err = load(f.path()).unwrap_err();
1179 assert!(
1180 err.message.contains("must be a string"),
1181 "got: {}",
1182 err.message
1183 );
1184 }
1185
1186 #[test]
1187 fn bundled_override_to_json_shape() {
1188 let f = write_tmp(
1189 "tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n hidden: false\n",
1190 );
1191 let m = load(f.path()).unwrap();
1192 let v = m.to_json();
1193 assert_eq!(v["tools"][0]["kind"], "bundled");
1194 assert_eq!(v["tools"][0]["name"], "repo_management");
1195 assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1196 assert_eq!(v["tools"][0]["hidden"], false);
1197 }
1198
1199 #[test]
1200 fn embedder_parses() {
1201 let f = write_tmp(
1202 "embedder:\n module: ./e.py\n class: GraphEmbedder\n kwargs:\n cooldown: 900\n",
1203 );
1204 let m = load(f.path()).unwrap();
1205 let e = m.embedder.unwrap();
1206 assert_eq!(e.module, "./e.py");
1207 assert_eq!(e.class, "GraphEmbedder");
1208 assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1209 }
1210
1211 #[test]
1212 fn builtins_parses_temp_cleanup() {
1213 let f = write_tmp("builtins:\n save_graph: true\n temp_cleanup: on_overview\n");
1214 let m = load(f.path()).unwrap();
1215 assert!(m.builtins.save_graph);
1216 assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1217 }
1218
1219 #[test]
1220 fn rejects_invalid_temp_cleanup() {
1221 let f = write_tmp("builtins:\n temp_cleanup: nuke\n");
1222 assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1223 }
1224
1225 #[test]
1226 fn allow_embedder_trust_parses() {
1227 let f = write_tmp("trust:\n allow_embedder: true\n");
1228 let m = load(f.path()).unwrap();
1229 assert!(m.trust.allow_embedder);
1230 }
1231
1232 #[test]
1233 fn allow_query_preprocessor_trust_parses() {
1234 let f = write_tmp("trust:\n allow_query_preprocessor: true\n");
1235 let m = load(f.path()).unwrap();
1236 assert!(m.trust.allow_query_preprocessor);
1237 assert!(!m.trust.allow_embedder);
1238 assert!(!m.trust.allow_python_tools);
1239 }
1240
1241 #[test]
1242 fn allow_query_preprocessor_rejects_non_bool() {
1243 let f = write_tmp("trust:\n allow_query_preprocessor: \"yes\"\n");
1244 let err = load(f.path()).unwrap_err();
1245 assert!(err
1246 .message
1247 .contains("allow_query_preprocessor must be a bool"));
1248 }
1249
/// With `demo.kgl` and `demo_mcp.yaml` side by side in one directory,
/// `find_sibling_manifest` on the graph path returns the yaml path.
#[test]
fn find_sibling_works() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let graph_path = tmp_dir.path().join("demo.kgl");
    let manifest_path = tmp_dir.path().join("demo_mcp.yaml");
    std::fs::write(&graph_path, b"\x00").unwrap();
    std::fs::write(&manifest_path, "name: x\n").unwrap();
    let found = find_sibling_manifest(&graph_path);
    assert_eq!(found, Some(manifest_path));
}
1259
/// A fully specified local workspace loads with its kind, root and watch flag.
#[test]
fn workspace_local_parses() {
    let manifest_file = write_tmp("workspace:\n kind: local\n root: ./src\n watch: true\n");
    let manifest = load(manifest_file.path()).unwrap();
    let workspace = manifest.workspace.unwrap();
    assert_eq!(workspace.kind, WorkspaceKind::Local);
    assert_eq!(workspace.root.as_deref(), Some("./src"));
    assert!(workspace.watch);
}
1269
/// An empty `workspace: {}` mapping loads as a Github workspace with no root
/// and watching disabled.
#[test]
fn workspace_github_default_kind() {
    let manifest_file = write_tmp("workspace: {}\n");
    let manifest = load(manifest_file.path()).unwrap();
    let workspace = manifest.workspace.unwrap();
    assert_eq!(workspace.kind, WorkspaceKind::Github);
    assert!(workspace.root.is_none());
    assert!(!workspace.watch);
}
1279
/// `kind: local` without a `root` fails to load, and the error says that
/// `workspace.root` is required.
#[test]
fn workspace_local_without_root_errors() {
    let manifest_file = write_tmp("workspace:\n kind: local\n");
    let failure = load(manifest_file.path()).unwrap_err();
    assert!(failure.message.contains("requires workspace.root"));
}
1286
/// An unrecognised key (`bogus`) inside `workspace` is reported as an unknown
/// workspace key.
#[test]
fn workspace_unknown_key_rejected() {
    let manifest_file = write_tmp("workspace:\n kind: local\n root: ./x\n bogus: 1\n");
    let failure = load(manifest_file.path()).unwrap_err();
    assert!(failure.message.contains("unknown workspace keys"));
}
1293
/// `kind: docker` is rejected, and the error message names `workspace.kind`.
#[test]
fn workspace_invalid_kind_rejected() {
    let manifest_file = write_tmp("workspace:\n kind: docker\n root: ./x\n");
    let failure = load(manifest_file.path()).unwrap_err();
    assert!(failure.message.contains("workspace.kind"));
}
1300
/// `watch: true` combined with `kind: github` is rejected with a
/// "watch is only valid" error.
#[test]
fn workspace_watch_invalid_for_github() {
    let manifest_file = write_tmp("workspace:\n kind: github\n watch: true\n");
    let failure = load(manifest_file.path()).unwrap_err();
    assert!(failure.message.contains("watch is only valid"));
}
1307
/// Values under `extensions` are kept as-is: a bool, a string, and a nested
/// mapping are all readable from the loaded manifest.
///
/// `nested` is indented one level deeper than `arbitrary` so that YAML parses
/// it as a child of `arbitrary`; at the same indent it would be a sibling key
/// and the last assertion could never hold.
#[test]
fn extensions_passthrough_parses() {
    let f = write_tmp(
        "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
    );
    let m = load(f.path()).unwrap();
    let extensions = &m.extensions;

    // Scalar bool entry.
    assert_eq!(
        extensions.get("csv_http_server").and_then(|v| v.as_bool()),
        Some(true)
    );
    // Scalar string entry.
    assert_eq!(
        extensions
            .get("csv_http_server_dir")
            .and_then(|v| v.as_str()),
        Some("temp/")
    );
    // Nested mapping entry: extensions.arbitrary.nested.
    assert_eq!(
        extensions
            .get("arbitrary")
            .and_then(|v| v.get("nested"))
            .and_then(|v| v.as_i64()),
        Some(1)
    );
}
1335
/// A manifest with no `extensions` key loads with an empty extensions map.
#[test]
fn extensions_absent_defaults_to_empty() {
    let manifest_file = write_tmp("name: x\n");
    let manifest = load(manifest_file.path()).unwrap();
    let extensions = &manifest.extensions;
    assert!(extensions.is_empty());
}
1342
/// Arbitrary key names inside `extensions` (a scalar and an inline mapping
/// here) do not cause `load` to fail.
#[test]
fn extensions_inner_keys_unvalidated() {
    let yaml =
        "extensions:\n whatever_kglite_wants: foo\n some_other_consumer: { a: 1, b: 2 }\n";
    let manifest_file = write_tmp(yaml);
    // Only success matters; the loaded manifest itself is not inspected.
    load(manifest_file.path()).unwrap();
}
1353
/// A scalar value for `extensions` is rejected with an
/// "extensions must be a mapping" error.
#[test]
fn extensions_must_be_a_mapping() {
    let manifest_file = write_tmp("extensions: not-a-mapping\n");
    let failure = load(manifest_file.path()).unwrap_err();
    assert!(failure.message.contains("extensions must be a mapping"));
}
1360
/// The `env_file` value is carried onto the loaded manifest exactly as written.
#[test]
fn env_file_key_parses() {
    let manifest_file = write_tmp("env_file: ../.env\n");
    let manifest = load(manifest_file.path()).unwrap();
    let env_file = manifest.env_file.as_deref();
    assert_eq!(env_file, Some("../.env"));
}
1367
/// A manifest that does not mention `env_file` loads with `env_file` unset.
#[test]
fn env_file_unset_is_none() {
    let manifest_file = write_tmp("name: Demo\n");
    let manifest = load(manifest_file.path()).unwrap();
    let env_file = &manifest.env_file;
    assert!(env_file.is_none());
}
1374
/// `find_workspace_manifest` on a directory that contains `workspace_mcp.yaml`
/// returns the path of that file.
#[test]
fn find_workspace_works() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let manifest_path = tmp_dir.path().join("workspace_mcp.yaml");
    std::fs::write(&manifest_path, "name: ws\n").unwrap();
    let found = find_workspace_manifest(tmp_dir.path());
    assert_eq!(found, Some(manifest_path));
}
1382
/// Pins the complete `to_json()` output for a manifest that sets name,
/// source_roots, trust, embedder and builtins.
///
/// Sections the manifest omits are expected to appear as `null`, an empty
/// list, or an empty object, as spelled out in the expected value below.
#[test]
fn to_json_shape_is_stable() {
    let manifest_file = write_tmp(
        r#"
name: KGLite Codebase
source_roots: [src, lib]
trust:
 allow_embedder: true
embedder:
 module: kglite.embed
 class: SentenceTransformerEmbedder
builtins:
 save_graph: true
 temp_cleanup: on_overview
"#,
    );
    let manifest = load(manifest_file.path()).unwrap();
    let rendered = manifest.to_json();

    // The path is the only environment-dependent field, so compute it here.
    let yaml_path = manifest_file.path().display().to_string();
    let expected = serde_json::json!({
        "yaml_path": yaml_path,
        "name": "KGLite Codebase",
        "instructions": null,
        "overview_prefix": null,
        "source_roots": ["src", "lib"],
        "trust": {
            "allow_python_tools": false,
            "allow_embedder": true,
            "allow_query_preprocessor": false,
        },
        "tools": [],
        "embedder": {
            "module": "kglite.embed",
            "class": "SentenceTransformerEmbedder",
            "kwargs": {},
        },
        "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
        "env_file": null,
        "workspace": null,
        "extensions": {},
    });
    assert_eq!(rendered, expected);
}
1425
/// Loads a manifest that sets tools, workspace, builtins, env_file and
/// extensions, then checks the matching fields in the `to_json()` output.
///
/// The fixture's nesting is significant: each tool's keys are indented past
/// the `-` list marker so they belong to that list item, and `flavour` is
/// indented under `kglite` so it is read back as
/// `extensions.kglite.flavour`.
#[test]
fn to_json_round_trips_tools_and_workspace() {
    let f = write_tmp(
        r#"
name: Full Surface
source_root: ./src
trust:
  allow_python_tools: true
tools:
  - name: nodes_for
    cypher: "MATCH (n {name: $name}) RETURN n"
    description: "fetch nodes by name"
  - name: run_query
    python: tools.py
    function: run
workspace:
  kind: local
  root: /tmp/ws
  watch: true
builtins:
  save_graph: false
env_file: .env.local
extensions:
  kglite:
    flavour: standard
"#,
    );
    let m = load(f.path()).unwrap();
    let v = m.to_json();

    // Top-level scalars and the trust flag.
    assert_eq!(v["name"], "Full Surface");
    assert_eq!(v["trust"]["allow_python_tools"], true);
    assert_eq!(v["env_file"], ".env.local");

    // Workspace section.
    let workspace = &v["workspace"];
    assert_eq!(workspace["kind"], "local");
    assert_eq!(workspace["root"], "/tmp/ws");
    assert_eq!(workspace["watch"], true);

    // Tools keep manifest order and carry a `kind` tag per entry.
    let tools = &v["tools"];
    assert_eq!(tools[0]["kind"], "cypher");
    assert_eq!(tools[0]["name"], "nodes_for");
    assert_eq!(tools[1]["kind"], "python");
    assert_eq!(tools[1]["name"], "run_query");
    assert_eq!(tools[1]["python"], "tools.py");
    assert_eq!(tools[1]["function"], "run");

    // Nested extension value.
    assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
}
1469}