1#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest; `build` rejects anything else.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];

/// Keys accepted inside the `workspace:` mapping.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];

/// Spellings accepted for `workspace.kind`; quoted verbatim in validation errors.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];

/// Keys accepted inside the `trust:` mapping.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];

/// Keys accepted on an entry of the `tools:` list, across every tool kind.
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
    "rename",
];

/// Keys accepted inside the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];

/// Keys accepted inside the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];

/// Spellings accepted for `builtins.temp_cleanup`; quoted verbatim in validation errors.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
71
72#[derive(Debug, Error)]
73#[error("{path}: {message}")]
74pub struct ManifestError {
75 pub path: String,
76 pub message: String,
77}
78
79impl ManifestError {
80 pub fn at(path: &Path, message: impl Into<String>) -> Self {
81 Self {
82 path: path.display().to_string(),
83 message: message.into(),
84 }
85 }
86
87 pub fn bare(message: impl Into<String>) -> Self {
88 Self {
89 path: "<manifest>".to_string(),
90 message: message.into(),
91 }
92 }
93}
94
/// Boolean flags read from the manifest's `trust:` mapping.
///
/// Every flag is `false` unless the manifest sets it.
#[derive(Clone, Debug, Default)]
pub struct TrustConfig {
    /// Value of `trust.allow_python_tools`.
    pub allow_python_tools: bool,
    /// Value of `trust.allow_embedder`.
    pub allow_embedder: bool,
    /// Value of `trust.allow_query_preprocessor`.
    pub allow_query_preprocessor: bool,
}
107
108#[derive(Debug, Clone)]
109pub enum ToolSpec {
110 Cypher(CypherTool),
111 Python(PythonTool),
112 Bundled(BundledOverride),
128}
129
130impl ToolSpec {
131 pub fn name(&self) -> &str {
132 match self {
133 ToolSpec::Cypher(t) => &t.name,
134 ToolSpec::Python(t) => &t.name,
135 ToolSpec::Bundled(t) => &t.name,
136 }
137 }
138}
139
140#[derive(Debug, Clone)]
141pub struct CypherTool {
142 pub name: String,
143 pub cypher: String,
144 pub description: Option<String>,
145 pub parameters: Option<serde_json::Value>,
146}
147
148#[derive(Debug, Clone)]
149pub struct PythonTool {
150 pub name: String,
151 pub python: String,
152 pub function: String,
153 pub description: Option<String>,
154 pub parameters: Option<serde_json::Value>,
155}
156
/// A `tools:` entry of the form `bundled: <name>`.
#[derive(Clone, Debug)]
pub struct BundledOverride {
    /// The value given to `bundled:`; validated as an ASCII identifier when parsed.
    pub name: String,
    /// Optional `description:` string.
    pub description: Option<String>,
    /// Value of `hidden:`; `false` when the key is absent or null.
    pub hidden: bool,
    /// Optional `rename:` value; validated as an ASCII identifier when parsed.
    pub rename: Option<String>,
}
186
187#[derive(Debug, Clone)]
188pub struct EmbedderConfig {
189 pub module: String,
190 pub class: String,
191 pub kwargs: serde_json::Map<String, serde_json::Value>,
192}
193
194#[derive(Debug, Default, Clone)]
195pub struct BuiltinsConfig {
196 pub save_graph: bool,
197 pub temp_cleanup: TempCleanup,
198}
199
/// Parsed form of `builtins.temp_cleanup`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum TempCleanup {
    /// Spelled `never` in the manifest; the default.
    #[default]
    Never,
    /// Spelled `on_overview` in the manifest.
    OnOverview,
}

impl TempCleanup {
    /// Returns the manifest spelling of this value.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::OnOverview => "on_overview",
            Self::Never => "never",
        }
    }
}
215
/// Parsed form of `workspace.kind`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum WorkspaceKind {
    /// Spelled `github` in the manifest; the default.
    #[default]
    Github,
    /// Spelled `local` in the manifest.
    Local,
}

impl WorkspaceKind {
    /// Returns the manifest spelling of this value.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Local => "local",
            Self::Github => "github",
        }
    }
}
235
236#[derive(Debug, Clone, Default)]
237pub struct WorkspaceConfig {
238 pub kind: WorkspaceKind,
239 pub root: Option<String>,
242 pub watch: bool,
245 pub applies_to: Option<AppliesTo>,
272}
273
/// The two accepted shapes of `workspace.applies_to`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AppliesTo {
    /// The manifest gave a single string.
    Pattern(String),
    /// The manifest gave a non-empty list of strings.
    Patterns(Vec<String>),
}
289
290#[derive(Debug, Clone)]
291pub struct Manifest {
292 pub yaml_path: PathBuf,
293 pub name: Option<String>,
294 pub instructions: Option<String>,
295 pub overview_prefix: Option<String>,
296 pub source_roots: Vec<String>,
297 pub trust: TrustConfig,
298 pub tools: Vec<ToolSpec>,
299 pub embedder: Option<EmbedderConfig>,
300 pub builtins: BuiltinsConfig,
301 pub env_file: Option<String>,
305 pub workspace: Option<WorkspaceConfig>,
309 pub extensions: serde_json::Map<String, serde_json::Value>,
319}
320
321impl Manifest {
322 pub fn to_json(&self) -> serde_json::Value {
332 serde_json::json!({
333 "yaml_path": self.yaml_path.display().to_string(),
334 "name": self.name,
335 "instructions": self.instructions,
336 "overview_prefix": self.overview_prefix,
337 "source_roots": self.source_roots,
338 "trust": {
339 "allow_python_tools": self.trust.allow_python_tools,
340 "allow_embedder": self.trust.allow_embedder,
341 "allow_query_preprocessor": self.trust.allow_query_preprocessor,
342 },
343 "tools": self.tools.iter().map(|t| match t {
344 ToolSpec::Cypher(c) => serde_json::json!({
345 "kind": "cypher",
346 "name": c.name,
347 "cypher": c.cypher,
348 "description": c.description,
349 "parameters": c.parameters,
350 }),
351 ToolSpec::Python(p) => serde_json::json!({
352 "kind": "python",
353 "name": p.name,
354 "python": p.python,
355 "function": p.function,
356 "description": p.description,
357 "parameters": p.parameters,
358 }),
359 ToolSpec::Bundled(b) => serde_json::json!({
360 "kind": "bundled",
361 "name": b.name,
362 "description": b.description,
363 "hidden": b.hidden,
364 "rename": b.rename,
365 }),
366 }).collect::<Vec<_>>(),
367 "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
368 "module": e.module,
369 "class": e.class,
370 "kwargs": e.kwargs,
371 })),
372 "builtins": {
373 "save_graph": self.builtins.save_graph,
374 "temp_cleanup": self.builtins.temp_cleanup.as_str(),
375 },
376 "env_file": self.env_file,
377 "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
378 "kind": w.kind.as_str(),
379 "root": w.root,
380 "watch": w.watch,
381 "applies_to": w.applies_to.as_ref().map(|a| match a {
382 AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
383 AppliesTo::Patterns(ps) => serde_json::Value::Array(
384 ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
385 ),
386 }),
387 })),
388 "extensions": self.extensions,
389 })
390 }
391}
392
/// Looks for `<stem>_mcp.yaml` in the same directory as `graph_path`.
///
/// `<stem>` is the file stem of `graph_path`, converted lossily to UTF-8.
/// Returns the candidate path only when it exists as a regular file. Returns
/// `None` when `graph_path` has no file stem or no parent.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let file_name = format!("{}_mcp.yaml", graph_path.file_stem()?.to_string_lossy());
    let candidate = graph_path.parent()?.join(file_name);
    candidate.is_file().then_some(candidate)
}
404
405pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
454 let primary = workspace_dir.join("workspace_mcp.yaml");
455 if primary.is_file() {
456 return Some(primary);
457 }
458 let parent = workspace_dir.parent()?;
461 let workspace_resolved = workspace_dir.canonicalize().ok()?;
462 let parent_resolved = parent.canonicalize().ok()?;
463 if parent_resolved == workspace_resolved {
464 return None;
466 }
467 let fallback = parent.join("workspace_mcp.yaml");
468 if !fallback.is_file() {
469 return None;
470 }
471
472 let manifest = match load(&fallback) {
476 Ok(m) => m,
477 Err(e) => {
478 tracing::warn!(
479 manifest = %fallback.display(),
480 error = %e,
481 "parent-walk manifest exists but failed to parse; ignoring"
482 );
483 return None;
484 }
485 };
486 let declared = manifest
487 .workspace
488 .as_ref()
489 .and_then(|w| w.applies_to.as_ref());
490 let Some(declared_applies_to) = declared else {
491 tracing::info!(
492 manifest = %fallback.display(),
493 "parent-walk manifest does not declare workspace.applies_to; \
494 ignoring (set workspace.applies_to: <pattern> to opt in)"
495 );
496 return None;
497 };
498 let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
502 return None; };
504 let patterns: Vec<&str> = match declared_applies_to {
505 AppliesTo::Pattern(p) => vec![p.as_str()],
506 AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
507 };
508 let matched = patterns.iter().any(|pat| {
509 match globset::Glob::new(pat) {
510 Ok(g) => g.compile_matcher().is_match(basename),
511 Err(_) => {
512 false
515 }
516 }
517 });
518 if matched {
519 tracing::info!(
520 workspace_dir = %workspace_dir.display(),
521 manifest = %fallback.display(),
522 "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
523 );
524 Some(fallback)
525 } else {
526 tracing::info!(
527 workspace_dir = %workspace_resolved.display(),
528 manifest = %fallback.display(),
529 basename = %basename,
530 patterns = ?patterns,
531 "parent-walk manifest's workspace.applies_to does not match \
532 this workspace_dir's basename; ignoring"
533 );
534 None
535 }
536}
537
538pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
540 let text = fs::read_to_string(yaml_path)
541 .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
542 let raw: serde_yaml::Value = serde_yaml::from_str(&text)
543 .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
544 let raw = match raw {
545 serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
546 v => v,
547 };
548 let map = raw
549 .as_mapping()
550 .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
551 build(map, yaml_path)
552}
553
554fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
555 check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
556
557 if raw.contains_key("source_root") && raw.contains_key("source_roots") {
558 return Err(ManifestError::at(
559 yaml_path,
560 "specify either source_root (str) or source_roots (list), not both",
561 ));
562 }
563
564 let mut source_roots: Vec<String> = Vec::new();
565 if let Some(v) = raw.get("source_root") {
566 let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
567 ManifestError::at(yaml_path, "source_root must be a non-empty string")
568 })?;
569 source_roots.push(s.to_string());
570 } else if let Some(v) = raw.get("source_roots") {
571 let seq = v.as_sequence().ok_or_else(|| {
572 ManifestError::at(
573 yaml_path,
574 "source_roots must be a list of non-empty strings",
575 )
576 })?;
577 if seq.is_empty() {
578 return Err(ManifestError::at(
579 yaml_path,
580 "source_roots must be non-empty when set",
581 ));
582 }
583 for item in seq {
584 let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
585 ManifestError::at(
586 yaml_path,
587 "source_roots must be a list of non-empty strings",
588 )
589 })?;
590 source_roots.push(s.to_string());
591 }
592 }
593
594 let trust = build_trust(raw.get("trust"), yaml_path)?;
595 let tools = build_tools(raw.get("tools"), yaml_path)?;
596 let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
597 let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
598 let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
599 let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
600
601 Ok(Manifest {
602 yaml_path: yaml_path.to_path_buf(),
603 name: optional_str(raw, "name", yaml_path)?,
604 instructions: optional_str(raw, "instructions", yaml_path)?,
605 overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
606 source_roots,
607 trust,
608 tools,
609 embedder,
610 builtins,
611 env_file: optional_str(raw, "env_file", yaml_path)?,
612 workspace,
613 extensions,
614 })
615}
616
617fn build_extensions(
618 raw: Option<&serde_yaml::Value>,
619 yaml_path: &Path,
620) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
621 let Some(raw) = raw else {
622 return Ok(serde_json::Map::new());
623 };
624 if matches!(raw, serde_yaml::Value::Null) {
625 return Ok(serde_json::Map::new());
626 }
627 if !raw.is_mapping() {
628 return Err(ManifestError::at(
629 yaml_path,
630 "extensions must be a mapping (downstream-binary-specific keys)",
631 ));
632 }
633 match yaml_to_json(raw.clone())? {
634 serde_json::Value::Object(o) => Ok(o),
635 _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
636 }
637}
638
639fn build_workspace(
640 raw: Option<&serde_yaml::Value>,
641 yaml_path: &Path,
642) -> Result<Option<WorkspaceConfig>, ManifestError> {
643 let Some(raw) = raw else { return Ok(None) };
644 if matches!(raw, serde_yaml::Value::Null) {
645 return Ok(None);
646 }
647 let map = raw
648 .as_mapping()
649 .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
650 check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
651 let kind = match map.get("kind") {
652 None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
653 Some(serde_yaml::Value::String(s)) => match s.as_str() {
654 "github" => WorkspaceKind::Github,
655 "local" => WorkspaceKind::Local,
656 other => {
657 return Err(ManifestError::at(
658 yaml_path,
659 format!(
660 "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
661 ),
662 ));
663 }
664 },
665 Some(_) => {
666 return Err(ManifestError::at(
667 yaml_path,
668 format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
669 ))
670 }
671 };
672 let root = match map.get("root") {
673 None | Some(serde_yaml::Value::Null) => None,
674 Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
675 _ => {
676 return Err(ManifestError::at(
677 yaml_path,
678 "workspace.root must be a non-empty string",
679 ))
680 }
681 };
682 let watch = match map.get("watch") {
683 None | Some(serde_yaml::Value::Null) => false,
684 Some(serde_yaml::Value::Bool(b)) => *b,
685 Some(_) => {
686 return Err(ManifestError::at(
687 yaml_path,
688 "workspace.watch must be a bool",
689 ))
690 }
691 };
692 let applies_to =
693 match map.get("applies_to") {
694 None | Some(serde_yaml::Value::Null) => None,
695 Some(serde_yaml::Value::String(s)) => {
696 Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
697 }
698 Some(serde_yaml::Value::Sequence(seq)) => {
699 if seq.is_empty() {
700 return Err(ManifestError::at(
701 yaml_path,
702 "workspace.applies_to: list must contain at least one pattern",
703 ));
704 }
705 let mut patterns = Vec::with_capacity(seq.len());
706 for (i, item) in seq.iter().enumerate() {
707 let s = item.as_str().ok_or_else(|| {
708 ManifestError::at(
709 yaml_path,
710 format!("workspace.applies_to[{i}] must be a string"),
711 )
712 })?;
713 let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
714 ManifestError::at(
715 yaml_path,
716 format!("workspace.applies_to[{i}]: {}", e.message),
717 )
718 })?;
719 patterns.push(cleaned);
720 }
721 Some(AppliesTo::Patterns(patterns))
722 }
723 _ => return Err(ManifestError::at(
724 yaml_path,
725 "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
726 )),
727 };
728 if kind == WorkspaceKind::Local && root.is_none() {
729 return Err(ManifestError::at(
730 yaml_path,
731 "workspace.kind: local requires workspace.root to be set",
732 ));
733 }
734 if kind == WorkspaceKind::Github && watch {
735 return Err(ManifestError::at(
736 yaml_path,
737 "workspace.watch is only valid with workspace.kind: local",
738 ));
739 }
740 Ok(Some(WorkspaceConfig {
741 kind,
742 root,
743 watch,
744 applies_to,
745 }))
746}
747
748fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
757 let trimmed = raw.trim();
758 if trimmed.is_empty() {
759 return Err(ManifestError::at(
760 yaml_path,
761 "workspace.applies_to: pattern must not be empty",
762 ));
763 }
764 let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
768 if stripped.is_empty() {
769 return Err(ManifestError::at(
770 yaml_path,
771 "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
772 ));
773 }
774 if stripped.contains('/') {
775 return Err(ManifestError::at(
776 yaml_path,
777 format!(
778 "workspace.applies_to: pattern {raw:?} must be a single path segment \
779 (no embedded `/`) — parent-walk discovery is bounded to one level"
780 ),
781 ));
782 }
783 if stripped == ".." || stripped.starts_with("../") {
784 return Err(ManifestError::at(
785 yaml_path,
786 format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
787 ));
788 }
789 if Path::new(stripped).is_absolute() {
790 return Err(ManifestError::at(
791 yaml_path,
792 format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
793 ));
794 }
795 globset::Glob::new(stripped).map_err(|e| {
799 ManifestError::at(
800 yaml_path,
801 format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
802 )
803 })?;
804 Ok(stripped.to_string())
805}
806
807fn check_keys(
808 map: &serde_yaml::Mapping,
809 allowed: &[&str],
810 label: &str,
811 yaml_path: &Path,
812) -> Result<(), ManifestError> {
813 let mut unknown: Vec<String> = Vec::new();
814 for (k, _) in map {
815 let key = k.as_str().unwrap_or("<non-string-key>");
816 if !allowed.contains(&key) {
817 unknown.push(key.to_string());
818 }
819 }
820 if !unknown.is_empty() {
821 unknown.sort();
822 return Err(ManifestError::at(
823 yaml_path,
824 format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
825 ));
826 }
827 Ok(())
828}
829
830fn optional_str(
831 raw: &serde_yaml::Mapping,
832 key: &str,
833 yaml_path: &Path,
834) -> Result<Option<String>, ManifestError> {
835 match raw.get(key) {
836 None | Some(serde_yaml::Value::Null) => Ok(None),
837 Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
838 Some(_) => Err(ManifestError::at(
839 yaml_path,
840 format!("{key} must be a string"),
841 )),
842 }
843}
844
845fn build_trust(
846 raw: Option<&serde_yaml::Value>,
847 yaml_path: &Path,
848) -> Result<TrustConfig, ManifestError> {
849 let Some(raw) = raw else {
850 return Ok(TrustConfig::default());
851 };
852 let map = raw
853 .as_mapping()
854 .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
855 check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
856 let mut cfg = TrustConfig::default();
857 if let Some(v) = map.get("allow_python_tools") {
858 cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
859 ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
860 })?;
861 }
862 if let Some(v) = map.get("allow_embedder") {
863 cfg.allow_embedder = v
864 .as_bool()
865 .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
866 }
867 if let Some(v) = map.get("allow_query_preprocessor") {
868 cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
869 ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
870 })?;
871 }
872 Ok(cfg)
873}
874
875fn build_tools(
876 raw: Option<&serde_yaml::Value>,
877 yaml_path: &Path,
878) -> Result<Vec<ToolSpec>, ManifestError> {
879 let Some(raw) = raw else {
880 return Ok(Vec::new());
881 };
882 let seq = raw
883 .as_sequence()
884 .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
885 let mut tools: Vec<ToolSpec> = Vec::new();
886 let mut seen: BTreeMap<String, ()> = BTreeMap::new();
887 for (i, entry) in seq.iter().enumerate() {
888 let tool = build_tool(entry, i, yaml_path)?;
889 let name = tool.name().to_string();
890 if seen.insert(name.clone(), ()).is_some() {
891 return Err(ManifestError::at(
892 yaml_path,
893 format!("duplicate tool name: {name:?}"),
894 ));
895 }
896 tools.push(tool);
897 }
898 Ok(tools)
899}
900
901fn build_tool(
902 entry: &serde_yaml::Value,
903 idx: usize,
904 yaml_path: &Path,
905) -> Result<ToolSpec, ManifestError> {
906 let map = entry
907 .as_mapping()
908 .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
909 check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
910
911 let has_cypher = map.contains_key("cypher");
916 let has_python = map.contains_key("python");
917 let has_bundled = map.contains_key("bundled");
918 let kinds_present: Vec<&str> = [
919 ("cypher", has_cypher),
920 ("python", has_python),
921 ("bundled", has_bundled),
922 ]
923 .into_iter()
924 .filter(|(_, p)| *p)
925 .map(|(k, _)| k)
926 .collect();
927 if kinds_present.is_empty() {
928 return Err(ManifestError::at(
929 yaml_path,
930 format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
931 ));
932 }
933 if kinds_present.len() > 1 {
934 return Err(ManifestError::at(
935 yaml_path,
936 format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
937 ));
938 }
939
940 if has_bundled {
945 return build_bundled_override(map, idx, yaml_path);
946 }
947
948 let name = map
949 .get("name")
950 .and_then(|v| v.as_str())
951 .filter(|s| valid_identifier(s))
952 .ok_or_else(|| {
953 ManifestError::at(
954 yaml_path,
955 format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
956 )
957 })?
958 .to_string();
959
960 if map.contains_key("hidden") {
964 return Err(ManifestError::at(
965 yaml_path,
966 format!(
967 "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
968 ),
969 ));
970 }
971
972 let description = match map.get("description") {
973 None | Some(serde_yaml::Value::Null) => None,
974 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
975 Some(_) => {
976 return Err(ManifestError::at(
977 yaml_path,
978 format!("tools[{idx}] ({name:?}).description must be a string"),
979 ))
980 }
981 };
982
983 let parameters = match map.get("parameters") {
984 None | Some(serde_yaml::Value::Null) => None,
985 Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
986 Some(_) => {
987 return Err(ManifestError::at(
988 yaml_path,
989 format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
990 ))
991 }
992 };
993
994 if has_cypher {
995 let cypher = map
996 .get("cypher")
997 .and_then(|v| v.as_str())
998 .filter(|s| !s.trim().is_empty())
999 .ok_or_else(|| {
1000 ManifestError::at(
1001 yaml_path,
1002 format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
1003 )
1004 })?
1005 .to_string();
1006 return Ok(ToolSpec::Cypher(CypherTool {
1007 name,
1008 cypher,
1009 description,
1010 parameters,
1011 }));
1012 }
1013
1014 let python = map
1016 .get("python")
1017 .and_then(|v| v.as_str())
1018 .filter(|s| !s.is_empty())
1019 .ok_or_else(|| {
1020 ManifestError::at(
1021 yaml_path,
1022 format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1023 )
1024 })?
1025 .to_string();
1026 let function = map
1027 .get("function")
1028 .and_then(|v| v.as_str())
1029 .filter(|s| valid_identifier(s))
1030 .ok_or_else(|| {
1031 ManifestError::at(
1032 yaml_path,
1033 format!(
1034 "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1035 ),
1036 )
1037 })?
1038 .to_string();
1039 Ok(ToolSpec::Python(PythonTool {
1040 name,
1041 python,
1042 function,
1043 description,
1044 parameters,
1045 }))
1046}
1047
1048fn build_bundled_override(
1052 map: &serde_yaml::Mapping,
1053 idx: usize,
1054 yaml_path: &Path,
1055) -> Result<ToolSpec, ManifestError> {
1056 let name = map
1057 .get("bundled")
1058 .and_then(|v| v.as_str())
1059 .filter(|s| valid_identifier(s))
1060 .ok_or_else(|| {
1061 ManifestError::at(
1062 yaml_path,
1063 format!(
1064 "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1065 (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1066 ),
1067 )
1068 })?
1069 .to_string();
1070
1071 for forbidden in ["name", "parameters", "function"] {
1076 if map.contains_key(forbidden) {
1077 return Err(ManifestError::at(
1078 yaml_path,
1079 format!(
1080 "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1081 (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
1082 ),
1083 ));
1084 }
1085 }
1086
1087 let description = match map.get("description") {
1088 None | Some(serde_yaml::Value::Null) => None,
1089 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1090 Some(_) => {
1091 return Err(ManifestError::at(
1092 yaml_path,
1093 format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1094 ))
1095 }
1096 };
1097
1098 let hidden = match map.get("hidden") {
1099 None | Some(serde_yaml::Value::Null) => false,
1100 Some(serde_yaml::Value::Bool(b)) => *b,
1101 Some(_) => {
1102 return Err(ManifestError::at(
1103 yaml_path,
1104 format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1105 ))
1106 }
1107 };
1108
1109 let rename = match map.get("rename") {
1114 None | Some(serde_yaml::Value::Null) => None,
1115 Some(serde_yaml::Value::String(s)) => {
1116 if !valid_identifier(s) {
1117 return Err(ManifestError::at(
1118 yaml_path,
1119 format!(
1120 "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
1121 (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
1122 ),
1123 ));
1124 }
1125 Some(s.clone())
1126 }
1127 Some(_) => {
1128 return Err(ManifestError::at(
1129 yaml_path,
1130 format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
1131 ))
1132 }
1133 };
1134
1135 Ok(ToolSpec::Bundled(BundledOverride {
1136 name,
1137 description,
1138 hidden,
1139 rename,
1140 }))
1141}
1142
1143fn build_embedder(
1144 raw: Option<&serde_yaml::Value>,
1145 yaml_path: &Path,
1146) -> Result<Option<EmbedderConfig>, ManifestError> {
1147 let Some(raw) = raw else { return Ok(None) };
1148 if matches!(raw, serde_yaml::Value::Null) {
1149 return Ok(None);
1150 }
1151 let map = raw
1152 .as_mapping()
1153 .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1154 check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1155 let module = map
1156 .get("module")
1157 .and_then(|v| v.as_str())
1158 .filter(|s| !s.is_empty())
1159 .ok_or_else(|| {
1160 ManifestError::at(
1161 yaml_path,
1162 "embedder.module must be a non-empty string (path or dotted name)",
1163 )
1164 })?
1165 .to_string();
1166 let class = map
1167 .get("class")
1168 .and_then(|v| v.as_str())
1169 .filter(|s| valid_identifier(s))
1170 .ok_or_else(|| {
1171 ManifestError::at(
1172 yaml_path,
1173 "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1174 )
1175 })?
1176 .to_string();
1177 let kwargs = match map.get("kwargs") {
1178 None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1179 Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1180 serde_json::Value::Object(o) => o,
1181 _ => {
1182 return Err(ManifestError::at(
1183 yaml_path,
1184 "embedder.kwargs must be a mapping",
1185 ))
1186 }
1187 },
1188 Some(_) => {
1189 return Err(ManifestError::at(
1190 yaml_path,
1191 "embedder.kwargs must be a mapping",
1192 ))
1193 }
1194 };
1195 Ok(Some(EmbedderConfig {
1196 module,
1197 class,
1198 kwargs,
1199 }))
1200}
1201
1202fn build_builtins(
1203 raw: Option<&serde_yaml::Value>,
1204 yaml_path: &Path,
1205) -> Result<BuiltinsConfig, ManifestError> {
1206 let Some(raw) = raw else {
1207 return Ok(BuiltinsConfig::default());
1208 };
1209 if matches!(raw, serde_yaml::Value::Null) {
1210 return Ok(BuiltinsConfig::default());
1211 }
1212 let map = raw
1213 .as_mapping()
1214 .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1215 check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1216 let mut cfg = BuiltinsConfig::default();
1217 if let Some(v) = map.get("save_graph") {
1218 cfg.save_graph = v
1219 .as_bool()
1220 .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1221 }
1222 if let Some(v) = map.get("temp_cleanup") {
1223 let s = v.as_str().ok_or_else(|| {
1224 ManifestError::at(
1225 yaml_path,
1226 format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1227 )
1228 })?;
1229 cfg.temp_cleanup = match s {
1230 "never" => TempCleanup::Never,
1231 "on_overview" => TempCleanup::OnOverview,
1232 other => {
1233 return Err(ManifestError::at(
1234 yaml_path,
1235 format!(
1236 "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1237 ),
1238 ))
1239 }
1240 };
1241 }
1242 Ok(cfg)
1243}
1244
/// Reports whether `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// The empty string and anything containing a non-ASCII character are rejected.
fn valid_identifier(s: &str) -> bool {
    // Every byte of a multi-byte UTF-8 sequence is >= 0x80 and so fails the
    // ASCII checks, which makes a byte-wise scan equivalent to a char-wise one.
    match s.as_bytes().split_first() {
        Some((head, tail)) => {
            (head.is_ascii_alphabetic() || *head == b'_')
                && tail.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'_')
        }
        None => false,
    }
}
1253
1254fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1255 serde_json::to_value(&v)
1256 .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1257}
1258
1259#[derive(Debug, Deserialize)]
1260struct _Reserved;
1261
1262#[cfg(test)]
1263mod tests {
1264 use super::*;
1265
1266 fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1267 let mut f = tempfile::NamedTempFile::new().unwrap();
1268 std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1269 f
1270 }
1271
1272 #[test]
1273 fn loads_minimal_empty_manifest() {
1274 let f = write_tmp("");
1275 let m = load(f.path()).unwrap();
1276 assert_eq!(m.tools.len(), 0);
1277 assert_eq!(m.source_roots.len(), 0);
1278 assert!(!m.trust.allow_python_tools);
1279 assert!(!m.trust.allow_embedder);
1280 assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1281 }
1282
1283 #[test]
1284 fn loads_name_and_instructions() {
1285 let f = write_tmp("name: Demo\ninstructions: |\n multi-line\n block\n");
1286 let m = load(f.path()).unwrap();
1287 assert_eq!(m.name.as_deref(), Some("Demo"));
1288 assert!(m.instructions.unwrap().contains("multi-line"));
1289 }
1290
1291 #[test]
1292 fn rejects_unknown_top_key() {
1293 let f = write_tmp("bogus: 1\n");
1294 let err = load(f.path()).unwrap_err();
1295 assert!(err.message.contains("unknown top-level"));
1296 }
1297
1298 #[test]
1299 fn source_root_string_normalises_to_list() {
1300 let f = write_tmp("source_root: ./data\n");
1301 let m = load(f.path()).unwrap();
1302 assert_eq!(m.source_roots, vec!["./data".to_string()]);
1303 }
1304
1305 #[test]
1306 fn source_roots_list_preserved() {
1307 let f = write_tmp("source_roots:\n - ./a\n - ./b\n");
1308 let m = load(f.path()).unwrap();
1309 assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1310 }
1311
1312 #[test]
1313 fn rejects_both_source_root_and_source_roots() {
1314 let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1315 assert!(load(f.path()).unwrap_err().message.contains("not both"));
1316 }
1317
1318 #[test]
1319 fn cypher_tool_parses() {
1320 let f = write_tmp("tools:\n - name: lookup\n cypher: MATCH (n) RETURN n\n");
1321 let m = load(f.path()).unwrap();
1322 assert_eq!(m.tools.len(), 1);
1323 match &m.tools[0] {
1324 ToolSpec::Cypher(t) => {
1325 assert_eq!(t.name, "lookup");
1326 assert!(t.cypher.contains("MATCH"));
1327 }
1328 _ => panic!("expected cypher tool"),
1329 }
1330 }
1331
1332 #[test]
1333 fn python_tool_parses() {
1334 let f =
1335 write_tmp("tools:\n - name: detail\n python: ./tools.py\n function: detail\n");
1336 let m = load(f.path()).unwrap();
1337 match &m.tools[0] {
1338 ToolSpec::Python(t) => {
1339 assert_eq!(t.python, "./tools.py");
1340 assert_eq!(t.function, "detail");
1341 }
1342 _ => panic!("expected python tool"),
1343 }
1344 }
1345
1346 #[test]
1347 fn rejects_tool_with_both_kinds() {
1348 let f = write_tmp(
1349 "tools:\n - name: x\n cypher: 'MATCH (n) RETURN n'\n python: ./t.py\n function: x\n",
1350 );
1351 assert!(load(f.path())
1352 .unwrap_err()
1353 .message
1354 .contains("multiple kinds"));
1355 }
1356
1357 #[test]
1358 fn rejects_tool_with_no_kind() {
1359 let f = write_tmp("tools:\n - name: x\n");
1360 assert!(load(f.path())
1361 .unwrap_err()
1362 .message
1363 .contains("needs exactly one"));
1364 }
1365
1366 #[test]
1367 fn rejects_duplicate_tool_names() {
1368 let f = write_tmp(
1369 "tools:\n - name: same\n cypher: 'MATCH (n) RETURN n'\n - name: same\n cypher: 'MATCH (m) RETURN m'\n",
1370 );
1371 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1372 }
1373
1374 #[test]
1377 fn bundled_override_with_description_parses() {
1378 let f =
1379 write_tmp("tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n");
1380 let m = load(f.path()).unwrap();
1381 assert_eq!(m.tools.len(), 1);
1382 match &m.tools[0] {
1383 ToolSpec::Bundled(b) => {
1384 assert_eq!(b.name, "repo_management");
1385 assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1386 assert!(!b.hidden);
1387 }
1388 _ => panic!("expected bundled override"),
1389 }
1390 }
1391
1392 #[test]
1393 fn bundled_override_with_hidden_parses() {
1394 let f = write_tmp("tools:\n - bundled: ping\n hidden: true\n");
1395 let m = load(f.path()).unwrap();
1396 match &m.tools[0] {
1397 ToolSpec::Bundled(b) => {
1398 assert_eq!(b.name, "ping");
1399 assert!(b.hidden);
1400 assert!(b.description.is_none());
1401 }
1402 _ => panic!("expected bundled override"),
1403 }
1404 }
1405
1406 #[test]
1407 fn bundled_override_alongside_cypher_tools_parses() {
1408 let f = write_tmp(
1409 "tools:\n\
1410 \x20\x20- bundled: cypher_query\n\
1411 \x20\x20\x20\x20description: \"Custom server description\"\n\
1412 \x20\x20- name: lookup\n\
1413 \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1414 );
1415 let m = load(f.path()).unwrap();
1416 assert_eq!(m.tools.len(), 2);
1417 assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1418 assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1419 }
1420
1421 #[test]
1422 fn rejects_bundled_with_cypher_kind() {
1423 let f =
1424 write_tmp("tools:\n - bundled: cypher_query\n cypher: \"MATCH (n) RETURN n\"\n");
1425 let err = load(f.path()).unwrap_err();
1426 assert!(
1427 err.message.contains("multiple kinds"),
1428 "got: {}",
1429 err.message
1430 );
1431 }
1432
1433 #[test]
1434 fn rejects_bundled_with_name_field() {
1435 let f = write_tmp("tools:\n - bundled: ping\n name: ping\n");
1436 let err = load(f.path()).unwrap_err();
1437 assert!(
1438 err.message.contains("cannot set `name:`"),
1439 "got: {}",
1440 err.message
1441 );
1442 }
1443
1444 #[test]
1445 fn rejects_bundled_with_parameters_field() {
1446 let f =
1447 write_tmp("tools:\n - bundled: cypher_query\n parameters:\n type: object\n");
1448 let err = load(f.path()).unwrap_err();
1449 assert!(
1450 err.message.contains("cannot set `parameters:`"),
1451 "got: {}",
1452 err.message
1453 );
1454 }
1455
1456 #[test]
1457 fn rejects_bundled_with_non_bool_hidden() {
1458 let f = write_tmp("tools:\n - bundled: ping\n hidden: yes-please\n");
1459 let err = load(f.path()).unwrap_err();
1460 assert!(
1461 err.message.contains("hidden must be a bool"),
1462 "got: {}",
1463 err.message
1464 );
1465 }
1466
1467 #[test]
1468 fn rejects_hidden_on_cypher_tool() {
1469 let f = write_tmp(
1470 "tools:\n - name: lookup\n cypher: \"MATCH (n) RETURN n\"\n hidden: true\n",
1471 );
1472 let err = load(f.path()).unwrap_err();
1473 assert!(
1474 err.message
1475 .contains("`hidden:` is only valid on `bundled:` override entries"),
1476 "got: {}",
1477 err.message
1478 );
1479 }
1480
1481 #[test]
1482 fn rejects_duplicate_bundled_overrides() {
1483 let f = write_tmp(
1487 "tools:\n - bundled: ping\n hidden: true\n - bundled: ping\n description: \"x\"\n",
1488 );
1489 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1490 }
1491
1492 #[test]
1493 fn rejects_bundled_with_invalid_identifier() {
1494 let f = write_tmp("tools:\n - bundled: \"123-bad\"\n hidden: true\n");
1495 let err = load(f.path()).unwrap_err();
1496 assert!(
1497 err.message.contains("must be a string"),
1498 "got: {}",
1499 err.message
1500 );
1501 }
1502
1503 #[test]
1505 fn bundled_rename_parses_when_valid_identifier() {
1506 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: legal_cypher_query\n");
1507 let m = load(f.path()).unwrap();
1508 match &m.tools[0] {
1509 ToolSpec::Bundled(b) => {
1510 assert_eq!(b.name, "cypher_query");
1511 assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1512 assert!(!b.hidden);
1513 assert!(b.description.is_none());
1514 }
1515 _ => panic!("expected bundled override"),
1516 }
1517 }
1518
1519 #[test]
1520 fn bundled_rename_alongside_description_parses() {
1521 let f = write_tmp(
1522 "tools:\n - bundled: cypher_query\n rename: legal_cypher_query\n description: \"Legal-corpus cypher\"\n",
1523 );
1524 let m = load(f.path()).unwrap();
1525 match &m.tools[0] {
1526 ToolSpec::Bundled(b) => {
1527 assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1528 assert_eq!(b.description.as_deref(), Some("Legal-corpus cypher"));
1529 }
1530 _ => panic!("expected bundled override"),
1531 }
1532 }
1533
1534 #[test]
1535 fn bundled_rename_defaults_to_none() {
1536 let f = write_tmp("tools:\n - bundled: cypher_query\n description: \"x\"\n");
1537 let m = load(f.path()).unwrap();
1538 match &m.tools[0] {
1539 ToolSpec::Bundled(b) => assert!(b.rename.is_none()),
1540 _ => panic!("expected bundled override"),
1541 }
1542 }
1543
1544 #[test]
1545 fn rejects_bundled_rename_with_invalid_identifier() {
1546 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: \"123-bad\"\n");
1547 let err = load(f.path()).unwrap_err();
1548 assert!(
1549 err.message.contains("rename must be a valid identifier"),
1550 "got: {}",
1551 err.message
1552 );
1553 }
1554
1555 #[test]
1556 fn rejects_bundled_rename_with_non_string_value() {
1557 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: 42\n");
1558 let err = load(f.path()).unwrap_err();
1559 assert!(
1560 err.message.contains("rename must be a string"),
1561 "got: {}",
1562 err.message
1563 );
1564 }
1565
1566 #[test]
1567 fn bundled_rename_serialises_to_json() {
1568 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: legal_cypher_query\n");
1569 let m = load(f.path()).unwrap();
1570 let json = m.to_json();
1571 let tools = json.get("tools").and_then(|t| t.as_array()).unwrap();
1572 let entry = &tools[0];
1573 assert_eq!(entry.get("kind").and_then(|v| v.as_str()), Some("bundled"));
1574 assert_eq!(
1575 entry.get("name").and_then(|v| v.as_str()),
1576 Some("cypher_query")
1577 );
1578 assert_eq!(
1579 entry.get("rename").and_then(|v| v.as_str()),
1580 Some("legal_cypher_query")
1581 );
1582 }
1583
1584 #[test]
1585 fn bundled_override_to_json_shape() {
1586 let f = write_tmp(
1587 "tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n hidden: false\n",
1588 );
1589 let m = load(f.path()).unwrap();
1590 let v = m.to_json();
1591 assert_eq!(v["tools"][0]["kind"], "bundled");
1592 assert_eq!(v["tools"][0]["name"], "repo_management");
1593 assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1594 assert_eq!(v["tools"][0]["hidden"], false);
1595 }
1596
1597 #[test]
1598 fn embedder_parses() {
1599 let f = write_tmp(
1600 "embedder:\n module: ./e.py\n class: GraphEmbedder\n kwargs:\n cooldown: 900\n",
1601 );
1602 let m = load(f.path()).unwrap();
1603 let e = m.embedder.unwrap();
1604 assert_eq!(e.module, "./e.py");
1605 assert_eq!(e.class, "GraphEmbedder");
1606 assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1607 }
1608
1609 #[test]
1610 fn builtins_parses_temp_cleanup() {
1611 let f = write_tmp("builtins:\n save_graph: true\n temp_cleanup: on_overview\n");
1612 let m = load(f.path()).unwrap();
1613 assert!(m.builtins.save_graph);
1614 assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1615 }
1616
1617 #[test]
1618 fn rejects_invalid_temp_cleanup() {
1619 let f = write_tmp("builtins:\n temp_cleanup: nuke\n");
1620 assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1621 }
1622
1623 #[test]
1624 fn allow_embedder_trust_parses() {
1625 let f = write_tmp("trust:\n allow_embedder: true\n");
1626 let m = load(f.path()).unwrap();
1627 assert!(m.trust.allow_embedder);
1628 }
1629
1630 #[test]
1631 fn allow_query_preprocessor_trust_parses() {
1632 let f = write_tmp("trust:\n allow_query_preprocessor: true\n");
1633 let m = load(f.path()).unwrap();
1634 assert!(m.trust.allow_query_preprocessor);
1635 assert!(!m.trust.allow_embedder);
1636 assert!(!m.trust.allow_python_tools);
1637 }
1638
1639 #[test]
1640 fn allow_query_preprocessor_rejects_non_bool() {
1641 let f = write_tmp("trust:\n allow_query_preprocessor: \"yes\"\n");
1642 let err = load(f.path()).unwrap_err();
1643 assert!(err
1644 .message
1645 .contains("allow_query_preprocessor must be a bool"));
1646 }
1647
1648 #[test]
1649 fn find_sibling_works() {
1650 let dir = tempfile::tempdir().unwrap();
1651 let graph = dir.path().join("demo.kgl");
1652 std::fs::write(&graph, b"\x00").unwrap();
1653 let sibling = dir.path().join("demo_mcp.yaml");
1654 std::fs::write(&sibling, "name: x\n").unwrap();
1655 assert_eq!(find_sibling_manifest(&graph), Some(sibling));
1656 }
1657
1658 #[test]
1659 fn workspace_local_parses() {
1660 let f = write_tmp("workspace:\n kind: local\n root: ./src\n watch: true\n");
1661 let m = load(f.path()).unwrap();
1662 let w = m.workspace.unwrap();
1663 assert_eq!(w.kind, WorkspaceKind::Local);
1664 assert_eq!(w.root.as_deref(), Some("./src"));
1665 assert!(w.watch);
1666 }
1667
1668 #[test]
1669 fn workspace_github_default_kind() {
1670 let f = write_tmp("workspace: {}\n");
1671 let m = load(f.path()).unwrap();
1672 let w = m.workspace.unwrap();
1673 assert_eq!(w.kind, WorkspaceKind::Github);
1674 assert!(w.root.is_none());
1675 assert!(!w.watch);
1676 }
1677
1678 #[test]
1679 fn workspace_local_without_root_errors() {
1680 let f = write_tmp("workspace:\n kind: local\n");
1681 let err = load(f.path()).unwrap_err();
1682 assert!(err.message.contains("requires workspace.root"));
1683 }
1684
1685 #[test]
1686 fn workspace_unknown_key_rejected() {
1687 let f = write_tmp("workspace:\n kind: local\n root: ./x\n bogus: 1\n");
1688 let err = load(f.path()).unwrap_err();
1689 assert!(err.message.contains("unknown workspace keys"));
1690 }
1691
1692 #[test]
1693 fn workspace_invalid_kind_rejected() {
1694 let f = write_tmp("workspace:\n kind: docker\n root: ./x\n");
1695 let err = load(f.path()).unwrap_err();
1696 assert!(err.message.contains("workspace.kind"));
1697 }
1698
1699 #[test]
1700 fn workspace_watch_invalid_for_github() {
1701 let f = write_tmp("workspace:\n kind: github\n watch: true\n");
1702 let err = load(f.path()).unwrap_err();
1703 assert!(err.message.contains("watch is only valid"));
1704 }
1705
1706 #[test]
1707 fn extensions_passthrough_parses() {
1708 let f = write_tmp(
1709 "extensions:\n csv_http_server: true\n csv_http_server_dir: temp/\n arbitrary:\n nested: 1\n",
1710 );
1711 let m = load(f.path()).unwrap();
1712 assert_eq!(
1713 m.extensions
1714 .get("csv_http_server")
1715 .and_then(|v| v.as_bool()),
1716 Some(true)
1717 );
1718 assert_eq!(
1719 m.extensions
1720 .get("csv_http_server_dir")
1721 .and_then(|v| v.as_str()),
1722 Some("temp/")
1723 );
1724 assert_eq!(
1726 m.extensions
1727 .get("arbitrary")
1728 .and_then(|v| v.get("nested"))
1729 .and_then(|v| v.as_i64()),
1730 Some(1)
1731 );
1732 }
1733
1734 #[test]
1735 fn extensions_absent_defaults_to_empty() {
1736 let f = write_tmp("name: x\n");
1737 let m = load(f.path()).unwrap();
1738 assert!(m.extensions.is_empty());
1739 }
1740
1741 #[test]
1742 fn extensions_inner_keys_unvalidated() {
1743 let f = write_tmp(
1747 "extensions:\n whatever_kglite_wants: foo\n some_other_consumer: { a: 1, b: 2 }\n",
1748 );
1749 load(f.path()).unwrap();
1750 }
1751
1752 #[test]
1753 fn extensions_must_be_a_mapping() {
1754 let f = write_tmp("extensions: not-a-mapping\n");
1755 let err = load(f.path()).unwrap_err();
1756 assert!(err.message.contains("extensions must be a mapping"));
1757 }
1758
1759 #[test]
1760 fn env_file_key_parses() {
1761 let f = write_tmp("env_file: ../.env\n");
1762 let m = load(f.path()).unwrap();
1763 assert_eq!(m.env_file.as_deref(), Some("../.env"));
1764 }
1765
1766 #[test]
1767 fn env_file_unset_is_none() {
1768 let f = write_tmp("name: Demo\n");
1769 let m = load(f.path()).unwrap();
1770 assert!(m.env_file.is_none());
1771 }
1772
1773 #[test]
1774 fn find_workspace_works() {
1775 let dir = tempfile::tempdir().unwrap();
1776 let manifest = dir.path().join("workspace_mcp.yaml");
1777 std::fs::write(&manifest, "name: ws\n").unwrap();
1778 assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1779 }
1780
1781 #[test]
1782 fn find_workspace_walks_one_level_up_with_applies_to() {
1783 let dir = tempfile::tempdir().unwrap();
1788 let parent = dir.path().join("parent");
1789 std::fs::create_dir(&parent).unwrap();
1790 let manifest = parent.join("workspace_mcp.yaml");
1791 std::fs::write(
1792 &manifest,
1793 "workspace:\n kind: github\n applies_to: ./repos\n",
1794 )
1795 .unwrap();
1796 let repos = parent.join("repos");
1797 std::fs::create_dir(&repos).unwrap();
1798
1799 assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
1801
1802 let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
1805 assert_eq!(
1806 found.canonicalize().unwrap(),
1807 manifest.canonicalize().unwrap()
1808 );
1809 }
1810
1811 #[test]
1812 fn find_workspace_ignores_parent_without_applies_to() {
1813 let dir = tempfile::tempdir().unwrap();
1819 let parent = dir.path().join("parent");
1820 std::fs::create_dir(&parent).unwrap();
1821 let manifest = parent.join("workspace_mcp.yaml");
1822 std::fs::write(&manifest, "name: not for repos\n").unwrap();
1823 let repos = parent.join("repos");
1824 std::fs::create_dir(&repos).unwrap();
1825
1826 assert_eq!(
1827 find_workspace_manifest(&repos),
1828 None,
1829 "parent manifest without workspace.applies_to must NOT auto-attach"
1830 );
1831 }
1832
1833 #[test]
1834 fn find_workspace_ignores_parent_with_mismatched_applies_to() {
1835 let dir = tempfile::tempdir().unwrap();
1839 let parent = dir.path().join("parent");
1840 std::fs::create_dir(&parent).unwrap();
1841 let manifest = parent.join("workspace_mcp.yaml");
1842 std::fs::write(
1843 &manifest,
1844 "workspace:\n kind: github\n applies_to: ./repos\n",
1845 )
1846 .unwrap();
1847 let other = parent.join("other_dir");
1848 std::fs::create_dir(&other).unwrap();
1849
1850 assert_eq!(
1851 find_workspace_manifest(&other),
1852 None,
1853 "applies_to: ./repos must NOT match --workspace ./other_dir"
1854 );
1855 }
1856
1857 #[test]
1858 fn find_workspace_applies_to_wildcard_matches_any_child() {
1859 let dir = tempfile::tempdir().unwrap();
1863 let parent = dir.path().join("parent");
1864 std::fs::create_dir(&parent).unwrap();
1865 let manifest = parent.join("workspace_mcp.yaml");
1866 std::fs::write(&manifest, "workspace:\n kind: github\n applies_to: '*'\n").unwrap();
1867 for child_name in ["repos", "clones", "totally-different-name"] {
1868 let child = parent.join(child_name);
1869 std::fs::create_dir(&child).unwrap();
1870 let found =
1871 find_workspace_manifest(&child).expect("wildcard should match any direct child");
1872 assert_eq!(
1873 found.canonicalize().unwrap(),
1874 manifest.canonicalize().unwrap(),
1875 "wildcard should match child {child_name:?}"
1876 );
1877 }
1878 }
1879
1880 #[test]
1881 fn find_workspace_applies_to_glob_matches_prefix() {
1882 let dir = tempfile::tempdir().unwrap();
1885 let parent = dir.path().join("parent");
1886 std::fs::create_dir(&parent).unwrap();
1887 let manifest = parent.join("workspace_mcp.yaml");
1888 std::fs::write(
1889 &manifest,
1890 "workspace:\n kind: github\n applies_to: ./prod-*\n",
1891 )
1892 .unwrap();
1893 for child_name in ["prod-api", "prod-web", "prod-"] {
1895 let child = parent.join(child_name);
1896 std::fs::create_dir(&child).unwrap();
1897 assert!(
1898 find_workspace_manifest(&child).is_some(),
1899 "prod-* should match {child_name:?}"
1900 );
1901 }
1902 for child_name in ["test-api", "stage-web", "random"] {
1904 let child = parent.join(child_name);
1905 std::fs::create_dir(&child).unwrap();
1906 assert_eq!(
1907 find_workspace_manifest(&child),
1908 None,
1909 "prod-* should NOT match {child_name:?}"
1910 );
1911 }
1912 }
1913
1914 #[test]
1915 fn find_workspace_applies_to_list_matches_any_entry() {
1916 let dir = tempfile::tempdir().unwrap();
1919 let parent = dir.path().join("parent");
1920 std::fs::create_dir(&parent).unwrap();
1921 let manifest = parent.join("workspace_mcp.yaml");
1922 std::fs::write(
1923 &manifest,
1924 "workspace:\n kind: github\n applies_to:\n - ./repos\n - ./clones\n",
1925 )
1926 .unwrap();
1927 for matching in ["repos", "clones"] {
1928 let child = parent.join(matching);
1929 std::fs::create_dir(&child).unwrap();
1930 assert!(
1931 find_workspace_manifest(&child).is_some(),
1932 "list should match {matching:?}"
1933 );
1934 }
1935 let other = parent.join("scratch");
1936 std::fs::create_dir(&other).unwrap();
1937 assert_eq!(
1938 find_workspace_manifest(&other),
1939 None,
1940 "list with [repos, clones] must NOT match scratch"
1941 );
1942 }
1943
1944 #[test]
1945 fn applies_to_rejects_deep_path_at_parse_time() {
1946 let f = write_tmp("workspace:\n kind: github\n applies_to: ./too/deep/path\n");
1947 let err = load(f.path()).unwrap_err();
1948 assert!(
1949 err.message.contains("must be a single path segment"),
1950 "got: {}",
1951 err.message
1952 );
1953 }
1954
1955 #[test]
1956 fn applies_to_rejects_invalid_glob_at_parse_time() {
1957 let f = write_tmp("workspace:\n kind: github\n applies_to: './[unterminated'\n");
1959 let err = load(f.path()).unwrap_err();
1960 assert!(
1961 err.message.contains("invalid glob pattern"),
1962 "got: {}",
1963 err.message
1964 );
1965 }
1966
1967 #[test]
1968 fn applies_to_rejects_parent_relative() {
1969 let f = write_tmp("workspace:\n kind: github\n applies_to: '..'\n");
1973 let err = load(f.path()).unwrap_err();
1974 assert!(err.message.contains("must not contain `..`"));
1975
1976 let f2 = write_tmp("workspace:\n kind: github\n applies_to: '../up'\n");
1977 let err2 = load(f2.path()).unwrap_err();
1978 assert!(err2.message.contains("must be a single path segment"));
1979 }
1980
1981 #[test]
1982 fn find_workspace_returns_none_when_missing_everywhere() {
1983 let dir = tempfile::tempdir().unwrap();
1984 let child = dir.path().join("child");
1985 std::fs::create_dir(&child).unwrap();
1986 assert_eq!(find_workspace_manifest(&child), None);
1988 }
1989
1990 #[test]
1991 fn find_workspace_primary_wins_over_parent_fallback() {
1992 let dir = tempfile::tempdir().unwrap();
1999 let parent_manifest = dir.path().join("workspace_mcp.yaml");
2000 std::fs::write(
2001 &parent_manifest,
2002 "workspace:\n kind: github\n applies_to: ./repos\n",
2003 )
2004 .unwrap();
2005 let child = dir.path().join("repos");
2006 std::fs::create_dir(&child).unwrap();
2007 let child_manifest = child.join("workspace_mcp.yaml");
2008 std::fs::write(&child_manifest, "name: child\n").unwrap();
2009
2010 let found = find_workspace_manifest(&child).expect("primary should resolve");
2014 assert_eq!(
2015 found.canonicalize().unwrap(),
2016 child_manifest.canonicalize().unwrap(),
2017 "primary location must win when both primary and parent fallback exist"
2018 );
2019 }
2020
2021 #[test]
2022 fn to_json_shape_is_stable() {
2023 let f = write_tmp(
2024 r#"
2025name: KGLite Codebase
2026source_roots: [src, lib]
2027trust:
2028 allow_embedder: true
2029embedder:
2030 module: kglite.embed
2031 class: SentenceTransformerEmbedder
2032builtins:
2033 save_graph: true
2034 temp_cleanup: on_overview
2035"#,
2036 );
2037 let m = load(f.path()).unwrap();
2038 let actual = m.to_json();
2039 let expected = serde_json::json!({
2040 "yaml_path": f.path().display().to_string(),
2041 "name": "KGLite Codebase",
2042 "instructions": null,
2043 "overview_prefix": null,
2044 "source_roots": ["src", "lib"],
2045 "trust": {
2046 "allow_python_tools": false,
2047 "allow_embedder": true,
2048 "allow_query_preprocessor": false,
2049 },
2050 "tools": [],
2051 "embedder": {
2052 "module": "kglite.embed",
2053 "class": "SentenceTransformerEmbedder",
2054 "kwargs": {},
2055 },
2056 "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
2057 "env_file": null,
2058 "workspace": null,
2059 "extensions": {},
2060 });
2061 assert_eq!(actual, expected);
2062 }
2063
2064 #[test]
2065 fn to_json_round_trips_tools_and_workspace() {
2066 let f = write_tmp(
2067 r#"
2068name: Full Surface
2069source_root: ./src
2070trust:
2071 allow_python_tools: true
2072tools:
2073 - name: nodes_for
2074 cypher: "MATCH (n {name: $name}) RETURN n"
2075 description: "fetch nodes by name"
2076 - name: run_query
2077 python: tools.py
2078 function: run
2079workspace:
2080 kind: local
2081 root: /tmp/ws
2082 watch: true
2083builtins:
2084 save_graph: false
2085env_file: .env.local
2086extensions:
2087 kglite:
2088 flavour: standard
2089"#,
2090 );
2091 let m = load(f.path()).unwrap();
2092 let v = m.to_json();
2093 assert_eq!(v["name"], "Full Surface");
2094 assert_eq!(v["trust"]["allow_python_tools"], true);
2095 assert_eq!(v["workspace"]["kind"], "local");
2096 assert_eq!(v["workspace"]["root"], "/tmp/ws");
2097 assert_eq!(v["workspace"]["watch"], true);
2098 assert_eq!(v["env_file"], ".env.local");
2099 assert_eq!(v["tools"][0]["kind"], "cypher");
2100 assert_eq!(v["tools"][0]["name"], "nodes_for");
2101 assert_eq!(v["tools"][1]["kind"], "python");
2102 assert_eq!(v["tools"][1]["name"], "run_query");
2103 assert_eq!(v["tools"][1]["python"], "tools.py");
2104 assert_eq!(v["tools"][1]["function"], "run");
2105 assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
2106 }
2107}