1#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
33const ALLOWED_TOP_KEYS: &[&str] = &[
34 "name",
35 "instructions",
36 "overview_prefix",
37 "source_root",
38 "source_roots",
39 "trust",
40 "tools",
41 "embedder",
42 "builtins",
43 "env_file",
44 "workspace",
45 "extensions",
46 "skills",
47];
48const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
49const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
50const ALLOWED_TRUST_KEYS: &[&str] = &["allow_python_tools", "allow_embedder"];
51const ALLOWED_TOOL_KEYS: &[&str] = &[
52 "name",
53 "description",
54 "parameters",
55 "cypher",
56 "python",
57 "function",
58 "bundled",
59 "hidden",
60 "rename",
64];
65const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
66const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
67const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
68
69#[derive(Debug, Error)]
70#[error("{path}: {message}")]
71pub struct ManifestError {
72 pub path: String,
73 pub message: String,
74}
75
76impl ManifestError {
77 pub fn at(path: &Path, message: impl Into<String>) -> Self {
78 Self {
79 path: path.display().to_string(),
80 message: message.into(),
81 }
82 }
83
84 pub fn bare(message: impl Into<String>) -> Self {
85 Self {
86 path: "<manifest>".to_string(),
87 message: message.into(),
88 }
89 }
90}
91
92#[derive(Debug, Default, Clone)]
93pub struct TrustConfig {
94 pub allow_python_tools: bool,
95 pub allow_embedder: bool,
96}
97
98#[derive(Debug, Clone)]
99pub enum ToolSpec {
100 Cypher(CypherTool),
101 Python(PythonTool),
102 Bundled(BundledOverride),
118}
119
120impl ToolSpec {
121 pub fn name(&self) -> &str {
122 match self {
123 ToolSpec::Cypher(t) => &t.name,
124 ToolSpec::Python(t) => &t.name,
125 ToolSpec::Bundled(t) => &t.name,
126 }
127 }
128}
129
130#[derive(Debug, Clone)]
131pub struct CypherTool {
132 pub name: String,
133 pub cypher: String,
134 pub description: Option<String>,
135 pub parameters: Option<serde_json::Value>,
136}
137
138#[derive(Debug, Clone)]
139pub struct PythonTool {
140 pub name: String,
141 pub python: String,
142 pub function: String,
143 pub description: Option<String>,
144 pub parameters: Option<serde_json::Value>,
145}
146
147#[derive(Debug, Clone)]
148pub struct BundledOverride {
149 pub name: String,
154 pub description: Option<String>,
158 pub hidden: bool,
162 pub rename: Option<String>,
175}
176
177#[derive(Debug, Clone)]
178pub struct EmbedderConfig {
179 pub module: String,
180 pub class: String,
181 pub kwargs: serde_json::Map<String, serde_json::Value>,
182}
183
184#[derive(Debug, Default, Clone)]
185pub struct BuiltinsConfig {
186 pub save_graph: bool,
187 pub temp_cleanup: TempCleanup,
188}
189
190#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
191pub enum TempCleanup {
192 #[default]
193 Never,
194 OnOverview,
195}
196
197impl TempCleanup {
198 pub fn as_str(&self) -> &'static str {
199 match self {
200 TempCleanup::Never => "never",
201 TempCleanup::OnOverview => "on_overview",
202 }
203 }
204}
205
206#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
207pub enum WorkspaceKind {
208 #[default]
211 Github,
212 Local,
215}
216
217impl WorkspaceKind {
218 pub fn as_str(&self) -> &'static str {
219 match self {
220 WorkspaceKind::Github => "github",
221 WorkspaceKind::Local => "local",
222 }
223 }
224}
225
226#[derive(Debug, Clone, Default)]
227pub struct WorkspaceConfig {
228 pub kind: WorkspaceKind,
229 pub root: Option<String>,
232 pub watch: bool,
235 pub applies_to: Option<AppliesTo>,
262}
263
264#[derive(Debug, Clone, PartialEq, Eq)]
270pub enum AppliesTo {
271 Pattern(String),
276 Patterns(Vec<String>),
278}
279
280#[derive(Debug, Clone, PartialEq, Eq)]
289pub enum SkillSource {
290 Bundled,
294 Path(String),
299}
300
301#[derive(Debug, Clone, Default, PartialEq, Eq)]
313pub enum SkillsSource {
314 #[default]
316 Disabled,
317 Sources(Vec<SkillSource>),
322}
323
324#[derive(Debug, Clone)]
325pub struct Manifest {
326 pub yaml_path: PathBuf,
327 pub name: Option<String>,
328 pub instructions: Option<String>,
329 pub overview_prefix: Option<String>,
330 pub source_roots: Vec<String>,
331 pub trust: TrustConfig,
332 pub tools: Vec<ToolSpec>,
333 pub embedder: Option<EmbedderConfig>,
334 pub builtins: BuiltinsConfig,
335 pub env_file: Option<String>,
339 pub workspace: Option<WorkspaceConfig>,
343 pub extensions: serde_json::Map<String, serde_json::Value>,
353 pub skills: SkillsSource,
365}
366
367impl Manifest {
368 pub fn to_json(&self) -> serde_json::Value {
378 serde_json::json!({
379 "yaml_path": self.yaml_path.display().to_string(),
380 "name": self.name,
381 "instructions": self.instructions,
382 "overview_prefix": self.overview_prefix,
383 "source_roots": self.source_roots,
384 "trust": {
385 "allow_python_tools": self.trust.allow_python_tools,
386 "allow_embedder": self.trust.allow_embedder,
387 },
388 "tools": self.tools.iter().map(|t| match t {
389 ToolSpec::Cypher(c) => serde_json::json!({
390 "kind": "cypher",
391 "name": c.name,
392 "cypher": c.cypher,
393 "description": c.description,
394 "parameters": c.parameters,
395 }),
396 ToolSpec::Python(p) => serde_json::json!({
397 "kind": "python",
398 "name": p.name,
399 "python": p.python,
400 "function": p.function,
401 "description": p.description,
402 "parameters": p.parameters,
403 }),
404 ToolSpec::Bundled(b) => serde_json::json!({
405 "kind": "bundled",
406 "name": b.name,
407 "description": b.description,
408 "hidden": b.hidden,
409 "rename": b.rename,
410 }),
411 }).collect::<Vec<_>>(),
412 "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
413 "module": e.module,
414 "class": e.class,
415 "kwargs": e.kwargs,
416 })),
417 "builtins": {
418 "save_graph": self.builtins.save_graph,
419 "temp_cleanup": self.builtins.temp_cleanup.as_str(),
420 },
421 "env_file": self.env_file,
422 "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
423 "kind": w.kind.as_str(),
424 "root": w.root,
425 "watch": w.watch,
426 "applies_to": w.applies_to.as_ref().map(|a| match a {
427 AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
428 AppliesTo::Patterns(ps) => serde_json::Value::Array(
429 ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
430 ),
431 }),
432 })),
433 "extensions": self.extensions,
434 "skills": self.skills_to_json(),
435 })
436 }
437
438 fn skills_to_json(&self) -> serde_json::Value {
451 match &self.skills {
452 SkillsSource::Disabled => serde_json::Value::Bool(false),
453 SkillsSource::Sources(sources) => {
454 let arr: Vec<serde_json::Value> = sources
455 .iter()
456 .map(|s| match s {
457 SkillSource::Bundled => serde_json::Value::Bool(true),
458 SkillSource::Path(p) => serde_json::Value::String(p.clone()),
459 })
460 .collect();
461 serde_json::Value::Array(arr)
462 }
463 }
464 }
465}
466
467pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
469 let stem = graph_path.file_stem()?;
470 let parent = graph_path.parent()?;
471 let candidate = parent.join(format!("{}_mcp.yaml", stem.to_string_lossy()));
472 if candidate.is_file() {
473 Some(candidate)
474 } else {
475 None
476 }
477}
478
479pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
528 let primary = workspace_dir.join("workspace_mcp.yaml");
529 if primary.is_file() {
530 return Some(primary);
531 }
532 let parent = workspace_dir.parent()?;
535 let workspace_resolved = workspace_dir.canonicalize().ok()?;
536 let parent_resolved = parent.canonicalize().ok()?;
537 if parent_resolved == workspace_resolved {
538 return None;
540 }
541 let fallback = parent.join("workspace_mcp.yaml");
542 if !fallback.is_file() {
543 return None;
544 }
545
546 let manifest = match load(&fallback) {
550 Ok(m) => m,
551 Err(e) => {
552 tracing::warn!(
553 manifest = %fallback.display(),
554 error = %e,
555 "parent-walk manifest exists but failed to parse; ignoring"
556 );
557 return None;
558 }
559 };
560 let declared = manifest
561 .workspace
562 .as_ref()
563 .and_then(|w| w.applies_to.as_ref());
564 let Some(declared_applies_to) = declared else {
565 tracing::info!(
566 manifest = %fallback.display(),
567 "parent-walk manifest does not declare workspace.applies_to; \
568 ignoring (set workspace.applies_to: <pattern> to opt in)"
569 );
570 return None;
571 };
572 let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
576 return None; };
578 let patterns: Vec<&str> = match declared_applies_to {
579 AppliesTo::Pattern(p) => vec![p.as_str()],
580 AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
581 };
582 let matched = patterns.iter().any(|pat| {
583 match globset::Glob::new(pat) {
584 Ok(g) => g.compile_matcher().is_match(basename),
585 Err(_) => {
586 false
589 }
590 }
591 });
592 if matched {
593 tracing::info!(
594 workspace_dir = %workspace_dir.display(),
595 manifest = %fallback.display(),
596 "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
597 );
598 Some(fallback)
599 } else {
600 tracing::info!(
601 workspace_dir = %workspace_resolved.display(),
602 manifest = %fallback.display(),
603 basename = %basename,
604 patterns = ?patterns,
605 "parent-walk manifest's workspace.applies_to does not match \
606 this workspace_dir's basename; ignoring"
607 );
608 None
609 }
610}
611
612pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
614 let text = fs::read_to_string(yaml_path)
615 .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
616 let raw: serde_yaml::Value = serde_yaml::from_str(&text)
617 .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
618 let raw = match raw {
619 serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
620 v => v,
621 };
622 let map = raw
623 .as_mapping()
624 .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
625 build(map, yaml_path)
626}
627
628fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
629 check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
630
631 if raw.contains_key("source_root") && raw.contains_key("source_roots") {
632 return Err(ManifestError::at(
633 yaml_path,
634 "specify either source_root (str) or source_roots (list), not both",
635 ));
636 }
637
638 let mut source_roots: Vec<String> = Vec::new();
639 if let Some(v) = raw.get("source_root") {
640 let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
641 ManifestError::at(yaml_path, "source_root must be a non-empty string")
642 })?;
643 source_roots.push(s.to_string());
644 } else if let Some(v) = raw.get("source_roots") {
645 let seq = v.as_sequence().ok_or_else(|| {
646 ManifestError::at(
647 yaml_path,
648 "source_roots must be a list of non-empty strings",
649 )
650 })?;
651 if seq.is_empty() {
652 return Err(ManifestError::at(
653 yaml_path,
654 "source_roots must be non-empty when set",
655 ));
656 }
657 for item in seq {
658 let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
659 ManifestError::at(
660 yaml_path,
661 "source_roots must be a list of non-empty strings",
662 )
663 })?;
664 source_roots.push(s.to_string());
665 }
666 }
667
668 let trust = build_trust(raw.get("trust"), yaml_path)?;
669 let tools = build_tools(raw.get("tools"), yaml_path)?;
670 let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
671 let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
672 let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
673 let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
674 let skills = build_skills(raw.get("skills"), yaml_path)?;
675
676 Ok(Manifest {
677 yaml_path: yaml_path.to_path_buf(),
678 name: optional_str(raw, "name", yaml_path)?,
679 instructions: optional_str(raw, "instructions", yaml_path)?,
680 overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
681 source_roots,
682 trust,
683 tools,
684 embedder,
685 builtins,
686 env_file: optional_str(raw, "env_file", yaml_path)?,
687 workspace,
688 extensions,
689 skills,
690 })
691}
692
693fn build_skills(
715 raw: Option<&serde_yaml::Value>,
716 yaml_path: &Path,
717) -> Result<SkillsSource, ManifestError> {
718 use serde_yaml::Value;
719
720 match raw {
721 None | Some(Value::Null) | Some(Value::Bool(false)) => Ok(SkillsSource::Disabled),
722 Some(Value::Bool(true)) => Ok(SkillsSource::Sources(vec![SkillSource::Bundled])),
723 Some(Value::String(s)) => {
724 if s.is_empty() {
725 return Err(ManifestError::at(
726 yaml_path,
727 "skills: path must be a non-empty string",
728 ));
729 }
730 Ok(SkillsSource::Sources(vec![SkillSource::Path(s.clone())]))
731 }
732 Some(Value::Sequence(seq)) => {
733 let mut sources = Vec::with_capacity(seq.len());
734 for (idx, item) in seq.iter().enumerate() {
735 match item {
736 Value::Bool(true) => sources.push(SkillSource::Bundled),
737 Value::Bool(false) => {
738 return Err(ManifestError::at(
739 yaml_path,
740 format!(
741 "skills[{idx}]: `false` is not a valid entry in a `skills:` \
742 list (only `true` for bundled, or a path string)"
743 ),
744 ));
745 }
746 Value::String(s) => {
747 if s.is_empty() {
748 return Err(ManifestError::at(
749 yaml_path,
750 format!("skills[{idx}]: path must be a non-empty string"),
751 ));
752 }
753 sources.push(SkillSource::Path(s.clone()));
754 }
755 _ => {
756 return Err(ManifestError::at(
757 yaml_path,
758 format!(
759 "skills[{idx}]: each entry must be `true` (for bundled) or a \
760 path string"
761 ),
762 ));
763 }
764 }
765 }
766 Ok(SkillsSource::Sources(sources))
767 }
768 Some(_) => Err(ManifestError::at(
769 yaml_path,
770 "skills must be `false`, `true`, a path string, or a list of \
771 (true | path string) entries",
772 )),
773 }
774}
775
776fn build_extensions(
777 raw: Option<&serde_yaml::Value>,
778 yaml_path: &Path,
779) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
780 let Some(raw) = raw else {
781 return Ok(serde_json::Map::new());
782 };
783 if matches!(raw, serde_yaml::Value::Null) {
784 return Ok(serde_json::Map::new());
785 }
786 if !raw.is_mapping() {
787 return Err(ManifestError::at(
788 yaml_path,
789 "extensions must be a mapping (downstream-binary-specific keys)",
790 ));
791 }
792 match yaml_to_json(raw.clone())? {
793 serde_json::Value::Object(o) => Ok(o),
794 _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
795 }
796}
797
798fn build_workspace(
799 raw: Option<&serde_yaml::Value>,
800 yaml_path: &Path,
801) -> Result<Option<WorkspaceConfig>, ManifestError> {
802 let Some(raw) = raw else { return Ok(None) };
803 if matches!(raw, serde_yaml::Value::Null) {
804 return Ok(None);
805 }
806 let map = raw
807 .as_mapping()
808 .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
809 check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
810 let kind = match map.get("kind") {
811 None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
812 Some(serde_yaml::Value::String(s)) => match s.as_str() {
813 "github" => WorkspaceKind::Github,
814 "local" => WorkspaceKind::Local,
815 other => {
816 return Err(ManifestError::at(
817 yaml_path,
818 format!(
819 "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
820 ),
821 ));
822 }
823 },
824 Some(_) => {
825 return Err(ManifestError::at(
826 yaml_path,
827 format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
828 ))
829 }
830 };
831 let root = match map.get("root") {
832 None | Some(serde_yaml::Value::Null) => None,
833 Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
834 _ => {
835 return Err(ManifestError::at(
836 yaml_path,
837 "workspace.root must be a non-empty string",
838 ))
839 }
840 };
841 let watch = match map.get("watch") {
842 None | Some(serde_yaml::Value::Null) => false,
843 Some(serde_yaml::Value::Bool(b)) => *b,
844 Some(_) => {
845 return Err(ManifestError::at(
846 yaml_path,
847 "workspace.watch must be a bool",
848 ))
849 }
850 };
851 let applies_to =
852 match map.get("applies_to") {
853 None | Some(serde_yaml::Value::Null) => None,
854 Some(serde_yaml::Value::String(s)) => {
855 Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
856 }
857 Some(serde_yaml::Value::Sequence(seq)) => {
858 if seq.is_empty() {
859 return Err(ManifestError::at(
860 yaml_path,
861 "workspace.applies_to: list must contain at least one pattern",
862 ));
863 }
864 let mut patterns = Vec::with_capacity(seq.len());
865 for (i, item) in seq.iter().enumerate() {
866 let s = item.as_str().ok_or_else(|| {
867 ManifestError::at(
868 yaml_path,
869 format!("workspace.applies_to[{i}] must be a string"),
870 )
871 })?;
872 let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
873 ManifestError::at(
874 yaml_path,
875 format!("workspace.applies_to[{i}]: {}", e.message),
876 )
877 })?;
878 patterns.push(cleaned);
879 }
880 Some(AppliesTo::Patterns(patterns))
881 }
882 _ => return Err(ManifestError::at(
883 yaml_path,
884 "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
885 )),
886 };
887 if kind == WorkspaceKind::Local && root.is_none() {
888 return Err(ManifestError::at(
889 yaml_path,
890 "workspace.kind: local requires workspace.root to be set",
891 ));
892 }
893 if kind == WorkspaceKind::Github && watch {
894 return Err(ManifestError::at(
895 yaml_path,
896 "workspace.watch is only valid with workspace.kind: local",
897 ));
898 }
899 Ok(Some(WorkspaceConfig {
900 kind,
901 root,
902 watch,
903 applies_to,
904 }))
905}
906
907fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
916 let trimmed = raw.trim();
917 if trimmed.is_empty() {
918 return Err(ManifestError::at(
919 yaml_path,
920 "workspace.applies_to: pattern must not be empty",
921 ));
922 }
923 let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
927 if stripped.is_empty() {
928 return Err(ManifestError::at(
929 yaml_path,
930 "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
931 ));
932 }
933 if stripped.contains('/') {
934 return Err(ManifestError::at(
935 yaml_path,
936 format!(
937 "workspace.applies_to: pattern {raw:?} must be a single path segment \
938 (no embedded `/`) — parent-walk discovery is bounded to one level"
939 ),
940 ));
941 }
942 if stripped == ".." || stripped.starts_with("../") {
943 return Err(ManifestError::at(
944 yaml_path,
945 format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
946 ));
947 }
948 if Path::new(stripped).is_absolute() {
949 return Err(ManifestError::at(
950 yaml_path,
951 format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
952 ));
953 }
954 globset::Glob::new(stripped).map_err(|e| {
958 ManifestError::at(
959 yaml_path,
960 format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
961 )
962 })?;
963 Ok(stripped.to_string())
964}
965
966fn check_keys(
967 map: &serde_yaml::Mapping,
968 allowed: &[&str],
969 label: &str,
970 yaml_path: &Path,
971) -> Result<(), ManifestError> {
972 let mut unknown: Vec<String> = Vec::new();
973 for (k, _) in map {
974 let key = k.as_str().unwrap_or("<non-string-key>");
975 if !allowed.contains(&key) {
976 unknown.push(key.to_string());
977 }
978 }
979 if !unknown.is_empty() {
980 unknown.sort();
981 return Err(ManifestError::at(
982 yaml_path,
983 format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
984 ));
985 }
986 Ok(())
987}
988
989fn optional_str(
990 raw: &serde_yaml::Mapping,
991 key: &str,
992 yaml_path: &Path,
993) -> Result<Option<String>, ManifestError> {
994 match raw.get(key) {
995 None | Some(serde_yaml::Value::Null) => Ok(None),
996 Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
997 Some(_) => Err(ManifestError::at(
998 yaml_path,
999 format!("{key} must be a string"),
1000 )),
1001 }
1002}
1003
1004fn build_trust(
1005 raw: Option<&serde_yaml::Value>,
1006 yaml_path: &Path,
1007) -> Result<TrustConfig, ManifestError> {
1008 let Some(raw) = raw else {
1009 return Ok(TrustConfig::default());
1010 };
1011 let map = raw
1012 .as_mapping()
1013 .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
1014 check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
1015 let mut cfg = TrustConfig::default();
1016 if let Some(v) = map.get("allow_python_tools") {
1017 cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
1018 ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
1019 })?;
1020 }
1021 if let Some(v) = map.get("allow_embedder") {
1022 cfg.allow_embedder = v
1023 .as_bool()
1024 .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
1025 }
1026 Ok(cfg)
1027}
1028
1029fn build_tools(
1030 raw: Option<&serde_yaml::Value>,
1031 yaml_path: &Path,
1032) -> Result<Vec<ToolSpec>, ManifestError> {
1033 let Some(raw) = raw else {
1034 return Ok(Vec::new());
1035 };
1036 let seq = raw
1037 .as_sequence()
1038 .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
1039 let mut tools: Vec<ToolSpec> = Vec::new();
1040 let mut seen: BTreeMap<String, ()> = BTreeMap::new();
1041 for (i, entry) in seq.iter().enumerate() {
1042 let tool = build_tool(entry, i, yaml_path)?;
1043 let name = tool.name().to_string();
1044 if seen.insert(name.clone(), ()).is_some() {
1045 return Err(ManifestError::at(
1046 yaml_path,
1047 format!("duplicate tool name: {name:?}"),
1048 ));
1049 }
1050 tools.push(tool);
1051 }
1052 Ok(tools)
1053}
1054
1055fn build_tool(
1056 entry: &serde_yaml::Value,
1057 idx: usize,
1058 yaml_path: &Path,
1059) -> Result<ToolSpec, ManifestError> {
1060 let map = entry
1061 .as_mapping()
1062 .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
1063 check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
1064
1065 let has_cypher = map.contains_key("cypher");
1070 let has_python = map.contains_key("python");
1071 let has_bundled = map.contains_key("bundled");
1072 let kinds_present: Vec<&str> = [
1073 ("cypher", has_cypher),
1074 ("python", has_python),
1075 ("bundled", has_bundled),
1076 ]
1077 .into_iter()
1078 .filter(|(_, p)| *p)
1079 .map(|(k, _)| k)
1080 .collect();
1081 if kinds_present.is_empty() {
1082 return Err(ManifestError::at(
1083 yaml_path,
1084 format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
1085 ));
1086 }
1087 if kinds_present.len() > 1 {
1088 return Err(ManifestError::at(
1089 yaml_path,
1090 format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
1091 ));
1092 }
1093
1094 if has_bundled {
1099 return build_bundled_override(map, idx, yaml_path);
1100 }
1101
1102 let name = map
1103 .get("name")
1104 .and_then(|v| v.as_str())
1105 .filter(|s| valid_identifier(s))
1106 .ok_or_else(|| {
1107 ManifestError::at(
1108 yaml_path,
1109 format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
1110 )
1111 })?
1112 .to_string();
1113
1114 if map.contains_key("hidden") {
1118 return Err(ManifestError::at(
1119 yaml_path,
1120 format!(
1121 "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
1122 ),
1123 ));
1124 }
1125
1126 let description = match map.get("description") {
1127 None | Some(serde_yaml::Value::Null) => None,
1128 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1129 Some(_) => {
1130 return Err(ManifestError::at(
1131 yaml_path,
1132 format!("tools[{idx}] ({name:?}).description must be a string"),
1133 ))
1134 }
1135 };
1136
1137 let parameters = match map.get("parameters") {
1138 None | Some(serde_yaml::Value::Null) => None,
1139 Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
1140 Some(_) => {
1141 return Err(ManifestError::at(
1142 yaml_path,
1143 format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
1144 ))
1145 }
1146 };
1147
1148 if has_cypher {
1149 let cypher = map
1150 .get("cypher")
1151 .and_then(|v| v.as_str())
1152 .filter(|s| !s.trim().is_empty())
1153 .ok_or_else(|| {
1154 ManifestError::at(
1155 yaml_path,
1156 format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
1157 )
1158 })?
1159 .to_string();
1160 return Ok(ToolSpec::Cypher(CypherTool {
1161 name,
1162 cypher,
1163 description,
1164 parameters,
1165 }));
1166 }
1167
1168 let python = map
1170 .get("python")
1171 .and_then(|v| v.as_str())
1172 .filter(|s| !s.is_empty())
1173 .ok_or_else(|| {
1174 ManifestError::at(
1175 yaml_path,
1176 format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1177 )
1178 })?
1179 .to_string();
1180 let function = map
1181 .get("function")
1182 .and_then(|v| v.as_str())
1183 .filter(|s| valid_identifier(s))
1184 .ok_or_else(|| {
1185 ManifestError::at(
1186 yaml_path,
1187 format!(
1188 "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1189 ),
1190 )
1191 })?
1192 .to_string();
1193 Ok(ToolSpec::Python(PythonTool {
1194 name,
1195 python,
1196 function,
1197 description,
1198 parameters,
1199 }))
1200}
1201
1202fn build_bundled_override(
1206 map: &serde_yaml::Mapping,
1207 idx: usize,
1208 yaml_path: &Path,
1209) -> Result<ToolSpec, ManifestError> {
1210 let name = map
1211 .get("bundled")
1212 .and_then(|v| v.as_str())
1213 .filter(|s| valid_identifier(s))
1214 .ok_or_else(|| {
1215 ManifestError::at(
1216 yaml_path,
1217 format!(
1218 "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1219 (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1220 ),
1221 )
1222 })?
1223 .to_string();
1224
1225 for forbidden in ["name", "parameters", "function"] {
1230 if map.contains_key(forbidden) {
1231 return Err(ManifestError::at(
1232 yaml_path,
1233 format!(
1234 "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1235 (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
1236 ),
1237 ));
1238 }
1239 }
1240
1241 let description = match map.get("description") {
1242 None | Some(serde_yaml::Value::Null) => None,
1243 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1244 Some(_) => {
1245 return Err(ManifestError::at(
1246 yaml_path,
1247 format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1248 ))
1249 }
1250 };
1251
1252 let hidden = match map.get("hidden") {
1253 None | Some(serde_yaml::Value::Null) => false,
1254 Some(serde_yaml::Value::Bool(b)) => *b,
1255 Some(_) => {
1256 return Err(ManifestError::at(
1257 yaml_path,
1258 format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1259 ))
1260 }
1261 };
1262
1263 let rename = match map.get("rename") {
1268 None | Some(serde_yaml::Value::Null) => None,
1269 Some(serde_yaml::Value::String(s)) => {
1270 if !valid_identifier(s) {
1271 return Err(ManifestError::at(
1272 yaml_path,
1273 format!(
1274 "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
1275 (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
1276 ),
1277 ));
1278 }
1279 Some(s.clone())
1280 }
1281 Some(_) => {
1282 return Err(ManifestError::at(
1283 yaml_path,
1284 format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
1285 ))
1286 }
1287 };
1288
1289 Ok(ToolSpec::Bundled(BundledOverride {
1290 name,
1291 description,
1292 hidden,
1293 rename,
1294 }))
1295}
1296
1297fn build_embedder(
1298 raw: Option<&serde_yaml::Value>,
1299 yaml_path: &Path,
1300) -> Result<Option<EmbedderConfig>, ManifestError> {
1301 let Some(raw) = raw else { return Ok(None) };
1302 if matches!(raw, serde_yaml::Value::Null) {
1303 return Ok(None);
1304 }
1305 let map = raw
1306 .as_mapping()
1307 .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1308 check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1309 let module = map
1310 .get("module")
1311 .and_then(|v| v.as_str())
1312 .filter(|s| !s.is_empty())
1313 .ok_or_else(|| {
1314 ManifestError::at(
1315 yaml_path,
1316 "embedder.module must be a non-empty string (path or dotted name)",
1317 )
1318 })?
1319 .to_string();
1320 let class = map
1321 .get("class")
1322 .and_then(|v| v.as_str())
1323 .filter(|s| valid_identifier(s))
1324 .ok_or_else(|| {
1325 ManifestError::at(
1326 yaml_path,
1327 "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1328 )
1329 })?
1330 .to_string();
1331 let kwargs = match map.get("kwargs") {
1332 None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1333 Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1334 serde_json::Value::Object(o) => o,
1335 _ => {
1336 return Err(ManifestError::at(
1337 yaml_path,
1338 "embedder.kwargs must be a mapping",
1339 ))
1340 }
1341 },
1342 Some(_) => {
1343 return Err(ManifestError::at(
1344 yaml_path,
1345 "embedder.kwargs must be a mapping",
1346 ))
1347 }
1348 };
1349 Ok(Some(EmbedderConfig {
1350 module,
1351 class,
1352 kwargs,
1353 }))
1354}
1355
1356fn build_builtins(
1357 raw: Option<&serde_yaml::Value>,
1358 yaml_path: &Path,
1359) -> Result<BuiltinsConfig, ManifestError> {
1360 let Some(raw) = raw else {
1361 return Ok(BuiltinsConfig::default());
1362 };
1363 if matches!(raw, serde_yaml::Value::Null) {
1364 return Ok(BuiltinsConfig::default());
1365 }
1366 let map = raw
1367 .as_mapping()
1368 .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1369 check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1370 let mut cfg = BuiltinsConfig::default();
1371 if let Some(v) = map.get("save_graph") {
1372 cfg.save_graph = v
1373 .as_bool()
1374 .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1375 }
1376 if let Some(v) = map.get("temp_cleanup") {
1377 let s = v.as_str().ok_or_else(|| {
1378 ManifestError::at(
1379 yaml_path,
1380 format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1381 )
1382 })?;
1383 cfg.temp_cleanup = match s {
1384 "never" => TempCleanup::Never,
1385 "on_overview" => TempCleanup::OnOverview,
1386 other => {
1387 return Err(ManifestError::at(
1388 yaml_path,
1389 format!(
1390 "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1391 ),
1392 ))
1393 }
1394 };
1395 }
1396 Ok(cfg)
1397}
1398
1399fn valid_identifier(s: &str) -> bool {
1400 let mut chars = s.chars();
1401 match chars.next() {
1402 Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
1403 _ => return false,
1404 }
1405 chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
1406}
1407
1408fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1409 serde_json::to_value(&v)
1410 .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1411}
1412
1413#[derive(Debug, Deserialize)]
1414struct _Reserved;
1415
1416#[cfg(test)]
1417mod tests {
1418 use super::*;
1419
1420 fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1421 let mut f = tempfile::NamedTempFile::new().unwrap();
1422 std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1423 f
1424 }
1425
1426 #[test]
1427 fn loads_minimal_empty_manifest() {
1428 let f = write_tmp("");
1429 let m = load(f.path()).unwrap();
1430 assert_eq!(m.tools.len(), 0);
1431 assert_eq!(m.source_roots.len(), 0);
1432 assert!(!m.trust.allow_python_tools);
1433 assert!(!m.trust.allow_embedder);
1434 assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1435 }
1436
1437 #[test]
1438 fn loads_name_and_instructions() {
1439 let f = write_tmp("name: Demo\ninstructions: |\n multi-line\n block\n");
1440 let m = load(f.path()).unwrap();
1441 assert_eq!(m.name.as_deref(), Some("Demo"));
1442 assert!(m.instructions.unwrap().contains("multi-line"));
1443 }
1444
1445 #[test]
1446 fn rejects_unknown_top_key() {
1447 let f = write_tmp("bogus: 1\n");
1448 let err = load(f.path()).unwrap_err();
1449 assert!(err.message.contains("unknown top-level"));
1450 }
1451
1452 #[test]
1453 fn source_root_string_normalises_to_list() {
1454 let f = write_tmp("source_root: ./data\n");
1455 let m = load(f.path()).unwrap();
1456 assert_eq!(m.source_roots, vec!["./data".to_string()]);
1457 }
1458
1459 #[test]
1460 fn source_roots_list_preserved() {
1461 let f = write_tmp("source_roots:\n - ./a\n - ./b\n");
1462 let m = load(f.path()).unwrap();
1463 assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1464 }
1465
1466 #[test]
1467 fn rejects_both_source_root_and_source_roots() {
1468 let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1469 assert!(load(f.path()).unwrap_err().message.contains("not both"));
1470 }
1471
1472 #[test]
1473 fn cypher_tool_parses() {
1474 let f = write_tmp("tools:\n - name: lookup\n cypher: MATCH (n) RETURN n\n");
1475 let m = load(f.path()).unwrap();
1476 assert_eq!(m.tools.len(), 1);
1477 match &m.tools[0] {
1478 ToolSpec::Cypher(t) => {
1479 assert_eq!(t.name, "lookup");
1480 assert!(t.cypher.contains("MATCH"));
1481 }
1482 _ => panic!("expected cypher tool"),
1483 }
1484 }
1485
1486 #[test]
1487 fn python_tool_parses() {
1488 let f =
1489 write_tmp("tools:\n - name: detail\n python: ./tools.py\n function: detail\n");
1490 let m = load(f.path()).unwrap();
1491 match &m.tools[0] {
1492 ToolSpec::Python(t) => {
1493 assert_eq!(t.python, "./tools.py");
1494 assert_eq!(t.function, "detail");
1495 }
1496 _ => panic!("expected python tool"),
1497 }
1498 }
1499
1500 #[test]
1501 fn rejects_tool_with_both_kinds() {
1502 let f = write_tmp(
1503 "tools:\n - name: x\n cypher: 'MATCH (n) RETURN n'\n python: ./t.py\n function: x\n",
1504 );
1505 assert!(load(f.path())
1506 .unwrap_err()
1507 .message
1508 .contains("multiple kinds"));
1509 }
1510
1511 #[test]
1512 fn rejects_tool_with_no_kind() {
1513 let f = write_tmp("tools:\n - name: x\n");
1514 assert!(load(f.path())
1515 .unwrap_err()
1516 .message
1517 .contains("needs exactly one"));
1518 }
1519
1520 #[test]
1521 fn rejects_duplicate_tool_names() {
1522 let f = write_tmp(
1523 "tools:\n - name: same\n cypher: 'MATCH (n) RETURN n'\n - name: same\n cypher: 'MATCH (m) RETURN m'\n",
1524 );
1525 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1526 }
1527
1528 #[test]
1531 fn bundled_override_with_description_parses() {
1532 let f =
1533 write_tmp("tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n");
1534 let m = load(f.path()).unwrap();
1535 assert_eq!(m.tools.len(), 1);
1536 match &m.tools[0] {
1537 ToolSpec::Bundled(b) => {
1538 assert_eq!(b.name, "repo_management");
1539 assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1540 assert!(!b.hidden);
1541 }
1542 _ => panic!("expected bundled override"),
1543 }
1544 }
1545
1546 #[test]
1547 fn bundled_override_with_hidden_parses() {
1548 let f = write_tmp("tools:\n - bundled: ping\n hidden: true\n");
1549 let m = load(f.path()).unwrap();
1550 match &m.tools[0] {
1551 ToolSpec::Bundled(b) => {
1552 assert_eq!(b.name, "ping");
1553 assert!(b.hidden);
1554 assert!(b.description.is_none());
1555 }
1556 _ => panic!("expected bundled override"),
1557 }
1558 }
1559
1560 #[test]
1561 fn bundled_override_alongside_cypher_tools_parses() {
1562 let f = write_tmp(
1563 "tools:\n\
1564 \x20\x20- bundled: cypher_query\n\
1565 \x20\x20\x20\x20description: \"Custom server description\"\n\
1566 \x20\x20- name: lookup\n\
1567 \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1568 );
1569 let m = load(f.path()).unwrap();
1570 assert_eq!(m.tools.len(), 2);
1571 assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1572 assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1573 }
1574
1575 #[test]
1576 fn rejects_bundled_with_cypher_kind() {
1577 let f =
1578 write_tmp("tools:\n - bundled: cypher_query\n cypher: \"MATCH (n) RETURN n\"\n");
1579 let err = load(f.path()).unwrap_err();
1580 assert!(
1581 err.message.contains("multiple kinds"),
1582 "got: {}",
1583 err.message
1584 );
1585 }
1586
1587 #[test]
1588 fn rejects_bundled_with_name_field() {
1589 let f = write_tmp("tools:\n - bundled: ping\n name: ping\n");
1590 let err = load(f.path()).unwrap_err();
1591 assert!(
1592 err.message.contains("cannot set `name:`"),
1593 "got: {}",
1594 err.message
1595 );
1596 }
1597
1598 #[test]
1599 fn rejects_bundled_with_parameters_field() {
1600 let f =
1601 write_tmp("tools:\n - bundled: cypher_query\n parameters:\n type: object\n");
1602 let err = load(f.path()).unwrap_err();
1603 assert!(
1604 err.message.contains("cannot set `parameters:`"),
1605 "got: {}",
1606 err.message
1607 );
1608 }
1609
1610 #[test]
1611 fn rejects_bundled_with_non_bool_hidden() {
1612 let f = write_tmp("tools:\n - bundled: ping\n hidden: yes-please\n");
1613 let err = load(f.path()).unwrap_err();
1614 assert!(
1615 err.message.contains("hidden must be a bool"),
1616 "got: {}",
1617 err.message
1618 );
1619 }
1620
1621 #[test]
1622 fn rejects_hidden_on_cypher_tool() {
1623 let f = write_tmp(
1624 "tools:\n - name: lookup\n cypher: \"MATCH (n) RETURN n\"\n hidden: true\n",
1625 );
1626 let err = load(f.path()).unwrap_err();
1627 assert!(
1628 err.message
1629 .contains("`hidden:` is only valid on `bundled:` override entries"),
1630 "got: {}",
1631 err.message
1632 );
1633 }
1634
1635 #[test]
1636 fn rejects_duplicate_bundled_overrides() {
1637 let f = write_tmp(
1641 "tools:\n - bundled: ping\n hidden: true\n - bundled: ping\n description: \"x\"\n",
1642 );
1643 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1644 }
1645
1646 #[test]
1647 fn rejects_bundled_with_invalid_identifier() {
1648 let f = write_tmp("tools:\n - bundled: \"123-bad\"\n hidden: true\n");
1649 let err = load(f.path()).unwrap_err();
1650 assert!(
1651 err.message.contains("must be a string"),
1652 "got: {}",
1653 err.message
1654 );
1655 }
1656
1657 #[test]
1659 fn bundled_rename_parses_when_valid_identifier() {
1660 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: legal_cypher_query\n");
1661 let m = load(f.path()).unwrap();
1662 match &m.tools[0] {
1663 ToolSpec::Bundled(b) => {
1664 assert_eq!(b.name, "cypher_query");
1665 assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1666 assert!(!b.hidden);
1667 assert!(b.description.is_none());
1668 }
1669 _ => panic!("expected bundled override"),
1670 }
1671 }
1672
1673 #[test]
1674 fn bundled_rename_alongside_description_parses() {
1675 let f = write_tmp(
1676 "tools:\n - bundled: cypher_query\n rename: legal_cypher_query\n description: \"Legal-corpus cypher\"\n",
1677 );
1678 let m = load(f.path()).unwrap();
1679 match &m.tools[0] {
1680 ToolSpec::Bundled(b) => {
1681 assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
1682 assert_eq!(b.description.as_deref(), Some("Legal-corpus cypher"));
1683 }
1684 _ => panic!("expected bundled override"),
1685 }
1686 }
1687
1688 #[test]
1689 fn bundled_rename_defaults_to_none() {
1690 let f = write_tmp("tools:\n - bundled: cypher_query\n description: \"x\"\n");
1691 let m = load(f.path()).unwrap();
1692 match &m.tools[0] {
1693 ToolSpec::Bundled(b) => assert!(b.rename.is_none()),
1694 _ => panic!("expected bundled override"),
1695 }
1696 }
1697
1698 #[test]
1699 fn rejects_bundled_rename_with_invalid_identifier() {
1700 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: \"123-bad\"\n");
1701 let err = load(f.path()).unwrap_err();
1702 assert!(
1703 err.message.contains("rename must be a valid identifier"),
1704 "got: {}",
1705 err.message
1706 );
1707 }
1708
1709 #[test]
1710 fn rejects_bundled_rename_with_non_string_value() {
1711 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: 42\n");
1712 let err = load(f.path()).unwrap_err();
1713 assert!(
1714 err.message.contains("rename must be a string"),
1715 "got: {}",
1716 err.message
1717 );
1718 }
1719
1720 #[test]
1721 fn bundled_rename_serialises_to_json() {
1722 let f = write_tmp("tools:\n - bundled: cypher_query\n rename: legal_cypher_query\n");
1723 let m = load(f.path()).unwrap();
1724 let json = m.to_json();
1725 let tools = json.get("tools").and_then(|t| t.as_array()).unwrap();
1726 let entry = &tools[0];
1727 assert_eq!(entry.get("kind").and_then(|v| v.as_str()), Some("bundled"));
1728 assert_eq!(
1729 entry.get("name").and_then(|v| v.as_str()),
1730 Some("cypher_query")
1731 );
1732 assert_eq!(
1733 entry.get("rename").and_then(|v| v.as_str()),
1734 Some("legal_cypher_query")
1735 );
1736 }
1737
1738 #[test]
1739 fn bundled_override_to_json_shape() {
1740 let f = write_tmp(
1741 "tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n hidden: false\n",
1742 );
1743 let m = load(f.path()).unwrap();
1744 let v = m.to_json();
1745 assert_eq!(v["tools"][0]["kind"], "bundled");
1746 assert_eq!(v["tools"][0]["name"], "repo_management");
1747 assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1748 assert_eq!(v["tools"][0]["hidden"], false);
1749 }
1750
1751 #[test]
1752 fn embedder_parses() {
1753 let f = write_tmp(
1754 "embedder:\n module: ./e.py\n class: GraphEmbedder\n kwargs:\n cooldown: 900\n",
1755 );
1756 let m = load(f.path()).unwrap();
1757 let e = m.embedder.unwrap();
1758 assert_eq!(e.module, "./e.py");
1759 assert_eq!(e.class, "GraphEmbedder");
1760 assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1761 }
1762
1763 #[test]
1764 fn builtins_parses_temp_cleanup() {
1765 let f = write_tmp("builtins:\n save_graph: true\n temp_cleanup: on_overview\n");
1766 let m = load(f.path()).unwrap();
1767 assert!(m.builtins.save_graph);
1768 assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1769 }
1770
1771 #[test]
1772 fn rejects_invalid_temp_cleanup() {
1773 let f = write_tmp("builtins:\n temp_cleanup: nuke\n");
1774 assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1775 }
1776
1777 #[test]
1778 fn allow_embedder_trust_parses() {
1779 let f = write_tmp("trust:\n allow_embedder: true\n");
1780 let m = load(f.path()).unwrap();
1781 assert!(m.trust.allow_embedder);
1782 }
1783
1784 #[test]
1785 fn retired_allow_query_preprocessor_is_rejected_as_unknown() {
1786 let f = write_tmp("trust:\n allow_query_preprocessor: true\n");
1790 let err = load(f.path()).unwrap_err();
1791 assert!(err.message.contains("trust keys"));
1792 assert!(err.message.contains("allow_query_preprocessor"));
1793 }
1794
1795 #[test]
1796 fn find_sibling_works() {
1797 let dir = tempfile::tempdir().unwrap();
1798 let graph = dir.path().join("demo.kgl");
1799 std::fs::write(&graph, b"\x00").unwrap();
1800 let sibling = dir.path().join("demo_mcp.yaml");
1801 std::fs::write(&sibling, "name: x\n").unwrap();
1802 assert_eq!(find_sibling_manifest(&graph), Some(sibling));
1803 }
1804
1805 #[test]
1806 fn workspace_local_parses() {
1807 let f = write_tmp("workspace:\n kind: local\n root: ./src\n watch: true\n");
1808 let m = load(f.path()).unwrap();
1809 let w = m.workspace.unwrap();
1810 assert_eq!(w.kind, WorkspaceKind::Local);
1811 assert_eq!(w.root.as_deref(), Some("./src"));
1812 assert!(w.watch);
1813 }
1814
1815 #[test]
1816 fn workspace_github_default_kind() {
1817 let f = write_tmp("workspace: {}\n");
1818 let m = load(f.path()).unwrap();
1819 let w = m.workspace.unwrap();
1820 assert_eq!(w.kind, WorkspaceKind::Github);
1821 assert!(w.root.is_none());
1822 assert!(!w.watch);
1823 }
1824
1825 #[test]
1826 fn workspace_local_without_root_errors() {
1827 let f = write_tmp("workspace:\n kind: local\n");
1828 let err = load(f.path()).unwrap_err();
1829 assert!(err.message.contains("requires workspace.root"));
1830 }
1831
1832 #[test]
1833 fn workspace_unknown_key_rejected() {
1834 let f = write_tmp("workspace:\n kind: local\n root: ./x\n bogus: 1\n");
1835 let err = load(f.path()).unwrap_err();
1836 assert!(err.message.contains("unknown workspace keys"));
1837 }
1838
1839 #[test]
1840 fn workspace_invalid_kind_rejected() {
1841 let f = write_tmp("workspace:\n kind: docker\n root: ./x\n");
1842 let err = load(f.path()).unwrap_err();
1843 assert!(err.message.contains("workspace.kind"));
1844 }
1845
1846 #[test]
1847 fn workspace_watch_invalid_for_github() {
1848 let f = write_tmp("workspace:\n kind: github\n watch: true\n");
1849 let err = load(f.path()).unwrap_err();
1850 assert!(err.message.contains("watch is only valid"));
1851 }
1852
1853 #[test]
1854 fn extensions_passthrough_parses() {
1855 let f = write_tmp(
1856 "extensions:\n csv_http_server: true\n csv_http_server_dir: temp/\n arbitrary:\n nested: 1\n",
1857 );
1858 let m = load(f.path()).unwrap();
1859 assert_eq!(
1860 m.extensions
1861 .get("csv_http_server")
1862 .and_then(|v| v.as_bool()),
1863 Some(true)
1864 );
1865 assert_eq!(
1866 m.extensions
1867 .get("csv_http_server_dir")
1868 .and_then(|v| v.as_str()),
1869 Some("temp/")
1870 );
1871 assert_eq!(
1873 m.extensions
1874 .get("arbitrary")
1875 .and_then(|v| v.get("nested"))
1876 .and_then(|v| v.as_i64()),
1877 Some(1)
1878 );
1879 }
1880
1881 #[test]
1882 fn extensions_absent_defaults_to_empty() {
1883 let f = write_tmp("name: x\n");
1884 let m = load(f.path()).unwrap();
1885 assert!(m.extensions.is_empty());
1886 }
1887
1888 #[test]
1889 fn extensions_inner_keys_unvalidated() {
1890 let f = write_tmp(
1894 "extensions:\n whatever_kglite_wants: foo\n some_other_consumer: { a: 1, b: 2 }\n",
1895 );
1896 load(f.path()).unwrap();
1897 }
1898
1899 #[test]
1900 fn extensions_must_be_a_mapping() {
1901 let f = write_tmp("extensions: not-a-mapping\n");
1902 let err = load(f.path()).unwrap_err();
1903 assert!(err.message.contains("extensions must be a mapping"));
1904 }
1905
1906 #[test]
1907 fn env_file_key_parses() {
1908 let f = write_tmp("env_file: ../.env\n");
1909 let m = load(f.path()).unwrap();
1910 assert_eq!(m.env_file.as_deref(), Some("../.env"));
1911 }
1912
1913 #[test]
1914 fn env_file_unset_is_none() {
1915 let f = write_tmp("name: Demo\n");
1916 let m = load(f.path()).unwrap();
1917 assert!(m.env_file.is_none());
1918 }
1919
1920 #[test]
1921 fn find_workspace_works() {
1922 let dir = tempfile::tempdir().unwrap();
1923 let manifest = dir.path().join("workspace_mcp.yaml");
1924 std::fs::write(&manifest, "name: ws\n").unwrap();
1925 assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1926 }
1927
1928 #[test]
1929 fn find_workspace_walks_one_level_up_with_applies_to() {
1930 let dir = tempfile::tempdir().unwrap();
1935 let parent = dir.path().join("parent");
1936 std::fs::create_dir(&parent).unwrap();
1937 let manifest = parent.join("workspace_mcp.yaml");
1938 std::fs::write(
1939 &manifest,
1940 "workspace:\n kind: github\n applies_to: ./repos\n",
1941 )
1942 .unwrap();
1943 let repos = parent.join("repos");
1944 std::fs::create_dir(&repos).unwrap();
1945
1946 assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
1948
1949 let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
1952 assert_eq!(
1953 found.canonicalize().unwrap(),
1954 manifest.canonicalize().unwrap()
1955 );
1956 }
1957
1958 #[test]
1959 fn find_workspace_ignores_parent_without_applies_to() {
1960 let dir = tempfile::tempdir().unwrap();
1966 let parent = dir.path().join("parent");
1967 std::fs::create_dir(&parent).unwrap();
1968 let manifest = parent.join("workspace_mcp.yaml");
1969 std::fs::write(&manifest, "name: not for repos\n").unwrap();
1970 let repos = parent.join("repos");
1971 std::fs::create_dir(&repos).unwrap();
1972
1973 assert_eq!(
1974 find_workspace_manifest(&repos),
1975 None,
1976 "parent manifest without workspace.applies_to must NOT auto-attach"
1977 );
1978 }
1979
1980 #[test]
1981 fn find_workspace_ignores_parent_with_mismatched_applies_to() {
1982 let dir = tempfile::tempdir().unwrap();
1986 let parent = dir.path().join("parent");
1987 std::fs::create_dir(&parent).unwrap();
1988 let manifest = parent.join("workspace_mcp.yaml");
1989 std::fs::write(
1990 &manifest,
1991 "workspace:\n kind: github\n applies_to: ./repos\n",
1992 )
1993 .unwrap();
1994 let other = parent.join("other_dir");
1995 std::fs::create_dir(&other).unwrap();
1996
1997 assert_eq!(
1998 find_workspace_manifest(&other),
1999 None,
2000 "applies_to: ./repos must NOT match --workspace ./other_dir"
2001 );
2002 }
2003
2004 #[test]
2005 fn find_workspace_applies_to_wildcard_matches_any_child() {
2006 let dir = tempfile::tempdir().unwrap();
2010 let parent = dir.path().join("parent");
2011 std::fs::create_dir(&parent).unwrap();
2012 let manifest = parent.join("workspace_mcp.yaml");
2013 std::fs::write(&manifest, "workspace:\n kind: github\n applies_to: '*'\n").unwrap();
2014 for child_name in ["repos", "clones", "totally-different-name"] {
2015 let child = parent.join(child_name);
2016 std::fs::create_dir(&child).unwrap();
2017 let found =
2018 find_workspace_manifest(&child).expect("wildcard should match any direct child");
2019 assert_eq!(
2020 found.canonicalize().unwrap(),
2021 manifest.canonicalize().unwrap(),
2022 "wildcard should match child {child_name:?}"
2023 );
2024 }
2025 }
2026
2027 #[test]
2028 fn find_workspace_applies_to_glob_matches_prefix() {
2029 let dir = tempfile::tempdir().unwrap();
2032 let parent = dir.path().join("parent");
2033 std::fs::create_dir(&parent).unwrap();
2034 let manifest = parent.join("workspace_mcp.yaml");
2035 std::fs::write(
2036 &manifest,
2037 "workspace:\n kind: github\n applies_to: ./prod-*\n",
2038 )
2039 .unwrap();
2040 for child_name in ["prod-api", "prod-web", "prod-"] {
2042 let child = parent.join(child_name);
2043 std::fs::create_dir(&child).unwrap();
2044 assert!(
2045 find_workspace_manifest(&child).is_some(),
2046 "prod-* should match {child_name:?}"
2047 );
2048 }
2049 for child_name in ["test-api", "stage-web", "random"] {
2051 let child = parent.join(child_name);
2052 std::fs::create_dir(&child).unwrap();
2053 assert_eq!(
2054 find_workspace_manifest(&child),
2055 None,
2056 "prod-* should NOT match {child_name:?}"
2057 );
2058 }
2059 }
2060
2061 #[test]
2062 fn find_workspace_applies_to_list_matches_any_entry() {
2063 let dir = tempfile::tempdir().unwrap();
2066 let parent = dir.path().join("parent");
2067 std::fs::create_dir(&parent).unwrap();
2068 let manifest = parent.join("workspace_mcp.yaml");
2069 std::fs::write(
2070 &manifest,
2071 "workspace:\n kind: github\n applies_to:\n - ./repos\n - ./clones\n",
2072 )
2073 .unwrap();
2074 for matching in ["repos", "clones"] {
2075 let child = parent.join(matching);
2076 std::fs::create_dir(&child).unwrap();
2077 assert!(
2078 find_workspace_manifest(&child).is_some(),
2079 "list should match {matching:?}"
2080 );
2081 }
2082 let other = parent.join("scratch");
2083 std::fs::create_dir(&other).unwrap();
2084 assert_eq!(
2085 find_workspace_manifest(&other),
2086 None,
2087 "list with [repos, clones] must NOT match scratch"
2088 );
2089 }
2090
2091 #[test]
2092 fn applies_to_rejects_deep_path_at_parse_time() {
2093 let f = write_tmp("workspace:\n kind: github\n applies_to: ./too/deep/path\n");
2094 let err = load(f.path()).unwrap_err();
2095 assert!(
2096 err.message.contains("must be a single path segment"),
2097 "got: {}",
2098 err.message
2099 );
2100 }
2101
2102 #[test]
2103 fn applies_to_rejects_invalid_glob_at_parse_time() {
2104 let f = write_tmp("workspace:\n kind: github\n applies_to: './[unterminated'\n");
2106 let err = load(f.path()).unwrap_err();
2107 assert!(
2108 err.message.contains("invalid glob pattern"),
2109 "got: {}",
2110 err.message
2111 );
2112 }
2113
2114 #[test]
2115 fn applies_to_rejects_parent_relative() {
2116 let f = write_tmp("workspace:\n kind: github\n applies_to: '..'\n");
2120 let err = load(f.path()).unwrap_err();
2121 assert!(err.message.contains("must not contain `..`"));
2122
2123 let f2 = write_tmp("workspace:\n kind: github\n applies_to: '../up'\n");
2124 let err2 = load(f2.path()).unwrap_err();
2125 assert!(err2.message.contains("must be a single path segment"));
2126 }
2127
2128 #[test]
2129 fn find_workspace_returns_none_when_missing_everywhere() {
2130 let dir = tempfile::tempdir().unwrap();
2131 let child = dir.path().join("child");
2132 std::fs::create_dir(&child).unwrap();
2133 assert_eq!(find_workspace_manifest(&child), None);
2135 }
2136
2137 #[test]
2138 fn find_workspace_primary_wins_over_parent_fallback() {
2139 let dir = tempfile::tempdir().unwrap();
2146 let parent_manifest = dir.path().join("workspace_mcp.yaml");
2147 std::fs::write(
2148 &parent_manifest,
2149 "workspace:\n kind: github\n applies_to: ./repos\n",
2150 )
2151 .unwrap();
2152 let child = dir.path().join("repos");
2153 std::fs::create_dir(&child).unwrap();
2154 let child_manifest = child.join("workspace_mcp.yaml");
2155 std::fs::write(&child_manifest, "name: child\n").unwrap();
2156
2157 let found = find_workspace_manifest(&child).expect("primary should resolve");
2161 assert_eq!(
2162 found.canonicalize().unwrap(),
2163 child_manifest.canonicalize().unwrap(),
2164 "primary location must win when both primary and parent fallback exist"
2165 );
2166 }
2167
2168 #[test]
2169 fn to_json_shape_is_stable() {
2170 let f = write_tmp(
2171 r#"
2172name: KGLite Codebase
2173source_roots: [src, lib]
2174trust:
2175 allow_embedder: true
2176embedder:
2177 module: kglite.embed
2178 class: SentenceTransformerEmbedder
2179builtins:
2180 save_graph: true
2181 temp_cleanup: on_overview
2182"#,
2183 );
2184 let m = load(f.path()).unwrap();
2185 let actual = m.to_json();
2186 let expected = serde_json::json!({
2187 "yaml_path": f.path().display().to_string(),
2188 "name": "KGLite Codebase",
2189 "instructions": null,
2190 "overview_prefix": null,
2191 "source_roots": ["src", "lib"],
2192 "trust": {
2193 "allow_python_tools": false,
2194 "allow_embedder": true,
2195 },
2196 "tools": [],
2197 "embedder": {
2198 "module": "kglite.embed",
2199 "class": "SentenceTransformerEmbedder",
2200 "kwargs": {},
2201 },
2202 "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
2203 "env_file": null,
2204 "workspace": null,
2205 "extensions": {},
2206 "skills": false,
2207 });
2208 assert_eq!(actual, expected);
2209 }
2210
2211 #[test]
2212 fn to_json_round_trips_tools_and_workspace() {
2213 let f = write_tmp(
2214 r#"
2215name: Full Surface
2216source_root: ./src
2217trust:
2218 allow_python_tools: true
2219tools:
2220 - name: nodes_for
2221 cypher: "MATCH (n {name: $name}) RETURN n"
2222 description: "fetch nodes by name"
2223 - name: run_query
2224 python: tools.py
2225 function: run
2226workspace:
2227 kind: local
2228 root: /tmp/ws
2229 watch: true
2230builtins:
2231 save_graph: false
2232env_file: .env.local
2233extensions:
2234 kglite:
2235 flavour: standard
2236"#,
2237 );
2238 let m = load(f.path()).unwrap();
2239 let v = m.to_json();
2240 assert_eq!(v["name"], "Full Surface");
2241 assert_eq!(v["trust"]["allow_python_tools"], true);
2242 assert_eq!(v["workspace"]["kind"], "local");
2243 assert_eq!(v["workspace"]["root"], "/tmp/ws");
2244 assert_eq!(v["workspace"]["watch"], true);
2245 assert_eq!(v["env_file"], ".env.local");
2246 assert_eq!(v["tools"][0]["kind"], "cypher");
2247 assert_eq!(v["tools"][0]["name"], "nodes_for");
2248 assert_eq!(v["tools"][1]["kind"], "python");
2249 assert_eq!(v["tools"][1]["name"], "run_query");
2250 assert_eq!(v["tools"][1]["python"], "tools.py");
2251 assert_eq!(v["tools"][1]["function"], "run");
2252 assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
2253 }
2254
2255 #[test]
2258 fn skills_disabled_by_default() {
2259 let f = write_tmp("name: x\n");
2260 let m = load(f.path()).unwrap();
2261 assert_eq!(m.skills, SkillsSource::Disabled);
2262 assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
2263 }
2264
2265 #[test]
2266 fn skills_explicit_false_disabled() {
2267 let f = write_tmp("name: x\nskills: false\n");
2268 let m = load(f.path()).unwrap();
2269 assert_eq!(m.skills, SkillsSource::Disabled);
2270 }
2271
2272 #[test]
2273 fn skills_bool_true_parses_to_single_bundled() {
2274 let f = write_tmp("name: x\nskills: true\n");
2275 let m = load(f.path()).unwrap();
2276 assert_eq!(m.skills, SkillsSource::Sources(vec![SkillSource::Bundled]));
2277 let v = m.to_json();
2279 assert_eq!(v["skills"], serde_json::json!([true]));
2280 }
2281
2282 #[test]
2283 fn skills_path_string_parses_to_single_path() {
2284 let f = write_tmp("name: x\nskills: ./local-skills/\n");
2285 let m = load(f.path()).unwrap();
2286 assert_eq!(
2287 m.skills,
2288 SkillsSource::Sources(vec![SkillSource::Path("./local-skills/".into())])
2289 );
2290 let v = m.to_json();
2292 assert_eq!(v["skills"], serde_json::json!(["./local-skills/"]));
2293 }
2294
2295 #[test]
2296 fn skills_list_polymorphic_parses() {
2297 let f =
2298 write_tmp("name: x\nskills:\n - true\n - ./local-overrides/\n - ~/shared-skills/\n");
2299 let m = load(f.path()).unwrap();
2300 assert_eq!(
2301 m.skills,
2302 SkillsSource::Sources(vec![
2303 SkillSource::Bundled,
2304 SkillSource::Path("./local-overrides/".into()),
2305 SkillSource::Path("~/shared-skills/".into()),
2306 ])
2307 );
2308 let v = m.to_json();
2310 assert_eq!(
2311 v["skills"],
2312 serde_json::json!([true, "./local-overrides/", "~/shared-skills/"])
2313 );
2314 }
2315
2316 #[test]
2317 fn skills_empty_list_parses_as_opt_in_with_no_root_sources() {
2318 let f = write_tmp("name: x\nskills: []\n");
2323 let m = load(f.path()).unwrap();
2324 assert_eq!(m.skills, SkillsSource::Sources(vec![]));
2325 }
2326
2327 #[test]
2328 fn skills_false_in_list_rejected() {
2329 let f = write_tmp("name: x\nskills:\n - false\n");
2330 let err = load(f.path()).unwrap_err();
2331 assert!(
2332 err.message.contains("skills[0]")
2333 && err.message.contains("`false` is not a valid entry"),
2334 "unexpected: {}",
2335 err.message
2336 );
2337 }
2338
2339 #[test]
2340 fn skills_invalid_type_rejected() {
2341 let f = write_tmp("name: x\nskills: 42\n");
2342 let err = load(f.path()).unwrap_err();
2343 assert!(
2344 err.message.contains("skills must be"),
2345 "unexpected: {}",
2346 err.message
2347 );
2348 }
2349
2350 #[test]
2351 fn skills_empty_path_string_rejected() {
2352 let f = write_tmp("name: x\nskills: \"\"\n");
2353 let err = load(f.path()).unwrap_err();
2354 assert!(
2355 err.message.contains("non-empty string"),
2356 "unexpected: {}",
2357 err.message
2358 );
2359 }
2360
2361 #[test]
2362 fn skills_field_is_purely_additive_on_existing_manifests() {
2363 let f = write_tmp(
2368 r#"
2369name: legacy
2370source_roots: [src]
2371trust:
2372 allow_python_tools: true
2373workspace:
2374 kind: github
2375"#,
2376 );
2377 let m = load(f.path()).unwrap();
2378 assert_eq!(m.skills, SkillsSource::Disabled);
2379 assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
2380 }
2381}