1#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
// Keys accepted at the top level of a manifest; anything else is rejected by
// `check_keys` with an "unknown top-level keys" error.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
    "skills",
];
// Keys accepted inside the `workspace:` mapping.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
// Legal values for `workspace.kind`.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
// Keys accepted inside the `trust:` mapping (all boolean opt-ins).
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];
// Keys accepted on a `tools:` list entry (which subset is legal depends on the
// tool kind; see `build_tool` / `build_bundled_override`).
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
    "rename",
];
// Keys accepted inside the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
// Keys accepted inside the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
// Legal values for `builtins.temp_cleanup`.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
72
/// Error raised while loading or validating a manifest, displayed as
/// `"<path>: <message>"`. `path` is `<manifest>` when no file applies.
#[derive(Debug, Error)]
#[error("{path}: {message}")]
pub struct ManifestError {
    /// Display form of the manifest path the error refers to.
    pub path: String,
    /// Human-readable description of what went wrong.
    pub message: String,
}
79
80impl ManifestError {
81 pub fn at(path: &Path, message: impl Into<String>) -> Self {
82 Self {
83 path: path.display().to_string(),
84 message: message.into(),
85 }
86 }
87
88 pub fn bare(message: impl Into<String>) -> Self {
89 Self {
90 path: "<manifest>".to_string(),
91 message: message.into(),
92 }
93 }
94}
95
/// Opt-in trust flags parsed from the manifest's `trust:` mapping.
/// Every flag defaults to `false` when absent.
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    pub allow_python_tools: bool,
    pub allow_embedder: bool,
    pub allow_query_preprocessor: bool,
}
108
/// One entry of the manifest's `tools:` list; exactly one kind per entry.
#[derive(Debug, Clone)]
pub enum ToolSpec {
    /// Tool backed by a Cypher query string.
    Cypher(CypherTool),
    /// Tool backed by a Python function in an external file.
    Python(PythonTool),
    /// Override of a bundled (built-in) tool.
    Bundled(BundledOverride),
}
130
131impl ToolSpec {
132 pub fn name(&self) -> &str {
133 match self {
134 ToolSpec::Cypher(t) => &t.name,
135 ToolSpec::Python(t) => &t.name,
136 ToolSpec::Bundled(t) => &t.name,
137 }
138 }
139}
140
/// A manifest-declared tool that runs a fixed Cypher query.
#[derive(Debug, Clone)]
pub struct CypherTool {
    pub name: String,
    /// The Cypher query text (validated non-empty at parse time).
    pub cypher: String,
    pub description: Option<String>,
    /// Optional JSON-schema-style parameter mapping.
    pub parameters: Option<serde_json::Value>,
}
148
/// A manifest-declared tool implemented by a Python function.
#[derive(Debug, Clone)]
pub struct PythonTool {
    pub name: String,
    /// Path to the Python source file.
    pub python: String,
    /// Name of the function inside that file (a valid identifier).
    pub function: String,
    pub description: Option<String>,
    /// Optional JSON-schema-style parameter mapping.
    pub parameters: Option<serde_json::Value>,
}
157
/// Per-tool override for a bundled (built-in) tool: only description, hidden
/// state, and an optional rename may be customized.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Name of the bundled tool being overridden.
    pub name: String,
    pub description: Option<String>,
    /// When true, the bundled tool is not exposed.
    pub hidden: bool,
    /// Optional replacement name (a valid identifier).
    pub rename: Option<String>,
}
187
/// Configuration from the `embedder:` mapping: a module + class to load, plus
/// arbitrary constructor kwargs.
#[derive(Debug, Clone)]
pub struct EmbedderConfig {
    /// Module path or dotted name (non-empty).
    pub module: String,
    /// Class name (a valid identifier).
    pub class: String,
    /// Keyword arguments forwarded as JSON; empty when not given.
    pub kwargs: serde_json::Map<String, serde_json::Value>,
}
194
/// Settings from the `builtins:` mapping. Defaults: `save_graph = false`,
/// `temp_cleanup = never`.
#[derive(Debug, Default, Clone)]
pub struct BuiltinsConfig {
    pub save_graph: bool,
    pub temp_cleanup: TempCleanup,
}
200
/// Policy for when temporary data is cleaned up (`builtins.temp_cleanup`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Never clean up automatically (the default).
    #[default]
    Never,
    /// Clean up when an overview is generated.
    OnOverview,
}
207
208impl TempCleanup {
209 pub fn as_str(&self) -> &'static str {
210 match self {
211 TempCleanup::Never => "never",
212 TempCleanup::OnOverview => "on_overview",
213 }
214 }
215}
216
/// Kind of workspace declared by `workspace.kind`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// GitHub-backed workspace (the default).
    #[default]
    Github,
    /// Local-directory workspace; requires `workspace.root`.
    Local,
}
227
228impl WorkspaceKind {
229 pub fn as_str(&self) -> &'static str {
230 match self {
231 WorkspaceKind::Github => "github",
232 WorkspaceKind::Local => "local",
233 }
234 }
235}
236
/// Parsed `workspace:` mapping. Cross-field rules (enforced in
/// `build_workspace`): `kind: local` requires `root`; `watch` is only legal
/// with `kind: local`.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceConfig {
    pub kind: WorkspaceKind,
    /// Workspace root path (required for `kind: local`).
    pub root: Option<String>,
    /// Watch for changes; defaults to false.
    pub watch: bool,
    /// Opt-in glob(s) for parent-walk manifest discovery.
    pub applies_to: Option<AppliesTo>,
}
274
/// `workspace.applies_to` value: either a single glob pattern or a list of
/// patterns, each validated by `parse_applies_to_pattern`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AppliesTo {
    Pattern(String),
    Patterns(Vec<String>),
}
290
/// One source of skills: `true` in YAML maps to `Bundled`, a string maps to
/// `Path`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkillSource {
    Bundled,
    Path(String),
}
311
/// Overall `skills:` setting: disabled (absent/null/`false`) or an ordered
/// list of sources.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum SkillsSource {
    #[default]
    Disabled,
    Sources(Vec<SkillSource>),
}
334
/// Fully-parsed and validated manifest, assembled by `build` from the YAML
/// mapping loaded by `load`.
#[derive(Debug, Clone)]
pub struct Manifest {
    /// Path of the YAML file this manifest was loaded from.
    pub yaml_path: PathBuf,
    pub name: Option<String>,
    pub instructions: Option<String>,
    pub overview_prefix: Option<String>,
    /// Normalised: a scalar `source_root` becomes a one-element list.
    pub source_roots: Vec<String>,
    pub trust: TrustConfig,
    pub tools: Vec<ToolSpec>,
    pub embedder: Option<EmbedderConfig>,
    pub builtins: BuiltinsConfig,
    pub env_file: Option<String>,
    pub workspace: Option<WorkspaceConfig>,
    /// Free-form `extensions:` mapping for downstream binaries.
    pub extensions: serde_json::Map<String, serde_json::Value>,
    pub skills: SkillsSource,
}
377
impl Manifest {
    /// Serialize the manifest to a JSON value mirroring its YAML structure.
    /// `yaml_path` is rendered as a display string; each tool gains a `kind`
    /// discriminator; enums are emitted via their canonical `as_str` forms.
    pub fn to_json(&self) -> serde_json::Value {
        serde_json::json!({
            "yaml_path": self.yaml_path.display().to_string(),
            "name": self.name,
            "instructions": self.instructions,
            "overview_prefix": self.overview_prefix,
            "source_roots": self.source_roots,
            "trust": {
                "allow_python_tools": self.trust.allow_python_tools,
                "allow_embedder": self.trust.allow_embedder,
                "allow_query_preprocessor": self.trust.allow_query_preprocessor,
            },
            "tools": self.tools.iter().map(|t| match t {
                ToolSpec::Cypher(c) => serde_json::json!({
                    "kind": "cypher",
                    "name": c.name,
                    "cypher": c.cypher,
                    "description": c.description,
                    "parameters": c.parameters,
                }),
                ToolSpec::Python(p) => serde_json::json!({
                    "kind": "python",
                    "name": p.name,
                    "python": p.python,
                    "function": p.function,
                    "description": p.description,
                    "parameters": p.parameters,
                }),
                ToolSpec::Bundled(b) => serde_json::json!({
                    "kind": "bundled",
                    "name": b.name,
                    "description": b.description,
                    "hidden": b.hidden,
                    "rename": b.rename,
                }),
            }).collect::<Vec<_>>(),
            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
                "module": e.module,
                "class": e.class,
                "kwargs": e.kwargs,
            })),
            "builtins": {
                "save_graph": self.builtins.save_graph,
                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
            },
            "env_file": self.env_file,
            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
                "kind": w.kind.as_str(),
                "root": w.root,
                "watch": w.watch,
                // applies_to keeps its YAML shape: scalar pattern or list.
                "applies_to": w.applies_to.as_ref().map(|a| match a {
                    AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
                    AppliesTo::Patterns(ps) => serde_json::Value::Array(
                        ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
                    ),
                }),
            })),
            "extensions": self.extensions,
            "skills": self.skills_to_json(),
        })
    }

    /// JSON form of `skills`: `false` when disabled, otherwise an array whose
    /// entries are `true` (bundled) or a path string — the same shape the
    /// `skills:` YAML key accepts.
    fn skills_to_json(&self) -> serde_json::Value {
        match &self.skills {
            SkillsSource::Disabled => serde_json::Value::Bool(false),
            SkillsSource::Sources(sources) => {
                let arr: Vec<serde_json::Value> = sources
                    .iter()
                    .map(|s| match s {
                        SkillSource::Bundled => serde_json::Value::Bool(true),
                        SkillSource::Path(p) => serde_json::Value::String(p.clone()),
                    })
                    .collect();
                serde_json::Value::Array(arr)
            }
        }
    }
}
478
/// Look for a manifest named `<stem>_mcp.yaml` next to `graph_path`.
/// Returns `None` when the path has no stem/parent or no such file exists.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let dir = graph_path.parent()?;
    let stem = graph_path.file_stem()?;
    let candidate = dir.join(format!("{}_mcp.yaml", stem.to_string_lossy()));
    candidate.is_file().then_some(candidate)
}
490
/// Locate a `workspace_mcp.yaml` for `workspace_dir`.
///
/// Discovery order:
/// 1. `workspace_dir/workspace_mcp.yaml` itself.
/// 2. A one-level parent-walk fallback: the parent directory's
///    `workspace_mcp.yaml`, accepted only when that manifest declares
///    `workspace.applies_to` glob(s) matching this directory's basename.
///    Fallback manifests that fail to parse, or that don't opt in, are logged
///    and ignored.
pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
    let primary = workspace_dir.join("workspace_mcp.yaml");
    if primary.is_file() {
        return Some(primary);
    }
    let parent = workspace_dir.parent()?;
    // Canonicalize both sides; at the filesystem root the parent resolves to
    // the same directory, and there is nothing further to check. A dir that
    // cannot be canonicalized (e.g. does not exist) also stops discovery.
    let workspace_resolved = workspace_dir.canonicalize().ok()?;
    let parent_resolved = parent.canonicalize().ok()?;
    if parent_resolved == workspace_resolved {
        return None;
    }
    let fallback = parent.join("workspace_mcp.yaml");
    if !fallback.is_file() {
        return None;
    }

    // A fallback manifest that fails to parse is ignored rather than aborting
    // discovery — the warning points at the offending file.
    let manifest = match load(&fallback) {
        Ok(m) => m,
        Err(e) => {
            tracing::warn!(
                manifest = %fallback.display(),
                error = %e,
                "parent-walk manifest exists but failed to parse; ignoring"
            );
            return None;
        }
    };
    // The parent manifest must opt in explicitly via `workspace.applies_to`.
    let declared = manifest
        .workspace
        .as_ref()
        .and_then(|w| w.applies_to.as_ref());
    let Some(declared_applies_to) = declared else {
        tracing::info!(
            manifest = %fallback.display(),
            "parent-walk manifest does not declare workspace.applies_to; \
            ignoring (set workspace.applies_to: <pattern> to opt in)"
        );
        return None;
    };
    // Patterns match against the workspace dir's basename only (single segment).
    let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
        return None; };
    let patterns: Vec<&str> = match declared_applies_to {
        AppliesTo::Pattern(p) => vec![p.as_str()],
        AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
    };
    let matched = patterns.iter().any(|pat| {
        match globset::Glob::new(pat) {
            Ok(g) => g.compile_matcher().is_match(basename),
            Err(_) => {
                // Patterns were glob-validated at load time; an unparsable one
                // here simply never matches.
                false
            }
        }
    });
    if matched {
        tracing::info!(
            workspace_dir = %workspace_dir.display(),
            manifest = %fallback.display(),
            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
        );
        Some(fallback)
    } else {
        tracing::info!(
            workspace_dir = %workspace_resolved.display(),
            manifest = %fallback.display(),
            basename = %basename,
            patterns = ?patterns,
            "parent-walk manifest's workspace.applies_to does not match \
            this workspace_dir's basename; ignoring"
        );
        None
    }
}
623
624pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
626 let text = fs::read_to_string(yaml_path)
627 .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
628 let raw: serde_yaml::Value = serde_yaml::from_str(&text)
629 .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
630 let raw = match raw {
631 serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
632 v => v,
633 };
634 let map = raw
635 .as_mapping()
636 .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
637 build(map, yaml_path)
638}
639
/// Validate the raw top-level mapping and assemble a `Manifest`.
/// Rejects unknown keys first, then normalises `source_root`/`source_roots`
/// into a single list before delegating each section to its builder.
fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;

    // The scalar and list spellings are mutually exclusive.
    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
        return Err(ManifestError::at(
            yaml_path,
            "specify either source_root (str) or source_roots (list), not both",
        ));
    }

    // Normalise either spelling into a Vec of non-empty strings.
    let mut source_roots: Vec<String> = Vec::new();
    if let Some(v) = raw.get("source_root") {
        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
            ManifestError::at(yaml_path, "source_root must be a non-empty string")
        })?;
        source_roots.push(s.to_string());
    } else if let Some(v) = raw.get("source_roots") {
        let seq = v.as_sequence().ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                "source_roots must be a list of non-empty strings",
            )
        })?;
        if seq.is_empty() {
            return Err(ManifestError::at(
                yaml_path,
                "source_roots must be non-empty when set",
            ));
        }
        for item in seq {
            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
                ManifestError::at(
                    yaml_path,
                    "source_roots must be a list of non-empty strings",
                )
            })?;
            source_roots.push(s.to_string());
        }
    }

    // Each section has its own builder; any failure aborts the whole load.
    let trust = build_trust(raw.get("trust"), yaml_path)?;
    let tools = build_tools(raw.get("tools"), yaml_path)?;
    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
    let skills = build_skills(raw.get("skills"), yaml_path)?;

    Ok(Manifest {
        yaml_path: yaml_path.to_path_buf(),
        name: optional_str(raw, "name", yaml_path)?,
        instructions: optional_str(raw, "instructions", yaml_path)?,
        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
        source_roots,
        trust,
        tools,
        embedder,
        builtins,
        env_file: optional_str(raw, "env_file", yaml_path)?,
        workspace,
        extensions,
        skills,
    })
}
704
/// Parse the `skills:` key.
///
/// Accepted shapes: absent/null/`false` → disabled; `true` → bundled skills
/// only; a non-empty path string → that single path; a list mixing `true`
/// entries and path strings → those sources in order. `false` inside a list
/// is rejected explicitly.
fn build_skills(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<SkillsSource, ManifestError> {
    use serde_yaml::Value;

    match raw {
        None | Some(Value::Null) | Some(Value::Bool(false)) => Ok(SkillsSource::Disabled),
        Some(Value::Bool(true)) => Ok(SkillsSource::Sources(vec![SkillSource::Bundled])),
        Some(Value::String(s)) => {
            if s.is_empty() {
                return Err(ManifestError::at(
                    yaml_path,
                    "skills: path must be a non-empty string",
                ));
            }
            Ok(SkillsSource::Sources(vec![SkillSource::Path(s.clone())]))
        }
        Some(Value::Sequence(seq)) => {
            let mut sources = Vec::with_capacity(seq.len());
            for (idx, item) in seq.iter().enumerate() {
                match item {
                    Value::Bool(true) => sources.push(SkillSource::Bundled),
                    Value::Bool(false) => {
                        return Err(ManifestError::at(
                            yaml_path,
                            format!(
                                "skills[{idx}]: `false` is not a valid entry in a `skills:` \
                                list (only `true` for bundled, or a path string)"
                            ),
                        ));
                    }
                    Value::String(s) => {
                        if s.is_empty() {
                            return Err(ManifestError::at(
                                yaml_path,
                                format!("skills[{idx}]: path must be a non-empty string"),
                            ));
                        }
                        sources.push(SkillSource::Path(s.clone()));
                    }
                    _ => {
                        return Err(ManifestError::at(
                            yaml_path,
                            format!(
                                "skills[{idx}]: each entry must be `true` (for bundled) or a \
                                path string"
                            ),
                        ));
                    }
                }
            }
            Ok(SkillsSource::Sources(sources))
        }
        Some(_) => Err(ManifestError::at(
            yaml_path,
            "skills must be `false`, `true`, a path string, or a list of \
            (true | path string) entries",
        )),
    }
}
787
788fn build_extensions(
789 raw: Option<&serde_yaml::Value>,
790 yaml_path: &Path,
791) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
792 let Some(raw) = raw else {
793 return Ok(serde_json::Map::new());
794 };
795 if matches!(raw, serde_yaml::Value::Null) {
796 return Ok(serde_json::Map::new());
797 }
798 if !raw.is_mapping() {
799 return Err(ManifestError::at(
800 yaml_path,
801 "extensions must be a mapping (downstream-binary-specific keys)",
802 ));
803 }
804 match yaml_to_json(raw.clone())? {
805 serde_json::Value::Object(o) => Ok(o),
806 _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
807 }
808}
809
/// Parse the `workspace:` mapping (absent/null → `None`).
///
/// Cross-field rules enforced here: `kind: local` requires `root`; `watch`
/// is only legal with `kind: local`. `applies_to` accepts a single pattern
/// string or a list of patterns, each validated by `parse_applies_to_pattern`.
fn build_workspace(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<Option<WorkspaceConfig>, ManifestError> {
    let Some(raw) = raw else { return Ok(None) };
    if matches!(raw, serde_yaml::Value::Null) {
        return Ok(None);
    }
    let map = raw
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
    // kind: defaults to github; any non-string or unknown value errors.
    let kind = match map.get("kind") {
        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
        Some(serde_yaml::Value::String(s)) => match s.as_str() {
            "github" => WorkspaceKind::Github,
            "local" => WorkspaceKind::Local,
            other => {
                return Err(ManifestError::at(
                    yaml_path,
                    format!(
                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
                    ),
                ));
            }
        },
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
            ))
        }
    };
    // root: optional, but must be a non-empty string when present.
    let root = match map.get("root") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
        _ => {
            return Err(ManifestError::at(
                yaml_path,
                "workspace.root must be a non-empty string",
            ))
        }
    };
    // watch: optional bool, default false.
    let watch = match map.get("watch") {
        None | Some(serde_yaml::Value::Null) => false,
        Some(serde_yaml::Value::Bool(b)) => *b,
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                "workspace.watch must be a bool",
            ))
        }
    };
    // applies_to: scalar pattern or non-empty list of patterns; each one is
    // cleaned/validated, with list errors prefixed by their index.
    let applies_to =
        match map.get("applies_to") {
            None | Some(serde_yaml::Value::Null) => None,
            Some(serde_yaml::Value::String(s)) => {
                Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
            }
            Some(serde_yaml::Value::Sequence(seq)) => {
                if seq.is_empty() {
                    return Err(ManifestError::at(
                        yaml_path,
                        "workspace.applies_to: list must contain at least one pattern",
                    ));
                }
                let mut patterns = Vec::with_capacity(seq.len());
                for (i, item) in seq.iter().enumerate() {
                    let s = item.as_str().ok_or_else(|| {
                        ManifestError::at(
                            yaml_path,
                            format!("workspace.applies_to[{i}] must be a string"),
                        )
                    })?;
                    let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
                        ManifestError::at(
                            yaml_path,
                            format!("workspace.applies_to[{i}]: {}", e.message),
                        )
                    })?;
                    patterns.push(cleaned);
                }
                Some(AppliesTo::Patterns(patterns))
            }
            _ => return Err(ManifestError::at(
                yaml_path,
                "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
            )),
        };
    // Cross-field validation after all fields are parsed.
    if kind == WorkspaceKind::Local && root.is_none() {
        return Err(ManifestError::at(
            yaml_path,
            "workspace.kind: local requires workspace.root to be set",
        ));
    }
    if kind == WorkspaceKind::Github && watch {
        return Err(ManifestError::at(
            yaml_path,
            "workspace.watch is only valid with workspace.kind: local",
        ));
    }
    Ok(Some(WorkspaceConfig {
        kind,
        root,
        watch,
        applies_to,
    }))
}
918
/// Validate and normalise one `workspace.applies_to` glob pattern.
///
/// Trims whitespace and strips a leading `./`, then rejects patterns that are
/// empty, multi-segment (contain `/`), `..`-escaping, absolute, or invalid
/// globs. Returns the cleaned pattern on success.
fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err(ManifestError::at(
            yaml_path,
            "workspace.applies_to: pattern must not be empty",
        ));
    }
    // `./foo` and `foo` are equivalent; normalise to the bare form.
    let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
    if stripped.is_empty() {
        return Err(ManifestError::at(
            yaml_path,
            "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
        ));
    }
    // Parent-walk matching compares against a single basename, so embedded
    // path separators can never match.
    if stripped.contains('/') {
        return Err(ManifestError::at(
            yaml_path,
            format!(
                "workspace.applies_to: pattern {raw:?} must be a single path segment \
                (no embedded `/`) — parent-walk discovery is bounded to one level"
            ),
        ));
    }
    if stripped == ".." || stripped.starts_with("../") {
        return Err(ManifestError::at(
            yaml_path,
            format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
        ));
    }
    if Path::new(stripped).is_absolute() {
        return Err(ManifestError::at(
            yaml_path,
            format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
        ));
    }
    // Compile once here so glob syntax errors surface at load time, not during
    // workspace discovery.
    globset::Glob::new(stripped).map_err(|e| {
        ManifestError::at(
            yaml_path,
            format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
        )
    })?;
    Ok(stripped.to_string())
}
977
978fn check_keys(
979 map: &serde_yaml::Mapping,
980 allowed: &[&str],
981 label: &str,
982 yaml_path: &Path,
983) -> Result<(), ManifestError> {
984 let mut unknown: Vec<String> = Vec::new();
985 for (k, _) in map {
986 let key = k.as_str().unwrap_or("<non-string-key>");
987 if !allowed.contains(&key) {
988 unknown.push(key.to_string());
989 }
990 }
991 if !unknown.is_empty() {
992 unknown.sort();
993 return Err(ManifestError::at(
994 yaml_path,
995 format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
996 ));
997 }
998 Ok(())
999}
1000
1001fn optional_str(
1002 raw: &serde_yaml::Mapping,
1003 key: &str,
1004 yaml_path: &Path,
1005) -> Result<Option<String>, ManifestError> {
1006 match raw.get(key) {
1007 None | Some(serde_yaml::Value::Null) => Ok(None),
1008 Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
1009 Some(_) => Err(ManifestError::at(
1010 yaml_path,
1011 format!("{key} must be a string"),
1012 )),
1013 }
1014}
1015
1016fn build_trust(
1017 raw: Option<&serde_yaml::Value>,
1018 yaml_path: &Path,
1019) -> Result<TrustConfig, ManifestError> {
1020 let Some(raw) = raw else {
1021 return Ok(TrustConfig::default());
1022 };
1023 let map = raw
1024 .as_mapping()
1025 .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
1026 check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
1027 let mut cfg = TrustConfig::default();
1028 if let Some(v) = map.get("allow_python_tools") {
1029 cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
1030 ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
1031 })?;
1032 }
1033 if let Some(v) = map.get("allow_embedder") {
1034 cfg.allow_embedder = v
1035 .as_bool()
1036 .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
1037 }
1038 if let Some(v) = map.get("allow_query_preprocessor") {
1039 cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
1040 ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
1041 })?;
1042 }
1043 Ok(cfg)
1044}
1045
1046fn build_tools(
1047 raw: Option<&serde_yaml::Value>,
1048 yaml_path: &Path,
1049) -> Result<Vec<ToolSpec>, ManifestError> {
1050 let Some(raw) = raw else {
1051 return Ok(Vec::new());
1052 };
1053 let seq = raw
1054 .as_sequence()
1055 .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
1056 let mut tools: Vec<ToolSpec> = Vec::new();
1057 let mut seen: BTreeMap<String, ()> = BTreeMap::new();
1058 for (i, entry) in seq.iter().enumerate() {
1059 let tool = build_tool(entry, i, yaml_path)?;
1060 let name = tool.name().to_string();
1061 if seen.insert(name.clone(), ()).is_some() {
1062 return Err(ManifestError::at(
1063 yaml_path,
1064 format!("duplicate tool name: {name:?}"),
1065 ));
1066 }
1067 tools.push(tool);
1068 }
1069 Ok(tools)
1070}
1071
/// Parse one entry of the `tools:` list into a `ToolSpec`.
///
/// Exactly one of `cypher:`, `python:`, or `bundled:` must be present and
/// selects the kind. `bundled:` entries are delegated to
/// `build_bundled_override`; the other two require an identifier `name:` and
/// forbid `hidden:` (which only applies to bundled overrides).
fn build_tool(
    entry: &serde_yaml::Value,
    idx: usize,
    yaml_path: &Path,
) -> Result<ToolSpec, ManifestError> {
    let map = entry
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;

    // Collect the kind-discriminating keys that are present; exactly one is
    // required, and the error message names whichever extras were found.
    let has_cypher = map.contains_key("cypher");
    let has_python = map.contains_key("python");
    let has_bundled = map.contains_key("bundled");
    let kinds_present: Vec<&str> = [
        ("cypher", has_cypher),
        ("python", has_python),
        ("bundled", has_bundled),
    ]
    .into_iter()
    .filter(|(_, p)| *p)
    .map(|(k, _)| k)
    .collect();
    if kinds_present.is_empty() {
        return Err(ManifestError::at(
            yaml_path,
            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
        ));
    }
    if kinds_present.len() > 1 {
        return Err(ManifestError::at(
            yaml_path,
            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
        ));
    }

    // Bundled overrides follow different rules (no `name:`, `hidden:`/`rename:`
    // allowed) — handled separately.
    if has_bundled {
        return build_bundled_override(map, idx, yaml_path);
    }

    let name = map
        .get("name")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
            )
        })?
        .to_string();

    // `hidden:` only makes sense when overriding a bundled tool.
    if map.contains_key("hidden") {
        return Err(ManifestError::at(
            yaml_path,
            format!(
                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
            ),
        ));
    }

    let description = match map.get("description") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] ({name:?}).description must be a string"),
            ))
        }
    };

    // Parameters are carried as JSON (converted from YAML) when present.
    let parameters = match map.get("parameters") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
            ))
        }
    };

    if has_cypher {
        // The query must contain non-whitespace content.
        let cypher = map
            .get("cypher")
            .and_then(|v| v.as_str())
            .filter(|s| !s.trim().is_empty())
            .ok_or_else(|| {
                ManifestError::at(
                    yaml_path,
                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
                )
            })?
            .to_string();
        return Ok(ToolSpec::Cypher(CypherTool {
            name,
            cypher,
            description,
            parameters,
        }));
    }

    // Only the python kind remains: require the script path and the function
    // name (a valid identifier) inside it.
    let python = map
        .get("python")
        .and_then(|v| v.as_str())
        .filter(|s| !s.is_empty())
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
            )
        })?
        .to_string();
    let function = map
        .get("function")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!(
                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
                ),
            )
        })?
        .to_string();
    Ok(ToolSpec::Python(PythonTool {
        name,
        python,
        function,
        description,
        parameters,
    }))
}
1218
/// Parse a `bundled:` tool entry into a `BundledOverride`.
///
/// The `bundled:` value names the built-in tool; only `description:`,
/// `hidden:`, and `rename:` may accompany it — `name:`, `parameters:`, and
/// `function:` are explicitly rejected.
fn build_bundled_override(
    map: &serde_yaml::Mapping,
    idx: usize,
    yaml_path: &Path,
) -> Result<ToolSpec, ManifestError> {
    let name = map
        .get("bundled")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!(
                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
                    (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
                ),
            )
        })?
        .to_string();

    // Overrides cannot redefine the tool itself, only presentation details.
    for forbidden in ["name", "parameters", "function"] {
        if map.contains_key(forbidden) {
            return Err(ManifestError::at(
                yaml_path,
                format!(
                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
                    (only `description:`, `hidden:`, and `rename:` are permitted on overrides)"
                ),
            ));
        }
    }

    let description = match map.get("description") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
            ))
        }
    };

    let hidden = match map.get("hidden") {
        None | Some(serde_yaml::Value::Null) => false,
        Some(serde_yaml::Value::Bool(b)) => *b,
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
            ))
        }
    };

    // A rename must itself be a valid identifier when present.
    let rename = match map.get("rename") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) => {
            if !valid_identifier(s) {
                return Err(ManifestError::at(
                    yaml_path,
                    format!(
                        "tools[{idx}] bundled override {name:?}.rename must be a valid identifier \
                        (^[a-zA-Z_][a-zA-Z0-9_]*$), got {s:?}"
                    ),
                ));
            }
            Some(s.clone())
        }
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] bundled override {name:?}.rename must be a string"),
            ))
        }
    };

    Ok(ToolSpec::Bundled(BundledOverride {
        name,
        description,
        hidden,
        rename,
    }))
}
1313
/// Parse the `embedder:` mapping into an `EmbedderConfig` (absent/null →
/// `None`). Requires a non-empty `module` and an identifier `class`; `kwargs`
/// defaults to an empty JSON object.
fn build_embedder(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<Option<EmbedderConfig>, ManifestError> {
    let Some(raw) = raw else { return Ok(None) };
    if matches!(raw, serde_yaml::Value::Null) {
        return Ok(None);
    }
    let map = raw
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
    let module = map
        .get("module")
        .and_then(|v| v.as_str())
        .filter(|s| !s.is_empty())
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                "embedder.module must be a non-empty string (path or dotted name)",
            )
        })?
        .to_string();
    let class = map
        .get("class")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
            )
        })?
        .to_string();
    // kwargs are converted YAML→JSON and must land as a JSON object.
    let kwargs = match map.get("kwargs") {
        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
            serde_json::Value::Object(o) => o,
            _ => {
                return Err(ManifestError::at(
                    yaml_path,
                    "embedder.kwargs must be a mapping",
                ))
            }
        },
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                "embedder.kwargs must be a mapping",
            ))
        }
    };
    Ok(Some(EmbedderConfig {
        module,
        class,
        kwargs,
    }))
}
1372
1373fn build_builtins(
1374 raw: Option<&serde_yaml::Value>,
1375 yaml_path: &Path,
1376) -> Result<BuiltinsConfig, ManifestError> {
1377 let Some(raw) = raw else {
1378 return Ok(BuiltinsConfig::default());
1379 };
1380 if matches!(raw, serde_yaml::Value::Null) {
1381 return Ok(BuiltinsConfig::default());
1382 }
1383 let map = raw
1384 .as_mapping()
1385 .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1386 check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1387 let mut cfg = BuiltinsConfig::default();
1388 if let Some(v) = map.get("save_graph") {
1389 cfg.save_graph = v
1390 .as_bool()
1391 .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1392 }
1393 if let Some(v) = map.get("temp_cleanup") {
1394 let s = v.as_str().ok_or_else(|| {
1395 ManifestError::at(
1396 yaml_path,
1397 format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1398 )
1399 })?;
1400 cfg.temp_cleanup = match s {
1401 "never" => TempCleanup::Never,
1402 "on_overview" => TempCleanup::OnOverview,
1403 other => {
1404 return Err(ManifestError::at(
1405 yaml_path,
1406 format!(
1407 "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1408 ),
1409 ))
1410 }
1411 };
1412 }
1413 Ok(cfg)
1414}
1415
/// True iff `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$` (an ASCII identifier).
fn valid_identifier(s: &str) -> bool {
    let mut chars = s.chars();
    let Some(first) = chars.next() else {
        return false; // empty string has no leading character
    };
    if !(first.is_ascii_alphabetic() || first == '_') {
        return false;
    }
    chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
}
1424
1425fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1426 serde_json::to_value(&v)
1427 .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1428}
1429
// NOTE(review): appears to exist only so the `serde::Deserialize` import is
// exercised; not referenced anywhere in this file — confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
1432
1433#[cfg(test)]
1434mod tests {
1435 use super::*;
1436
1437 fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1438 let mut f = tempfile::NamedTempFile::new().unwrap();
1439 std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1440 f
1441 }
1442
    // --- Top-level manifest parsing: defaults, scalar keys, source roots, tools ---

    // An empty manifest must load with all defaults (no tools, no roots, trust off).
    #[test]
    fn loads_minimal_empty_manifest() {
        let f = write_tmp("");
        let m = load(f.path()).unwrap();
        assert_eq!(m.tools.len(), 0);
        assert_eq!(m.source_roots.len(), 0);
        assert!(!m.trust.allow_python_tools);
        assert!(!m.trust.allow_embedder);
        assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
    }

    #[test]
    fn loads_name_and_instructions() {
        let f = write_tmp("name: Demo\ninstructions: |\n  multi-line\n  block\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.name.as_deref(), Some("Demo"));
        assert!(m.instructions.unwrap().contains("multi-line"));
    }

    // Keys outside ALLOWED_TOP_KEYS must be rejected, not silently ignored.
    #[test]
    fn rejects_unknown_top_key() {
        let f = write_tmp("bogus: 1\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("unknown top-level"));
    }

    // Singular `source_root` is sugar for a one-element `source_roots` list.
    #[test]
    fn source_root_string_normalises_to_list() {
        let f = write_tmp("source_root: ./data\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.source_roots, vec!["./data".to_string()]);
    }

    #[test]
    fn source_roots_list_preserved() {
        let f = write_tmp("source_roots:\n  - ./a\n  - ./b\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
    }

    #[test]
    fn rejects_both_source_root_and_source_roots() {
        let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
        assert!(load(f.path()).unwrap_err().message.contains("not both"));
    }

    #[test]
    fn cypher_tool_parses() {
        let f = write_tmp("tools:\n  - name: lookup\n    cypher: MATCH (n) RETURN n\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.tools.len(), 1);
        match &m.tools[0] {
            ToolSpec::Cypher(t) => {
                assert_eq!(t.name, "lookup");
                assert!(t.cypher.contains("MATCH"));
            }
            _ => panic!("expected cypher tool"),
        }
    }

    #[test]
    fn python_tool_parses() {
        let f =
            write_tmp("tools:\n  - name: detail\n    python: ./tools.py\n    function: detail\n");
        let m = load(f.path()).unwrap();
        match &m.tools[0] {
            ToolSpec::Python(t) => {
                assert_eq!(t.python, "./tools.py");
                assert_eq!(t.function, "detail");
            }
            _ => panic!("expected python tool"),
        }
    }

    // A tool entry must declare exactly one kind (cypher/python/bundled).
    #[test]
    fn rejects_tool_with_both_kinds() {
        let f = write_tmp(
            "tools:\n  - name: x\n    cypher: 'MATCH (n) RETURN n'\n    python: ./t.py\n    function: x\n",
        );
        assert!(load(f.path())
            .unwrap_err()
            .message
            .contains("multiple kinds"));
    }

    #[test]
    fn rejects_tool_with_no_kind() {
        let f = write_tmp("tools:\n  - name: x\n");
        assert!(load(f.path())
            .unwrap_err()
            .message
            .contains("needs exactly one"));
    }

    #[test]
    fn rejects_duplicate_tool_names() {
        let f = write_tmp(
            "tools:\n  - name: same\n    cypher: 'MATCH (n) RETURN n'\n  - name: same\n    cypher: 'MATCH (m) RETURN m'\n",
        );
        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
    }
1544
    // --- `bundled:` override entries: description/hidden/rename handling ---

    #[test]
    fn bundled_override_with_description_parses() {
        let f =
            write_tmp("tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.tools.len(), 1);
        match &m.tools[0] {
            ToolSpec::Bundled(b) => {
                assert_eq!(b.name, "repo_management");
                assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
                assert!(!b.hidden);
            }
            _ => panic!("expected bundled override"),
        }
    }

    #[test]
    fn bundled_override_with_hidden_parses() {
        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: true\n");
        let m = load(f.path()).unwrap();
        match &m.tools[0] {
            ToolSpec::Bundled(b) => {
                assert_eq!(b.name, "ping");
                assert!(b.hidden);
                assert!(b.description.is_none());
            }
            _ => panic!("expected bundled override"),
        }
    }

    // Bundled overrides and regular tool entries may coexist in one list;
    // the \x20 escapes pin the YAML indentation explicitly.
    #[test]
    fn bundled_override_alongside_cypher_tools_parses() {
        let f = write_tmp(
            "tools:\n\
            \x20\x20- bundled: cypher_query\n\
            \x20\x20\x20\x20description: \"Custom server description\"\n\
            \x20\x20- name: lookup\n\
            \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
        );
        let m = load(f.path()).unwrap();
        assert_eq!(m.tools.len(), 2);
        assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
        assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
    }

    #[test]
    fn rejects_bundled_with_cypher_kind() {
        let f =
            write_tmp("tools:\n  - bundled: cypher_query\n    cypher: \"MATCH (n) RETURN n\"\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("multiple kinds"),
            "got: {}",
            err.message
        );
    }

    // `bundled:` already names the tool; an extra `name:` is contradictory.
    #[test]
    fn rejects_bundled_with_name_field() {
        let f = write_tmp("tools:\n  - bundled: ping\n    name: ping\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("cannot set `name:`"),
            "got: {}",
            err.message
        );
    }

    #[test]
    fn rejects_bundled_with_parameters_field() {
        let f =
            write_tmp("tools:\n  - bundled: cypher_query\n    parameters:\n      type: object\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("cannot set `parameters:`"),
            "got: {}",
            err.message
        );
    }

    // YAML `yes-please` is a plain string, not a bool, so `hidden:` must error.
    #[test]
    fn rejects_bundled_with_non_bool_hidden() {
        let f = write_tmp("tools:\n  - bundled: ping\n    hidden: yes-please\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("hidden must be a bool"),
            "got: {}",
            err.message
        );
    }

    #[test]
    fn rejects_hidden_on_cypher_tool() {
        let f = write_tmp(
            "tools:\n  - name: lookup\n    cypher: \"MATCH (n) RETURN n\"\n    hidden: true\n",
        );
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message
                .contains("`hidden:` is only valid on `bundled:` override entries"),
            "got: {}",
            err.message
        );
    }

    #[test]
    fn rejects_duplicate_bundled_overrides() {
        let f = write_tmp(
            "tools:\n  - bundled: ping\n    hidden: true\n  - bundled: ping\n    description: \"x\"\n",
        );
        assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
    }

    // NOTE(review): the fixture is a quoted non-identifier string, yet the
    // asserted message is "must be a string" rather than an identifier-shaped
    // error — presumably the loader folds both checks into one message;
    // confirm against the `bundled:` validation code.
    #[test]
    fn rejects_bundled_with_invalid_identifier() {
        let f = write_tmp("tools:\n  - bundled: \"123-bad\"\n    hidden: true\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("must be a string"),
            "got: {}",
            err.message
        );
    }

    #[test]
    fn bundled_rename_parses_when_valid_identifier() {
        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
        let m = load(f.path()).unwrap();
        match &m.tools[0] {
            ToolSpec::Bundled(b) => {
                assert_eq!(b.name, "cypher_query");
                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
                assert!(!b.hidden);
                assert!(b.description.is_none());
            }
            _ => panic!("expected bundled override"),
        }
    }

    #[test]
    fn bundled_rename_alongside_description_parses() {
        let f = write_tmp(
            "tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n    description: \"Legal-corpus cypher\"\n",
        );
        let m = load(f.path()).unwrap();
        match &m.tools[0] {
            ToolSpec::Bundled(b) => {
                assert_eq!(b.rename.as_deref(), Some("legal_cypher_query"));
                assert_eq!(b.description.as_deref(), Some("Legal-corpus cypher"));
            }
            _ => panic!("expected bundled override"),
        }
    }

    #[test]
    fn bundled_rename_defaults_to_none() {
        let f = write_tmp("tools:\n  - bundled: cypher_query\n    description: \"x\"\n");
        let m = load(f.path()).unwrap();
        match &m.tools[0] {
            ToolSpec::Bundled(b) => assert!(b.rename.is_none()),
            _ => panic!("expected bundled override"),
        }
    }

    #[test]
    fn rejects_bundled_rename_with_invalid_identifier() {
        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: \"123-bad\"\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("rename must be a valid identifier"),
            "got: {}",
            err.message
        );
    }

    #[test]
    fn rejects_bundled_rename_with_non_string_value() {
        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: 42\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("rename must be a string"),
            "got: {}",
            err.message
        );
    }

    // JSON export keeps the original `name` and carries `rename` separately.
    #[test]
    fn bundled_rename_serialises_to_json() {
        let f = write_tmp("tools:\n  - bundled: cypher_query\n    rename: legal_cypher_query\n");
        let m = load(f.path()).unwrap();
        let json = m.to_json();
        let tools = json.get("tools").and_then(|t| t.as_array()).unwrap();
        let entry = &tools[0];
        assert_eq!(entry.get("kind").and_then(|v| v.as_str()), Some("bundled"));
        assert_eq!(
            entry.get("name").and_then(|v| v.as_str()),
            Some("cypher_query")
        );
        assert_eq!(
            entry.get("rename").and_then(|v| v.as_str()),
            Some("legal_cypher_query")
        );
    }

    #[test]
    fn bundled_override_to_json_shape() {
        let f = write_tmp(
            "tools:\n  - bundled: repo_management\n    description: \"FIRST STEP\"\n    hidden: false\n",
        );
        let m = load(f.path()).unwrap();
        let v = m.to_json();
        assert_eq!(v["tools"][0]["kind"], "bundled");
        assert_eq!(v["tools"][0]["name"], "repo_management");
        assert_eq!(v["tools"][0]["description"], "FIRST STEP");
        assert_eq!(v["tools"][0]["hidden"], false);
    }
1767
    // --- `embedder:`, `builtins:`, and `trust:` sections ---

    #[test]
    fn embedder_parses() {
        let f = write_tmp(
            "embedder:\n  module: ./e.py\n  class: GraphEmbedder\n  kwargs:\n    cooldown: 900\n",
        );
        let m = load(f.path()).unwrap();
        let e = m.embedder.unwrap();
        assert_eq!(e.module, "./e.py");
        assert_eq!(e.class, "GraphEmbedder");
        assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
    }

    #[test]
    fn builtins_parses_temp_cleanup() {
        let f = write_tmp("builtins:\n  save_graph: true\n  temp_cleanup: on_overview\n");
        let m = load(f.path()).unwrap();
        assert!(m.builtins.save_graph);
        assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
    }

    // Only values in VALID_TEMP_CLEANUP are accepted.
    #[test]
    fn rejects_invalid_temp_cleanup() {
        let f = write_tmp("builtins:\n  temp_cleanup: nuke\n");
        assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
    }

    #[test]
    fn allow_embedder_trust_parses() {
        let f = write_tmp("trust:\n  allow_embedder: true\n");
        let m = load(f.path()).unwrap();
        assert!(m.trust.allow_embedder);
    }

    // Each trust flag is independent; setting one must not enable the others.
    #[test]
    fn allow_query_preprocessor_trust_parses() {
        let f = write_tmp("trust:\n  allow_query_preprocessor: true\n");
        let m = load(f.path()).unwrap();
        assert!(m.trust.allow_query_preprocessor);
        assert!(!m.trust.allow_embedder);
        assert!(!m.trust.allow_python_tools);
    }

    #[test]
    fn allow_query_preprocessor_rejects_non_bool() {
        let f = write_tmp("trust:\n  allow_query_preprocessor: \"yes\"\n");
        let err = load(f.path()).unwrap_err();
        assert!(err
            .message
            .contains("allow_query_preprocessor must be a bool"));
    }
1818
    // --- Sibling-manifest discovery and the `workspace:` section ---

    // A graph file `demo.kgl` pairs with a sibling manifest `demo_mcp.yaml`.
    #[test]
    fn find_sibling_works() {
        let dir = tempfile::tempdir().unwrap();
        let graph = dir.path().join("demo.kgl");
        std::fs::write(&graph, b"\x00").unwrap();
        let sibling = dir.path().join("demo_mcp.yaml");
        std::fs::write(&sibling, "name: x\n").unwrap();
        assert_eq!(find_sibling_manifest(&graph), Some(sibling));
    }

    #[test]
    fn workspace_local_parses() {
        let f = write_tmp("workspace:\n  kind: local\n  root: ./src\n  watch: true\n");
        let m = load(f.path()).unwrap();
        let w = m.workspace.unwrap();
        assert_eq!(w.kind, WorkspaceKind::Local);
        assert_eq!(w.root.as_deref(), Some("./src"));
        assert!(w.watch);
    }

    // An empty mapping defaults to kind=github with no root and no watch.
    #[test]
    fn workspace_github_default_kind() {
        let f = write_tmp("workspace: {}\n");
        let m = load(f.path()).unwrap();
        let w = m.workspace.unwrap();
        assert_eq!(w.kind, WorkspaceKind::Github);
        assert!(w.root.is_none());
        assert!(!w.watch);
    }

    #[test]
    fn workspace_local_without_root_errors() {
        let f = write_tmp("workspace:\n  kind: local\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("requires workspace.root"));
    }

    #[test]
    fn workspace_unknown_key_rejected() {
        let f = write_tmp("workspace:\n  kind: local\n  root: ./x\n  bogus: 1\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("unknown workspace keys"));
    }

    #[test]
    fn workspace_invalid_kind_rejected() {
        let f = write_tmp("workspace:\n  kind: docker\n  root: ./x\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("workspace.kind"));
    }

    // `watch:` only makes sense for local workspaces.
    #[test]
    fn workspace_watch_invalid_for_github() {
        let f = write_tmp("workspace:\n  kind: github\n  watch: true\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("watch is only valid"));
    }
1876
    // --- `extensions:` passthrough blob and `env_file:` ---

    // Extension values are passed through untouched: bools, strings, nested maps.
    #[test]
    fn extensions_passthrough_parses() {
        let f = write_tmp(
            "extensions:\n  csv_http_server: true\n  csv_http_server_dir: temp/\n  arbitrary:\n    nested: 1\n",
        );
        let m = load(f.path()).unwrap();
        assert_eq!(
            m.extensions
                .get("csv_http_server")
                .and_then(|v| v.as_bool()),
            Some(true)
        );
        assert_eq!(
            m.extensions
                .get("csv_http_server_dir")
                .and_then(|v| v.as_str()),
            Some("temp/")
        );
        assert_eq!(
            m.extensions
                .get("arbitrary")
                .and_then(|v| v.get("nested"))
                .and_then(|v| v.as_i64()),
            Some(1)
        );
    }

    #[test]
    fn extensions_absent_defaults_to_empty() {
        let f = write_tmp("name: x\n");
        let m = load(f.path()).unwrap();
        assert!(m.extensions.is_empty());
    }

    // Keys inside `extensions:` are consumer-defined and never validated here.
    #[test]
    fn extensions_inner_keys_unvalidated() {
        let f = write_tmp(
            "extensions:\n  whatever_kglite_wants: foo\n  some_other_consumer: { a: 1, b: 2 }\n",
        );
        load(f.path()).unwrap();
    }

    #[test]
    fn extensions_must_be_a_mapping() {
        let f = write_tmp("extensions: not-a-mapping\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("extensions must be a mapping"));
    }

    #[test]
    fn env_file_key_parses() {
        let f = write_tmp("env_file: ../.env\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.env_file.as_deref(), Some("../.env"));
    }

    #[test]
    fn env_file_unset_is_none() {
        let f = write_tmp("name: Demo\n");
        let m = load(f.path()).unwrap();
        assert!(m.env_file.is_none());
    }
1943
    // --- Workspace-manifest discovery, including the parent `applies_to` fallback ---

    #[test]
    fn find_workspace_works() {
        let dir = tempfile::tempdir().unwrap();
        let manifest = dir.path().join("workspace_mcp.yaml");
        std::fs::write(&manifest, "name: ws\n").unwrap();
        assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
    }

    // A parent directory's manifest attaches to a child only when its
    // workspace.applies_to names that child.
    #[test]
    fn find_workspace_walks_one_level_up_with_applies_to() {
        let dir = tempfile::tempdir().unwrap();
        let parent = dir.path().join("parent");
        std::fs::create_dir(&parent).unwrap();
        let manifest = parent.join("workspace_mcp.yaml");
        std::fs::write(
            &manifest,
            "workspace:\n  kind: github\n  applies_to: ./repos\n",
        )
        .unwrap();
        let repos = parent.join("repos");
        std::fs::create_dir(&repos).unwrap();

        assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));

        let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
        assert_eq!(
            found.canonicalize().unwrap(),
            manifest.canonicalize().unwrap()
        );
    }

    #[test]
    fn find_workspace_ignores_parent_without_applies_to() {
        let dir = tempfile::tempdir().unwrap();
        let parent = dir.path().join("parent");
        std::fs::create_dir(&parent).unwrap();
        let manifest = parent.join("workspace_mcp.yaml");
        std::fs::write(&manifest, "name: not for repos\n").unwrap();
        let repos = parent.join("repos");
        std::fs::create_dir(&repos).unwrap();

        assert_eq!(
            find_workspace_manifest(&repos),
            None,
            "parent manifest without workspace.applies_to must NOT auto-attach"
        );
    }

    #[test]
    fn find_workspace_ignores_parent_with_mismatched_applies_to() {
        let dir = tempfile::tempdir().unwrap();
        let parent = dir.path().join("parent");
        std::fs::create_dir(&parent).unwrap();
        let manifest = parent.join("workspace_mcp.yaml");
        std::fs::write(
            &manifest,
            "workspace:\n  kind: github\n  applies_to: ./repos\n",
        )
        .unwrap();
        let other = parent.join("other_dir");
        std::fs::create_dir(&other).unwrap();

        assert_eq!(
            find_workspace_manifest(&other),
            None,
            "applies_to: ./repos must NOT match --workspace ./other_dir"
        );
    }

    // `applies_to: '*'` is a glob matching every direct child directory.
    #[test]
    fn find_workspace_applies_to_wildcard_matches_any_child() {
        let dir = tempfile::tempdir().unwrap();
        let parent = dir.path().join("parent");
        std::fs::create_dir(&parent).unwrap();
        let manifest = parent.join("workspace_mcp.yaml");
        std::fs::write(&manifest, "workspace:\n  kind: github\n  applies_to: '*'\n").unwrap();
        for child_name in ["repos", "clones", "totally-different-name"] {
            let child = parent.join(child_name);
            std::fs::create_dir(&child).unwrap();
            let found =
                find_workspace_manifest(&child).expect("wildcard should match any direct child");
            assert_eq!(
                found.canonicalize().unwrap(),
                manifest.canonicalize().unwrap(),
                "wildcard should match child {child_name:?}"
            );
        }
    }

    #[test]
    fn find_workspace_applies_to_glob_matches_prefix() {
        let dir = tempfile::tempdir().unwrap();
        let parent = dir.path().join("parent");
        std::fs::create_dir(&parent).unwrap();
        let manifest = parent.join("workspace_mcp.yaml");
        std::fs::write(
            &manifest,
            "workspace:\n  kind: github\n  applies_to: ./prod-*\n",
        )
        .unwrap();
        for child_name in ["prod-api", "prod-web", "prod-"] {
            let child = parent.join(child_name);
            std::fs::create_dir(&child).unwrap();
            assert!(
                find_workspace_manifest(&child).is_some(),
                "prod-* should match {child_name:?}"
            );
        }
        for child_name in ["test-api", "stage-web", "random"] {
            let child = parent.join(child_name);
            std::fs::create_dir(&child).unwrap();
            assert_eq!(
                find_workspace_manifest(&child),
                None,
                "prod-* should NOT match {child_name:?}"
            );
        }
    }

    #[test]
    fn find_workspace_applies_to_list_matches_any_entry() {
        let dir = tempfile::tempdir().unwrap();
        let parent = dir.path().join("parent");
        std::fs::create_dir(&parent).unwrap();
        let manifest = parent.join("workspace_mcp.yaml");
        std::fs::write(
            &manifest,
            "workspace:\n  kind: github\n  applies_to:\n    - ./repos\n    - ./clones\n",
        )
        .unwrap();
        for matching in ["repos", "clones"] {
            let child = parent.join(matching);
            std::fs::create_dir(&child).unwrap();
            assert!(
                find_workspace_manifest(&child).is_some(),
                "list should match {matching:?}"
            );
        }
        let other = parent.join("scratch");
        std::fs::create_dir(&other).unwrap();
        assert_eq!(
            find_workspace_manifest(&other),
            None,
            "list with [repos, clones] must NOT match scratch"
        );
    }

    // applies_to validation happens at load time, not at discovery time.
    #[test]
    fn applies_to_rejects_deep_path_at_parse_time() {
        let f = write_tmp("workspace:\n  kind: github\n  applies_to: ./too/deep/path\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("must be a single path segment"),
            "got: {}",
            err.message
        );
    }

    #[test]
    fn applies_to_rejects_invalid_glob_at_parse_time() {
        let f = write_tmp("workspace:\n  kind: github\n  applies_to: './[unterminated'\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("invalid glob pattern"),
            "got: {}",
            err.message
        );
    }

    // Bare `..` and `../x` are rejected with two distinct messages.
    #[test]
    fn applies_to_rejects_parent_relative() {
        let f = write_tmp("workspace:\n  kind: github\n  applies_to: '..'\n");
        let err = load(f.path()).unwrap_err();
        assert!(err.message.contains("must not contain `..`"));

        let f2 = write_tmp("workspace:\n  kind: github\n  applies_to: '../up'\n");
        let err2 = load(f2.path()).unwrap_err();
        assert!(err2.message.contains("must be a single path segment"));
    }

    #[test]
    fn find_workspace_returns_none_when_missing_everywhere() {
        let dir = tempfile::tempdir().unwrap();
        let child = dir.path().join("child");
        std::fs::create_dir(&child).unwrap();
        assert_eq!(find_workspace_manifest(&child), None);
    }

    // A manifest in the directory itself beats any parent applies_to fallback.
    #[test]
    fn find_workspace_primary_wins_over_parent_fallback() {
        let dir = tempfile::tempdir().unwrap();
        let parent_manifest = dir.path().join("workspace_mcp.yaml");
        std::fs::write(
            &parent_manifest,
            "workspace:\n  kind: github\n  applies_to: ./repos\n",
        )
        .unwrap();
        let child = dir.path().join("repos");
        std::fs::create_dir(&child).unwrap();
        let child_manifest = child.join("workspace_mcp.yaml");
        std::fs::write(&child_manifest, "name: child\n").unwrap();

        let found = find_workspace_manifest(&child).expect("primary should resolve");
        assert_eq!(
            found.canonicalize().unwrap(),
            child_manifest.canonicalize().unwrap(),
            "primary location must win when both primary and parent fallback exist"
        );
    }
2191
    // --- JSON export surface: golden shape and round-trip of all sections ---

    // Pins the full to_json() output shape; any new top-level key must be
    // added here deliberately.
    #[test]
    fn to_json_shape_is_stable() {
        let f = write_tmp(
            r#"
name: KGLite Codebase
source_roots: [src, lib]
trust:
  allow_embedder: true
embedder:
  module: kglite.embed
  class: SentenceTransformerEmbedder
builtins:
  save_graph: true
  temp_cleanup: on_overview
"#,
        );
        let m = load(f.path()).unwrap();
        let actual = m.to_json();
        let expected = serde_json::json!({
            "yaml_path": f.path().display().to_string(),
            "name": "KGLite Codebase",
            "instructions": null,
            "overview_prefix": null,
            "source_roots": ["src", "lib"],
            "trust": {
                "allow_python_tools": false,
                "allow_embedder": true,
                "allow_query_preprocessor": false,
            },
            "tools": [],
            "embedder": {
                "module": "kglite.embed",
                "class": "SentenceTransformerEmbedder",
                "kwargs": {},
            },
            "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
            "env_file": null,
            "workspace": null,
            "extensions": {},
            "skills": false,
        });
        assert_eq!(actual, expected);
    }

    #[test]
    fn to_json_round_trips_tools_and_workspace() {
        let f = write_tmp(
            r#"
name: Full Surface
source_root: ./src
trust:
  allow_python_tools: true
tools:
  - name: nodes_for
    cypher: "MATCH (n {name: $name}) RETURN n"
    description: "fetch nodes by name"
  - name: run_query
    python: tools.py
    function: run
workspace:
  kind: local
  root: /tmp/ws
  watch: true
builtins:
  save_graph: false
env_file: .env.local
extensions:
  kglite:
    flavour: standard
"#,
        );
        let m = load(f.path()).unwrap();
        let v = m.to_json();
        assert_eq!(v["name"], "Full Surface");
        assert_eq!(v["trust"]["allow_python_tools"], true);
        assert_eq!(v["workspace"]["kind"], "local");
        assert_eq!(v["workspace"]["root"], "/tmp/ws");
        assert_eq!(v["workspace"]["watch"], true);
        assert_eq!(v["env_file"], ".env.local");
        assert_eq!(v["tools"][0]["kind"], "cypher");
        assert_eq!(v["tools"][0]["name"], "nodes_for");
        assert_eq!(v["tools"][1]["kind"], "python");
        assert_eq!(v["tools"][1]["name"], "run_query");
        assert_eq!(v["tools"][1]["python"], "tools.py");
        assert_eq!(v["tools"][1]["function"], "run");
        assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
    }
2279
    // --- `skills:` key: bool / path / polymorphic-list forms ---

    #[test]
    fn skills_disabled_by_default() {
        let f = write_tmp("name: x\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.skills, SkillsSource::Disabled);
        assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
    }

    #[test]
    fn skills_explicit_false_disabled() {
        let f = write_tmp("name: x\nskills: false\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.skills, SkillsSource::Disabled);
    }

    // `skills: true` means "just the bundled skills", exported as [true].
    #[test]
    fn skills_bool_true_parses_to_single_bundled() {
        let f = write_tmp("name: x\nskills: true\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.skills, SkillsSource::Sources(vec![SkillSource::Bundled]));
        let v = m.to_json();
        assert_eq!(v["skills"], serde_json::json!([true]));
    }

    #[test]
    fn skills_path_string_parses_to_single_path() {
        let f = write_tmp("name: x\nskills: ./local-skills/\n");
        let m = load(f.path()).unwrap();
        assert_eq!(
            m.skills,
            SkillsSource::Sources(vec![SkillSource::Path("./local-skills/".into())])
        );
        let v = m.to_json();
        assert_eq!(v["skills"], serde_json::json!(["./local-skills/"]));
    }

    // A list mixes `true` (bundled) with path entries, order preserved.
    #[test]
    fn skills_list_polymorphic_parses() {
        let f =
            write_tmp("name: x\nskills:\n  - true\n  - ./local-overrides/\n  - ~/shared-skills/\n");
        let m = load(f.path()).unwrap();
        assert_eq!(
            m.skills,
            SkillsSource::Sources(vec![
                SkillSource::Bundled,
                SkillSource::Path("./local-overrides/".into()),
                SkillSource::Path("~/shared-skills/".into()),
            ])
        );
        let v = m.to_json();
        assert_eq!(
            v["skills"],
            serde_json::json!([true, "./local-overrides/", "~/shared-skills/"])
        );
    }

    // `skills: []` is opt-in with no sources — distinct from Disabled.
    #[test]
    fn skills_empty_list_parses_as_opt_in_with_no_root_sources() {
        let f = write_tmp("name: x\nskills: []\n");
        let m = load(f.path()).unwrap();
        assert_eq!(m.skills, SkillsSource::Sources(vec![]));
    }

    #[test]
    fn skills_false_in_list_rejected() {
        let f = write_tmp("name: x\nskills:\n  - false\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("skills[0]")
                && err.message.contains("`false` is not a valid entry"),
            "unexpected: {}",
            err.message
        );
    }

    #[test]
    fn skills_invalid_type_rejected() {
        let f = write_tmp("name: x\nskills: 42\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("skills must be"),
            "unexpected: {}",
            err.message
        );
    }

    #[test]
    fn skills_empty_path_string_rejected() {
        let f = write_tmp("name: x\nskills: \"\"\n");
        let err = load(f.path()).unwrap_err();
        assert!(
            err.message.contains("non-empty string"),
            "unexpected: {}",
            err.message
        );
    }

    // Manifests written before `skills:` existed keep loading unchanged.
    #[test]
    fn skills_field_is_purely_additive_on_existing_manifests() {
        let f = write_tmp(
            r#"
name: legacy
source_roots: [src]
trust:
  allow_python_tools: true
workspace:
  kind: github
"#,
        );
        let m = load(f.path()).unwrap();
        assert_eq!(m.skills, SkillsSource::Disabled);
        assert_eq!(m.to_json()["skills"], serde_json::Value::Bool(false));
    }
2406}