1#![allow(dead_code)]
25
26use std::collections::BTreeMap;
27use std::fs;
28use std::path::{Path, PathBuf};
29
30use serde::Deserialize;
31use thiserror::Error;
32
/// Keys accepted at the top level of a manifest; `build` rejects anything else.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];
/// Keys accepted inside the `workspace:` mapping.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
/// Accepted `workspace.kind` values; used only to render error messages.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
/// Keys accepted inside the `trust:` mapping.
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];
/// Keys accepted on an entry of the `tools:` list (all tool kinds combined).
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
];
/// Keys accepted inside the `embedder:` mapping.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
/// Keys accepted inside the `builtins:` mapping.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
/// Accepted `builtins.temp_cleanup` values; used only to render error messages.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
67
68#[derive(Debug, Error)]
69#[error("{path}: {message}")]
70pub struct ManifestError {
71 pub path: String,
72 pub message: String,
73}
74
75impl ManifestError {
76 pub fn at(path: &Path, message: impl Into<String>) -> Self {
77 Self {
78 path: path.display().to_string(),
79 message: message.into(),
80 }
81 }
82
83 pub fn bare(message: impl Into<String>) -> Self {
84 Self {
85 path: "<manifest>".to_string(),
86 message: message.into(),
87 }
88 }
89}
90
/// Opt-in switches read from the manifest's `trust:` mapping.
///
/// Every flag is `false` by default (derived `Default`).
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// Value of `trust.allow_python_tools`.
    pub allow_python_tools: bool,
    /// Value of `trust.allow_embedder`.
    pub allow_embedder: bool,
    /// Value of `trust.allow_query_preprocessor`.
    pub allow_query_preprocessor: bool,
}
103
104#[derive(Debug, Clone)]
105pub enum ToolSpec {
106 Cypher(CypherTool),
107 Python(PythonTool),
108 Bundled(BundledOverride),
124}
125
126impl ToolSpec {
127 pub fn name(&self) -> &str {
128 match self {
129 ToolSpec::Cypher(t) => &t.name,
130 ToolSpec::Python(t) => &t.name,
131 ToolSpec::Bundled(t) => &t.name,
132 }
133 }
134}
135
/// A tool declared with a `cypher:` key.
#[derive(Debug, Clone)]
pub struct CypherTool {
    /// Tool name; `build_tool` only accepts `^[a-zA-Z_][a-zA-Z0-9_]*$`.
    pub name: String,
    /// Query text; `build_tool` rejects blank values.
    pub cypher: String,
    /// Optional `description:` string.
    pub description: Option<String>,
    /// Optional `parameters:` mapping, converted from YAML to JSON.
    pub parameters: Option<serde_json::Value>,
}
143
/// A tool declared with a `python:` key.
#[derive(Debug, Clone)]
pub struct PythonTool {
    /// Tool name; `build_tool` only accepts `^[a-zA-Z_][a-zA-Z0-9_]*$`.
    pub name: String,
    /// Value of `python:` (a non-empty path string), stored as written.
    pub python: String,
    /// Value of `function:`; must be a valid identifier.
    pub function: String,
    /// Optional `description:` string.
    pub description: Option<String>,
    /// Optional `parameters:` mapping, converted from YAML to JSON.
    pub parameters: Option<serde_json::Value>,
}
152
/// An entry declared with a `bundled:` key, overriding a bundled tool.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Value of the `bundled:` key (the bundled tool being overridden).
    pub name: String,
    /// Optional replacement `description:` string.
    pub description: Option<String>,
    /// Value of `hidden:`; `false` when the key is absent or null.
    pub hidden: bool,
}
169
/// Contents of the manifest's `embedder:` mapping.
#[derive(Debug, Clone)]
pub struct EmbedderConfig {
    /// Value of `embedder.module`; a non-empty string (path or dotted name).
    pub module: String,
    /// Value of `embedder.class`; must be a valid identifier.
    pub class: String,
    /// Value of `embedder.kwargs` as JSON; empty when the key is absent or null.
    pub kwargs: serde_json::Map<String, serde_json::Value>,
}
176
/// Contents of the manifest's `builtins:` mapping.
#[derive(Debug, Default, Clone)]
pub struct BuiltinsConfig {
    /// Value of `builtins.save_graph`; defaults to `false`.
    pub save_graph: bool,
    /// Value of `builtins.temp_cleanup`; defaults to [`TempCleanup::Never`].
    pub temp_cleanup: TempCleanup,
}
182
/// Accepted values of `builtins.temp_cleanup`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Spelled `never` in the manifest; the default.
    #[default]
    Never,
    /// Spelled `on_overview` in the manifest.
    OnOverview,
}

impl TempCleanup {
    /// Returns the manifest spelling of this value.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Never => "never",
            Self::OnOverview => "on_overview",
        }
    }
}
198
/// Accepted values of `workspace.kind`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Spelled `github` in the manifest; the default.
    #[default]
    Github,
    /// Spelled `local` in the manifest.
    Local,
}

impl WorkspaceKind {
    /// Returns the manifest spelling of this value.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Github => "github",
            Self::Local => "local",
        }
    }
}
218
/// Contents of the manifest's `workspace:` mapping.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceConfig {
    /// Value of `workspace.kind`; defaults to [`WorkspaceKind::Github`].
    pub kind: WorkspaceKind,
    /// Value of `workspace.root`; `build_workspace` requires it when `kind` is `local`.
    pub root: Option<String>,
    /// Value of `workspace.watch`; `build_workspace` rejects `true` unless `kind` is `local`.
    pub watch: bool,
    /// Value of `workspace.applies_to`; read by `find_workspace_manifest` when
    /// deciding whether a parent-directory manifest applies.
    pub applies_to: Option<AppliesTo>,
}
256
/// Value of `workspace.applies_to`, which may be written as a string or a list.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AppliesTo {
    /// Written as a single string.
    Pattern(String),
    /// Written as a (non-empty) list of strings.
    Patterns(Vec<String>),
}
272
/// A fully validated manifest, as produced by `load`.
#[derive(Debug, Clone)]
pub struct Manifest {
    /// Path the manifest was loaded from.
    pub yaml_path: PathBuf,
    /// Optional top-level `name:`.
    pub name: Option<String>,
    /// Optional top-level `instructions:`.
    pub instructions: Option<String>,
    /// Optional top-level `overview_prefix:`.
    pub overview_prefix: Option<String>,
    /// Entries from `source_root:` (one element) or `source_roots:`; empty when neither is set.
    pub source_roots: Vec<String>,
    /// Parsed `trust:` section.
    pub trust: TrustConfig,
    /// Parsed `tools:` list, in manifest order, with unique names.
    pub tools: Vec<ToolSpec>,
    /// Parsed `embedder:` section, if present.
    pub embedder: Option<EmbedderConfig>,
    /// Parsed `builtins:` section.
    pub builtins: BuiltinsConfig,
    /// Optional top-level `env_file:`, stored as written.
    pub env_file: Option<String>,
    /// Parsed `workspace:` section, if present.
    pub workspace: Option<WorkspaceConfig>,
    /// Contents of `extensions:` converted to JSON; not validated beyond being a mapping.
    pub extensions: serde_json::Map<String, serde_json::Value>,
}
303
304impl Manifest {
305 pub fn to_json(&self) -> serde_json::Value {
315 serde_json::json!({
316 "yaml_path": self.yaml_path.display().to_string(),
317 "name": self.name,
318 "instructions": self.instructions,
319 "overview_prefix": self.overview_prefix,
320 "source_roots": self.source_roots,
321 "trust": {
322 "allow_python_tools": self.trust.allow_python_tools,
323 "allow_embedder": self.trust.allow_embedder,
324 "allow_query_preprocessor": self.trust.allow_query_preprocessor,
325 },
326 "tools": self.tools.iter().map(|t| match t {
327 ToolSpec::Cypher(c) => serde_json::json!({
328 "kind": "cypher",
329 "name": c.name,
330 "cypher": c.cypher,
331 "description": c.description,
332 "parameters": c.parameters,
333 }),
334 ToolSpec::Python(p) => serde_json::json!({
335 "kind": "python",
336 "name": p.name,
337 "python": p.python,
338 "function": p.function,
339 "description": p.description,
340 "parameters": p.parameters,
341 }),
342 ToolSpec::Bundled(b) => serde_json::json!({
343 "kind": "bundled",
344 "name": b.name,
345 "description": b.description,
346 "hidden": b.hidden,
347 }),
348 }).collect::<Vec<_>>(),
349 "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
350 "module": e.module,
351 "class": e.class,
352 "kwargs": e.kwargs,
353 })),
354 "builtins": {
355 "save_graph": self.builtins.save_graph,
356 "temp_cleanup": self.builtins.temp_cleanup.as_str(),
357 },
358 "env_file": self.env_file,
359 "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
360 "kind": w.kind.as_str(),
361 "root": w.root,
362 "watch": w.watch,
363 "applies_to": w.applies_to.as_ref().map(|a| match a {
364 AppliesTo::Pattern(p) => serde_json::Value::String(p.clone()),
365 AppliesTo::Patterns(ps) => serde_json::Value::Array(
366 ps.iter().map(|p| serde_json::Value::String(p.clone())).collect()
367 ),
368 }),
369 })),
370 "extensions": self.extensions,
371 })
372 }
373}
374
/// Looks for `<stem>_mcp.yaml` next to `graph_path`.
///
/// Returns the candidate path when it exists as a regular file, and `None`
/// when it does not or when `graph_path` has no file stem or no parent.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let file_name = format!("{}_mcp.yaml", graph_path.file_stem()?.to_string_lossy());
    let candidate = graph_path.parent()?.join(file_name);
    candidate.is_file().then_some(candidate)
}
386
/// Locates the `workspace_mcp.yaml` that applies to `workspace_dir`.
///
/// Lookup order:
/// 1. `workspace_dir/workspace_mcp.yaml`, returned as soon as it exists.
/// 2. `workspace_mcp.yaml` in the parent directory, accepted only when it
///    loads successfully, declares `workspace.applies_to`, and one of those
///    glob patterns matches the basename of the canonicalized `workspace_dir`.
///
/// Returns `None` in every other case; the reason is logged through `tracing`.
// NOTE(review): `parent` is taken lexically from `workspace_dir`. For a bare
// relative name the lexical parent is the empty path, which `canonicalize`
// is expected to reject, so the fallback would be skipped - confirm intended.
pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
    let primary = workspace_dir.join("workspace_mcp.yaml");
    if primary.is_file() {
        return Some(primary);
    }
    let parent = workspace_dir.parent()?;
    // Any canonicalization failure disables the fallback.
    let workspace_resolved = workspace_dir.canonicalize().ok()?;
    let parent_resolved = parent.canonicalize().ok()?;
    // Guard against a "parent" that resolves to the workspace directory itself.
    if parent_resolved == workspace_resolved {
        return None;
    }
    let fallback = parent.join("workspace_mcp.yaml");
    if !fallback.is_file() {
        return None;
    }

    // A parent manifest that fails to load is ignored (with a warning), not fatal.
    let manifest = match load(&fallback) {
        Ok(m) => m,
        Err(e) => {
            tracing::warn!(
                manifest = %fallback.display(),
                error = %e,
                "parent-walk manifest exists but failed to parse; ignoring"
            );
            return None;
        }
    };
    let declared = manifest
        .workspace
        .as_ref()
        .and_then(|w| w.applies_to.as_ref());
    // The parent manifest must opt in explicitly through `workspace.applies_to`.
    let Some(declared_applies_to) = declared else {
        tracing::info!(
            manifest = %fallback.display(),
            "parent-walk manifest does not declare workspace.applies_to; \
             ignoring (set workspace.applies_to: <pattern> to opt in)"
        );
        return None;
    };
    // Patterns are matched against the resolved directory name; a name that is
    // missing or not valid UTF-8 cannot match.
    let Some(basename) = workspace_resolved.file_name().and_then(|n| n.to_str()) else {
        return None;
    };
    let patterns: Vec<&str> = match declared_applies_to {
        AppliesTo::Pattern(p) => vec![p.as_str()],
        AppliesTo::Patterns(ps) => ps.iter().map(String::as_str).collect(),
    };
    let matched = patterns.iter().any(|pat| {
        match globset::Glob::new(pat) {
            Ok(g) => g.compile_matcher().is_match(basename),
            // An uncompilable pattern counts as "no match". `load` has already
            // run the same `Glob::new` check on each pattern via
            // `parse_applies_to_pattern`.
            Err(_) => {
                false
            }
        }
    });
    if matched {
        tracing::info!(
            workspace_dir = %workspace_dir.display(),
            manifest = %fallback.display(),
            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
        );
        Some(fallback)
    } else {
        tracing::info!(
            workspace_dir = %workspace_resolved.display(),
            manifest = %fallback.display(),
            basename = %basename,
            patterns = ?patterns,
            "parent-walk manifest's workspace.applies_to does not match \
             this workspace_dir's basename; ignoring"
        );
        None
    }
}
519
520pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
522 let text = fs::read_to_string(yaml_path)
523 .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
524 let raw: serde_yaml::Value = serde_yaml::from_str(&text)
525 .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
526 let raw = match raw {
527 serde_yaml::Value::Null => serde_yaml::Value::Mapping(serde_yaml::Mapping::new()),
528 v => v,
529 };
530 let map = raw
531 .as_mapping()
532 .ok_or_else(|| ManifestError::at(yaml_path, "top-level must be a mapping"))?;
533 build(map, yaml_path)
534}
535
536fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
537 check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
538
539 if raw.contains_key("source_root") && raw.contains_key("source_roots") {
540 return Err(ManifestError::at(
541 yaml_path,
542 "specify either source_root (str) or source_roots (list), not both",
543 ));
544 }
545
546 let mut source_roots: Vec<String> = Vec::new();
547 if let Some(v) = raw.get("source_root") {
548 let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
549 ManifestError::at(yaml_path, "source_root must be a non-empty string")
550 })?;
551 source_roots.push(s.to_string());
552 } else if let Some(v) = raw.get("source_roots") {
553 let seq = v.as_sequence().ok_or_else(|| {
554 ManifestError::at(
555 yaml_path,
556 "source_roots must be a list of non-empty strings",
557 )
558 })?;
559 if seq.is_empty() {
560 return Err(ManifestError::at(
561 yaml_path,
562 "source_roots must be non-empty when set",
563 ));
564 }
565 for item in seq {
566 let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
567 ManifestError::at(
568 yaml_path,
569 "source_roots must be a list of non-empty strings",
570 )
571 })?;
572 source_roots.push(s.to_string());
573 }
574 }
575
576 let trust = build_trust(raw.get("trust"), yaml_path)?;
577 let tools = build_tools(raw.get("tools"), yaml_path)?;
578 let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
579 let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
580 let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
581 let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
582
583 Ok(Manifest {
584 yaml_path: yaml_path.to_path_buf(),
585 name: optional_str(raw, "name", yaml_path)?,
586 instructions: optional_str(raw, "instructions", yaml_path)?,
587 overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
588 source_roots,
589 trust,
590 tools,
591 embedder,
592 builtins,
593 env_file: optional_str(raw, "env_file", yaml_path)?,
594 workspace,
595 extensions,
596 })
597}
598
599fn build_extensions(
600 raw: Option<&serde_yaml::Value>,
601 yaml_path: &Path,
602) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
603 let Some(raw) = raw else {
604 return Ok(serde_json::Map::new());
605 };
606 if matches!(raw, serde_yaml::Value::Null) {
607 return Ok(serde_json::Map::new());
608 }
609 if !raw.is_mapping() {
610 return Err(ManifestError::at(
611 yaml_path,
612 "extensions must be a mapping (downstream-binary-specific keys)",
613 ));
614 }
615 match yaml_to_json(raw.clone())? {
616 serde_json::Value::Object(o) => Ok(o),
617 _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
618 }
619}
620
621fn build_workspace(
622 raw: Option<&serde_yaml::Value>,
623 yaml_path: &Path,
624) -> Result<Option<WorkspaceConfig>, ManifestError> {
625 let Some(raw) = raw else { return Ok(None) };
626 if matches!(raw, serde_yaml::Value::Null) {
627 return Ok(None);
628 }
629 let map = raw
630 .as_mapping()
631 .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
632 check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
633 let kind = match map.get("kind") {
634 None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
635 Some(serde_yaml::Value::String(s)) => match s.as_str() {
636 "github" => WorkspaceKind::Github,
637 "local" => WorkspaceKind::Local,
638 other => {
639 return Err(ManifestError::at(
640 yaml_path,
641 format!(
642 "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
643 ),
644 ));
645 }
646 },
647 Some(_) => {
648 return Err(ManifestError::at(
649 yaml_path,
650 format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
651 ))
652 }
653 };
654 let root = match map.get("root") {
655 None | Some(serde_yaml::Value::Null) => None,
656 Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
657 _ => {
658 return Err(ManifestError::at(
659 yaml_path,
660 "workspace.root must be a non-empty string",
661 ))
662 }
663 };
664 let watch = match map.get("watch") {
665 None | Some(serde_yaml::Value::Null) => false,
666 Some(serde_yaml::Value::Bool(b)) => *b,
667 Some(_) => {
668 return Err(ManifestError::at(
669 yaml_path,
670 "workspace.watch must be a bool",
671 ))
672 }
673 };
674 let applies_to =
675 match map.get("applies_to") {
676 None | Some(serde_yaml::Value::Null) => None,
677 Some(serde_yaml::Value::String(s)) => {
678 Some(AppliesTo::Pattern(parse_applies_to_pattern(s, yaml_path)?))
679 }
680 Some(serde_yaml::Value::Sequence(seq)) => {
681 if seq.is_empty() {
682 return Err(ManifestError::at(
683 yaml_path,
684 "workspace.applies_to: list must contain at least one pattern",
685 ));
686 }
687 let mut patterns = Vec::with_capacity(seq.len());
688 for (i, item) in seq.iter().enumerate() {
689 let s = item.as_str().ok_or_else(|| {
690 ManifestError::at(
691 yaml_path,
692 format!("workspace.applies_to[{i}] must be a string"),
693 )
694 })?;
695 let cleaned = parse_applies_to_pattern(s, yaml_path).map_err(|e| {
696 ManifestError::at(
697 yaml_path,
698 format!("workspace.applies_to[{i}]: {}", e.message),
699 )
700 })?;
701 patterns.push(cleaned);
702 }
703 Some(AppliesTo::Patterns(patterns))
704 }
705 _ => return Err(ManifestError::at(
706 yaml_path,
707 "workspace.applies_to must be a non-empty string (a pattern) or a list of patterns",
708 )),
709 };
710 if kind == WorkspaceKind::Local && root.is_none() {
711 return Err(ManifestError::at(
712 yaml_path,
713 "workspace.kind: local requires workspace.root to be set",
714 ));
715 }
716 if kind == WorkspaceKind::Github && watch {
717 return Err(ManifestError::at(
718 yaml_path,
719 "workspace.watch is only valid with workspace.kind: local",
720 ));
721 }
722 Ok(Some(WorkspaceConfig {
723 kind,
724 root,
725 watch,
726 applies_to,
727 }))
728}
729
730fn parse_applies_to_pattern(raw: &str, yaml_path: &Path) -> Result<String, ManifestError> {
739 let trimmed = raw.trim();
740 if trimmed.is_empty() {
741 return Err(ManifestError::at(
742 yaml_path,
743 "workspace.applies_to: pattern must not be empty",
744 ));
745 }
746 let stripped = trimmed.strip_prefix("./").unwrap_or(trimmed);
750 if stripped.is_empty() {
751 return Err(ManifestError::at(
752 yaml_path,
753 "workspace.applies_to: pattern must not be empty after stripping `./` prefix",
754 ));
755 }
756 if stripped.contains('/') {
757 return Err(ManifestError::at(
758 yaml_path,
759 format!(
760 "workspace.applies_to: pattern {raw:?} must be a single path segment \
761 (no embedded `/`) — parent-walk discovery is bounded to one level"
762 ),
763 ));
764 }
765 if stripped == ".." || stripped.starts_with("../") {
766 return Err(ManifestError::at(
767 yaml_path,
768 format!("workspace.applies_to: pattern {raw:?} must not contain `..`"),
769 ));
770 }
771 if Path::new(stripped).is_absolute() {
772 return Err(ManifestError::at(
773 yaml_path,
774 format!("workspace.applies_to: pattern {raw:?} must be relative, not absolute"),
775 ));
776 }
777 globset::Glob::new(stripped).map_err(|e| {
781 ManifestError::at(
782 yaml_path,
783 format!("workspace.applies_to: invalid glob pattern {raw:?}: {e}"),
784 )
785 })?;
786 Ok(stripped.to_string())
787}
788
789fn check_keys(
790 map: &serde_yaml::Mapping,
791 allowed: &[&str],
792 label: &str,
793 yaml_path: &Path,
794) -> Result<(), ManifestError> {
795 let mut unknown: Vec<String> = Vec::new();
796 for (k, _) in map {
797 let key = k.as_str().unwrap_or("<non-string-key>");
798 if !allowed.contains(&key) {
799 unknown.push(key.to_string());
800 }
801 }
802 if !unknown.is_empty() {
803 unknown.sort();
804 return Err(ManifestError::at(
805 yaml_path,
806 format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
807 ));
808 }
809 Ok(())
810}
811
812fn optional_str(
813 raw: &serde_yaml::Mapping,
814 key: &str,
815 yaml_path: &Path,
816) -> Result<Option<String>, ManifestError> {
817 match raw.get(key) {
818 None | Some(serde_yaml::Value::Null) => Ok(None),
819 Some(serde_yaml::Value::String(s)) => Ok(Some(s.clone())),
820 Some(_) => Err(ManifestError::at(
821 yaml_path,
822 format!("{key} must be a string"),
823 )),
824 }
825}
826
827fn build_trust(
828 raw: Option<&serde_yaml::Value>,
829 yaml_path: &Path,
830) -> Result<TrustConfig, ManifestError> {
831 let Some(raw) = raw else {
832 return Ok(TrustConfig::default());
833 };
834 let map = raw
835 .as_mapping()
836 .ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
837 check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
838 let mut cfg = TrustConfig::default();
839 if let Some(v) = map.get("allow_python_tools") {
840 cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
841 ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
842 })?;
843 }
844 if let Some(v) = map.get("allow_embedder") {
845 cfg.allow_embedder = v
846 .as_bool()
847 .ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
848 }
849 if let Some(v) = map.get("allow_query_preprocessor") {
850 cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
851 ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
852 })?;
853 }
854 Ok(cfg)
855}
856
857fn build_tools(
858 raw: Option<&serde_yaml::Value>,
859 yaml_path: &Path,
860) -> Result<Vec<ToolSpec>, ManifestError> {
861 let Some(raw) = raw else {
862 return Ok(Vec::new());
863 };
864 let seq = raw
865 .as_sequence()
866 .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
867 let mut tools: Vec<ToolSpec> = Vec::new();
868 let mut seen: BTreeMap<String, ()> = BTreeMap::new();
869 for (i, entry) in seq.iter().enumerate() {
870 let tool = build_tool(entry, i, yaml_path)?;
871 let name = tool.name().to_string();
872 if seen.insert(name.clone(), ()).is_some() {
873 return Err(ManifestError::at(
874 yaml_path,
875 format!("duplicate tool name: {name:?}"),
876 ));
877 }
878 tools.push(tool);
879 }
880 Ok(tools)
881}
882
883fn build_tool(
884 entry: &serde_yaml::Value,
885 idx: usize,
886 yaml_path: &Path,
887) -> Result<ToolSpec, ManifestError> {
888 let map = entry
889 .as_mapping()
890 .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
891 check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
892
893 let has_cypher = map.contains_key("cypher");
898 let has_python = map.contains_key("python");
899 let has_bundled = map.contains_key("bundled");
900 let kinds_present: Vec<&str> = [
901 ("cypher", has_cypher),
902 ("python", has_python),
903 ("bundled", has_bundled),
904 ]
905 .into_iter()
906 .filter(|(_, p)| *p)
907 .map(|(k, _)| k)
908 .collect();
909 if kinds_present.is_empty() {
910 return Err(ManifestError::at(
911 yaml_path,
912 format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
913 ));
914 }
915 if kinds_present.len() > 1 {
916 return Err(ManifestError::at(
917 yaml_path,
918 format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
919 ));
920 }
921
922 if has_bundled {
927 return build_bundled_override(map, idx, yaml_path);
928 }
929
930 let name = map
931 .get("name")
932 .and_then(|v| v.as_str())
933 .filter(|s| valid_identifier(s))
934 .ok_or_else(|| {
935 ManifestError::at(
936 yaml_path,
937 format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
938 )
939 })?
940 .to_string();
941
942 if map.contains_key("hidden") {
946 return Err(ManifestError::at(
947 yaml_path,
948 format!(
949 "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
950 ),
951 ));
952 }
953
954 let description = match map.get("description") {
955 None | Some(serde_yaml::Value::Null) => None,
956 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
957 Some(_) => {
958 return Err(ManifestError::at(
959 yaml_path,
960 format!("tools[{idx}] ({name:?}).description must be a string"),
961 ))
962 }
963 };
964
965 let parameters = match map.get("parameters") {
966 None | Some(serde_yaml::Value::Null) => None,
967 Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
968 Some(_) => {
969 return Err(ManifestError::at(
970 yaml_path,
971 format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
972 ))
973 }
974 };
975
976 if has_cypher {
977 let cypher = map
978 .get("cypher")
979 .and_then(|v| v.as_str())
980 .filter(|s| !s.trim().is_empty())
981 .ok_or_else(|| {
982 ManifestError::at(
983 yaml_path,
984 format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
985 )
986 })?
987 .to_string();
988 return Ok(ToolSpec::Cypher(CypherTool {
989 name,
990 cypher,
991 description,
992 parameters,
993 }));
994 }
995
996 let python = map
998 .get("python")
999 .and_then(|v| v.as_str())
1000 .filter(|s| !s.is_empty())
1001 .ok_or_else(|| {
1002 ManifestError::at(
1003 yaml_path,
1004 format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
1005 )
1006 })?
1007 .to_string();
1008 let function = map
1009 .get("function")
1010 .and_then(|v| v.as_str())
1011 .filter(|s| valid_identifier(s))
1012 .ok_or_else(|| {
1013 ManifestError::at(
1014 yaml_path,
1015 format!(
1016 "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
1017 ),
1018 )
1019 })?
1020 .to_string();
1021 Ok(ToolSpec::Python(PythonTool {
1022 name,
1023 python,
1024 function,
1025 description,
1026 parameters,
1027 }))
1028}
1029
1030fn build_bundled_override(
1034 map: &serde_yaml::Mapping,
1035 idx: usize,
1036 yaml_path: &Path,
1037) -> Result<ToolSpec, ManifestError> {
1038 let name = map
1039 .get("bundled")
1040 .and_then(|v| v.as_str())
1041 .filter(|s| valid_identifier(s))
1042 .ok_or_else(|| {
1043 ManifestError::at(
1044 yaml_path,
1045 format!(
1046 "tools[{idx}] `bundled:` must be a string naming a bundled tool \
1047 (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
1048 ),
1049 )
1050 })?
1051 .to_string();
1052
1053 for forbidden in ["name", "parameters", "function"] {
1058 if map.contains_key(forbidden) {
1059 return Err(ManifestError::at(
1060 yaml_path,
1061 format!(
1062 "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
1063 (only `description:` and `hidden:` are permitted on overrides)"
1064 ),
1065 ));
1066 }
1067 }
1068
1069 let description = match map.get("description") {
1070 None | Some(serde_yaml::Value::Null) => None,
1071 Some(serde_yaml::Value::String(s)) => Some(s.clone()),
1072 Some(_) => {
1073 return Err(ManifestError::at(
1074 yaml_path,
1075 format!("tools[{idx}] bundled override {name:?}.description must be a string"),
1076 ))
1077 }
1078 };
1079
1080 let hidden = match map.get("hidden") {
1081 None | Some(serde_yaml::Value::Null) => false,
1082 Some(serde_yaml::Value::Bool(b)) => *b,
1083 Some(_) => {
1084 return Err(ManifestError::at(
1085 yaml_path,
1086 format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
1087 ))
1088 }
1089 };
1090
1091 Ok(ToolSpec::Bundled(BundledOverride {
1092 name,
1093 description,
1094 hidden,
1095 }))
1096}
1097
1098fn build_embedder(
1099 raw: Option<&serde_yaml::Value>,
1100 yaml_path: &Path,
1101) -> Result<Option<EmbedderConfig>, ManifestError> {
1102 let Some(raw) = raw else { return Ok(None) };
1103 if matches!(raw, serde_yaml::Value::Null) {
1104 return Ok(None);
1105 }
1106 let map = raw
1107 .as_mapping()
1108 .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
1109 check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
1110 let module = map
1111 .get("module")
1112 .and_then(|v| v.as_str())
1113 .filter(|s| !s.is_empty())
1114 .ok_or_else(|| {
1115 ManifestError::at(
1116 yaml_path,
1117 "embedder.module must be a non-empty string (path or dotted name)",
1118 )
1119 })?
1120 .to_string();
1121 let class = map
1122 .get("class")
1123 .and_then(|v| v.as_str())
1124 .filter(|s| valid_identifier(s))
1125 .ok_or_else(|| {
1126 ManifestError::at(
1127 yaml_path,
1128 "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
1129 )
1130 })?
1131 .to_string();
1132 let kwargs = match map.get("kwargs") {
1133 None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
1134 Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
1135 serde_json::Value::Object(o) => o,
1136 _ => {
1137 return Err(ManifestError::at(
1138 yaml_path,
1139 "embedder.kwargs must be a mapping",
1140 ))
1141 }
1142 },
1143 Some(_) => {
1144 return Err(ManifestError::at(
1145 yaml_path,
1146 "embedder.kwargs must be a mapping",
1147 ))
1148 }
1149 };
1150 Ok(Some(EmbedderConfig {
1151 module,
1152 class,
1153 kwargs,
1154 }))
1155}
1156
1157fn build_builtins(
1158 raw: Option<&serde_yaml::Value>,
1159 yaml_path: &Path,
1160) -> Result<BuiltinsConfig, ManifestError> {
1161 let Some(raw) = raw else {
1162 return Ok(BuiltinsConfig::default());
1163 };
1164 if matches!(raw, serde_yaml::Value::Null) {
1165 return Ok(BuiltinsConfig::default());
1166 }
1167 let map = raw
1168 .as_mapping()
1169 .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
1170 check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
1171 let mut cfg = BuiltinsConfig::default();
1172 if let Some(v) = map.get("save_graph") {
1173 cfg.save_graph = v
1174 .as_bool()
1175 .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
1176 }
1177 if let Some(v) = map.get("temp_cleanup") {
1178 let s = v.as_str().ok_or_else(|| {
1179 ManifestError::at(
1180 yaml_path,
1181 format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
1182 )
1183 })?;
1184 cfg.temp_cleanup = match s {
1185 "never" => TempCleanup::Never,
1186 "on_overview" => TempCleanup::OnOverview,
1187 other => {
1188 return Err(ManifestError::at(
1189 yaml_path,
1190 format!(
1191 "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
1192 ),
1193 ))
1194 }
1195 };
1196 }
1197 Ok(cfg)
1198}
1199
/// Reports whether `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$`.
///
/// The check works on bytes: every byte of a non-ASCII character is `>= 0x80`
/// and therefore fails both ASCII class tests.
fn valid_identifier(s: &str) -> bool {
    match s.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            (head.is_ascii_alphabetic() || *head == b'_')
                && tail
                    .iter()
                    .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'_')
        }
    }
}
1208
1209fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
1210 serde_json::to_value(&v)
1211 .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
1212}
1213
/// Placeholder unit struct; nothing in the visible code refers to it.
// NOTE(review): presumably kept so the `serde::Deserialize` import stays
// used - confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
1216
1217#[cfg(test)]
1218mod tests {
1219 use super::*;
1220
1221 fn write_tmp(text: &str) -> tempfile::NamedTempFile {
1222 let mut f = tempfile::NamedTempFile::new().unwrap();
1223 std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
1224 f
1225 }
1226
1227 #[test]
1228 fn loads_minimal_empty_manifest() {
1229 let f = write_tmp("");
1230 let m = load(f.path()).unwrap();
1231 assert_eq!(m.tools.len(), 0);
1232 assert_eq!(m.source_roots.len(), 0);
1233 assert!(!m.trust.allow_python_tools);
1234 assert!(!m.trust.allow_embedder);
1235 assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
1236 }
1237
1238 #[test]
1239 fn loads_name_and_instructions() {
1240 let f = write_tmp("name: Demo\ninstructions: |\n multi-line\n block\n");
1241 let m = load(f.path()).unwrap();
1242 assert_eq!(m.name.as_deref(), Some("Demo"));
1243 assert!(m.instructions.unwrap().contains("multi-line"));
1244 }
1245
1246 #[test]
1247 fn rejects_unknown_top_key() {
1248 let f = write_tmp("bogus: 1\n");
1249 let err = load(f.path()).unwrap_err();
1250 assert!(err.message.contains("unknown top-level"));
1251 }
1252
1253 #[test]
1254 fn source_root_string_normalises_to_list() {
1255 let f = write_tmp("source_root: ./data\n");
1256 let m = load(f.path()).unwrap();
1257 assert_eq!(m.source_roots, vec!["./data".to_string()]);
1258 }
1259
1260 #[test]
1261 fn source_roots_list_preserved() {
1262 let f = write_tmp("source_roots:\n - ./a\n - ./b\n");
1263 let m = load(f.path()).unwrap();
1264 assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
1265 }
1266
1267 #[test]
1268 fn rejects_both_source_root_and_source_roots() {
1269 let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
1270 assert!(load(f.path()).unwrap_err().message.contains("not both"));
1271 }
1272
1273 #[test]
1274 fn cypher_tool_parses() {
1275 let f = write_tmp("tools:\n - name: lookup\n cypher: MATCH (n) RETURN n\n");
1276 let m = load(f.path()).unwrap();
1277 assert_eq!(m.tools.len(), 1);
1278 match &m.tools[0] {
1279 ToolSpec::Cypher(t) => {
1280 assert_eq!(t.name, "lookup");
1281 assert!(t.cypher.contains("MATCH"));
1282 }
1283 _ => panic!("expected cypher tool"),
1284 }
1285 }
1286
1287 #[test]
1288 fn python_tool_parses() {
1289 let f =
1290 write_tmp("tools:\n - name: detail\n python: ./tools.py\n function: detail\n");
1291 let m = load(f.path()).unwrap();
1292 match &m.tools[0] {
1293 ToolSpec::Python(t) => {
1294 assert_eq!(t.python, "./tools.py");
1295 assert_eq!(t.function, "detail");
1296 }
1297 _ => panic!("expected python tool"),
1298 }
1299 }
1300
1301 #[test]
1302 fn rejects_tool_with_both_kinds() {
1303 let f = write_tmp(
1304 "tools:\n - name: x\n cypher: 'MATCH (n) RETURN n'\n python: ./t.py\n function: x\n",
1305 );
1306 assert!(load(f.path())
1307 .unwrap_err()
1308 .message
1309 .contains("multiple kinds"));
1310 }
1311
1312 #[test]
1313 fn rejects_tool_with_no_kind() {
1314 let f = write_tmp("tools:\n - name: x\n");
1315 assert!(load(f.path())
1316 .unwrap_err()
1317 .message
1318 .contains("needs exactly one"));
1319 }
1320
1321 #[test]
1322 fn rejects_duplicate_tool_names() {
1323 let f = write_tmp(
1324 "tools:\n - name: same\n cypher: 'MATCH (n) RETURN n'\n - name: same\n cypher: 'MATCH (m) RETURN m'\n",
1325 );
1326 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1327 }
1328
1329 #[test]
1332 fn bundled_override_with_description_parses() {
1333 let f =
1334 write_tmp("tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n");
1335 let m = load(f.path()).unwrap();
1336 assert_eq!(m.tools.len(), 1);
1337 match &m.tools[0] {
1338 ToolSpec::Bundled(b) => {
1339 assert_eq!(b.name, "repo_management");
1340 assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
1341 assert!(!b.hidden);
1342 }
1343 _ => panic!("expected bundled override"),
1344 }
1345 }
1346
1347 #[test]
1348 fn bundled_override_with_hidden_parses() {
1349 let f = write_tmp("tools:\n - bundled: ping\n hidden: true\n");
1350 let m = load(f.path()).unwrap();
1351 match &m.tools[0] {
1352 ToolSpec::Bundled(b) => {
1353 assert_eq!(b.name, "ping");
1354 assert!(b.hidden);
1355 assert!(b.description.is_none());
1356 }
1357 _ => panic!("expected bundled override"),
1358 }
1359 }
1360
1361 #[test]
1362 fn bundled_override_alongside_cypher_tools_parses() {
1363 let f = write_tmp(
1364 "tools:\n\
1365 \x20\x20- bundled: cypher_query\n\
1366 \x20\x20\x20\x20description: \"Custom server description\"\n\
1367 \x20\x20- name: lookup\n\
1368 \x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
1369 );
1370 let m = load(f.path()).unwrap();
1371 assert_eq!(m.tools.len(), 2);
1372 assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
1373 assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
1374 }
1375
1376 #[test]
1377 fn rejects_bundled_with_cypher_kind() {
1378 let f =
1379 write_tmp("tools:\n - bundled: cypher_query\n cypher: \"MATCH (n) RETURN n\"\n");
1380 let err = load(f.path()).unwrap_err();
1381 assert!(
1382 err.message.contains("multiple kinds"),
1383 "got: {}",
1384 err.message
1385 );
1386 }
1387
1388 #[test]
1389 fn rejects_bundled_with_name_field() {
1390 let f = write_tmp("tools:\n - bundled: ping\n name: ping\n");
1391 let err = load(f.path()).unwrap_err();
1392 assert!(
1393 err.message.contains("cannot set `name:`"),
1394 "got: {}",
1395 err.message
1396 );
1397 }
1398
1399 #[test]
1400 fn rejects_bundled_with_parameters_field() {
1401 let f =
1402 write_tmp("tools:\n - bundled: cypher_query\n parameters:\n type: object\n");
1403 let err = load(f.path()).unwrap_err();
1404 assert!(
1405 err.message.contains("cannot set `parameters:`"),
1406 "got: {}",
1407 err.message
1408 );
1409 }
1410
1411 #[test]
1412 fn rejects_bundled_with_non_bool_hidden() {
1413 let f = write_tmp("tools:\n - bundled: ping\n hidden: yes-please\n");
1414 let err = load(f.path()).unwrap_err();
1415 assert!(
1416 err.message.contains("hidden must be a bool"),
1417 "got: {}",
1418 err.message
1419 );
1420 }
1421
1422 #[test]
1423 fn rejects_hidden_on_cypher_tool() {
1424 let f = write_tmp(
1425 "tools:\n - name: lookup\n cypher: \"MATCH (n) RETURN n\"\n hidden: true\n",
1426 );
1427 let err = load(f.path()).unwrap_err();
1428 assert!(
1429 err.message
1430 .contains("`hidden:` is only valid on `bundled:` override entries"),
1431 "got: {}",
1432 err.message
1433 );
1434 }
1435
1436 #[test]
1437 fn rejects_duplicate_bundled_overrides() {
1438 let f = write_tmp(
1442 "tools:\n - bundled: ping\n hidden: true\n - bundled: ping\n description: \"x\"\n",
1443 );
1444 assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
1445 }
1446
1447 #[test]
1448 fn rejects_bundled_with_invalid_identifier() {
1449 let f = write_tmp("tools:\n - bundled: \"123-bad\"\n hidden: true\n");
1450 let err = load(f.path()).unwrap_err();
1451 assert!(
1452 err.message.contains("must be a string"),
1453 "got: {}",
1454 err.message
1455 );
1456 }
1457
1458 #[test]
1459 fn bundled_override_to_json_shape() {
1460 let f = write_tmp(
1461 "tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n hidden: false\n",
1462 );
1463 let m = load(f.path()).unwrap();
1464 let v = m.to_json();
1465 assert_eq!(v["tools"][0]["kind"], "bundled");
1466 assert_eq!(v["tools"][0]["name"], "repo_management");
1467 assert_eq!(v["tools"][0]["description"], "FIRST STEP");
1468 assert_eq!(v["tools"][0]["hidden"], false);
1469 }
1470
1471 #[test]
1472 fn embedder_parses() {
1473 let f = write_tmp(
1474 "embedder:\n module: ./e.py\n class: GraphEmbedder\n kwargs:\n cooldown: 900\n",
1475 );
1476 let m = load(f.path()).unwrap();
1477 let e = m.embedder.unwrap();
1478 assert_eq!(e.module, "./e.py");
1479 assert_eq!(e.class, "GraphEmbedder");
1480 assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
1481 }
1482
1483 #[test]
1484 fn builtins_parses_temp_cleanup() {
1485 let f = write_tmp("builtins:\n save_graph: true\n temp_cleanup: on_overview\n");
1486 let m = load(f.path()).unwrap();
1487 assert!(m.builtins.save_graph);
1488 assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
1489 }
1490
1491 #[test]
1492 fn rejects_invalid_temp_cleanup() {
1493 let f = write_tmp("builtins:\n temp_cleanup: nuke\n");
1494 assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
1495 }
1496
1497 #[test]
1498 fn allow_embedder_trust_parses() {
1499 let f = write_tmp("trust:\n allow_embedder: true\n");
1500 let m = load(f.path()).unwrap();
1501 assert!(m.trust.allow_embedder);
1502 }
1503
1504 #[test]
1505 fn allow_query_preprocessor_trust_parses() {
1506 let f = write_tmp("trust:\n allow_query_preprocessor: true\n");
1507 let m = load(f.path()).unwrap();
1508 assert!(m.trust.allow_query_preprocessor);
1509 assert!(!m.trust.allow_embedder);
1510 assert!(!m.trust.allow_python_tools);
1511 }
1512
1513 #[test]
1514 fn allow_query_preprocessor_rejects_non_bool() {
1515 let f = write_tmp("trust:\n allow_query_preprocessor: \"yes\"\n");
1516 let err = load(f.path()).unwrap_err();
1517 assert!(err
1518 .message
1519 .contains("allow_query_preprocessor must be a bool"));
1520 }
1521
1522 #[test]
1523 fn find_sibling_works() {
1524 let dir = tempfile::tempdir().unwrap();
1525 let graph = dir.path().join("demo.kgl");
1526 std::fs::write(&graph, b"\x00").unwrap();
1527 let sibling = dir.path().join("demo_mcp.yaml");
1528 std::fs::write(&sibling, "name: x\n").unwrap();
1529 assert_eq!(find_sibling_manifest(&graph), Some(sibling));
1530 }
1531
1532 #[test]
1533 fn workspace_local_parses() {
1534 let f = write_tmp("workspace:\n kind: local\n root: ./src\n watch: true\n");
1535 let m = load(f.path()).unwrap();
1536 let w = m.workspace.unwrap();
1537 assert_eq!(w.kind, WorkspaceKind::Local);
1538 assert_eq!(w.root.as_deref(), Some("./src"));
1539 assert!(w.watch);
1540 }
1541
1542 #[test]
1543 fn workspace_github_default_kind() {
1544 let f = write_tmp("workspace: {}\n");
1545 let m = load(f.path()).unwrap();
1546 let w = m.workspace.unwrap();
1547 assert_eq!(w.kind, WorkspaceKind::Github);
1548 assert!(w.root.is_none());
1549 assert!(!w.watch);
1550 }
1551
1552 #[test]
1553 fn workspace_local_without_root_errors() {
1554 let f = write_tmp("workspace:\n kind: local\n");
1555 let err = load(f.path()).unwrap_err();
1556 assert!(err.message.contains("requires workspace.root"));
1557 }
1558
1559 #[test]
1560 fn workspace_unknown_key_rejected() {
1561 let f = write_tmp("workspace:\n kind: local\n root: ./x\n bogus: 1\n");
1562 let err = load(f.path()).unwrap_err();
1563 assert!(err.message.contains("unknown workspace keys"));
1564 }
1565
1566 #[test]
1567 fn workspace_invalid_kind_rejected() {
1568 let f = write_tmp("workspace:\n kind: docker\n root: ./x\n");
1569 let err = load(f.path()).unwrap_err();
1570 assert!(err.message.contains("workspace.kind"));
1571 }
1572
1573 #[test]
1574 fn workspace_watch_invalid_for_github() {
1575 let f = write_tmp("workspace:\n kind: github\n watch: true\n");
1576 let err = load(f.path()).unwrap_err();
1577 assert!(err.message.contains("watch is only valid"));
1578 }
1579
1580 #[test]
1581 fn extensions_passthrough_parses() {
1582 let f = write_tmp(
1583 "extensions:\n csv_http_server: true\n csv_http_server_dir: temp/\n arbitrary:\n nested: 1\n",
1584 );
1585 let m = load(f.path()).unwrap();
1586 assert_eq!(
1587 m.extensions
1588 .get("csv_http_server")
1589 .and_then(|v| v.as_bool()),
1590 Some(true)
1591 );
1592 assert_eq!(
1593 m.extensions
1594 .get("csv_http_server_dir")
1595 .and_then(|v| v.as_str()),
1596 Some("temp/")
1597 );
1598 assert_eq!(
1600 m.extensions
1601 .get("arbitrary")
1602 .and_then(|v| v.get("nested"))
1603 .and_then(|v| v.as_i64()),
1604 Some(1)
1605 );
1606 }
1607
1608 #[test]
1609 fn extensions_absent_defaults_to_empty() {
1610 let f = write_tmp("name: x\n");
1611 let m = load(f.path()).unwrap();
1612 assert!(m.extensions.is_empty());
1613 }
1614
1615 #[test]
1616 fn extensions_inner_keys_unvalidated() {
1617 let f = write_tmp(
1621 "extensions:\n whatever_kglite_wants: foo\n some_other_consumer: { a: 1, b: 2 }\n",
1622 );
1623 load(f.path()).unwrap();
1624 }
1625
1626 #[test]
1627 fn extensions_must_be_a_mapping() {
1628 let f = write_tmp("extensions: not-a-mapping\n");
1629 let err = load(f.path()).unwrap_err();
1630 assert!(err.message.contains("extensions must be a mapping"));
1631 }
1632
1633 #[test]
1634 fn env_file_key_parses() {
1635 let f = write_tmp("env_file: ../.env\n");
1636 let m = load(f.path()).unwrap();
1637 assert_eq!(m.env_file.as_deref(), Some("../.env"));
1638 }
1639
1640 #[test]
1641 fn env_file_unset_is_none() {
1642 let f = write_tmp("name: Demo\n");
1643 let m = load(f.path()).unwrap();
1644 assert!(m.env_file.is_none());
1645 }
1646
1647 #[test]
1648 fn find_workspace_works() {
1649 let dir = tempfile::tempdir().unwrap();
1650 let manifest = dir.path().join("workspace_mcp.yaml");
1651 std::fs::write(&manifest, "name: ws\n").unwrap();
1652 assert_eq!(find_workspace_manifest(dir.path()), Some(manifest));
1653 }
1654
1655 #[test]
1656 fn find_workspace_walks_one_level_up_with_applies_to() {
1657 let dir = tempfile::tempdir().unwrap();
1662 let parent = dir.path().join("parent");
1663 std::fs::create_dir(&parent).unwrap();
1664 let manifest = parent.join("workspace_mcp.yaml");
1665 std::fs::write(
1666 &manifest,
1667 "workspace:\n kind: github\n applies_to: ./repos\n",
1668 )
1669 .unwrap();
1670 let repos = parent.join("repos");
1671 std::fs::create_dir(&repos).unwrap();
1672
1673 assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
1675
1676 let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
1679 assert_eq!(
1680 found.canonicalize().unwrap(),
1681 manifest.canonicalize().unwrap()
1682 );
1683 }
1684
1685 #[test]
1686 fn find_workspace_ignores_parent_without_applies_to() {
1687 let dir = tempfile::tempdir().unwrap();
1693 let parent = dir.path().join("parent");
1694 std::fs::create_dir(&parent).unwrap();
1695 let manifest = parent.join("workspace_mcp.yaml");
1696 std::fs::write(&manifest, "name: not for repos\n").unwrap();
1697 let repos = parent.join("repos");
1698 std::fs::create_dir(&repos).unwrap();
1699
1700 assert_eq!(
1701 find_workspace_manifest(&repos),
1702 None,
1703 "parent manifest without workspace.applies_to must NOT auto-attach"
1704 );
1705 }
1706
1707 #[test]
1708 fn find_workspace_ignores_parent_with_mismatched_applies_to() {
1709 let dir = tempfile::tempdir().unwrap();
1713 let parent = dir.path().join("parent");
1714 std::fs::create_dir(&parent).unwrap();
1715 let manifest = parent.join("workspace_mcp.yaml");
1716 std::fs::write(
1717 &manifest,
1718 "workspace:\n kind: github\n applies_to: ./repos\n",
1719 )
1720 .unwrap();
1721 let other = parent.join("other_dir");
1722 std::fs::create_dir(&other).unwrap();
1723
1724 assert_eq!(
1725 find_workspace_manifest(&other),
1726 None,
1727 "applies_to: ./repos must NOT match --workspace ./other_dir"
1728 );
1729 }
1730
1731 #[test]
1732 fn find_workspace_applies_to_wildcard_matches_any_child() {
1733 let dir = tempfile::tempdir().unwrap();
1737 let parent = dir.path().join("parent");
1738 std::fs::create_dir(&parent).unwrap();
1739 let manifest = parent.join("workspace_mcp.yaml");
1740 std::fs::write(&manifest, "workspace:\n kind: github\n applies_to: '*'\n").unwrap();
1741 for child_name in ["repos", "clones", "totally-different-name"] {
1742 let child = parent.join(child_name);
1743 std::fs::create_dir(&child).unwrap();
1744 let found =
1745 find_workspace_manifest(&child).expect("wildcard should match any direct child");
1746 assert_eq!(
1747 found.canonicalize().unwrap(),
1748 manifest.canonicalize().unwrap(),
1749 "wildcard should match child {child_name:?}"
1750 );
1751 }
1752 }
1753
1754 #[test]
1755 fn find_workspace_applies_to_glob_matches_prefix() {
1756 let dir = tempfile::tempdir().unwrap();
1759 let parent = dir.path().join("parent");
1760 std::fs::create_dir(&parent).unwrap();
1761 let manifest = parent.join("workspace_mcp.yaml");
1762 std::fs::write(
1763 &manifest,
1764 "workspace:\n kind: github\n applies_to: ./prod-*\n",
1765 )
1766 .unwrap();
1767 for child_name in ["prod-api", "prod-web", "prod-"] {
1769 let child = parent.join(child_name);
1770 std::fs::create_dir(&child).unwrap();
1771 assert!(
1772 find_workspace_manifest(&child).is_some(),
1773 "prod-* should match {child_name:?}"
1774 );
1775 }
1776 for child_name in ["test-api", "stage-web", "random"] {
1778 let child = parent.join(child_name);
1779 std::fs::create_dir(&child).unwrap();
1780 assert_eq!(
1781 find_workspace_manifest(&child),
1782 None,
1783 "prod-* should NOT match {child_name:?}"
1784 );
1785 }
1786 }
1787
1788 #[test]
1789 fn find_workspace_applies_to_list_matches_any_entry() {
1790 let dir = tempfile::tempdir().unwrap();
1793 let parent = dir.path().join("parent");
1794 std::fs::create_dir(&parent).unwrap();
1795 let manifest = parent.join("workspace_mcp.yaml");
1796 std::fs::write(
1797 &manifest,
1798 "workspace:\n kind: github\n applies_to:\n - ./repos\n - ./clones\n",
1799 )
1800 .unwrap();
1801 for matching in ["repos", "clones"] {
1802 let child = parent.join(matching);
1803 std::fs::create_dir(&child).unwrap();
1804 assert!(
1805 find_workspace_manifest(&child).is_some(),
1806 "list should match {matching:?}"
1807 );
1808 }
1809 let other = parent.join("scratch");
1810 std::fs::create_dir(&other).unwrap();
1811 assert_eq!(
1812 find_workspace_manifest(&other),
1813 None,
1814 "list with [repos, clones] must NOT match scratch"
1815 );
1816 }
1817
1818 #[test]
1819 fn applies_to_rejects_deep_path_at_parse_time() {
1820 let f = write_tmp("workspace:\n kind: github\n applies_to: ./too/deep/path\n");
1821 let err = load(f.path()).unwrap_err();
1822 assert!(
1823 err.message.contains("must be a single path segment"),
1824 "got: {}",
1825 err.message
1826 );
1827 }
1828
1829 #[test]
1830 fn applies_to_rejects_invalid_glob_at_parse_time() {
1831 let f = write_tmp("workspace:\n kind: github\n applies_to: './[unterminated'\n");
1833 let err = load(f.path()).unwrap_err();
1834 assert!(
1835 err.message.contains("invalid glob pattern"),
1836 "got: {}",
1837 err.message
1838 );
1839 }
1840
1841 #[test]
1842 fn applies_to_rejects_parent_relative() {
1843 let f = write_tmp("workspace:\n kind: github\n applies_to: '..'\n");
1847 let err = load(f.path()).unwrap_err();
1848 assert!(err.message.contains("must not contain `..`"));
1849
1850 let f2 = write_tmp("workspace:\n kind: github\n applies_to: '../up'\n");
1851 let err2 = load(f2.path()).unwrap_err();
1852 assert!(err2.message.contains("must be a single path segment"));
1853 }
1854
1855 #[test]
1856 fn find_workspace_returns_none_when_missing_everywhere() {
1857 let dir = tempfile::tempdir().unwrap();
1858 let child = dir.path().join("child");
1859 std::fs::create_dir(&child).unwrap();
1860 assert_eq!(find_workspace_manifest(&child), None);
1862 }
1863
1864 #[test]
1865 fn find_workspace_primary_wins_over_parent_fallback() {
1866 let dir = tempfile::tempdir().unwrap();
1873 let parent_manifest = dir.path().join("workspace_mcp.yaml");
1874 std::fs::write(
1875 &parent_manifest,
1876 "workspace:\n kind: github\n applies_to: ./repos\n",
1877 )
1878 .unwrap();
1879 let child = dir.path().join("repos");
1880 std::fs::create_dir(&child).unwrap();
1881 let child_manifest = child.join("workspace_mcp.yaml");
1882 std::fs::write(&child_manifest, "name: child\n").unwrap();
1883
1884 let found = find_workspace_manifest(&child).expect("primary should resolve");
1888 assert_eq!(
1889 found.canonicalize().unwrap(),
1890 child_manifest.canonicalize().unwrap(),
1891 "primary location must win when both primary and parent fallback exist"
1892 );
1893 }
1894
1895 #[test]
1896 fn to_json_shape_is_stable() {
1897 let f = write_tmp(
1898 r#"
1899name: KGLite Codebase
1900source_roots: [src, lib]
1901trust:
1902 allow_embedder: true
1903embedder:
1904 module: kglite.embed
1905 class: SentenceTransformerEmbedder
1906builtins:
1907 save_graph: true
1908 temp_cleanup: on_overview
1909"#,
1910 );
1911 let m = load(f.path()).unwrap();
1912 let actual = m.to_json();
1913 let expected = serde_json::json!({
1914 "yaml_path": f.path().display().to_string(),
1915 "name": "KGLite Codebase",
1916 "instructions": null,
1917 "overview_prefix": null,
1918 "source_roots": ["src", "lib"],
1919 "trust": {
1920 "allow_python_tools": false,
1921 "allow_embedder": true,
1922 "allow_query_preprocessor": false,
1923 },
1924 "tools": [],
1925 "embedder": {
1926 "module": "kglite.embed",
1927 "class": "SentenceTransformerEmbedder",
1928 "kwargs": {},
1929 },
1930 "builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
1931 "env_file": null,
1932 "workspace": null,
1933 "extensions": {},
1934 });
1935 assert_eq!(actual, expected);
1936 }
1937
1938 #[test]
1939 fn to_json_round_trips_tools_and_workspace() {
1940 let f = write_tmp(
1941 r#"
1942name: Full Surface
1943source_root: ./src
1944trust:
1945 allow_python_tools: true
1946tools:
1947 - name: nodes_for
1948 cypher: "MATCH (n {name: $name}) RETURN n"
1949 description: "fetch nodes by name"
1950 - name: run_query
1951 python: tools.py
1952 function: run
1953workspace:
1954 kind: local
1955 root: /tmp/ws
1956 watch: true
1957builtins:
1958 save_graph: false
1959env_file: .env.local
1960extensions:
1961 kglite:
1962 flavour: standard
1963"#,
1964 );
1965 let m = load(f.path()).unwrap();
1966 let v = m.to_json();
1967 assert_eq!(v["name"], "Full Surface");
1968 assert_eq!(v["trust"]["allow_python_tools"], true);
1969 assert_eq!(v["workspace"]["kind"], "local");
1970 assert_eq!(v["workspace"]["root"], "/tmp/ws");
1971 assert_eq!(v["workspace"]["watch"], true);
1972 assert_eq!(v["env_file"], ".env.local");
1973 assert_eq!(v["tools"][0]["kind"], "cypher");
1974 assert_eq!(v["tools"][0]["name"], "nodes_for");
1975 assert_eq!(v["tools"][1]["kind"], "python");
1976 assert_eq!(v["tools"][1]["name"], "run_query");
1977 assert_eq!(v["tools"][1]["python"], "tools.py");
1978 assert_eq!(v["tools"][1]["function"], "run");
1979 assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
1980 }
1981}