#![allow(dead_code)]
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use serde::Deserialize;
use thiserror::Error;
/// Keys permitted at the top level of a manifest mapping; anything else is rejected.
const ALLOWED_TOP_KEYS: &[&str] = &[
    "name",
    "instructions",
    "overview_prefix",
    "source_root",
    "source_roots",
    "trust",
    "tools",
    "embedder",
    "builtins",
    "env_file",
    "workspace",
    "extensions",
];
/// Keys permitted under `workspace:`.
const ALLOWED_WORKSPACE_KEYS: &[&str] = &["kind", "root", "watch", "applies_to"];
/// Accepted string values for `workspace.kind`.
const VALID_WORKSPACE_KIND: &[&str] = &["github", "local"];
/// Keys permitted under `trust:` (explicit opt-in switches).
const ALLOWED_TRUST_KEYS: &[&str] = &[
    "allow_python_tools",
    "allow_embedder",
    "allow_query_preprocessor",
];
/// Keys permitted on each `tools:` entry (across all tool kinds).
const ALLOWED_TOOL_KEYS: &[&str] = &[
    "name",
    "description",
    "parameters",
    "cypher",
    "python",
    "function",
    "bundled",
    "hidden",
];
/// Keys permitted under `embedder:`.
const ALLOWED_EMBEDDER_KEYS: &[&str] = &["module", "class", "kwargs"];
/// Keys permitted under `builtins:`.
const ALLOWED_BUILTIN_KEYS: &[&str] = &["save_graph", "temp_cleanup"];
/// Accepted string values for `builtins.temp_cleanup`.
const VALID_TEMP_CLEANUP: &[&str] = &["never", "on_overview"];
/// Error produced while reading or validating a manifest file.
///
/// `Display` (derived via `thiserror`) renders as `"<path>: <message>"`.
#[derive(Debug, Error)]
#[error("{path}: {message}")]
pub struct ManifestError {
    /// Manifest file path the error refers to (`"<manifest>"` when unknown).
    pub path: String,
    /// Human-readable description of what went wrong.
    pub message: String,
}
impl ManifestError {
pub fn at(path: &Path, message: impl Into<String>) -> Self {
Self {
path: path.display().to_string(),
message: message.into(),
}
}
pub fn bare(message: impl Into<String>) -> Self {
Self {
path: "<manifest>".to_string(),
message: message.into(),
}
}
}
/// Opt-in trust switches from the `trust:` section; every flag defaults to `false`.
#[derive(Debug, Default, Clone)]
pub struct TrustConfig {
    /// Set from `trust.allow_python_tools`.
    pub allow_python_tools: bool,
    /// Set from `trust.allow_embedder`.
    pub allow_embedder: bool,
    /// Set from `trust.allow_query_preprocessor`.
    pub allow_query_preprocessor: bool,
}
/// A tool declared in the manifest, discriminated by which kind key was set.
#[derive(Debug, Clone)]
pub enum ToolSpec {
    /// Entry with a `cypher:` key.
    Cypher(CypherTool),
    /// Entry with a `python:` key.
    Python(PythonTool),
    /// Entry with a `bundled:` key (override of a built-in tool).
    Bundled(BundledOverride),
}
impl ToolSpec {
pub fn name(&self) -> &str {
match self {
ToolSpec::Cypher(t) => &t.name,
ToolSpec::Python(t) => &t.name,
ToolSpec::Bundled(t) => &t.name,
}
}
}
/// Tool backed by a Cypher query (`cypher:` entries).
#[derive(Debug, Clone)]
pub struct CypherTool {
    /// Identifier-style tool name.
    pub name: String,
    /// The Cypher query text (validated non-empty at parse time).
    pub cypher: String,
    /// Optional human-readable description.
    pub description: Option<String>,
    /// Optional parameter schema; a JSON object when present.
    pub parameters: Option<serde_json::Value>,
}
/// Tool backed by a Python function (`python:` entries).
#[derive(Debug, Clone)]
pub struct PythonTool {
    /// Identifier-style tool name.
    pub name: String,
    /// Path string pointing at the Python source (non-empty).
    pub python: String,
    /// Python identifier of the function to invoke.
    pub function: String,
    /// Optional human-readable description.
    pub description: Option<String>,
    /// Optional parameter schema; a JSON object when present.
    pub parameters: Option<serde_json::Value>,
}
/// Override entry for a bundled (built-in) tool (`bundled:` entries).
/// Only `description:` and `hidden:` may be customised.
#[derive(Debug, Clone)]
pub struct BundledOverride {
    /// Name of the bundled tool being overridden.
    pub name: String,
    /// Replacement description, when provided.
    pub description: Option<String>,
    /// Set from `hidden:`; defaults to false.
    pub hidden: bool,
}
/// Configuration from the `embedder:` section.
#[derive(Debug, Clone)]
pub struct EmbedderConfig {
    /// Module path or dotted name (non-empty).
    pub module: String,
    /// Class name; a valid identifier.
    pub class: String,
    /// Keyword arguments as a JSON object (empty when omitted).
    pub kwargs: serde_json::Map<String, serde_json::Value>,
}
/// Settings from the `builtins:` section; defaults are `false` / `Never`.
#[derive(Debug, Default, Clone)]
pub struct BuiltinsConfig {
    /// Set from `builtins.save_graph` (default false).
    pub save_graph: bool,
    /// Set from `builtins.temp_cleanup` (default `Never`).
    pub temp_cleanup: TempCleanup,
}
/// Value of `builtins.temp_cleanup`; determines when temp artifacts are cleaned.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TempCleanup {
    /// Manifest value `"never"` (the default).
    #[default]
    Never,
    /// Manifest value `"on_overview"`.
    OnOverview,
}

impl TempCleanup {
    /// The manifest-facing string form of this variant.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Never => "never",
            Self::OnOverview => "on_overview",
        }
    }
}
/// Value of `workspace.kind`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum WorkspaceKind {
    /// Manifest value `"github"` (the default).
    #[default]
    Github,
    /// Manifest value `"local"`.
    Local,
}

impl WorkspaceKind {
    /// The manifest-facing string form of this variant.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Github => "github",
            Self::Local => "local",
        }
    }
}
/// Parsed `workspace:` section.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceConfig {
    /// Workspace kind; defaults to `Github`.
    pub kind: WorkspaceKind,
    /// Root path string; required when `kind` is `Local`.
    pub root: Option<String>,
    /// Watch flag; only valid when `kind` is `Local`.
    pub watch: bool,
    /// Relative path this manifest applies to; opt-in for parent-walk discovery.
    pub applies_to: Option<String>,
}
/// Fully validated manifest, as loaded from a `*_mcp.yaml` file.
#[derive(Debug, Clone)]
pub struct Manifest {
    /// Path of the YAML file this manifest was loaded from.
    pub yaml_path: PathBuf,
    /// Optional display name.
    pub name: Option<String>,
    /// Optional free-form instructions text.
    pub instructions: Option<String>,
    /// Optional overview prefix text.
    pub overview_prefix: Option<String>,
    /// Source roots; a scalar `source_root: x` is normalised to `["x"]`.
    pub source_roots: Vec<String>,
    /// Trust opt-ins (all default to false).
    pub trust: TrustConfig,
    /// Declared tools in manifest order; names are validated unique.
    pub tools: Vec<ToolSpec>,
    /// Optional embedder configuration.
    pub embedder: Option<EmbedderConfig>,
    /// Built-in tool settings.
    pub builtins: BuiltinsConfig,
    /// Optional env-file path string.
    pub env_file: Option<String>,
    /// Optional workspace configuration.
    pub workspace: Option<WorkspaceConfig>,
    /// Free-form `extensions:` mapping passed through for downstream consumers.
    pub extensions: serde_json::Map<String, serde_json::Value>,
}
impl Manifest {
    /// Serialize the parsed manifest into a JSON value mirroring the YAML
    /// structure (with `source_root` normalised away and tools tagged by kind).
    pub fn to_json(&self) -> serde_json::Value {
        serde_json::json!({
            "yaml_path": self.yaml_path.display().to_string(),
            "name": self.name,
            "instructions": self.instructions,
            "overview_prefix": self.overview_prefix,
            "source_roots": self.source_roots,
            "trust": {
                "allow_python_tools": self.trust.allow_python_tools,
                "allow_embedder": self.trust.allow_embedder,
                "allow_query_preprocessor": self.trust.allow_query_preprocessor,
            },
            // Each tool carries a "kind" discriminator plus its kind-specific fields.
            "tools": self.tools.iter().map(|t| match t {
                ToolSpec::Cypher(c) => serde_json::json!({
                    "kind": "cypher",
                    "name": c.name,
                    "cypher": c.cypher,
                    "description": c.description,
                    "parameters": c.parameters,
                }),
                ToolSpec::Python(p) => serde_json::json!({
                    "kind": "python",
                    "name": p.name,
                    "python": p.python,
                    "function": p.function,
                    "description": p.description,
                    "parameters": p.parameters,
                }),
                ToolSpec::Bundled(b) => serde_json::json!({
                    "kind": "bundled",
                    "name": b.name,
                    "description": b.description,
                    "hidden": b.hidden,
                }),
            }).collect::<Vec<_>>(),
            // `null` when no embedder was configured.
            "embedder": self.embedder.as_ref().map(|e| serde_json::json!({
                "module": e.module,
                "class": e.class,
                "kwargs": e.kwargs,
            })),
            "builtins": {
                "save_graph": self.builtins.save_graph,
                "temp_cleanup": self.builtins.temp_cleanup.as_str(),
            },
            "env_file": self.env_file,
            // `null` when no workspace section was present.
            "workspace": self.workspace.as_ref().map(|w| serde_json::json!({
                "kind": w.kind.as_str(),
                "root": w.root,
                "watch": w.watch,
                "applies_to": w.applies_to,
            })),
            "extensions": self.extensions,
        })
    }
}
/// Look for a manifest named `<stem>_mcp.yaml` next to the graph file.
///
/// Returns `None` when the graph path has no stem or parent, or when no such
/// sibling file exists.
pub fn find_sibling_manifest(graph_path: &Path) -> Option<PathBuf> {
    let stem = graph_path.file_stem()?.to_string_lossy().into_owned();
    let candidate = graph_path.parent()?.join(format!("{stem}_mcp.yaml"));
    if candidate.is_file() {
        return Some(candidate);
    }
    None
}
/// Locate the `workspace_mcp.yaml` manifest for `workspace_dir`.
///
/// Resolution order:
/// 1. `workspace_dir/workspace_mcp.yaml`, if present.
/// 2. One level up: `parent/workspace_mcp.yaml`, but ONLY if that manifest
///    parses and its `workspace.applies_to` resolves to `workspace_dir`
///    (explicit opt-in; a parent manifest never auto-attaches).
///
/// Fallback problems (parse failure, missing/unresolvable/mismatched
/// `applies_to`) are logged and treated as "no manifest", not errors.
pub fn find_workspace_manifest(workspace_dir: &Path) -> Option<PathBuf> {
    let primary = workspace_dir.join("workspace_mcp.yaml");
    if primary.is_file() {
        return Some(primary);
    }
    let parent = workspace_dir.parent()?;
    // Canonicalize both sides so the filesystem-root case (parent == self)
    // and the later applies_to comparison work on real paths.
    let workspace_resolved = workspace_dir.canonicalize().ok()?;
    let parent_resolved = parent.canonicalize().ok()?;
    if parent_resolved == workspace_resolved {
        return None;
    }
    let fallback = parent.join("workspace_mcp.yaml");
    if !fallback.is_file() {
        return None;
    }
    // A broken parent manifest must not poison this workspace: warn and bail.
    let manifest = match load(&fallback) {
        Ok(m) => m,
        Err(e) => {
            tracing::warn!(
                manifest = %fallback.display(),
                error = %e,
                "parent-walk manifest exists but failed to parse; ignoring"
            );
            return None;
        }
    };
    let declared = manifest
        .workspace
        .as_ref()
        .and_then(|w| w.applies_to.as_ref());
    // No applies_to means the parent manifest did not opt in to being shared.
    let Some(declared_path) = declared else {
        tracing::info!(
            manifest = %fallback.display(),
            "parent-walk manifest does not declare workspace.applies_to; \
             ignoring (set workspace.applies_to: <relative path> to opt in)"
        );
        return None;
    };
    let manifest_dir = fallback.parent()?;
    // applies_to is interpreted relative to the manifest's own directory.
    let declared_abs = match manifest_dir.join(declared_path).canonicalize() {
        Ok(p) => p,
        Err(e) => {
            tracing::warn!(
                manifest = %fallback.display(),
                applies_to = %declared_path,
                error = %e,
                "parent-walk manifest's workspace.applies_to cannot be resolved; ignoring"
            );
            return None;
        }
    };
    if declared_abs == workspace_resolved {
        tracing::info!(
            workspace_dir = %workspace_dir.display(),
            manifest = %fallback.display(),
            "manifest discovered via parent-walk fallback (workspace.applies_to matched)"
        );
        Some(fallback)
    } else {
        tracing::info!(
            workspace_dir = %workspace_resolved.display(),
            manifest = %fallback.display(),
            declared = %declared_abs.display(),
            "parent-walk manifest's workspace.applies_to does not match \
             this workspace_dir; ignoring"
        );
        None
    }
}
/// Read and parse a manifest file into a validated `Manifest`.
///
/// An entirely empty file parses as YAML null and is treated as an empty
/// mapping; any other non-mapping top level is an error.
pub fn load(yaml_path: &Path) -> Result<Manifest, ManifestError> {
    let text = fs::read_to_string(yaml_path)
        .map_err(|e| ManifestError::at(yaml_path, format!("read error: {e}")))?;
    let mut parsed: serde_yaml::Value = serde_yaml::from_str(&text)
        .map_err(|e| ManifestError::at(yaml_path, format!("YAML parse error: {e}")))?;
    if parsed.is_null() {
        parsed = serde_yaml::Value::Mapping(serde_yaml::Mapping::new());
    }
    match parsed.as_mapping() {
        Some(map) => build(map, yaml_path),
        None => Err(ManifestError::at(yaml_path, "top-level must be a mapping")),
    }
}
/// Validate the top-level mapping and assemble a `Manifest`.
///
/// Rejects unknown top-level keys, enforces the `source_root` /
/// `source_roots` mutual exclusion, and delegates each section to its
/// dedicated builder.
fn build(raw: &serde_yaml::Mapping, yaml_path: &Path) -> Result<Manifest, ManifestError> {
    check_keys(raw, ALLOWED_TOP_KEYS, "top-level keys", yaml_path)?;
    if raw.contains_key("source_root") && raw.contains_key("source_roots") {
        return Err(ManifestError::at(
            yaml_path,
            "specify either source_root (str) or source_roots (list), not both",
        ));
    }
    // Normalise both spellings into a single Vec<String>.
    let mut source_roots: Vec<String> = Vec::new();
    if let Some(v) = raw.get("source_root") {
        let s = v.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
            ManifestError::at(yaml_path, "source_root must be a non-empty string")
        })?;
        source_roots.push(s.to_string());
    } else if let Some(v) = raw.get("source_roots") {
        let seq = v.as_sequence().ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                "source_roots must be a list of non-empty strings",
            )
        })?;
        if seq.is_empty() {
            return Err(ManifestError::at(
                yaml_path,
                "source_roots must be non-empty when set",
            ));
        }
        for item in seq {
            let s = item.as_str().filter(|s| !s.is_empty()).ok_or_else(|| {
                ManifestError::at(
                    yaml_path,
                    "source_roots must be a list of non-empty strings",
                )
            })?;
            source_roots.push(s.to_string());
        }
    }
    let trust = build_trust(raw.get("trust"), yaml_path)?;
    let tools = build_tools(raw.get("tools"), yaml_path)?;
    let embedder = build_embedder(raw.get("embedder"), yaml_path)?;
    let builtins = build_builtins(raw.get("builtins"), yaml_path)?;
    let workspace = build_workspace(raw.get("workspace"), yaml_path)?;
    let extensions = build_extensions(raw.get("extensions"), yaml_path)?;
    Ok(Manifest {
        yaml_path: yaml_path.to_path_buf(),
        name: optional_str(raw, "name", yaml_path)?,
        instructions: optional_str(raw, "instructions", yaml_path)?,
        overview_prefix: optional_str(raw, "overview_prefix", yaml_path)?,
        source_roots,
        trust,
        tools,
        embedder,
        builtins,
        env_file: optional_str(raw, "env_file", yaml_path)?,
        workspace,
        extensions,
    })
}
/// Parse the free-form `extensions:` mapping (passed through untouched for
/// downstream consumers). Absent or null yields an empty map.
fn build_extensions(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<serde_json::Map<String, serde_json::Value>, ManifestError> {
    match raw {
        None | Some(serde_yaml::Value::Null) => Ok(serde_json::Map::new()),
        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
            serde_json::Value::Object(obj) => Ok(obj),
            _ => Err(ManifestError::at(yaml_path, "extensions must be a mapping")),
        },
        Some(_) => Err(ManifestError::at(
            yaml_path,
            "extensions must be a mapping (downstream-binary-specific keys)",
        )),
    }
}
/// Parse the optional `workspace:` section.
///
/// Absent or null yields `None`. Validates the key set, the `kind`
/// enumeration, and two cross-field rules: `kind: local` requires `root`,
/// and `watch` is only allowed for local workspaces.
fn build_workspace(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<Option<WorkspaceConfig>, ManifestError> {
    let Some(raw) = raw else { return Ok(None) };
    if matches!(raw, serde_yaml::Value::Null) {
        return Ok(None);
    }
    let map = raw
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, "workspace must be a mapping"))?;
    check_keys(map, ALLOWED_WORKSPACE_KEYS, "workspace keys", yaml_path)?;
    // kind defaults to github when omitted or null.
    let kind = match map.get("kind") {
        None | Some(serde_yaml::Value::Null) => WorkspaceKind::default(),
        Some(serde_yaml::Value::String(s)) => match s.as_str() {
            "github" => WorkspaceKind::Github,
            "local" => WorkspaceKind::Local,
            other => {
                return Err(ManifestError::at(
                    yaml_path,
                    format!(
                        "workspace.kind must be one of {VALID_WORKSPACE_KIND:?}, got {other:?}"
                    ),
                ));
            }
        },
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("workspace.kind must be one of {VALID_WORKSPACE_KIND:?}"),
            ))
        }
    };
    let root = match map.get("root") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
        _ => {
            return Err(ManifestError::at(
                yaml_path,
                "workspace.root must be a non-empty string",
            ))
        }
    };
    let watch = match map.get("watch") {
        None | Some(serde_yaml::Value::Null) => false,
        Some(serde_yaml::Value::Bool(b)) => *b,
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                "workspace.watch must be a bool",
            ))
        }
    };
    let applies_to = match map.get("applies_to") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) if !s.is_empty() => Some(s.clone()),
        _ => {
            return Err(ManifestError::at(
                yaml_path,
                "workspace.applies_to must be a non-empty string (a relative path)",
            ))
        }
    };
    // Cross-field validation.
    if kind == WorkspaceKind::Local && root.is_none() {
        return Err(ManifestError::at(
            yaml_path,
            "workspace.kind: local requires workspace.root to be set",
        ));
    }
    if kind == WorkspaceKind::Github && watch {
        return Err(ManifestError::at(
            yaml_path,
            "workspace.watch is only valid with workspace.kind: local",
        ));
    }
    Ok(Some(WorkspaceConfig {
        kind,
        root,
        watch,
        applies_to,
    }))
}
/// Reject any key of `map` not present in `allowed`.
///
/// Non-string keys are reported as `"<non-string-key>"`. Unknown keys are
/// sorted so the error message is deterministic.
fn check_keys(
    map: &serde_yaml::Mapping,
    allowed: &[&str],
    label: &str,
    yaml_path: &Path,
) -> Result<(), ManifestError> {
    let mut unknown: Vec<String> = map
        .iter()
        .map(|(k, _)| k.as_str().unwrap_or("<non-string-key>"))
        .filter(|key| !allowed.contains(key))
        .map(str::to_string)
        .collect();
    if unknown.is_empty() {
        Ok(())
    } else {
        unknown.sort();
        Err(ManifestError::at(
            yaml_path,
            format!("unknown {label}: {unknown:?}. Allowed: {allowed:?}"),
        ))
    }
}
/// Read an optional string-valued key; absent or null yields `None`,
/// any non-string value is an error.
fn optional_str(
    raw: &serde_yaml::Mapping,
    key: &str,
    yaml_path: &Path,
) -> Result<Option<String>, ManifestError> {
    let Some(value) = raw.get(key) else {
        return Ok(None);
    };
    match value {
        serde_yaml::Value::Null => Ok(None),
        serde_yaml::Value::String(s) => Ok(Some(s.clone())),
        _ => Err(ManifestError::at(
            yaml_path,
            format!("{key} must be a string"),
        )),
    }
}
fn build_trust(
raw: Option<&serde_yaml::Value>,
yaml_path: &Path,
) -> Result<TrustConfig, ManifestError> {
let Some(raw) = raw else {
return Ok(TrustConfig::default());
};
let map = raw
.as_mapping()
.ok_or_else(|| ManifestError::at(yaml_path, "trust must be a mapping"))?;
check_keys(map, ALLOWED_TRUST_KEYS, "trust keys", yaml_path)?;
let mut cfg = TrustConfig::default();
if let Some(v) = map.get("allow_python_tools") {
cfg.allow_python_tools = v.as_bool().ok_or_else(|| {
ManifestError::at(yaml_path, "trust.allow_python_tools must be a bool")
})?;
}
if let Some(v) = map.get("allow_embedder") {
cfg.allow_embedder = v
.as_bool()
.ok_or_else(|| ManifestError::at(yaml_path, "trust.allow_embedder must be a bool"))?;
}
if let Some(v) = map.get("allow_query_preprocessor") {
cfg.allow_query_preprocessor = v.as_bool().ok_or_else(|| {
ManifestError::at(yaml_path, "trust.allow_query_preprocessor must be a bool")
})?;
}
Ok(cfg)
}
/// Parse the optional `tools:` list, rejecting duplicate tool names.
///
/// Absent yields an empty list; a non-sequence value is an error. Each entry
/// is delegated to `build_tool` and the resulting names are checked for
/// uniqueness (duplicates are reported in order of appearance).
fn build_tools(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<Vec<ToolSpec>, ManifestError> {
    // BTreeSet instead of the previous BTreeMap<String, ()> — this is a set
    // membership check, not a key/value store.
    use std::collections::BTreeSet;

    let Some(raw) = raw else {
        return Ok(Vec::new());
    };
    let seq = raw
        .as_sequence()
        .ok_or_else(|| ManifestError::at(yaml_path, "tools must be a list"))?;
    let mut tools: Vec<ToolSpec> = Vec::with_capacity(seq.len());
    let mut seen: BTreeSet<String> = BTreeSet::new();
    for (i, entry) in seq.iter().enumerate() {
        let tool = build_tool(entry, i, yaml_path)?;
        let name = tool.name();
        // insert returns false when the name was already present.
        if !seen.insert(name.to_string()) {
            return Err(ManifestError::at(
                yaml_path,
                format!("duplicate tool name: {name:?}"),
            ));
        }
        tools.push(tool);
    }
    Ok(tools)
}
/// Parse a single `tools:` entry (`idx` is used only in error messages).
///
/// Exactly one of `cypher:`, `python:`, or `bundled:` selects the kind.
/// Bundled entries are delegated to `build_bundled_override`; the other two
/// kinds require an identifier `name:` and forbid `hidden:`.
fn build_tool(
    entry: &serde_yaml::Value,
    idx: usize,
    yaml_path: &Path,
) -> Result<ToolSpec, ManifestError> {
    let map = entry
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, format!("tools[{idx}] must be a mapping")))?;
    check_keys(map, ALLOWED_TOOL_KEYS, "tool keys", yaml_path)?;
    // The three kind markers are mutually exclusive; record which are set.
    let has_cypher = map.contains_key("cypher");
    let has_python = map.contains_key("python");
    let has_bundled = map.contains_key("bundled");
    let kinds_present: Vec<&str> = [
        ("cypher", has_cypher),
        ("python", has_python),
        ("bundled", has_bundled),
    ]
    .into_iter()
    .filter(|(_, p)| *p)
    .map(|(k, _)| k)
    .collect();
    if kinds_present.is_empty() {
        return Err(ManifestError::at(
            yaml_path,
            format!("tools[{idx}] needs exactly one of: [\"cypher\", \"python\", \"bundled\"]"),
        ));
    }
    if kinds_present.len() > 1 {
        return Err(ManifestError::at(
            yaml_path,
            format!("tools[{idx}] has multiple kinds set ({kinds_present:?}); pick exactly one"),
        ));
    }
    // Bundled overrides have their own (more restrictive) key rules.
    if has_bundled {
        return build_bundled_override(map, idx, yaml_path);
    }
    let name = map
        .get("name")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!("tools[{idx}] needs a string `name:` matching ^[a-zA-Z_][a-zA-Z0-9_]*$"),
            )
        })?
        .to_string();
    // `hidden:` is only meaningful on bundled overrides.
    if map.contains_key("hidden") {
        return Err(ManifestError::at(
            yaml_path,
            format!(
                "tools[{idx}] ({name:?}) `hidden:` is only valid on `bundled:` override entries"
            ),
        ));
    }
    let description = match map.get("description") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] ({name:?}).description must be a string"),
            ))
        }
    };
    // parameters, when present, must be a mapping; stored converted to JSON.
    let parameters = match map.get("parameters") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(v) if v.is_mapping() => Some(yaml_to_json(v.clone())?),
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] ({name:?}).parameters must be a mapping"),
            ))
        }
    };
    if has_cypher {
        // Note: cypher is the only field rejected when merely whitespace.
        let cypher = map
            .get("cypher")
            .and_then(|v| v.as_str())
            .filter(|s| !s.trim().is_empty())
            .ok_or_else(|| {
                ManifestError::at(
                    yaml_path,
                    format!("tools[{idx}] ({name:?}).cypher must be a non-empty string"),
                )
            })?
            .to_string();
        return Ok(ToolSpec::Cypher(CypherTool {
            name,
            cypher,
            description,
            parameters,
        }));
    }
    // Only the python kind remains at this point.
    let python = map
        .get("python")
        .and_then(|v| v.as_str())
        .filter(|s| !s.is_empty())
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!("tools[{idx}] ({name:?}).python must be a non-empty path string"),
            )
        })?
        .to_string();
    let function = map
        .get("function")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!(
                    "tools[{idx}] ({name:?}) python tools need `function:` set to a valid Python identifier"
                ),
            )
        })?
        .to_string();
    Ok(ToolSpec::Python(PythonTool {
        name,
        python,
        function,
        description,
        parameters,
    }))
}
/// Parse a `bundled:` tool entry — an override for a built-in tool.
///
/// Overrides may only customise `description:` and `hidden:`; the structural
/// keys `name`, `parameters`, and `function` are rejected.
fn build_bundled_override(
    map: &serde_yaml::Mapping,
    idx: usize,
    yaml_path: &Path,
) -> Result<ToolSpec, ManifestError> {
    let name = map
        .get("bundled")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!(
                    "tools[{idx}] `bundled:` must be a string naming a bundled tool \
                     (must match ^[a-zA-Z_][a-zA-Z0-9_]*$)"
                ),
            )
        })?
        .to_string();
    // Reject keys that would redefine the bundled tool rather than annotate it.
    for forbidden in ["name", "parameters", "function"] {
        if map.contains_key(forbidden) {
            return Err(ManifestError::at(
                yaml_path,
                format!(
                    "tools[{idx}] bundled override {name:?} cannot set `{forbidden}:` \
                     (only `description:` and `hidden:` are permitted on overrides)"
                ),
            ));
        }
    }
    let description = match map.get("description") {
        None | Some(serde_yaml::Value::Null) => None,
        Some(serde_yaml::Value::String(s)) => Some(s.clone()),
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] bundled override {name:?}.description must be a string"),
            ))
        }
    };
    let hidden = match map.get("hidden") {
        None | Some(serde_yaml::Value::Null) => false,
        Some(serde_yaml::Value::Bool(b)) => *b,
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                format!("tools[{idx}] bundled override {name:?}.hidden must be a bool"),
            ))
        }
    };
    Ok(ToolSpec::Bundled(BundledOverride {
        name,
        description,
        hidden,
    }))
}
/// Parse the optional `embedder:` section into an `EmbedderConfig`.
///
/// Absent or null yields `None`. `module` must be a non-empty string (path
/// or dotted name), `class` a valid identifier, and `kwargs` — if present —
/// a mapping (defaults to empty).
fn build_embedder(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<Option<EmbedderConfig>, ManifestError> {
    let Some(raw) = raw else { return Ok(None) };
    if matches!(raw, serde_yaml::Value::Null) {
        return Ok(None);
    }
    let map = raw
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, "embedder must be a mapping"))?;
    check_keys(map, ALLOWED_EMBEDDER_KEYS, "embedder keys", yaml_path)?;
    let module = map
        .get("module")
        .and_then(|v| v.as_str())
        .filter(|s| !s.is_empty())
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                "embedder.module must be a non-empty string (path or dotted name)",
            )
        })?
        .to_string();
    let class = map
        .get("class")
        .and_then(|v| v.as_str())
        .filter(|s| valid_identifier(s))
        .ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                "embedder.class must be a valid identifier matching ^[a-zA-Z_][a-zA-Z0-9_]*$",
            )
        })?
        .to_string();
    // kwargs: absent/null -> empty map; otherwise must convert to a JSON object.
    let kwargs = match map.get("kwargs") {
        None | Some(serde_yaml::Value::Null) => serde_json::Map::new(),
        Some(v) if v.is_mapping() => match yaml_to_json(v.clone())? {
            serde_json::Value::Object(o) => o,
            _ => {
                return Err(ManifestError::at(
                    yaml_path,
                    "embedder.kwargs must be a mapping",
                ))
            }
        },
        Some(_) => {
            return Err(ManifestError::at(
                yaml_path,
                "embedder.kwargs must be a mapping",
            ))
        }
    };
    Ok(Some(EmbedderConfig {
        module,
        class,
        kwargs,
    }))
}
/// Parse the optional `builtins:` section.
///
/// Absent or null yields the defaults (`save_graph: false`,
/// `temp_cleanup: never`).
fn build_builtins(
    raw: Option<&serde_yaml::Value>,
    yaml_path: &Path,
) -> Result<BuiltinsConfig, ManifestError> {
    let Some(raw) = raw else {
        return Ok(BuiltinsConfig::default());
    };
    if matches!(raw, serde_yaml::Value::Null) {
        return Ok(BuiltinsConfig::default());
    }
    let map = raw
        .as_mapping()
        .ok_or_else(|| ManifestError::at(yaml_path, "builtins must be a mapping"))?;
    check_keys(map, ALLOWED_BUILTIN_KEYS, "builtins keys", yaml_path)?;
    let mut cfg = BuiltinsConfig::default();
    if let Some(v) = map.get("save_graph") {
        cfg.save_graph = v
            .as_bool()
            .ok_or_else(|| ManifestError::at(yaml_path, "builtins.save_graph must be a bool"))?;
    }
    if let Some(v) = map.get("temp_cleanup") {
        // Must be one of VALID_TEMP_CLEANUP; non-string and unknown values
        // report slightly different messages (the latter includes the value).
        let s = v.as_str().ok_or_else(|| {
            ManifestError::at(
                yaml_path,
                format!("builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}"),
            )
        })?;
        cfg.temp_cleanup = match s {
            "never" => TempCleanup::Never,
            "on_overview" => TempCleanup::OnOverview,
            other => {
                return Err(ManifestError::at(
                    yaml_path,
                    format!(
                        "builtins.temp_cleanup must be one of {VALID_TEMP_CLEANUP:?}, got {other:?}"
                    ),
                ))
            }
        };
    }
    Ok(cfg)
}
/// True when `s` matches `^[a-zA-Z_][a-zA-Z0-9_]*$` (ASCII only; empty is invalid).
fn valid_identifier(s: &str) -> bool {
    let mut rest = s.chars();
    let head_ok = matches!(rest.next(), Some(c) if c.is_ascii_alphabetic() || c == '_');
    head_ok && rest.all(|c| c.is_ascii_alphanumeric() || c == '_')
}
/// Convert a YAML value into its JSON equivalent.
///
/// The error is "bare" — it carries the `"<manifest>"` placeholder path
/// because this helper has no file context of its own.
fn yaml_to_json(v: serde_yaml::Value) -> Result<serde_json::Value, ManifestError> {
    serde_json::to_value(&v)
        .map_err(|e| ManifestError::bare(format!("yaml→json conversion failed: {e}")))
}
// NOTE(review): `_Reserved` has no uses in this file and is the only consumer
// of the `serde::Deserialize` import here — presumably a placeholder for
// future typed deserialization; confirm before removing.
#[derive(Debug, Deserialize)]
struct _Reserved;
#[cfg(test)]
mod tests {
use super::*;
fn write_tmp(text: &str) -> tempfile::NamedTempFile {
let mut f = tempfile::NamedTempFile::new().unwrap();
std::io::Write::write_all(&mut f, text.as_bytes()).unwrap();
f
}
#[test]
fn loads_minimal_empty_manifest() {
let f = write_tmp("");
let m = load(f.path()).unwrap();
assert_eq!(m.tools.len(), 0);
assert_eq!(m.source_roots.len(), 0);
assert!(!m.trust.allow_python_tools);
assert!(!m.trust.allow_embedder);
assert_eq!(m.builtins.temp_cleanup, TempCleanup::Never);
}
#[test]
fn loads_name_and_instructions() {
let f = write_tmp("name: Demo\ninstructions: |\n multi-line\n block\n");
let m = load(f.path()).unwrap();
assert_eq!(m.name.as_deref(), Some("Demo"));
assert!(m.instructions.unwrap().contains("multi-line"));
}
#[test]
fn rejects_unknown_top_key() {
let f = write_tmp("bogus: 1\n");
let err = load(f.path()).unwrap_err();
assert!(err.message.contains("unknown top-level"));
}
#[test]
fn source_root_string_normalises_to_list() {
let f = write_tmp("source_root: ./data\n");
let m = load(f.path()).unwrap();
assert_eq!(m.source_roots, vec!["./data".to_string()]);
}
#[test]
fn source_roots_list_preserved() {
let f = write_tmp("source_roots:\n - ./a\n - ./b\n");
let m = load(f.path()).unwrap();
assert_eq!(m.source_roots, vec!["./a".to_string(), "./b".to_string()]);
}
#[test]
fn rejects_both_source_root_and_source_roots() {
let f = write_tmp("source_root: ./a\nsource_roots: [./b]\n");
assert!(load(f.path()).unwrap_err().message.contains("not both"));
}
#[test]
fn cypher_tool_parses() {
let f = write_tmp("tools:\n - name: lookup\n cypher: MATCH (n) RETURN n\n");
let m = load(f.path()).unwrap();
assert_eq!(m.tools.len(), 1);
match &m.tools[0] {
ToolSpec::Cypher(t) => {
assert_eq!(t.name, "lookup");
assert!(t.cypher.contains("MATCH"));
}
_ => panic!("expected cypher tool"),
}
}
#[test]
fn python_tool_parses() {
let f =
write_tmp("tools:\n - name: detail\n python: ./tools.py\n function: detail\n");
let m = load(f.path()).unwrap();
match &m.tools[0] {
ToolSpec::Python(t) => {
assert_eq!(t.python, "./tools.py");
assert_eq!(t.function, "detail");
}
_ => panic!("expected python tool"),
}
}
#[test]
fn rejects_tool_with_both_kinds() {
let f = write_tmp(
"tools:\n - name: x\n cypher: 'MATCH (n) RETURN n'\n python: ./t.py\n function: x\n",
);
assert!(load(f.path())
.unwrap_err()
.message
.contains("multiple kinds"));
}
#[test]
fn rejects_tool_with_no_kind() {
let f = write_tmp("tools:\n - name: x\n");
assert!(load(f.path())
.unwrap_err()
.message
.contains("needs exactly one"));
}
#[test]
fn rejects_duplicate_tool_names() {
let f = write_tmp(
"tools:\n - name: same\n cypher: 'MATCH (n) RETURN n'\n - name: same\n cypher: 'MATCH (m) RETURN m'\n",
);
assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
}
#[test]
fn bundled_override_with_description_parses() {
let f =
write_tmp("tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n");
let m = load(f.path()).unwrap();
assert_eq!(m.tools.len(), 1);
match &m.tools[0] {
ToolSpec::Bundled(b) => {
assert_eq!(b.name, "repo_management");
assert_eq!(b.description.as_deref(), Some("FIRST STEP"));
assert!(!b.hidden);
}
_ => panic!("expected bundled override"),
}
}
#[test]
fn bundled_override_with_hidden_parses() {
let f = write_tmp("tools:\n - bundled: ping\n hidden: true\n");
let m = load(f.path()).unwrap();
match &m.tools[0] {
ToolSpec::Bundled(b) => {
assert_eq!(b.name, "ping");
assert!(b.hidden);
assert!(b.description.is_none());
}
_ => panic!("expected bundled override"),
}
}
#[test]
fn bundled_override_alongside_cypher_tools_parses() {
let f = write_tmp(
"tools:\n\
\x20\x20- bundled: cypher_query\n\
\x20\x20\x20\x20description: \"Custom server description\"\n\
\x20\x20- name: lookup\n\
\x20\x20\x20\x20cypher: \"MATCH (n) RETURN n\"\n",
);
let m = load(f.path()).unwrap();
assert_eq!(m.tools.len(), 2);
assert!(matches!(m.tools[0], ToolSpec::Bundled(_)));
assert!(matches!(m.tools[1], ToolSpec::Cypher(_)));
}
#[test]
fn rejects_bundled_with_cypher_kind() {
let f =
write_tmp("tools:\n - bundled: cypher_query\n cypher: \"MATCH (n) RETURN n\"\n");
let err = load(f.path()).unwrap_err();
assert!(
err.message.contains("multiple kinds"),
"got: {}",
err.message
);
}
#[test]
fn rejects_bundled_with_name_field() {
let f = write_tmp("tools:\n - bundled: ping\n name: ping\n");
let err = load(f.path()).unwrap_err();
assert!(
err.message.contains("cannot set `name:`"),
"got: {}",
err.message
);
}
#[test]
fn rejects_bundled_with_parameters_field() {
let f =
write_tmp("tools:\n - bundled: cypher_query\n parameters:\n type: object\n");
let err = load(f.path()).unwrap_err();
assert!(
err.message.contains("cannot set `parameters:`"),
"got: {}",
err.message
);
}
#[test]
fn rejects_bundled_with_non_bool_hidden() {
let f = write_tmp("tools:\n - bundled: ping\n hidden: yes-please\n");
let err = load(f.path()).unwrap_err();
assert!(
err.message.contains("hidden must be a bool"),
"got: {}",
err.message
);
}
#[test]
fn rejects_hidden_on_cypher_tool() {
let f = write_tmp(
"tools:\n - name: lookup\n cypher: \"MATCH (n) RETURN n\"\n hidden: true\n",
);
let err = load(f.path()).unwrap_err();
assert!(
err.message
.contains("`hidden:` is only valid on `bundled:` override entries"),
"got: {}",
err.message
);
}
#[test]
fn rejects_duplicate_bundled_overrides() {
let f = write_tmp(
"tools:\n - bundled: ping\n hidden: true\n - bundled: ping\n description: \"x\"\n",
);
assert!(load(f.path()).unwrap_err().message.contains("duplicate"));
}
#[test]
fn rejects_bundled_with_invalid_identifier() {
let f = write_tmp("tools:\n - bundled: \"123-bad\"\n hidden: true\n");
let err = load(f.path()).unwrap_err();
assert!(
err.message.contains("must be a string"),
"got: {}",
err.message
);
}
#[test]
fn bundled_override_to_json_shape() {
let f = write_tmp(
"tools:\n - bundled: repo_management\n description: \"FIRST STEP\"\n hidden: false\n",
);
let m = load(f.path()).unwrap();
let v = m.to_json();
assert_eq!(v["tools"][0]["kind"], "bundled");
assert_eq!(v["tools"][0]["name"], "repo_management");
assert_eq!(v["tools"][0]["description"], "FIRST STEP");
assert_eq!(v["tools"][0]["hidden"], false);
}
#[test]
fn embedder_parses() {
let f = write_tmp(
"embedder:\n module: ./e.py\n class: GraphEmbedder\n kwargs:\n cooldown: 900\n",
);
let m = load(f.path()).unwrap();
let e = m.embedder.unwrap();
assert_eq!(e.module, "./e.py");
assert_eq!(e.class, "GraphEmbedder");
assert_eq!(e.kwargs.get("cooldown").unwrap().as_i64(), Some(900));
}
#[test]
fn builtins_parses_temp_cleanup() {
let f = write_tmp("builtins:\n save_graph: true\n temp_cleanup: on_overview\n");
let m = load(f.path()).unwrap();
assert!(m.builtins.save_graph);
assert_eq!(m.builtins.temp_cleanup, TempCleanup::OnOverview);
}
#[test]
fn rejects_invalid_temp_cleanup() {
let f = write_tmp("builtins:\n temp_cleanup: nuke\n");
assert!(load(f.path()).unwrap_err().message.contains("temp_cleanup"));
}
#[test]
fn allow_embedder_trust_parses() {
let f = write_tmp("trust:\n allow_embedder: true\n");
let m = load(f.path()).unwrap();
assert!(m.trust.allow_embedder);
}
#[test]
fn allow_query_preprocessor_trust_parses() {
let f = write_tmp("trust:\n allow_query_preprocessor: true\n");
let m = load(f.path()).unwrap();
assert!(m.trust.allow_query_preprocessor);
assert!(!m.trust.allow_embedder);
assert!(!m.trust.allow_python_tools);
}
#[test]
fn allow_query_preprocessor_rejects_non_bool() {
let f = write_tmp("trust:\n allow_query_preprocessor: \"yes\"\n");
let err = load(f.path()).unwrap_err();
assert!(err
.message
.contains("allow_query_preprocessor must be a bool"));
}
#[test]
fn find_sibling_works() {
let dir = tempfile::tempdir().unwrap();
let graph = dir.path().join("demo.kgl");
std::fs::write(&graph, b"\x00").unwrap();
let sibling = dir.path().join("demo_mcp.yaml");
std::fs::write(&sibling, "name: x\n").unwrap();
assert_eq!(find_sibling_manifest(&graph), Some(sibling));
}
#[test]
fn workspace_local_parses() {
let f = write_tmp("workspace:\n kind: local\n root: ./src\n watch: true\n");
let m = load(f.path()).unwrap();
let w = m.workspace.unwrap();
assert_eq!(w.kind, WorkspaceKind::Local);
assert_eq!(w.root.as_deref(), Some("./src"));
assert!(w.watch);
}
#[test]
fn workspace_github_default_kind() {
let f = write_tmp("workspace: {}\n");
let m = load(f.path()).unwrap();
let w = m.workspace.unwrap();
assert_eq!(w.kind, WorkspaceKind::Github);
assert!(w.root.is_none());
assert!(!w.watch);
}
#[test]
fn workspace_local_without_root_errors() {
let f = write_tmp("workspace:\n kind: local\n");
let err = load(f.path()).unwrap_err();
assert!(err.message.contains("requires workspace.root"));
}
#[test]
fn workspace_unknown_key_rejected() {
let f = write_tmp("workspace:\n kind: local\n root: ./x\n bogus: 1\n");
let err = load(f.path()).unwrap_err();
assert!(err.message.contains("unknown workspace keys"));
}
#[test]
fn workspace_invalid_kind_rejected() {
let f = write_tmp("workspace:\n kind: docker\n root: ./x\n");
let err = load(f.path()).unwrap_err();
assert!(err.message.contains("workspace.kind"));
}
#[test]
fn workspace_watch_invalid_for_github() {
let f = write_tmp("workspace:\n kind: github\n watch: true\n");
let err = load(f.path()).unwrap_err();
assert!(err.message.contains("watch is only valid"));
}
#[test]
fn extensions_passthrough_parses() {
let f = write_tmp(
"extensions:\n csv_http_server: true\n csv_http_server_dir: temp/\n arbitrary:\n nested: 1\n",
);
let m = load(f.path()).unwrap();
assert_eq!(
m.extensions
.get("csv_http_server")
.and_then(|v| v.as_bool()),
Some(true)
);
assert_eq!(
m.extensions
.get("csv_http_server_dir")
.and_then(|v| v.as_str()),
Some("temp/")
);
assert_eq!(
m.extensions
.get("arbitrary")
.and_then(|v| v.get("nested"))
.and_then(|v| v.as_i64()),
Some(1)
);
}
#[test]
fn extensions_absent_defaults_to_empty() {
let f = write_tmp("name: x\n");
let m = load(f.path()).unwrap();
assert!(m.extensions.is_empty());
}
#[test]
fn extensions_inner_keys_unvalidated() {
let f = write_tmp(
"extensions:\n whatever_kglite_wants: foo\n some_other_consumer: { a: 1, b: 2 }\n",
);
load(f.path()).unwrap();
}
#[test]
fn extensions_must_be_a_mapping() {
let f = write_tmp("extensions: not-a-mapping\n");
let err = load(f.path()).unwrap_err();
assert!(err.message.contains("extensions must be a mapping"));
}
#[test]
fn env_file_key_parses() {
let f = write_tmp("env_file: ../.env\n");
let m = load(f.path()).unwrap();
assert_eq!(m.env_file.as_deref(), Some("../.env"));
}
#[test]
fn env_file_unset_is_none() {
    // Omitting `env_file` leaves the field as None — no implicit default.
    let file = write_tmp("name: Demo\n");
    let loaded = load(file.path()).unwrap();
    assert!(loaded.env_file.is_none());
}
#[test]
fn find_workspace_works() {
    // A manifest sitting directly in the queried directory is found.
    let dir = tempfile::tempdir().unwrap();
    let expected = dir.path().join("workspace_mcp.yaml");
    std::fs::write(&expected, "name: ws\n").unwrap();
    assert_eq!(find_workspace_manifest(dir.path()), Some(expected));
}
#[test]
fn find_workspace_walks_one_level_up_with_applies_to() {
    // A parent-level manifest whose `workspace.applies_to` names the child
    // directory must be discovered when searching from that child.
    let tmp = tempfile::tempdir().unwrap();
    let parent = tmp.path().join("parent");
    std::fs::create_dir(&parent).unwrap();
    let manifest = parent.join("workspace_mcp.yaml");
    let yaml = "workspace:\n kind: github\n applies_to: ./repos\n";
    std::fs::write(&manifest, yaml).unwrap();
    let repos = parent.join("repos");
    std::fs::create_dir(&repos).unwrap();
    // Direct lookup in the parent itself still resolves normally.
    assert_eq!(find_workspace_manifest(&parent), Some(manifest.clone()));
    // Lookup from the child falls back to the parent manifest.
    let found = find_workspace_manifest(&repos).expect("parent fallback should fire");
    assert_eq!(
        found.canonicalize().unwrap(),
        manifest.canonicalize().unwrap()
    );
}
#[test]
fn find_workspace_ignores_parent_without_applies_to() {
    // A plain parent manifest (no workspace.applies_to) must not be
    // silently attached to child directories.
    let tmp = tempfile::tempdir().unwrap();
    let parent = tmp.path().join("parent");
    std::fs::create_dir(&parent).unwrap();
    std::fs::write(parent.join("workspace_mcp.yaml"), "name: not for repos\n").unwrap();
    let repos = parent.join("repos");
    std::fs::create_dir(&repos).unwrap();
    assert_eq!(
        find_workspace_manifest(&repos),
        None,
        "parent manifest without workspace.applies_to must NOT auto-attach"
    );
}
#[test]
fn find_workspace_ignores_parent_with_mismatched_applies_to() {
    // `applies_to` targets one specific child directory; unrelated siblings
    // must not inherit the parent manifest.
    let tmp = tempfile::tempdir().unwrap();
    let parent = tmp.path().join("parent");
    std::fs::create_dir(&parent).unwrap();
    let yaml = "workspace:\n kind: github\n applies_to: ./repos\n";
    std::fs::write(parent.join("workspace_mcp.yaml"), yaml).unwrap();
    let other = parent.join("other_dir");
    std::fs::create_dir(&other).unwrap();
    assert_eq!(
        find_workspace_manifest(&other),
        None,
        "applies_to: ./repos must NOT match --workspace ./other_dir"
    );
}
#[test]
fn find_workspace_returns_none_when_missing_everywhere() {
    // Neither the directory nor its parent holds a manifest -> None.
    let tmp = tempfile::tempdir().unwrap();
    let child = tmp.path().join("child");
    std::fs::create_dir(&child).unwrap();
    assert_eq!(find_workspace_manifest(&child), None);
}
#[test]
fn find_workspace_primary_wins_over_parent_fallback() {
    // When a manifest exists both in the directory itself and as an
    // applicable parent fallback, the in-directory one takes precedence.
    let tmp = tempfile::tempdir().unwrap();
    let parent_manifest = tmp.path().join("workspace_mcp.yaml");
    let yaml = "workspace:\n kind: github\n applies_to: ./repos\n";
    std::fs::write(&parent_manifest, yaml).unwrap();
    let child = tmp.path().join("repos");
    std::fs::create_dir(&child).unwrap();
    let child_manifest = child.join("workspace_mcp.yaml");
    std::fs::write(&child_manifest, "name: child\n").unwrap();
    let found = find_workspace_manifest(&child).expect("primary should resolve");
    assert_eq!(
        found.canonicalize().unwrap(),
        child_manifest.canonicalize().unwrap(),
        "primary location must win when both primary and parent fallback exist"
    );
}
#[test]
fn to_json_shape_is_stable() {
// Golden test: pin the exact JSON projection of a loaded manifest so
// consumers of to_json() can rely on every key being present, including
// explicit nulls / empty containers for fields left unset in the YAML.
let f = write_tmp(
r#"
name: KGLite Codebase
source_roots: [src, lib]
trust:
allow_embedder: true
embedder:
module: kglite.embed
class: SentenceTransformerEmbedder
builtins:
save_graph: true
temp_cleanup: on_overview
"#,
);
let m = load(f.path()).unwrap();
let actual = m.to_json();
// Unset trust flags default to false; optional scalars serialize as null;
// embedder kwargs default to an empty object; tools/extensions to empty.
let expected = serde_json::json!({
"yaml_path": f.path().display().to_string(),
"name": "KGLite Codebase",
"instructions": null,
"overview_prefix": null,
"source_roots": ["src", "lib"],
"trust": {
"allow_python_tools": false,
"allow_embedder": true,
"allow_query_preprocessor": false,
},
"tools": [],
"embedder": {
"module": "kglite.embed",
"class": "SentenceTransformerEmbedder",
"kwargs": {},
},
"builtins": { "save_graph": true, "temp_cleanup": "on_overview" },
"env_file": null,
"workspace": null,
"extensions": {},
});
assert_eq!(actual, expected);
}
#[test]
fn to_json_round_trips_tools_and_workspace() {
// Spot-check to_json() over a manifest exercising most of the surface:
// a trust flag, both tool kinds (cypher + python), workspace, builtins,
// env_file, and extensions.
let f = write_tmp(
r#"
name: Full Surface
source_root: ./src
trust:
allow_python_tools: true
tools:
- name: nodes_for
cypher: "MATCH (n {name: $name}) RETURN n"
description: "fetch nodes by name"
- name: run_query
python: tools.py
function: run
workspace:
kind: local
root: /tmp/ws
watch: true
builtins:
save_graph: false
env_file: .env.local
extensions:
kglite:
flavour: standard
"#,
);
let m = load(f.path()).unwrap();
let v = m.to_json();
assert_eq!(v["name"], "Full Surface");
assert_eq!(v["trust"]["allow_python_tools"], true);
// The workspace block survives with all three keys intact.
assert_eq!(v["workspace"]["kind"], "local");
assert_eq!(v["workspace"]["root"], "/tmp/ws");
assert_eq!(v["workspace"]["watch"], true);
assert_eq!(v["env_file"], ".env.local");
// Tools keep their YAML order and carry a "kind" discriminant.
assert_eq!(v["tools"][0]["kind"], "cypher");
assert_eq!(v["tools"][0]["name"], "nodes_for");
assert_eq!(v["tools"][1]["kind"], "python");
assert_eq!(v["tools"][1]["name"], "run_query");
assert_eq!(v["tools"][1]["python"], "tools.py");
assert_eq!(v["tools"][1]["function"], "run");
// Extensions pass through untouched.
assert_eq!(v["extensions"]["kglite"]["flavour"], "standard");
}
}