Skip to main content

mlua_swarm/lua/
agent_md_loader.rs

1//! agent.md frontmatter + body loader — turns agent-profiles
2//! `agents/*.md` files into `AgentDef`s.
3//!
4//! ## Input format
5//!
6//! ```text
7//! ---
8//! name: impl-lead
9//! description: Implementation worker ...
10//! model: sonnet
11//! effort: high
12//! tools: Read, Edit, Write, Grep, Glob
13//! worker_binding: mse-worker-coder
14//! permissionMode: bypassPermissions
15//! memory: user
16//! abtest: true
17//! ---
18//! <Markdown system prompt body>
19//! ```
20//!
21//! ## Output
22//!
//! One `AgentDef` per file (`load_dir` returns a `Vec<AgentDef>`) —
//! each entry carries `profile: Some(AgentProfile { ... })`, `kind` is
//! whatever the caller passed in (the loader never picks a default),
//! and `spec` is `Value::Null`. The backend configuration (`spec`) is
//! injected separately by the caller — on the Operator-construction
//! path.
27//!
28//! ## Scope
29//!
30//! - Only YAML frontmatter delimited by `---` is accepted. TOML and
31//!   JSON are not supported.
32//! - `tools` accepts both a CSV string (`"Read, Edit"`) and a YAML
33//!   array (`["Read", "Edit"]`).
34//! - `worker_binding` is the Claude Code SubAgent definition name this
35//!   agent binds to at spawn time — first-class (not dumped into
36//!   `extras`) because the compiler and the WS thin path read it
37//!   directly (see `AgentProfile::worker_binding`).
38//! - Any field beyond the known set (`name` / `description` / `model`
39//!   / `effort` / `tools` / `worker_binding`) is dumped into an
40//!   `extras` `Value` — a future-proof carry for C-C-specific fields.
41//! - The body is kept verbatim, from just after the closing `---` to
42//!   the end of the file.
43
44use crate::blueprint::{AgentDef, AgentKind, AgentProfile};
45use serde_json::{Map, Value};
46use std::fs;
47use std::path::Path;
48
49/// Errors specific to the agent.md loader.
50#[derive(Debug)]
51pub enum LoadError {
52    /// Reading the file failed (not found, permissions, etc.).
53    Io(std::io::Error),
54    /// The `---` frontmatter delimiter was not found, or the body
55    /// could not be separated.
56    NoFrontmatter {
57        /// Path (or source label) of the offending file.
58        path: String,
59    },
60    /// Frontmatter YAML failed to parse.
61    Yaml {
62        /// Path (or source label) of the offending file.
63        path: String,
64        /// The underlying YAML parse error.
65        source: serde_yaml::Error,
66    },
67    /// Frontmatter has no `name` field, so we cannot determine an
68    /// agent identifier.
69    MissingName {
70        /// Path (or source label) of the offending file.
71        path: String,
72    },
73}
74
75impl std::fmt::Display for LoadError {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        match self {
78            LoadError::Io(e) => write!(f, "io error: {e}"),
79            LoadError::NoFrontmatter { path } => {
80                write!(f, "no frontmatter delimiter `---` in {path}")
81            }
82            LoadError::Yaml { path, source } => write!(f, "yaml parse error in {path}: {source}"),
83            LoadError::MissingName { path } => {
84                write!(f, "frontmatter missing required `name` field in {path}")
85            }
86        }
87    }
88}
89
90impl std::error::Error for LoadError {}
91
92impl From<std::io::Error> for LoadError {
93    fn from(e: std::io::Error) -> Self {
94        LoadError::Io(e)
95    }
96}
97
98/// Turn a single `agent.md` file into an `AgentDef`.
99///
100/// **`kind` must be provided explicitly by the caller.** The old
101/// hardcoded `Operator` default was structurally wrong: an agent.md
102/// has no knowledge of deployment and should not decide `kind` in the
103/// loader. The caller passes the kind after resolving the cascade —
104/// `Blueprint.default_agent_kind` → the sibling `$agent_md` override
105/// → `CompilerHints.kind_override`. `spec` is produced as
106/// `Value::Null`; the caller overwrites it if needed.
107pub fn load_file(path: impl AsRef<Path>, kind: AgentKind) -> Result<AgentDef, LoadError> {
108    let path = path.as_ref();
109    let text = fs::read_to_string(path)?;
110    parse(&text, &path.display().to_string(), kind)
111}
112
113/// Load every `*.md` under `dir`. Sorted ascending by file name.
114///
115/// Files without frontmatter — explanatory docs that are not agents —
116/// are **skipped**; `NoFrontmatter` is not turned into an error.
117/// Files that have frontmatter but fail to parse or lack `name` do
118/// propagate their errors.
119///
120/// `kind` applies uniformly to every file — the global default for
121/// this directory scope. To differentiate per file, the caller calls
122/// `load_file(path, per_file_kind)` directly.
123pub fn load_dir(dir: impl AsRef<Path>, kind: AgentKind) -> Result<Vec<AgentDef>, LoadError> {
124    let dir = dir.as_ref();
125    let mut entries: Vec<_> = fs::read_dir(dir)?
126        .filter_map(|e| e.ok())
127        .map(|e| e.path())
128        .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("md"))
129        .collect();
130    entries.sort();
131    let mut out = Vec::new();
132    for p in entries {
133        match load_file(&p, kind.clone()) {
134            Ok(def) => out.push(def),
135            Err(LoadError::NoFrontmatter { .. }) => continue,
136            Err(e) => return Err(e),
137        }
138    }
139    Ok(out)
140}
141
142/// Turn the text of an agent.md into an `AgentDef`. `pub` so unit
143/// tests can reach it. `kind` must be provided by the caller — same
144/// contract as `load_file`.
145pub fn parse(text: &str, source_label: &str, kind: AgentKind) -> Result<AgentDef, LoadError> {
146    let (front, body) = split_frontmatter(text).ok_or_else(|| LoadError::NoFrontmatter {
147        path: source_label.into(),
148    })?;
149    let yaml: Value = serde_yaml::from_str(front).map_err(|e| LoadError::Yaml {
150        path: source_label.into(),
151        source: e,
152    })?;
153    let obj = yaml.as_object().cloned().unwrap_or_default();
154
155    let name = obj
156        .get("name")
157        .and_then(|v| v.as_str())
158        .map(|s| s.to_string())
159        .ok_or_else(|| LoadError::MissingName {
160            path: source_label.into(),
161        })?;
162
163    let description = obj
164        .get("description")
165        .and_then(|v| v.as_str())
166        .map(|s| s.trim().to_string());
167    let model = obj
168        .get("model")
169        .and_then(|v| v.as_str())
170        .map(|s| s.to_string());
171    let effort = obj
172        .get("effort")
173        .and_then(|v| v.as_str())
174        .map(|s| s.to_string());
175    let tools = obj.get("tools").map(normalize_tools).unwrap_or_default();
176    let worker_binding = obj
177        .get("worker_binding")
178        .and_then(|v| v.as_str())
179        .map(|s| s.to_string());
180
181    // Dump everything outside the known set into `extras` — a
182    // future-proof carry for C-C-specific fields.
183    let known = [
184        "name",
185        "description",
186        "model",
187        "effort",
188        "tools",
189        "worker_binding",
190    ];
191    let mut extras = Map::new();
192    for (k, v) in &obj {
193        if !known.contains(&k.as_str()) {
194            extras.insert(k.clone(), v.clone());
195        }
196    }
197
198    let version_hash = Some(compute_body_hash(body));
199
200    let profile = AgentProfile {
201        system_prompt: body.to_string(),
202        model,
203        effort,
204        tools,
205        description: description.clone(),
206        extras: if extras.is_empty() {
207            Value::Null
208        } else {
209            Value::Object(extras)
210        },
211        version_hash,
212        worker_binding,
213    };
214
215    Ok(AgentDef {
216        name,
217        kind,
218        spec: Value::Null,
219        profile: Some(profile),
220        meta: None,
221    })
222}
223
224/// Compute the content hash of an agent body (its `system_prompt`).
225///
226/// 32-byte blake3, hex-encoded. This is the same form that populates
227/// `AgentProfile.version_hash`, and the same form recomputed by the
228/// `patch_applier.lua` post-hook when it detects a
229/// `/agents/N/profile/system_prompt` replacement — the
230/// `host.content_hash` primitive is also blake3 — so the Phase 1
231/// hash-consistency guarantee holds.
232pub fn compute_body_hash(body: &str) -> String {
233    blake3::hash(body.as_bytes()).to_hex().to_string()
234}
235
/// Split `---\n<frontmatter>\n---\n<body>` into `(frontmatter, body)`.
///
/// The opening delimiter must be the very first line of `text`: `---`
/// immediately followed by `\n` or `\r\n`. The closing delimiter is
/// the first later line that consists of `---` alone, optionally
/// followed by trailing spaces or tabs. A line that merely *starts*
/// with `---` (such as `----` or `---foo`) is ordinary frontmatter
/// content and does not end the frontmatter.
///
/// The returned frontmatter keeps its line terminators. The body
/// starts on the line after the closing delimiter and runs verbatim to
/// the end of `text`.
///
/// Returns `None` when either delimiter is missing.
fn split_frontmatter(text: &str) -> Option<(&str, &str)> {
    let rest = text
        .strip_prefix("---\n")
        .or_else(|| text.strip_prefix("---\r\n"))?;

    // Walk line by line (terminators included) so `offset` is always the
    // byte index of the current line's first character within `rest`.
    let mut offset = 0;
    for line in rest.split_inclusive('\n') {
        let content = line.trim_end_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));
        if content == "---" {
            let body_start = offset + line.len();
            return Some((&rest[..offset], &rest[body_start..]));
        }
        offset += line.len();
    }
    None
}
259
260/// Normalise the frontmatter's `tools` field to a `Vec<String>`.
261/// Accepted forms: CSV string (`"Read, Edit"`) or YAML array
262/// (`["Read", "Edit"]`).
263fn normalize_tools(v: &Value) -> Vec<String> {
264    if let Some(arr) = v.as_array() {
265        return arr
266            .iter()
267            .filter_map(|x| x.as_str().map(|s| s.trim().to_string()))
268            .filter(|s| !s.is_empty())
269            .collect();
270    }
271    if let Some(s) = v.as_str() {
272        return s
273            .split(',')
274            .map(|s| s.trim().to_string())
275            .filter(|s| !s.is_empty())
276            .collect();
277    }
278    Vec::new()
279}
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture with every known key except `worker_binding`, plus three
    /// unknown keys (`permissionMode`, `memory`, `abtest`).
    const SAMPLE: &str = "---\nname: impl-lead\ndescription: Implementation worker\nmodel: sonnet\neffort: high\ntools: Read, Edit, Grep\npermissionMode: bypassPermissions\nmemory: user\nabtest: true\n---\nYou are the implementation lead.\n\nWork in the caller-provided task directory.\n";

    /// Parse `text` as an `Operator` agent and return its profile,
    /// panicking if parsing fails or no profile is attached.
    fn profile_of(text: &str, label: &str) -> AgentProfile {
        parse(text, label, AgentKind::Operator)
            .unwrap()
            .profile
            .expect("profile present")
    }

    /// Shorthand for the `version_hash` that `text` parses to.
    fn hash_of(text: &str) -> Option<String> {
        profile_of(text, "x").version_hash
    }

    #[test]
    fn parses_full_frontmatter() {
        let def = parse(SAMPLE, "sample", AgentKind::Operator).expect("parse ok");
        assert_eq!(def.name, "impl-lead");
        assert!(matches!(def.kind, AgentKind::Operator));

        let profile = def.profile.expect("profile present");
        assert_eq!(profile.model.as_deref(), Some("sonnet"));
        assert_eq!(profile.effort.as_deref(), Some("high"));
        assert_eq!(profile.tools, vec!["Read", "Edit", "Grep"]);
        assert_eq!(
            profile.description.as_deref(),
            Some("Implementation worker")
        );
        assert!(profile
            .system_prompt
            .starts_with("You are the implementation lead."));

        // Unknown keys land in extras: permissionMode / memory / abtest.
        let extras = profile.extras.as_object().expect("extras object");
        let permission_mode = extras.get("permissionMode").and_then(|v| v.as_str());
        assert_eq!(permission_mode, Some("bypassPermissions"));
        let memory = extras.get("memory").and_then(|v| v.as_str());
        assert_eq!(memory, Some("user"));
        let abtest = extras.get("abtest").and_then(|v| v.as_bool());
        assert_eq!(abtest, Some(true));

        // SAMPLE has no worker_binding → None, and nothing leaks into extras.
        assert_eq!(profile.worker_binding, None);
        assert!(extras.get("worker_binding").is_none());
    }

    #[test]
    fn worker_binding_extracted_as_first_class_field() {
        let profile = profile_of(
            "---\nname: x\nworker_binding: mse-worker-coder\n---\nbody\n",
            "x",
        );
        assert_eq!(profile.worker_binding.as_deref(), Some("mse-worker-coder"));
        // The first-class field must not be duplicated into extras.
        assert!(matches!(profile.extras, Value::Null));
    }

    #[test]
    fn worker_binding_absent_is_none_not_extras() {
        let profile = profile_of("---\nname: x\nmodel: sonnet\n---\nbody\n", "x");
        assert_eq!(profile.worker_binding, None);
    }

    #[test]
    fn tools_accepts_yaml_array() {
        let profile = profile_of("---\nname: x\ntools:\n  - Read\n  - Edit\n---\nbody\n", "x");
        assert_eq!(profile.tools, vec!["Read", "Edit"]);
    }

    #[test]
    fn missing_name_errors() {
        let outcome = parse("---\nmodel: sonnet\n---\nbody\n", "x", AgentKind::Operator);
        assert!(matches!(outcome, Err(LoadError::MissingName { .. })));
    }

    #[test]
    fn no_frontmatter_errors() {
        let outcome = parse("plain body without frontmatter", "x", AgentKind::Operator);
        assert!(matches!(outcome, Err(LoadError::NoFrontmatter { .. })));
    }

    #[test]
    fn body_preserves_markdown() {
        let profile = profile_of(
            "---\nname: x\n---\n# Heading\n\nparagraph with `code`.\n",
            "x",
        );
        assert_eq!(
            profile.system_prompt,
            "# Heading\n\nparagraph with `code`.\n"
        );
    }

    #[test]
    fn populates_version_hash_from_body() {
        let profile = profile_of(SAMPLE, "sample");
        let expected = compute_body_hash(&profile.system_prompt);
        assert_eq!(profile.version_hash.as_deref(), Some(expected.as_str()));
        // blake3 hex = 64 chars
        assert_eq!(expected.len(), 64);
    }

    #[test]
    fn version_hash_changes_with_body() {
        let first = hash_of("---\nname: x\n---\nbody one\n");
        let second = hash_of("---\nname: x\n---\nbody two\n");
        assert!(first.is_some() && second.is_some());
        assert_ne!(first, second);
    }

    #[test]
    fn version_hash_stable_across_frontmatter_reorder() {
        // Only the body feeds the hash, so key order in the frontmatter is irrelevant.
        let first = hash_of("---\nname: x\nmodel: sonnet\n---\nsame body\n");
        let second = hash_of("---\nmodel: sonnet\nname: x\n---\nsame body\n");
        assert_eq!(first, second);
    }
}