Skip to main content

mlua_swarm/lua/
agent_md_loader.rs

1//! agent.md frontmatter + body loader — turns agent-profiles
2//! `agents/*.md` files into `AgentDef`s.
3//!
4//! ## Input format
5//!
6//! ```text
7//! ---
8//! name: impl-lead
9//! description: Implementation worker ...
10//! model: sonnet
11//! effort: high
12//! tools: Read, Edit, Write, Grep, Glob
13//! permissionMode: bypassPermissions
14//! memory: user
15//! abtest: true
16//! ---
17//! <Markdown system prompt body>
18//! ```
19//!
20//! ## Output
21//!
//! One `AgentDef` per agent file (`load_dir` collects them into a
//! `Vec<AgentDef>`) — each entry carries `profile: Some(AgentProfile
//! { ... })`, `kind` is whatever the caller passed in (the loader no
//! longer picks a default), and `spec` is `Value::Null`. The backend
//! configuration (`spec`) is injected separately by the caller — on
//! the Operator-construction path.
26//!
27//! ## Scope
28//!
29//! - Only YAML frontmatter delimited by `---` is accepted. TOML and
30//!   JSON are not supported.
31//! - `tools` accepts both a CSV string (`"Read, Edit"`) and a YAML
32//!   array (`["Read", "Edit"]`).
33//! - Any field beyond the known set (`name` / `description` / `model`
34//!   / `effort` / `tools`) is dumped into an `extras` `Value` — a
35//!   future-proof carry for C-C-specific fields.
36//! - The body is kept verbatim, from just after the closing `---` to
37//!   the end of the file.
38
39use crate::blueprint::{AgentDef, AgentKind, AgentProfile};
40use serde_json::{Map, Value};
41use std::fs;
42use std::path::Path;
43
44/// Errors specific to the agent.md loader.
45#[derive(Debug)]
46pub enum LoadError {
47    /// Reading the file failed (not found, permissions, etc.).
48    Io(std::io::Error),
49    /// The `---` frontmatter delimiter was not found, or the body
50    /// could not be separated.
51    NoFrontmatter {
52        /// Path (or source label) of the offending file.
53        path: String,
54    },
55    /// Frontmatter YAML failed to parse.
56    Yaml {
57        /// Path (or source label) of the offending file.
58        path: String,
59        /// The underlying YAML parse error.
60        source: serde_yaml::Error,
61    },
62    /// Frontmatter has no `name` field, so we cannot determine an
63    /// agent identifier.
64    MissingName {
65        /// Path (or source label) of the offending file.
66        path: String,
67    },
68}
69
70impl std::fmt::Display for LoadError {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            LoadError::Io(e) => write!(f, "io error: {e}"),
74            LoadError::NoFrontmatter { path } => {
75                write!(f, "no frontmatter delimiter `---` in {path}")
76            }
77            LoadError::Yaml { path, source } => write!(f, "yaml parse error in {path}: {source}"),
78            LoadError::MissingName { path } => {
79                write!(f, "frontmatter missing required `name` field in {path}")
80            }
81        }
82    }
83}
84
85impl std::error::Error for LoadError {}
86
87impl From<std::io::Error> for LoadError {
88    fn from(e: std::io::Error) -> Self {
89        LoadError::Io(e)
90    }
91}
92
93/// Turn a single `agent.md` file into an `AgentDef`.
94///
95/// **`kind` must be provided explicitly by the caller.** The old
96/// hardcoded `Operator` default was structurally wrong: an agent.md
97/// has no knowledge of deployment and should not decide `kind` in the
98/// loader. The caller passes the kind after resolving the cascade —
99/// `Blueprint.default_agent_kind` → the sibling `$agent_md` override
100/// → `CompilerHints.kind_override`. `spec` is produced as
101/// `Value::Null`; the caller overwrites it if needed.
102pub fn load_file(path: impl AsRef<Path>, kind: AgentKind) -> Result<AgentDef, LoadError> {
103    let path = path.as_ref();
104    let text = fs::read_to_string(path)?;
105    parse(&text, &path.display().to_string(), kind)
106}
107
108/// Load every `*.md` under `dir`. Sorted ascending by file name.
109///
110/// Files without frontmatter — explanatory docs that are not agents —
111/// are **skipped**; `NoFrontmatter` is not turned into an error.
112/// Files that have frontmatter but fail to parse or lack `name` do
113/// propagate their errors.
114///
115/// `kind` applies uniformly to every file — the global default for
116/// this directory scope. To differentiate per file, the caller calls
117/// `load_file(path, per_file_kind)` directly.
118pub fn load_dir(dir: impl AsRef<Path>, kind: AgentKind) -> Result<Vec<AgentDef>, LoadError> {
119    let dir = dir.as_ref();
120    let mut entries: Vec<_> = fs::read_dir(dir)?
121        .filter_map(|e| e.ok())
122        .map(|e| e.path())
123        .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("md"))
124        .collect();
125    entries.sort();
126    let mut out = Vec::new();
127    for p in entries {
128        match load_file(&p, kind.clone()) {
129            Ok(def) => out.push(def),
130            Err(LoadError::NoFrontmatter { .. }) => continue,
131            Err(e) => return Err(e),
132        }
133    }
134    Ok(out)
135}
136
137/// Turn the text of an agent.md into an `AgentDef`. `pub` so unit
138/// tests can reach it. `kind` must be provided by the caller — same
139/// contract as `load_file`.
140pub fn parse(text: &str, source_label: &str, kind: AgentKind) -> Result<AgentDef, LoadError> {
141    let (front, body) = split_frontmatter(text).ok_or_else(|| LoadError::NoFrontmatter {
142        path: source_label.into(),
143    })?;
144    let yaml: Value = serde_yaml::from_str(front).map_err(|e| LoadError::Yaml {
145        path: source_label.into(),
146        source: e,
147    })?;
148    let obj = yaml.as_object().cloned().unwrap_or_default();
149
150    let name = obj
151        .get("name")
152        .and_then(|v| v.as_str())
153        .map(|s| s.to_string())
154        .ok_or_else(|| LoadError::MissingName {
155            path: source_label.into(),
156        })?;
157
158    let description = obj
159        .get("description")
160        .and_then(|v| v.as_str())
161        .map(|s| s.trim().to_string());
162    let model = obj
163        .get("model")
164        .and_then(|v| v.as_str())
165        .map(|s| s.to_string());
166    let effort = obj
167        .get("effort")
168        .and_then(|v| v.as_str())
169        .map(|s| s.to_string());
170    let tools = obj.get("tools").map(normalize_tools).unwrap_or_default();
171
172    // Dump everything outside the known set into `extras` — a
173    // future-proof carry for C-C-specific fields.
174    let known = ["name", "description", "model", "effort", "tools"];
175    let mut extras = Map::new();
176    for (k, v) in &obj {
177        if !known.contains(&k.as_str()) {
178            extras.insert(k.clone(), v.clone());
179        }
180    }
181
182    let version_hash = Some(compute_body_hash(body));
183
184    let profile = AgentProfile {
185        system_prompt: body.to_string(),
186        model,
187        effort,
188        tools,
189        description: description.clone(),
190        extras: if extras.is_empty() {
191            Value::Null
192        } else {
193            Value::Object(extras)
194        },
195        version_hash,
196    };
197
198    Ok(AgentDef {
199        name,
200        kind,
201        spec: Value::Null,
202        profile: Some(profile),
203        meta: None,
204    })
205}
206
207/// Compute the content hash of an agent body (its `system_prompt`).
208///
209/// 32-byte blake3, hex-encoded. This is the same form that populates
210/// `AgentProfile.version_hash`, and the same form recomputed by the
211/// `patch_applier.lua` post-hook when it detects a
212/// `/agents/N/profile/system_prompt` replacement — the
213/// `host.content_hash` primitive is also blake3 — so the Phase 1
214/// hash-consistency guarantee holds.
215pub fn compute_body_hash(body: &str) -> String {
216    blake3::hash(body.as_bytes()).to_hex().to_string()
217}
218
/// Split `---\n...\n---\n<body>` into `(frontmatter, body)`.
///
/// The text must begin with a `---` line (LF or CRLF terminated). The
/// frontmatter is everything up to — not including — the next line
/// that consists of exactly `---`, optionally followed by trailing
/// whitespace. The body is everything after that delimiter line,
/// verbatim. A delimiter on the final line without a trailing newline
/// yields an empty body.
///
/// A line that merely *starts* with `---` (for example `----` or
/// `---foo`) is ordinary frontmatter content, not the closing
/// delimiter; treating it as one would truncate the frontmatter and
/// leak the rest of that line into the body.
///
/// Returns `None` when the opening or the closing delimiter is
/// missing.
fn split_frontmatter(text: &str) -> Option<(&str, &str)> {
    let rest = text
        .strip_prefix("---\n")
        .or_else(|| text.strip_prefix("---\r\n"))?;

    // Walk line by line, tracking each line's byte offset within `rest`
    // so both halves can be returned as slices of the input.
    let mut offset = 0;
    for line in rest.split_inclusive('\n') {
        // `trim_end` removes the line terminator (`\n` / `\r\n`) and
        // any trailing blanks; what remains must be the bare marker.
        if line.trim_end() == "---" {
            let body_start = offset + line.len();
            return Some((&rest[..offset], &rest[body_start..]));
        }
        offset += line.len();
    }
    None
}
242
243/// Normalise the frontmatter's `tools` field to a `Vec<String>`.
244/// Accepted forms: CSV string (`"Read, Edit"`) or YAML array
245/// (`["Read", "Edit"]`).
246fn normalize_tools(v: &Value) -> Vec<String> {
247    if let Some(arr) = v.as_array() {
248        return arr
249            .iter()
250            .filter_map(|x| x.as_str().map(|s| s.trim().to_string()))
251            .filter(|s| !s.is_empty())
252            .collect();
253    }
254    if let Some(s) = v.as_str() {
255        return s
256            .split(',')
257            .map(|s| s.trim().to_string())
258            .filter(|s| !s.is_empty())
259            .collect();
260    }
261    Vec::new()
262}
263
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete agent.md: all known keys, three extra keys, and a
    /// two-paragraph body.
    const SAMPLE: &str = "---\nname: impl-lead\ndescription: Implementation worker\nmodel: sonnet\neffort: high\ntools: Read, Edit, Grep\npermissionMode: bypassPermissions\nmemory: user\nabtest: true\n---\nYou are the implementation lead.\n\nWork in the caller-provided task directory.\n";

    /// Parse `text` as an Operator agent and return its profile,
    /// panicking if parsing fails or no profile is attached.
    fn profile_of(text: &str, label: &str) -> AgentProfile {
        parse(text, label, AgentKind::Operator)
            .expect("parse ok")
            .profile
            .expect("profile present")
    }

    /// `version_hash` of the profile parsed from `text`.
    fn hash_of(text: &str) -> Option<String> {
        profile_of(text, "x").version_hash
    }

    #[test]
    fn parses_full_frontmatter() {
        let def = parse(SAMPLE, "sample", AgentKind::Operator).expect("parse ok");
        assert_eq!(def.name, "impl-lead");
        assert!(matches!(def.kind, AgentKind::Operator));

        let profile = def.profile.expect("profile present");
        assert_eq!(profile.model.as_deref(), Some("sonnet"));
        assert_eq!(profile.effort.as_deref(), Some("high"));
        assert_eq!(profile.tools, vec!["Read", "Edit", "Grep"]);
        assert_eq!(
            profile.description.as_deref(),
            Some("Implementation worker")
        );
        assert!(profile
            .system_prompt
            .starts_with("You are the implementation lead."));

        // Unknown keys land in extras: permissionMode / memory / abtest.
        let extras = profile.extras.as_object().expect("extras object");
        assert_eq!(
            extras.get("permissionMode").and_then(Value::as_str),
            Some("bypassPermissions")
        );
        assert_eq!(extras.get("memory").and_then(Value::as_str), Some("user"));
        assert_eq!(extras.get("abtest").and_then(Value::as_bool), Some(true));
    }

    #[test]
    fn tools_accepts_yaml_array() {
        let profile = profile_of("---\nname: x\ntools:\n  - Read\n  - Edit\n---\nbody\n", "x");
        assert_eq!(profile.tools, vec!["Read", "Edit"]);
    }

    #[test]
    fn missing_name_errors() {
        let outcome = parse("---\nmodel: sonnet\n---\nbody\n", "x", AgentKind::Operator);
        assert!(matches!(outcome, Err(LoadError::MissingName { .. })));
    }

    #[test]
    fn no_frontmatter_errors() {
        let outcome = parse("plain body without frontmatter", "x", AgentKind::Operator);
        assert!(matches!(outcome, Err(LoadError::NoFrontmatter { .. })));
    }

    #[test]
    fn body_preserves_markdown() {
        let profile = profile_of("---\nname: x\n---\n# Heading\n\nparagraph with `code`.\n", "x");
        assert_eq!(
            profile.system_prompt,
            "# Heading\n\nparagraph with `code`.\n"
        );
    }

    #[test]
    fn populates_version_hash_from_body() {
        let profile = profile_of(SAMPLE, "sample");
        let expected = compute_body_hash(&profile.system_prompt);
        assert_eq!(profile.version_hash.as_deref(), Some(expected.as_str()));
        // blake3 hex = 64 chars
        assert_eq!(expected.len(), 64);
    }

    #[test]
    fn version_hash_changes_with_body() {
        let first = hash_of("---\nname: x\n---\nbody one\n");
        let second = hash_of("---\nname: x\n---\nbody two\n");
        assert!(first.is_some() && second.is_some());
        assert_ne!(first, second);
    }

    #[test]
    fn version_hash_stable_across_frontmatter_reorder() {
        // Reordering the frontmatter must not affect the body → hash stays the same.
        let first = hash_of("---\nname: x\nmodel: sonnet\n---\nsame body\n");
        let second = hash_of("---\nmodel: sonnet\nname: x\n---\nsame body\n");
        assert_eq!(first, second);
    }
}