Skip to main content

mlua_swarm/blueprint/
loader.rs

1//! Blueprint loader (Phase B). Loads a Blueprint from a JSON / YAML file
2//! and recursively expands the internal `{"$file": "..."}` refs.
3//!
4//! ## File-ref expansion
5//!
6//! Anywhere inside the JSON value, this form is replaced by the referenced
7//! file's contents **as a raw string**. Paths are resolved **relative to
8//! the Blueprint file's directory**:
9//!
10//! ```jsonc
11//! { "$file": "prompts/system-writer.md" }
12//! ```
13//!
14//! Typical uses:
15//!
16//! - Externalising a large prompt out of a flow `Step.in`:
17//!   `{"op":"lit","value":{"$file":"prompts/x.md"}}`.
18//! - Externalising any field inside `AgentDef.spec` (system_prompt, args,
19//!   etc.).
20//! - Externalising per-agent or global `hints`.
21//!
22//! ## Agent-md ref expansion (structured ref)
23//!
24//! Specialised ref that expands an `agent.md` (frontmatter + body) into
25//! an **`AgentDef` object**:
26//!
27//! ```jsonc
28//! {
29//!   "agents": [
30//!     { "$agent_md": "agents/domain-researcher.md" }
31//!   ]
32//! }
33//! ```
34//!
35//! Where `$file` returns a raw string, `$agent_md` runs the file through
36//! `agent_md_loader::parse` and returns a fully-populated `AgentDef` JSON
37//! object with `profile.system_prompt`, `meta`, `spec`, and so on already
38//! filled in. Path hygiene matches `$file`: absolute paths and `..` are
39//! rejected.
40
41use crate::blueprint::{default_global_agent_kind, AgentKind, Blueprint};
42use serde_json::Value;
43use std::path::{Path, PathBuf};
44use thiserror::Error;
45
46/// Everything that can go wrong while loading and `$file`/`$agent_md`
47/// expanding a Blueprint from disk.
48#[derive(Debug, Error)]
49pub enum LoadError {
50    /// Reading the Blueprint file (or a referenced `$file`/`$agent_md`)
51    /// failed.
52    #[error("io: {0}")]
53    Io(#[from] std::io::Error),
54    /// The `.json` file did not parse as JSON.
55    #[error("json parse: {0}")]
56    Json(#[from] serde_json::Error),
57    /// The `.yaml`/`.yml` file did not parse as YAML.
58    #[error("yaml parse: {0}")]
59    Yaml(#[from] serde_yaml::Error),
60    /// The file extension is not one of `.json` / `.yaml` / `.yml`.
61    #[error("unsupported extension: {0:?} (expected .json / .yaml / .yml)")]
62    UnknownFormat(Option<String>),
63    /// A `$file`/`$agent_md` ref failed path hygiene checks or the
64    /// referenced file could not be read/parsed.
65    #[error("$file ref expansion at {path:?}: {msg}")]
66    FileRef {
67        /// The resolved (or rejected) path of the ref.
68        path: PathBuf,
69        /// Human-readable description of what went wrong.
70        msg: String,
71    },
72    /// The expanded JSON value did not deserialize into a `Blueprint`.
73    #[error("blueprint shape invalid: {0}")]
74    Shape(String),
75}
76
77/// Load a Blueprint from a file path. Detects JSON vs. YAML by
78/// extension, recursively expands `$file` refs, and parses the result
79/// into a typed `Blueprint`.
80pub fn load_blueprint_from_path<P: AsRef<Path>>(path: P) -> Result<Blueprint, LoadError> {
81    let path = path.as_ref();
82    let raw = std::fs::read_to_string(path)?;
83    let ext = path
84        .extension()
85        .and_then(|e| e.to_str())
86        .map(|s| s.to_lowercase());
87    let value: Value = match ext.as_deref() {
88        Some("json") => serde_json::from_str(&raw)?,
89        Some("yaml") | Some("yml") => {
90            let yv: serde_yaml::Value = serde_yaml::from_str(&raw)?;
91            serde_json::to_value(yv)
92                .map_err(|e| LoadError::Shape(format!("yaml→json convert: {e}")))?
93        }
94        other => return Err(LoadError::UnknownFormat(other.map(|s| s.to_string()))),
95    };
96    let base = path
97        .parent()
98        .unwrap_or_else(|| Path::new("."))
99        .to_path_buf();
100    // Steps (1) and (3) of the four-layer cascade: pre-read the BP JSON's
101    // top-level `default_agent_kind`. If it is absent, fall back to the
102    // schema's `Default` impl (`Operator`). The value is passed into
103    // `expand_file_refs` and used as the loader-side kind default when a
104    // `$agent_md` has no sibling override. Step (2), the caller-side
105    // (CLI) override, is out of this function's scope — an upper layer
106    // (the server seed handler) is responsible for overwriting the
107    // pre-read value with the CLI value.
108    let default_kind = pre_read_default_agent_kind(&value);
109    let resolved = expand_file_refs(value, &base, default_kind)?;
110    let bp: Blueprint = serde_json::from_value(resolved)
111        .map_err(|e| LoadError::Shape(format!("typed parse: {e}")))?;
112    Ok(bp)
113}
114
115/// Pull `default_agent_kind` out of the raw BP JSON top level. Falls
116/// back to the schema's `Default` impl (`Operator`) if the key is
117/// missing or its type does not match. This is the first stage of
118/// resolving the default kind used inside `expand_file_refs` when a
119/// `$agent_md` has no sibling `kind` override.
120pub fn pre_read_default_agent_kind(val: &Value) -> AgentKind {
121    val.get("default_agent_kind")
122        .and_then(|v| serde_json::from_value::<AgentKind>(v.clone()).ok())
123        .unwrap_or_else(default_global_agent_kind)
124}
125
126/// Takes a JSON value: an object whose only key is `"$file": "path"` is
127/// replaced with the referenced file's contents; other objects / arrays
128/// recurse; scalars pass through unchanged.
129///
130/// Path hygiene: absolute paths and `..` parent-directory escapes are
131/// **rejected**, sandboxing all refs to the Blueprint's base-directory
132/// subtree. That structurally prevents accidentally pulling in
133/// `/etc/passwd` or `~/.ssh/id_rsa`. The trust boundary is spelled out
134/// explicitly.
135///
136/// Shared path hygiene for `$file` and `$agent_md`: absolute paths and
137/// `..` parent escapes are rejected; refs are sandboxed inside the
138/// base-directory subtree; the resolved absolute path is returned.
139fn resolve_ref_path(rel: &str, base: &Path) -> Result<PathBuf, LoadError> {
140    let rel_path = Path::new(rel);
141    if rel_path.is_absolute() {
142        return Err(LoadError::FileRef {
143            path: rel_path.to_path_buf(),
144            msg: "absolute path not allowed (must be relative to Blueprint dir)".into(),
145        });
146    }
147    if rel_path
148        .components()
149        .any(|c| matches!(c, std::path::Component::ParentDir))
150    {
151        return Err(LoadError::FileRef {
152            path: rel_path.to_path_buf(),
153            msg: "'..' parent-dir escape not allowed".into(),
154        });
155    }
156    Ok(base.join(rel_path))
157}
158
159/// `default_kind` is the fallback used when a `$agent_md` has no sibling
160/// `kind` — it should already be resolved by upper layers of the
161/// four-layer cascade. Callers resolve the BP top-level
162/// `default_agent_kind` and any CLI override before calling this
163/// function and pass in the literal kind.
164pub fn expand_file_refs(
165    val: Value,
166    base: &Path,
167    default_kind: AgentKind,
168) -> Result<Value, LoadError> {
169    match val {
170        Value::Object(map) => {
171            // `$file`: a single-key raw-string substitution.
172            if map.len() == 1 {
173                if let Some(Value::String(rel)) = map.get("$file") {
174                    let full = resolve_ref_path(rel, base)?;
175                    let content =
176                        std::fs::read_to_string(&full).map_err(|e| LoadError::FileRef {
177                            path: full.clone(),
178                            msg: e.to_string(),
179                        })?;
180                    return Ok(Value::String(content));
181                }
182            }
183            // `$agent_md` accepts either a single-key object or an object
184            // with sibling keys. Sibling keys are shallow-merged onto the
185            // expanded AgentDef object, so the caller's values override
186            // whatever the AgentDef itself carried. Typical use: keep the
187            // name and profile from the agent.md but override only
188            // `spec.operator_ref` or `meta` at the call site.
189            //
190            // Kind resolution cascade: (a) if a sibling `"kind"` literal
191            // is present, use it as-is; (b) otherwise, fall back to the
192            // `default_kind` argument, which the caller already resolved
193            // upstream from BP `default_agent_kind` or the CLI default.
194            if let Some(Value::String(rel)) = map.get("$agent_md") {
195                let full = resolve_ref_path(rel, base)?;
196                // Peek at the sibling "kind"; fall back to `default_kind`
197                // if absent.
198                let resolved_kind = map
199                    .get("kind")
200                    .and_then(|v| serde_json::from_value::<AgentKind>(v.clone()).ok())
201                    .unwrap_or_else(|| default_kind.clone());
202                let def =
203                    crate::lua::agent_md_loader::load_file(&full, resolved_kind).map_err(|e| {
204                        LoadError::FileRef {
205                            path: full.clone(),
206                            msg: format!("agent_md parse: {e}"),
207                        }
208                    })?;
209                let mut def_v = serde_json::to_value(&def).map_err(|e| LoadError::FileRef {
210                    path: full.clone(),
211                    msg: format!("agent_md serialize: {e}"),
212                })?;
213                if let Value::Object(def_map) = &mut def_v {
214                    for (k, v) in map {
215                        if k == "$agent_md" {
216                            continue;
217                        }
218                        // Recursively expand the sibling before applying
219                        // it as a shallow override.
220                        let expanded = expand_file_refs(v, base, default_kind.clone())?;
221                        def_map.insert(k, expanded);
222                    }
223                }
224                return Ok(def_v);
225            }
226            let mut new_map = serde_json::Map::with_capacity(map.len());
227            for (k, v) in map {
228                new_map.insert(k, expand_file_refs(v, base, default_kind.clone())?);
229            }
230            Ok(Value::Object(new_map))
231        }
232        Value::Array(arr) => {
233            let mut new_arr = Vec::with_capacity(arr.len());
234            for v in arr {
235                new_arr.push(expand_file_refs(v, base, default_kind.clone())?);
236            }
237            Ok(Value::Array(new_arr))
238        }
239        other => Ok(other),
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::fs;
    use tempfile::TempDir;

    /// Minimal agent.md fixture: frontmatter followed by a prompt body.
    const AGENT_MD: &str = concat!(
        "---\n",
        "name: researcher\n",
        "description: focus on XX/YY sites\n",
        "model: sonnet\n",
        "---\n",
        "You are a researcher. Focus on XX/YY sites.\n",
    );

    /// Write `content` to `dir/rel`, creating parent directories as
    /// needed, and return the full path of the written file.
    fn write_md(dir: &Path, rel: &str, content: &str) -> PathBuf {
        let target = dir.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&target, content).unwrap();
        target
    }

    /// Fresh temporary directory acting as the Blueprint base dir.
    fn sandbox() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Expand `doc` against `dir` with `Operator` as the default kind.
    fn expand(doc: Value, dir: &TempDir) -> Result<Value, LoadError> {
        expand_file_refs(doc, dir.path(), AgentKind::Operator)
    }

    #[test]
    fn agent_md_ref_expands_to_typed_agent_def_object() {
        let dir = sandbox();
        write_md(dir.path(), "agents/r.md", AGENT_MD);

        let doc = json!({
            "agents": [ { "$agent_md": "agents/r.md" } ]
        });
        let out = expand(doc, &dir).expect("expand ok");

        let agent = &out["agents"][0];
        assert!(agent.is_object(), "expanded value is JSON object");
        assert_eq!(agent["name"], "researcher");
        assert_eq!(agent["kind"], "operator", "default kind from loader");

        let prompt = agent["profile"]["system_prompt"].as_str().unwrap();
        assert!(
            prompt.contains("You are a researcher"),
            "profile.system_prompt baked from body, got: {:?}",
            agent["profile"]
        );
    }

    #[test]
    fn agent_md_ref_rejects_absolute_path() {
        let dir = sandbox();
        let doc = json!({ "$agent_md": "/etc/passwd" });

        let err = expand(doc, &dir).expect_err("abs rejected");

        assert!(err.to_string().contains("absolute path"), "got: {err}");
    }

    #[test]
    fn agent_md_ref_rejects_parent_dir_escape() {
        let dir = sandbox();
        let doc = json!({ "$agent_md": "../escape.md" });

        let err = expand(doc, &dir).expect_err(".. rejected");

        assert!(err.to_string().contains("parent-dir escape"), "got: {err}");
    }

    #[test]
    fn agent_md_ref_merges_sibling_keys_as_shallow_override() {
        let dir = sandbox();
        write_md(dir.path(), "agents/r.md", AGENT_MD);
        let doc = json!({
            "$agent_md": "agents/r.md",
            "spec": { "operator_ref": "ws-sid-42" },
        });

        let out = expand(doc, &dir).expect("expand ok");

        assert_eq!(out["name"], "researcher", "name from md preserved");
        assert_eq!(
            out["spec"]["operator_ref"], "ws-sid-42",
            "sibling spec overrides md default (= Null)"
        );
        let prompt = out["profile"]["system_prompt"].as_str().unwrap();
        assert!(
            prompt.contains("You are a researcher"),
            "profile from md preserved"
        );
    }

    #[test]
    fn file_ref_still_returns_raw_string_unchanged() {
        let dir = sandbox();
        write_md(dir.path(), "prompts/raw.md", "raw body content");
        let doc = json!({ "$file": "prompts/raw.md" });

        let out = expand(doc, &dir).expect("expand ok");

        assert_eq!(out, json!("raw body content"));
    }
}