Skip to main content

car_builder/
lib.rs

1//! Natural-language → validated [`Workflow`] manifest builder.
2//!
3//! The Builder turns a plain-English goal into a runnable `car-workflow`
4//! definition: it prompts a model to emit the manifest, parses it (tolerating
5//! fences/preamble), and — crucially — **validates it with
6//! [`car_workflow::verify_workflow`]** rather than trusting the model. Invalid
7//! manifests are fed back to the model with their specific errors, up to a
8//! bounded number of repair attempts.
9//!
10//! ## Design: generation is injected
11//!
12//! [`build_workflow`] is generic over an async `generate(prompt) -> text`
13//! closure, so this crate has **no inference dependency**: the `car build` CLI
14//! wires the real `car-inference` engine (a server/FFI surface is a follow-up),
15//! while tests inject a fake generator. The reusable, deterministic part —
16//! prompt → parse → verify → repair-loop — is unit-tested without a model.
17//!
18//! `valid` means the manifest passed `verify_workflow` (graph structure +
19//! per-stage proposal verification) and, when a catalog of tools is provided,
20//! references only known tools. Semantic findings — edge-condition keys and
21//! state dependencies no stage produces — are surfaced as non-blocking
22//! `warnings` (and fed back as repair hints), since they can have false
23//! positives for keys produced at runtime; they do not flip `valid`.
24//!
25//! ```no_run
26//! # async fn ex() {
27//! use car_builder::{build_workflow, BuildRequest, ToolCatalog};
28//! let req = BuildRequest {
29//!     goal: "research a stock, then have a human approve the summary".into(),
30//!     catalog: ToolCatalog::default(),
31//!     existing: None,
32//!     feedback: None,
33//!     max_attempts: 3,
34//! };
35//! let result = build_workflow(|prompt| async move {
36//!     // call your model here
37//!     Ok::<_, String>(prompt) // placeholder
38//! }, &req).await;
39//! if result.valid { /* save result.workflow */ }
40//! # }
41//! ```
42
43mod parse;
44mod prompt;
45
46pub use parse::parse_workflow;
47pub use prompt::{build_prompt, AgentInfo, ToolCatalog, ToolInfo};
48
49use car_workflow::{verify_workflow, Workflow};
50use std::future::Future;
51
52/// A request to build (or update) a workflow from a goal.
53pub struct BuildRequest {
54    /// Plain-English description of what the workflow should do.
55    pub goal: String,
56    /// Agents/tools/models the builder may compose.
57    pub catalog: ToolCatalog,
58    /// When set, the builder edits this workflow instead of creating a new one.
59    pub existing: Option<Workflow>,
60    /// Human revision feedback from an approve/revise loop.
61    pub feedback: Option<String>,
62    /// Maximum generate→validate attempts (clamped to ≥ 1).
63    pub max_attempts: u32,
64}
65
66/// The outcome of a build.
67pub struct BuildResult {
68    /// The best workflow produced. `Some` and `valid` on success; on failure it
69    /// holds the last parseable-but-invalid attempt (if any) for inspection.
70    pub workflow: Option<Workflow>,
71    /// Whether `workflow` passed `verify_workflow` with no errors.
72    pub valid: bool,
73    /// Validation/parse errors from the final attempt (empty when valid).
74    pub issues: Vec<String>,
75    /// Advisory semantic findings (e.g. an edge condition keyed on a state value
76    /// no stage produces). Non-blocking — present even when `valid` is true; they
77    /// are also fed back as repair hints while other errors are being fixed.
78    pub warnings: Vec<String>,
79    /// How many attempts were made.
80    pub attempts: u32,
81    /// Raw text of the last model response (for debugging).
82    pub raw: Option<String>,
83}
84
85/// Build a validated workflow from `req`, repairing up to `req.max_attempts`.
86///
87/// `generate` is called with a prompt and must return the model's text. The loop
88/// stops early on the first manifest that passes `verify_workflow`.
89pub async fn build_workflow<F, Fut>(generate: F, req: &BuildRequest) -> BuildResult
90where
91    F: Fn(String) -> Fut + Send + Sync,
92    Fut: Future<Output = Result<String, String>> + Send,
93{
94    let max = req.max_attempts.max(1);
95    let mut prior_issues: Vec<String> = Vec::new();
96    let mut last_raw: Option<String> = None;
97    let mut last_invalid: Option<Workflow> = None;
98    let mut last_issues: Vec<String> = Vec::new();
99    let mut last_warnings: Vec<String> = Vec::new();
100
101    for attempt in 1..=max {
102        let prompt = build_prompt(req, &prior_issues);
103
104        let text = match generate(prompt).await {
105            Ok(t) => t,
106            Err(e) => {
107                // Generation itself failed — a transient model/transport error,
108                // not something the model can "repair". Record it for the final
109                // report but leave `prior_issues` untouched so the next attempt
110                // re-sends the same (correct) prompt rather than a fake repair
111                // instruction.
112                last_issues = vec![format!("generation failed: {e}")];
113                continue;
114            }
115        };
116        last_raw = Some(text.clone());
117
118        let workflow = match parse_workflow(&text) {
119            Ok(wf) => wf,
120            Err(e) => {
121                prior_issues =
122                    vec![format!("Your output did not parse as a workflow JSON object: {e}. Return ONLY the JSON object.")];
123                last_issues = prior_issues.clone();
124                continue;
125            }
126        };
127
128        // Structural verification + (when a catalog is supplied) tool-existence
129        // cross-check. Both feed the same repair channel.
130        let mut errors: Vec<String> = verify_workflow(&workflow)
131            .issues
132            .iter()
133            .filter(|i| i.severity == "error")
134            .map(|i| match &i.stage_id {
135                Some(s) => format!("{s}: {}", i.message),
136                None => i.message.clone(),
137            })
138            .collect();
139        errors.extend(catalog_issues(&workflow, &req.catalog));
140
141        // Advisory: existence checks that may have false positives (keys produced
142        // at runtime), so they never block success — only nudge repair.
143        let warnings = car_workflow::semantic_issues(&workflow);
144
145        if errors.is_empty() {
146            return BuildResult {
147                workflow: Some(workflow),
148                valid: true,
149                issues: Vec::new(),
150                warnings,
151                attempts: attempt,
152                raw: last_raw,
153            };
154        }
155
156        // Feed the hard errors AND the advisory warnings back for the next
157        // repair attempt (the model fixes both while it's already revising).
158        let mut feedback = errors.clone();
159        feedback.extend(warnings.iter().cloned());
160        prior_issues = feedback;
161        last_issues = errors;
162        last_warnings = warnings;
163        last_invalid = Some(workflow);
164    }
165
166    BuildResult {
167        workflow: last_invalid,
168        valid: false,
169        issues: last_issues,
170        warnings: last_warnings,
171        attempts: max,
172        raw: last_raw,
173    }
174}
175
176/// Cross-check tools referenced by the workflow against the catalog.
177///
178/// An empty `catalog.tools` imposes no constraint (the model is free to name
179/// tools the runtime will resolve at execution time). When tools ARE listed,
180/// any `Action.tool` or agent tool outside the set is reported so the repair
181/// loop can correct invented names. Pattern agents are inline `AgentSpec`s
182/// (not references), so only tool names are cross-checkable here.
183fn catalog_issues(workflow: &Workflow, catalog: &ToolCatalog) -> Vec<String> {
184    if catalog.tools.is_empty() {
185        return Vec::new();
186    }
187    let known: std::collections::HashSet<&str> =
188        catalog.tools.iter().map(|t| t.name.as_str()).collect();
189    let mut issues = Vec::new();
190    for stage in &workflow.stages {
191        match &stage.step {
192            car_workflow::StageStep::Proposal(ps) => {
193                for action in &ps.proposal.actions {
194                    if let Some(tool) = &action.tool {
195                        if !known.contains(tool.as_str()) {
196                            issues.push(format!(
197                                "{}: action uses unknown tool '{}'",
198                                stage.id, tool
199                            ));
200                        }
201                    }
202                }
203            }
204            car_workflow::StageStep::Pattern(p) => {
205                for agent in &p.agents {
206                    for tool in &agent.tools {
207                        if !known.contains(tool.as_str()) {
208                            issues.push(format!(
209                                "{}: agent '{}' uses unknown tool '{}'",
210                                stage.id, agent.name, tool
211                            ));
212                        }
213                    }
214                }
215            }
216            _ => {}
217        }
218    }
219    issues
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Request with an empty catalog, nothing to edit, no feedback, and a
    /// budget of three attempts.
    fn req(goal: &str) -> BuildRequest {
        BuildRequest {
            goal: goal.to_owned(),
            catalog: ToolCatalog::default(),
            existing: None,
            feedback: None,
            max_attempts: 3,
        }
    }

    /// Generator that answers every prompt with the same fixed text.
    fn always(
        text: &'static str,
    ) -> impl Fn(String) -> std::future::Ready<Result<String, String>> + Send + Sync {
        move |_prompt| std::future::ready(Ok(text.to_string()))
    }

    // A single approval stage that is also the start stage.
    const VALID_WF: &str = r#"{
        "id": "wf", "name": "WF", "start": "gate",
        "stages": [
            {"id": "gate", "name": "Approve", "step": {"type":"approval","prompt":"ok?","fields":[],"output_key":"approval"}}
        ],
        "edges": []
    }"#;

    // Same stage, but `start` names a stage that doesn't exist —
    // verify_workflow flags an error.
    const INVALID_WF: &str = r#"{
        "id": "wf", "name": "WF", "start": "missing",
        "stages": [
            {"id": "gate", "name": "Approve", "step": {"type":"approval","prompt":"ok?","fields":[],"output_key":"approval"}}
        ],
        "edges": []
    }"#;

    // Structurally valid (gate -> done), but the edge branches on
    // `approval.decision` while the gate declares no fields → semantic warning.
    const SEMANTIC_WARN_WF: &str = r#"{
        "id":"wf","name":"WF","start":"gate",
        "stages":[
            {"id":"gate","name":"Gate","step":{"type":"approval","prompt":"ok?","fields":[],"output_key":"approval"}},
            {"id":"done","name":"Done","step":{"type":"proposal","proposal":{"id":"p","source":"b","actions":[],"context":{}}}}
        ],
        "edges":[{"from":"gate","to":"done","conditions":[{"key":"approval.decision","operator":"eq","value":"approve"}],"label":""}]
    }"#;

    #[tokio::test]
    async fn valid_on_first_attempt() {
        let request = req("x");
        let result = build_workflow(always(VALID_WF), &request).await;

        assert!(result.valid);
        assert_eq!(result.attempts, 1);
        assert_eq!(result.workflow.unwrap().id, "wf");
    }

    #[tokio::test]
    async fn repairs_then_succeeds() {
        // The first reply is an invalid workflow; every later reply is valid.
        let calls = AtomicUsize::new(0);
        let generate = |prompt: String| {
            let first_call = calls.fetch_add(1, Ordering::SeqCst) == 0;
            async move {
                if first_call {
                    Ok::<_, String>(INVALID_WF.to_string())
                } else {
                    // The repair prompt must carry the prior error.
                    assert!(prompt.contains("FAILED validation"));
                    assert!(prompt.contains("missing"));
                    Ok(VALID_WF.to_string())
                }
            }
        };

        let result = build_workflow(generate, &req("x")).await;

        assert!(result.valid);
        assert_eq!(result.attempts, 2);
    }

    #[tokio::test]
    async fn gives_up_after_max_attempts_with_issues() {
        let request = req("x");
        let result = build_workflow(always(INVALID_WF), &request).await;

        assert!(!result.valid);
        assert_eq!(result.attempts, 3);
        assert!(!result.issues.is_empty());
        // The last invalid workflow is retained for inspection.
        assert!(result.workflow.is_some());
        assert!(result.issues.iter().any(|i| i.contains("missing")));
    }

    #[tokio::test]
    async fn unparseable_output_is_repaired_as_a_parse_issue() {
        let calls = AtomicUsize::new(0);
        let generate = |prompt: String| {
            let first_call = calls.fetch_add(1, Ordering::SeqCst) == 0;
            async move {
                if first_call {
                    Ok::<_, String>("I'm sorry, I can't do that".to_string())
                } else {
                    assert!(prompt.contains("did not parse"));
                    Ok(VALID_WF.to_string())
                }
            }
        };

        let result = build_workflow(generate, &req("x")).await;

        assert!(result.valid);
        assert_eq!(result.attempts, 2);
    }

    #[tokio::test]
    async fn generation_error_then_recovers() {
        let calls = AtomicUsize::new(0);
        let generate = |_prompt: String| {
            let first_call = calls.fetch_add(1, Ordering::SeqCst) == 0;
            async move {
                if first_call {
                    Err::<String, String>("transport boom".into())
                } else {
                    Ok(VALID_WF.to_string())
                }
            }
        };

        let result = build_workflow(generate, &req("x")).await;

        assert!(result.valid);
        assert_eq!(result.attempts, 2);
    }

    #[tokio::test]
    async fn all_generation_errors_yield_no_workflow_with_issues() {
        let failing = |_p: String| async { Err::<String, String>("boom".into()) };

        let result = build_workflow(failing, &req("x")).await;

        assert!(!result.valid);
        assert!(result.workflow.is_none());
        assert!(result.issues.iter().any(|i| i.contains("generation failed")));
    }

    #[tokio::test]
    async fn catalog_cross_check_rejects_unknown_tool() {
        // A proposal workflow whose only action calls a tool that the
        // catalog below does not list; with a non-empty catalog that must be
        // flagged.
        const PROPOSAL_WF: &str = r#"{
            "id":"wf","name":"WF","start":"do",
            "stages":[{"id":"do","name":"Do","step":{"type":"proposal","proposal":{
                "id":"p","source":"builder","actions":[
                    {"id":"a","type":"tool_call","tool":"made_up_tool","parameters":{}}
                ],"context":{}}}}],
            "edges":[]
        }"#;
        let strict = BuildRequest {
            catalog: ToolCatalog {
                tools: vec![ToolInfo {
                    name: "real_tool".into(),
                    description: String::new(),
                }],
                ..Default::default()
            },
            max_attempts: 1,
            ..req("x")
        };

        let result = build_workflow(always(PROPOSAL_WF), &strict).await;

        assert!(!result.valid, "unknown tool must fail the catalog cross-check");
        assert!(result.issues.iter().any(|i| i.contains("made_up_tool")));
    }

    #[tokio::test]
    async fn empty_catalog_imposes_no_tool_constraint() {
        const PROPOSAL_WF: &str = r#"{
            "id":"wf","name":"WF","start":"do",
            "stages":[{"id":"do","name":"Do","step":{"type":"proposal","proposal":{
                "id":"p","source":"builder","actions":[
                    {"id":"a","type":"tool_call","tool":"anything","parameters":{}}
                ],"context":{}}}}],
            "edges":[]
        }"#;
        let request = req("x");

        let result = build_workflow(always(PROPOSAL_WF), &request).await;

        assert!(result.valid, "empty catalog should not constrain tool names");
    }

    #[tokio::test]
    async fn valid_with_semantic_warnings_does_not_block() {
        let request = req("x");
        let result = build_workflow(always(SEMANTIC_WARN_WF), &request).await;

        // Structural verify + catalog pass → valid, but the dangling edge key is
        // surfaced as a non-blocking warning.
        assert!(result.valid);
        assert_eq!(result.attempts, 1);
        let mentions_key = result
            .warnings
            .iter()
            .any(|w| w.contains("approval.decision"));
        assert!(mentions_key);
    }

    #[tokio::test]
    async fn max_attempts_is_clamped_to_at_least_one() {
        let zero_budget = BuildRequest {
            max_attempts: 0,
            ..req("x")
        };

        let result = build_workflow(always(VALID_WF), &zero_budget).await;

        assert!(result.valid);
        assert_eq!(result.attempts, 1);
    }
}