car-builder 0.22.0

//! Natural-language → validated [`Workflow`] manifest builder.
//!
//! The Builder turns a plain-English goal into a runnable `car-workflow`
//! definition: it prompts a model to emit the manifest, parses it (tolerating
//! fences/preamble), and — crucially — **validates it with
//! [`car_workflow::verify_workflow`]** rather than trusting the model. Invalid
//! manifests are fed back to the model with their specific errors, up to a
//! bounded number of repair attempts.
//!
//! ## Design: generation is injected
//!
//! [`build_workflow`] is generic over an async `generate(prompt) -> text`
//! closure, so this crate has **no inference dependency**: the `car build` CLI
//! wires the real `car-inference` engine (a server/FFI surface is a follow-up),
//! while tests inject a fake generator. The reusable, deterministic part —
//! prompt → parse → verify → repair-loop — is unit-tested without a model.
//!
//! `valid` means the manifest passed `verify_workflow` (graph structure +
//! per-stage proposal verification) and, when a catalog of tools is provided,
//! references only known tools. Semantic findings — edge-condition keys and
//! state dependencies no stage produces — are surfaced as non-blocking
//! `warnings` (and fed back as repair hints), since they can have false
//! positives for keys produced at runtime; they do not flip `valid`.
//!
//! ```no_run
//! # async fn ex() {
//! use car_builder::{build_workflow, BuildRequest, ToolCatalog};
//! let req = BuildRequest {
//!     goal: "research a stock, then have a human approve the summary".into(),
//!     catalog: ToolCatalog::default(),
//!     existing: None,
//!     feedback: None,
//!     max_attempts: 3,
//! };
//! let result = build_workflow(|prompt| async move {
//!     // call your model here
//!     Ok::<_, String>(prompt) // placeholder
//! }, &req).await;
//! if result.valid { /* save result.workflow */ }
//! # }
//! ```

mod parse;
mod prompt;

pub use parse::parse_workflow;
pub use prompt::{build_prompt, AgentInfo, ToolCatalog, ToolInfo};

use car_workflow::{verify_workflow, Workflow};
use std::future::Future;

/// A request to build (or update) a workflow from a goal.
///
/// Passed by reference to [`build_workflow`], which hands it to
/// [`build_prompt`] on every attempt together with the issues collected so far.
pub struct BuildRequest {
    /// Plain-English description of what the workflow should do.
    pub goal: String,
    /// Agents/tools/models the builder may compose.
    ///
    /// When `catalog.tools` is non-empty, a generated workflow that references
    /// a tool outside that list fails validation. An empty tool list imposes no
    /// constraint on tool names.
    pub catalog: ToolCatalog,
    /// When set, the builder edits this workflow instead of creating a new one.
    pub existing: Option<Workflow>,
    /// Human revision feedback from an approve/revise loop.
    pub feedback: Option<String>,
    /// Maximum generate→validate attempts (clamped to ≥ 1, so `0` still makes
    /// one attempt).
    pub max_attempts: u32,
}

/// The outcome of a build, as returned by [`build_workflow`].
///
/// On success (`valid == true`) `workflow` is the accepted manifest and
/// `issues` is empty. On failure the fields describe different moments of the
/// run: `issues` always comes from the final attempt, while `workflow`,
/// `warnings` and `raw` are carried over from the most recent attempt that got
/// far enough to produce them — which may be an earlier attempt.
pub struct BuildResult {
    /// The best workflow produced. `Some` and `valid` on success; on failure it
    /// holds the last parseable-but-invalid attempt (if any) for inspection.
    /// `None` when no attempt produced parseable output.
    pub workflow: Option<Workflow>,
    /// Whether `workflow` passed `verify_workflow` with no errors and, when the
    /// request's catalog lists tools, references only listed tools.
    pub valid: bool,
    /// Errors from the final attempt: validation errors, a parse failure, or a
    /// `generation failed: …` message. Empty when valid.
    pub issues: Vec<String>,
    /// Advisory semantic findings (e.g. an edge condition keyed on a state value
    /// no stage produces). Non-blocking — present even when `valid` is true; they
    /// are also fed back as repair hints while other errors are being fixed.
    /// On failure these belong to the last attempt that reached validation.
    pub warnings: Vec<String>,
    /// How many attempts were made: the 1-based number of the attempt that
    /// passed on success, or the full (clamped) attempt budget on failure.
    pub attempts: u32,
    /// Raw text of the last model response (for debugging). Not updated by an
    /// attempt whose generation call failed; `None` if none ever succeeded.
    pub raw: Option<String>,
}

/// Build a validated workflow from `req`, repairing up to `req.max_attempts`.
///
/// `generate` is called with a prompt and must return the model's text. The loop
/// stops early on the first manifest that passes `verify_workflow`.
pub async fn build_workflow<F, Fut>(generate: F, req: &BuildRequest) -> BuildResult
where
    F: Fn(String) -> Fut + Send + Sync,
    Fut: Future<Output = Result<String, String>> + Send,
{
    let max = req.max_attempts.max(1);
    let mut prior_issues: Vec<String> = Vec::new();
    let mut last_raw: Option<String> = None;
    let mut last_invalid: Option<Workflow> = None;
    let mut last_issues: Vec<String> = Vec::new();
    let mut last_warnings: Vec<String> = Vec::new();

    for attempt in 1..=max {
        let prompt = build_prompt(req, &prior_issues);

        let text = match generate(prompt).await {
            Ok(t) => t,
            Err(e) => {
                // Generation itself failed — a transient model/transport error,
                // not something the model can "repair". Record it for the final
                // report but leave `prior_issues` untouched so the next attempt
                // re-sends the same (correct) prompt rather than a fake repair
                // instruction.
                last_issues = vec![format!("generation failed: {e}")];
                continue;
            }
        };
        last_raw = Some(text.clone());

        let workflow = match parse_workflow(&text) {
            Ok(wf) => wf,
            Err(e) => {
                prior_issues =
                    vec![format!("Your output did not parse as a workflow JSON object: {e}. Return ONLY the JSON object.")];
                last_issues = prior_issues.clone();
                continue;
            }
        };

        // Structural verification + (when a catalog is supplied) tool-existence
        // cross-check. Both feed the same repair channel.
        let mut errors: Vec<String> = verify_workflow(&workflow)
            .issues
            .iter()
            .filter(|i| i.severity == "error")
            .map(|i| match &i.stage_id {
                Some(s) => format!("{s}: {}", i.message),
                None => i.message.clone(),
            })
            .collect();
        errors.extend(catalog_issues(&workflow, &req.catalog));

        // Advisory: existence checks that may have false positives (keys produced
        // at runtime), so they never block success — only nudge repair.
        let warnings = car_workflow::semantic_issues(&workflow);

        if errors.is_empty() {
            return BuildResult {
                workflow: Some(workflow),
                valid: true,
                issues: Vec::new(),
                warnings,
                attempts: attempt,
                raw: last_raw,
            };
        }

        // Feed the hard errors AND the advisory warnings back for the next
        // repair attempt (the model fixes both while it's already revising).
        let mut feedback = errors.clone();
        feedback.extend(warnings.iter().cloned());
        prior_issues = feedback;
        last_issues = errors;
        last_warnings = warnings;
        last_invalid = Some(workflow);
    }

    BuildResult {
        workflow: last_invalid,
        valid: false,
        issues: last_issues,
        warnings: last_warnings,
        attempts: max,
        raw: last_raw,
    }
}

/// Report every tool name used by `workflow` that `catalog` does not list.
///
/// A catalog with no tools means "unconstrained": nothing is checked and the
/// result is empty, leaving tool names to be resolved at execution time. With
/// a non-empty tool list, each unknown `Action.tool` in a proposal stage and
/// each unknown tool on a pattern stage's agents yields one message prefixed
/// with the stage id, in stage order. Other stage kinds are skipped. Pattern
/// agents are inline `AgentSpec`s rather than references, so tool names are
/// the only thing that can be cross-checked here.
fn catalog_issues(workflow: &Workflow, catalog: &ToolCatalog) -> Vec<String> {
    let mut unknown = Vec::new();
    if catalog.tools.is_empty() {
        return unknown;
    }

    let allowed: std::collections::HashSet<&str> = catalog
        .tools
        .iter()
        .map(|entry| entry.name.as_str())
        .collect();

    for stage in workflow.stages.iter() {
        match &stage.step {
            car_workflow::StageStep::Proposal(step) => {
                // Actions without a tool have nothing to cross-check.
                let offenders = step
                    .proposal
                    .actions
                    .iter()
                    .filter_map(|action| action.tool.as_ref())
                    .filter(|tool| !allowed.contains(tool.as_str()));
                unknown.extend(offenders.map(|tool| {
                    format!("{}: action uses unknown tool '{}'", stage.id, tool)
                }));
            }
            car_workflow::StageStep::Pattern(pattern) => {
                for agent in pattern.agents.iter() {
                    let offenders = agent
                        .tools
                        .iter()
                        .filter(|tool| !allowed.contains(tool.as_str()));
                    unknown.extend(offenders.map(|tool| {
                        format!(
                            "{}: agent '{}' uses unknown tool '{}'",
                            stage.id, agent.name, tool
                        )
                    }));
                }
            }
            _ => {}
        }
    }
    unknown
}

// Unit tests for the generate → parse → verify → repair loop. Every test
// injects a fake `generate` closure, so no model is involved.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Baseline request: the given goal, a default catalog, no existing
    // workflow, no feedback, and a budget of three attempts.
    fn req(goal: &str) -> BuildRequest {
        BuildRequest {
            goal: goal.into(),
            catalog: ToolCatalog::default(),
            existing: None,
            feedback: None,
            max_attempts: 3,
        }
    }

    // A single approval stage (`gate`) that is also the start stage; no edges.
    // The tests below expect this manifest to be accepted.
    const VALID_WF: &str = r#"{
        "id": "wf", "name": "WF", "start": "gate",
        "stages": [
            {"id": "gate", "name": "Approve", "step": {"type":"approval","prompt":"ok?","fields":[],"output_key":"approval"}}
        ],
        "edges": []
    }"#;

    // A workflow whose start stage doesn't exist — verify_workflow flags an error.
    const INVALID_WF: &str = r#"{
        "id": "wf", "name": "WF", "start": "missing",
        "stages": [
            {"id": "gate", "name": "Approve", "step": {"type":"approval","prompt":"ok?","fields":[],"output_key":"approval"}}
        ],
        "edges": []
    }"#;

    // A generator that always returns a valid manifest succeeds on attempt 1.
    #[tokio::test]
    async fn valid_on_first_attempt() {
        let result = build_workflow(|_p| async { Ok::<_, String>(VALID_WF.to_string()) }, &req("x")).await;
        assert!(result.valid);
        assert_eq!(result.attempts, 1);
        assert_eq!(result.workflow.unwrap().id, "wf");
    }

    // An invalid first answer triggers a repair prompt that mentions the
    // validation failure; the valid second answer ends the loop.
    #[tokio::test]
    async fn repairs_then_succeeds() {
        // First attempt returns an invalid workflow; second returns a valid one.
        // `calls` counts invocations so the closure can vary its answer.
        let calls = AtomicUsize::new(0);
        let result = build_workflow(
            |prompt: String| {
                let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
                async move {
                    if n == 1 {
                        Ok::<_, String>(INVALID_WF.to_string())
                    } else {
                        // The repair prompt must carry the prior error.
                        assert!(prompt.contains("FAILED validation"));
                        assert!(prompt.contains("missing"));
                        Ok(VALID_WF.to_string())
                    }
                }
            },
            &req("x"),
        )
        .await;
        assert!(result.valid);
        assert_eq!(result.attempts, 2);
    }

    // A generator that never produces a valid manifest exhausts the budget and
    // the result reports the errors plus the last invalid workflow.
    #[tokio::test]
    async fn gives_up_after_max_attempts_with_issues() {
        let result = build_workflow(
            |_p| async { Ok::<_, String>(INVALID_WF.to_string()) },
            &req("x"),
        )
        .await;
        assert!(!result.valid);
        assert_eq!(result.attempts, 3);
        assert!(!result.issues.is_empty());
        // The last invalid workflow is retained for inspection.
        assert!(result.workflow.is_some());
        assert!(result.issues.iter().any(|i| i.contains("missing")));
    }

    // Non-JSON output is fed back as a "did not parse" repair instruction and
    // the next (valid) answer is accepted.
    #[tokio::test]
    async fn unparseable_output_is_repaired_as_a_parse_issue() {
        let calls = AtomicUsize::new(0);
        let result = build_workflow(
            |prompt: String| {
                let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
                async move {
                    if n == 1 {
                        Ok::<_, String>("I'm sorry, I can't do that".to_string())
                    } else {
                        assert!(prompt.contains("did not parse"));
                        Ok(VALID_WF.to_string())
                    }
                }
            },
            &req("x"),
        )
        .await;
        assert!(result.valid);
        assert_eq!(result.attempts, 2);
    }

    // A failed generation call still consumes an attempt, and the loop goes on
    // to accept the valid answer from the second call.
    #[tokio::test]
    async fn generation_error_then_recovers() {
        let calls = AtomicUsize::new(0);
        let result = build_workflow(
            |_prompt: String| {
                let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
                async move {
                    if n == 1 {
                        Err::<String, String>("transport boom".into())
                    } else {
                        Ok(VALID_WF.to_string())
                    }
                }
            },
            &req("x"),
        )
        .await;
        assert!(result.valid);
        assert_eq!(result.attempts, 2);
    }

    // When every generation call fails there is no workflow to return, and the
    // issue list carries the "generation failed" message.
    #[tokio::test]
    async fn all_generation_errors_yield_no_workflow_with_issues() {
        let result = build_workflow(
            |_p| async { Err::<String, String>("boom".into()) },
            &req("x"),
        )
        .await;
        assert!(!result.valid);
        assert!(result.workflow.is_none());
        assert!(result.issues.iter().any(|i| i.contains("generation failed")));
    }

    // With a non-empty tool catalog, a proposal action naming a tool outside
    // the catalog makes the build invalid and names the offending tool.
    #[tokio::test]
    async fn catalog_cross_check_rejects_unknown_tool() {
        // VALID_WF passes structure, but if a catalog lists tools and the
        // workflow used an unknown one, it should be flagged. Use a proposal
        // workflow that references a tool absent from the catalog.
        const PROPOSAL_WF: &str = r#"{
            "id":"wf","name":"WF","start":"do",
            "stages":[{"id":"do","name":"Do","step":{"type":"proposal","proposal":{
                "id":"p","source":"builder","actions":[
                    {"id":"a","type":"tool_call","tool":"made_up_tool","parameters":{}}
                ],"context":{}}}}],
            "edges":[]
        }"#;
        let mut r = req("x");
        r.catalog = ToolCatalog {
            tools: vec![ToolInfo { name: "real_tool".into(), description: String::new() }],
            ..Default::default()
        };
        // One attempt is enough: the generator always returns the same manifest.
        r.max_attempts = 1;
        let result =
            build_workflow(|_p| async { Ok::<_, String>(PROPOSAL_WF.to_string()) }, &r).await;
        assert!(!result.valid, "unknown tool must fail the catalog cross-check");
        assert!(result.issues.iter().any(|i| i.contains("made_up_tool")));
    }

    // With the default catalog from `req`, an arbitrary tool name is accepted.
    #[tokio::test]
    async fn empty_catalog_imposes_no_tool_constraint() {
        const PROPOSAL_WF: &str = r#"{
            "id":"wf","name":"WF","start":"do",
            "stages":[{"id":"do","name":"Do","step":{"type":"proposal","proposal":{
                "id":"p","source":"builder","actions":[
                    {"id":"a","type":"tool_call","tool":"anything","parameters":{}}
                ],"context":{}}}}],
            "edges":[]
        }"#;
        let result =
            build_workflow(|_p| async { Ok::<_, String>(PROPOSAL_WF.to_string()) }, &req("x")).await;
        assert!(result.valid, "empty catalog should not constrain tool names");
    }

    // Structurally valid (gate -> done), but the edge branches on
    // `approval.decision` while the gate declares no fields → semantic warning.
    const SEMANTIC_WARN_WF: &str = r#"{
        "id":"wf","name":"WF","start":"gate",
        "stages":[
            {"id":"gate","name":"Gate","step":{"type":"approval","prompt":"ok?","fields":[],"output_key":"approval"}},
            {"id":"done","name":"Done","step":{"type":"proposal","proposal":{"id":"p","source":"b","actions":[],"context":{}}}}
        ],
        "edges":[{"from":"gate","to":"done","conditions":[{"key":"approval.decision","operator":"eq","value":"approve"}],"label":""}]
    }"#;

    // Semantic warnings are reported but do not prevent a first-attempt success.
    #[tokio::test]
    async fn valid_with_semantic_warnings_does_not_block() {
        let result = build_workflow(
            |_p| async { Ok::<_, String>(SEMANTIC_WARN_WF.to_string()) },
            &req("x"),
        )
        .await;
        // Structural verify + catalog pass → valid, but the dangling edge key is
        // surfaced as a non-blocking warning.
        assert!(result.valid);
        assert_eq!(result.attempts, 1);
        assert!(result
            .warnings
            .iter()
            .any(|w| w.contains("approval.decision")));
    }

    // A budget of zero is raised to one, so a single attempt is still made.
    #[tokio::test]
    async fn max_attempts_is_clamped_to_at_least_one() {
        let mut r = req("x");
        r.max_attempts = 0;
        let result = build_workflow(|_p| async { Ok::<_, String>(VALID_WF.to_string()) }, &r).await;
        assert!(result.valid);
        assert_eq!(result.attempts, 1);
    }
}