car-builder 0.32.1

{"id": "b1-summarize-note", "band": "single_stage", "nl_request": "Summarize the meeting notes I paste in and produce a bullet-point digest.", "must_have": {"stage_count": [1, 2], "required_tools": [], "required_edge_conditions": []}, "notes": "Single summarizer pattern stage; no tools, no branching."}
{"id": "b1-read-file", "band": "single_stage", "nl_request": "Read the file at ./report.txt and return its contents.", "must_have": {"stage_count": [1, 2], "required_tools": ["read_file"], "required_edge_conditions": []}, "notes": "One proposal stage invoking the read_file commodity tool."}
{"id": "b1-grep-todos", "band": "single_stage", "nl_request": "Search the src directory for any lines containing TODO and list the matches.", "must_have": {"stage_count": [1, 2], "required_tools": ["grep_files"], "required_edge_conditions": []}, "notes": "Single grep_files proposal stage."}
{"id": "b1-list-dir", "band": "single_stage", "nl_request": "List the entries in the current working directory.", "must_have": {"stage_count": [1, 2], "required_tools": ["list_dir"], "required_edge_conditions": []}, "notes": "Single list_dir proposal stage."}
{"id": "b1-calculate", "band": "single_stage", "nl_request": "Compute the value of 3.5 * (2 + 7) and return the result.", "must_have": {"stage_count": [1, 2], "required_tools": ["calculate"], "required_edge_conditions": []}, "notes": "Single calculate proposal stage; calculate is a pure commodity tool."}
{"id": "b1-write-file", "band": "single_stage", "nl_request": "Write the text 'hello world' to a file called greeting.txt.", "must_have": {"stage_count": [1, 2], "required_tools": ["write_file"], "required_edge_conditions": []}, "notes": "Single write_file proposal stage."}
{"id": "b1-find-rust", "band": "single_stage", "nl_request": "Find every Rust source file under the crates directory.", "must_have": {"stage_count": [1, 2], "required_tools": ["find_files"], "required_edge_conditions": []}, "notes": "Single find_files proposal stage."}
{"id": "b1-research-question", "band": "single_stage", "nl_request": "Research the capital of France and cite where the answer comes from.", "must_have": {"stage_count": [1, 2], "required_tools": [], "required_edge_conditions": []}, "notes": "Single researcher pattern stage; the built-in Researcher agent covers this without an explicit tool."}
{"id": "b1-verify-claim", "band": "single_stage", "nl_request": "Evaluate whether the claim 'the Earth is flat' is true or false with reasoning.", "must_have": {"stage_count": [1, 2], "required_tools": [], "required_edge_conditions": []}, "notes": "Single verifier pattern stage using the built-in Verifier agent."}
{"id": "b1-approval-gate", "band": "single_stage", "nl_request": "Pause and ask a human to approve before continuing, capturing their decision.", "must_have": {"stage_count": [1, 2], "required_tools": [], "required_edge_conditions": []}, "notes": "Single approval stage with a non-empty output_key; no tools."}
{"id": "b2-research-approve", "band": "multi_stage", "nl_request": "Research a stock, summarize the findings, then have a human approve the summary before anything is finalized. If the human rejects, stop.", "must_have": {"stage_count": [3, 5], "required_tools": [], "required_edge_conditions": ["approval"]}, "notes": "Research -> summarize -> approval gate; edge branches on the approval decision key."}
{"id": "b2-fetch-write-branch", "band": "multi_stage", "nl_request": "Read a config file, and if it exists write a normalized copy; otherwise report that it is missing. Branch on whether the read succeeded.", "must_have": {"stage_count": [2, 4], "required_tools": ["read_file", "write_file"], "required_edge_conditions": ["succeeded"]}, "notes": "Conditional edge on stage.<id>.succeeded (operator eq, value true); two file tools."}
{"id": "b2-search-review-deliver", "band": "multi_stage", "nl_request": "Search a codebase for a function, have a reviewer verify the results are relevant, and only deliver the report if the review passes.", "must_have": {"stage_count": [3, 5], "required_tools": ["grep_files"], "required_edge_conditions": ["review_passed"]}, "notes": "grep -> adversarial_review -> deliver; edge branches on stage.<id>.review_passed bool."}
{"id": "b2-dedup-process", "band": "multi_stage", "nl_request": "Poll a list of incoming items, drop the ones already processed in a prior run, then process each new one and skip delivery when nothing is new.", "must_have": {"stage_count": [3, 5], "required_tools": [], "required_edge_conditions": ["unseen_count"]}, "notes": "dedup -> for_each -> deliver; edge branches on stage.<id>.unseen_count (operator gt, value 0)."}
{"id": "b2-loop-until-done", "band": "multi_stage", "nl_request": "Keep refining a draft with an agent until a reviewer marks it acceptable or five attempts have passed, then present it for human sign-off.", "must_have": {"stage_count": [2, 4], "required_tools": [], "required_edge_conditions": ["approval"]}, "notes": "loop_until (body pattern, until predicate) -> approval; edge on approval.decision."}
{"id": "b2-triage-route", "band": "multi_stage", "nl_request": "Classify an incoming support ticket as urgent or normal; route urgent tickets to an escalation stage and normal ones to a standard summarizer.", "must_have": {"stage_count": [3, 5], "required_tools": [], "required_edge_conditions": ["eq"]}, "notes": "Classify stage with two conditional out-edges keyed on the classification value (operator eq)."}
{"id": "b2-map-reduce-summ", "band": "multi_stage", "nl_request": "Split a long document into sections, summarize each section in parallel, then combine the summaries and ask a human to approve the combined result.", "must_have": {"stage_count": [2, 4], "required_tools": [], "required_edge_conditions": ["approval"]}, "notes": "map_reduce pattern -> approval; approval edge condition."}
{"id": "b2-read-calc-report", "band": "multi_stage", "nl_request": "Read a numbers file, compute the total, and if the total exceeds 100 flag it for review, otherwise write it to a summary file.", "must_have": {"stage_count": [3, 5], "required_tools": ["read_file", "calculate", "write_file"], "required_edge_conditions": ["gt"]}, "notes": "read -> calculate -> branch; conditional edge uses the gt operator against the computed total."}
{"id": "b2-verify-then-deliver", "band": "multi_stage", "nl_request": "Draft an answer, verify the claim is accurate, and only if verification passes deliver it to a channel; otherwise loop back to redraft.", "must_have": {"stage_count": [3, 5], "required_tools": [], "required_edge_conditions": ["eq"]}, "notes": "draft -> verify -> conditional deliver/back-edge on a verification bool (operator eq)."}
{"id": "b2-pipeline-approval", "band": "multi_stage", "nl_request": "Run a research pipeline of three agents, then require human approval, and after approval deliver the result to Slack.", "must_have": {"stage_count": [3, 5], "required_tools": [], "required_edge_conditions": ["approval"]}, "notes": "pipeline pattern -> approval gate -> deliver; approval-keyed edge."}
{"id": "b3-empty-request", "band": "adversarial", "nl_request": "", "must_have": {"stage_count": [0, 0], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": true, "notes": "Empty goal: no workflow can be grounded. A typed refusal is the correct outcome."}
{"id": "b3-do-something", "band": "adversarial", "nl_request": "Just do something useful, you figure it out.", "must_have": {"stage_count": [0, 0], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": true, "notes": "No concrete objective; underspecified to the point of no groundable stages. Refusal acceptable."}
{"id": "b3-contradictory", "band": "adversarial", "nl_request": "Delete every file and also never touch the filesystem, at the same time.", "must_have": {"stage_count": [0, 0], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": true, "notes": "Self-contradictory constraints; refusal is the safe outcome. This case is not a green light to author a destructive workflow."}
{"id": "b3-vague-improve", "band": "adversarial", "nl_request": "Make it better.", "must_have": {"stage_count": [0, 0], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": true, "notes": "No referent for 'it'; nothing to build. Refusal acceptable."}
{"id": "b3-underspecified-notify", "band": "adversarial", "nl_request": "Notify the team when the thing happens.", "must_have": {"stage_count": [1, 3], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": false, "repair_forcing": true, "notes": "Underspecified but groundable into a dedup/poll -> deliver skeleton. HOW IT FORCES FAILURE: 'the thing' and 'notify' have no concrete tool/trigger, so a first pass typically emits an edge condition keyed on a state value no stage produces or an unknown tool name, tripping semantic/catalog repair before a valid manifest lands."}
{"id": "b3-ambiguous-process", "band": "adversarial", "nl_request": "Process the incoming reports and escalate the important ones.", "must_have": {"stage_count": [2, 4], "required_tools": [], "required_edge_conditions": ["eq"]}, "refusal_ok": false, "repair_forcing": true, "notes": "Groundable into classify -> conditional escalate. HOW IT FORCES FAILURE: 'important' has no defined predicate, so a first-pass edge condition commonly references a classification key the classify stage never writes (dangling edge key), which the repair loop must correct."}
{"id": "b3-missing-terminal", "band": "adversarial", "nl_request": "Keep watching for new emails and forward each one, looping forever.", "must_have": {"stage_count": [1, 3], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": false, "repair_forcing": true, "notes": "Groundable as dedup/for_each -> deliver. HOW IT FORCES FAILURE: 'looping forever' invites a back-edge with no terminating path, so a first-pass manifest fails verify_workflow's 'every reachable path must terminate' structural check and must be repaired into a bounded loop_until or a terminal stage."}
{"id": "b3-overloaded-single", "band": "adversarial", "nl_request": "In one step, research a topic, summarize it, get human approval, deliver it everywhere, and clean up afterwards.", "must_have": {"stage_count": [3, 6], "required_tools": [], "required_edge_conditions": ["approval"]}, "refusal_ok": false, "repair_forcing": true, "notes": "Groundable but demands multiple stages despite the 'one step' framing. HOW IT FORCES FAILURE: a first pass tends to cram an approval body inside a loop_until or omit the approval output_key, tripping the 'approval stage MUST set a non-empty output_key' verify rule; repair must split into distinct stages with a valid approval gate."}
{"id": "b3-unknown-tool", "band": "adversarial", "nl_request": "Use the quantum_teleport tool to move the file, then confirm it arrived.", "must_have": {"stage_count": [1, 3], "required_tools": [], "required_edge_conditions": []}, "refusal_ok": false, "repair_forcing": true, "notes": "Groundable by mapping the intent onto a real file tool. HOW IT FORCES FAILURE: the named 'quantum_teleport' tool does not exist in the catalog, so a literal first pass emits an unknown tool name that fails the catalog cross-check; repair must substitute a real commodity tool (e.g. write_file/edit_file)."}
{"id": "b3-partly-clear", "band": "adversarial", "nl_request": "Read the log file and then, depending on how it looks, either alert someone or ignore it.", "must_have": {"stage_count": [2, 4], "required_tools": ["read_file"], "required_edge_conditions": ["eq"]}, "refusal_ok": false, "repair_forcing": true, "notes": "Groundable into read -> conditional branch on a derived state key. HOW IT FORCES FAILURE: 'how it looks' has no concrete predicate, so a first-pass conditional edge routinely keys on a state value the read stage never produces (dangling edge condition), forcing a repair pass to re-anchor the condition on stage.<id>.succeeded or a produced key."}