use std::collections::HashSet;
use std::fs;
use std::io::IsTerminal;
use std::path::{Path, PathBuf};
use std::time::{Instant, SystemTime, UNIX_EPOCH};
use crate::cli::DemoArgs;
use crate::commands::run::{execute_run, CliLlmMockMode, RunOutcome, RunProfileOptions};
/// Bundled demo scenarios, embedded into the binary at compile time via
/// `include_str!` (script + replay tape per scenario).
///
/// Order is significant: the first entry is the scenario that runs when no id
/// is given on a non-interactive stdout, and it is also the default choice in
/// the interactive picker. Each `id` doubles as the directory name under
/// `assets/demo/` that holds `scenario.harn` and `tape.jsonl`.
const SCENARIOS: &[Scenario] = &[
Scenario {
id: "merge-captain",
title: "Merge Captain triages 3 PRs",
description: "A merge_captain persona triages three mocked PRs (trivial, risky, buggy), \
asks an LLM per PR, and emits a structured supervision receipt. \
Demonstrates persona supervision, approval gates, and trust receipts.",
script: include_str!("../../assets/demo/merge-captain/scenario.harn"),
tape: include_str!("../../assets/demo/merge-captain/tape.jsonl"),
},
Scenario {
id: "review-captain",
title: "Review Captain inspects a 5-file diff",
description: "A review_captain reviews a 5-file diff, asks one clarifying question \
(HITL surfaced via the receipt), then renders a verdict with \
reasoning. Demonstrates clarifying-question loops and structured \
review receipts.",
script: include_str!("../../assets/demo/review-captain/scenario.harn"),
tape: include_str!("../../assets/demo/review-captain/tape.jsonl"),
},
Scenario {
id: "provider-race",
title: "Provider race with cost attribution",
description: "Race three providers on one prompt with `parallel each`, pick the \
lowest-latency winner, and emit a cost-attribution receipt. \
Previews the routing_policy primitive (#1649).",
script: include_str!("../../assets/demo/provider-race/scenario.harn"),
tape: include_str!("../../assets/demo/provider-race/tape.jsonl"),
},
Scenario {
id: "routing-policy",
title: "routing_policy escalates a cheap chain to a frontier link",
description: "Drive the v0.8.40 routing_policy primitive through three mocked tasks: a \
clean cheap-link success, a 429 that fails over to the frontier, and a \
TODO-poisoned cheap reply that the lint verifier escalates. Demonstrates \
per-attempt routing receipts, failover, and verifier-signal escalation \
(canary scenario for the demo gate, #2437).",
script: include_str!("../../assets/demo/routing-policy/scenario.harn"),
tape: include_str!("../../assets/demo/routing-policy/tape.jsonl"),
},
Scenario {
id: "stdlib-toolkit",
title: "stdlib toolkit assembles an XML system-prompt context",
description: "Walk the new clone / deep_merge / unique / dict_from_pairs / to_xml / \
from_xml / word_wrap / indent / repeat built-ins through a realistic \
pre-flight render: layer per-task overrides onto a shared defaults dict, \
dedupe the operator's `previous_chats` list, emit an XML `<context>` \
block, round-trip it back through the parser, and frame the result in a \
60-column prompt margin. Fully offline.",
script: include_str!("../../assets/demo/stdlib-toolkit/scenario.harn"),
tape: include_str!("../../assets/demo/stdlib-toolkit/tape.jsonl"),
},
Scenario {
id: "command-capture",
title: "run_command preserves a slow command's full output past a `| tail` filter",
description: "Walk the std/agent/command_capture recognizer: rewrite `producer | tail/wc/grep` \
pipelines to `producer | tee '<capture>' 2>/dev/null | filter` so the agent \
still sees the filtered output while the producer's COMPLETE output is \
preserved on disk, show the cases it deliberately leaves untouched (head, \
grep -q, command substitution, subshell grouping), then materialize a capture \
and demonstrate the post-run `output_capture` hint that lets an agent read the \
full output instead of re-running a slow command. Fully offline — no LLM, no \
subprocess.",
script: include_str!("../../assets/demo/command-capture/scenario.harn"),
tape: include_str!("../../assets/demo/command-capture/tape.jsonl"),
},
Scenario {
id: "compaction-policy",
title: "compaction.{policy,check,run} drives a session through the lifecycle",
description: "Declare a per-session compaction policy with thresholds, call \
`compaction.check` to get a `compact_now` / `defer` decision, then drive \
the canonical #2323 lifecycle via `compaction.run`. Demonstrates the \
lifted-from-TUI policy primitive (#2505) entirely offline using a custom \
summarize closure.",
script: include_str!("../../assets/demo/compaction-policy/scenario.harn"),
tape: include_str!("../../assets/demo/compaction-policy/tape.jsonl"),
},
Scenario {
id: "mcp-host",
title: "harn.mcp.* host primitive lazy-spawn + status round-trip",
description: "Drive the supervised MCP-host primitive (#2504): register two lazy MCP \
server specs, snapshot the supervision status (restart_count, circuit, \
cache_entries), stop one of them, and re-snapshot. Stays fully offline \
because lazy spawn doesn't try to connect. The receipt asserts every \
initial entry starts with the circuit closed and cache empty — the \
invariants downstream observability hooks lean on.",
script: include_str!("../../assets/demo/mcp-host/scenario.harn"),
tape: include_str!("../../assets/demo/mcp-host/tape.jsonl"),
},
Scenario {
id: "http-transport",
title: "http_etag / http_choose / http_not_modified / http_upgrade_ws / http_push_hints",
description: "Drive the A.12 transport-completeness builtins (#2515) offline: pick a \
content type from a simulated `Accept` header via `http_choose`, derive a \
strong ETag from the JSON payload, build the matching `http_not_modified` \
envelope, assemble the `http_upgrade_ws` envelope with subprotocol \
negotiation, and decorate an `http_ok` envelope with `http_push_hints` so \
the codec emits one `Link: <path>; rel=preload; as=...` header per asset. \
The conformance test in `crates/harn-serve/tests/\
transport_conformance.rs` exercises the live HTTP / WS path; the demo \
keeps the offline smoke covered.",
script: include_str!("../../assets/demo/http-transport/scenario.harn"),
tape: include_str!("../../assets/demo/http-transport/tape.jsonl"),
},
Scenario {
id: "harn-site",
title: "harn serve site — a .harn file answers its own HTTP routes",
description: "Drive the `harn serve site` handler contract (#2574) offline: a routed \
`pub fn` receives a request dict and returns an `http_*` envelope. The \
scenario calls a GET handler, a POST handler that echoes its body, and a \
conditional GET that returns 200 then 304 via `http_not_modified`, plus the \
`on_message` WebSocket frame callback — capturing each status in a receipt. \
The live socket path (routing, multipart, WS upgrade) is covered by \
`crates/harn-serve/tests/site_hosting.rs`; the demo keeps the offline \
handler-contract smoke covered.",
script: include_str!("../../assets/demo/harn-site/scenario.harn"),
tape: include_str!("../../assets/demo/harn-site/tape.jsonl"),
},
Scenario {
id: "obs-primitive",
title: "harness.obs.* spans + counter/histogram/gauge + audit roundtrip",
description: "Drive the standardized observability primitive (#2513): open a span over a \
simulated `harn.session.put`, record one of each instrument variant \
(counter / histogram / gauge), emit a structured log inside the span, then \
drain the in-process buffer and surface a receipt of events-by-kind plus \
the bound request_id. Validates vocabulary at emit time and routes through \
the `test` backend so the scenario stays fully offline.",
script: include_str!("../../assets/demo/obs-primitive/scenario.harn"),
tape: include_str!("../../assets/demo/obs-primitive/tape.jsonl"),
},
Scenario {
id: "edit-rename-symbol",
title: "edit.rename_symbol rewrites a Rust struct across the workspace",
description: "Stage a tiny Rust workspace, build the typed symbol graph (#2434), then \
drive `edit_rename_symbol` (#2508) through dry-run + applied + conflict \
paths: a workspace-scoped plan with per-edit byte/(row,col) spans, the \
same plan committed for real with identifier-context rewrites (skipping \
string literals and comments), and a follow-up rename whose new name \
already shadows another identifier — host short-circuits without \
touching disk. Fully offline.",
script: include_str!("../../assets/demo/edit-rename-symbol/scenario.harn"),
tape: include_str!("../../assets/demo/edit-rename-symbol/tape.jsonl"),
},
Scenario {
id: "edit-language-coverage",
title: "edit.capabilities + apply_node span the B.7 tier-1 languages",
description: "Read the per-language AST-precise edit capability matrix (#2519), show \
graceful degradation for a language with no grammar (Dockerfile returns \
`unsupported_language` plus a text-edit fallback), then drive \
`edit_apply_node` (dry-run) against bundled JSON and CSS seeds to prove a \
format-preserving edit round-trips on the data/markup grammars added in \
B.7. Fully offline.",
script: include_str!("../../assets/demo/edit-language-coverage/scenario.harn"),
tape: include_str!("../../assets/demo/edit-language-coverage/tape.jsonl"),
},
Scenario {
id: "edit-refactor",
title: "edit.extract_function / add_parameter / change_return_type preview as diffs",
description: "Drive the B.8 structured-refactoring primitives (#2520) offline against a \
bundled seed workspace, all in dry-run mode: extract two statements of a \
Python function into `compute_subtotal` (capturing `base`/`qty` but not the \
module-level `audit`), append a trailing parameter to a Rust function and \
fill the argument at all three call sites, and rewrite that function's \
return type — each previewed as a unified diff against a throw-away \
staged-fs overlay so no bytes hit disk. The conformance test in \
`crates/harn-vm`/`conformance` exercises the apply path; this demo keeps \
the offline preview smoke covered.",
script: include_str!("../../assets/demo/edit-refactor/scenario.harn"),
tape: include_str!("../../assets/demo/edit-refactor/tape.jsonl"),
},
Scenario {
id: "prompt-guidance",
title: "tool guidance rides with the tool, and the prompt is auditable",
description: "Drive the unified prompt-fragment assembler: a tool carries a `guidance` \
string that the runtime injects as a capability-gated system-prompt fragment \
(`requires_tools: [<that tool>]`), so a 'always update the TODO tracker' \
instruction appears only when the todo tool is present and never otherwise — \
instruction and tool share one source of truth and cannot drift. Calls \
`prompt_explain(options)` with and without the tool, prints the assembled \
system string plus the per-fragment provenance (included/excluded + reason + \
bytes) an operator inspects, and proves the only difference between the two \
prompts is exactly the gated fragment. Fully offline — no LLM, no network.",
script: include_str!("../../assets/demo/prompt-guidance/scenario.harn"),
tape: include_str!("../../assets/demo/prompt-guidance/tape.jsonl"),
},
];
/// One bundled demo: display metadata plus the embedded script and tape.
///
/// All fields are `&'static str`, so the struct is cheap to copy and is passed
/// by value throughout this module.
#[derive(Clone, Copy)]
struct Scenario {
/// Identifier accepted on the command line (`harn demo <id>`); also used in
/// staged file names and run-record ids.
id: &'static str,
/// Short one-line title shown in listings and run banners.
title: &'static str,
/// Longer explanation; word-wrapped in the human listing and emitted
/// verbatim in JSON output.
description: &'static str,
/// Contents of the scenario's `.harn` script, written to a temp file
/// before execution.
script: &'static str,
/// Contents of the scenario's JSONL tape, staged next to the script and
/// used as the LLM replay fixture unless `--live` is set.
tape: &'static str,
}
pub fn scenario_ids() -> Vec<&'static str> {
SCENARIOS.iter().map(|s| s.id).collect()
}
/// Entry point for `harn demo`; returns the process exit code.
///
/// Dispatch order:
/// 1. `--list` prints the scenario table (JSON when `--json`) and exits 0.
/// 2. An explicit scenario id is looked up and run; an unknown id prints an
///    error plus the human listing and exits 2.
/// 3. With no id: `--json` prints the JSON listing, a terminal stdout opens the
///    interactive picker, and anything else runs the first bundled scenario.
pub(crate) async fn run(args: DemoArgs) -> i32 {
    if args.list {
        print_list_table(args.json);
        return 0;
    }
    match args.scenario.as_deref() {
        Some(scenario_id) => match lookup_scenario(scenario_id) {
            Some(found) => run_scenario(&args, *found).await,
            None => {
                eprintln!("error: unknown scenario `{scenario_id}`");
                eprintln!();
                print_list_table(false);
                2
            }
        },
        None if args.json => {
            print_list_table(true);
            0
        }
        None if std::io::stdout().is_terminal() => interactive_pick(&args).await,
        None => run_scenario(&args, SCENARIOS[0]).await,
    }
}
/// Finds the bundled scenario whose id equals `id` exactly, if any.
fn lookup_scenario(id: &str) -> Option<&'static Scenario> {
    for candidate in SCENARIOS {
        if candidate.id == id {
            return Some(candidate);
        }
    }
    None
}
/// Prints every bundled scenario to stdout.
///
/// With `as_json` set, emits a pretty-printed `{"scenarios": [...]}` document
/// carrying each scenario's id, title and description. Otherwise prints a
/// human-readable listing with the description wrapped at 70 columns, followed
/// by usage hints.
fn print_list_table(as_json: bool) {
    if !as_json {
        println!("Available demos (replayable offline, no API keys required):");
        println!();
        for scenario in SCENARIOS.iter() {
            println!(" {:<16} {}", scenario.id, scenario.title);
            wrap_text(scenario.description, 70)
                .iter()
                .for_each(|line| println!(" {line}"));
            println!();
        }
        println!("Run a scenario: harn demo <id>");
        println!("Use real provider: harn demo <id> --live");
        return;
    }

    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(SCENARIOS.len());
    for scenario in SCENARIOS {
        entries.push(serde_json::json!({
            "id": scenario.id,
            "title": scenario.title,
            "description": scenario.description,
        }));
    }
    let listing = serde_json::json!({"scenarios": entries});
    let rendered = serde_json::to_string_pretty(&listing).unwrap_or_default();
    println!("{}", rendered);
}
/// Greedily word-wraps `text` so that each line is at most `width` characters.
///
/// Words are split on any whitespace and re-joined with single spaces, so runs
/// of whitespace (including newlines) collapse. A single word longer than
/// `width` is never broken; it is placed on a line of its own. Returns an
/// empty vector when `text` contains no words.
///
/// Width is measured in Unicode scalar values (`char`s), not bytes, so
/// multi-byte punctuation such as `—` or `·` counts as one column instead of
/// forcing lines to wrap early.
fn wrap_text(text: &str, width: usize) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current = String::new();
    // Character width of `current`, tracked incrementally so the line is not
    // re-scanned for every word.
    let mut current_width = 0usize;
    for word in text.split_whitespace() {
        let word_width = word.chars().count();
        if current.is_empty() {
            // First word on a line is always accepted, even if it overflows.
            current.push_str(word);
            current_width = word_width;
        } else if current_width + 1 + word_width > width {
            // The separating space plus the word would overflow: start a new line.
            lines.push(std::mem::take(&mut current));
            current.push_str(word);
            current_width = word_width;
        } else {
            current.push(' ');
            current.push_str(word);
            current_width += 1 + word_width;
        }
    }
    if !current.is_empty() {
        lines.push(current);
    }
    lines
}
/// Prompts on stdout for a scenario number, reads one line from stdin, and
/// runs the chosen scenario.
///
/// An empty answer, end-of-input, or a stdin read error selects the first
/// scenario. Anything that is not a number in `1..=SCENARIOS.len()` prints an
/// error to stderr and returns exit code 2.
async fn interactive_pick(args: &DemoArgs) -> i32 {
    use std::io::Write;

    let total = SCENARIOS.len();
    println!("Pick a Harn demo (offline replay, no API keys required):");
    println!();
    for (index, scenario) in SCENARIOS.iter().enumerate() {
        let number = index + 1;
        println!(" {}) {:<16} {}", number, scenario.id, scenario.title);
    }
    println!();
    print!("Choice [1-{}, default 1]: ", total);
    // Best effort: the prompt has no trailing newline, so flush it explicitly.
    let _ = std::io::stdout().flush();

    let mut answer = String::new();
    let bytes_read = std::io::stdin().read_line(&mut answer).unwrap_or(0);
    let trimmed = answer.trim();
    if bytes_read == 0 || trimmed.is_empty() {
        return run_scenario(args, SCENARIOS[0]).await;
    }

    let selection = trimmed
        .parse::<usize>()
        .ok()
        .filter(|number| (1..=total).contains(number));
    let Some(number) = selection else {
        eprintln!("error: invalid selection `{trimmed}`");
        return 2;
    };
    run_scenario(args, SCENARIOS[number - 1]).await
}
async fn run_scenario(args: &DemoArgs, scenario: Scenario) -> i32 {
let staged = match stage_scenario(scenario) {
Ok(s) => s,
Err(error) => {
eprintln!("error: {error}");
return 1;
}
};
if !args.json {
println!("=== harn demo · {} ===", scenario.id);
println!("{}", scenario.title);
println!();
if !args.live {
println!("(offline replay — no API keys required)");
println!();
}
}
let llm_mock_mode = if args.live {
if !args.json {
println!("(--live: routing through the configured provider — set HARN_LLM_PROVIDER if none is wired)");
println!();
}
CliLlmMockMode::Off
} else {
CliLlmMockMode::Replay {
fixture_path: staged.tape_path.clone(),
}
};
let started = Instant::now();
let outcome = execute_run(
staged.script_path.to_string_lossy().as_ref(),
false,
HashSet::new(),
Vec::new(),
Vec::new(),
llm_mock_mode,
None,
RunProfileOptions::default(),
)
.await;
let elapsed = started.elapsed();
if !args.json && !outcome.stdout.is_empty() {
print!("{}", outcome.stdout);
}
if !outcome.stderr.is_empty() {
eprint!("{}", outcome.stderr);
}
if outcome.exit_code != 0 {
if !args.json {
eprintln!(
"error: demo `{}` failed (exit {})",
scenario.id, outcome.exit_code
);
if args.live && live_failure_looks_like_provider_misconfig(&outcome) {
eprintln!();
eprintln!("hint: --live needs a configured LLM provider. Re-run without --live");
eprintln!(" to use the bundled offline tape, or run `harn quickstart`");
eprintln!(" to wire a provider.");
}
} else {
print_json_summary(scenario, &outcome, elapsed.as_millis(), None);
}
return outcome.exit_code;
}
let receipt_dir = if args.no_record {
None
} else {
match write_run_record(scenario, &outcome) {
Ok(path) => Some(path),
Err(error) => {
eprintln!("warning: failed to write demo run record: {error}");
None
}
}
};
if args.json {
print_json_summary(
scenario,
&outcome,
elapsed.as_millis(),
receipt_dir.as_deref(),
);
} else {
println!();
println!("--- demo complete in {} ms ---", elapsed.as_millis());
if let Some(dir) = &receipt_dir {
println!(" run record: {}", dir.join("run.json").display());
}
println!();
println!("Next steps:");
println!(" harn demo --list list every bundled scenario");
if !args.live {
println!(
" harn demo {} --live run again against the configured provider",
scenario.id
);
}
println!(" harn portal browse run records in the UI");
}
0
}
/// A scenario's script and tape written out to a temporary directory.
struct StagedScenario {
/// Owns the temporary directory that holds both files. It is never read;
/// it is kept so the directory lives as long as this value (per the
/// `tempfile` crate, a `TempDir` removes its directory when dropped).
_temp_root: tempfile::TempDir,
/// Path of the staged `.harn` script inside the temp directory.
script_path: PathBuf,
/// Path of the staged `.tape.jsonl` replay tape inside the temp directory.
tape_path: PathBuf,
}
/// Writes the scenario's embedded script and tape into a fresh temp directory.
///
/// The directory is named `harn-demo-<id>-…`; the files are `<id>.harn` and
/// `<id>.tape.jsonl`.
///
/// # Errors
/// Returns a human-readable message if the directory cannot be created or
/// either file cannot be written.
fn stage_scenario(scenario: Scenario) -> Result<StagedScenario, String> {
    let prefix = format!("harn-demo-{}-", scenario.id);
    let temp_root = match tempfile::Builder::new().prefix(&prefix).tempdir() {
        Ok(created) => created,
        Err(e) => return Err(format!("failed to create demo tempdir: {e}")),
    };

    let script_path = temp_root.path().join(format!("{}.harn", scenario.id));
    let tape_path = temp_root
        .path()
        .join(format!("{}.tape.jsonl", scenario.id));

    if let Err(e) = fs::write(&script_path, scenario.script) {
        return Err(format!("failed to stage demo script: {e}"));
    }
    if let Err(e) = fs::write(&tape_path, scenario.tape) {
        return Err(format!("failed to stage demo tape: {e}"));
    }

    Ok(StagedScenario {
        _temp_root: temp_root,
        script_path,
        tape_path,
    })
}
/// Writes a `run.json` run record for a finished demo under
/// `<cwd>/.harn-runs/demo-<id>-<unix-seconds>/` and returns that directory.
///
/// The record carries the scenario metadata plus the captured stdout, stderr
/// and exit code. `started_at` and `finished_at` are both set to the time the
/// record is written.
///
/// # Errors
/// Returns a human-readable message if the current directory cannot be
/// resolved, the record cannot be serialized, or the directory or file cannot
/// be written.
fn write_run_record(scenario: Scenario, outcome: &RunOutcome) -> Result<PathBuf, String> {
    let cwd = std::env::current_dir().map_err(|e| format!("cwd: {e}"))?;
    let runs_root = cwd.join(".harn-runs");
    // A clock set before the Unix epoch degrades to 0 rather than failing the run.
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);
    // Checked conversion instead of an `as` cast; if the timestamp cannot be
    // represented or formatted, fall back to a well-formed RFC 3339 epoch
    // string (the raw seconds are still stored in `recorded_at_unix_seconds`).
    let started_iso = i64::try_from(ts)
        .ok()
        .and_then(|secs| time::OffsetDateTime::from_unix_timestamp(secs).ok())
        .and_then(|t| {
            t.format(&time::format_description::well_known::Rfc3339)
                .ok()
        })
        .unwrap_or_else(|| "1970-01-01T00:00:00Z".to_string());
    let run_id = format!("demo-{}-{ts}", scenario.id);
    let dir = runs_root.join(&run_id);
    let record = serde_json::json!({
        "_type": "run_record",
        "id": run_id,
        "workflow_id": format!("harn-demo:{}", scenario.id),
        "workflow_name": scenario.title,
        "task": scenario.id,
        "status": if outcome.exit_code == 0 { "complete" } else { "failed" },
        "started_at": started_iso,
        "finished_at": started_iso,
        "stages": [],
        "transitions": [],
        "checkpoints": [],
        "pending_nodes": [],
        "completed_nodes": [],
        "child_runs": [],
        "artifacts": [],
        "policy": {},
        "metadata": {
            "demo": {
                "scenario": scenario.id,
                "title": scenario.title,
                "description": scenario.description,
                "exit_code": outcome.exit_code,
                "stdout": outcome.stdout,
                "stderr": outcome.stderr,
                "recorded_at_unix_seconds": ts,
            }
        },
    });
    // Serialize before touching disk so a failure surfaces as an error instead
    // of leaving an empty `run.json` (or an empty directory) behind.
    let body = serde_json::to_string_pretty(&record)
        .map_err(|e| format!("serialize run record: {e}"))?;
    fs::create_dir_all(&dir).map_err(|e| format!("create {}: {e}", dir.display()))?;
    let path = dir.join("run.json");
    fs::write(&path, body).map_err(|e| format!("write {}: {e}", path.display()))?;
    Ok(dir)
}
/// Heuristic: does the failed run's output look like a missing or
/// misconfigured LLM provider?
///
/// Returns `true` when stderr followed by stdout contains any of a fixed set
/// of auth/provider marker substrings (case-sensitive).
fn live_failure_looks_like_provider_misconfig(outcome: &RunOutcome) -> bool {
    const MARKERS: [&str; 7] = [
        "category: auth",
        "auth_failure",
        "HTTP 401",
        "HTTP 403",
        "api_key",
        "HARN_LLM_PROVIDER",
        "no provider configured",
    ];
    let combined = format!("{}{}", outcome.stderr, outcome.stdout);
    MARKERS.iter().any(|marker| combined.contains(*marker))
}
/// Prints a pretty-printed JSON summary of a demo run to stdout.
///
/// The object contains the scenario id and title, the exit code, the elapsed
/// time in milliseconds, the captured stdout/stderr, and `run_record_dir`
/// (`null` when no record directory was supplied).
fn print_json_summary(
    scenario: Scenario,
    outcome: &RunOutcome,
    elapsed_ms: u128,
    record_dir: Option<&Path>,
) {
    let run_record_dir = record_dir.map(|dir| dir.display().to_string());
    let summary = serde_json::json!({
        "scenario": scenario.id,
        "title": scenario.title,
        "exit_code": outcome.exit_code,
        "elapsed_ms": elapsed_ms,
        "stdout": outcome.stdout,
        "stderr": outcome.stderr,
        "run_record_dir": run_record_dir,
    });
    let rendered = serde_json::to_string_pretty(&summary).unwrap_or_default();
    println!("{}", rendered);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every scenario has non-empty metadata and assets, and ids are unique.
    #[test]
    fn scenarios_have_unique_nonempty_ids() {
        let mut seen = HashSet::new();
        for scenario in SCENARIOS.iter() {
            let id = scenario.id;
            assert!(!id.is_empty(), "scenario id is empty");
            assert!(!scenario.title.is_empty(), "scenario {} has empty title", id);
            assert!(
                !scenario.description.is_empty(),
                "scenario {} has empty description",
                id
            );
            assert!(!scenario.script.is_empty(), "scenario {} script is empty", id);
            assert!(!scenario.tape.is_empty(), "scenario {} tape is empty", id);
            assert!(seen.insert(id), "duplicate scenario id: {}", id);
        }
    }

    /// Every non-blank line of every bundled tape is valid JSON.
    #[test]
    fn scenario_tape_lines_parse_as_json() {
        for scenario in SCENARIOS.iter() {
            let non_blank = scenario
                .tape
                .lines()
                .enumerate()
                .filter(|(_, line)| !line.trim().is_empty());
            for (index, line) in non_blank {
                if let Err(e) = serde_json::from_str::<serde_json::Value>(line) {
                    panic!(
                        "scenario {} tape line {} is invalid JSON: {e}",
                        scenario.id,
                        index + 1
                    );
                }
            }
        }
    }

    /// Each scenario id has a matching `assets/demo/<id>/` directory that
    /// contains both `scenario.harn` and `tape.jsonl`.
    #[test]
    fn scenario_ids_match_assets_dir_names() {
        let assets = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("assets/demo");
        for scenario in SCENARIOS.iter() {
            let dir = assets.join(scenario.id);
            assert!(dir.is_dir(), "missing demo asset dir for {}", scenario.id);

            let script_file = dir.join("scenario.harn");
            assert!(
                script_file.is_file(),
                "missing scenario.harn for {}",
                scenario.id
            );

            let tape_file = dir.join("tape.jsonl");
            assert!(
                tape_file.is_file(),
                "missing tape.jsonl for {}",
                scenario.id
            );
        }
    }
}