1#![recursion_limit = "256"]
2
3pub mod acp;
4pub mod cli;
5pub mod commands;
6pub mod config;
7pub mod env_guard;
8pub mod format;
9pub mod package;
10mod provider_bootstrap;
11pub mod skill_loader;
12pub mod skill_provenance;
13pub mod test_runner;
14#[doc(hidden)]
15pub mod tests;
16
17use clap::{error::ErrorKind, CommandFactory, Parser as ClapParser};
18use std::path::{Path, PathBuf};
19use std::sync::Arc;
20use std::{env, fs, process, thread};
21
22use cli::{
23 Cli, Command, CompletionShell, EvalCommand, MergeCaptainCommand, MergeCaptainMockCommand,
24 ModelInfoArgs, PackageArtifactsCommand, PackageCacheCommand, PackageCommand, PersonaCommand,
25 PersonaSupervisionCommand, ProvidersCommand, RunsCommand, ServeCommand, SkillCommand,
26 SkillKeyCommand, SkillTrustCommand, SkillsCommand, ToolCommand,
27};
28use harn_lexer::Lexer;
29use harn_parser::{DiagnosticSeverity, Parser, TypeChecker};
30
/// Stack size, in bytes (16 MiB), requested for the dedicated `harn-cli`
/// thread that `run` spawns to host the async runtime.
31pub const CLI_RUNTIME_STACK_SIZE: usize = 16 * 1024 * 1024;
32
33#[cfg(feature = "hostlib")]
34pub(crate) fn install_default_hostlib(vm: &mut harn_vm::Vm) {
35 let _ = harn_hostlib::install_default(vm);
36}
37
38#[cfg(not(feature = "hostlib"))]
39pub(crate) fn install_default_hostlib(_vm: &mut harn_vm::Vm) {}
40
41pub fn run() {
44 let handle = thread::Builder::new()
45 .name("harn-cli".to_string())
46 .stack_size(CLI_RUNTIME_STACK_SIZE)
47 .spawn(|| {
48 let runtime = tokio::runtime::Builder::new_multi_thread()
49 .enable_all()
50 .build()
51 .unwrap_or_else(|error| {
52 eprintln!("failed to start async runtime: {error}");
53 process::exit(1);
54 });
55 runtime.block_on(async_main());
56 })
57 .unwrap_or_else(|error| {
58 eprintln!("failed to start CLI runtime thread: {error}");
59 process::exit(1);
60 });
61
62 if let Err(payload) = handle.join() {
63 std::panic::resume_unwind(payload);
64 }
65}
66
67async fn async_main() {
68 let raw_args = normalize_serve_args(env::args().collect());
69 if raw_args.len() == 2 && raw_args[1].ends_with(".harn") {
70 provider_bootstrap::maybe_seed_ollama_for_run_file(Path::new(&raw_args[1]), false, false)
71 .await;
72 commands::run::run_file(
73 &raw_args[1],
74 false,
75 std::collections::HashSet::new(),
76 Vec::new(),
77 commands::run::CliLlmMockMode::Off,
78 None,
79 commands::run::RunProfileOptions::default(),
80 )
81 .await;
82 return;
83 }
84
85 let cli = match Cli::try_parse_from(&raw_args) {
86 Ok(cli) => cli,
87 Err(error) => {
88 if matches!(
89 error.kind(),
90 ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
91 ) {
92 error.exit();
93 }
94 error.exit();
95 }
96 };
97
98 match cli.command.expect("clap requires a command") {
99 Command::Version => print_version(),
100 Command::Upgrade(args) => {
101 if let Err(error) = commands::upgrade::run(args).await {
102 eprintln!("error: {error}");
103 process::exit(1);
104 }
105 }
106 Command::Skill(args) => match args.command {
107 SkillCommand::Key(key_args) => match key_args.command {
108 SkillKeyCommand::Generate(generate) => commands::skill::run_key_generate(&generate),
109 },
110 SkillCommand::Sign(sign) => commands::skill::run_sign(&sign),
111 SkillCommand::Endorse(endorse) => commands::skill::run_endorse(&endorse),
112 SkillCommand::Verify(verify) => commands::skill::run_verify(&verify),
113 SkillCommand::WhoSigned(who_signed) => {
114 commands::skill::run_who_signed(&who_signed).await
115 }
116 SkillCommand::Trust(trust_args) => match trust_args.command {
117 SkillTrustCommand::Add(add) => commands::skill::run_trust_add(&add),
118 SkillTrustCommand::List(list) => commands::skill::run_trust_list(&list),
119 },
120 SkillCommand::New(new_args) => commands::skills::run_new(&new_args),
121 },
122 Command::Run(args) => {
123 if !args.explain_cost {
124 match (args.eval.as_deref(), args.file.as_deref()) {
125 (Some(code), None) => {
126 provider_bootstrap::maybe_seed_ollama_for_inline(
127 code,
128 args.yes,
129 args.llm_mock.is_some(),
130 )
131 .await;
132 }
133 (None, Some(file)) => {
134 provider_bootstrap::maybe_seed_ollama_for_run_file(
135 Path::new(file),
136 args.yes,
137 args.llm_mock.is_some(),
138 )
139 .await;
140 }
141 _ => {}
142 }
143 }
144 let denied =
145 commands::run::build_denied_builtins(args.deny.as_deref(), args.allow.as_deref());
146 let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
147 commands::run::CliLlmMockMode::Replay {
148 fixture_path: PathBuf::from(path),
149 }
150 } else if let Some(path) = args.llm_mock_record.as_ref() {
151 commands::run::CliLlmMockMode::Record {
152 fixture_path: PathBuf::from(path),
153 }
154 } else {
155 commands::run::CliLlmMockMode::Off
156 };
157 let attestation = args.attest.then(|| commands::run::RunAttestationOptions {
158 receipt_out: args.receipt_out.as_ref().map(PathBuf::from),
159 agent_id: args.attest_agent.clone(),
160 });
161 let profile_options = run_profile_options(&args.profile);
162
163 match (args.eval.as_deref(), args.file.as_deref()) {
164 (Some(code), None) => {
165 let (wrapped, tmp) = commands::run::prepare_eval_temp_file(code)
166 .unwrap_or_else(|e| command_error(&e));
167 let tmp_path: PathBuf = tmp.path().to_path_buf();
168 fs::write(&tmp_path, &wrapped).unwrap_or_else(|e| {
169 command_error(&format!("failed to write temp file for -e: {e}"))
170 });
171 let tmp_str = tmp_path.to_string_lossy().into_owned();
172 if args.explain_cost {
173 commands::run::run_explain_cost_file_with_skill_dirs(&tmp_str);
174 } else {
175 commands::run::run_file_with_skill_dirs(
176 &tmp_str,
177 args.trace,
178 denied,
179 args.argv.clone(),
180 args.skill_dir.clone(),
181 llm_mock_mode.clone(),
182 attestation.clone(),
183 profile_options.clone(),
184 )
185 .await;
186 }
187 drop(tmp);
188 }
189 (None, Some(file)) => {
190 if args.explain_cost {
191 commands::run::run_explain_cost_file_with_skill_dirs(file);
192 } else {
193 commands::run::run_file_with_skill_dirs(
194 file,
195 args.trace,
196 denied,
197 args.argv.clone(),
198 args.skill_dir.clone(),
199 llm_mock_mode,
200 attestation,
201 profile_options,
202 )
203 .await
204 }
205 }
206 (Some(_), Some(_)) => command_error(
207 "`harn run` accepts either `-e <code>` or `<file.harn>`, not both",
208 ),
209 (None, None) => {
210 command_error("`harn run` requires either `-e <code>` or `<file.harn>`")
211 }
212 }
213 }
214 Command::Check(args) => {
215 if args.provider_matrix {
216 let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
217 let extensions = package::load_runtime_extensions(&cwd);
218 package::install_runtime_extensions(&extensions);
219 commands::check::provider_matrix::run(args.format, args.filter.as_deref());
220 return;
221 }
222 if args.connector_matrix {
223 commands::check::connector_matrix::run(
224 args.format,
225 args.filter.as_deref(),
226 &args.targets,
227 );
228 return;
229 }
230 let mut target_strings: Vec<String> = args.targets.clone();
231 if args.workspace {
232 let anchor = target_strings.first().map(Path::new);
233 match package::load_workspace_config(anchor) {
234 Some((workspace, manifest_dir)) if !workspace.pipelines.is_empty() => {
235 for pipeline in &workspace.pipelines {
236 let candidate = Path::new(pipeline);
237 let resolved = if candidate.is_absolute() {
238 candidate.to_path_buf()
239 } else {
240 manifest_dir.join(candidate)
241 };
242 target_strings.push(resolved.to_string_lossy().into_owned());
243 }
244 }
245 Some(_) => command_error(
246 "--workspace requires `[workspace].pipelines` in the nearest harn.toml",
247 ),
248 None => command_error(
249 "--workspace could not find a harn.toml walking up from the target(s)",
250 ),
251 }
252 }
253 if target_strings.is_empty() {
254 command_error(
255 "`harn check` requires at least one target path, or `--workspace` with `[workspace].pipelines`",
256 );
257 }
258 for target in &target_strings {
259 if let Err(error) = package::validate_runtime_manifest_extensions(Path::new(target))
260 {
261 command_error(&format!("manifest extension validation failed: {error}"));
262 }
263 }
264 let targets: Vec<&str> = target_strings.iter().map(String::as_str).collect();
265 let files = commands::check::collect_harn_targets(&targets);
266 if files.is_empty() {
267 command_error("no .harn files found under the given target(s)");
268 }
269 let module_graph = commands::check::build_module_graph(&files);
270 let cross_file_imports = commands::check::collect_cross_file_imports(&module_graph);
271 let mut should_fail = false;
272 for file in &files {
273 let mut config = package::load_check_config(Some(file));
274 if let Some(path) = args.host_capabilities.as_ref() {
275 config.host_capabilities_path = Some(path.clone());
276 }
277 if let Some(path) = args.bundle_root.as_ref() {
278 config.bundle_root = Some(path.clone());
279 }
280 if args.strict_types {
281 config.strict_types = true;
282 }
283 if let Some(sev) = args.preflight.as_deref() {
284 config.preflight_severity = Some(sev.to_string());
285 }
286 let outcome = commands::check::check_file_inner(
287 file,
288 &config,
289 &cross_file_imports,
290 &module_graph,
291 args.invariants,
292 );
293 should_fail |= outcome.should_fail(config.strict);
294 }
295 if should_fail {
296 process::exit(1);
297 }
298 }
299 Command::Config(args) => {
300 if let Err(error) = commands::config_cmd::run(args).await {
301 command_error(&error);
302 }
303 }
304 Command::Explain(args) => {
305 let code = commands::explain::run_explain(&args);
306 if code != 0 {
307 process::exit(code);
308 }
309 }
310 Command::Contracts(args) => {
311 commands::contracts::handle_contracts_command(args).await;
312 }
313 Command::Connect(args) => {
314 commands::connect::run_connect(*args).await;
315 }
316 Command::Lint(args) => {
317 let targets: Vec<&str> = args.targets.iter().map(String::as_str).collect();
318 let files = commands::check::collect_harn_targets(&targets);
319 let prompt_files = commands::check::collect_prompt_targets(&targets);
320 if files.is_empty() && prompt_files.is_empty() {
321 command_error("no .harn or .harn.prompt files found under the given target(s)");
322 }
323 let module_graph = commands::check::build_module_graph(&files);
324 let cross_file_imports = commands::check::collect_cross_file_imports(&module_graph);
325 if args.fix {
326 for file in &files {
327 let mut config = package::load_check_config(Some(file));
328 commands::check::apply_harn_lint_config(file, &mut config);
329 let require_header = args.require_file_header
330 || commands::check::harn_lint_require_file_header(file);
331 let complexity_threshold =
332 commands::check::harn_lint_complexity_threshold(file);
333 let persona_step_allowlist =
334 commands::check::harn_lint_persona_step_allowlist(file);
335 commands::check::lint_fix_file(
336 file,
337 &config,
338 &cross_file_imports,
339 &module_graph,
340 require_header,
341 complexity_threshold,
342 &persona_step_allowlist,
343 );
344 }
345 for file in &prompt_files {
346 let threshold =
347 commands::check::harn_lint_template_variant_branch_threshold(file);
348 let disabled = commands::check::harn_lint_disabled_rules(file);
349 commands::check::lint_prompt_file_inner(file, threshold, &disabled);
354 }
355 } else {
356 let mut should_fail = false;
357 for file in &files {
358 let mut config = package::load_check_config(Some(file));
359 commands::check::apply_harn_lint_config(file, &mut config);
360 let require_header = args.require_file_header
361 || commands::check::harn_lint_require_file_header(file);
362 let complexity_threshold =
363 commands::check::harn_lint_complexity_threshold(file);
364 let persona_step_allowlist =
365 commands::check::harn_lint_persona_step_allowlist(file);
366 let outcome = commands::check::lint_file_inner(
367 file,
368 &config,
369 &cross_file_imports,
370 &module_graph,
371 require_header,
372 complexity_threshold,
373 &persona_step_allowlist,
374 );
375 should_fail |= outcome.should_fail(config.strict);
376 }
377 for file in &prompt_files {
378 let threshold =
379 commands::check::harn_lint_template_variant_branch_threshold(file);
380 let disabled = commands::check::harn_lint_disabled_rules(file);
381 let config = package::load_check_config(Some(file));
382 let outcome =
383 commands::check::lint_prompt_file_inner(file, threshold, &disabled);
384 should_fail |= outcome.should_fail(config.strict);
385 }
386 if should_fail {
387 process::exit(1);
388 }
389 }
390 }
391 Command::Fmt(args) => {
392 let targets: Vec<&str> = args.targets.iter().map(String::as_str).collect();
393 let anchor = targets.first().map(Path::new).unwrap_or(Path::new("."));
396 let loaded = match config::load_for_path(anchor) {
397 Ok(c) => c,
398 Err(e) => {
399 eprintln!("warning: {e}");
400 config::HarnConfig::default()
401 }
402 };
403 let mut opts = harn_fmt::FmtOptions::default();
404 if let Some(w) = loaded.fmt.line_width {
405 opts.line_width = w;
406 }
407 if let Some(w) = loaded.fmt.separator_width {
408 opts.separator_width = w;
409 }
410 if let Some(w) = args.line_width {
411 opts.line_width = w;
412 }
413 if let Some(w) = args.separator_width {
414 opts.separator_width = w;
415 }
416 commands::check::fmt_targets(
417 &targets,
418 commands::check::FmtMode::from_check_flag(args.check),
419 &opts,
420 );
421 }
422 Command::Test(args) => {
423 if args.target.as_deref() == Some("agents-conformance") {
424 if args.selection.is_some() {
425 command_error(
426 "`harn test agents-conformance` does not accept a second positional target; use --category instead",
427 );
428 }
429 if args.evals || args.determinism || args.record || args.replay || args.watch {
430 command_error(
431 "`harn test agents-conformance` cannot be combined with --evals, --determinism, --record, --replay, or --watch",
432 );
433 }
434 let Some(target_url) = args.agents_target.clone() else {
435 command_error("`harn test agents-conformance` requires --target <url>");
436 };
437 commands::agents_conformance::run_agents_conformance(
438 commands::agents_conformance::AgentsConformanceConfig {
439 target_url,
440 api_key: args.agents_api_key.clone(),
441 categories: args.agents_category.clone(),
442 timeout_ms: args.timeout,
443 verbose: args.verbose,
444 json: args.json,
445 json_out: args.json_out.clone(),
446 workspace_id: args.agents_workspace_id.clone(),
447 session_id: args.agents_session_id.clone(),
448 },
449 )
450 .await;
451 return;
452 }
453 if args.target.as_deref() == Some("protocols") {
454 if args.evals || args.determinism || args.record || args.replay || args.watch {
455 command_error(
456 "`harn test protocols` cannot be combined with --evals, --determinism, --record, --replay, or --watch",
457 );
458 }
459 if args.junit.is_some()
460 || args.agents_target.is_some()
461 || args.agents_api_key.is_some()
462 || !args.agents_category.is_empty()
463 || args.json
464 || args.json_out.is_some()
465 || args.agents_workspace_id.is_some()
466 || args.agents_session_id.is_some()
467 || args.parallel
468 || !args.skill_dir.is_empty()
469 {
470 command_error(
471 "`harn test protocols` accepts only --filter, --verbose, --timing, and an optional fixture selection",
472 );
473 }
474 commands::protocol_conformance::run_protocol_conformance(
475 args.selection.as_deref(),
476 args.filter.as_deref(),
477 args.verbose || args.timing,
478 );
479 return;
480 }
481 if args.evals {
482 if args.determinism || args.record || args.replay || args.watch {
483 command_error("--evals cannot be combined with --determinism, --record, --replay, or --watch");
484 }
485 if args.target.as_deref() != Some("package") || args.selection.is_some() {
486 command_error("package evals are run with `harn test package --evals`");
487 }
488 run_package_evals();
489 } else if args.determinism {
490 let cli_skill_dirs: Vec<PathBuf> =
491 args.skill_dir.iter().map(PathBuf::from).collect();
492 if args.watch {
493 command_error("--determinism cannot be combined with --watch");
494 }
495 if args.record || args.replay {
496 command_error("--determinism manages its own record/replay cycle");
497 }
498 if let Some(t) = args.target.as_deref() {
499 if t == "conformance" {
500 commands::test::run_conformance_determinism_tests(
501 t,
502 args.selection.as_deref(),
503 args.filter.as_deref(),
504 args.timeout,
505 &cli_skill_dirs,
506 )
507 .await;
508 } else if args.selection.is_some() {
509 command_error(
510 "only `harn test conformance` accepts a second positional target",
511 );
512 } else {
513 commands::test::run_determinism_tests(
514 t,
515 args.filter.as_deref(),
516 args.timeout,
517 &cli_skill_dirs,
518 )
519 .await;
520 }
521 } else {
522 let test_dir = if PathBuf::from("tests").is_dir() {
523 "tests".to_string()
524 } else {
525 command_error("no path specified and no tests/ directory found");
526 };
527 if args.selection.is_some() {
528 command_error(
529 "only `harn test conformance` accepts a second positional target",
530 );
531 }
532 commands::test::run_determinism_tests(
533 &test_dir,
534 args.filter.as_deref(),
535 args.timeout,
536 &cli_skill_dirs,
537 )
538 .await;
539 }
540 } else {
541 let cli_skill_dirs: Vec<PathBuf> =
542 args.skill_dir.iter().map(PathBuf::from).collect();
543 if args.record {
544 harn_vm::llm::set_replay_mode(
545 harn_vm::llm::LlmReplayMode::Record,
546 ".harn-fixtures",
547 );
548 } else if args.replay {
549 harn_vm::llm::set_replay_mode(
550 harn_vm::llm::LlmReplayMode::Replay,
551 ".harn-fixtures",
552 );
553 }
554
555 if let Some(t) = args.target.as_deref() {
556 if t == "conformance" {
557 commands::test::run_conformance_tests(
558 t,
559 args.selection.as_deref(),
560 args.filter.as_deref(),
561 args.junit.as_deref(),
562 args.timeout,
563 commands::test::ConformanceRunOptions {
564 verbose: args.verbose,
565 timing: args.timing,
566 differential_optimizations: args.differential_optimizations,
567 cli_skill_dirs: &cli_skill_dirs,
568 },
569 )
570 .await;
571 } else if args.selection.is_some() {
572 command_error(
573 "only `harn test conformance` accepts a second positional target",
574 );
575 } else if args.watch {
576 commands::test::run_watch_tests(
577 t,
578 args.filter.as_deref(),
579 args.timeout,
580 args.parallel,
581 &cli_skill_dirs,
582 )
583 .await;
584 } else {
585 commands::test::run_user_tests(
586 t,
587 args.filter.as_deref(),
588 args.timeout,
589 args.parallel,
590 &cli_skill_dirs,
591 )
592 .await;
593 }
594 } else {
595 let test_dir = if PathBuf::from("tests").is_dir() {
596 "tests".to_string()
597 } else {
598 command_error("no path specified and no tests/ directory found");
599 };
600 if args.selection.is_some() {
601 command_error(
602 "only `harn test conformance` accepts a second positional target",
603 );
604 }
605 if args.watch {
606 commands::test::run_watch_tests(
607 &test_dir,
608 args.filter.as_deref(),
609 args.timeout,
610 args.parallel,
611 &cli_skill_dirs,
612 )
613 .await;
614 } else {
615 commands::test::run_user_tests(
616 &test_dir,
617 args.filter.as_deref(),
618 args.timeout,
619 args.parallel,
620 &cli_skill_dirs,
621 )
622 .await;
623 }
624 }
625 }
626 }
627 Command::Init(args) => commands::init::init_project(args.name.as_deref(), args.template),
628 Command::New(args) => match commands::init::resolve_new_args(&args) {
629 Ok((name, template)) => commands::init::init_project(name.as_deref(), template),
630 Err(error) => {
631 eprintln!("error: {error}");
632 process::exit(1);
633 }
634 },
635 Command::Doctor(args) => {
636 commands::doctor::run_doctor_with_options(commands::doctor::DoctorOptions {
637 network: !args.no_network,
638 json: args.json,
639 })
640 .await
641 }
642 Command::Models(args) => commands::models::run(args).await,
643 Command::Local(args) => commands::local::run(args).await,
644 Command::Providers(args) => match args.command {
645 ProvidersCommand::Refresh(refresh) => {
646 if let Err(error) = commands::providers::run_refresh(&refresh).await {
647 command_error(&error);
648 }
649 }
650 ProvidersCommand::Validate(validate) => {
651 if let Err(error) = commands::providers::run_validate(&validate) {
652 command_error(&error);
653 }
654 }
655 ProvidersCommand::Export(export) => {
656 if let Err(error) = commands::providers::run_export(&export) {
657 command_error(&error);
658 }
659 }
660 },
661 Command::Try(args) => commands::try_cmd::run(args).await,
662 Command::Quickstart(args) => {
663 if let Err(error) = commands::quickstart::run_quickstart(&args).await {
664 command_error(&error);
665 }
666 }
667 Command::Demo(args) => {
668 let code = commands::demo::run(args).await;
669 if code != 0 {
670 process::exit(code);
671 }
672 }
673 Command::Serve(args) => match args.command {
674 ServeCommand::Acp(args) => {
675 if let Err(error) = commands::serve::run_acp_server(&args).await {
676 command_error(&error);
677 }
678 }
679 ServeCommand::A2a(args) => {
680 if let Err(error) = commands::serve::run_a2a_server(&args).await {
681 command_error(&error);
682 }
683 }
684 ServeCommand::Api(args) => {
685 if let Err(error) = commands::serve::run_api_server(&args).await {
686 command_error(&error);
687 }
688 }
689 ServeCommand::Mcp(args) => {
690 if let Err(error) = commands::serve::run_mcp_server(&args).await {
691 command_error(&error);
692 }
693 }
694 },
695 Command::Connector(args) => {
696 if let Err(error) = commands::connector::handle_connector_command(args).await {
697 eprintln!("error: {error}");
698 process::exit(1);
699 }
700 }
701 Command::Mcp(args) => commands::mcp::handle_mcp_command(&args.command).await,
702 Command::Watch(args) => {
703 let denied =
704 commands::run::build_denied_builtins(args.deny.as_deref(), args.allow.as_deref());
705 commands::run::run_watch(&args.file, denied).await;
706 }
707 Command::Portal(args) => {
708 commands::portal::run_portal(
709 &args.dir,
710 args.manifest,
711 args.persona_state_dir,
712 &args.host,
713 args.port,
714 args.open,
715 args.allow_remote_launch,
716 )
717 .await
718 }
719 Command::Trigger(args) => {
720 if let Err(error) = commands::trigger::handle(args).await {
721 eprintln!("error: {error}");
722 process::exit(1);
723 }
724 }
725 Command::Flow(args) => match commands::flow::run_flow(&args) {
726 Ok(code) => {
727 if code != 0 {
728 process::exit(code);
729 }
730 }
731 Err(error) => command_error(&error),
732 },
733 Command::Workflow(args) => match commands::workflow::handle(args) {
734 Ok(code) => {
735 if code != 0 {
736 process::exit(code);
737 }
738 }
739 Err(error) => command_error(&error),
740 },
741 Command::Supervisor(args) => {
742 if let Err(error) = commands::supervisor::handle(args).await {
743 eprintln!("error: {error}");
744 process::exit(1);
745 }
746 }
747 Command::Trace(args) => {
748 if let Err(error) = commands::trace::handle(args).await {
749 eprintln!("error: {error}");
750 process::exit(1);
751 }
752 }
753 Command::Crystallize(args) => {
754 if let Err(error) = commands::crystallize::run(args) {
755 eprintln!("error: {error}");
756 process::exit(1);
757 }
758 }
759 Command::Trust(args) | Command::TrustGraph(args) => {
760 if let Err(error) = commands::trust::handle(args).await {
761 eprintln!("error: {error}");
762 process::exit(1);
763 }
764 }
765 Command::Verify(args) => {
766 if let Err(error) = verify_provenance_receipt(&args.receipt, args.json) {
767 eprintln!("error: {error}");
768 process::exit(1);
769 }
770 }
771 Command::Completions(args) => print_completions(args.shell),
772 Command::Orchestrator(args) => {
773 if let Err(error) = commands::orchestrator::handle(args).await {
774 eprintln!("error: {error}");
775 process::exit(1);
776 }
777 }
778 Command::Playground(args) => {
779 provider_bootstrap::maybe_seed_ollama_for_playground(
780 Path::new(&args.host),
781 Path::new(&args.script),
782 args.yes,
783 args.llm.is_some(),
784 args.llm_mock.is_some(),
785 )
786 .await;
787 let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
788 commands::run::CliLlmMockMode::Replay {
789 fixture_path: PathBuf::from(path),
790 }
791 } else if let Some(path) = args.llm_mock_record.as_ref() {
792 commands::run::CliLlmMockMode::Record {
793 fixture_path: PathBuf::from(path),
794 }
795 } else {
796 commands::run::CliLlmMockMode::Off
797 };
798 if let Err(error) = commands::playground::run_command(args, llm_mock_mode).await {
799 eprint!("{error}");
800 process::exit(1);
801 }
802 }
803 Command::Runs(args) => match args.command {
804 RunsCommand::Inspect(inspect) => {
805 inspect_run_record(&inspect.path, inspect.compare.as_deref())
806 }
807 },
808 Command::Session(args) => commands::session::run(args),
809 Command::Replay(args) => replay_run_record(&args.path),
810 Command::Eval(args) => match args.command {
811 Some(EvalCommand::Prompt(prompt_args)) => {
812 let code = commands::eval_prompt::run(prompt_args).await;
813 if code != 0 {
814 process::exit(code);
815 }
816 }
817 Some(EvalCommand::ToolCalls(tool_calls_args)) => {
818 let code = commands::eval_tool_calls::run(tool_calls_args).await;
819 if code != 0 {
820 process::exit(code);
821 }
822 }
823 None => {
824 let Some(path) = args.path else {
825 eprintln!(
826 "error: `harn eval` requires a path or a subcommand (e.g. `prompt`)."
827 );
828 eprintln!("See `harn eval --help`.");
829 process::exit(2);
830 };
831 let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
832 commands::run::CliLlmMockMode::Replay {
833 fixture_path: PathBuf::from(path),
834 }
835 } else if let Some(path) = args.llm_mock_record.as_ref() {
836 commands::run::CliLlmMockMode::Record {
837 fixture_path: PathBuf::from(path),
838 }
839 } else {
840 commands::run::CliLlmMockMode::Off
841 };
842 eval_run_record(
843 &path,
844 args.compare.as_deref(),
845 args.structural_experiment.as_deref(),
846 &args.argv,
847 &llm_mock_mode,
848 )
849 }
850 },
851 Command::Repl => commands::repl::run_repl().await,
852 Command::Bench(args) => commands::bench::run(args).await,
853 Command::Precompile(args) => commands::precompile::run(args),
854 Command::TestBench(args) => commands::test_bench::run(args.command).await,
855 Command::Viz(args) => commands::viz::run_viz(&args.file, args.output.as_deref()),
856 Command::Install(args) => package::install_packages(
857 args.frozen || args.locked || args.offline,
858 args.refetch.as_deref(),
859 args.offline,
860 args.json,
861 ),
862 Command::Add(args) => package::add_package_with_registry(
863 &args.name_or_spec,
864 args.alias.as_deref(),
865 args.git.as_deref(),
866 args.tag.as_deref(),
867 args.rev.as_deref(),
868 args.branch.as_deref(),
869 args.path.as_deref(),
870 args.registry.as_deref(),
871 ),
872 Command::Update(args) => {
873 package::update_packages(args.alias.as_deref(), args.all, args.json)
874 }
875 Command::Remove(args) => package::remove_package(&args.alias),
876 Command::Lock => package::lock_packages(),
877 Command::Package(args) => match args.command {
878 PackageCommand::List(list) => package::list_packages(list.json),
879 PackageCommand::Doctor(doctor) => package::doctor_packages(doctor.json),
880 PackageCommand::Search(search) => package::search_package_registry(
881 search.query.as_deref(),
882 search.registry.as_deref(),
883 search.json,
884 ),
885 PackageCommand::Info(info) => {
886 package::show_package_registry_info(&info.name, info.registry.as_deref(), info.json)
887 }
888 PackageCommand::Check(check) => {
889 package::check_package(check.package.as_deref(), check.json)
890 }
891 PackageCommand::Pack(pack) => package::pack_package(
892 pack.package.as_deref(),
893 pack.output.as_deref(),
894 pack.dry_run,
895 pack.json,
896 ),
897 PackageCommand::Docs(docs) => package::generate_package_docs(
898 docs.package.as_deref(),
899 docs.output.as_deref(),
900 docs.check,
901 ),
902 PackageCommand::Cache(cache) => match cache.command {
903 PackageCacheCommand::List => package::list_package_cache(),
904 PackageCacheCommand::Clean(clean) => package::clean_package_cache(clean.all),
905 PackageCacheCommand::Verify(verify) => {
906 package::verify_package_cache(verify.materialized)
907 }
908 },
909 PackageCommand::Outdated(args) => package::outdated_packages(
910 args.refresh,
911 args.remote,
912 args.registry.as_deref(),
913 args.json,
914 ),
915 PackageCommand::Audit(args) => {
916 package::audit_packages(args.registry.as_deref(), args.skip_materialized, args.json)
917 }
918 PackageCommand::Artifacts(args) => match args.command {
919 PackageArtifactsCommand::Manifest(manifest) => {
920 package::artifacts_manifest(manifest.output.as_deref())
921 }
922 PackageArtifactsCommand::Check(check) => {
923 package::artifacts_check(&check.manifest, check.json)
924 }
925 },
926 },
927 Command::Publish(args) => package::publish_package(
928 args.package.as_deref(),
929 args.dry_run,
930 args.registry.as_deref(),
931 args.json,
932 ),
933 Command::MergeCaptain(args) => match args.command {
934 MergeCaptainCommand::Run(run) => {
935 let code = commands::merge_captain::run_driver(&run);
936 if code != 0 {
937 process::exit(code);
938 }
939 }
940 MergeCaptainCommand::Ladder(ladder) => {
941 let code = commands::merge_captain::run_ladder(&ladder);
942 if code != 0 {
943 process::exit(code);
944 }
945 }
946 MergeCaptainCommand::Iterate(iterate) => {
947 let code = commands::merge_captain::run_iterate(&iterate);
948 if code != 0 {
949 process::exit(code);
950 }
951 }
952 MergeCaptainCommand::Audit(audit) => {
953 let code = commands::merge_captain::run_audit(&audit);
954 if code != 0 {
955 process::exit(code);
956 }
957 }
958 MergeCaptainCommand::Mock(mock) => {
959 let code = match mock {
960 MergeCaptainMockCommand::Init(args) => {
961 commands::merge_captain_mock::run_init(&args)
962 }
963 MergeCaptainMockCommand::Step(args) => {
964 commands::merge_captain_mock::run_step(&args)
965 }
966 MergeCaptainMockCommand::Status(args) => {
967 commands::merge_captain_mock::run_status(&args)
968 }
969 MergeCaptainMockCommand::Serve(args) => {
970 commands::merge_captain_mock::run_serve(&args).await
971 }
972 MergeCaptainMockCommand::Cleanup(args) => {
973 commands::merge_captain_mock::run_cleanup(&args)
974 }
975 MergeCaptainMockCommand::Scenarios => {
976 commands::merge_captain_mock::run_scenarios()
977 }
978 };
979 if code != 0 {
980 process::exit(code);
981 }
982 }
983 },
984 Command::Persona(args) => match args.command {
985 PersonaCommand::New(new) => {
986 if let Err(error) = commands::persona_scaffold::run_new(&new) {
987 eprintln!("error: {error}");
988 process::exit(1);
989 }
990 }
991 PersonaCommand::Doctor(doctor) => {
992 if let Err(error) =
993 commands::persona_doctor::run_doctor(args.manifest.as_deref(), &doctor).await
994 {
995 eprintln!("error: {error}");
996 process::exit(1);
997 }
998 }
999 PersonaCommand::Check(check) => {
1000 commands::persona::run_check(args.manifest.as_deref(), &check)
1001 }
1002 PersonaCommand::List(list) => {
1003 commands::persona::run_list(args.manifest.as_deref(), &list)
1004 }
1005 PersonaCommand::Inspect(inspect) => {
1006 commands::persona::run_inspect(args.manifest.as_deref(), &inspect)
1007 }
1008 PersonaCommand::Status(status) => {
1009 if let Err(error) = commands::persona::run_status(
1010 args.manifest.as_deref(),
1011 &args.state_dir,
1012 &status,
1013 )
1014 .await
1015 {
1016 eprintln!("error: {error}");
1017 process::exit(1);
1018 }
1019 }
1020 PersonaCommand::Pause(control) => {
1021 if let Err(error) = commands::persona::run_pause(
1022 args.manifest.as_deref(),
1023 &args.state_dir,
1024 &control,
1025 )
1026 .await
1027 {
1028 eprintln!("error: {error}");
1029 process::exit(1);
1030 }
1031 }
1032 PersonaCommand::Resume(control) => {
1033 if let Err(error) = commands::persona::run_resume(
1034 args.manifest.as_deref(),
1035 &args.state_dir,
1036 &control,
1037 )
1038 .await
1039 {
1040 eprintln!("error: {error}");
1041 process::exit(1);
1042 }
1043 }
1044 PersonaCommand::Disable(control) => {
1045 if let Err(error) = commands::persona::run_disable(
1046 args.manifest.as_deref(),
1047 &args.state_dir,
1048 &control,
1049 )
1050 .await
1051 {
1052 eprintln!("error: {error}");
1053 process::exit(1);
1054 }
1055 }
1056 PersonaCommand::Tick(tick) => {
1057 if let Err(error) =
1058 commands::persona::run_tick(args.manifest.as_deref(), &args.state_dir, &tick)
1059 .await
1060 {
1061 eprintln!("error: {error}");
1062 process::exit(1);
1063 }
1064 }
1065 PersonaCommand::Trigger(trigger) => {
1066 if let Err(error) = commands::persona::run_trigger(
1067 args.manifest.as_deref(),
1068 &args.state_dir,
1069 &trigger,
1070 )
1071 .await
1072 {
1073 eprintln!("error: {error}");
1074 process::exit(1);
1075 }
1076 }
1077 PersonaCommand::Spend(spend) => {
1078 if let Err(error) =
1079 commands::persona::run_spend(args.manifest.as_deref(), &args.state_dir, &spend)
1080 .await
1081 {
1082 eprintln!("error: {error}");
1083 process::exit(1);
1084 }
1085 }
1086 PersonaCommand::Supervision(supervision) => match supervision.command {
1087 PersonaSupervisionCommand::Tail(tail) => {
1088 if let Err(error) = commands::persona_supervision::run_tail(
1089 args.manifest.as_deref(),
1090 &args.state_dir,
1091 &tail,
1092 )
1093 .await
1094 {
1095 eprintln!("error: {error}");
1096 process::exit(1);
1097 }
1098 }
1099 },
1100 },
1101 Command::ModelInfo(args) => {
1102 if !print_model_info(&args).await {
1103 process::exit(1);
1104 }
1105 }
1106 Command::ProviderCatalog(args) => print_provider_catalog(args.available_only),
1107 Command::ProviderReady(args) => {
1108 run_provider_ready(
1109 &args.provider,
1110 args.model.as_deref(),
1111 args.base_url.as_deref(),
1112 args.json,
1113 )
1114 .await
1115 }
1116 Command::ProviderProbe(args) => commands::provider::run_provider_probe(args).await,
1117 Command::ProviderToolProbe(args) => commands::provider::run_provider_tool_probe(args).await,
1118 Command::Skills(args) => match args.command {
1119 SkillsCommand::List(list) => commands::skills::run_list(&list),
1120 SkillsCommand::Inspect(inspect) => commands::skills::run_inspect(&inspect),
1121 SkillsCommand::Match(matcher) => commands::skills::run_match(&matcher),
1122 SkillsCommand::Install(install) => commands::skills::run_install(&install),
1123 SkillsCommand::New(new_args) => commands::skills::run_new(&new_args),
1124 },
1125 Command::Tool(args) => match args.command {
1126 ToolCommand::New(new_args) => {
1127 if let Err(error) = commands::tool::run_new(&new_args) {
1128 eprintln!("error: {error}");
1129 process::exit(1);
1130 }
1131 }
1132 },
1133 Command::DumpHighlightKeywords(args) => {
1134 commands::dump_highlight_keywords::run(&args.output, args.check);
1135 }
1136 Command::DumpTriggerQuickref(args) => {
1137 commands::dump_trigger_quickref::run(&args.output, args.check);
1138 }
1139 Command::DumpConnectorMatrix(args) => {
1140 commands::check::connector_matrix::run_docs(&args.output, &args.sources, args.check);
1141 }
1142 Command::DumpProtocolArtifacts(args) => {
1143 commands::dump_protocol_artifacts::run(&args.output_dir, args.check);
1144 }
1145 }
1146}
1147
1148fn run_profile_options(args: &cli::ProfileArgs) -> commands::run::RunProfileOptions {
1149 commands::run::RunProfileOptions {
1150 text: args.text,
1151 json_path: args.json_path.clone(),
1152 }
1153}
1154
1155fn print_completions(shell: CompletionShell) {
1156 let mut command = Cli::command();
1157 let shell = clap_complete::Shell::from(shell);
1158 clap_complete::generate(shell, &mut command, "harn", &mut std::io::stdout());
1159}
1160
/// Inserts the default `a2a` transport after `serve` when the caller did not name one.
///
/// The argument vector is only rewritten when it has at least three entries, the
/// second entry is exactly `serve`, and the third entry is neither a known transport
/// (`acp`, `a2a`, `api`, `mcp`) nor a help flag (`-h`, `--help`). In every other case
/// the arguments are returned untouched.
fn normalize_serve_args(mut raw_args: Vec<String>) -> Vec<String> {
    const EXPLICIT_THIRD_ARGS: [&str; 6] = ["acp", "a2a", "api", "mcp", "-h", "--help"];

    let needs_default_transport = match raw_args.as_slice() {
        [_, subcommand, third, ..] => {
            subcommand.as_str() == "serve" && !EXPLICIT_THIRD_ARGS.contains(&third.as_str())
        }
        _ => false,
    };
    if needs_default_transport {
        raw_args.insert(2, String::from("a2a"));
    }
    raw_args
}
1173
/// Prints the ASCII-art banner together with the crate version to standard output.
///
/// The version is the compile-time value of `CARGO_PKG_VERSION`.
fn print_version() {
    println!(
        // Raw string: the banner is emitted verbatim, so its whitespace is significant.
        r#"
 ╱▔▔╲
 ╱ ╲ harn v{}
 │ ◆ │ the agent harness language
 │ │
 ╰──╯╱
 ╱╱
"#,
        env!("CARGO_PKG_VERSION")
    );
}
1187
1188async fn print_model_info(args: &ModelInfoArgs) -> bool {
1189 let resolved = harn_vm::llm_config::resolve_model_info(&args.model);
1190 let api_key_result = harn_vm::llm::resolve_api_key(&resolved.provider);
1191 let api_key_set = api_key_result.is_ok();
1192 let api_key = api_key_result.unwrap_or_default();
1193 let context_window =
1194 harn_vm::llm::fetch_provider_max_context(&resolved.provider, &resolved.id, &api_key).await;
1195 let readiness = local_openai_readiness(&resolved.provider, &resolved.id, &api_key).await;
1196 let catalog = harn_vm::llm_config::model_catalog_entry(&resolved.id);
1197 let runtime_context_window = catalog
1198 .as_ref()
1199 .and_then(|entry| entry.runtime_context_window);
1200 let capabilities = harn_vm::llm::capabilities::lookup(&resolved.provider, &resolved.id);
1201 let mut payload = serde_json::json!({
1202 "alias": args.model,
1203 "id": resolved.id,
1204 "provider": resolved.provider,
1205 "resolved_alias": resolved.alias,
1206 "tool_format": resolved.tool_format,
1207 "tier": resolved.tier,
1208 "api_key_set": api_key_set,
1209 "context_window": context_window,
1210 "runtime_context_window": runtime_context_window,
1211 "readiness": readiness,
1212 "catalog": catalog,
1213 "capabilities": {
1214 "native_tools": capabilities.native_tools,
1215 "defer_loading": capabilities.defer_loading,
1216 "tool_search": capabilities.tool_search,
1217 "max_tools": capabilities.max_tools,
1218 "prompt_caching": capabilities.prompt_caching,
1219 "vision": capabilities.vision,
1220 "vision_supported": capabilities.vision_supported,
1221 "audio": capabilities.audio,
1222 "pdf": capabilities.pdf,
1223 "files_api_supported": capabilities.files_api_supported,
1224 "json_schema": capabilities.json_schema,
1225 "prefers_xml_scaffolding": capabilities.prefers_xml_scaffolding,
1226 "prefers_markdown_scaffolding": capabilities.prefers_markdown_scaffolding,
1227 "structured_output_mode": capabilities.structured_output_mode,
1228 "supports_assistant_prefill": capabilities.supports_assistant_prefill,
1229 "prefers_role_developer": capabilities.prefers_role_developer,
1230 "prefers_xml_tools": capabilities.prefers_xml_tools,
1231 "thinking": !capabilities.thinking_modes.is_empty(),
1232 "thinking_block_style": capabilities.thinking_block_style,
1233 "thinking_modes": capabilities.thinking_modes,
1234 "interleaved_thinking_supported": capabilities.interleaved_thinking_supported,
1235 "anthropic_beta_features": capabilities.anthropic_beta_features,
1236 "preserve_thinking": capabilities.preserve_thinking,
1237 "server_parser": capabilities.server_parser,
1238 "honors_chat_template_kwargs": capabilities.honors_chat_template_kwargs,
1239 "recommended_endpoint": capabilities.recommended_endpoint,
1240 "text_tool_wire_format_supported": capabilities.text_tool_wire_format_supported,
1241 },
1242 "qc_default_model": harn_vm::llm_config::qc_default_model(&resolved.provider),
1243 });
1244
1245 let should_verify = args.verify || args.warm;
1246 let mut ok = true;
1247 if should_verify {
1248 if resolved.provider == "ollama" {
1249 let mut readiness = harn_vm::llm::OllamaReadinessOptions::new(resolved.id.clone());
1250 readiness.warm = args.warm;
1251 readiness.observe_loaded = true;
1252 readiness.keep_alive = args
1253 .keep_alive
1254 .as_deref()
1255 .and_then(harn_vm::llm::normalize_ollama_keep_alive);
1256 let result = harn_vm::llm::ollama_readiness(readiness).await;
1257 ok = result.valid;
1258 payload["readiness"] = serde_json::to_value(&result).unwrap_or_else(|error| {
1259 serde_json::json!({
1260 "valid": false,
1261 "status": "serialization_error",
1262 "message": format!("failed to serialize readiness result: {error}"),
1263 })
1264 });
1265 } else {
1266 ok = false;
1267 payload["readiness"] = serde_json::json!({
1268 "valid": false,
1269 "status": "unsupported_provider",
1270 "message": format!(
1271 "model-info --verify is only supported for Ollama models; resolved provider is '{}'",
1272 resolved.provider
1273 ),
1274 "provider": resolved.provider,
1275 });
1276 }
1277 }
1278
1279 println!(
1280 "{}",
1281 serde_json::to_string(&payload).unwrap_or_else(|error| {
1282 command_error(&format!("failed to serialize model info: {error}"))
1283 })
1284 );
1285 ok
1286}
1287
1288async fn local_openai_readiness(
1289 provider: &str,
1290 model: &str,
1291 api_key: &str,
1292) -> Option<serde_json::Value> {
1293 let def = harn_vm::llm_config::provider_config(provider)?;
1294 if def.auth_style != "none" || !harn_vm::llm::supports_model_readiness_probe(&def) {
1295 return None;
1296 }
1297 let readiness = harn_vm::llm::probe_openai_compatible_model(provider, model, api_key).await;
1298 Some(serde_json::json!({
1299 "valid": readiness.valid,
1300 "category": readiness.category,
1301 "message": readiness.message,
1302 "provider": readiness.provider,
1303 "model": readiness.model,
1304 "url": readiness.url,
1305 "status": readiness.status,
1306 "available_models": readiness.available_models,
1307 }))
1308}
1309
1310fn print_provider_catalog(available_only: bool) {
1311 let provider_names = if available_only {
1312 harn_vm::llm_config::available_provider_names()
1313 } else {
1314 harn_vm::llm_config::provider_names()
1315 };
1316 let providers: Vec<_> = provider_names
1317 .into_iter()
1318 .filter_map(|name| {
1319 harn_vm::llm_config::provider_config(&name).map(|def| {
1320 serde_json::json!({
1321 "name": name,
1322 "display_name": def.display_name,
1323 "icon": def.icon,
1324 "base_url": harn_vm::llm_config::resolve_base_url(&def),
1325 "base_url_env": def.base_url_env,
1326 "auth_style": def.auth_style,
1327 "auth_envs": harn_vm::llm_config::auth_env_names(&def.auth_env),
1328 "auth_available": harn_vm::llm_config::provider_key_available(&name),
1329 "features": def.features,
1330 "cost_per_1k_in": def.cost_per_1k_in,
1331 "cost_per_1k_out": def.cost_per_1k_out,
1332 "latency_p50_ms": def.latency_p50_ms,
1333 })
1334 })
1335 })
1336 .collect();
1337 let models: Vec<_> = harn_vm::llm_config::model_catalog_entries()
1338 .into_iter()
1339 .map(|(id, model)| {
1340 serde_json::json!({
1341 "id": id,
1342 "name": model.name,
1343 "provider": model.provider,
1344 "context_window": model.context_window,
1345 "runtime_context_window": model.runtime_context_window,
1346 "stream_timeout": model.stream_timeout,
1347 "capabilities": model.capabilities,
1348 "pricing": model.pricing,
1349 })
1350 })
1351 .collect();
1352 let aliases: Vec<_> = harn_vm::llm_config::alias_entries()
1353 .into_iter()
1354 .map(|(name, alias)| {
1355 serde_json::json!({
1356 "name": name,
1357 "id": alias.id,
1358 "provider": alias.provider,
1359 "tool_format": alias.tool_format,
1360 "tool_calling": harn_vm::llm_config::alias_tool_calling_entry(&name),
1361 })
1362 })
1363 .collect();
1364 let payload = serde_json::json!({
1365 "providers": providers,
1366 "known_model_names": harn_vm::llm_config::known_model_names(),
1367 "available_providers": harn_vm::llm_config::available_provider_names(),
1368 "aliases": aliases,
1369 "models": models,
1370 "qc_defaults": harn_vm::llm_config::qc_defaults(),
1371 });
1372 println!(
1373 "{}",
1374 serde_json::to_string(&payload).unwrap_or_else(|error| {
1375 command_error(&format!("failed to serialize provider catalog: {error}"))
1376 })
1377 );
1378}
1379
1380async fn run_provider_ready(
1381 provider: &str,
1382 model: Option<&str>,
1383 base_url: Option<&str>,
1384 json: bool,
1385) {
1386 let readiness =
1387 harn_vm::llm::readiness::probe_provider_readiness(provider, model, base_url).await;
1388 if json {
1389 match serde_json::to_string_pretty(&readiness) {
1390 Ok(payload) => println!("{payload}"),
1391 Err(error) => command_error(&format!("failed to serialize readiness result: {error}")),
1392 }
1393 } else if readiness.ok {
1394 println!("{}", readiness.message);
1395 } else {
1396 eprintln!("{}", readiness.message);
1397 }
1398 if !readiness.ok {
1399 process::exit(1);
1400 }
1401}
1402
1403fn command_error(message: &str) -> ! {
1404 Cli::command()
1405 .error(ErrorKind::ValueValidation, message)
1406 .exit()
1407}
1408
1409fn verify_provenance_receipt(path: &str, json: bool) -> Result<(), String> {
1410 let raw =
1411 fs::read_to_string(path).map_err(|error| format!("failed to read {path}: {error}"))?;
1412 let receipt: harn_vm::ProvenanceReceipt = serde_json::from_str(&raw)
1413 .map_err(|error| format!("failed to parse provenance receipt {path}: {error}"))?;
1414 let report = harn_vm::verify_receipt(&receipt);
1415 if json {
1416 println!(
1417 "{}",
1418 serde_json::to_string_pretty(&report).map_err(|error| error.to_string())?
1419 );
1420 } else if report.verified {
1421 println!(
1422 "verified receipt={} events={} receipt_hash={} event_root_hash={}",
1423 report.receipt_id.unwrap_or_else(|| "-".to_string()),
1424 report.event_count,
1425 report.receipt_hash.unwrap_or_else(|| "-".to_string()),
1426 report.event_root_hash.unwrap_or_else(|| "-".to_string())
1427 );
1428 } else {
1429 println!(
1430 "failed receipt={} events={}",
1431 report.receipt_id.unwrap_or_else(|| "-".to_string()),
1432 report.event_count
1433 );
1434 for error in &report.errors {
1435 println!(" {error}");
1436 }
1437 return Err("provenance receipt verification failed".to_string());
1438 }
1439 Ok(())
1440}
1441
1442fn load_run_record_or_exit(path: &Path) -> harn_vm::orchestration::RunRecord {
1443 match harn_vm::orchestration::load_run_record(path) {
1444 Ok(run) => run,
1445 Err(error) => {
1446 eprintln!("Failed to load run record: {error}");
1447 process::exit(1);
1448 }
1449 }
1450}
1451
1452fn load_eval_suite_manifest_or_exit(path: &Path) -> harn_vm::orchestration::EvalSuiteManifest {
1453 harn_vm::orchestration::load_eval_suite_manifest(path).unwrap_or_else(|error| {
1454 eprintln!("Failed to load eval manifest {}: {error}", path.display());
1455 process::exit(1);
1456 })
1457}
1458
1459fn load_eval_pack_manifest_or_exit(path: &Path) -> harn_vm::orchestration::EvalPackManifest {
1460 harn_vm::orchestration::load_eval_pack_manifest(path).unwrap_or_else(|error| {
1461 eprintln!("Failed to load eval pack {}: {error}", path.display());
1462 process::exit(1);
1463 })
1464}
1465
1466fn load_persona_eval_ladder_manifest_or_exit(
1467 path: &Path,
1468) -> harn_vm::orchestration::PersonaEvalLadderManifest {
1469 harn_vm::orchestration::load_persona_eval_ladder_manifest(path).unwrap_or_else(|error| {
1470 eprintln!(
1471 "Failed to load persona eval ladder {}: {error}",
1472 path.display()
1473 );
1474 process::exit(1);
1475 })
1476}
1477
1478fn file_looks_like_eval_manifest(path: &Path) -> bool {
1479 if path.file_name().and_then(|name| name.to_str()) == Some("harn.eval.toml") {
1480 return true;
1481 }
1482 if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
1483 let Ok(content) = fs::read_to_string(path) else {
1484 return false;
1485 };
1486 return toml::from_str::<harn_vm::orchestration::EvalPackManifest>(&content)
1487 .is_ok_and(|manifest| !manifest.cases.is_empty() || !manifest.ladders.is_empty());
1488 }
1489 let Ok(content) = fs::read_to_string(path) else {
1490 return false;
1491 };
1492 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1493 return false;
1494 };
1495 json.get("_type").and_then(|value| value.as_str()) == Some("eval_suite_manifest")
1496 || json.get("cases").is_some()
1497}
1498
1499fn file_looks_like_eval_pack_manifest(path: &Path) -> bool {
1500 if path.file_name().and_then(|name| name.to_str()) == Some("harn.eval.toml") {
1501 return true;
1502 }
1503 if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
1504 return file_looks_like_eval_manifest(path);
1505 }
1506 let Ok(content) = fs::read_to_string(path) else {
1507 return false;
1508 };
1509 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1510 return false;
1511 };
1512 json.get("version").is_some()
1513 && (json.get("cases").is_some() || json.get("ladders").is_some())
1514 && json.get("_type").and_then(|value| value.as_str()) != Some("eval_suite_manifest")
1515}
1516
1517fn file_looks_like_persona_eval_ladder_manifest(path: &Path) -> bool {
1518 let Ok(content) = fs::read_to_string(path) else {
1519 return false;
1520 };
1521 if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
1522 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1523 return false;
1524 };
1525 return json.get("_type").and_then(|value| value.as_str())
1526 == Some("persona_eval_ladder_manifest")
1527 || json.get("timeout_tiers").is_some()
1528 || json.get("timeout-tiers").is_some();
1529 }
1530 toml::from_str::<harn_vm::orchestration::PersonaEvalLadderManifest>(&content).is_ok_and(
1531 |manifest| {
1532 manifest
1533 .type_name
1534 .eq_ignore_ascii_case("persona_eval_ladder_manifest")
1535 || (!manifest.timeout_tiers.is_empty() && manifest.backend.path.is_some())
1536 },
1537 )
1538}
1539
/// Resolves `path` to the list of run record files it denotes.
///
/// A regular file yields just that file. A directory yields its direct entries whose
/// extension is `json`, sorted; directory entries that fail to read are skipped. If
/// the directory itself cannot be listed, or `path` is neither a file nor a directory,
/// an error is printed to stderr and the process exits with status 1.
fn collect_run_record_paths(path: &str) -> Vec<PathBuf> {
    let root = Path::new(path);
    if root.is_file() {
        return vec![root.to_path_buf()];
    }
    if !root.is_dir() {
        eprintln!("Run path does not exist: {}", root.display());
        process::exit(1);
    }

    let listing = match fs::read_dir(root) {
        Ok(listing) => listing,
        Err(error) => {
            eprintln!("Failed to read run directory {}: {error}", root.display());
            process::exit(1);
        }
    };

    let mut records = Vec::new();
    for entry in listing.flatten() {
        let candidate = entry.path();
        if candidate.extension().and_then(|ext| ext.to_str()) == Some("json") {
            records.push(candidate);
        }
    }
    records.sort();
    records
}
1560
1561fn print_run_diff(diff: &harn_vm::orchestration::RunDiffReport) {
1562 println!(
1563 "Diff: {} -> {} [{} -> {}]",
1564 diff.left_run_id, diff.right_run_id, diff.left_status, diff.right_status
1565 );
1566 println!("Identical: {}", diff.identical);
1567 println!("Stage diffs: {}", diff.stage_diffs.len());
1568 println!("Tool diffs: {}", diff.tool_diffs.len());
1569 println!("Observability diffs: {}", diff.observability_diffs.len());
1570 println!("Transition delta: {}", diff.transition_count_delta);
1571 println!("Artifact delta: {}", diff.artifact_count_delta);
1572 println!("Checkpoint delta: {}", diff.checkpoint_count_delta);
1573 for stage in &diff.stage_diffs {
1574 println!("- {} [{}]", stage.node_id, stage.change);
1575 for detail in &stage.details {
1576 println!(" {}", detail);
1577 }
1578 }
1579 for tool in &diff.tool_diffs {
1580 println!("- tool {} [{}]", tool.tool_name, tool.args_hash);
1581 println!(" left: {:?}", tool.left_result);
1582 println!(" right: {:?}", tool.right_result);
1583 }
1584 for item in &diff.observability_diffs {
1585 println!("- {} [{}]", item.label, item.section);
1586 for detail in &item.details {
1587 println!(" {}", detail);
1588 }
1589 }
1590}
1591
1592fn inspect_run_record(path: &str, compare: Option<&str>) {
1593 let run = load_run_record_or_exit(Path::new(path));
1594 println!("Run: {}", run.id);
1595 println!(
1596 "Workflow: {}",
1597 run.workflow_name
1598 .clone()
1599 .unwrap_or_else(|| run.workflow_id.clone())
1600 );
1601 println!("Status: {}", run.status);
1602 println!("Task: {}", run.task);
1603 println!("Stages: {}", run.stages.len());
1604 println!("Artifacts: {}", run.artifacts.len());
1605 println!("Transitions: {}", run.transitions.len());
1606 println!("Checkpoints: {}", run.checkpoints.len());
1607 println!("HITL questions: {}", run.hitl_questions.len());
1608 if let Some(observability) = &run.observability {
1609 println!("Planner rounds: {}", observability.planner_rounds.len());
1610 println!("Research facts: {}", observability.research_fact_count);
1611 println!("Workers: {}", observability.worker_lineage.len());
1612 println!(
1613 "Action graph: {} nodes / {} edges",
1614 observability.action_graph_nodes.len(),
1615 observability.action_graph_edges.len()
1616 );
1617 println!(
1618 "Transcript pointers: {}",
1619 observability.transcript_pointers.len()
1620 );
1621 println!("Daemon events: {}", observability.daemon_events.len());
1622 }
1623 if let Some(parent_worker_id) = run
1624 .metadata
1625 .get("parent_worker_id")
1626 .and_then(|value| value.as_str())
1627 {
1628 println!("Parent worker: {}", parent_worker_id);
1629 }
1630 if let Some(parent_stage_id) = run
1631 .metadata
1632 .get("parent_stage_id")
1633 .and_then(|value| value.as_str())
1634 {
1635 println!("Parent stage: {}", parent_stage_id);
1636 }
1637 if run
1638 .metadata
1639 .get("delegated")
1640 .and_then(|value| value.as_bool())
1641 .unwrap_or(false)
1642 {
1643 println!("Delegated: true");
1644 }
1645 println!(
1646 "Pending nodes: {}",
1647 if run.pending_nodes.is_empty() {
1648 "-".to_string()
1649 } else {
1650 run.pending_nodes.join(", ")
1651 }
1652 );
1653 println!(
1654 "Replay fixture: {}",
1655 if run.replay_fixture.is_some() {
1656 "embedded"
1657 } else {
1658 "derived"
1659 }
1660 );
1661 for stage in &run.stages {
1662 let worker = stage.metadata.get("worker");
1663 let worker_suffix = worker
1664 .and_then(|value| value.get("name"))
1665 .and_then(|value| value.as_str())
1666 .map(|name| format!(" worker={name}"))
1667 .unwrap_or_default();
1668 println!(
1669 "- {} [{}] status={} outcome={} branch={}{}",
1670 stage.node_id,
1671 stage.kind,
1672 stage.status,
1673 stage.outcome,
1674 stage.branch.clone().unwrap_or_else(|| "-".to_string()),
1675 worker_suffix,
1676 );
1677 if let Some(worker) = worker {
1678 if let Some(worker_id) = worker.get("id").and_then(|value| value.as_str()) {
1679 println!(" worker_id: {}", worker_id);
1680 }
1681 if let Some(child_run_id) = worker.get("child_run_id").and_then(|value| value.as_str())
1682 {
1683 println!(" child_run_id: {}", child_run_id);
1684 }
1685 if let Some(child_run_path) = worker
1686 .get("child_run_path")
1687 .and_then(|value| value.as_str())
1688 {
1689 println!(" child_run_path: {}", child_run_path);
1690 }
1691 }
1692 }
1693 if let Some(observability) = &run.observability {
1694 for round in &observability.planner_rounds {
1695 println!(
1696 "- planner {} iterations={} llm_calls={} tools={} research_facts={}",
1697 round.node_id,
1698 round.iteration_count,
1699 round.llm_call_count,
1700 round.tool_execution_count,
1701 round.research_facts.len()
1702 );
1703 }
1704 for pointer in &observability.transcript_pointers {
1705 println!(
1706 "- transcript {} [{}] available={} {}",
1707 pointer.label,
1708 pointer.kind,
1709 pointer.available,
1710 pointer
1711 .path
1712 .clone()
1713 .unwrap_or_else(|| pointer.location.clone())
1714 );
1715 }
1716 for event in &observability.daemon_events {
1717 println!(
1718 "- daemon {} [{:?}] at {}",
1719 event.name, event.kind, event.timestamp
1720 );
1721 println!(" id: {}", event.daemon_id);
1722 println!(" persist_path: {}", event.persist_path);
1723 if let Some(summary) = &event.payload_summary {
1724 println!(" payload: {}", summary);
1725 }
1726 }
1727 }
1728 if let Some(compare_path) = compare {
1729 let baseline = load_run_record_or_exit(Path::new(compare_path));
1730 print_run_diff(&harn_vm::orchestration::diff_run_records(&baseline, &run));
1731 }
1732}
1733
1734fn replay_run_record(path: &str) {
1735 let run = load_run_record_or_exit(Path::new(path));
1736 println!("Replay: {}", run.id);
1737 for stage in &run.stages {
1738 println!(
1739 "[{}] status={} outcome={} branch={}",
1740 stage.node_id,
1741 stage.status,
1742 stage.outcome,
1743 stage.branch.clone().unwrap_or_else(|| "-".to_string())
1744 );
1745 if let Some(text) = &stage.visible_text {
1746 println!(" visible: {}", text);
1747 }
1748 if let Some(verification) = &stage.verification {
1749 println!(" verification: {}", verification);
1750 }
1751 }
1752 if let Some(transcript) = &run.transcript {
1753 println!(
1754 "Transcript events persisted: {}",
1755 transcript["events"]
1756 .as_array()
1757 .map(|v| v.len())
1758 .unwrap_or(0)
1759 );
1760 }
1761 let fixture = run
1762 .replay_fixture
1763 .clone()
1764 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1765 let report = harn_vm::orchestration::evaluate_run_against_fixture(&run, &fixture);
1766 println!(
1767 "Embedded replay fixture: {}",
1768 if report.pass { "PASS" } else { "FAIL" }
1769 );
1770 for transition in &run.transitions {
1771 println!(
1772 "transition {} -> {} ({})",
1773 transition
1774 .from_node_id
1775 .clone()
1776 .unwrap_or_else(|| "start".to_string()),
1777 transition.to_node_id,
1778 transition
1779 .branch
1780 .clone()
1781 .unwrap_or_else(|| "default".to_string())
1782 );
1783 }
1784}
1785
1786fn eval_run_record(
1787 path: &str,
1788 compare: Option<&str>,
1789 structural_experiment: Option<&str>,
1790 argv: &[String],
1791 llm_mock_mode: &commands::run::CliLlmMockMode,
1792) {
1793 if let Some(experiment) = structural_experiment {
1794 let path_buf = PathBuf::from(path);
1795 if !path_buf.is_file() || path_buf.extension().and_then(|ext| ext.to_str()) != Some("harn")
1796 {
1797 eprintln!(
1798 "--structural-experiment currently requires a .harn pipeline path, got {}",
1799 path
1800 );
1801 process::exit(1);
1802 }
1803 if compare.is_some() {
1804 eprintln!("--compare cannot be combined with --structural-experiment");
1805 process::exit(1);
1806 }
1807 if matches!(llm_mock_mode, commands::run::CliLlmMockMode::Record { .. }) {
1808 eprintln!("--llm-mock-record cannot be combined with --structural-experiment");
1809 process::exit(1);
1810 }
1811 let path_buf = fs::canonicalize(&path_buf).unwrap_or_else(|error| {
1812 command_error(&format!(
1813 "failed to canonicalize structural eval pipeline {}: {error}",
1814 path_buf.display()
1815 ))
1816 });
1817 run_structural_experiment_eval(&path_buf, experiment, argv, llm_mock_mode);
1818 return;
1819 }
1820
1821 let path_buf = PathBuf::from(path);
1822 if path_buf.is_file() && file_looks_like_persona_eval_ladder_manifest(&path_buf) {
1823 if compare.is_some() {
1824 eprintln!("--compare is not supported with persona eval ladder manifests");
1825 process::exit(1);
1826 }
1827 let manifest = load_persona_eval_ladder_manifest_or_exit(&path_buf);
1828 let report =
1829 harn_vm::orchestration::run_persona_eval_ladder(&manifest).unwrap_or_else(|error| {
1830 eprintln!(
1831 "Failed to evaluate persona eval ladder {}: {error}",
1832 path_buf.display()
1833 );
1834 process::exit(1);
1835 });
1836 print_persona_ladder_report(&report);
1837 if !report.pass {
1838 process::exit(1);
1839 }
1840 return;
1841 }
1842
1843 if path_buf.is_file() && file_looks_like_eval_pack_manifest(&path_buf) {
1844 if compare.is_some() {
1845 eprintln!("--compare is not supported with eval pack manifests");
1846 process::exit(1);
1847 }
1848 let manifest = load_eval_pack_manifest_or_exit(&path_buf);
1849 let report = harn_vm::orchestration::evaluate_eval_pack_manifest(&manifest).unwrap_or_else(
1850 |error| {
1851 eprintln!(
1852 "Failed to evaluate eval pack {}: {error}",
1853 path_buf.display()
1854 );
1855 process::exit(1);
1856 },
1857 );
1858 print_eval_pack_report(&report);
1859 if !report.pass {
1860 process::exit(1);
1861 }
1862 return;
1863 }
1864
1865 if path_buf.is_file() && file_looks_like_eval_manifest(&path_buf) {
1866 if compare.is_some() {
1867 eprintln!("--compare is not supported with eval suite manifests");
1868 process::exit(1);
1869 }
1870 let manifest = load_eval_suite_manifest_or_exit(&path_buf);
1871 let suite = harn_vm::orchestration::evaluate_run_suite_manifest(&manifest).unwrap_or_else(
1872 |error| {
1873 eprintln!(
1874 "Failed to evaluate manifest {}: {error}",
1875 path_buf.display()
1876 );
1877 process::exit(1);
1878 },
1879 );
1880 println!(
1881 "{} {} passed, {} failed, {} total",
1882 if suite.pass { "PASS" } else { "FAIL" },
1883 suite.passed,
1884 suite.failed,
1885 suite.total
1886 );
1887 for case in &suite.cases {
1888 println!(
1889 "- {} [{}] {}",
1890 case.label.clone().unwrap_or_else(|| case.run_id.clone()),
1891 case.workflow_id,
1892 if case.pass { "PASS" } else { "FAIL" }
1893 );
1894 if let Some(path) = &case.source_path {
1895 println!(" path: {}", path);
1896 }
1897 if let Some(comparison) = &case.comparison {
1898 println!(" baseline identical: {}", comparison.identical);
1899 if !comparison.identical {
1900 println!(
1901 " baseline status: {} -> {}",
1902 comparison.left_status, comparison.right_status
1903 );
1904 }
1905 }
1906 for failure in &case.failures {
1907 println!(" {}", failure);
1908 }
1909 }
1910 if !suite.pass {
1911 process::exit(1);
1912 }
1913 return;
1914 }
1915
1916 let paths = collect_run_record_paths(path);
1917 if paths.len() > 1 {
1918 let mut cases = Vec::new();
1919 for path in &paths {
1920 let run = load_run_record_or_exit(path);
1921 let fixture = run
1922 .replay_fixture
1923 .clone()
1924 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1925 cases.push((run, fixture, Some(path.display().to_string())));
1926 }
1927 let suite = harn_vm::orchestration::evaluate_run_suite(cases);
1928 println!(
1929 "{} {} passed, {} failed, {} total",
1930 if suite.pass { "PASS" } else { "FAIL" },
1931 suite.passed,
1932 suite.failed,
1933 suite.total
1934 );
1935 for case in &suite.cases {
1936 println!(
1937 "- {} [{}] {}",
1938 case.run_id,
1939 case.workflow_id,
1940 if case.pass { "PASS" } else { "FAIL" }
1941 );
1942 if let Some(path) = &case.source_path {
1943 println!(" path: {}", path);
1944 }
1945 if let Some(comparison) = &case.comparison {
1946 println!(" baseline identical: {}", comparison.identical);
1947 }
1948 for failure in &case.failures {
1949 println!(" {}", failure);
1950 }
1951 }
1952 if !suite.pass {
1953 process::exit(1);
1954 }
1955 return;
1956 }
1957
1958 let run = load_run_record_or_exit(&paths[0]);
1959 let fixture = run
1960 .replay_fixture
1961 .clone()
1962 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1963 let report = harn_vm::orchestration::evaluate_run_against_fixture(&run, &fixture);
1964 println!("{}", if report.pass { "PASS" } else { "FAIL" });
1965 println!("Stages: {}", report.stage_count);
1966 if let Some(compare_path) = compare {
1967 let baseline = load_run_record_or_exit(Path::new(compare_path));
1968 print_run_diff(&harn_vm::orchestration::diff_run_records(&baseline, &run));
1969 }
1970 if !report.failures.is_empty() {
1971 for failure in &report.failures {
1972 println!("- {}", failure);
1973 }
1974 }
1975 if !report.pass {
1976 process::exit(1);
1977 }
1978}
1979
1980fn print_eval_pack_report(report: &harn_vm::orchestration::EvalPackReport) {
1981 println!(
1982 "{} {} passed, {} blocking failed, {} warning, {} informational, {} total",
1983 if report.pass { "PASS" } else { "FAIL" },
1984 report.passed,
1985 report.blocking_failed,
1986 report.warning_failed,
1987 report.informational_failed,
1988 report.total
1989 );
1990 for case in &report.cases {
1991 println!(
1992 "- {} [{}] {} ({})",
1993 case.label,
1994 case.workflow_id,
1995 if case.pass { "PASS" } else { "FAIL" },
1996 case.severity
1997 );
1998 if let Some(path) = &case.source_path {
1999 println!(" path: {}", path);
2000 }
2001 if let Some(comparison) = &case.comparison {
2002 println!(" baseline identical: {}", comparison.identical);
2003 if !comparison.identical {
2004 println!(
2005 " baseline status: {} -> {}",
2006 comparison.left_status, comparison.right_status
2007 );
2008 }
2009 }
2010 for failure in &case.failures {
2011 println!(" {}", failure);
2012 }
2013 for warning in &case.warnings {
2014 println!(" warning: {}", warning);
2015 }
2016 for item in &case.informational {
2017 println!(" info: {}", item);
2018 }
2019 }
2020 for ladder in &report.ladders {
2021 println!(
2022 "- ladder {} [{}] {} ({}) first_correct={}/{}",
2023 ladder.id,
2024 ladder.persona,
2025 if ladder.pass { "PASS" } else { "FAIL" },
2026 ladder.severity,
2027 ladder.first_correct_route.as_deref().unwrap_or("<none>"),
2028 ladder.first_correct_tier.as_deref().unwrap_or("<none>")
2029 );
2030 println!(" artifacts: {}", ladder.artifact_root);
2031 for tier in &ladder.tiers {
2032 println!(
2033 " - {} [{}] {} tools={} models={} latency={}ms cost=${:.6}",
2034 tier.timeout_tier,
2035 tier.route_id,
2036 tier.outcome,
2037 tier.tool_calls,
2038 tier.model_calls,
2039 tier.latency_ms,
2040 tier.cost_usd
2041 );
2042 for reason in &tier.degradation_reasons {
2043 println!(" {}", reason);
2044 }
2045 }
2046 }
2047}
2048
2049fn print_persona_ladder_report(report: &harn_vm::orchestration::PersonaEvalLadderReport) {
2050 println!(
2051 "{} ladder {} passed, {} degraded/looped, {} total",
2052 if report.pass { "PASS" } else { "FAIL" },
2053 report.passed,
2054 report.failed,
2055 report.total
2056 );
2057 println!(
2058 "first_correct: {}/{}",
2059 report.first_correct_route.as_deref().unwrap_or("<none>"),
2060 report.first_correct_tier.as_deref().unwrap_or("<none>")
2061 );
2062 println!("artifacts: {}", report.artifact_root);
2063 for tier in &report.tiers {
2064 println!(
2065 "- {} [{}] {} tools={} models={} latency={}ms cost=${:.6}",
2066 tier.timeout_tier,
2067 tier.route_id,
2068 tier.outcome,
2069 tier.tool_calls,
2070 tier.model_calls,
2071 tier.latency_ms,
2072 tier.cost_usd
2073 );
2074 for reason in &tier.degradation_reasons {
2075 println!(" {}", reason);
2076 }
2077 }
2078}
2079
2080fn run_package_evals() {
2081 let paths = package::load_package_eval_pack_paths(None).unwrap_or_else(|error| {
2082 eprintln!("{error}");
2083 process::exit(1);
2084 });
2085 let mut all_pass = true;
2086 for path in &paths {
2087 println!("Eval pack: {}", path.display());
2088 let manifest = load_eval_pack_manifest_or_exit(path);
2089 let report = harn_vm::orchestration::evaluate_eval_pack_manifest(&manifest).unwrap_or_else(
2090 |error| {
2091 eprintln!("Failed to evaluate eval pack {}: {error}", path.display());
2092 process::exit(1);
2093 },
2094 );
2095 print_eval_pack_report(&report);
2096 all_pass &= report.pass;
2097 }
2098 if !all_pass {
2099 process::exit(1);
2100 }
2101}
2102
2103fn run_structural_experiment_eval(
2104 path: &Path,
2105 experiment: &str,
2106 argv: &[String],
2107 llm_mock_mode: &commands::run::CliLlmMockMode,
2108) {
2109 let baseline_dir = tempfile::Builder::new()
2110 .prefix("harn-eval-baseline-")
2111 .tempdir()
2112 .unwrap_or_else(|error| {
2113 command_error(&format!("failed to create baseline tempdir: {error}"))
2114 });
2115 let variant_dir = tempfile::Builder::new()
2116 .prefix("harn-eval-variant-")
2117 .tempdir()
2118 .unwrap_or_else(|error| {
2119 command_error(&format!("failed to create variant tempdir: {error}"))
2120 });
2121
2122 let baseline = spawn_eval_pipeline_run(path, baseline_dir.path(), None, argv, llm_mock_mode);
2123 if !baseline.status.success() {
2124 relay_subprocess_failure("baseline", &baseline);
2125 }
2126
2127 let variant = spawn_eval_pipeline_run(
2128 path,
2129 variant_dir.path(),
2130 Some(experiment),
2131 argv,
2132 llm_mock_mode,
2133 );
2134 if !variant.status.success() {
2135 relay_subprocess_failure("variant", &variant);
2136 }
2137
2138 let baseline_runs = collect_structural_eval_runs(baseline_dir.path());
2139 let variant_runs = collect_structural_eval_runs(variant_dir.path());
2140 if baseline_runs.is_empty() || variant_runs.is_empty() {
2141 eprintln!(
2142 "structural eval expected workflow run records under {} and {}, but one side was empty",
2143 baseline_dir.path().display(),
2144 variant_dir.path().display()
2145 );
2146 process::exit(1);
2147 }
2148 if baseline_runs.len() != variant_runs.len() {
2149 eprintln!(
2150 "structural eval produced different run counts: baseline={} variant={}",
2151 baseline_runs.len(),
2152 variant_runs.len()
2153 );
2154 process::exit(1);
2155 }
2156
2157 let mut baseline_ok = 0usize;
2158 let mut variant_ok = 0usize;
2159 let mut any_failures = false;
2160
2161 println!("Structural experiment: {}", experiment);
2162 println!("Cases: {}", baseline_runs.len());
2163 for (baseline_run, variant_run) in baseline_runs.iter().zip(variant_runs.iter()) {
2164 let baseline_fixture = baseline_run
2165 .replay_fixture
2166 .clone()
2167 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(baseline_run));
2168 let variant_fixture = variant_run
2169 .replay_fixture
2170 .clone()
2171 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(variant_run));
2172 let baseline_report =
2173 harn_vm::orchestration::evaluate_run_against_fixture(baseline_run, &baseline_fixture);
2174 let variant_report =
2175 harn_vm::orchestration::evaluate_run_against_fixture(variant_run, &variant_fixture);
2176 let diff = harn_vm::orchestration::diff_run_records(baseline_run, variant_run);
2177 if baseline_report.pass {
2178 baseline_ok += 1;
2179 }
2180 if variant_report.pass {
2181 variant_ok += 1;
2182 }
2183 any_failures |= !baseline_report.pass || !variant_report.pass;
2184 println!(
2185 "- {} [{}]",
2186 variant_run
2187 .workflow_name
2188 .clone()
2189 .unwrap_or_else(|| variant_run.workflow_id.clone()),
2190 variant_run.task
2191 );
2192 println!(
2193 " baseline: {}",
2194 if baseline_report.pass { "PASS" } else { "FAIL" }
2195 );
2196 for failure in &baseline_report.failures {
2197 println!(" {}", failure);
2198 }
2199 println!(
2200 " variant: {}",
2201 if variant_report.pass { "PASS" } else { "FAIL" }
2202 );
2203 for failure in &variant_report.failures {
2204 println!(" {}", failure);
2205 }
2206 println!(" diff identical: {}", diff.identical);
2207 println!(" stage diffs: {}", diff.stage_diffs.len());
2208 println!(" tool diffs: {}", diff.tool_diffs.len());
2209 println!(" observability diffs: {}", diff.observability_diffs.len());
2210 }
2211
2212 println!("Baseline {} / {} passed", baseline_ok, baseline_runs.len());
2213 println!("Variant {} / {} passed", variant_ok, variant_runs.len());
2214
2215 if any_failures {
2216 process::exit(1);
2217 }
2218}
2219
2220fn spawn_eval_pipeline_run(
2221 path: &Path,
2222 run_dir: &Path,
2223 structural_experiment: Option<&str>,
2224 argv: &[String],
2225 llm_mock_mode: &commands::run::CliLlmMockMode,
2226) -> std::process::Output {
2227 let exe = env::current_exe().unwrap_or_else(|error| {
2228 command_error(&format!("failed to resolve current executable: {error}"))
2229 });
2230 let mut command = std::process::Command::new(exe);
2231 command.current_dir(path.parent().unwrap_or_else(|| Path::new(".")));
2232 command.arg("run");
2233 match llm_mock_mode {
2234 commands::run::CliLlmMockMode::Off => {}
2235 commands::run::CliLlmMockMode::Replay { fixture_path } => {
2236 command
2237 .arg("--llm-mock")
2238 .arg(absolute_cli_path(fixture_path));
2239 }
2240 commands::run::CliLlmMockMode::Record { fixture_path } => {
2241 command
2242 .arg("--llm-mock-record")
2243 .arg(absolute_cli_path(fixture_path));
2244 }
2245 }
2246 command.arg(path);
2247 if !argv.is_empty() {
2248 command.arg("--");
2249 command.args(argv);
2250 }
2251 command.env(harn_vm::runtime_paths::HARN_RUN_DIR_ENV, run_dir);
2252 if let Some(experiment) = structural_experiment {
2253 command.env("HARN_STRUCTURAL_EXPERIMENT", experiment);
2254 }
2255 command.output().unwrap_or_else(|error| {
2256 command_error(&format!(
2257 "failed to spawn `harn run {}` for structural eval: {error}",
2258 path.display()
2259 ))
2260 })
2261}
2262
/// Returns `path` unchanged when it is already absolute; otherwise joins it
/// onto the current working directory.
///
/// If the working directory cannot be determined, `"."` is used as the base,
/// so the result may still be relative in that case.
fn absolute_cli_path(path: &Path) -> PathBuf {
    if path.is_relative() {
        let base = match env::current_dir() {
            Ok(cwd) => cwd,
            Err(_) => PathBuf::from("."),
        };
        base.join(path)
    } else {
        path.to_path_buf()
    }
}
2271
/// Forwards a failed subprocess's captured output to stderr and exits.
///
/// Each non-blank stream is printed under a `[label] stdout:` /
/// `[label] stderr:` header (stdout first). The process then exits with the
/// child's exit code, or 1 when the child reported none.
fn relay_subprocess_failure(label: &str, output: &std::process::Output) -> ! {
    let captured = [("stdout", &output.stdout), ("stderr", &output.stderr)];
    for (stream, bytes) in captured {
        let text = String::from_utf8_lossy(bytes);
        if text.trim().is_empty() {
            continue;
        }
        eprintln!("[{label}] {stream}:\n{text}");
    }
    let exit_code = match output.status.code() {
        Some(code) => code,
        None => 1,
    };
    process::exit(exit_code)
}
2283
2284fn collect_structural_eval_runs(dir: &Path) -> Vec<harn_vm::orchestration::RunRecord> {
2285 let mut paths: Vec<PathBuf> = fs::read_dir(dir)
2286 .unwrap_or_else(|error| {
2287 command_error(&format!(
2288 "failed to read structural eval run dir {}: {error}",
2289 dir.display()
2290 ))
2291 })
2292 .filter_map(|entry| entry.ok().map(|entry| entry.path()))
2293 .filter(|entry| entry.extension().and_then(|ext| ext.to_str()) == Some("json"))
2294 .collect();
2295 paths.sort();
2296 let mut runs: Vec<_> = paths
2297 .iter()
2298 .map(|path| load_run_record_or_exit(path))
2299 .collect();
2300 runs.sort_by(|left, right| {
2301 (
2302 left.started_at.as_str(),
2303 left.workflow_id.as_str(),
2304 left.task.as_str(),
2305 )
2306 .cmp(&(
2307 right.started_at.as_str(),
2308 right.workflow_id.as_str(),
2309 right.task.as_str(),
2310 ))
2311 });
2312 runs
2313}
2314
2315pub(crate) fn parse_source_file(path: &str) -> (String, Vec<harn_parser::SNode>) {
2317 let source = match fs::read_to_string(path) {
2318 Ok(s) => s,
2319 Err(e) => {
2320 eprintln!("Error reading {path}: {e}");
2321 process::exit(1);
2322 }
2323 };
2324
2325 let mut lexer = Lexer::new(&source);
2326 let tokens = match lexer.tokenize() {
2327 Ok(t) => t,
2328 Err(e) => {
2329 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2330 &source,
2331 path,
2332 &error_span_from_lex(&e),
2333 "error",
2334 &e.to_string(),
2335 Some("here"),
2336 None,
2337 );
2338 eprint!("{diagnostic}");
2339 process::exit(1);
2340 }
2341 };
2342
2343 let mut parser = Parser::new(tokens);
2344 let program = match parser.parse() {
2345 Ok(p) => p,
2346 Err(err) => {
2347 if parser.all_errors().is_empty() {
2348 let span = error_span_from_parse(&err);
2349 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2350 &source,
2351 path,
2352 &span,
2353 "error",
2354 &harn_parser::diagnostic::parser_error_message(&err),
2355 Some(harn_parser::diagnostic::parser_error_label(&err)),
2356 harn_parser::diagnostic::parser_error_help(&err),
2357 );
2358 eprint!("{diagnostic}");
2359 } else {
2360 for e in parser.all_errors() {
2361 let span = error_span_from_parse(e);
2362 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2363 &source,
2364 path,
2365 &span,
2366 "error",
2367 &harn_parser::diagnostic::parser_error_message(e),
2368 Some(harn_parser::diagnostic::parser_error_label(e)),
2369 harn_parser::diagnostic::parser_error_help(e),
2370 );
2371 eprint!("{diagnostic}");
2372 }
2373 }
2374 process::exit(1);
2375 }
2376 };
2377
2378 (source, program)
2379}
2380
2381fn error_span_from_lex(e: &harn_lexer::LexerError) -> harn_lexer::Span {
2382 match e {
2383 harn_lexer::LexerError::UnexpectedCharacter(_, span)
2384 | harn_lexer::LexerError::UnterminatedString(span)
2385 | harn_lexer::LexerError::UnterminatedBlockComment(span) => *span,
2386 }
2387}
2388
2389fn error_span_from_parse(e: &harn_parser::ParserError) -> harn_lexer::Span {
2390 match e {
2391 harn_parser::ParserError::Unexpected { span, .. } => *span,
2392 harn_parser::ParserError::UnexpectedEof { span, .. } => *span,
2393 }
2394}
2395
2396pub(crate) async fn execute(source: &str, source_path: Option<&Path>) -> Result<String, String> {
2398 execute_with_skill_dirs(source, source_path, &[]).await
2399}
2400
2401pub(crate) async fn execute_with_skill_dirs(
2402 source: &str,
2403 source_path: Option<&Path>,
2404 cli_skill_dirs: &[PathBuf],
2405) -> Result<String, String> {
2406 let mut lexer = Lexer::new(source);
2407 let tokens = lexer.tokenize().map_err(|e| e.to_string())?;
2408 let mut parser = Parser::new(tokens);
2409 let program = parser.parse().map_err(|e| e.to_string())?;
2410
2411 let mut checker = TypeChecker::new();
2416 if let Some(path) = source_path {
2417 let graph = harn_modules::build(&[path.to_path_buf()]);
2418 if let Some(imported) = graph.imported_names_for_file(path) {
2419 checker = checker.with_imported_names(imported);
2420 }
2421 if let Some(imported) = graph.imported_type_declarations_for_file(path) {
2422 checker = checker.with_imported_type_decls(imported);
2423 }
2424 if let Some(imported) = graph.imported_callable_declarations_for_file(path) {
2425 checker = checker.with_imported_callable_decls(imported);
2426 }
2427 }
2428 let type_diagnostics = checker.check(&program);
2429 let mut warning_lines = Vec::new();
2430 for diag in &type_diagnostics {
2431 match diag.severity {
2432 DiagnosticSeverity::Error => return Err(diag.message.clone()),
2433 DiagnosticSeverity::Warning => {
2434 warning_lines.push(format!("warning: {}", diag.message));
2435 }
2436 }
2437 }
2438
2439 let chunk = harn_vm::Compiler::new()
2440 .compile(&program)
2441 .map_err(|e| e.to_string())?;
2442
2443 let local = tokio::task::LocalSet::new();
2444 local
2445 .run_until(async {
2446 let mut vm = harn_vm::Vm::new();
2447 harn_vm::register_vm_stdlib(&mut vm);
2448 install_default_hostlib(&mut vm);
2449 let source_parent = source_path
2450 .and_then(|p| p.parent())
2451 .unwrap_or(std::path::Path::new("."));
2452 let project_root = harn_vm::stdlib::process::find_project_root(source_parent);
2453 let store_base = project_root.as_deref().unwrap_or(source_parent);
2454 let execution_cwd = std::env::current_dir()
2455 .unwrap_or_else(|_| std::path::PathBuf::from("."))
2456 .to_string_lossy()
2457 .into_owned();
2458 let source_dir = source_parent.to_string_lossy().into_owned();
2459 if source_path.is_some_and(is_conformance_path) {
2460 harn_vm::event_log::install_memory_for_current_thread(64);
2461 }
2462 harn_vm::register_store_builtins(&mut vm, store_base);
2463 harn_vm::register_metadata_builtins(&mut vm, store_base);
2464 let pipeline_name = source_path
2465 .and_then(|p| p.file_stem())
2466 .and_then(|s| s.to_str())
2467 .unwrap_or("default");
2468 harn_vm::register_checkpoint_builtins(&mut vm, store_base, pipeline_name);
2469 harn_vm::stdlib::process::set_thread_execution_context(Some(
2470 harn_vm::orchestration::RunExecutionRecord {
2471 cwd: Some(execution_cwd),
2472 source_dir: Some(source_dir),
2473 env: std::collections::BTreeMap::new(),
2474 adapter: None,
2475 repo_path: None,
2476 worktree_path: None,
2477 branch: None,
2478 base_ref: None,
2479 cleanup: None,
2480 },
2481 ));
2482 if let Some(ref root) = project_root {
2483 vm.set_project_root(root);
2484 }
2485 if let Some(path) = source_path {
2486 if let Some(parent) = path.parent() {
2487 if !parent.as_os_str().is_empty() {
2488 vm.set_source_dir(parent);
2489 }
2490 }
2491 }
2492 let loaded = skill_loader::load_skills(&skill_loader::SkillLoaderInputs {
2496 cli_dirs: cli_skill_dirs.to_vec(),
2497 source_path: source_path.map(Path::to_path_buf),
2498 });
2499 skill_loader::emit_loader_warnings(&loaded.loader_warnings);
2500 skill_loader::install_skills_global(&mut vm, &loaded);
2501 if let Some(path) = source_path {
2502 let extensions = package::load_runtime_extensions(path);
2503 package::install_runtime_extensions(&extensions);
2504 package::install_manifest_triggers(&mut vm, &extensions)
2505 .await
2506 .map_err(|error| format!("failed to install manifest triggers: {error}"))?;
2507 package::install_manifest_hooks(&mut vm, &extensions)
2508 .await
2509 .map_err(|error| format!("failed to install manifest hooks: {error}"))?;
2510 }
2511 let _event_log = harn_vm::event_log::active_event_log()
2512 .unwrap_or_else(|| harn_vm::event_log::install_memory_for_current_thread(64));
2513 let connector_clients_installed =
2514 should_install_default_connector_clients(source, source_path);
2515 if connector_clients_installed {
2516 install_default_connector_clients(store_base)
2517 .await
2518 .map_err(|error| format!("failed to initialize connector clients: {error}"))?;
2519 }
2520 let execution_result = vm.execute(&chunk).await.map_err(|e| e.to_string());
2521 harn_vm::egress::reset_egress_policy_for_host();
2522 if connector_clients_installed {
2523 harn_vm::clear_active_connector_clients();
2524 }
2525 harn_vm::stdlib::process::set_thread_execution_context(None);
2526 execution_result?;
2527 let mut output = String::new();
2528 for wl in &warning_lines {
2529 output.push_str(wl);
2530 output.push('\n');
2531 }
2532 output.push_str(vm.output());
2533 Ok(output)
2534 })
2535 .await
2536}
2537
2538fn should_install_default_connector_clients(source: &str, source_path: Option<&Path>) -> bool {
2539 if !source_path.is_some_and(is_conformance_path) {
2540 return true;
2541 }
2542 source.contains("connector_call")
2543 || source.contains("std/connectors")
2544 || source.contains("connectors/")
2545}
2546
/// Returns `true` when any component of `path` is exactly `conformance`.
fn is_conformance_path(path: &Path) -> bool {
    for segment in path.iter() {
        if segment == "conformance" {
            return true;
        }
    }
    false
}
2551
2552async fn install_default_connector_clients(base_dir: &Path) -> Result<(), String> {
2553 let event_log = harn_vm::event_log::active_event_log()
2554 .unwrap_or_else(|| harn_vm::event_log::install_memory_for_current_thread(64));
2555 let secret_namespace = connector_secret_namespace(base_dir);
2556 let secrets: Arc<dyn harn_vm::secrets::SecretProvider> = Arc::new(
2557 harn_vm::secrets::configured_default_chain(secret_namespace)
2558 .map_err(|error| format!("failed to configure secret providers: {error}"))?,
2559 );
2560
2561 let registry = harn_vm::ConnectorRegistry::default();
2562 let metrics = Arc::new(harn_vm::MetricsRegistry::default());
2563 let inbox = Arc::new(
2564 harn_vm::InboxIndex::new(event_log.clone(), metrics.clone())
2565 .await
2566 .map_err(|error| error.to_string())?,
2567 );
2568 registry
2569 .init_all(harn_vm::ConnectorCtx {
2570 event_log,
2571 secrets,
2572 inbox,
2573 metrics,
2574 rate_limiter: Arc::new(harn_vm::RateLimiterFactory::default()),
2575 })
2576 .await
2577 .map_err(|error| error.to_string())?;
2578 let clients = registry.client_map().await;
2579 harn_vm::install_active_connector_clients(clients);
2580 Ok(())
2581}
2582
/// Picks the secret namespace used for connector clients.
///
/// A non-blank `HARN_SECRET_NAMESPACE` environment variable wins and is
/// returned as-is. Otherwise the namespace is `harn/<leaf>`, where `<leaf>` is
/// the final component of `base_dir`, or `workspace` when that component is
/// missing, empty, or not valid UTF-8.
fn connector_secret_namespace(base_dir: &Path) -> String {
    let configured = std::env::var("HARN_SECRET_NAMESPACE")
        .ok()
        .filter(|namespace| !namespace.trim().is_empty());
    if let Some(namespace) = configured {
        return namespace;
    }

    let leaf = match base_dir.file_name().and_then(|name| name.to_str()) {
        Some(name) if !name.is_empty() => name,
        _ => "workspace",
    };
    format!("harn/{leaf}")
}
2596
#[cfg(test)]
mod main_tests {
    use super::{normalize_serve_args, should_install_default_connector_clients};
    use std::path::Path;

    /// Builds an owned argument vector from string literals.
    fn argv(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    #[test]
    fn normalize_serve_args_inserts_a2a_for_legacy_shape() {
        let normalized =
            normalize_serve_args(argv(&["harn", "serve", "--port", "3000", "agent.harn"]));
        assert_eq!(
            normalized,
            argv(&["harn", "serve", "a2a", "--port", "3000", "agent.harn"])
        );
    }

    #[test]
    fn normalize_serve_args_preserves_explicit_subcommands() {
        let original = argv(&["harn", "serve", "acp", "server.harn"]);
        let normalized = normalize_serve_args(original.clone());
        assert_eq!(normalized, original);
    }

    #[test]
    fn conformance_skips_connector_clients_unless_fixture_uses_connectors() {
        let conformance = Path::new("conformance/tests/language/basic.harn");
        let example = Path::new("examples/demo.harn");
        // (source, path, expected decision)
        let cases = [
            ("println(1)", conformance, false),
            ("trust_graph_verify_chain()", conformance, false),
            (
                "import { post_message } from \"std/connectors/slack\"",
                conformance,
                true,
            ),
            ("println(1)", example, true),
        ];
        for (source, path, expected) in cases {
            assert_eq!(
                should_install_default_connector_clients(source, Some(path)),
                expected,
                "source={source:?} path={}",
                path.display()
            );
        }
    }
}