1#![recursion_limit = "256"]
2
3pub mod acp;
4pub mod cli;
5pub mod commands;
6pub mod config;
7pub mod env_guard;
8pub mod format;
9pub mod package;
10mod provider_bootstrap;
11pub mod skill_loader;
12pub mod skill_provenance;
13pub mod test_runner;
14#[doc(hidden)]
15pub mod tests;
16
17use clap::{error::ErrorKind, CommandFactory, Parser as ClapParser};
18use std::path::{Path, PathBuf};
19use std::sync::Arc;
20use std::{env, fs, process, thread};
21
22use cli::{
23 Cli, Command, CompletionShell, MergeCaptainCommand, MergeCaptainMockCommand, ModelInfoArgs,
24 PackageArtifactsCommand, PackageCacheCommand, PackageCommand, PersonaCommand,
25 PersonaSupervisionCommand, ProvidersCommand, RunsCommand, ServeCommand, SkillCommand,
26 SkillKeyCommand, SkillTrustCommand, SkillsCommand, ToolCommand,
27};
28use harn_lexer::Lexer;
29use harn_parser::{DiagnosticSeverity, Parser, TypeChecker};
30
/// Stack size, in bytes, requested for the dedicated `harn-cli` thread that
/// [`run`] spawns to host the async runtime (16 MiB).
pub const CLI_RUNTIME_STACK_SIZE: usize = 16 * 1024 * 1024;
32
/// Installs the default hostlib surface into `vm`.
///
/// Only compiled when the `hostlib` feature is enabled. Whatever
/// `harn_hostlib::install_default` returns is intentionally discarded.
#[cfg(feature = "hostlib")]
pub(crate) fn install_default_hostlib(vm: &mut harn_vm::Vm) {
    // The return value is not needed by callers of this wrapper.
    _ = harn_hostlib::install_default(vm);
}
37
/// No-op stand-in used when the `hostlib` feature is disabled, so callers can
/// invoke `install_default_hostlib` unconditionally.
#[cfg(not(feature = "hostlib"))]
pub(crate) fn install_default_hostlib(_vm: &mut harn_vm::Vm) {}
40
41pub fn run() {
44 let handle = thread::Builder::new()
45 .name("harn-cli".to_string())
46 .stack_size(CLI_RUNTIME_STACK_SIZE)
47 .spawn(|| {
48 let runtime = tokio::runtime::Builder::new_multi_thread()
49 .enable_all()
50 .build()
51 .unwrap_or_else(|error| {
52 eprintln!("failed to start async runtime: {error}");
53 process::exit(1);
54 });
55 runtime.block_on(async_main());
56 })
57 .unwrap_or_else(|error| {
58 eprintln!("failed to start CLI runtime thread: {error}");
59 process::exit(1);
60 });
61
62 if let Err(payload) = handle.join() {
63 std::panic::resume_unwind(payload);
64 }
65}
66
/// Async entry point of the CLI: reads the process arguments, parses them
/// with clap, and dispatches to the handler for the selected subcommand.
///
/// Nothing is returned to the caller. Errors detected in this function end
/// the process through `command_error`, `process::exit`, or clap's
/// `Error::exit`; a normal return means the selected command finished
/// without requesting a non-zero exit here.
async fn async_main() {
    // `harn serve <flags...>` without an explicit transport is rewritten to
    // `harn serve a2a <flags...>` before parsing (see `normalize_serve_args`).
    let raw_args = normalize_serve_args(env::args().collect());
    // Shortcut: exactly one argument ending in `.harn` bypasses clap and runs
    // that file with default options.
    if raw_args.len() == 2 && raw_args[1].ends_with(".harn") {
        provider_bootstrap::maybe_seed_ollama_for_run_file(Path::new(&raw_args[1]), false, false)
            .await;
        commands::run::run_file(
            &raw_args[1],
            false,
            std::collections::HashSet::new(),
            Vec::new(),
            commands::run::CliLlmMockMode::Off,
            None,
            commands::run::RunProfileOptions::default(),
        )
        .await;
        return;
    }

    let cli = match Cli::try_parse_from(&raw_args) {
        Ok(cli) => cli,
        Err(error) => {
            // NOTE(review): both paths below call `error.exit()`, so the
            // `matches!` check currently has no effect on behavior.
            if matches!(
                error.kind(),
                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
            ) {
                error.exit();
            }
            error.exit();
        }
    };

    // Panics with the message below if parsing succeeded without a command.
    match cli.command.expect("clap requires a command") {
        Command::Version => print_version(),
        Command::Skill(args) => match args.command {
            SkillCommand::Key(key_args) => match key_args.command {
                SkillKeyCommand::Generate(generate) => commands::skill::run_key_generate(&generate),
            },
            SkillCommand::Sign(sign) => commands::skill::run_sign(&sign),
            SkillCommand::Endorse(endorse) => commands::skill::run_endorse(&endorse),
            SkillCommand::Verify(verify) => commands::skill::run_verify(&verify),
            SkillCommand::WhoSigned(who_signed) => {
                commands::skill::run_who_signed(&who_signed).await
            }
            SkillCommand::Trust(trust_args) => match trust_args.command {
                SkillTrustCommand::Add(add) => commands::skill::run_trust_add(&add),
                SkillTrustCommand::List(list) => commands::skill::run_trust_list(&list),
            },
            SkillCommand::New(new_args) => commands::skills::run_new(&new_args),
        },
        Command::Run(args) => {
            // The provider bootstrap step is skipped when only a cost
            // explanation was requested.
            if !args.explain_cost {
                match (args.eval.as_deref(), args.file.as_deref()) {
                    (Some(code), None) => {
                        provider_bootstrap::maybe_seed_ollama_for_inline(
                            code,
                            args.yes,
                            args.llm_mock.is_some(),
                        )
                        .await;
                    }
                    (None, Some(file)) => {
                        provider_bootstrap::maybe_seed_ollama_for_run_file(
                            Path::new(file),
                            args.yes,
                            args.llm_mock.is_some(),
                        )
                        .await;
                    }
                    // Invalid combinations are rejected by the second match below.
                    _ => {}
                }
            }
            let denied =
                commands::run::build_denied_builtins(args.deny.as_deref(), args.allow.as_deref());
            // A replay fixture takes precedence over a record fixture when
            // both are supplied.
            let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
                commands::run::CliLlmMockMode::Replay {
                    fixture_path: PathBuf::from(path),
                }
            } else if let Some(path) = args.llm_mock_record.as_ref() {
                commands::run::CliLlmMockMode::Record {
                    fixture_path: PathBuf::from(path),
                }
            } else {
                commands::run::CliLlmMockMode::Off
            };
            // Attestation options are only built when `args.attest` is set.
            let attestation = args.attest.then(|| commands::run::RunAttestationOptions {
                receipt_out: args.receipt_out.as_ref().map(PathBuf::from),
                agent_id: args.attest_agent.clone(),
            });
            let profile_options = run_profile_options(&args.profile);

            match (args.eval.as_deref(), args.file.as_deref()) {
                (Some(code), None) => {
                    // Inline code is wrapped and written to a temp file so it
                    // can go through the same file-based entry points.
                    let (wrapped, tmp) = commands::run::prepare_eval_temp_file(code)
                        .unwrap_or_else(|e| command_error(&e));
                    let tmp_path: PathBuf = tmp.path().to_path_buf();
                    fs::write(&tmp_path, &wrapped).unwrap_or_else(|e| {
                        command_error(&format!("failed to write temp file for -e: {e}"))
                    });
                    let tmp_str = tmp_path.to_string_lossy().into_owned();
                    if args.explain_cost {
                        commands::run::run_explain_cost_file_with_skill_dirs(&tmp_str);
                    } else {
                        commands::run::run_file_with_skill_dirs(
                            &tmp_str,
                            args.trace,
                            denied,
                            args.argv.clone(),
                            args.skill_dir.clone(),
                            llm_mock_mode.clone(),
                            attestation.clone(),
                            profile_options.clone(),
                        )
                        .await;
                    }
                    // `tmp` is held until here so it outlives the run above;
                    // presumably dropping it removes the temp file (confirm
                    // against `prepare_eval_temp_file`).
                    drop(tmp);
                }
                (None, Some(file)) => {
                    if args.explain_cost {
                        commands::run::run_explain_cost_file_with_skill_dirs(file);
                    } else {
                        commands::run::run_file_with_skill_dirs(
                            file,
                            args.trace,
                            denied,
                            args.argv.clone(),
                            args.skill_dir.clone(),
                            llm_mock_mode,
                            attestation,
                            profile_options,
                        )
                        .await
                    }
                }
                (Some(_), Some(_)) => command_error(
                    "`harn run` accepts either `-e <code>` or `<file.harn>`, not both",
                ),
                (None, None) => {
                    command_error("`harn run` requires either `-e <code>` or `<file.harn>`")
                }
            }
        }
        Command::Check(args) => {
            // The two matrix modes are standalone reports: each returns
            // before any per-file checking happens.
            if args.provider_matrix {
                let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
                let extensions = package::load_runtime_extensions(&cwd);
                package::install_runtime_extensions(&extensions);
                commands::check::provider_matrix::run(args.format, args.filter.as_deref());
                return;
            }
            if args.connector_matrix {
                commands::check::connector_matrix::run(
                    args.format,
                    args.filter.as_deref(),
                    &args.targets,
                );
                return;
            }
            let mut target_strings: Vec<String> = args.targets.clone();
            if args.workspace {
                // The first explicit target (if any) anchors the workspace
                // config lookup; workspace pipelines are appended to the
                // explicit targets, with relative entries resolved against
                // the manifest directory.
                let anchor = target_strings.first().map(Path::new);
                match package::load_workspace_config(anchor) {
                    Some((workspace, manifest_dir)) if !workspace.pipelines.is_empty() => {
                        for pipeline in &workspace.pipelines {
                            let candidate = Path::new(pipeline);
                            let resolved = if candidate.is_absolute() {
                                candidate.to_path_buf()
                            } else {
                                manifest_dir.join(candidate)
                            };
                            target_strings.push(resolved.to_string_lossy().into_owned());
                        }
                    }
                    // A workspace config was found but lists no pipelines.
                    Some(_) => command_error(
                        "--workspace requires `[workspace].pipelines` in the nearest harn.toml",
                    ),
                    None => command_error(
                        "--workspace could not find a harn.toml walking up from the target(s)",
                    ),
                }
            }
            if target_strings.is_empty() {
                command_error(
                    "`harn check` requires at least one target path, or `--workspace` with `[workspace].pipelines`",
                );
            }
            // Manifest extensions are validated for every target before any
            // file is collected or checked.
            for target in &target_strings {
                if let Err(error) = package::validate_runtime_manifest_extensions(Path::new(target))
                {
                    command_error(&format!("manifest extension validation failed: {error}"));
                }
            }
            let targets: Vec<&str> = target_strings.iter().map(String::as_str).collect();
            let files = commands::check::collect_harn_targets(&targets);
            if files.is_empty() {
                command_error("no .harn files found under the given target(s)");
            }
            let module_graph = commands::check::build_module_graph(&files);
            let cross_file_imports = commands::check::collect_cross_file_imports(&module_graph);
            let mut should_fail = false;
            for file in &files {
                // Per-file config is loaded first, then overridden by any
                // command-line options that were supplied.
                let mut config = package::load_check_config(Some(file));
                if let Some(path) = args.host_capabilities.as_ref() {
                    config.host_capabilities_path = Some(path.clone());
                }
                if let Some(path) = args.bundle_root.as_ref() {
                    config.bundle_root = Some(path.clone());
                }
                if args.strict_types {
                    config.strict_types = true;
                }
                if let Some(sev) = args.preflight.as_deref() {
                    config.preflight_severity = Some(sev.to_string());
                }
                let outcome = commands::check::check_file_inner(
                    file,
                    &config,
                    &cross_file_imports,
                    &module_graph,
                    args.invariants,
                );
                // Every file is checked; failure is accumulated and reported
                // once after the loop.
                should_fail |= outcome.should_fail(config.strict);
            }
            if should_fail {
                process::exit(1);
            }
        }
        Command::Config(args) => {
            if let Err(error) = commands::config_cmd::run(args).await {
                command_error(&error);
            }
        }
        Command::Explain(args) => {
            let code = commands::explain::run_explain(&args);
            if code != 0 {
                process::exit(code);
            }
        }
        Command::Contracts(args) => {
            commands::contracts::handle_contracts_command(args).await;
        }
        Command::Connect(args) => {
            commands::connect::run_connect(*args).await;
        }
        Command::Lint(args) => {
            let targets: Vec<&str> = args.targets.iter().map(String::as_str).collect();
            let files = commands::check::collect_harn_targets(&targets);
            if files.is_empty() {
                command_error("no .harn files found under the given target(s)");
            }
            let module_graph = commands::check::build_module_graph(&files);
            let cross_file_imports = commands::check::collect_cross_file_imports(&module_graph);
            if args.fix {
                // Fix mode: no outcome is collected here, so this branch
                // never calls `process::exit(1)` itself.
                for file in &files {
                    let mut config = package::load_check_config(Some(file));
                    commands::check::apply_harn_lint_config(file, &mut config);
                    let require_header = args.require_file_header
                        || commands::check::harn_lint_require_file_header(file);
                    let complexity_threshold =
                        commands::check::harn_lint_complexity_threshold(file);
                    let persona_step_allowlist =
                        commands::check::harn_lint_persona_step_allowlist(file);
                    commands::check::lint_fix_file(
                        file,
                        &config,
                        &cross_file_imports,
                        &module_graph,
                        require_header,
                        complexity_threshold,
                        &persona_step_allowlist,
                    );
                }
            } else {
                // Report mode: lint every file, then exit 1 if any outcome
                // says it should fail.
                let mut should_fail = false;
                for file in &files {
                    let mut config = package::load_check_config(Some(file));
                    commands::check::apply_harn_lint_config(file, &mut config);
                    let require_header = args.require_file_header
                        || commands::check::harn_lint_require_file_header(file);
                    let complexity_threshold =
                        commands::check::harn_lint_complexity_threshold(file);
                    let persona_step_allowlist =
                        commands::check::harn_lint_persona_step_allowlist(file);
                    let outcome = commands::check::lint_file_inner(
                        file,
                        &config,
                        &cross_file_imports,
                        &module_graph,
                        require_header,
                        complexity_threshold,
                        &persona_step_allowlist,
                    );
                    should_fail |= outcome.should_fail(config.strict);
                }
                if should_fail {
                    process::exit(1);
                }
            }
        }
        Command::Fmt(args) => {
            let targets: Vec<&str> = args.targets.iter().map(String::as_str).collect();
            // Config is looked up from the first target, or "." when no
            // target was given.
            let anchor = targets.first().map(Path::new).unwrap_or(Path::new("."));
            let loaded = match config::load_for_path(anchor) {
                Ok(c) => c,
                Err(e) => {
                    // A config that fails to load is not fatal: warn and
                    // continue with the defaults.
                    eprintln!("warning: {e}");
                    config::HarnConfig::default()
                }
            };
            // Precedence, lowest to highest: built-in defaults, loaded
            // config, command-line options.
            let mut opts = harn_fmt::FmtOptions::default();
            if let Some(w) = loaded.fmt.line_width {
                opts.line_width = w;
            }
            if let Some(w) = loaded.fmt.separator_width {
                opts.separator_width = w;
            }
            if let Some(w) = args.line_width {
                opts.line_width = w;
            }
            if let Some(w) = args.separator_width {
                opts.separator_width = w;
            }
            commands::check::fmt_targets(
                &targets,
                commands::check::FmtMode::from_check_flag(args.check),
                &opts,
            );
        }
        Command::Test(args) => {
            // Reserved target `agents-conformance`: validated and run on its
            // own, then returns.
            if args.target.as_deref() == Some("agents-conformance") {
                if args.selection.is_some() {
                    command_error(
                        "`harn test agents-conformance` does not accept a second positional target; use --category instead",
                    );
                }
                if args.evals || args.determinism || args.record || args.replay || args.watch {
                    command_error(
                        "`harn test agents-conformance` cannot be combined with --evals, --determinism, --record, --replay, or --watch",
                    );
                }
                let Some(target_url) = args.agents_target.clone() else {
                    command_error("`harn test agents-conformance` requires --target <url>");
                };
                commands::agents_conformance::run_agents_conformance(
                    commands::agents_conformance::AgentsConformanceConfig {
                        target_url,
                        api_key: args.agents_api_key.clone(),
                        categories: args.agents_category.clone(),
                        timeout_ms: args.timeout,
                        verbose: args.verbose,
                        json: args.json,
                        json_out: args.json_out.clone(),
                        workspace_id: args.agents_workspace_id.clone(),
                        session_id: args.agents_session_id.clone(),
                    },
                )
                .await;
                return;
            }
            // Reserved target `protocols`: rejects every option it does not
            // use, runs, then returns.
            if args.target.as_deref() == Some("protocols") {
                if args.evals || args.determinism || args.record || args.replay || args.watch {
                    command_error(
                        "`harn test protocols` cannot be combined with --evals, --determinism, --record, --replay, or --watch",
                    );
                }
                if args.junit.is_some()
                    || args.agents_target.is_some()
                    || args.agents_api_key.is_some()
                    || !args.agents_category.is_empty()
                    || args.json
                    || args.json_out.is_some()
                    || args.agents_workspace_id.is_some()
                    || args.agents_session_id.is_some()
                    || args.parallel
                    || !args.skill_dir.is_empty()
                {
                    command_error(
                        "`harn test protocols` accepts only --filter, --verbose, --timing, and an optional fixture selection",
                    );
                }
                commands::protocol_conformance::run_protocol_conformance(
                    args.selection.as_deref(),
                    args.filter.as_deref(),
                    // Either flag enables the third argument.
                    args.verbose || args.timing,
                );
                return;
            }
            if args.evals {
                // Package evals: only valid as `harn test package --evals`.
                if args.determinism || args.record || args.replay || args.watch {
                    command_error("--evals cannot be combined with --determinism, --record, --replay, or --watch");
                }
                if args.target.as_deref() != Some("package") || args.selection.is_some() {
                    command_error("package evals are run with `harn test package --evals`");
                }
                run_package_evals();
            } else if args.determinism {
                if args.watch {
                    command_error("--determinism cannot be combined with --watch");
                }
                if args.record || args.replay {
                    command_error("--determinism manages its own record/replay cycle");
                }
                if let Some(t) = args.target.as_deref() {
                    if t == "conformance" {
                        commands::test::run_conformance_determinism_tests(
                            t,
                            args.selection.as_deref(),
                            args.filter.as_deref(),
                            args.timeout,
                        )
                        .await;
                    } else if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    } else {
                        commands::test::run_determinism_tests(
                            t,
                            args.filter.as_deref(),
                            args.timeout,
                        )
                        .await;
                    }
                } else {
                    // No target given: fall back to `tests/` in the current
                    // directory, or fail if it does not exist.
                    let test_dir = if PathBuf::from("tests").is_dir() {
                        "tests".to_string()
                    } else {
                        command_error("no path specified and no tests/ directory found");
                    };
                    if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    }
                    commands::test::run_determinism_tests(
                        &test_dir,
                        args.filter.as_deref(),
                        args.timeout,
                    )
                    .await;
                }
            } else {
                // Regular test run. Record takes precedence over replay when
                // both flags are set; fixtures live under `.harn-fixtures`.
                if args.record {
                    harn_vm::llm::set_replay_mode(
                        harn_vm::llm::LlmReplayMode::Record,
                        ".harn-fixtures",
                    );
                } else if args.replay {
                    harn_vm::llm::set_replay_mode(
                        harn_vm::llm::LlmReplayMode::Replay,
                        ".harn-fixtures",
                    );
                }

                if let Some(t) = args.target.as_deref() {
                    if t == "conformance" {
                        commands::test::run_conformance_tests(
                            t,
                            args.selection.as_deref(),
                            args.filter.as_deref(),
                            args.junit.as_deref(),
                            args.timeout,
                            args.verbose,
                            args.timing,
                            args.differential_optimizations,
                        )
                        .await;
                    } else if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    } else if args.watch {
                        commands::test::run_watch_tests(
                            t,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    } else {
                        commands::test::run_user_tests(
                            t,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    }
                } else {
                    // No target given: same `tests/` fallback as above.
                    let test_dir = if PathBuf::from("tests").is_dir() {
                        "tests".to_string()
                    } else {
                        command_error("no path specified and no tests/ directory found");
                    };
                    if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    }
                    if args.watch {
                        commands::test::run_watch_tests(
                            &test_dir,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    } else {
                        commands::test::run_user_tests(
                            &test_dir,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    }
                }
            }
        }
        Command::Init(args) => commands::init::init_project(args.name.as_deref(), args.template),
        Command::New(args) => match commands::init::resolve_new_args(&args) {
            Ok((name, template)) => commands::init::init_project(name.as_deref(), template),
            Err(error) => {
                eprintln!("error: {error}");
                process::exit(1);
            }
        },
        Command::Doctor(args) => {
            commands::doctor::run_doctor_with_options(commands::doctor::DoctorOptions {
                network: !args.no_network,
                json: args.json,
            })
            .await
        }
        Command::Models(args) => commands::models::run(args).await,
        Command::Local(args) => commands::local::run(args).await,
        Command::Providers(args) => match args.command {
            ProvidersCommand::Refresh(refresh) => {
                if let Err(error) = commands::providers::run_refresh(&refresh).await {
                    command_error(&error);
                }
            }
            ProvidersCommand::Validate(validate) => {
                if let Err(error) = commands::providers::run_validate(&validate) {
                    command_error(&error);
                }
            }
            ProvidersCommand::Export(export) => {
                if let Err(error) = commands::providers::run_export(&export) {
                    command_error(&error);
                }
            }
        },
        Command::Try(args) => commands::try_cmd::run(args).await,
        Command::Quickstart(args) => {
            if let Err(error) = commands::quickstart::run_quickstart(&args).await {
                command_error(&error);
            }
        }
        Command::Serve(args) => match args.command {
            ServeCommand::Acp(args) => {
                if let Err(error) = commands::serve::run_acp_server(&args).await {
                    command_error(&error);
                }
            }
            ServeCommand::A2a(args) => {
                if let Err(error) = commands::serve::run_a2a_server(&args).await {
                    command_error(&error);
                }
            }
            ServeCommand::Api(args) => {
                if let Err(error) = commands::serve::run_api_server(&args).await {
                    command_error(&error);
                }
            }
            ServeCommand::Mcp(args) => {
                if let Err(error) = commands::serve::run_mcp_server(&args).await {
                    command_error(&error);
                }
            }
        },
        Command::Connector(args) => {
            if let Err(error) = commands::connector::handle_connector_command(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Mcp(args) => commands::mcp::handle_mcp_command(&args.command).await,
        Command::Watch(args) => {
            let denied =
                commands::run::build_denied_builtins(args.deny.as_deref(), args.allow.as_deref());
            commands::run::run_watch(&args.file, denied).await;
        }
        Command::Portal(args) => {
            commands::portal::run_portal(
                &args.dir,
                args.manifest,
                args.persona_state_dir,
                &args.host,
                args.port,
                args.open,
                args.allow_remote_launch,
            )
            .await
        }
        Command::Trigger(args) => {
            if let Err(error) = commands::trigger::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        // Flow and Workflow return an exit code on success; only a non-zero
        // code terminates the process here.
        Command::Flow(args) => match commands::flow::run_flow(&args) {
            Ok(code) => {
                if code != 0 {
                    process::exit(code);
                }
            }
            Err(error) => command_error(&error),
        },
        Command::Workflow(args) => match commands::workflow::handle(args) {
            Ok(code) => {
                if code != 0 {
                    process::exit(code);
                }
            }
            Err(error) => command_error(&error),
        },
        Command::Supervisor(args) => {
            if let Err(error) = commands::supervisor::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Trace(args) => {
            if let Err(error) = commands::trace::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Crystallize(args) => {
            if let Err(error) = commands::crystallize::run(args) {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        // Both variants are served by the same handler.
        Command::Trust(args) | Command::TrustGraph(args) => {
            if let Err(error) = commands::trust::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Verify(args) => {
            if let Err(error) = verify_provenance_receipt(&args.receipt, args.json) {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Completions(args) => print_completions(args.shell),
        Command::Orchestrator(args) => {
            if let Err(error) = commands::orchestrator::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Playground(args) => {
            provider_bootstrap::maybe_seed_ollama_for_playground(
                Path::new(&args.host),
                Path::new(&args.script),
                args.yes,
                args.llm.is_some(),
                args.llm_mock.is_some(),
            )
            .await;
            // Same precedence as `Command::Run`: replay fixture first, then
            // record fixture, otherwise mocking is off.
            let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
                commands::run::CliLlmMockMode::Replay {
                    fixture_path: PathBuf::from(path),
                }
            } else if let Some(path) = args.llm_mock_record.as_ref() {
                commands::run::CliLlmMockMode::Record {
                    fixture_path: PathBuf::from(path),
                }
            } else {
                commands::run::CliLlmMockMode::Off
            };
            if let Err(error) = commands::playground::run_command(args, llm_mock_mode).await {
                // NOTE(review): unlike the other arms this prints the error
                // with `eprint!` and no "error: " prefix or newline;
                // presumably the error text is already formatted - confirm.
                eprint!("{error}");
                process::exit(1);
            }
        }
        Command::Runs(args) => match args.command {
            RunsCommand::Inspect(inspect) => {
                inspect_run_record(&inspect.path, inspect.compare.as_deref())
            }
        },
        Command::Session(args) => commands::session::run(args),
        Command::Replay(args) => replay_run_record(&args.path),
        Command::Eval(args) => {
            // Same precedence as `Command::Run`: replay fixture first, then
            // record fixture, otherwise mocking is off.
            let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
                commands::run::CliLlmMockMode::Replay {
                    fixture_path: PathBuf::from(path),
                }
            } else if let Some(path) = args.llm_mock_record.as_ref() {
                commands::run::CliLlmMockMode::Record {
                    fixture_path: PathBuf::from(path),
                }
            } else {
                commands::run::CliLlmMockMode::Off
            };
            eval_run_record(
                &args.path,
                args.compare.as_deref(),
                args.structural_experiment.as_deref(),
                &args.argv,
                &llm_mock_mode,
            )
        }
        Command::Repl => commands::repl::run_repl().await,
        Command::Bench(args) => commands::bench::run(args).await,
        Command::TestBench(args) => commands::test_bench::run(args.command).await,
        Command::Viz(args) => commands::viz::run_viz(&args.file, args.output.as_deref()),
        Command::Install(args) => package::install_packages(
            // Any one of these three flags sets the first argument.
            args.frozen || args.locked || args.offline,
            args.refetch.as_deref(),
            args.offline,
            args.json,
        ),
        Command::Add(args) => package::add_package_with_registry(
            &args.name_or_spec,
            args.alias.as_deref(),
            args.git.as_deref(),
            args.tag.as_deref(),
            args.rev.as_deref(),
            args.branch.as_deref(),
            args.path.as_deref(),
            args.registry.as_deref(),
        ),
        Command::Update(args) => {
            package::update_packages(args.alias.as_deref(), args.all, args.json)
        }
        Command::Remove(args) => package::remove_package(&args.alias),
        Command::Lock => package::lock_packages(),
        Command::Package(args) => match args.command {
            PackageCommand::List(list) => package::list_packages(list.json),
            PackageCommand::Doctor(doctor) => package::doctor_packages(doctor.json),
            PackageCommand::Search(search) => package::search_package_registry(
                search.query.as_deref(),
                search.registry.as_deref(),
                search.json,
            ),
            PackageCommand::Info(info) => {
                package::show_package_registry_info(&info.name, info.registry.as_deref(), info.json)
            }
            PackageCommand::Check(check) => {
                package::check_package(check.package.as_deref(), check.json)
            }
            PackageCommand::Pack(pack) => package::pack_package(
                pack.package.as_deref(),
                pack.output.as_deref(),
                pack.dry_run,
                pack.json,
            ),
            PackageCommand::Docs(docs) => package::generate_package_docs(
                docs.package.as_deref(),
                docs.output.as_deref(),
                docs.check,
            ),
            PackageCommand::Cache(cache) => match cache.command {
                PackageCacheCommand::List => package::list_package_cache(),
                PackageCacheCommand::Clean(clean) => package::clean_package_cache(clean.all),
                PackageCacheCommand::Verify(verify) => {
                    package::verify_package_cache(verify.materialized)
                }
            },
            PackageCommand::Outdated(args) => package::outdated_packages(
                args.refresh,
                args.remote,
                args.registry.as_deref(),
                args.json,
            ),
            PackageCommand::Audit(args) => {
                package::audit_packages(args.registry.as_deref(), args.skip_materialized, args.json)
            }
            PackageCommand::Artifacts(args) => match args.command {
                PackageArtifactsCommand::Manifest(manifest) => {
                    package::artifacts_manifest(manifest.output.as_deref())
                }
                PackageArtifactsCommand::Check(check) => {
                    package::artifacts_check(&check.manifest, check.json)
                }
            },
        },
        Command::Publish(args) => package::publish_package(
            args.package.as_deref(),
            args.dry_run,
            args.registry.as_deref(),
            args.json,
        ),
        // Each merge-captain handler returns an exit code; a non-zero code
        // ends the process with that code.
        Command::MergeCaptain(args) => match args.command {
            MergeCaptainCommand::Run(run) => {
                let code = commands::merge_captain::run_driver(&run);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Ladder(ladder) => {
                let code = commands::merge_captain::run_ladder(&ladder);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Iterate(iterate) => {
                let code = commands::merge_captain::run_iterate(&iterate);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Audit(audit) => {
                let code = commands::merge_captain::run_audit(&audit);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Mock(mock) => {
                // `Serve` is the only mock subcommand that is awaited.
                let code = match mock {
                    MergeCaptainMockCommand::Init(args) => {
                        commands::merge_captain_mock::run_init(&args)
                    }
                    MergeCaptainMockCommand::Step(args) => {
                        commands::merge_captain_mock::run_step(&args)
                    }
                    MergeCaptainMockCommand::Status(args) => {
                        commands::merge_captain_mock::run_status(&args)
                    }
                    MergeCaptainMockCommand::Serve(args) => {
                        commands::merge_captain_mock::run_serve(&args).await
                    }
                    MergeCaptainMockCommand::Cleanup(args) => {
                        commands::merge_captain_mock::run_cleanup(&args)
                    }
                    MergeCaptainMockCommand::Scenarios => {
                        commands::merge_captain_mock::run_scenarios()
                    }
                };
                if code != 0 {
                    process::exit(code);
                }
            }
        },
        // Persona subcommands share `args.manifest`; the stateful ones also
        // receive `args.state_dir`. Handlers that return `Err` print
        // "error: ..." and exit with status 1.
        Command::Persona(args) => match args.command {
            PersonaCommand::New(new) => {
                if let Err(error) = commands::persona_scaffold::run_new(&new) {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Doctor(doctor) => {
                if let Err(error) =
                    commands::persona_doctor::run_doctor(args.manifest.as_deref(), &doctor).await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Check(check) => {
                commands::persona::run_check(args.manifest.as_deref(), &check)
            }
            PersonaCommand::List(list) => {
                commands::persona::run_list(args.manifest.as_deref(), &list)
            }
            PersonaCommand::Inspect(inspect) => {
                commands::persona::run_inspect(args.manifest.as_deref(), &inspect)
            }
            PersonaCommand::Status(status) => {
                if let Err(error) = commands::persona::run_status(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &status,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Pause(control) => {
                if let Err(error) = commands::persona::run_pause(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &control,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Resume(control) => {
                if let Err(error) = commands::persona::run_resume(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &control,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Disable(control) => {
                if let Err(error) = commands::persona::run_disable(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &control,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Tick(tick) => {
                if let Err(error) =
                    commands::persona::run_tick(args.manifest.as_deref(), &args.state_dir, &tick)
                        .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Trigger(trigger) => {
                if let Err(error) = commands::persona::run_trigger(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &trigger,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Spend(spend) => {
                if let Err(error) =
                    commands::persona::run_spend(args.manifest.as_deref(), &args.state_dir, &spend)
                        .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Supervision(supervision) => match supervision.command {
                PersonaSupervisionCommand::Tail(tail) => {
                    if let Err(error) = commands::persona_supervision::run_tail(
                        args.manifest.as_deref(),
                        &args.state_dir,
                        &tail,
                    )
                    .await
                    {
                        eprintln!("error: {error}");
                        process::exit(1);
                    }
                }
            },
        },
        Command::ModelInfo(args) => {
            // `print_model_info` returns `false` to request exit status 1.
            if !print_model_info(&args).await {
                process::exit(1);
            }
        }
        Command::ProviderCatalog(args) => print_provider_catalog(args.available_only),
        Command::ProviderReady(args) => {
            run_provider_ready(
                &args.provider,
                args.model.as_deref(),
                args.base_url.as_deref(),
                args.json,
            )
            .await
        }
        Command::ProviderProbe(args) => commands::provider::run_provider_probe(args).await,
        Command::ProviderToolProbe(args) => commands::provider::run_provider_tool_probe(args).await,
        Command::Skills(args) => match args.command {
            SkillsCommand::List(list) => commands::skills::run_list(&list),
            SkillsCommand::Inspect(inspect) => commands::skills::run_inspect(&inspect),
            SkillsCommand::Match(matcher) => commands::skills::run_match(&matcher),
            SkillsCommand::Install(install) => commands::skills::run_install(&install),
            // Same handler as `SkillCommand::New` above.
            SkillsCommand::New(new_args) => commands::skills::run_new(&new_args),
        },
        Command::Tool(args) => match args.command {
            ToolCommand::New(new_args) => {
                if let Err(error) = commands::tool::run_new(&new_args) {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
        },
        // The `Dump*` commands each take an output location and a `check`
        // flag and delegate to their generator.
        Command::DumpHighlightKeywords(args) => {
            commands::dump_highlight_keywords::run(&args.output, args.check);
        }
        Command::DumpTriggerQuickref(args) => {
            commands::dump_trigger_quickref::run(&args.output, args.check);
        }
        Command::DumpConnectorMatrix(args) => {
            commands::check::connector_matrix::run_docs(&args.output, &args.sources, args.check);
        }
        Command::DumpProtocolArtifacts(args) => {
            commands::dump_protocol_artifacts::run(&args.output_dir, args.check);
        }
    }
}
1079
1080fn run_profile_options(args: &cli::ProfileArgs) -> commands::run::RunProfileOptions {
1081 commands::run::RunProfileOptions {
1082 text: args.text,
1083 json_path: args.json_path.clone(),
1084 }
1085}
1086
1087fn print_completions(shell: CompletionShell) {
1088 let mut command = Cli::command();
1089 let shell = clap_complete::Shell::from(shell);
1090 clap_complete::generate(shell, &mut command, "harn", &mut std::io::stdout());
1091}
1092
/// Inserts the default `a2a` subcommand after `serve` when none was given explicitly.
///
/// The rewrite only happens when there are at least three arguments, the second one is
/// exactly `serve`, and the third is not one of `acp`, `a2a`, `api`, `mcp`, `-h` or
/// `--help`. Every other argument vector is returned unchanged.
fn normalize_serve_args(mut raw_args: Vec<String>) -> Vec<String> {
    let needs_default = match raw_args.as_slice() {
        [_, command, next, ..] if command.as_str() == "serve" => !matches!(
            next.as_str(),
            "acp" | "a2a" | "api" | "mcp" | "-h" | "--help"
        ),
        _ => false,
    };
    if needs_default {
        raw_args.insert(2, "a2a".to_string());
    }
    raw_args
}
1105
/// Prints the ASCII-art banner together with the crate version to stdout.
///
/// The version is `CARGO_PKG_VERSION`, substituted at compile time through `env!`.
fn print_version() {
    println!(
        r#"
 ╱▔▔╲
 ╱ ╲ harn v{}
 │ ◆ │ the agent harness language
 │ │
 ╰──╯╱
 ╱╱
"#,
        env!("CARGO_PKG_VERSION")
    );
}
1119
1120async fn print_model_info(args: &ModelInfoArgs) -> bool {
1121 let resolved = harn_vm::llm_config::resolve_model_info(&args.model);
1122 let api_key_result = harn_vm::llm::resolve_api_key(&resolved.provider);
1123 let api_key_set = api_key_result.is_ok();
1124 let api_key = api_key_result.unwrap_or_default();
1125 let context_window =
1126 harn_vm::llm::fetch_provider_max_context(&resolved.provider, &resolved.id, &api_key).await;
1127 let readiness = local_openai_readiness(&resolved.provider, &resolved.id, &api_key).await;
1128 let catalog = harn_vm::llm_config::model_catalog_entry(&resolved.id);
1129 let runtime_context_window = catalog
1130 .as_ref()
1131 .and_then(|entry| entry.runtime_context_window);
1132 let capabilities = harn_vm::llm::capabilities::lookup(&resolved.provider, &resolved.id);
1133 let mut payload = serde_json::json!({
1134 "alias": args.model,
1135 "id": resolved.id,
1136 "provider": resolved.provider,
1137 "resolved_alias": resolved.alias,
1138 "tool_format": resolved.tool_format,
1139 "tier": resolved.tier,
1140 "api_key_set": api_key_set,
1141 "context_window": context_window,
1142 "runtime_context_window": runtime_context_window,
1143 "readiness": readiness,
1144 "catalog": catalog,
1145 "capabilities": {
1146 "native_tools": capabilities.native_tools,
1147 "defer_loading": capabilities.defer_loading,
1148 "tool_search": capabilities.tool_search,
1149 "max_tools": capabilities.max_tools,
1150 "prompt_caching": capabilities.prompt_caching,
1151 "vision": capabilities.vision,
1152 "vision_supported": capabilities.vision_supported,
1153 "audio": capabilities.audio,
1154 "pdf": capabilities.pdf,
1155 "files_api_supported": capabilities.files_api_supported,
1156 "json_schema": capabilities.json_schema,
1157 "thinking": !capabilities.thinking_modes.is_empty(),
1158 "thinking_modes": capabilities.thinking_modes,
1159 "interleaved_thinking_supported": capabilities.interleaved_thinking_supported,
1160 "anthropic_beta_features": capabilities.anthropic_beta_features,
1161 "preserve_thinking": capabilities.preserve_thinking,
1162 "server_parser": capabilities.server_parser,
1163 "honors_chat_template_kwargs": capabilities.honors_chat_template_kwargs,
1164 "recommended_endpoint": capabilities.recommended_endpoint,
1165 "text_tool_wire_format_supported": capabilities.text_tool_wire_format_supported,
1166 },
1167 "qc_default_model": harn_vm::llm_config::qc_default_model(&resolved.provider),
1168 });
1169
1170 let should_verify = args.verify || args.warm;
1171 let mut ok = true;
1172 if should_verify {
1173 if resolved.provider == "ollama" {
1174 let mut readiness = harn_vm::llm::OllamaReadinessOptions::new(resolved.id.clone());
1175 readiness.warm = args.warm;
1176 readiness.observe_loaded = true;
1177 readiness.keep_alive = args
1178 .keep_alive
1179 .as_deref()
1180 .and_then(harn_vm::llm::normalize_ollama_keep_alive);
1181 let result = harn_vm::llm::ollama_readiness(readiness).await;
1182 ok = result.valid;
1183 payload["readiness"] = serde_json::to_value(&result).unwrap_or_else(|error| {
1184 serde_json::json!({
1185 "valid": false,
1186 "status": "serialization_error",
1187 "message": format!("failed to serialize readiness result: {error}"),
1188 })
1189 });
1190 } else {
1191 ok = false;
1192 payload["readiness"] = serde_json::json!({
1193 "valid": false,
1194 "status": "unsupported_provider",
1195 "message": format!(
1196 "model-info --verify is only supported for Ollama models; resolved provider is '{}'",
1197 resolved.provider
1198 ),
1199 "provider": resolved.provider,
1200 });
1201 }
1202 }
1203
1204 println!(
1205 "{}",
1206 serde_json::to_string(&payload).unwrap_or_else(|error| {
1207 command_error(&format!("failed to serialize model info: {error}"))
1208 })
1209 );
1210 ok
1211}
1212
1213async fn local_openai_readiness(
1214 provider: &str,
1215 model: &str,
1216 api_key: &str,
1217) -> Option<serde_json::Value> {
1218 let def = harn_vm::llm_config::provider_config(provider)?;
1219 if def.auth_style != "none" || !harn_vm::llm::supports_model_readiness_probe(&def) {
1220 return None;
1221 }
1222 let readiness = harn_vm::llm::probe_openai_compatible_model(provider, model, api_key).await;
1223 Some(serde_json::json!({
1224 "valid": readiness.valid,
1225 "category": readiness.category,
1226 "message": readiness.message,
1227 "provider": readiness.provider,
1228 "model": readiness.model,
1229 "url": readiness.url,
1230 "status": readiness.status,
1231 "available_models": readiness.available_models,
1232 }))
1233}
1234
1235fn print_provider_catalog(available_only: bool) {
1236 let provider_names = if available_only {
1237 harn_vm::llm_config::available_provider_names()
1238 } else {
1239 harn_vm::llm_config::provider_names()
1240 };
1241 let providers: Vec<_> = provider_names
1242 .into_iter()
1243 .filter_map(|name| {
1244 harn_vm::llm_config::provider_config(&name).map(|def| {
1245 serde_json::json!({
1246 "name": name,
1247 "display_name": def.display_name,
1248 "icon": def.icon,
1249 "base_url": harn_vm::llm_config::resolve_base_url(&def),
1250 "base_url_env": def.base_url_env,
1251 "auth_style": def.auth_style,
1252 "auth_envs": harn_vm::llm_config::auth_env_names(&def.auth_env),
1253 "auth_available": harn_vm::llm_config::provider_key_available(&name),
1254 "features": def.features,
1255 "cost_per_1k_in": def.cost_per_1k_in,
1256 "cost_per_1k_out": def.cost_per_1k_out,
1257 "latency_p50_ms": def.latency_p50_ms,
1258 })
1259 })
1260 })
1261 .collect();
1262 let models: Vec<_> = harn_vm::llm_config::model_catalog_entries()
1263 .into_iter()
1264 .map(|(id, model)| {
1265 serde_json::json!({
1266 "id": id,
1267 "name": model.name,
1268 "provider": model.provider,
1269 "context_window": model.context_window,
1270 "runtime_context_window": model.runtime_context_window,
1271 "stream_timeout": model.stream_timeout,
1272 "capabilities": model.capabilities,
1273 "pricing": model.pricing,
1274 })
1275 })
1276 .collect();
1277 let aliases: Vec<_> = harn_vm::llm_config::alias_entries()
1278 .into_iter()
1279 .map(|(name, alias)| {
1280 serde_json::json!({
1281 "name": name,
1282 "id": alias.id,
1283 "provider": alias.provider,
1284 "tool_format": alias.tool_format,
1285 "tool_calling": harn_vm::llm_config::alias_tool_calling_entry(&name),
1286 })
1287 })
1288 .collect();
1289 let payload = serde_json::json!({
1290 "providers": providers,
1291 "known_model_names": harn_vm::llm_config::known_model_names(),
1292 "available_providers": harn_vm::llm_config::available_provider_names(),
1293 "aliases": aliases,
1294 "models": models,
1295 "qc_defaults": harn_vm::llm_config::qc_defaults(),
1296 });
1297 println!(
1298 "{}",
1299 serde_json::to_string(&payload).unwrap_or_else(|error| {
1300 command_error(&format!("failed to serialize provider catalog: {error}"))
1301 })
1302 );
1303}
1304
1305async fn run_provider_ready(
1306 provider: &str,
1307 model: Option<&str>,
1308 base_url: Option<&str>,
1309 json: bool,
1310) {
1311 let readiness =
1312 harn_vm::llm::readiness::probe_provider_readiness(provider, model, base_url).await;
1313 if json {
1314 match serde_json::to_string_pretty(&readiness) {
1315 Ok(payload) => println!("{payload}"),
1316 Err(error) => command_error(&format!("failed to serialize readiness result: {error}")),
1317 }
1318 } else if readiness.ok {
1319 println!("{}", readiness.message);
1320 } else {
1321 eprintln!("{}", readiness.message);
1322 }
1323 if !readiness.ok {
1324 process::exit(1);
1325 }
1326}
1327
1328fn command_error(message: &str) -> ! {
1329 Cli::command()
1330 .error(ErrorKind::ValueValidation, message)
1331 .exit()
1332}
1333
1334fn verify_provenance_receipt(path: &str, json: bool) -> Result<(), String> {
1335 let raw =
1336 fs::read_to_string(path).map_err(|error| format!("failed to read {path}: {error}"))?;
1337 let receipt: harn_vm::ProvenanceReceipt = serde_json::from_str(&raw)
1338 .map_err(|error| format!("failed to parse provenance receipt {path}: {error}"))?;
1339 let report = harn_vm::verify_receipt(&receipt);
1340 if json {
1341 println!(
1342 "{}",
1343 serde_json::to_string_pretty(&report).map_err(|error| error.to_string())?
1344 );
1345 } else if report.verified {
1346 println!(
1347 "verified receipt={} events={} receipt_hash={} event_root_hash={}",
1348 report.receipt_id.unwrap_or_else(|| "-".to_string()),
1349 report.event_count,
1350 report.receipt_hash.unwrap_or_else(|| "-".to_string()),
1351 report.event_root_hash.unwrap_or_else(|| "-".to_string())
1352 );
1353 } else {
1354 println!(
1355 "failed receipt={} events={}",
1356 report.receipt_id.unwrap_or_else(|| "-".to_string()),
1357 report.event_count
1358 );
1359 for error in &report.errors {
1360 println!(" {error}");
1361 }
1362 return Err("provenance receipt verification failed".to_string());
1363 }
1364 Ok(())
1365}
1366
1367fn load_run_record_or_exit(path: &Path) -> harn_vm::orchestration::RunRecord {
1368 match harn_vm::orchestration::load_run_record(path) {
1369 Ok(run) => run,
1370 Err(error) => {
1371 eprintln!("Failed to load run record: {error}");
1372 process::exit(1);
1373 }
1374 }
1375}
1376
1377fn load_eval_suite_manifest_or_exit(path: &Path) -> harn_vm::orchestration::EvalSuiteManifest {
1378 harn_vm::orchestration::load_eval_suite_manifest(path).unwrap_or_else(|error| {
1379 eprintln!("Failed to load eval manifest {}: {error}", path.display());
1380 process::exit(1);
1381 })
1382}
1383
1384fn load_eval_pack_manifest_or_exit(path: &Path) -> harn_vm::orchestration::EvalPackManifest {
1385 harn_vm::orchestration::load_eval_pack_manifest(path).unwrap_or_else(|error| {
1386 eprintln!("Failed to load eval pack {}: {error}", path.display());
1387 process::exit(1);
1388 })
1389}
1390
1391fn load_persona_eval_ladder_manifest_or_exit(
1392 path: &Path,
1393) -> harn_vm::orchestration::PersonaEvalLadderManifest {
1394 harn_vm::orchestration::load_persona_eval_ladder_manifest(path).unwrap_or_else(|error| {
1395 eprintln!(
1396 "Failed to load persona eval ladder {}: {error}",
1397 path.display()
1398 );
1399 process::exit(1);
1400 })
1401}
1402
1403fn file_looks_like_eval_manifest(path: &Path) -> bool {
1404 if path.file_name().and_then(|name| name.to_str()) == Some("harn.eval.toml") {
1405 return true;
1406 }
1407 if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
1408 let Ok(content) = fs::read_to_string(path) else {
1409 return false;
1410 };
1411 return toml::from_str::<harn_vm::orchestration::EvalPackManifest>(&content)
1412 .is_ok_and(|manifest| !manifest.cases.is_empty() || !manifest.ladders.is_empty());
1413 }
1414 let Ok(content) = fs::read_to_string(path) else {
1415 return false;
1416 };
1417 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1418 return false;
1419 };
1420 json.get("_type").and_then(|value| value.as_str()) == Some("eval_suite_manifest")
1421 || json.get("cases").is_some()
1422}
1423
1424fn file_looks_like_eval_pack_manifest(path: &Path) -> bool {
1425 if path.file_name().and_then(|name| name.to_str()) == Some("harn.eval.toml") {
1426 return true;
1427 }
1428 if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
1429 return file_looks_like_eval_manifest(path);
1430 }
1431 let Ok(content) = fs::read_to_string(path) else {
1432 return false;
1433 };
1434 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1435 return false;
1436 };
1437 json.get("version").is_some()
1438 && (json.get("cases").is_some() || json.get("ladders").is_some())
1439 && json.get("_type").and_then(|value| value.as_str()) != Some("eval_suite_manifest")
1440}
1441
1442fn file_looks_like_persona_eval_ladder_manifest(path: &Path) -> bool {
1443 let Ok(content) = fs::read_to_string(path) else {
1444 return false;
1445 };
1446 if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
1447 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1448 return false;
1449 };
1450 return json.get("_type").and_then(|value| value.as_str())
1451 == Some("persona_eval_ladder_manifest")
1452 || json.get("timeout_tiers").is_some()
1453 || json.get("timeout-tiers").is_some();
1454 }
1455 toml::from_str::<harn_vm::orchestration::PersonaEvalLadderManifest>(&content).is_ok_and(
1456 |manifest| {
1457 manifest
1458 .type_name
1459 .eq_ignore_ascii_case("persona_eval_ladder_manifest")
1460 || (!manifest.timeout_tiers.is_empty() && manifest.backend.path.is_some())
1461 },
1462 )
1463}
1464
/// Resolves `path` to the list of run-record files it designates.
///
/// * A file yields a single-element list containing that file.
/// * A directory yields its direct entries whose extension is `json`, sorted; entries
///   that cannot be read are ignored. The list may be empty.
/// * Anything else prints an error to stderr and exits with status 1, as does a
///   directory that cannot be listed.
fn collect_run_record_paths(path: &str) -> Vec<PathBuf> {
    let root = Path::new(path);
    if root.is_file() {
        return vec![root.to_path_buf()];
    }
    if !root.is_dir() {
        eprintln!("Run path does not exist: {}", root.display());
        process::exit(1);
    }
    let listing = match fs::read_dir(root) {
        Ok(listing) => listing,
        Err(error) => {
            eprintln!("Failed to read run directory {}: {error}", root.display());
            process::exit(1);
        }
    };
    let mut records = Vec::new();
    for entry in listing {
        let Ok(entry) = entry else {
            continue;
        };
        let candidate = entry.path();
        if candidate.extension().and_then(|ext| ext.to_str()) == Some("json") {
            records.push(candidate);
        }
    }
    records.sort();
    records
}
1485
1486fn print_run_diff(diff: &harn_vm::orchestration::RunDiffReport) {
1487 println!(
1488 "Diff: {} -> {} [{} -> {}]",
1489 diff.left_run_id, diff.right_run_id, diff.left_status, diff.right_status
1490 );
1491 println!("Identical: {}", diff.identical);
1492 println!("Stage diffs: {}", diff.stage_diffs.len());
1493 println!("Tool diffs: {}", diff.tool_diffs.len());
1494 println!("Observability diffs: {}", diff.observability_diffs.len());
1495 println!("Transition delta: {}", diff.transition_count_delta);
1496 println!("Artifact delta: {}", diff.artifact_count_delta);
1497 println!("Checkpoint delta: {}", diff.checkpoint_count_delta);
1498 for stage in &diff.stage_diffs {
1499 println!("- {} [{}]", stage.node_id, stage.change);
1500 for detail in &stage.details {
1501 println!(" {}", detail);
1502 }
1503 }
1504 for tool in &diff.tool_diffs {
1505 println!("- tool {} [{}]", tool.tool_name, tool.args_hash);
1506 println!(" left: {:?}", tool.left_result);
1507 println!(" right: {:?}", tool.right_result);
1508 }
1509 for item in &diff.observability_diffs {
1510 println!("- {} [{}]", item.label, item.section);
1511 for detail in &item.details {
1512 println!(" {}", detail);
1513 }
1514 }
1515}
1516
1517fn inspect_run_record(path: &str, compare: Option<&str>) {
1518 let run = load_run_record_or_exit(Path::new(path));
1519 println!("Run: {}", run.id);
1520 println!(
1521 "Workflow: {}",
1522 run.workflow_name
1523 .clone()
1524 .unwrap_or_else(|| run.workflow_id.clone())
1525 );
1526 println!("Status: {}", run.status);
1527 println!("Task: {}", run.task);
1528 println!("Stages: {}", run.stages.len());
1529 println!("Artifacts: {}", run.artifacts.len());
1530 println!("Transitions: {}", run.transitions.len());
1531 println!("Checkpoints: {}", run.checkpoints.len());
1532 println!("HITL questions: {}", run.hitl_questions.len());
1533 if let Some(observability) = &run.observability {
1534 println!("Planner rounds: {}", observability.planner_rounds.len());
1535 println!("Research facts: {}", observability.research_fact_count);
1536 println!("Workers: {}", observability.worker_lineage.len());
1537 println!(
1538 "Action graph: {} nodes / {} edges",
1539 observability.action_graph_nodes.len(),
1540 observability.action_graph_edges.len()
1541 );
1542 println!(
1543 "Transcript pointers: {}",
1544 observability.transcript_pointers.len()
1545 );
1546 println!("Daemon events: {}", observability.daemon_events.len());
1547 }
1548 if let Some(parent_worker_id) = run
1549 .metadata
1550 .get("parent_worker_id")
1551 .and_then(|value| value.as_str())
1552 {
1553 println!("Parent worker: {}", parent_worker_id);
1554 }
1555 if let Some(parent_stage_id) = run
1556 .metadata
1557 .get("parent_stage_id")
1558 .and_then(|value| value.as_str())
1559 {
1560 println!("Parent stage: {}", parent_stage_id);
1561 }
1562 if run
1563 .metadata
1564 .get("delegated")
1565 .and_then(|value| value.as_bool())
1566 .unwrap_or(false)
1567 {
1568 println!("Delegated: true");
1569 }
1570 println!(
1571 "Pending nodes: {}",
1572 if run.pending_nodes.is_empty() {
1573 "-".to_string()
1574 } else {
1575 run.pending_nodes.join(", ")
1576 }
1577 );
1578 println!(
1579 "Replay fixture: {}",
1580 if run.replay_fixture.is_some() {
1581 "embedded"
1582 } else {
1583 "derived"
1584 }
1585 );
1586 for stage in &run.stages {
1587 let worker = stage.metadata.get("worker");
1588 let worker_suffix = worker
1589 .and_then(|value| value.get("name"))
1590 .and_then(|value| value.as_str())
1591 .map(|name| format!(" worker={name}"))
1592 .unwrap_or_default();
1593 println!(
1594 "- {} [{}] status={} outcome={} branch={}{}",
1595 stage.node_id,
1596 stage.kind,
1597 stage.status,
1598 stage.outcome,
1599 stage.branch.clone().unwrap_or_else(|| "-".to_string()),
1600 worker_suffix,
1601 );
1602 if let Some(worker) = worker {
1603 if let Some(worker_id) = worker.get("id").and_then(|value| value.as_str()) {
1604 println!(" worker_id: {}", worker_id);
1605 }
1606 if let Some(child_run_id) = worker.get("child_run_id").and_then(|value| value.as_str())
1607 {
1608 println!(" child_run_id: {}", child_run_id);
1609 }
1610 if let Some(child_run_path) = worker
1611 .get("child_run_path")
1612 .and_then(|value| value.as_str())
1613 {
1614 println!(" child_run_path: {}", child_run_path);
1615 }
1616 }
1617 }
1618 if let Some(observability) = &run.observability {
1619 for round in &observability.planner_rounds {
1620 println!(
1621 "- planner {} iterations={} llm_calls={} tools={} research_facts={}",
1622 round.node_id,
1623 round.iteration_count,
1624 round.llm_call_count,
1625 round.tool_execution_count,
1626 round.research_facts.len()
1627 );
1628 }
1629 for pointer in &observability.transcript_pointers {
1630 println!(
1631 "- transcript {} [{}] available={} {}",
1632 pointer.label,
1633 pointer.kind,
1634 pointer.available,
1635 pointer
1636 .path
1637 .clone()
1638 .unwrap_or_else(|| pointer.location.clone())
1639 );
1640 }
1641 for event in &observability.daemon_events {
1642 println!(
1643 "- daemon {} [{:?}] at {}",
1644 event.name, event.kind, event.timestamp
1645 );
1646 println!(" id: {}", event.daemon_id);
1647 println!(" persist_path: {}", event.persist_path);
1648 if let Some(summary) = &event.payload_summary {
1649 println!(" payload: {}", summary);
1650 }
1651 }
1652 }
1653 if let Some(compare_path) = compare {
1654 let baseline = load_run_record_or_exit(Path::new(compare_path));
1655 print_run_diff(&harn_vm::orchestration::diff_run_records(&baseline, &run));
1656 }
1657}
1658
1659fn replay_run_record(path: &str) {
1660 let run = load_run_record_or_exit(Path::new(path));
1661 println!("Replay: {}", run.id);
1662 for stage in &run.stages {
1663 println!(
1664 "[{}] status={} outcome={} branch={}",
1665 stage.node_id,
1666 stage.status,
1667 stage.outcome,
1668 stage.branch.clone().unwrap_or_else(|| "-".to_string())
1669 );
1670 if let Some(text) = &stage.visible_text {
1671 println!(" visible: {}", text);
1672 }
1673 if let Some(verification) = &stage.verification {
1674 println!(" verification: {}", verification);
1675 }
1676 }
1677 if let Some(transcript) = &run.transcript {
1678 println!(
1679 "Transcript events persisted: {}",
1680 transcript["events"]
1681 .as_array()
1682 .map(|v| v.len())
1683 .unwrap_or(0)
1684 );
1685 }
1686 let fixture = run
1687 .replay_fixture
1688 .clone()
1689 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1690 let report = harn_vm::orchestration::evaluate_run_against_fixture(&run, &fixture);
1691 println!(
1692 "Embedded replay fixture: {}",
1693 if report.pass { "PASS" } else { "FAIL" }
1694 );
1695 for transition in &run.transitions {
1696 println!(
1697 "transition {} -> {} ({})",
1698 transition
1699 .from_node_id
1700 .clone()
1701 .unwrap_or_else(|| "start".to_string()),
1702 transition.to_node_id,
1703 transition
1704 .branch
1705 .clone()
1706 .unwrap_or_else(|| "default".to_string())
1707 );
1708 }
1709}
1710
1711fn eval_run_record(
1712 path: &str,
1713 compare: Option<&str>,
1714 structural_experiment: Option<&str>,
1715 argv: &[String],
1716 llm_mock_mode: &commands::run::CliLlmMockMode,
1717) {
1718 if let Some(experiment) = structural_experiment {
1719 let path_buf = PathBuf::from(path);
1720 if !path_buf.is_file() || path_buf.extension().and_then(|ext| ext.to_str()) != Some("harn")
1721 {
1722 eprintln!(
1723 "--structural-experiment currently requires a .harn pipeline path, got {}",
1724 path
1725 );
1726 process::exit(1);
1727 }
1728 if compare.is_some() {
1729 eprintln!("--compare cannot be combined with --structural-experiment");
1730 process::exit(1);
1731 }
1732 if matches!(llm_mock_mode, commands::run::CliLlmMockMode::Record { .. }) {
1733 eprintln!("--llm-mock-record cannot be combined with --structural-experiment");
1734 process::exit(1);
1735 }
1736 let path_buf = fs::canonicalize(&path_buf).unwrap_or_else(|error| {
1737 command_error(&format!(
1738 "failed to canonicalize structural eval pipeline {}: {error}",
1739 path_buf.display()
1740 ))
1741 });
1742 run_structural_experiment_eval(&path_buf, experiment, argv, llm_mock_mode);
1743 return;
1744 }
1745
1746 let path_buf = PathBuf::from(path);
1747 if path_buf.is_file() && file_looks_like_persona_eval_ladder_manifest(&path_buf) {
1748 if compare.is_some() {
1749 eprintln!("--compare is not supported with persona eval ladder manifests");
1750 process::exit(1);
1751 }
1752 let manifest = load_persona_eval_ladder_manifest_or_exit(&path_buf);
1753 let report =
1754 harn_vm::orchestration::run_persona_eval_ladder(&manifest).unwrap_or_else(|error| {
1755 eprintln!(
1756 "Failed to evaluate persona eval ladder {}: {error}",
1757 path_buf.display()
1758 );
1759 process::exit(1);
1760 });
1761 print_persona_ladder_report(&report);
1762 if !report.pass {
1763 process::exit(1);
1764 }
1765 return;
1766 }
1767
1768 if path_buf.is_file() && file_looks_like_eval_pack_manifest(&path_buf) {
1769 if compare.is_some() {
1770 eprintln!("--compare is not supported with eval pack manifests");
1771 process::exit(1);
1772 }
1773 let manifest = load_eval_pack_manifest_or_exit(&path_buf);
1774 let report = harn_vm::orchestration::evaluate_eval_pack_manifest(&manifest).unwrap_or_else(
1775 |error| {
1776 eprintln!(
1777 "Failed to evaluate eval pack {}: {error}",
1778 path_buf.display()
1779 );
1780 process::exit(1);
1781 },
1782 );
1783 print_eval_pack_report(&report);
1784 if !report.pass {
1785 process::exit(1);
1786 }
1787 return;
1788 }
1789
1790 if path_buf.is_file() && file_looks_like_eval_manifest(&path_buf) {
1791 if compare.is_some() {
1792 eprintln!("--compare is not supported with eval suite manifests");
1793 process::exit(1);
1794 }
1795 let manifest = load_eval_suite_manifest_or_exit(&path_buf);
1796 let suite = harn_vm::orchestration::evaluate_run_suite_manifest(&manifest).unwrap_or_else(
1797 |error| {
1798 eprintln!(
1799 "Failed to evaluate manifest {}: {error}",
1800 path_buf.display()
1801 );
1802 process::exit(1);
1803 },
1804 );
1805 println!(
1806 "{} {} passed, {} failed, {} total",
1807 if suite.pass { "PASS" } else { "FAIL" },
1808 suite.passed,
1809 suite.failed,
1810 suite.total
1811 );
1812 for case in &suite.cases {
1813 println!(
1814 "- {} [{}] {}",
1815 case.label.clone().unwrap_or_else(|| case.run_id.clone()),
1816 case.workflow_id,
1817 if case.pass { "PASS" } else { "FAIL" }
1818 );
1819 if let Some(path) = &case.source_path {
1820 println!(" path: {}", path);
1821 }
1822 if let Some(comparison) = &case.comparison {
1823 println!(" baseline identical: {}", comparison.identical);
1824 if !comparison.identical {
1825 println!(
1826 " baseline status: {} -> {}",
1827 comparison.left_status, comparison.right_status
1828 );
1829 }
1830 }
1831 for failure in &case.failures {
1832 println!(" {}", failure);
1833 }
1834 }
1835 if !suite.pass {
1836 process::exit(1);
1837 }
1838 return;
1839 }
1840
1841 let paths = collect_run_record_paths(path);
1842 if paths.len() > 1 {
1843 let mut cases = Vec::new();
1844 for path in &paths {
1845 let run = load_run_record_or_exit(path);
1846 let fixture = run
1847 .replay_fixture
1848 .clone()
1849 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1850 cases.push((run, fixture, Some(path.display().to_string())));
1851 }
1852 let suite = harn_vm::orchestration::evaluate_run_suite(cases);
1853 println!(
1854 "{} {} passed, {} failed, {} total",
1855 if suite.pass { "PASS" } else { "FAIL" },
1856 suite.passed,
1857 suite.failed,
1858 suite.total
1859 );
1860 for case in &suite.cases {
1861 println!(
1862 "- {} [{}] {}",
1863 case.run_id,
1864 case.workflow_id,
1865 if case.pass { "PASS" } else { "FAIL" }
1866 );
1867 if let Some(path) = &case.source_path {
1868 println!(" path: {}", path);
1869 }
1870 if let Some(comparison) = &case.comparison {
1871 println!(" baseline identical: {}", comparison.identical);
1872 }
1873 for failure in &case.failures {
1874 println!(" {}", failure);
1875 }
1876 }
1877 if !suite.pass {
1878 process::exit(1);
1879 }
1880 return;
1881 }
1882
1883 let run = load_run_record_or_exit(&paths[0]);
1884 let fixture = run
1885 .replay_fixture
1886 .clone()
1887 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1888 let report = harn_vm::orchestration::evaluate_run_against_fixture(&run, &fixture);
1889 println!("{}", if report.pass { "PASS" } else { "FAIL" });
1890 println!("Stages: {}", report.stage_count);
1891 if let Some(compare_path) = compare {
1892 let baseline = load_run_record_or_exit(Path::new(compare_path));
1893 print_run_diff(&harn_vm::orchestration::diff_run_records(&baseline, &run));
1894 }
1895 if !report.failures.is_empty() {
1896 for failure in &report.failures {
1897 println!("- {}", failure);
1898 }
1899 }
1900 if !report.pass {
1901 process::exit(1);
1902 }
1903}
1904
1905fn print_eval_pack_report(report: &harn_vm::orchestration::EvalPackReport) {
1906 println!(
1907 "{} {} passed, {} blocking failed, {} warning, {} informational, {} total",
1908 if report.pass { "PASS" } else { "FAIL" },
1909 report.passed,
1910 report.blocking_failed,
1911 report.warning_failed,
1912 report.informational_failed,
1913 report.total
1914 );
1915 for case in &report.cases {
1916 println!(
1917 "- {} [{}] {} ({})",
1918 case.label,
1919 case.workflow_id,
1920 if case.pass { "PASS" } else { "FAIL" },
1921 case.severity
1922 );
1923 if let Some(path) = &case.source_path {
1924 println!(" path: {}", path);
1925 }
1926 if let Some(comparison) = &case.comparison {
1927 println!(" baseline identical: {}", comparison.identical);
1928 if !comparison.identical {
1929 println!(
1930 " baseline status: {} -> {}",
1931 comparison.left_status, comparison.right_status
1932 );
1933 }
1934 }
1935 for failure in &case.failures {
1936 println!(" {}", failure);
1937 }
1938 for warning in &case.warnings {
1939 println!(" warning: {}", warning);
1940 }
1941 for item in &case.informational {
1942 println!(" info: {}", item);
1943 }
1944 }
1945 for ladder in &report.ladders {
1946 println!(
1947 "- ladder {} [{}] {} ({}) first_correct={}/{}",
1948 ladder.id,
1949 ladder.persona,
1950 if ladder.pass { "PASS" } else { "FAIL" },
1951 ladder.severity,
1952 ladder.first_correct_route.as_deref().unwrap_or("<none>"),
1953 ladder.first_correct_tier.as_deref().unwrap_or("<none>")
1954 );
1955 println!(" artifacts: {}", ladder.artifact_root);
1956 for tier in &ladder.tiers {
1957 println!(
1958 " - {} [{}] {} tools={} models={} latency={}ms cost=${:.6}",
1959 tier.timeout_tier,
1960 tier.route_id,
1961 tier.outcome,
1962 tier.tool_calls,
1963 tier.model_calls,
1964 tier.latency_ms,
1965 tier.cost_usd
1966 );
1967 for reason in &tier.degradation_reasons {
1968 println!(" {}", reason);
1969 }
1970 }
1971 }
1972}
1973
1974fn print_persona_ladder_report(report: &harn_vm::orchestration::PersonaEvalLadderReport) {
1975 println!(
1976 "{} ladder {} passed, {} degraded/looped, {} total",
1977 if report.pass { "PASS" } else { "FAIL" },
1978 report.passed,
1979 report.failed,
1980 report.total
1981 );
1982 println!(
1983 "first_correct: {}/{}",
1984 report.first_correct_route.as_deref().unwrap_or("<none>"),
1985 report.first_correct_tier.as_deref().unwrap_or("<none>")
1986 );
1987 println!("artifacts: {}", report.artifact_root);
1988 for tier in &report.tiers {
1989 println!(
1990 "- {} [{}] {} tools={} models={} latency={}ms cost=${:.6}",
1991 tier.timeout_tier,
1992 tier.route_id,
1993 tier.outcome,
1994 tier.tool_calls,
1995 tier.model_calls,
1996 tier.latency_ms,
1997 tier.cost_usd
1998 );
1999 for reason in &tier.degradation_reasons {
2000 println!(" {}", reason);
2001 }
2002 }
2003}
2004
2005fn run_package_evals() {
2006 let paths = package::load_package_eval_pack_paths(None).unwrap_or_else(|error| {
2007 eprintln!("{error}");
2008 process::exit(1);
2009 });
2010 let mut all_pass = true;
2011 for path in &paths {
2012 println!("Eval pack: {}", path.display());
2013 let manifest = load_eval_pack_manifest_or_exit(path);
2014 let report = harn_vm::orchestration::evaluate_eval_pack_manifest(&manifest).unwrap_or_else(
2015 |error| {
2016 eprintln!("Failed to evaluate eval pack {}: {error}", path.display());
2017 process::exit(1);
2018 },
2019 );
2020 print_eval_pack_report(&report);
2021 all_pass &= report.pass;
2022 }
2023 if !all_pass {
2024 process::exit(1);
2025 }
2026}
2027
2028fn run_structural_experiment_eval(
2029 path: &Path,
2030 experiment: &str,
2031 argv: &[String],
2032 llm_mock_mode: &commands::run::CliLlmMockMode,
2033) {
2034 let baseline_dir = tempfile::Builder::new()
2035 .prefix("harn-eval-baseline-")
2036 .tempdir()
2037 .unwrap_or_else(|error| {
2038 command_error(&format!("failed to create baseline tempdir: {error}"))
2039 });
2040 let variant_dir = tempfile::Builder::new()
2041 .prefix("harn-eval-variant-")
2042 .tempdir()
2043 .unwrap_or_else(|error| {
2044 command_error(&format!("failed to create variant tempdir: {error}"))
2045 });
2046
2047 let baseline = spawn_eval_pipeline_run(path, baseline_dir.path(), None, argv, llm_mock_mode);
2048 if !baseline.status.success() {
2049 relay_subprocess_failure("baseline", &baseline);
2050 }
2051
2052 let variant = spawn_eval_pipeline_run(
2053 path,
2054 variant_dir.path(),
2055 Some(experiment),
2056 argv,
2057 llm_mock_mode,
2058 );
2059 if !variant.status.success() {
2060 relay_subprocess_failure("variant", &variant);
2061 }
2062
2063 let baseline_runs = collect_structural_eval_runs(baseline_dir.path());
2064 let variant_runs = collect_structural_eval_runs(variant_dir.path());
2065 if baseline_runs.is_empty() || variant_runs.is_empty() {
2066 eprintln!(
2067 "structural eval expected workflow run records under {} and {}, but one side was empty",
2068 baseline_dir.path().display(),
2069 variant_dir.path().display()
2070 );
2071 process::exit(1);
2072 }
2073 if baseline_runs.len() != variant_runs.len() {
2074 eprintln!(
2075 "structural eval produced different run counts: baseline={} variant={}",
2076 baseline_runs.len(),
2077 variant_runs.len()
2078 );
2079 process::exit(1);
2080 }
2081
2082 let mut baseline_ok = 0usize;
2083 let mut variant_ok = 0usize;
2084 let mut any_failures = false;
2085
2086 println!("Structural experiment: {}", experiment);
2087 println!("Cases: {}", baseline_runs.len());
2088 for (baseline_run, variant_run) in baseline_runs.iter().zip(variant_runs.iter()) {
2089 let baseline_fixture = baseline_run
2090 .replay_fixture
2091 .clone()
2092 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(baseline_run));
2093 let variant_fixture = variant_run
2094 .replay_fixture
2095 .clone()
2096 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(variant_run));
2097 let baseline_report =
2098 harn_vm::orchestration::evaluate_run_against_fixture(baseline_run, &baseline_fixture);
2099 let variant_report =
2100 harn_vm::orchestration::evaluate_run_against_fixture(variant_run, &variant_fixture);
2101 let diff = harn_vm::orchestration::diff_run_records(baseline_run, variant_run);
2102 if baseline_report.pass {
2103 baseline_ok += 1;
2104 }
2105 if variant_report.pass {
2106 variant_ok += 1;
2107 }
2108 any_failures |= !baseline_report.pass || !variant_report.pass;
2109 println!(
2110 "- {} [{}]",
2111 variant_run
2112 .workflow_name
2113 .clone()
2114 .unwrap_or_else(|| variant_run.workflow_id.clone()),
2115 variant_run.task
2116 );
2117 println!(
2118 " baseline: {}",
2119 if baseline_report.pass { "PASS" } else { "FAIL" }
2120 );
2121 for failure in &baseline_report.failures {
2122 println!(" {}", failure);
2123 }
2124 println!(
2125 " variant: {}",
2126 if variant_report.pass { "PASS" } else { "FAIL" }
2127 );
2128 for failure in &variant_report.failures {
2129 println!(" {}", failure);
2130 }
2131 println!(" diff identical: {}", diff.identical);
2132 println!(" stage diffs: {}", diff.stage_diffs.len());
2133 println!(" tool diffs: {}", diff.tool_diffs.len());
2134 println!(" observability diffs: {}", diff.observability_diffs.len());
2135 }
2136
2137 println!("Baseline {} / {} passed", baseline_ok, baseline_runs.len());
2138 println!("Variant {} / {} passed", variant_ok, variant_runs.len());
2139
2140 if any_failures {
2141 process::exit(1);
2142 }
2143}
2144
2145fn spawn_eval_pipeline_run(
2146 path: &Path,
2147 run_dir: &Path,
2148 structural_experiment: Option<&str>,
2149 argv: &[String],
2150 llm_mock_mode: &commands::run::CliLlmMockMode,
2151) -> std::process::Output {
2152 let exe = env::current_exe().unwrap_or_else(|error| {
2153 command_error(&format!("failed to resolve current executable: {error}"))
2154 });
2155 let mut command = std::process::Command::new(exe);
2156 command.current_dir(path.parent().unwrap_or_else(|| Path::new(".")));
2157 command.arg("run");
2158 match llm_mock_mode {
2159 commands::run::CliLlmMockMode::Off => {}
2160 commands::run::CliLlmMockMode::Replay { fixture_path } => {
2161 command
2162 .arg("--llm-mock")
2163 .arg(absolute_cli_path(fixture_path));
2164 }
2165 commands::run::CliLlmMockMode::Record { fixture_path } => {
2166 command
2167 .arg("--llm-mock-record")
2168 .arg(absolute_cli_path(fixture_path));
2169 }
2170 }
2171 command.arg(path);
2172 if !argv.is_empty() {
2173 command.arg("--");
2174 command.args(argv);
2175 }
2176 command.env(harn_vm::runtime_paths::HARN_RUN_DIR_ENV, run_dir);
2177 if let Some(experiment) = structural_experiment {
2178 command.env("HARN_STRUCTURAL_EXPERIMENT", experiment);
2179 }
2180 command.output().unwrap_or_else(|error| {
2181 command_error(&format!(
2182 "failed to spawn `harn run {}` for structural eval: {error}",
2183 path.display()
2184 ))
2185 })
2186}
2187
/// Returns `path` unchanged when it is absolute, otherwise joins it onto the
/// current working directory.
///
/// Falls back to `.` as the base when the working directory cannot be read.
/// The result is not canonicalized.
fn absolute_cli_path(path: &Path) -> PathBuf {
    if path.is_absolute() {
        path.to_path_buf()
    } else {
        let base = match env::current_dir() {
            Ok(cwd) => cwd,
            Err(_) => PathBuf::from("."),
        };
        base.join(path)
    }
}
2196
/// Forwards a failed subprocess's captured output to stderr and exits.
///
/// Each non-blank stream is printed under a `[label] stdout:` / `[label] stderr:`
/// header. The process then exits with the child's exit code, or 1 when the child
/// reported none.
fn relay_subprocess_failure(label: &str, output: &std::process::Output) -> ! {
    let stdout = String::from_utf8_lossy(&output.stdout);
    if stdout.chars().any(|ch| !ch.is_whitespace()) {
        eprintln!("[{label}] stdout:\n{stdout}");
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    if stderr.chars().any(|ch| !ch.is_whitespace()) {
        eprintln!("[{label}] stderr:\n{stderr}");
    }

    let exit_code = match output.status.code() {
        Some(code) => code,
        None => 1,
    };
    process::exit(exit_code)
}
2208
2209fn collect_structural_eval_runs(dir: &Path) -> Vec<harn_vm::orchestration::RunRecord> {
2210 let mut paths: Vec<PathBuf> = fs::read_dir(dir)
2211 .unwrap_or_else(|error| {
2212 command_error(&format!(
2213 "failed to read structural eval run dir {}: {error}",
2214 dir.display()
2215 ))
2216 })
2217 .filter_map(|entry| entry.ok().map(|entry| entry.path()))
2218 .filter(|entry| entry.extension().and_then(|ext| ext.to_str()) == Some("json"))
2219 .collect();
2220 paths.sort();
2221 let mut runs: Vec<_> = paths
2222 .iter()
2223 .map(|path| load_run_record_or_exit(path))
2224 .collect();
2225 runs.sort_by(|left, right| {
2226 (
2227 left.started_at.as_str(),
2228 left.workflow_id.as_str(),
2229 left.task.as_str(),
2230 )
2231 .cmp(&(
2232 right.started_at.as_str(),
2233 right.workflow_id.as_str(),
2234 right.task.as_str(),
2235 ))
2236 });
2237 runs
2238}
2239
2240pub(crate) fn parse_source_file(path: &str) -> (String, Vec<harn_parser::SNode>) {
2242 let source = match fs::read_to_string(path) {
2243 Ok(s) => s,
2244 Err(e) => {
2245 eprintln!("Error reading {path}: {e}");
2246 process::exit(1);
2247 }
2248 };
2249
2250 let mut lexer = Lexer::new(&source);
2251 let tokens = match lexer.tokenize() {
2252 Ok(t) => t,
2253 Err(e) => {
2254 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2255 &source,
2256 path,
2257 &error_span_from_lex(&e),
2258 "error",
2259 &e.to_string(),
2260 Some("here"),
2261 None,
2262 );
2263 eprint!("{diagnostic}");
2264 process::exit(1);
2265 }
2266 };
2267
2268 let mut parser = Parser::new(tokens);
2269 let program = match parser.parse() {
2270 Ok(p) => p,
2271 Err(err) => {
2272 if parser.all_errors().is_empty() {
2273 let span = error_span_from_parse(&err);
2274 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2275 &source,
2276 path,
2277 &span,
2278 "error",
2279 &harn_parser::diagnostic::parser_error_message(&err),
2280 Some(harn_parser::diagnostic::parser_error_label(&err)),
2281 harn_parser::diagnostic::parser_error_help(&err),
2282 );
2283 eprint!("{diagnostic}");
2284 } else {
2285 for e in parser.all_errors() {
2286 let span = error_span_from_parse(e);
2287 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2288 &source,
2289 path,
2290 &span,
2291 "error",
2292 &harn_parser::diagnostic::parser_error_message(e),
2293 Some(harn_parser::diagnostic::parser_error_label(e)),
2294 harn_parser::diagnostic::parser_error_help(e),
2295 );
2296 eprint!("{diagnostic}");
2297 }
2298 }
2299 process::exit(1);
2300 }
2301 };
2302
2303 (source, program)
2304}
2305
2306fn error_span_from_lex(e: &harn_lexer::LexerError) -> harn_lexer::Span {
2307 match e {
2308 harn_lexer::LexerError::UnexpectedCharacter(_, span)
2309 | harn_lexer::LexerError::UnterminatedString(span)
2310 | harn_lexer::LexerError::UnterminatedBlockComment(span) => *span,
2311 }
2312}
2313
2314fn error_span_from_parse(e: &harn_parser::ParserError) -> harn_lexer::Span {
2315 match e {
2316 harn_parser::ParserError::Unexpected { span, .. } => *span,
2317 harn_parser::ParserError::UnexpectedEof { span, .. } => *span,
2318 }
2319}
2320
/// Lexes, parses, type-checks, compiles and runs `source` on a fresh VM.
///
/// * `source` - program text to run.
/// * `source_path` - file the text came from, if any. It drives import
///   resolution for the type checker, the store/checkpoint base directory, the
///   pipeline name (file stem, `"default"` without a path), skill loading and
///   the package runtime extensions.
///
/// Returns the type-checker warnings (one `warning: ...` line each) followed by
/// the VM's captured output.
///
/// # Errors
///
/// Returns the stringified error of the first failing stage: lexing, parsing,
/// the first type diagnostic with error severity, compilation, manifest
/// trigger/hook installation, connector client initialization, or VM execution.
pub(crate) async fn execute(source: &str, source_path: Option<&Path>) -> Result<String, String> {
    let mut lexer = Lexer::new(source);
    let tokens = lexer.tokenize().map_err(|e| e.to_string())?;
    let mut parser = Parser::new(tokens);
    let program = parser.parse().map_err(|e| e.to_string())?;

    let mut checker = TypeChecker::new();
    // With a source file available, hand the checker whatever names, type
    // declarations and callable declarations the module graph reports as
    // imported for that file.
    if let Some(path) = source_path {
        let graph = harn_modules::build(&[path.to_path_buf()]);
        if let Some(imported) = graph.imported_names_for_file(path) {
            checker = checker.with_imported_names(imported);
        }
        if let Some(imported) = graph.imported_type_declarations_for_file(path) {
            checker = checker.with_imported_type_decls(imported);
        }
        if let Some(imported) = graph.imported_callable_declarations_for_file(path) {
            checker = checker.with_imported_callable_decls(imported);
        }
    }
    let type_diagnostics = checker.check(&program);
    // The first error aborts the run; warnings are kept and prepended to the output.
    let mut warning_lines = Vec::new();
    for diag in &type_diagnostics {
        match diag.severity {
            DiagnosticSeverity::Error => return Err(diag.message.clone()),
            DiagnosticSeverity::Warning => {
                warning_lines.push(format!("warning: {}", diag.message));
            }
        }
    }

    let chunk = harn_vm::Compiler::new()
        .compile(&program)
        .map_err(|e| e.to_string())?;

    // Everything from VM setup to teardown runs inside a tokio `LocalSet`.
    let local = tokio::task::LocalSet::new();
    local
        .run_until(async {
            let mut vm = harn_vm::Vm::new();
            harn_vm::register_vm_stdlib(&mut vm);
            install_default_hostlib(&mut vm);
            // Directory of the source file, or `.` when there is no path (or no parent).
            let source_parent = source_path
                .and_then(|p| p.parent())
                .unwrap_or(std::path::Path::new("."));
            let project_root = harn_vm::stdlib::process::find_project_root(source_parent);
            // Store, metadata and checkpoint builtins are rooted at the project
            // root when one is found, otherwise next to the source file.
            let store_base = project_root.as_deref().unwrap_or(source_parent);
            let execution_cwd = std::env::current_dir()
                .unwrap_or_else(|_| std::path::PathBuf::from("."))
                .to_string_lossy()
                .into_owned();
            let source_dir = source_parent.to_string_lossy().into_owned();
            // Conformance fixtures get an in-memory event log installed up front.
            // NOTE(review): the meaning of the `64` argument is not visible here.
            if source_path.is_some_and(is_conformance_path) {
                harn_vm::event_log::install_memory_for_current_thread(64);
            }
            harn_vm::register_store_builtins(&mut vm, store_base);
            harn_vm::register_metadata_builtins(&mut vm, store_base);
            let pipeline_name = source_path
                .and_then(|p| p.file_stem())
                .and_then(|s| s.to_str())
                .unwrap_or("default");
            harn_vm::register_checkpoint_builtins(&mut vm, store_base, pipeline_name);
            // Record where this run executes; cleared again after `vm.execute` below.
            harn_vm::stdlib::process::set_thread_execution_context(Some(
                harn_vm::orchestration::RunExecutionRecord {
                    cwd: Some(execution_cwd),
                    source_dir: Some(source_dir),
                    env: std::collections::BTreeMap::new(),
                    adapter: None,
                    repo_path: None,
                    worktree_path: None,
                    branch: None,
                    base_ref: None,
                    cleanup: None,
                },
            ));
            if let Some(ref root) = project_root {
                vm.set_project_root(root);
            }
            // A bare file name has an empty parent; leave the VM's source dir
            // untouched in that case.
            if let Some(path) = source_path {
                if let Some(parent) = path.parent() {
                    if !parent.as_os_str().is_empty() {
                        vm.set_source_dir(parent);
                    }
                }
            }
            let loaded = skill_loader::load_skills(&skill_loader::SkillLoaderInputs {
                cli_dirs: Vec::new(),
                source_path: source_path.map(Path::to_path_buf),
            });
            skill_loader::emit_loader_warnings(&loaded.loader_warnings);
            skill_loader::install_skills_global(&mut vm, &loaded);
            // NOTE(review): the `?` on trigger/hook installation (and on connector
            // client initialization further down) returns before the teardown
            // that follows `vm.execute`, so the thread execution context set
            // above stays installed on those error paths — confirm this is intended.
            if let Some(path) = source_path {
                let extensions = package::load_runtime_extensions(path);
                package::install_runtime_extensions(&extensions);
                package::install_manifest_triggers(&mut vm, &extensions)
                    .await
                    .map_err(|error| format!("failed to install manifest triggers: {error}"))?;
                package::install_manifest_hooks(&mut vm, &extensions)
                    .await
                    .map_err(|error| format!("failed to install manifest hooks: {error}"))?;
            }
            // Make sure an event log is active for this thread before running.
            let _event_log = harn_vm::event_log::active_event_log()
                .unwrap_or_else(|| harn_vm::event_log::install_memory_for_current_thread(64));
            let connector_clients_installed =
                should_install_default_connector_clients(source, source_path);
            if connector_clients_installed {
                install_default_connector_clients(store_base)
                    .await
                    .map_err(|error| format!("failed to initialize connector clients: {error}"))?;
            }
            // Hold the result so the teardown below runs whether or not execution
            // failed; the error is only propagated afterwards.
            let execution_result = vm.execute(&chunk).await.map_err(|e| e.to_string());
            harn_vm::egress::reset_egress_policy_for_host();
            if connector_clients_installed {
                harn_vm::clear_active_connector_clients();
            }
            harn_vm::stdlib::process::set_thread_execution_context(None);
            execution_result?;
            // Warnings first, then whatever the program wrote to the VM's output.
            let mut output = String::new();
            for wl in &warning_lines {
                output.push_str(wl);
                output.push('\n');
            }
            output.push_str(vm.output());
            Ok(output)
        })
        .await
}
2454
2455fn should_install_default_connector_clients(source: &str, source_path: Option<&Path>) -> bool {
2456 if !source_path.is_some_and(is_conformance_path) {
2457 return true;
2458 }
2459 source.contains("connector_call")
2460 || source.contains("std/connectors")
2461 || source.contains("connectors/")
2462}
2463
/// Reports whether any component of `path` is exactly `conformance`.
fn is_conformance_path(path: &Path) -> bool {
    for segment in path.iter() {
        if segment == "conformance" {
            return true;
        }
    }
    false
}
2468
2469async fn install_default_connector_clients(base_dir: &Path) -> Result<(), String> {
2470 let event_log = harn_vm::event_log::active_event_log()
2471 .unwrap_or_else(|| harn_vm::event_log::install_memory_for_current_thread(64));
2472 let secret_namespace = connector_secret_namespace(base_dir);
2473 let secrets: Arc<dyn harn_vm::secrets::SecretProvider> = Arc::new(
2474 harn_vm::secrets::configured_default_chain(secret_namespace)
2475 .map_err(|error| format!("failed to configure secret providers: {error}"))?,
2476 );
2477
2478 let registry = harn_vm::ConnectorRegistry::default();
2479 let metrics = Arc::new(harn_vm::MetricsRegistry::default());
2480 let inbox = Arc::new(
2481 harn_vm::InboxIndex::new(event_log.clone(), metrics.clone())
2482 .await
2483 .map_err(|error| error.to_string())?,
2484 );
2485 registry
2486 .init_all(harn_vm::ConnectorCtx {
2487 event_log,
2488 secrets,
2489 inbox,
2490 metrics,
2491 rate_limiter: Arc::new(harn_vm::RateLimiterFactory::default()),
2492 })
2493 .await
2494 .map_err(|error| error.to_string())?;
2495 let clients = registry.client_map().await;
2496 harn_vm::install_active_connector_clients(clients);
2497 Ok(())
2498}
2499
/// Picks the secret namespace used for connector secrets.
///
/// A non-blank `HARN_SECRET_NAMESPACE` environment variable wins and is returned
/// as-is. Otherwise the namespace is `harn/<leaf>`, where `<leaf>` is the final
/// component of `base_dir`, or `workspace` when that component is missing,
/// empty, or not valid UTF-8.
fn connector_secret_namespace(base_dir: &Path) -> String {
    if let Ok(configured) = std::env::var("HARN_SECRET_NAMESPACE") {
        if !configured.trim().is_empty() {
            return configured;
        }
    }

    let leaf = match base_dir.file_name().and_then(|name| name.to_str()) {
        Some(name) if !name.is_empty() => name,
        _ => "workspace",
    };
    format!("harn/{leaf}")
}
2513
#[cfg(test)]
mod main_tests {
    use super::{normalize_serve_args, should_install_default_connector_clients};
    use std::path::Path;

    /// Builds an owned argument vector from string literals.
    fn argv(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    /// Asks whether connector clients would be installed for `source` at `path`.
    fn installs_clients(source: &str, path: &str) -> bool {
        should_install_default_connector_clients(source, Some(Path::new(path)))
    }

    #[test]
    fn normalize_serve_args_inserts_a2a_for_legacy_shape() {
        let normalized =
            normalize_serve_args(argv(&["harn", "serve", "--port", "3000", "agent.harn"]));
        assert_eq!(
            normalized,
            argv(&["harn", "serve", "a2a", "--port", "3000", "agent.harn"])
        );
    }

    #[test]
    fn normalize_serve_args_preserves_explicit_subcommands() {
        let explicit = argv(&["harn", "serve", "acp", "server.harn"]);
        let normalized = normalize_serve_args(explicit.clone());
        assert_eq!(normalized, explicit);
    }

    #[test]
    fn conformance_skips_connector_clients_unless_fixture_uses_connectors() {
        let fixture = "conformance/tests/language/basic.harn";

        // Conformance fixtures without connector markers skip the clients.
        assert!(!installs_clients("println(1)", fixture));
        assert!(!installs_clients("trust_graph_verify_chain()", fixture));
        // A connector import opts the fixture back in.
        assert!(installs_clients(
            "import { post_message } from \"std/connectors/slack\"",
            fixture
        ));
        // Anything outside a conformance path always gets the clients.
        assert!(installs_clients("println(1)", "examples/demo.harn"));
    }
}