1#![recursion_limit = "256"]
2
3pub mod acp;
4pub mod cli;
5pub mod commands;
6pub mod config;
7pub mod env_guard;
8pub mod format;
9pub mod package;
10mod provider_bootstrap;
11pub mod skill_loader;
12pub mod skill_provenance;
13pub mod test_runner;
14#[doc(hidden)]
15pub mod tests;
16
17use clap::{error::ErrorKind, CommandFactory, Parser as ClapParser};
18use std::path::{Path, PathBuf};
19use std::sync::Arc;
20use std::{env, fs, process, thread};
21
22use cli::{
23 Cli, Command, CompletionShell, EvalCommand, MergeCaptainCommand, MergeCaptainMockCommand,
24 ModelInfoArgs, PackageArtifactsCommand, PackageCacheCommand, PackageCommand, PersonaCommand,
25 PersonaSupervisionCommand, ProvidersCommand, RunsCommand, ServeCommand, SkillCommand,
26 SkillKeyCommand, SkillTrustCommand, SkillsCommand, ToolCommand,
27};
28use harn_lexer::Lexer;
29use harn_parser::{DiagnosticSeverity, Parser, TypeChecker};
30
/// Stack size, in bytes (16 MiB), requested for the dedicated thread that
/// [`run`] spawns to host the CLI's async runtime.
pub const CLI_RUNTIME_STACK_SIZE: usize = 16 * 1024 * 1024;
32
33#[cfg(feature = "hostlib")]
34pub(crate) fn install_default_hostlib(vm: &mut harn_vm::Vm) {
35 let _ = harn_hostlib::install_default(vm);
36}
37
38#[cfg(not(feature = "hostlib"))]
39pub(crate) fn install_default_hostlib(_vm: &mut harn_vm::Vm) {}
40
41pub fn run() {
44 let handle = thread::Builder::new()
45 .name("harn-cli".to_string())
46 .stack_size(CLI_RUNTIME_STACK_SIZE)
47 .spawn(|| {
48 let runtime = tokio::runtime::Builder::new_multi_thread()
49 .enable_all()
50 .build()
51 .unwrap_or_else(|error| {
52 eprintln!("failed to start async runtime: {error}");
53 process::exit(1);
54 });
55 runtime.block_on(async_main());
56 })
57 .unwrap_or_else(|error| {
58 eprintln!("failed to start CLI runtime thread: {error}");
59 process::exit(1);
60 });
61
62 if let Err(payload) = handle.join() {
63 std::panic::resume_unwind(payload);
64 }
65}
66
/// Parses the process arguments and dispatches to the matching subcommand.
///
/// Flow:
/// 1. The raw arguments are passed through `normalize_serve_args`.
/// 2. If exactly one argument follows the program name and it ends in
///    `.harn`, that file is run directly via `commands::run::run_file` and
///    clap is never consulted.
/// 3. Otherwise the arguments are parsed with `Cli::try_parse_from`; any parse
///    error (including help/version requests) leaves through clap's
///    `Error::exit`.
/// 4. The parsed command is matched and handed to its handler.
///
/// The function returns `()`; failures are reported by the individual arms,
/// typically by printing to stderr and calling `process::exit`, or by calling
/// the diverging `command_error` helper.
async fn async_main() {
    let raw_args = normalize_serve_args(env::args().collect());
    // Shorthand invocation: a single argument ending in `.harn` is executed
    // directly with default run options, bypassing clap entirely.
    if raw_args.len() == 2 && raw_args[1].ends_with(".harn") {
        provider_bootstrap::maybe_seed_ollama_for_run_file(Path::new(&raw_args[1]), false, false)
            .await;
        commands::run::run_file(
            &raw_args[1],
            false,
            std::collections::HashSet::new(),
            Vec::new(),
            commands::run::CliLlmMockMode::Off,
            None,
            commands::run::RunProfileOptions::default(),
        )
        .await;
        return;
    }

    let cli = match Cli::try_parse_from(&raw_args) {
        Ok(cli) => cli,
        Err(error) => {
            // NOTE(review): both paths below call `error.exit()`, so the
            // help/version check currently has no effect on behavior.
            if matches!(
                error.kind(),
                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
            ) {
                error.exit();
            }
            error.exit();
        }
    };

    match cli.command.expect("clap requires a command") {
        Command::Version => print_version(),
        Command::Upgrade(args) => {
            if let Err(error) = commands::upgrade::run(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Skill(args) => match args.command {
            SkillCommand::Key(key_args) => match key_args.command {
                SkillKeyCommand::Generate(generate) => commands::skill::run_key_generate(&generate),
            },
            SkillCommand::Sign(sign) => commands::skill::run_sign(&sign),
            SkillCommand::Endorse(endorse) => commands::skill::run_endorse(&endorse),
            SkillCommand::Verify(verify) => commands::skill::run_verify(&verify),
            SkillCommand::WhoSigned(who_signed) => {
                commands::skill::run_who_signed(&who_signed).await
            }
            SkillCommand::Trust(trust_args) => match trust_args.command {
                SkillTrustCommand::Add(add) => commands::skill::run_trust_add(&add),
                SkillTrustCommand::List(list) => commands::skill::run_trust_list(&list),
            },
            SkillCommand::New(new_args) => commands::skills::run_new(&new_args),
        },
        Command::Run(args) => {
            // The provider bootstrap hook is skipped when only explaining cost.
            if !args.explain_cost {
                match (args.eval.as_deref(), args.file.as_deref()) {
                    (Some(code), None) => {
                        provider_bootstrap::maybe_seed_ollama_for_inline(
                            code,
                            args.yes,
                            args.llm_mock.is_some(),
                        )
                        .await;
                    }
                    (None, Some(file)) => {
                        provider_bootstrap::maybe_seed_ollama_for_run_file(
                            Path::new(file),
                            args.yes,
                            args.llm_mock.is_some(),
                        )
                        .await;
                    }
                    // Invalid combinations are rejected by the second match below.
                    _ => {}
                }
            }
            let denied =
                commands::run::build_denied_builtins(args.deny.as_deref(), args.allow.as_deref());
            // `args.llm_mock` (replay) takes precedence over `args.llm_mock_record`.
            let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
                commands::run::CliLlmMockMode::Replay {
                    fixture_path: PathBuf::from(path),
                }
            } else if let Some(path) = args.llm_mock_record.as_ref() {
                commands::run::CliLlmMockMode::Record {
                    fixture_path: PathBuf::from(path),
                }
            } else {
                commands::run::CliLlmMockMode::Off
            };
            // Attestation options are only built when `args.attest` is set.
            let attestation = args.attest.then(|| commands::run::RunAttestationOptions {
                receipt_out: args.receipt_out.as_ref().map(PathBuf::from),
                agent_id: args.attest_agent.clone(),
            });
            let profile_options = run_profile_options(&args.profile);

            match (args.eval.as_deref(), args.file.as_deref()) {
                (Some(code), None) => {
                    // Inline code: write the wrapped source to a temp file and
                    // run that file like any other script.
                    let (wrapped, tmp) = commands::run::prepare_eval_temp_file(code)
                        .unwrap_or_else(|e| command_error(&e));
                    let tmp_path: PathBuf = tmp.path().to_path_buf();
                    fs::write(&tmp_path, &wrapped).unwrap_or_else(|e| {
                        command_error(&format!("failed to write temp file for -e: {e}"))
                    });
                    let tmp_str = tmp_path.to_string_lossy().into_owned();
                    if args.explain_cost {
                        commands::run::run_explain_cost_file_with_skill_dirs(&tmp_str);
                    } else {
                        commands::run::run_file_with_skill_dirs(
                            &tmp_str,
                            args.trace,
                            denied,
                            args.argv.clone(),
                            args.skill_dir.clone(),
                            llm_mock_mode.clone(),
                            attestation.clone(),
                            profile_options.clone(),
                        )
                        .await;
                    }
                    // The temp handle is held until the run has finished.
                    // NOTE(review): presumably dropping it removes the file — confirm.
                    drop(tmp);
                }
                (None, Some(file)) => {
                    if args.explain_cost {
                        commands::run::run_explain_cost_file_with_skill_dirs(file);
                    } else {
                        commands::run::run_file_with_skill_dirs(
                            file,
                            args.trace,
                            denied,
                            args.argv.clone(),
                            args.skill_dir.clone(),
                            llm_mock_mode,
                            attestation,
                            profile_options,
                        )
                        .await
                    }
                }
                (Some(_), Some(_)) => command_error(
                    "`harn run` accepts either `-e <code>` or `<file.harn>`, not both",
                ),
                (None, None) => {
                    command_error("`harn run` requires either `-e <code>` or `<file.harn>`")
                }
            }
        }
        Command::Check(args) => {
            // The two matrix modes short-circuit the regular per-file check.
            if args.provider_matrix {
                let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
                let extensions = package::load_runtime_extensions(&cwd);
                package::install_runtime_extensions(&extensions);
                commands::check::provider_matrix::run(args.format, args.filter.as_deref());
                return;
            }
            if args.connector_matrix {
                commands::check::connector_matrix::run(
                    args.format,
                    args.filter.as_deref(),
                    &args.targets,
                );
                return;
            }
            let mut target_strings: Vec<String> = args.targets.clone();
            if args.workspace {
                // Workspace mode appends every `[workspace].pipelines` entry to
                // the explicit targets; relative entries are resolved against
                // the manifest directory.
                let anchor = target_strings.first().map(Path::new);
                match package::load_workspace_config(anchor) {
                    Some((workspace, manifest_dir)) if !workspace.pipelines.is_empty() => {
                        for pipeline in &workspace.pipelines {
                            let candidate = Path::new(pipeline);
                            let resolved = if candidate.is_absolute() {
                                candidate.to_path_buf()
                            } else {
                                manifest_dir.join(candidate)
                            };
                            target_strings.push(resolved.to_string_lossy().into_owned());
                        }
                    }
                    Some(_) => command_error(
                        "--workspace requires `[workspace].pipelines` in the nearest harn.toml",
                    ),
                    None => command_error(
                        "--workspace could not find a harn.toml walking up from the target(s)",
                    ),
                }
            }
            if target_strings.is_empty() {
                command_error(
                    "`harn check` requires at least one target path, or `--workspace` with `[workspace].pipelines`",
                );
            }
            for target in &target_strings {
                if let Err(error) = package::validate_runtime_manifest_extensions(Path::new(target))
                {
                    command_error(&format!("manifest extension validation failed: {error}"));
                }
            }
            let targets: Vec<&str> = target_strings.iter().map(String::as_str).collect();
            let files = commands::check::collect_harn_targets(&targets);
            if files.is_empty() {
                command_error("no .harn files found under the given target(s)");
            }
            let module_graph = commands::check::build_module_graph(&files);
            let cross_file_imports = commands::check::collect_cross_file_imports(&module_graph);
            let mut should_fail = false;
            for file in &files {
                // Per-file config is loaded first; CLI flags then override it.
                let mut config = package::load_check_config(Some(file));
                if let Some(path) = args.host_capabilities.as_ref() {
                    config.host_capabilities_path = Some(path.clone());
                }
                if let Some(path) = args.bundle_root.as_ref() {
                    config.bundle_root = Some(path.clone());
                }
                if args.strict_types {
                    config.strict_types = true;
                }
                if let Some(sev) = args.preflight.as_deref() {
                    config.preflight_severity = Some(sev.to_string());
                }
                let outcome = commands::check::check_file_inner(
                    file,
                    &config,
                    &cross_file_imports,
                    &module_graph,
                    args.invariants,
                );
                should_fail |= outcome.should_fail(config.strict);
            }
            // Every file is checked before the process exits with status 1.
            if should_fail {
                process::exit(1);
            }
        }
        Command::Config(args) => {
            if let Err(error) = commands::config_cmd::run(args).await {
                command_error(&error);
            }
        }
        Command::Explain(args) => {
            let code = commands::explain::run_explain(&args);
            if code != 0 {
                process::exit(code);
            }
        }
        Command::Contracts(args) => {
            commands::contracts::handle_contracts_command(args).await;
        }
        Command::Connect(args) => {
            commands::connect::run_connect(*args).await;
        }
        Command::Lint(args) => {
            let targets: Vec<&str> = args.targets.iter().map(String::as_str).collect();
            let files = commands::check::collect_harn_targets(&targets);
            let prompt_files = commands::check::collect_prompt_targets(&targets);
            if files.is_empty() && prompt_files.is_empty() {
                command_error("no .harn or .harn.prompt files found under the given target(s)");
            }
            let module_graph = commands::check::build_module_graph(&files);
            let cross_file_imports = commands::check::collect_cross_file_imports(&module_graph);
            if args.fix {
                // Fix mode: outcomes are not folded into an exit status here.
                for file in &files {
                    let mut config = package::load_check_config(Some(file));
                    commands::check::apply_harn_lint_config(file, &mut config);
                    let require_header = args.require_file_header
                        || commands::check::harn_lint_require_file_header(file);
                    let complexity_threshold =
                        commands::check::harn_lint_complexity_threshold(file);
                    let persona_step_allowlist =
                        commands::check::harn_lint_persona_step_allowlist(file);
                    commands::check::lint_fix_file(
                        file,
                        &config,
                        &cross_file_imports,
                        &module_graph,
                        require_header,
                        complexity_threshold,
                        &persona_step_allowlist,
                    );
                }
                for file in &prompt_files {
                    let threshold =
                        commands::check::harn_lint_template_variant_branch_threshold(file);
                    let disabled = commands::check::harn_lint_disabled_rules(file);
                    // Prompt files go through the regular lint entry point even
                    // in fix mode; its outcome is discarded.
                    commands::check::lint_prompt_file_inner(file, threshold, &disabled);
                }
            } else {
                let mut should_fail = false;
                for file in &files {
                    let mut config = package::load_check_config(Some(file));
                    commands::check::apply_harn_lint_config(file, &mut config);
                    let require_header = args.require_file_header
                        || commands::check::harn_lint_require_file_header(file);
                    let complexity_threshold =
                        commands::check::harn_lint_complexity_threshold(file);
                    let persona_step_allowlist =
                        commands::check::harn_lint_persona_step_allowlist(file);
                    let outcome = commands::check::lint_file_inner(
                        file,
                        &config,
                        &cross_file_imports,
                        &module_graph,
                        require_header,
                        complexity_threshold,
                        &persona_step_allowlist,
                    );
                    should_fail |= outcome.should_fail(config.strict);
                }
                for file in &prompt_files {
                    let threshold =
                        commands::check::harn_lint_template_variant_branch_threshold(file);
                    let disabled = commands::check::harn_lint_disabled_rules(file);
                    let config = package::load_check_config(Some(file));
                    let outcome =
                        commands::check::lint_prompt_file_inner(file, threshold, &disabled);
                    should_fail |= outcome.should_fail(config.strict);
                }
                if should_fail {
                    process::exit(1);
                }
            }
        }
        Command::Fmt(args) => {
            let targets: Vec<&str> = args.targets.iter().map(String::as_str).collect();
            // Config lookup is anchored at the first target, or `.` when none is given.
            let anchor = targets.first().map(Path::new).unwrap_or(Path::new("."));
            // A config load failure is only a warning; defaults are used instead.
            let loaded = match config::load_for_path(anchor) {
                Ok(c) => c,
                Err(e) => {
                    eprintln!("warning: {e}");
                    config::HarnConfig::default()
                }
            };
            // Precedence, lowest to highest: built-in defaults, loaded config, CLI flags.
            let mut opts = harn_fmt::FmtOptions::default();
            if let Some(w) = loaded.fmt.line_width {
                opts.line_width = w;
            }
            if let Some(w) = loaded.fmt.separator_width {
                opts.separator_width = w;
            }
            if let Some(w) = args.line_width {
                opts.line_width = w;
            }
            if let Some(w) = args.separator_width {
                opts.separator_width = w;
            }
            commands::check::fmt_targets(
                &targets,
                commands::check::FmtMode::from_check_flag(args.check),
                &opts,
            );
        }
        Command::Test(args) => {
            // The special targets `agents-conformance` and `protocols` are
            // handled first and return without reaching the generic test modes.
            if args.target.as_deref() == Some("agents-conformance") {
                if args.selection.is_some() {
                    command_error(
                        "`harn test agents-conformance` does not accept a second positional target; use --category instead",
                    );
                }
                if args.evals || args.determinism || args.record || args.replay || args.watch {
                    command_error(
                        "`harn test agents-conformance` cannot be combined with --evals, --determinism, --record, --replay, or --watch",
                    );
                }
                let Some(target_url) = args.agents_target.clone() else {
                    command_error("`harn test agents-conformance` requires --target <url>");
                };
                commands::agents_conformance::run_agents_conformance(
                    commands::agents_conformance::AgentsConformanceConfig {
                        target_url,
                        api_key: args.agents_api_key.clone(),
                        categories: args.agents_category.clone(),
                        timeout_ms: args.timeout,
                        verbose: args.verbose,
                        json: args.json,
                        json_out: args.json_out.clone(),
                        workspace_id: args.agents_workspace_id.clone(),
                        session_id: args.agents_session_id.clone(),
                    },
                )
                .await;
                return;
            }
            if args.target.as_deref() == Some("protocols") {
                if args.evals || args.determinism || args.record || args.replay || args.watch {
                    command_error(
                        "`harn test protocols` cannot be combined with --evals, --determinism, --record, --replay, or --watch",
                    );
                }
                if args.junit.is_some()
                    || args.agents_target.is_some()
                    || args.agents_api_key.is_some()
                    || !args.agents_category.is_empty()
                    || args.json
                    || args.json_out.is_some()
                    || args.agents_workspace_id.is_some()
                    || args.agents_session_id.is_some()
                    || args.parallel
                    || !args.skill_dir.is_empty()
                {
                    command_error(
                        "`harn test protocols` accepts only --filter, --verbose, --timing, and an optional fixture selection",
                    );
                }
                commands::protocol_conformance::run_protocol_conformance(
                    args.selection.as_deref(),
                    args.filter.as_deref(),
                    args.verbose || args.timing,
                );
                return;
            }
            // Generic modes, checked in order: evals, determinism, then regular tests.
            if args.evals {
                if args.determinism || args.record || args.replay || args.watch {
                    command_error("--evals cannot be combined with --determinism, --record, --replay, or --watch");
                }
                if args.target.as_deref() != Some("package") || args.selection.is_some() {
                    command_error("package evals are run with `harn test package --evals`");
                }
                run_package_evals();
            } else if args.determinism {
                if args.watch {
                    command_error("--determinism cannot be combined with --watch");
                }
                if args.record || args.replay {
                    command_error("--determinism manages its own record/replay cycle");
                }
                if let Some(t) = args.target.as_deref() {
                    if t == "conformance" {
                        commands::test::run_conformance_determinism_tests(
                            t,
                            args.selection.as_deref(),
                            args.filter.as_deref(),
                            args.timeout,
                        )
                        .await;
                    } else if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    } else {
                        commands::test::run_determinism_tests(
                            t,
                            args.filter.as_deref(),
                            args.timeout,
                        )
                        .await;
                    }
                } else {
                    // No target given: fall back to `tests/` if that directory exists.
                    let test_dir = if PathBuf::from("tests").is_dir() {
                        "tests".to_string()
                    } else {
                        command_error("no path specified and no tests/ directory found");
                    };
                    if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    }
                    commands::test::run_determinism_tests(
                        &test_dir,
                        args.filter.as_deref(),
                        args.timeout,
                    )
                    .await;
                }
            } else {
                // Regular tests: configure LLM record/replay against the
                // `.harn-fixtures` directory before running; record wins if
                // both flags are set.
                if args.record {
                    harn_vm::llm::set_replay_mode(
                        harn_vm::llm::LlmReplayMode::Record,
                        ".harn-fixtures",
                    );
                } else if args.replay {
                    harn_vm::llm::set_replay_mode(
                        harn_vm::llm::LlmReplayMode::Replay,
                        ".harn-fixtures",
                    );
                }

                if let Some(t) = args.target.as_deref() {
                    if t == "conformance" {
                        commands::test::run_conformance_tests(
                            t,
                            args.selection.as_deref(),
                            args.filter.as_deref(),
                            args.junit.as_deref(),
                            args.timeout,
                            args.verbose,
                            args.timing,
                            args.differential_optimizations,
                        )
                        .await;
                    } else if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    } else if args.watch {
                        commands::test::run_watch_tests(
                            t,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    } else {
                        commands::test::run_user_tests(
                            t,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    }
                } else {
                    // No target given: fall back to `tests/` if that directory exists.
                    let test_dir = if PathBuf::from("tests").is_dir() {
                        "tests".to_string()
                    } else {
                        command_error("no path specified and no tests/ directory found");
                    };
                    if args.selection.is_some() {
                        command_error(
                            "only `harn test conformance` accepts a second positional target",
                        );
                    }
                    if args.watch {
                        commands::test::run_watch_tests(
                            &test_dir,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    } else {
                        commands::test::run_user_tests(
                            &test_dir,
                            args.filter.as_deref(),
                            args.timeout,
                            args.parallel,
                        )
                        .await;
                    }
                }
            }
        }
        Command::Init(args) => commands::init::init_project(args.name.as_deref(), args.template),
        Command::New(args) => match commands::init::resolve_new_args(&args) {
            Ok((name, template)) => commands::init::init_project(name.as_deref(), template),
            Err(error) => {
                eprintln!("error: {error}");
                process::exit(1);
            }
        },
        Command::Doctor(args) => {
            commands::doctor::run_doctor_with_options(commands::doctor::DoctorOptions {
                network: !args.no_network,
                json: args.json,
            })
            .await
        }
        Command::Models(args) => commands::models::run(args).await,
        Command::Local(args) => commands::local::run(args).await,
        Command::Providers(args) => match args.command {
            ProvidersCommand::Refresh(refresh) => {
                if let Err(error) = commands::providers::run_refresh(&refresh).await {
                    command_error(&error);
                }
            }
            ProvidersCommand::Validate(validate) => {
                if let Err(error) = commands::providers::run_validate(&validate) {
                    command_error(&error);
                }
            }
            ProvidersCommand::Export(export) => {
                if let Err(error) = commands::providers::run_export(&export) {
                    command_error(&error);
                }
            }
        },
        Command::Try(args) => commands::try_cmd::run(args).await,
        Command::Quickstart(args) => {
            if let Err(error) = commands::quickstart::run_quickstart(&args).await {
                command_error(&error);
            }
        }
        Command::Demo(args) => {
            let code = commands::demo::run(args).await;
            if code != 0 {
                process::exit(code);
            }
        }
        Command::Serve(args) => match args.command {
            ServeCommand::Acp(args) => {
                if let Err(error) = commands::serve::run_acp_server(&args).await {
                    command_error(&error);
                }
            }
            ServeCommand::A2a(args) => {
                if let Err(error) = commands::serve::run_a2a_server(&args).await {
                    command_error(&error);
                }
            }
            ServeCommand::Api(args) => {
                if let Err(error) = commands::serve::run_api_server(&args).await {
                    command_error(&error);
                }
            }
            ServeCommand::Mcp(args) => {
                if let Err(error) = commands::serve::run_mcp_server(&args).await {
                    command_error(&error);
                }
            }
        },
        Command::Connector(args) => {
            if let Err(error) = commands::connector::handle_connector_command(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Mcp(args) => commands::mcp::handle_mcp_command(&args.command).await,
        Command::Watch(args) => {
            let denied =
                commands::run::build_denied_builtins(args.deny.as_deref(), args.allow.as_deref());
            commands::run::run_watch(&args.file, denied).await;
        }
        Command::Portal(args) => {
            commands::portal::run_portal(
                &args.dir,
                args.manifest,
                args.persona_state_dir,
                &args.host,
                args.port,
                args.open,
                args.allow_remote_launch,
            )
            .await
        }
        Command::Trigger(args) => {
            if let Err(error) = commands::trigger::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Flow(args) => match commands::flow::run_flow(&args) {
            Ok(code) => {
                if code != 0 {
                    process::exit(code);
                }
            }
            Err(error) => command_error(&error),
        },
        Command::Workflow(args) => match commands::workflow::handle(args) {
            Ok(code) => {
                if code != 0 {
                    process::exit(code);
                }
            }
            Err(error) => command_error(&error),
        },
        Command::Supervisor(args) => {
            if let Err(error) = commands::supervisor::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Trace(args) => {
            if let Err(error) = commands::trace::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Crystallize(args) => {
            if let Err(error) = commands::crystallize::run(args) {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        // `Trust` and `TrustGraph` share a single handler.
        Command::Trust(args) | Command::TrustGraph(args) => {
            if let Err(error) = commands::trust::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Verify(args) => {
            if let Err(error) = verify_provenance_receipt(&args.receipt, args.json) {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Completions(args) => print_completions(args.shell),
        Command::Orchestrator(args) => {
            if let Err(error) = commands::orchestrator::handle(args).await {
                eprintln!("error: {error}");
                process::exit(1);
            }
        }
        Command::Playground(args) => {
            provider_bootstrap::maybe_seed_ollama_for_playground(
                Path::new(&args.host),
                Path::new(&args.script),
                args.yes,
                args.llm.is_some(),
                args.llm_mock.is_some(),
            )
            .await;
            // `args.llm_mock` (replay) takes precedence over `args.llm_mock_record`.
            let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
                commands::run::CliLlmMockMode::Replay {
                    fixture_path: PathBuf::from(path),
                }
            } else if let Some(path) = args.llm_mock_record.as_ref() {
                commands::run::CliLlmMockMode::Record {
                    fixture_path: PathBuf::from(path),
                }
            } else {
                commands::run::CliLlmMockMode::Off
            };
            if let Err(error) = commands::playground::run_command(args, llm_mock_mode).await {
                // NOTE(review): unlike the other arms, this prints the error
                // verbatim with no `error: ` prefix and no trailing newline —
                // presumably the message is pre-formatted; confirm.
                eprint!("{error}");
                process::exit(1);
            }
        }
        Command::Runs(args) => match args.command {
            RunsCommand::Inspect(inspect) => {
                inspect_run_record(&inspect.path, inspect.compare.as_deref())
            }
        },
        Command::Session(args) => commands::session::run(args),
        Command::Replay(args) => replay_run_record(&args.path),
        Command::Eval(args) => match args.command {
            Some(EvalCommand::Prompt(prompt_args)) => {
                let code = commands::eval_prompt::run(prompt_args).await;
                if code != 0 {
                    process::exit(code);
                }
            }
            None => {
                // Without a subcommand a path is mandatory; its absence exits
                // with status 2.
                let Some(path) = args.path else {
                    eprintln!(
                        "error: `harn eval` requires a path or a subcommand (e.g. `prompt`)."
                    );
                    eprintln!("See `harn eval --help`.");
                    process::exit(2);
                };
                // `args.llm_mock` (replay) takes precedence over `args.llm_mock_record`.
                let llm_mock_mode = if let Some(path) = args.llm_mock.as_ref() {
                    commands::run::CliLlmMockMode::Replay {
                        fixture_path: PathBuf::from(path),
                    }
                } else if let Some(path) = args.llm_mock_record.as_ref() {
                    commands::run::CliLlmMockMode::Record {
                        fixture_path: PathBuf::from(path),
                    }
                } else {
                    commands::run::CliLlmMockMode::Off
                };
                eval_run_record(
                    &path,
                    args.compare.as_deref(),
                    args.structural_experiment.as_deref(),
                    &args.argv,
                    &llm_mock_mode,
                )
            }
        },
        Command::Repl => commands::repl::run_repl().await,
        Command::Bench(args) => commands::bench::run(args).await,
        Command::TestBench(args) => commands::test_bench::run(args.command).await,
        Command::Viz(args) => commands::viz::run_viz(&args.file, args.output.as_deref()),
        // The first argument is true when any of `frozen`, `locked`, or `offline` is set.
        Command::Install(args) => package::install_packages(
            args.frozen || args.locked || args.offline,
            args.refetch.as_deref(),
            args.offline,
            args.json,
        ),
        Command::Add(args) => package::add_package_with_registry(
            &args.name_or_spec,
            args.alias.as_deref(),
            args.git.as_deref(),
            args.tag.as_deref(),
            args.rev.as_deref(),
            args.branch.as_deref(),
            args.path.as_deref(),
            args.registry.as_deref(),
        ),
        Command::Update(args) => {
            package::update_packages(args.alias.as_deref(), args.all, args.json)
        }
        Command::Remove(args) => package::remove_package(&args.alias),
        Command::Lock => package::lock_packages(),
        Command::Package(args) => match args.command {
            PackageCommand::List(list) => package::list_packages(list.json),
            PackageCommand::Doctor(doctor) => package::doctor_packages(doctor.json),
            PackageCommand::Search(search) => package::search_package_registry(
                search.query.as_deref(),
                search.registry.as_deref(),
                search.json,
            ),
            PackageCommand::Info(info) => {
                package::show_package_registry_info(&info.name, info.registry.as_deref(), info.json)
            }
            PackageCommand::Check(check) => {
                package::check_package(check.package.as_deref(), check.json)
            }
            PackageCommand::Pack(pack) => package::pack_package(
                pack.package.as_deref(),
                pack.output.as_deref(),
                pack.dry_run,
                pack.json,
            ),
            PackageCommand::Docs(docs) => package::generate_package_docs(
                docs.package.as_deref(),
                docs.output.as_deref(),
                docs.check,
            ),
            PackageCommand::Cache(cache) => match cache.command {
                PackageCacheCommand::List => package::list_package_cache(),
                PackageCacheCommand::Clean(clean) => package::clean_package_cache(clean.all),
                PackageCacheCommand::Verify(verify) => {
                    package::verify_package_cache(verify.materialized)
                }
            },
            PackageCommand::Outdated(args) => package::outdated_packages(
                args.refresh,
                args.remote,
                args.registry.as_deref(),
                args.json,
            ),
            PackageCommand::Audit(args) => {
                package::audit_packages(args.registry.as_deref(), args.skip_materialized, args.json)
            }
            PackageCommand::Artifacts(args) => match args.command {
                PackageArtifactsCommand::Manifest(manifest) => {
                    package::artifacts_manifest(manifest.output.as_deref())
                }
                PackageArtifactsCommand::Check(check) => {
                    package::artifacts_check(&check.manifest, check.json)
                }
            },
        },
        Command::Publish(args) => package::publish_package(
            args.package.as_deref(),
            args.dry_run,
            args.registry.as_deref(),
            args.json,
        ),
        // Each merge-captain handler returns an exit code; non-zero codes are
        // forwarded to `process::exit`.
        Command::MergeCaptain(args) => match args.command {
            MergeCaptainCommand::Run(run) => {
                let code = commands::merge_captain::run_driver(&run);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Ladder(ladder) => {
                let code = commands::merge_captain::run_ladder(&ladder);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Iterate(iterate) => {
                let code = commands::merge_captain::run_iterate(&iterate);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Audit(audit) => {
                let code = commands::merge_captain::run_audit(&audit);
                if code != 0 {
                    process::exit(code);
                }
            }
            MergeCaptainCommand::Mock(mock) => {
                let code = match mock {
                    MergeCaptainMockCommand::Init(args) => {
                        commands::merge_captain_mock::run_init(&args)
                    }
                    MergeCaptainMockCommand::Step(args) => {
                        commands::merge_captain_mock::run_step(&args)
                    }
                    MergeCaptainMockCommand::Status(args) => {
                        commands::merge_captain_mock::run_status(&args)
                    }
                    MergeCaptainMockCommand::Serve(args) => {
                        commands::merge_captain_mock::run_serve(&args).await
                    }
                    MergeCaptainMockCommand::Cleanup(args) => {
                        commands::merge_captain_mock::run_cleanup(&args)
                    }
                    MergeCaptainMockCommand::Scenarios => {
                        commands::merge_captain_mock::run_scenarios()
                    }
                };
                if code != 0 {
                    process::exit(code);
                }
            }
        },
        // Persona subcommands share `args.manifest`; the stateful ones also
        // receive `args.state_dir`.
        Command::Persona(args) => match args.command {
            PersonaCommand::New(new) => {
                if let Err(error) = commands::persona_scaffold::run_new(&new) {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Doctor(doctor) => {
                if let Err(error) =
                    commands::persona_doctor::run_doctor(args.manifest.as_deref(), &doctor).await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Check(check) => {
                commands::persona::run_check(args.manifest.as_deref(), &check)
            }
            PersonaCommand::List(list) => {
                commands::persona::run_list(args.manifest.as_deref(), &list)
            }
            PersonaCommand::Inspect(inspect) => {
                commands::persona::run_inspect(args.manifest.as_deref(), &inspect)
            }
            PersonaCommand::Status(status) => {
                if let Err(error) = commands::persona::run_status(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &status,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Pause(control) => {
                if let Err(error) = commands::persona::run_pause(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &control,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Resume(control) => {
                if let Err(error) = commands::persona::run_resume(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &control,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Disable(control) => {
                if let Err(error) = commands::persona::run_disable(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &control,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Tick(tick) => {
                if let Err(error) =
                    commands::persona::run_tick(args.manifest.as_deref(), &args.state_dir, &tick)
                        .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Trigger(trigger) => {
                if let Err(error) = commands::persona::run_trigger(
                    args.manifest.as_deref(),
                    &args.state_dir,
                    &trigger,
                )
                .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Spend(spend) => {
                if let Err(error) =
                    commands::persona::run_spend(args.manifest.as_deref(), &args.state_dir, &spend)
                        .await
                {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
            PersonaCommand::Supervision(supervision) => match supervision.command {
                PersonaSupervisionCommand::Tail(tail) => {
                    if let Err(error) = commands::persona_supervision::run_tail(
                        args.manifest.as_deref(),
                        &args.state_dir,
                        &tail,
                    )
                    .await
                    {
                        eprintln!("error: {error}");
                        process::exit(1);
                    }
                }
            },
        },
        Command::ModelInfo(args) => {
            // `print_model_info` returns a bool; `false` becomes exit status 1.
            if !print_model_info(&args).await {
                process::exit(1);
            }
        }
        Command::ProviderCatalog(args) => print_provider_catalog(args.available_only),
        Command::ProviderReady(args) => {
            run_provider_ready(
                &args.provider,
                args.model.as_deref(),
                args.base_url.as_deref(),
                args.json,
            )
            .await
        }
        Command::ProviderProbe(args) => commands::provider::run_provider_probe(args).await,
        Command::ProviderToolProbe(args) => commands::provider::run_provider_tool_probe(args).await,
        Command::Skills(args) => match args.command {
            SkillsCommand::List(list) => commands::skills::run_list(&list),
            SkillsCommand::Inspect(inspect) => commands::skills::run_inspect(&inspect),
            SkillsCommand::Match(matcher) => commands::skills::run_match(&matcher),
            SkillsCommand::Install(install) => commands::skills::run_install(&install),
            SkillsCommand::New(new_args) => commands::skills::run_new(&new_args),
        },
        Command::Tool(args) => match args.command {
            ToolCommand::New(new_args) => {
                if let Err(error) = commands::tool::run_new(&new_args) {
                    eprintln!("error: {error}");
                    process::exit(1);
                }
            }
        },
        Command::DumpHighlightKeywords(args) => {
            commands::dump_highlight_keywords::run(&args.output, args.check);
        }
        Command::DumpTriggerQuickref(args) => {
            commands::dump_trigger_quickref::run(&args.output, args.check);
        }
        Command::DumpConnectorMatrix(args) => {
            commands::check::connector_matrix::run_docs(&args.output, &args.sources, args.check);
        }
        Command::DumpProtocolArtifacts(args) => {
            commands::dump_protocol_artifacts::run(&args.output_dir, args.check);
        }
    }
}
1126
1127fn run_profile_options(args: &cli::ProfileArgs) -> commands::run::RunProfileOptions {
1128 commands::run::RunProfileOptions {
1129 text: args.text,
1130 json_path: args.json_path.clone(),
1131 }
1132}
1133
1134fn print_completions(shell: CompletionShell) {
1135 let mut command = Cli::command();
1136 let shell = clap_complete::Shell::from(shell);
1137 clap_complete::generate(shell, &mut command, "harn", &mut std::io::stdout());
1138}
1139
/// Rewrites `harn serve <rest...>` into `harn serve a2a <rest...>` when no serve mode is named.
///
/// The argument vector is returned untouched unless all of the following hold:
/// * there are at least three arguments,
/// * the second argument is exactly `serve`,
/// * the third argument is not one of `acp`, `a2a`, `api`, `mcp`, `-h`, or `--help`.
fn normalize_serve_args(mut raw_args: Vec<String>) -> Vec<String> {
    // Third-position tokens that must be passed through as-is.
    const PASSTHROUGH: [&str; 6] = ["acp", "a2a", "api", "mcp", "-h", "--help"];

    let needs_default_mode = match raw_args.as_slice() {
        [_, subcommand, next, ..] => {
            subcommand.as_str() == "serve" && !PASSTHROUGH.contains(&next.as_str())
        }
        _ => false,
    };
    if needs_default_mode {
        raw_args.insert(2, String::from("a2a"));
    }
    raw_args
}
1152
/// Prints the ASCII-art banner together with the crate version to stdout.
///
/// The version is `CARGO_PKG_VERSION`, substituted at compile time via `env!`.
fn print_version() {
    println!(
        r#"
 ╱▔▔╲
 ╱ ╲ harn v{}
 │ ◆ │ the agent harness language
 │ │
 ╰──╯╱
 ╱╱
"#,
        env!("CARGO_PKG_VERSION")
    );
}
1166
1167async fn print_model_info(args: &ModelInfoArgs) -> bool {
1168 let resolved = harn_vm::llm_config::resolve_model_info(&args.model);
1169 let api_key_result = harn_vm::llm::resolve_api_key(&resolved.provider);
1170 let api_key_set = api_key_result.is_ok();
1171 let api_key = api_key_result.unwrap_or_default();
1172 let context_window =
1173 harn_vm::llm::fetch_provider_max_context(&resolved.provider, &resolved.id, &api_key).await;
1174 let readiness = local_openai_readiness(&resolved.provider, &resolved.id, &api_key).await;
1175 let catalog = harn_vm::llm_config::model_catalog_entry(&resolved.id);
1176 let runtime_context_window = catalog
1177 .as_ref()
1178 .and_then(|entry| entry.runtime_context_window);
1179 let capabilities = harn_vm::llm::capabilities::lookup(&resolved.provider, &resolved.id);
1180 let mut payload = serde_json::json!({
1181 "alias": args.model,
1182 "id": resolved.id,
1183 "provider": resolved.provider,
1184 "resolved_alias": resolved.alias,
1185 "tool_format": resolved.tool_format,
1186 "tier": resolved.tier,
1187 "api_key_set": api_key_set,
1188 "context_window": context_window,
1189 "runtime_context_window": runtime_context_window,
1190 "readiness": readiness,
1191 "catalog": catalog,
1192 "capabilities": {
1193 "native_tools": capabilities.native_tools,
1194 "defer_loading": capabilities.defer_loading,
1195 "tool_search": capabilities.tool_search,
1196 "max_tools": capabilities.max_tools,
1197 "prompt_caching": capabilities.prompt_caching,
1198 "vision": capabilities.vision,
1199 "vision_supported": capabilities.vision_supported,
1200 "audio": capabilities.audio,
1201 "pdf": capabilities.pdf,
1202 "files_api_supported": capabilities.files_api_supported,
1203 "json_schema": capabilities.json_schema,
1204 "prefers_xml_scaffolding": capabilities.prefers_xml_scaffolding,
1205 "prefers_markdown_scaffolding": capabilities.prefers_markdown_scaffolding,
1206 "structured_output_mode": capabilities.structured_output_mode,
1207 "supports_assistant_prefill": capabilities.supports_assistant_prefill,
1208 "prefers_role_developer": capabilities.prefers_role_developer,
1209 "prefers_xml_tools": capabilities.prefers_xml_tools,
1210 "thinking": !capabilities.thinking_modes.is_empty(),
1211 "thinking_block_style": capabilities.thinking_block_style,
1212 "thinking_modes": capabilities.thinking_modes,
1213 "interleaved_thinking_supported": capabilities.interleaved_thinking_supported,
1214 "anthropic_beta_features": capabilities.anthropic_beta_features,
1215 "preserve_thinking": capabilities.preserve_thinking,
1216 "server_parser": capabilities.server_parser,
1217 "honors_chat_template_kwargs": capabilities.honors_chat_template_kwargs,
1218 "recommended_endpoint": capabilities.recommended_endpoint,
1219 "text_tool_wire_format_supported": capabilities.text_tool_wire_format_supported,
1220 },
1221 "qc_default_model": harn_vm::llm_config::qc_default_model(&resolved.provider),
1222 });
1223
1224 let should_verify = args.verify || args.warm;
1225 let mut ok = true;
1226 if should_verify {
1227 if resolved.provider == "ollama" {
1228 let mut readiness = harn_vm::llm::OllamaReadinessOptions::new(resolved.id.clone());
1229 readiness.warm = args.warm;
1230 readiness.observe_loaded = true;
1231 readiness.keep_alive = args
1232 .keep_alive
1233 .as_deref()
1234 .and_then(harn_vm::llm::normalize_ollama_keep_alive);
1235 let result = harn_vm::llm::ollama_readiness(readiness).await;
1236 ok = result.valid;
1237 payload["readiness"] = serde_json::to_value(&result).unwrap_or_else(|error| {
1238 serde_json::json!({
1239 "valid": false,
1240 "status": "serialization_error",
1241 "message": format!("failed to serialize readiness result: {error}"),
1242 })
1243 });
1244 } else {
1245 ok = false;
1246 payload["readiness"] = serde_json::json!({
1247 "valid": false,
1248 "status": "unsupported_provider",
1249 "message": format!(
1250 "model-info --verify is only supported for Ollama models; resolved provider is '{}'",
1251 resolved.provider
1252 ),
1253 "provider": resolved.provider,
1254 });
1255 }
1256 }
1257
1258 println!(
1259 "{}",
1260 serde_json::to_string(&payload).unwrap_or_else(|error| {
1261 command_error(&format!("failed to serialize model info: {error}"))
1262 })
1263 );
1264 ok
1265}
1266
1267async fn local_openai_readiness(
1268 provider: &str,
1269 model: &str,
1270 api_key: &str,
1271) -> Option<serde_json::Value> {
1272 let def = harn_vm::llm_config::provider_config(provider)?;
1273 if def.auth_style != "none" || !harn_vm::llm::supports_model_readiness_probe(&def) {
1274 return None;
1275 }
1276 let readiness = harn_vm::llm::probe_openai_compatible_model(provider, model, api_key).await;
1277 Some(serde_json::json!({
1278 "valid": readiness.valid,
1279 "category": readiness.category,
1280 "message": readiness.message,
1281 "provider": readiness.provider,
1282 "model": readiness.model,
1283 "url": readiness.url,
1284 "status": readiness.status,
1285 "available_models": readiness.available_models,
1286 }))
1287}
1288
1289fn print_provider_catalog(available_only: bool) {
1290 let provider_names = if available_only {
1291 harn_vm::llm_config::available_provider_names()
1292 } else {
1293 harn_vm::llm_config::provider_names()
1294 };
1295 let providers: Vec<_> = provider_names
1296 .into_iter()
1297 .filter_map(|name| {
1298 harn_vm::llm_config::provider_config(&name).map(|def| {
1299 serde_json::json!({
1300 "name": name,
1301 "display_name": def.display_name,
1302 "icon": def.icon,
1303 "base_url": harn_vm::llm_config::resolve_base_url(&def),
1304 "base_url_env": def.base_url_env,
1305 "auth_style": def.auth_style,
1306 "auth_envs": harn_vm::llm_config::auth_env_names(&def.auth_env),
1307 "auth_available": harn_vm::llm_config::provider_key_available(&name),
1308 "features": def.features,
1309 "cost_per_1k_in": def.cost_per_1k_in,
1310 "cost_per_1k_out": def.cost_per_1k_out,
1311 "latency_p50_ms": def.latency_p50_ms,
1312 })
1313 })
1314 })
1315 .collect();
1316 let models: Vec<_> = harn_vm::llm_config::model_catalog_entries()
1317 .into_iter()
1318 .map(|(id, model)| {
1319 serde_json::json!({
1320 "id": id,
1321 "name": model.name,
1322 "provider": model.provider,
1323 "context_window": model.context_window,
1324 "runtime_context_window": model.runtime_context_window,
1325 "stream_timeout": model.stream_timeout,
1326 "capabilities": model.capabilities,
1327 "pricing": model.pricing,
1328 })
1329 })
1330 .collect();
1331 let aliases: Vec<_> = harn_vm::llm_config::alias_entries()
1332 .into_iter()
1333 .map(|(name, alias)| {
1334 serde_json::json!({
1335 "name": name,
1336 "id": alias.id,
1337 "provider": alias.provider,
1338 "tool_format": alias.tool_format,
1339 "tool_calling": harn_vm::llm_config::alias_tool_calling_entry(&name),
1340 })
1341 })
1342 .collect();
1343 let payload = serde_json::json!({
1344 "providers": providers,
1345 "known_model_names": harn_vm::llm_config::known_model_names(),
1346 "available_providers": harn_vm::llm_config::available_provider_names(),
1347 "aliases": aliases,
1348 "models": models,
1349 "qc_defaults": harn_vm::llm_config::qc_defaults(),
1350 });
1351 println!(
1352 "{}",
1353 serde_json::to_string(&payload).unwrap_or_else(|error| {
1354 command_error(&format!("failed to serialize provider catalog: {error}"))
1355 })
1356 );
1357}
1358
1359async fn run_provider_ready(
1360 provider: &str,
1361 model: Option<&str>,
1362 base_url: Option<&str>,
1363 json: bool,
1364) {
1365 let readiness =
1366 harn_vm::llm::readiness::probe_provider_readiness(provider, model, base_url).await;
1367 if json {
1368 match serde_json::to_string_pretty(&readiness) {
1369 Ok(payload) => println!("{payload}"),
1370 Err(error) => command_error(&format!("failed to serialize readiness result: {error}")),
1371 }
1372 } else if readiness.ok {
1373 println!("{}", readiness.message);
1374 } else {
1375 eprintln!("{}", readiness.message);
1376 }
1377 if !readiness.ok {
1378 process::exit(1);
1379 }
1380}
1381
1382fn command_error(message: &str) -> ! {
1383 Cli::command()
1384 .error(ErrorKind::ValueValidation, message)
1385 .exit()
1386}
1387
1388fn verify_provenance_receipt(path: &str, json: bool) -> Result<(), String> {
1389 let raw =
1390 fs::read_to_string(path).map_err(|error| format!("failed to read {path}: {error}"))?;
1391 let receipt: harn_vm::ProvenanceReceipt = serde_json::from_str(&raw)
1392 .map_err(|error| format!("failed to parse provenance receipt {path}: {error}"))?;
1393 let report = harn_vm::verify_receipt(&receipt);
1394 if json {
1395 println!(
1396 "{}",
1397 serde_json::to_string_pretty(&report).map_err(|error| error.to_string())?
1398 );
1399 } else if report.verified {
1400 println!(
1401 "verified receipt={} events={} receipt_hash={} event_root_hash={}",
1402 report.receipt_id.unwrap_or_else(|| "-".to_string()),
1403 report.event_count,
1404 report.receipt_hash.unwrap_or_else(|| "-".to_string()),
1405 report.event_root_hash.unwrap_or_else(|| "-".to_string())
1406 );
1407 } else {
1408 println!(
1409 "failed receipt={} events={}",
1410 report.receipt_id.unwrap_or_else(|| "-".to_string()),
1411 report.event_count
1412 );
1413 for error in &report.errors {
1414 println!(" {error}");
1415 }
1416 return Err("provenance receipt verification failed".to_string());
1417 }
1418 Ok(())
1419}
1420
1421fn load_run_record_or_exit(path: &Path) -> harn_vm::orchestration::RunRecord {
1422 match harn_vm::orchestration::load_run_record(path) {
1423 Ok(run) => run,
1424 Err(error) => {
1425 eprintln!("Failed to load run record: {error}");
1426 process::exit(1);
1427 }
1428 }
1429}
1430
1431fn load_eval_suite_manifest_or_exit(path: &Path) -> harn_vm::orchestration::EvalSuiteManifest {
1432 harn_vm::orchestration::load_eval_suite_manifest(path).unwrap_or_else(|error| {
1433 eprintln!("Failed to load eval manifest {}: {error}", path.display());
1434 process::exit(1);
1435 })
1436}
1437
1438fn load_eval_pack_manifest_or_exit(path: &Path) -> harn_vm::orchestration::EvalPackManifest {
1439 harn_vm::orchestration::load_eval_pack_manifest(path).unwrap_or_else(|error| {
1440 eprintln!("Failed to load eval pack {}: {error}", path.display());
1441 process::exit(1);
1442 })
1443}
1444
1445fn load_persona_eval_ladder_manifest_or_exit(
1446 path: &Path,
1447) -> harn_vm::orchestration::PersonaEvalLadderManifest {
1448 harn_vm::orchestration::load_persona_eval_ladder_manifest(path).unwrap_or_else(|error| {
1449 eprintln!(
1450 "Failed to load persona eval ladder {}: {error}",
1451 path.display()
1452 );
1453 process::exit(1);
1454 })
1455}
1456
1457fn file_looks_like_eval_manifest(path: &Path) -> bool {
1458 if path.file_name().and_then(|name| name.to_str()) == Some("harn.eval.toml") {
1459 return true;
1460 }
1461 if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
1462 let Ok(content) = fs::read_to_string(path) else {
1463 return false;
1464 };
1465 return toml::from_str::<harn_vm::orchestration::EvalPackManifest>(&content)
1466 .is_ok_and(|manifest| !manifest.cases.is_empty() || !manifest.ladders.is_empty());
1467 }
1468 let Ok(content) = fs::read_to_string(path) else {
1469 return false;
1470 };
1471 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1472 return false;
1473 };
1474 json.get("_type").and_then(|value| value.as_str()) == Some("eval_suite_manifest")
1475 || json.get("cases").is_some()
1476}
1477
1478fn file_looks_like_eval_pack_manifest(path: &Path) -> bool {
1479 if path.file_name().and_then(|name| name.to_str()) == Some("harn.eval.toml") {
1480 return true;
1481 }
1482 if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
1483 return file_looks_like_eval_manifest(path);
1484 }
1485 let Ok(content) = fs::read_to_string(path) else {
1486 return false;
1487 };
1488 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1489 return false;
1490 };
1491 json.get("version").is_some()
1492 && (json.get("cases").is_some() || json.get("ladders").is_some())
1493 && json.get("_type").and_then(|value| value.as_str()) != Some("eval_suite_manifest")
1494}
1495
1496fn file_looks_like_persona_eval_ladder_manifest(path: &Path) -> bool {
1497 let Ok(content) = fs::read_to_string(path) else {
1498 return false;
1499 };
1500 if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
1501 let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
1502 return false;
1503 };
1504 return json.get("_type").and_then(|value| value.as_str())
1505 == Some("persona_eval_ladder_manifest")
1506 || json.get("timeout_tiers").is_some()
1507 || json.get("timeout-tiers").is_some();
1508 }
1509 toml::from_str::<harn_vm::orchestration::PersonaEvalLadderManifest>(&content).is_ok_and(
1510 |manifest| {
1511 manifest
1512 .type_name
1513 .eq_ignore_ascii_case("persona_eval_ladder_manifest")
1514 || (!manifest.timeout_tiers.is_empty() && manifest.backend.path.is_some())
1515 },
1516 )
1517}
1518
/// Expands `path` into the list of run record files it denotes.
///
/// * A regular file yields a one-element list containing that path.
/// * A directory yields its direct entries with a `json` extension, sorted; unreadable entries
///   are skipped, and the list may be empty.
/// * Anything else prints an error to stderr and exits with status 1, as does a directory that
///   cannot be read.
fn collect_run_record_paths(path: &str) -> Vec<PathBuf> {
    let root = Path::new(path);
    if root.is_file() {
        return vec![root.to_path_buf()];
    }
    if !root.is_dir() {
        eprintln!("Run path does not exist: {}", root.display());
        process::exit(1);
    }

    let listing = match fs::read_dir(root) {
        Ok(listing) => listing,
        Err(error) => {
            eprintln!("Failed to read run directory {}: {error}", root.display());
            process::exit(1);
        }
    };

    let mut records: Vec<PathBuf> = Vec::new();
    for candidate in listing.filter_map(|entry| entry.ok().map(|entry| entry.path())) {
        let is_json = candidate.extension().and_then(|ext| ext.to_str()) == Some("json");
        if is_json {
            records.push(candidate);
        }
    }
    records.sort();
    records
}
1539
1540fn print_run_diff(diff: &harn_vm::orchestration::RunDiffReport) {
1541 println!(
1542 "Diff: {} -> {} [{} -> {}]",
1543 diff.left_run_id, diff.right_run_id, diff.left_status, diff.right_status
1544 );
1545 println!("Identical: {}", diff.identical);
1546 println!("Stage diffs: {}", diff.stage_diffs.len());
1547 println!("Tool diffs: {}", diff.tool_diffs.len());
1548 println!("Observability diffs: {}", diff.observability_diffs.len());
1549 println!("Transition delta: {}", diff.transition_count_delta);
1550 println!("Artifact delta: {}", diff.artifact_count_delta);
1551 println!("Checkpoint delta: {}", diff.checkpoint_count_delta);
1552 for stage in &diff.stage_diffs {
1553 println!("- {} [{}]", stage.node_id, stage.change);
1554 for detail in &stage.details {
1555 println!(" {}", detail);
1556 }
1557 }
1558 for tool in &diff.tool_diffs {
1559 println!("- tool {} [{}]", tool.tool_name, tool.args_hash);
1560 println!(" left: {:?}", tool.left_result);
1561 println!(" right: {:?}", tool.right_result);
1562 }
1563 for item in &diff.observability_diffs {
1564 println!("- {} [{}]", item.label, item.section);
1565 for detail in &item.details {
1566 println!(" {}", detail);
1567 }
1568 }
1569}
1570
1571fn inspect_run_record(path: &str, compare: Option<&str>) {
1572 let run = load_run_record_or_exit(Path::new(path));
1573 println!("Run: {}", run.id);
1574 println!(
1575 "Workflow: {}",
1576 run.workflow_name
1577 .clone()
1578 .unwrap_or_else(|| run.workflow_id.clone())
1579 );
1580 println!("Status: {}", run.status);
1581 println!("Task: {}", run.task);
1582 println!("Stages: {}", run.stages.len());
1583 println!("Artifacts: {}", run.artifacts.len());
1584 println!("Transitions: {}", run.transitions.len());
1585 println!("Checkpoints: {}", run.checkpoints.len());
1586 println!("HITL questions: {}", run.hitl_questions.len());
1587 if let Some(observability) = &run.observability {
1588 println!("Planner rounds: {}", observability.planner_rounds.len());
1589 println!("Research facts: {}", observability.research_fact_count);
1590 println!("Workers: {}", observability.worker_lineage.len());
1591 println!(
1592 "Action graph: {} nodes / {} edges",
1593 observability.action_graph_nodes.len(),
1594 observability.action_graph_edges.len()
1595 );
1596 println!(
1597 "Transcript pointers: {}",
1598 observability.transcript_pointers.len()
1599 );
1600 println!("Daemon events: {}", observability.daemon_events.len());
1601 }
1602 if let Some(parent_worker_id) = run
1603 .metadata
1604 .get("parent_worker_id")
1605 .and_then(|value| value.as_str())
1606 {
1607 println!("Parent worker: {}", parent_worker_id);
1608 }
1609 if let Some(parent_stage_id) = run
1610 .metadata
1611 .get("parent_stage_id")
1612 .and_then(|value| value.as_str())
1613 {
1614 println!("Parent stage: {}", parent_stage_id);
1615 }
1616 if run
1617 .metadata
1618 .get("delegated")
1619 .and_then(|value| value.as_bool())
1620 .unwrap_or(false)
1621 {
1622 println!("Delegated: true");
1623 }
1624 println!(
1625 "Pending nodes: {}",
1626 if run.pending_nodes.is_empty() {
1627 "-".to_string()
1628 } else {
1629 run.pending_nodes.join(", ")
1630 }
1631 );
1632 println!(
1633 "Replay fixture: {}",
1634 if run.replay_fixture.is_some() {
1635 "embedded"
1636 } else {
1637 "derived"
1638 }
1639 );
1640 for stage in &run.stages {
1641 let worker = stage.metadata.get("worker");
1642 let worker_suffix = worker
1643 .and_then(|value| value.get("name"))
1644 .and_then(|value| value.as_str())
1645 .map(|name| format!(" worker={name}"))
1646 .unwrap_or_default();
1647 println!(
1648 "- {} [{}] status={} outcome={} branch={}{}",
1649 stage.node_id,
1650 stage.kind,
1651 stage.status,
1652 stage.outcome,
1653 stage.branch.clone().unwrap_or_else(|| "-".to_string()),
1654 worker_suffix,
1655 );
1656 if let Some(worker) = worker {
1657 if let Some(worker_id) = worker.get("id").and_then(|value| value.as_str()) {
1658 println!(" worker_id: {}", worker_id);
1659 }
1660 if let Some(child_run_id) = worker.get("child_run_id").and_then(|value| value.as_str())
1661 {
1662 println!(" child_run_id: {}", child_run_id);
1663 }
1664 if let Some(child_run_path) = worker
1665 .get("child_run_path")
1666 .and_then(|value| value.as_str())
1667 {
1668 println!(" child_run_path: {}", child_run_path);
1669 }
1670 }
1671 }
1672 if let Some(observability) = &run.observability {
1673 for round in &observability.planner_rounds {
1674 println!(
1675 "- planner {} iterations={} llm_calls={} tools={} research_facts={}",
1676 round.node_id,
1677 round.iteration_count,
1678 round.llm_call_count,
1679 round.tool_execution_count,
1680 round.research_facts.len()
1681 );
1682 }
1683 for pointer in &observability.transcript_pointers {
1684 println!(
1685 "- transcript {} [{}] available={} {}",
1686 pointer.label,
1687 pointer.kind,
1688 pointer.available,
1689 pointer
1690 .path
1691 .clone()
1692 .unwrap_or_else(|| pointer.location.clone())
1693 );
1694 }
1695 for event in &observability.daemon_events {
1696 println!(
1697 "- daemon {} [{:?}] at {}",
1698 event.name, event.kind, event.timestamp
1699 );
1700 println!(" id: {}", event.daemon_id);
1701 println!(" persist_path: {}", event.persist_path);
1702 if let Some(summary) = &event.payload_summary {
1703 println!(" payload: {}", summary);
1704 }
1705 }
1706 }
1707 if let Some(compare_path) = compare {
1708 let baseline = load_run_record_or_exit(Path::new(compare_path));
1709 print_run_diff(&harn_vm::orchestration::diff_run_records(&baseline, &run));
1710 }
1711}
1712
1713fn replay_run_record(path: &str) {
1714 let run = load_run_record_or_exit(Path::new(path));
1715 println!("Replay: {}", run.id);
1716 for stage in &run.stages {
1717 println!(
1718 "[{}] status={} outcome={} branch={}",
1719 stage.node_id,
1720 stage.status,
1721 stage.outcome,
1722 stage.branch.clone().unwrap_or_else(|| "-".to_string())
1723 );
1724 if let Some(text) = &stage.visible_text {
1725 println!(" visible: {}", text);
1726 }
1727 if let Some(verification) = &stage.verification {
1728 println!(" verification: {}", verification);
1729 }
1730 }
1731 if let Some(transcript) = &run.transcript {
1732 println!(
1733 "Transcript events persisted: {}",
1734 transcript["events"]
1735 .as_array()
1736 .map(|v| v.len())
1737 .unwrap_or(0)
1738 );
1739 }
1740 let fixture = run
1741 .replay_fixture
1742 .clone()
1743 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1744 let report = harn_vm::orchestration::evaluate_run_against_fixture(&run, &fixture);
1745 println!(
1746 "Embedded replay fixture: {}",
1747 if report.pass { "PASS" } else { "FAIL" }
1748 );
1749 for transition in &run.transitions {
1750 println!(
1751 "transition {} -> {} ({})",
1752 transition
1753 .from_node_id
1754 .clone()
1755 .unwrap_or_else(|| "start".to_string()),
1756 transition.to_node_id,
1757 transition
1758 .branch
1759 .clone()
1760 .unwrap_or_else(|| "default".to_string())
1761 );
1762 }
1763}
1764
1765fn eval_run_record(
1766 path: &str,
1767 compare: Option<&str>,
1768 structural_experiment: Option<&str>,
1769 argv: &[String],
1770 llm_mock_mode: &commands::run::CliLlmMockMode,
1771) {
1772 if let Some(experiment) = structural_experiment {
1773 let path_buf = PathBuf::from(path);
1774 if !path_buf.is_file() || path_buf.extension().and_then(|ext| ext.to_str()) != Some("harn")
1775 {
1776 eprintln!(
1777 "--structural-experiment currently requires a .harn pipeline path, got {}",
1778 path
1779 );
1780 process::exit(1);
1781 }
1782 if compare.is_some() {
1783 eprintln!("--compare cannot be combined with --structural-experiment");
1784 process::exit(1);
1785 }
1786 if matches!(llm_mock_mode, commands::run::CliLlmMockMode::Record { .. }) {
1787 eprintln!("--llm-mock-record cannot be combined with --structural-experiment");
1788 process::exit(1);
1789 }
1790 let path_buf = fs::canonicalize(&path_buf).unwrap_or_else(|error| {
1791 command_error(&format!(
1792 "failed to canonicalize structural eval pipeline {}: {error}",
1793 path_buf.display()
1794 ))
1795 });
1796 run_structural_experiment_eval(&path_buf, experiment, argv, llm_mock_mode);
1797 return;
1798 }
1799
1800 let path_buf = PathBuf::from(path);
1801 if path_buf.is_file() && file_looks_like_persona_eval_ladder_manifest(&path_buf) {
1802 if compare.is_some() {
1803 eprintln!("--compare is not supported with persona eval ladder manifests");
1804 process::exit(1);
1805 }
1806 let manifest = load_persona_eval_ladder_manifest_or_exit(&path_buf);
1807 let report =
1808 harn_vm::orchestration::run_persona_eval_ladder(&manifest).unwrap_or_else(|error| {
1809 eprintln!(
1810 "Failed to evaluate persona eval ladder {}: {error}",
1811 path_buf.display()
1812 );
1813 process::exit(1);
1814 });
1815 print_persona_ladder_report(&report);
1816 if !report.pass {
1817 process::exit(1);
1818 }
1819 return;
1820 }
1821
1822 if path_buf.is_file() && file_looks_like_eval_pack_manifest(&path_buf) {
1823 if compare.is_some() {
1824 eprintln!("--compare is not supported with eval pack manifests");
1825 process::exit(1);
1826 }
1827 let manifest = load_eval_pack_manifest_or_exit(&path_buf);
1828 let report = harn_vm::orchestration::evaluate_eval_pack_manifest(&manifest).unwrap_or_else(
1829 |error| {
1830 eprintln!(
1831 "Failed to evaluate eval pack {}: {error}",
1832 path_buf.display()
1833 );
1834 process::exit(1);
1835 },
1836 );
1837 print_eval_pack_report(&report);
1838 if !report.pass {
1839 process::exit(1);
1840 }
1841 return;
1842 }
1843
1844 if path_buf.is_file() && file_looks_like_eval_manifest(&path_buf) {
1845 if compare.is_some() {
1846 eprintln!("--compare is not supported with eval suite manifests");
1847 process::exit(1);
1848 }
1849 let manifest = load_eval_suite_manifest_or_exit(&path_buf);
1850 let suite = harn_vm::orchestration::evaluate_run_suite_manifest(&manifest).unwrap_or_else(
1851 |error| {
1852 eprintln!(
1853 "Failed to evaluate manifest {}: {error}",
1854 path_buf.display()
1855 );
1856 process::exit(1);
1857 },
1858 );
1859 println!(
1860 "{} {} passed, {} failed, {} total",
1861 if suite.pass { "PASS" } else { "FAIL" },
1862 suite.passed,
1863 suite.failed,
1864 suite.total
1865 );
1866 for case in &suite.cases {
1867 println!(
1868 "- {} [{}] {}",
1869 case.label.clone().unwrap_or_else(|| case.run_id.clone()),
1870 case.workflow_id,
1871 if case.pass { "PASS" } else { "FAIL" }
1872 );
1873 if let Some(path) = &case.source_path {
1874 println!(" path: {}", path);
1875 }
1876 if let Some(comparison) = &case.comparison {
1877 println!(" baseline identical: {}", comparison.identical);
1878 if !comparison.identical {
1879 println!(
1880 " baseline status: {} -> {}",
1881 comparison.left_status, comparison.right_status
1882 );
1883 }
1884 }
1885 for failure in &case.failures {
1886 println!(" {}", failure);
1887 }
1888 }
1889 if !suite.pass {
1890 process::exit(1);
1891 }
1892 return;
1893 }
1894
1895 let paths = collect_run_record_paths(path);
1896 if paths.len() > 1 {
1897 let mut cases = Vec::new();
1898 for path in &paths {
1899 let run = load_run_record_or_exit(path);
1900 let fixture = run
1901 .replay_fixture
1902 .clone()
1903 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1904 cases.push((run, fixture, Some(path.display().to_string())));
1905 }
1906 let suite = harn_vm::orchestration::evaluate_run_suite(cases);
1907 println!(
1908 "{} {} passed, {} failed, {} total",
1909 if suite.pass { "PASS" } else { "FAIL" },
1910 suite.passed,
1911 suite.failed,
1912 suite.total
1913 );
1914 for case in &suite.cases {
1915 println!(
1916 "- {} [{}] {}",
1917 case.run_id,
1918 case.workflow_id,
1919 if case.pass { "PASS" } else { "FAIL" }
1920 );
1921 if let Some(path) = &case.source_path {
1922 println!(" path: {}", path);
1923 }
1924 if let Some(comparison) = &case.comparison {
1925 println!(" baseline identical: {}", comparison.identical);
1926 }
1927 for failure in &case.failures {
1928 println!(" {}", failure);
1929 }
1930 }
1931 if !suite.pass {
1932 process::exit(1);
1933 }
1934 return;
1935 }
1936
1937 let run = load_run_record_or_exit(&paths[0]);
1938 let fixture = run
1939 .replay_fixture
1940 .clone()
1941 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(&run));
1942 let report = harn_vm::orchestration::evaluate_run_against_fixture(&run, &fixture);
1943 println!("{}", if report.pass { "PASS" } else { "FAIL" });
1944 println!("Stages: {}", report.stage_count);
1945 if let Some(compare_path) = compare {
1946 let baseline = load_run_record_or_exit(Path::new(compare_path));
1947 print_run_diff(&harn_vm::orchestration::diff_run_records(&baseline, &run));
1948 }
1949 if !report.failures.is_empty() {
1950 for failure in &report.failures {
1951 println!("- {}", failure);
1952 }
1953 }
1954 if !report.pass {
1955 process::exit(1);
1956 }
1957}
1958
1959fn print_eval_pack_report(report: &harn_vm::orchestration::EvalPackReport) {
1960 println!(
1961 "{} {} passed, {} blocking failed, {} warning, {} informational, {} total",
1962 if report.pass { "PASS" } else { "FAIL" },
1963 report.passed,
1964 report.blocking_failed,
1965 report.warning_failed,
1966 report.informational_failed,
1967 report.total
1968 );
1969 for case in &report.cases {
1970 println!(
1971 "- {} [{}] {} ({})",
1972 case.label,
1973 case.workflow_id,
1974 if case.pass { "PASS" } else { "FAIL" },
1975 case.severity
1976 );
1977 if let Some(path) = &case.source_path {
1978 println!(" path: {}", path);
1979 }
1980 if let Some(comparison) = &case.comparison {
1981 println!(" baseline identical: {}", comparison.identical);
1982 if !comparison.identical {
1983 println!(
1984 " baseline status: {} -> {}",
1985 comparison.left_status, comparison.right_status
1986 );
1987 }
1988 }
1989 for failure in &case.failures {
1990 println!(" {}", failure);
1991 }
1992 for warning in &case.warnings {
1993 println!(" warning: {}", warning);
1994 }
1995 for item in &case.informational {
1996 println!(" info: {}", item);
1997 }
1998 }
1999 for ladder in &report.ladders {
2000 println!(
2001 "- ladder {} [{}] {} ({}) first_correct={}/{}",
2002 ladder.id,
2003 ladder.persona,
2004 if ladder.pass { "PASS" } else { "FAIL" },
2005 ladder.severity,
2006 ladder.first_correct_route.as_deref().unwrap_or("<none>"),
2007 ladder.first_correct_tier.as_deref().unwrap_or("<none>")
2008 );
2009 println!(" artifacts: {}", ladder.artifact_root);
2010 for tier in &ladder.tiers {
2011 println!(
2012 " - {} [{}] {} tools={} models={} latency={}ms cost=${:.6}",
2013 tier.timeout_tier,
2014 tier.route_id,
2015 tier.outcome,
2016 tier.tool_calls,
2017 tier.model_calls,
2018 tier.latency_ms,
2019 tier.cost_usd
2020 );
2021 for reason in &tier.degradation_reasons {
2022 println!(" {}", reason);
2023 }
2024 }
2025 }
2026}
2027
2028fn print_persona_ladder_report(report: &harn_vm::orchestration::PersonaEvalLadderReport) {
2029 println!(
2030 "{} ladder {} passed, {} degraded/looped, {} total",
2031 if report.pass { "PASS" } else { "FAIL" },
2032 report.passed,
2033 report.failed,
2034 report.total
2035 );
2036 println!(
2037 "first_correct: {}/{}",
2038 report.first_correct_route.as_deref().unwrap_or("<none>"),
2039 report.first_correct_tier.as_deref().unwrap_or("<none>")
2040 );
2041 println!("artifacts: {}", report.artifact_root);
2042 for tier in &report.tiers {
2043 println!(
2044 "- {} [{}] {} tools={} models={} latency={}ms cost=${:.6}",
2045 tier.timeout_tier,
2046 tier.route_id,
2047 tier.outcome,
2048 tier.tool_calls,
2049 tier.model_calls,
2050 tier.latency_ms,
2051 tier.cost_usd
2052 );
2053 for reason in &tier.degradation_reasons {
2054 println!(" {}", reason);
2055 }
2056 }
2057}
2058
2059fn run_package_evals() {
2060 let paths = package::load_package_eval_pack_paths(None).unwrap_or_else(|error| {
2061 eprintln!("{error}");
2062 process::exit(1);
2063 });
2064 let mut all_pass = true;
2065 for path in &paths {
2066 println!("Eval pack: {}", path.display());
2067 let manifest = load_eval_pack_manifest_or_exit(path);
2068 let report = harn_vm::orchestration::evaluate_eval_pack_manifest(&manifest).unwrap_or_else(
2069 |error| {
2070 eprintln!("Failed to evaluate eval pack {}: {error}", path.display());
2071 process::exit(1);
2072 },
2073 );
2074 print_eval_pack_report(&report);
2075 all_pass &= report.pass;
2076 }
2077 if !all_pass {
2078 process::exit(1);
2079 }
2080}
2081
2082fn run_structural_experiment_eval(
2083 path: &Path,
2084 experiment: &str,
2085 argv: &[String],
2086 llm_mock_mode: &commands::run::CliLlmMockMode,
2087) {
2088 let baseline_dir = tempfile::Builder::new()
2089 .prefix("harn-eval-baseline-")
2090 .tempdir()
2091 .unwrap_or_else(|error| {
2092 command_error(&format!("failed to create baseline tempdir: {error}"))
2093 });
2094 let variant_dir = tempfile::Builder::new()
2095 .prefix("harn-eval-variant-")
2096 .tempdir()
2097 .unwrap_or_else(|error| {
2098 command_error(&format!("failed to create variant tempdir: {error}"))
2099 });
2100
2101 let baseline = spawn_eval_pipeline_run(path, baseline_dir.path(), None, argv, llm_mock_mode);
2102 if !baseline.status.success() {
2103 relay_subprocess_failure("baseline", &baseline);
2104 }
2105
2106 let variant = spawn_eval_pipeline_run(
2107 path,
2108 variant_dir.path(),
2109 Some(experiment),
2110 argv,
2111 llm_mock_mode,
2112 );
2113 if !variant.status.success() {
2114 relay_subprocess_failure("variant", &variant);
2115 }
2116
2117 let baseline_runs = collect_structural_eval_runs(baseline_dir.path());
2118 let variant_runs = collect_structural_eval_runs(variant_dir.path());
2119 if baseline_runs.is_empty() || variant_runs.is_empty() {
2120 eprintln!(
2121 "structural eval expected workflow run records under {} and {}, but one side was empty",
2122 baseline_dir.path().display(),
2123 variant_dir.path().display()
2124 );
2125 process::exit(1);
2126 }
2127 if baseline_runs.len() != variant_runs.len() {
2128 eprintln!(
2129 "structural eval produced different run counts: baseline={} variant={}",
2130 baseline_runs.len(),
2131 variant_runs.len()
2132 );
2133 process::exit(1);
2134 }
2135
2136 let mut baseline_ok = 0usize;
2137 let mut variant_ok = 0usize;
2138 let mut any_failures = false;
2139
2140 println!("Structural experiment: {}", experiment);
2141 println!("Cases: {}", baseline_runs.len());
2142 for (baseline_run, variant_run) in baseline_runs.iter().zip(variant_runs.iter()) {
2143 let baseline_fixture = baseline_run
2144 .replay_fixture
2145 .clone()
2146 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(baseline_run));
2147 let variant_fixture = variant_run
2148 .replay_fixture
2149 .clone()
2150 .unwrap_or_else(|| harn_vm::orchestration::replay_fixture_from_run(variant_run));
2151 let baseline_report =
2152 harn_vm::orchestration::evaluate_run_against_fixture(baseline_run, &baseline_fixture);
2153 let variant_report =
2154 harn_vm::orchestration::evaluate_run_against_fixture(variant_run, &variant_fixture);
2155 let diff = harn_vm::orchestration::diff_run_records(baseline_run, variant_run);
2156 if baseline_report.pass {
2157 baseline_ok += 1;
2158 }
2159 if variant_report.pass {
2160 variant_ok += 1;
2161 }
2162 any_failures |= !baseline_report.pass || !variant_report.pass;
2163 println!(
2164 "- {} [{}]",
2165 variant_run
2166 .workflow_name
2167 .clone()
2168 .unwrap_or_else(|| variant_run.workflow_id.clone()),
2169 variant_run.task
2170 );
2171 println!(
2172 " baseline: {}",
2173 if baseline_report.pass { "PASS" } else { "FAIL" }
2174 );
2175 for failure in &baseline_report.failures {
2176 println!(" {}", failure);
2177 }
2178 println!(
2179 " variant: {}",
2180 if variant_report.pass { "PASS" } else { "FAIL" }
2181 );
2182 for failure in &variant_report.failures {
2183 println!(" {}", failure);
2184 }
2185 println!(" diff identical: {}", diff.identical);
2186 println!(" stage diffs: {}", diff.stage_diffs.len());
2187 println!(" tool diffs: {}", diff.tool_diffs.len());
2188 println!(" observability diffs: {}", diff.observability_diffs.len());
2189 }
2190
2191 println!("Baseline {} / {} passed", baseline_ok, baseline_runs.len());
2192 println!("Variant {} / {} passed", variant_ok, variant_runs.len());
2193
2194 if any_failures {
2195 process::exit(1);
2196 }
2197}
2198
2199fn spawn_eval_pipeline_run(
2200 path: &Path,
2201 run_dir: &Path,
2202 structural_experiment: Option<&str>,
2203 argv: &[String],
2204 llm_mock_mode: &commands::run::CliLlmMockMode,
2205) -> std::process::Output {
2206 let exe = env::current_exe().unwrap_or_else(|error| {
2207 command_error(&format!("failed to resolve current executable: {error}"))
2208 });
2209 let mut command = std::process::Command::new(exe);
2210 command.current_dir(path.parent().unwrap_or_else(|| Path::new(".")));
2211 command.arg("run");
2212 match llm_mock_mode {
2213 commands::run::CliLlmMockMode::Off => {}
2214 commands::run::CliLlmMockMode::Replay { fixture_path } => {
2215 command
2216 .arg("--llm-mock")
2217 .arg(absolute_cli_path(fixture_path));
2218 }
2219 commands::run::CliLlmMockMode::Record { fixture_path } => {
2220 command
2221 .arg("--llm-mock-record")
2222 .arg(absolute_cli_path(fixture_path));
2223 }
2224 }
2225 command.arg(path);
2226 if !argv.is_empty() {
2227 command.arg("--");
2228 command.args(argv);
2229 }
2230 command.env(harn_vm::runtime_paths::HARN_RUN_DIR_ENV, run_dir);
2231 if let Some(experiment) = structural_experiment {
2232 command.env("HARN_STRUCTURAL_EXPERIMENT", experiment);
2233 }
2234 command.output().unwrap_or_else(|error| {
2235 command_error(&format!(
2236 "failed to spawn `harn run {}` for structural eval: {error}",
2237 path.display()
2238 ))
2239 })
2240}
2241
/// Returns `path` unchanged when it is already absolute; otherwise joins it
/// onto the current working directory.
///
/// If the working directory cannot be determined, `.` is used as the base, so
/// the result may still be relative in that case.
fn absolute_cli_path(path: &Path) -> PathBuf {
    if path.is_absolute() {
        path.to_path_buf()
    } else {
        let base = match env::current_dir() {
            Ok(cwd) => cwd,
            Err(_) => PathBuf::from("."),
        };
        base.join(path)
    }
}
2250
/// Forwards a failed subprocess's captured output to stderr and terminates
/// with the child's exit status.
///
/// Each non-blank stream is printed under a `[label] stdout:` / `[label]
/// stderr:` header (stdout first). The process exits with the child's status
/// code, or 1 when the child reported no code.
fn relay_subprocess_failure(label: &str, output: &std::process::Output) -> ! {
    for (stream, bytes) in [("stdout", &output.stdout), ("stderr", &output.stderr)] {
        let text = String::from_utf8_lossy(bytes);
        if !text.trim().is_empty() {
            eprintln!("[{label}] {stream}:\n{text}");
        }
    }
    process::exit(output.status.code().unwrap_or(1));
}
2262
2263fn collect_structural_eval_runs(dir: &Path) -> Vec<harn_vm::orchestration::RunRecord> {
2264 let mut paths: Vec<PathBuf> = fs::read_dir(dir)
2265 .unwrap_or_else(|error| {
2266 command_error(&format!(
2267 "failed to read structural eval run dir {}: {error}",
2268 dir.display()
2269 ))
2270 })
2271 .filter_map(|entry| entry.ok().map(|entry| entry.path()))
2272 .filter(|entry| entry.extension().and_then(|ext| ext.to_str()) == Some("json"))
2273 .collect();
2274 paths.sort();
2275 let mut runs: Vec<_> = paths
2276 .iter()
2277 .map(|path| load_run_record_or_exit(path))
2278 .collect();
2279 runs.sort_by(|left, right| {
2280 (
2281 left.started_at.as_str(),
2282 left.workflow_id.as_str(),
2283 left.task.as_str(),
2284 )
2285 .cmp(&(
2286 right.started_at.as_str(),
2287 right.workflow_id.as_str(),
2288 right.task.as_str(),
2289 ))
2290 });
2291 runs
2292}
2293
2294pub(crate) fn parse_source_file(path: &str) -> (String, Vec<harn_parser::SNode>) {
2296 let source = match fs::read_to_string(path) {
2297 Ok(s) => s,
2298 Err(e) => {
2299 eprintln!("Error reading {path}: {e}");
2300 process::exit(1);
2301 }
2302 };
2303
2304 let mut lexer = Lexer::new(&source);
2305 let tokens = match lexer.tokenize() {
2306 Ok(t) => t,
2307 Err(e) => {
2308 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2309 &source,
2310 path,
2311 &error_span_from_lex(&e),
2312 "error",
2313 &e.to_string(),
2314 Some("here"),
2315 None,
2316 );
2317 eprint!("{diagnostic}");
2318 process::exit(1);
2319 }
2320 };
2321
2322 let mut parser = Parser::new(tokens);
2323 let program = match parser.parse() {
2324 Ok(p) => p,
2325 Err(err) => {
2326 if parser.all_errors().is_empty() {
2327 let span = error_span_from_parse(&err);
2328 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2329 &source,
2330 path,
2331 &span,
2332 "error",
2333 &harn_parser::diagnostic::parser_error_message(&err),
2334 Some(harn_parser::diagnostic::parser_error_label(&err)),
2335 harn_parser::diagnostic::parser_error_help(&err),
2336 );
2337 eprint!("{diagnostic}");
2338 } else {
2339 for e in parser.all_errors() {
2340 let span = error_span_from_parse(e);
2341 let diagnostic = harn_parser::diagnostic::render_diagnostic(
2342 &source,
2343 path,
2344 &span,
2345 "error",
2346 &harn_parser::diagnostic::parser_error_message(e),
2347 Some(harn_parser::diagnostic::parser_error_label(e)),
2348 harn_parser::diagnostic::parser_error_help(e),
2349 );
2350 eprint!("{diagnostic}");
2351 }
2352 }
2353 process::exit(1);
2354 }
2355 };
2356
2357 (source, program)
2358}
2359
2360fn error_span_from_lex(e: &harn_lexer::LexerError) -> harn_lexer::Span {
2361 match e {
2362 harn_lexer::LexerError::UnexpectedCharacter(_, span)
2363 | harn_lexer::LexerError::UnterminatedString(span)
2364 | harn_lexer::LexerError::UnterminatedBlockComment(span) => *span,
2365 }
2366}
2367
2368fn error_span_from_parse(e: &harn_parser::ParserError) -> harn_lexer::Span {
2369 match e {
2370 harn_parser::ParserError::Unexpected { span, .. } => *span,
2371 harn_parser::ParserError::UnexpectedEof { span, .. } => *span,
2372 }
2373}
2374
/// Lexes, parses, type-checks, compiles and runs `source` on a fresh VM.
///
/// `source_path`, when given, is used to resolve imports for the type checker,
/// to locate the project root and store directory, to name the checkpoint
/// pipeline (file stem, `"default"` otherwise), and to load skills and package
/// runtime extensions.
///
/// # Returns
///
/// On success, one `warning: ...` line per type-checker warning followed by
/// the VM's captured output.
///
/// # Errors
///
/// Returns the stringified lexer, parser, compiler or VM error, the message of
/// the first error-severity type diagnostic, or a setup failure (manifest
/// triggers, manifest hooks, connector clients).
pub(crate) async fn execute(source: &str, source_path: Option<&Path>) -> Result<String, String> {
    let mut lexer = Lexer::new(source);
    let tokens = lexer.tokenize().map_err(|e| e.to_string())?;
    let mut parser = Parser::new(tokens);
    let program = parser.parse().map_err(|e| e.to_string())?;

    let mut checker = TypeChecker::new();
    // With a file on disk, build the module graph rooted at it and hand the
    // imported names, type declarations and callable declarations to the
    // type checker.
    if let Some(path) = source_path {
        let graph = harn_modules::build(&[path.to_path_buf()]);
        if let Some(imported) = graph.imported_names_for_file(path) {
            checker = checker.with_imported_names(imported);
        }
        if let Some(imported) = graph.imported_type_declarations_for_file(path) {
            checker = checker.with_imported_type_decls(imported);
        }
        if let Some(imported) = graph.imported_callable_declarations_for_file(path) {
            checker = checker.with_imported_callable_decls(imported);
        }
    }
    let type_diagnostics = checker.check(&program);
    // The first error-severity diagnostic aborts; warnings are collected and
    // prepended to the output on success.
    let mut warning_lines = Vec::new();
    for diag in &type_diagnostics {
        match diag.severity {
            DiagnosticSeverity::Error => return Err(diag.message.clone()),
            DiagnosticSeverity::Warning => {
                warning_lines.push(format!("warning: {}", diag.message));
            }
        }
    }

    let chunk = harn_vm::Compiler::new()
        .compile(&program)
        .map_err(|e| e.to_string())?;

    // VM setup and execution are driven inside a tokio `LocalSet`.
    let local = tokio::task::LocalSet::new();
    local
        .run_until(async {
            let mut vm = harn_vm::Vm::new();
            harn_vm::register_vm_stdlib(&mut vm);
            install_default_hostlib(&mut vm);
            // Directory of the source file, or "." when there is no path/parent.
            let source_parent = source_path
                .and_then(|p| p.parent())
                .unwrap_or(std::path::Path::new("."));
            let project_root = harn_vm::stdlib::process::find_project_root(source_parent);
            // Store, metadata and checkpoint builtins are rooted at the project
            // root when one was found, otherwise at the source directory.
            let store_base = project_root.as_deref().unwrap_or(source_parent);
            let execution_cwd = std::env::current_dir()
                .unwrap_or_else(|_| std::path::PathBuf::from("."))
                .to_string_lossy()
                .into_owned();
            let source_dir = source_parent.to_string_lossy().into_owned();
            // Conformance sources get an in-memory event log installed on this
            // thread before any builtins are registered.
            if source_path.is_some_and(is_conformance_path) {
                harn_vm::event_log::install_memory_for_current_thread(64);
            }
            harn_vm::register_store_builtins(&mut vm, store_base);
            harn_vm::register_metadata_builtins(&mut vm, store_base);
            let pipeline_name = source_path
                .and_then(|p| p.file_stem())
                .and_then(|s| s.to_str())
                .unwrap_or("default");
            harn_vm::register_checkpoint_builtins(&mut vm, store_base, pipeline_name);
            // Publish the execution context (cwd and source dir only) for this
            // thread; it is cleared again after the VM has run.
            harn_vm::stdlib::process::set_thread_execution_context(Some(
                harn_vm::orchestration::RunExecutionRecord {
                    cwd: Some(execution_cwd),
                    source_dir: Some(source_dir),
                    env: std::collections::BTreeMap::new(),
                    adapter: None,
                    repo_path: None,
                    worktree_path: None,
                    branch: None,
                    base_ref: None,
                    cleanup: None,
                },
            ));
            if let Some(ref root) = project_root {
                vm.set_project_root(root);
            }
            if let Some(path) = source_path {
                if let Some(parent) = path.parent() {
                    // A bare file name has an empty parent; leave the VM's
                    // source dir unset in that case.
                    if !parent.as_os_str().is_empty() {
                        vm.set_source_dir(parent);
                    }
                }
            }
            // Load skills with no CLI-provided directories, surface loader
            // warnings, and install the result into the VM.
            let loaded = skill_loader::load_skills(&skill_loader::SkillLoaderInputs {
                cli_dirs: Vec::new(),
                source_path: source_path.map(Path::to_path_buf),
            });
            skill_loader::emit_loader_warnings(&loaded.loader_warnings);
            skill_loader::install_skills_global(&mut vm, &loaded);
            // NOTE(review): the `?` early returns below (and the connector
            // client one further down) leave this block before the cleanup
            // that follows `vm.execute`, so the thread execution context set
            // above is not cleared on those paths -- confirm this is intended.
            if let Some(path) = source_path {
                let extensions = package::load_runtime_extensions(path);
                package::install_runtime_extensions(&extensions);
                package::install_manifest_triggers(&mut vm, &extensions)
                    .await
                    .map_err(|error| format!("failed to install manifest triggers: {error}"))?;
                package::install_manifest_hooks(&mut vm, &extensions)
                    .await
                    .map_err(|error| format!("failed to install manifest hooks: {error}"))?;
            }
            // Make sure an event log is active: reuse the current one or
            // install an in-memory log for this thread.
            let _event_log = harn_vm::event_log::active_event_log()
                .unwrap_or_else(|| harn_vm::event_log::install_memory_for_current_thread(64));
            let connector_clients_installed =
                should_install_default_connector_clients(source, source_path);
            if connector_clients_installed {
                install_default_connector_clients(store_base)
                    .await
                    .map_err(|error| format!("failed to initialize connector clients: {error}"))?;
            }
            // Keep the result unpropagated until cleanup has run, so cleanup
            // happens whether or not the program failed.
            let execution_result = vm.execute(&chunk).await.map_err(|e| e.to_string());
            harn_vm::egress::reset_egress_policy_for_host();
            if connector_clients_installed {
                harn_vm::clear_active_connector_clients();
            }
            harn_vm::stdlib::process::set_thread_execution_context(None);
            execution_result?;
            // Warnings first (one per line), then whatever the VM captured.
            let mut output = String::new();
            for wl in &warning_lines {
                output.push_str(wl);
                output.push('\n');
            }
            output.push_str(vm.output());
            Ok(output)
        })
        .await
}
2508
2509fn should_install_default_connector_clients(source: &str, source_path: Option<&Path>) -> bool {
2510 if !source_path.is_some_and(is_conformance_path) {
2511 return true;
2512 }
2513 source.contains("connector_call")
2514 || source.contains("std/connectors")
2515 || source.contains("connectors/")
2516}
2517
/// Returns `true` when any component of `path` is exactly `conformance`.
fn is_conformance_path(path: &Path) -> bool {
    path.iter().any(|segment| segment == "conformance")
}
2522
2523async fn install_default_connector_clients(base_dir: &Path) -> Result<(), String> {
2524 let event_log = harn_vm::event_log::active_event_log()
2525 .unwrap_or_else(|| harn_vm::event_log::install_memory_for_current_thread(64));
2526 let secret_namespace = connector_secret_namespace(base_dir);
2527 let secrets: Arc<dyn harn_vm::secrets::SecretProvider> = Arc::new(
2528 harn_vm::secrets::configured_default_chain(secret_namespace)
2529 .map_err(|error| format!("failed to configure secret providers: {error}"))?,
2530 );
2531
2532 let registry = harn_vm::ConnectorRegistry::default();
2533 let metrics = Arc::new(harn_vm::MetricsRegistry::default());
2534 let inbox = Arc::new(
2535 harn_vm::InboxIndex::new(event_log.clone(), metrics.clone())
2536 .await
2537 .map_err(|error| error.to_string())?,
2538 );
2539 registry
2540 .init_all(harn_vm::ConnectorCtx {
2541 event_log,
2542 secrets,
2543 inbox,
2544 metrics,
2545 rate_limiter: Arc::new(harn_vm::RateLimiterFactory::default()),
2546 })
2547 .await
2548 .map_err(|error| error.to_string())?;
2549 let clients = registry.client_map().await;
2550 harn_vm::install_active_connector_clients(clients);
2551 Ok(())
2552}
2553
/// Picks the secret namespace used for connector credentials.
///
/// A non-blank `HARN_SECRET_NAMESPACE` environment variable wins and is
/// returned as-is. Otherwise the namespace is `harn/<leaf>`, where `<leaf>` is
/// the final component of `base_dir`, or `workspace` when that component is
/// missing, empty or not valid UTF-8.
fn connector_secret_namespace(base_dir: &Path) -> String {
    if let Ok(configured) = std::env::var("HARN_SECRET_NAMESPACE") {
        if !configured.trim().is_empty() {
            return configured;
        }
    }

    let leaf = match base_dir.file_name().and_then(|name| name.to_str()) {
        Some(name) if !name.is_empty() => name,
        _ => "workspace",
    };
    format!("harn/{leaf}")
}
2567
#[cfg(test)]
mod main_tests {
    use super::{normalize_serve_args, should_install_default_connector_clients};
    use std::path::Path;

    /// Builds an owned argument vector from string literals.
    fn argv(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    /// `harn serve <flags> <file>` without a subcommand is rewritten to
    /// `harn serve a2a ...`.
    #[test]
    fn normalize_serve_args_inserts_a2a_for_legacy_shape() {
        let normalized = normalize_serve_args(argv(&[
            "harn",
            "serve",
            "--port",
            "3000",
            "agent.harn",
        ]));
        assert_eq!(
            normalized,
            argv(&["harn", "serve", "a2a", "--port", "3000", "agent.harn"])
        );
    }

    /// An explicit `serve` subcommand is passed through untouched.
    #[test]
    fn normalize_serve_args_preserves_explicit_subcommands() {
        let original = argv(&["harn", "serve", "acp", "server.harn"]);
        let normalized = normalize_serve_args(original.clone());
        assert_eq!(normalized, original);
    }

    /// Conformance fixtures only get connector clients when their source
    /// references connectors; other paths always do.
    #[test]
    fn conformance_skips_connector_clients_unless_fixture_uses_connectors() {
        let conformance = Path::new("conformance/tests/language/basic.harn");
        let cases = [
            ("println(1)", conformance, false),
            ("trust_graph_verify_chain()", conformance, false),
            (
                "import { post_message } from \"std/connectors/slack\"",
                conformance,
                true,
            ),
            ("println(1)", Path::new("examples/demo.harn"), true),
        ];
        for (source, path, expected) in cases {
            assert_eq!(
                should_install_default_connector_clients(source, Some(path)),
                expected,
                "source={source:?} path={}",
                path.display()
            );
        }
    }
}