1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
use std::path::PathBuf;
use clap::{ArgAction, Args};
use super::util::{llm_model_completion_parser, llm_provider_completion_parser};
// Arguments for the model-info surface: takes a model alias or provider-native
// id and optionally verifies/warms it on providers that support those ops.
// NOTE: the `///` lines below are clap help text (runtime-visible), so review
// commentary is kept in `//` comments only.
#[derive(Debug, Args)]
pub(crate) struct ModelInfoArgs {
    /// Verify provider-local readiness for the resolved model when supported.
    #[arg(long)]
    pub verify: bool,
    /// Warm/preload the resolved model when supported. Implies --verify.
    #[arg(long)]
    pub warm: bool,
    /// Ollama keep_alive value to use with --warm (for example 30m, forever, or -1).
    // NOTE(review): help text says this pairs with --warm, but no clap-level
    // `requires = "warm"` is declared here — presumably validated (or ignored)
    // downstream; confirm.
    #[arg(long = "keep-alive", value_name = "VALUE")]
    pub keep_alive: Option<String>,
    /// Model alias or provider-native model id.
    // Positional argument. The custom value parser presumably feeds shell
    // completion; `hide_possible_values` keeps the (potentially long) model
    // list out of --help output.
    #[arg(
        value_parser = llm_model_completion_parser(),
        hide_possible_values = true
    )]
    pub model: String,
}
// Arguments for listing the provider catalog. A single filter flag; with it
// unset, the full catalog is listed.
#[derive(Debug, Args)]
pub(crate) struct ProviderCatalogArgs {
    /// Only include providers that are usable in the current environment.
    #[arg(long)]
    pub available_only: bool,
}
// Arguments for the provider-readiness probe: a required provider id plus an
// optional model that must appear in the provider's /models listing.
// NOTE: the `///` lines below are clap help text (runtime-visible).
#[derive(Debug, Args)]
pub(crate) struct ProviderReadyArgs {
    /// Provider id from Harn provider config, for example mlx or local.
    #[arg(
        value_parser = llm_provider_completion_parser(),
        hide_possible_values = true
    )]
    pub provider: String,
    /// Model alias or provider-native model id to require in /models.
    #[arg(
        long,
        value_parser = llm_model_completion_parser(),
        hide_possible_values = true
    )]
    pub model: Option<String>,
    /// Override the configured provider base URL for this probe.
    #[arg(long = "base-url")]
    pub base_url: Option<String>,
    /// Emit the full structured readiness result as JSON.
    // clap's derive already infers `ArgAction::SetTrue` with a `false`
    // default for a bare `bool` field, so the explicit
    // `default_value_t = false, action = ArgAction::SetTrue` the original
    // carried was redundant. A bare `#[arg(long)]` is behaviorally identical
    // and matches the other boolean flags in this file (--verify, --warm,
    // --available-only).
    #[arg(long)]
    pub json: bool,
}
/// Surface for `harn provider probe`: combined `/v1/models` readiness +
/// loaded-model state (`/api/ps` for Ollama) under one machine-readable
/// command. Evals consume the JSON to record cold load time / VRAM /
/// context length alongside per-call telemetry.
// NOTE: the `///` lines on fields below are clap help text (runtime-visible),
// so review commentary is kept in `//` comments only.
#[derive(Debug, Args)]
pub(crate) struct ProviderProbeArgs {
    /// Provider id from Harn provider config (`ollama`, `llamacpp`, `mlx`,
    /// `openai`, ...). Required because the probe is provider-scoped.
    #[arg(
        value_parser = llm_provider_completion_parser(),
        hide_possible_values = true
    )]
    pub provider: String,
    /// Optional model alias or provider-native id. When set the probe
    /// also confirms the model is currently served.
    #[arg(
        long,
        value_parser = llm_model_completion_parser(),
        hide_possible_values = true
    )]
    pub model: Option<String>,
    /// Override the configured provider base URL.
    #[arg(long = "base-url")]
    pub base_url: Option<String>,
    /// Emit JSON. Defaults to true since this command is meant for
    /// machine consumption (eval aggregators); pass `--json=false` to
    /// drop back to the human summary the readiness probe prints.
    // `num_args = 0..=1` + `default_missing_value = "true"` let the flag act
    // as a plain switch (`--json` == `--json true`) while still accepting an
    // explicit value (`--json false` / `--json=false`) to opt out; `Set`
    // (rather than the bool-default SetTrue) is what makes the explicit
    // value form parseable.
    #[arg(
        long,
        default_value_t = true,
        num_args = 0..=1,
        default_missing_value = "true",
        action = ArgAction::Set
    )]
    pub json: bool,
}
/// Run the one-tool provider conformance probe and emit JSON that eval
/// harnesses can use to select native, text, or disabled tool mode.
// NOTE: the `///` lines on fields below are clap help text (runtime-visible),
// so review commentary is kept in `//` comments only.
#[derive(Debug, Args)]
pub(crate) struct ProviderToolProbeArgs {
    /// Provider id from Harn provider config (`ollama`, `llamacpp`, `mlx`,
    /// `local`, ...).
    #[arg(
        value_parser = llm_provider_completion_parser(),
        hide_possible_values = true
    )]
    pub provider: String,
    /// Model alias or provider-native model id.
    // Unlike the readiness/probe surfaces above, the model is required here:
    // tool conformance is model-specific.
    #[arg(
        long,
        value_parser = llm_model_completion_parser(),
        hide_possible_values = true
    )]
    pub model: String,
    /// Override the configured provider base URL.
    #[arg(long = "base-url")]
    pub base_url: Option<String>,
    /// Probe only one transport mode instead of both.
    #[arg(long, value_enum, default_value_t = ProviderToolProbeModeArg::Both)]
    pub mode: ProviderToolProbeModeArg,
    /// Override the marker the model must echo through the tool call.
    // Default comes from a shared harn_vm constant so the CLI and the probe
    // implementation stay in agreement on the marker value.
    #[arg(long, default_value = harn_vm::llm::tool_conformance::DEFAULT_TOOL_PROBE_MARKER)]
    pub marker: String,
    /// Classify a saved provider response body instead of making a live request.
    // Offline path: when set, the probe presumably skips network calls
    // entirely and classifies the fixture file — confirm in the handler.
    #[arg(long = "response-fixture")]
    pub response_fixture: Option<PathBuf>,
    /// Request timeout in seconds for each live probe case.
    #[arg(long, default_value_t = 120)]
    pub timeout_secs: u64,
    /// Emit JSON. Defaults to true because evals and setup scripts consume
    /// the structured conformance report.
    // Same switch-with-explicit-value pattern as ProviderProbeArgs:
    // `--json` alone means true, `--json=false` opts out.
    #[arg(
        long,
        default_value_t = true,
        num_args = 0..=1,
        default_missing_value = "true",
        action = ArgAction::Set
    )]
    pub json: bool,
}
// Transport selection for `--mode` on the tool-conformance probe. clap's
// ValueEnum derive renames variants to kebab-case for the CLI, so the
// accepted values are `both`, `non-streaming`, and `streaming`. `//` comments
// only — `///` on variants would become possible-value help text.
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
pub(crate) enum ProviderToolProbeModeArg {
    // Probe both transports (the default, per ProviderToolProbeArgs::mode).
    Both,
    // Probe only the non-streaming request path.
    NonStreaming,
    // Probe only the streaming request path.
    Streaming,
}