1use std::collections::{BTreeMap, BTreeSet};
8
9use serde::{Deserialize, Serialize};
10
11use super::tool_conformance::{report_satisfies_required_probe, ToolConformanceReport};
12use crate::llm_config;
13
14#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
15#[serde(rename_all = "snake_case")]
16pub enum RuntimeProfileStatus {
17 Preferred,
18 Experimental,
19 VisionOnlyExperimental,
20 Quarantined,
21 Unknown,
22}
23
24impl RuntimeProfileStatus {
25 pub fn as_str(&self) -> &'static str {
26 match self {
27 Self::Preferred => "preferred",
28 Self::Experimental => "experimental",
29 Self::VisionOnlyExperimental => "vision_only_experimental",
30 Self::Quarantined => "quarantined",
31 Self::Unknown => "unknown",
32 }
33 }
34
35 pub fn requires_probe_gate(&self) -> bool {
36 !matches!(self, Self::Preferred | Self::Unknown)
37 }
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct RuntimeProfile {
42 pub status: RuntimeProfileStatus,
43 pub requires: Vec<String>,
44 pub recommended_num_ctx: Option<u64>,
45 pub known_risks: Vec<String>,
46 pub workarounds: Vec<String>,
47 pub notes: Vec<String>,
48}
49
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct LocalRuntimeProfileReport {
52 pub alias: Option<String>,
53 pub model_id: String,
54 pub provider: String,
55 pub model_family: String,
56 pub selected_runtime: String,
57 pub selected_status: RuntimeProfileStatus,
58 pub requires_probe_gate: bool,
59 pub selected: RuntimeProfile,
60 pub runtime_profiles: BTreeMap<String, RuntimeProfile>,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct RuntimeProfileGate {
65 pub allowed: bool,
66 pub forced: bool,
67 pub selected_status: RuntimeProfileStatus,
68 pub missing_required_probes: Vec<String>,
69 pub passed_probes: Vec<String>,
70 pub message: String,
71}
72
73#[derive(Debug, Clone, Default)]
74pub struct RuntimeProbeEvidence {
75 passed: BTreeSet<String>,
76 tool_reports: Vec<ToolConformanceReport>,
77}
78
79impl RuntimeProbeEvidence {
80 pub fn new() -> Self {
81 Self::default()
82 }
83
84 pub fn add_passed(&mut self, probe: impl Into<String>) {
85 let probe = probe.into();
86 if !probe.trim().is_empty() {
87 self.passed.insert(probe);
88 }
89 }
90
91 pub fn add_tool_report(&mut self, report: ToolConformanceReport) {
92 if report_satisfies_required_probe(&report, "tool_probe") {
93 self.passed.insert("tool_probe".to_string());
94 self.passed.insert("tool_call_probe".to_string());
95 }
96 if report_satisfies_required_probe(&report, "native_tool_probe") {
97 self.passed.insert("native_tool_probe".to_string());
98 }
99 if report_satisfies_required_probe(&report, "streaming_tool_probe") {
100 self.passed.insert("streaming_tool_probe".to_string());
101 }
102 self.tool_reports.push(report);
103 }
104
105 pub fn passed(&self) -> Vec<String> {
106 self.passed.iter().cloned().collect()
107 }
108
109 fn satisfies(&self, requirement: &str) -> bool {
110 self.passed.contains(requirement)
111 || self
112 .tool_reports
113 .iter()
114 .any(|report| report_satisfies_required_probe(report, requirement))
115 }
116}
117
118pub fn local_runtime_profile_report(
119 selector: &str,
120 provider_override: Option<&str>,
121) -> LocalRuntimeProfileReport {
122 let resolved = llm_config::resolve_model_info(selector);
123 let provider = provider_override
124 .map(str::trim)
125 .filter(|provider| !provider.is_empty())
126 .map(str::to_string)
127 .unwrap_or_else(|| resolved.provider.clone());
128 local_runtime_profile_report_for(resolved.alias.as_deref(), &resolved.id, &provider)
129}
130
131pub fn local_runtime_profile_report_for(
132 alias: Option<&str>,
133 model_id: &str,
134 provider: &str,
135) -> LocalRuntimeProfileReport {
136 let family = model_family(alias, model_id);
137 let runtime_profiles = profiles_for_family(family);
138 let selected = runtime_profiles
139 .get(provider)
140 .cloned()
141 .unwrap_or_else(|| generic_profile(provider));
142 LocalRuntimeProfileReport {
143 alias: alias.map(str::to_string),
144 model_id: model_id.to_string(),
145 provider: provider.to_string(),
146 model_family: family.to_string(),
147 selected_runtime: provider.to_string(),
148 selected_status: selected.status.clone(),
149 requires_probe_gate: selected.status.requires_probe_gate(),
150 selected,
151 runtime_profiles,
152 }
153}
154
155pub fn evaluate_runtime_profile_gate(
156 report: &LocalRuntimeProfileReport,
157 evidence: &RuntimeProbeEvidence,
158 force: bool,
159) -> RuntimeProfileGate {
160 let missing: Vec<String> = if report.selected_status.requires_probe_gate() {
161 report
162 .selected
163 .requires
164 .iter()
165 .filter(|requirement| !evidence.satisfies(requirement))
166 .cloned()
167 .collect()
168 } else {
169 Vec::new()
170 };
171 let allowed = force || missing.is_empty();
172 let message = if force {
173 format!(
174 "{} via {} is {} but allowed by --force",
175 report.model_id,
176 report.provider,
177 report.selected_status.as_str()
178 )
179 } else if allowed {
180 format!(
181 "{} via {} is {}",
182 report.model_id,
183 report.provider,
184 report.selected_status.as_str()
185 )
186 } else {
187 format!(
188 "{} via {} is {}; required probes missing: {}",
189 report.model_id,
190 report.provider,
191 report.selected_status.as_str(),
192 missing.join(", ")
193 )
194 };
195 RuntimeProfileGate {
196 allowed,
197 forced: force,
198 selected_status: report.selected_status.clone(),
199 missing_required_probes: missing,
200 passed_probes: evidence.passed(),
201 message,
202 }
203}
204
/// Classifies a model into one of the known local model families.
///
/// Matching is ASCII case-insensitive and looks for family markers in either
/// the alias (when present) or the model id. Qwen3.6 markers are checked
/// before Gemma 4 markers; anything else is `"generic-local"`.
///
/// The two inputs are searched independently rather than concatenated: no
/// marker contains a space, so the result is the same and no joined string
/// needs to be built. No lifetime ties the inputs to the `'static` result.
fn model_family(alias: Option<&str>, model_id: &str) -> &'static str {
    let alias = alias.unwrap_or_default().to_ascii_lowercase();
    let model_id = model_id.to_ascii_lowercase();
    let mentions = |marker: &str| alias.contains(marker) || model_id.contains(marker);

    if mentions("qwen3.6") || mentions("qwen36") {
        "qwen3.6-a3b-hybrid"
    } else if mentions("gemma4") || mentions("gemma-4") {
        "gemma4-hybrid-moe"
    } else {
        "generic-local"
    }
}
219
220fn profiles_for_family(family: &str) -> BTreeMap<String, RuntimeProfile> {
221 match family {
222 "qwen3.6-a3b-hybrid" => BTreeMap::from([
223 (
224 "ollama".to_string(),
225 profile(
226 RuntimeProfileStatus::Preferred,
227 &["tool_probe", "effective_context_probe"],
228 Some(32_768),
229 &[],
230 &[
231 "Use the text tool wire format unless a fresh native probe passes.",
232 "Keep an explicit num_ctx so the resident runner matches eval settings.",
233 ],
234 &["Best cheap local default on the 2026-05-13 Burin eval pass."],
235 ),
236 ),
237 (
238 "llamacpp".to_string(),
239 profile(
240 RuntimeProfileStatus::Experimental,
241 &["tool_probe", "two_turn_cache_probe"],
242 Some(65_536),
243 &[
244 "full_prompt_reprocess_on_hybrid_cache",
245 "inflated_input_token_accounting_on_repeated_turns",
246 ],
247 &[
248 "Run a two-turn cache probe before write-heavy evals.",
249 "Prefer short-lived scan/edit loops until cache telemetry is clean.",
250 ],
251 &[
252 "Qwen3.6-family GGUF stacks can pass simple edits while still re-prefilling expensive prefixes.",
253 ],
254 ),
255 ),
256 (
257 "mlx".to_string(),
258 profile(
259 RuntimeProfileStatus::VisionOnlyExperimental,
260 &[
261 "served_model_identity_probe",
262 "persistent_readiness_probe",
263 "tool_probe",
264 ],
265 None,
266 &[
267 "stale_or_default_v1_models_identity",
268 "hybrid_prefix_cache_reuse_gap",
269 ],
270 &[
271 "Probe /v1/models twice and send one minimal chat request before selection.",
272 "Record server flags for APC, context length, batching, and thinking mode.",
273 ],
274 &["Use only when MLX-specific throughput or vision support is needed."],
275 ),
276 ),
277 ]),
278 "gemma4-hybrid-moe" => BTreeMap::from([
279 (
280 "ollama".to_string(),
281 profile(
282 RuntimeProfileStatus::Quarantined,
283 &["tool_probe"],
284 Some(32_768),
285 &[
286 "raw_tool_tag_no_structured_calls",
287 "completion_prose_without_executable_tool_calls",
288 ],
289 &[
290 "Allow only after the one-tool probe returns native or parseable text calls.",
291 "Use text mode and corrective retry for write-required turns.",
292 ],
293 &[
294 "Gemma4 through Ollama has produced raw <tool_call> blocks and final prose in local evals.",
295 ],
296 ),
297 ),
298 (
299 "llamacpp".to_string(),
300 profile(
301 RuntimeProfileStatus::Experimental,
302 &["tool_probe", "two_turn_cache_probe"],
303 Some(32_768),
304 &[
305 "full_prompt_reprocess_on_hybrid_cache",
306 "parser_template_drift",
307 ],
308 &[
309 "Confirm the served template emits parseable calls before any write eval.",
310 "Treat final prose as insufficient when artifacts are unchanged.",
311 ],
312 &["Prefer as an eval candidate, not a default editing runtime."],
313 ),
314 ),
315 (
316 "mlx".to_string(),
317 profile(
318 RuntimeProfileStatus::Experimental,
319 &[
320 "served_model_identity_probe",
321 "persistent_readiness_probe",
322 "tool_probe",
323 ],
324 None,
325 &[
326 "raw_gemma_tool_markers_in_content",
327 "hybrid_prefix_cache_reuse_gap",
328 ],
329 &[
330 "Keep raw marker parser fixtures enabled in the Harn text parser.",
331 "Verify OpenAI-compatible tool_calls is non-empty before native mode.",
332 ],
333 &["Use explicit server flags instead of opaque defaults."],
334 ),
335 ),
336 (
337 "local".to_string(),
338 profile(
339 RuntimeProfileStatus::Experimental,
340 &["tool_probe"],
341 Some(32_768),
342 &["provider_specific_parser_required"],
343 &["Prefer text mode until native parser support is proven."],
344 &["Generic local Gemma endpoints vary by serving stack."],
345 ),
346 ),
347 ]),
348 _ => BTreeMap::new(),
349 }
350}
351
352fn generic_profile(provider: &str) -> RuntimeProfile {
353 RuntimeProfile {
354 status: RuntimeProfileStatus::Unknown,
355 requires: vec!["readiness_probe".to_string()],
356 recommended_num_ctx: None,
357 known_risks: Vec::new(),
358 workarounds: Vec::new(),
359 notes: vec![format!(
360 "No dedicated local runtime profile for provider `{provider}` and this model family."
361 )],
362 }
363}
364
365fn profile(
366 status: RuntimeProfileStatus,
367 requires: &[&str],
368 recommended_num_ctx: Option<u64>,
369 known_risks: &[&str],
370 workarounds: &[&str],
371 notes: &[&str],
372) -> RuntimeProfile {
373 RuntimeProfile {
374 status,
375 requires: requires.iter().map(|value| (*value).to_string()).collect(),
376 recommended_num_ctx,
377 known_risks: known_risks
378 .iter()
379 .map(|value| (*value).to_string())
380 .collect(),
381 workarounds: workarounds
382 .iter()
383 .map(|value| (*value).to_string())
384 .collect(),
385 notes: notes.iter().map(|value| (*value).to_string()).collect(),
386 }
387}
388
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::tool_conformance::{classify_tool_conformance_fixture, ToolProbeMode};

    /// The same Qwen3.6 selector is preferred without a provider override and
    /// experimental when llama.cpp is requested explicitly.
    #[test]
    fn qwen_ollama_profile_is_preferred_and_llamacpp_is_experimental() {
        let default_report = local_runtime_profile_report("qwen3.6-coding", None);
        assert_eq!(default_report.model_family, "qwen3.6-a3b-hybrid");
        assert_eq!(
            default_report.selected_status,
            RuntimeProfileStatus::Preferred
        );

        let overridden = local_runtime_profile_report("qwen3.6-coding", Some("llamacpp"));
        assert_eq!(
            overridden.selected_status,
            RuntimeProfileStatus::Experimental
        );
        let expected_risk = "full_prompt_reprocess_on_hybrid_cache".to_string();
        assert!(overridden.selected.known_risks.contains(&expected_risk));
    }

    /// A quarantined profile is blocked with no evidence and released once a
    /// tool conformance report is added.
    #[test]
    fn gemma4_ollama_profile_is_quarantined_until_tool_probe_passes() {
        let report = local_runtime_profile_report("ollama-gemma4", None);
        assert_eq!(report.selected_status, RuntimeProfileStatus::Quarantined);

        let no_evidence = RuntimeProbeEvidence::new();
        let blocked = evaluate_runtime_profile_gate(&report, &no_evidence, false);
        assert!(!blocked.allowed);
        assert_eq!(
            blocked.missing_required_probes,
            vec!["tool_probe".to_string()]
        );

        let tool_report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            "harn_tool_probe_marker",
            r#"{"content":"echo_marker({ value: \"harn_tool_probe_marker\" })"}"#,
        );
        let mut evidence = RuntimeProbeEvidence::new();
        evidence.add_tool_report(tool_report);
        let gate = evaluate_runtime_profile_gate(&report, &evidence, false);
        assert!(gate.allowed, "{gate:?}");
    }

    /// `force` lets a gated profile through and records that it was forced.
    #[test]
    fn force_allows_risky_profile_with_receipt() {
        let report = local_runtime_profile_report("local-qwen3.6", None);
        assert_eq!(report.selected_status, RuntimeProfileStatus::Experimental);

        let no_evidence = RuntimeProbeEvidence::new();
        let gate = evaluate_runtime_profile_gate(&report, &no_evidence, true);
        assert!(gate.allowed);
        assert!(gate.forced);
    }
}