1use std::collections::{BTreeMap, BTreeSet};
8
9use serde::{Deserialize, Serialize};
10
11use super::tool_conformance::{report_satisfies_required_probe, ToolConformanceReport};
12use crate::llm_config::{self, LocalMemoryDef, ModelDef};
13
/// Support level assigned to a (model family, runtime) pairing.
///
/// Variants serialize in snake_case via the `serde(rename_all)` attribute;
/// [`RuntimeProfileStatus::as_str`] returns the matching snake_case labels.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RuntimeProfileStatus {
    /// Exempt from the probe gate (`requires_probe_gate` returns `false`).
    Preferred,
    /// Subject to the probe gate.
    Experimental,
    /// Subject to the probe gate; in this file it is only assigned to the
    /// Qwen3.6 `mlx` profile.
    VisionOnlyExperimental,
    /// Subject to the probe gate; in this file it is only assigned to the
    /// Gemma4 `ollama` profile.
    Quarantined,
    /// Status of the fallback profile built by `generic_profile` when no
    /// dedicated profile exists; exempt from the probe gate.
    Unknown,
}
23
24impl RuntimeProfileStatus {
25 pub fn as_str(&self) -> &'static str {
26 match self {
27 Self::Preferred => "preferred",
28 Self::Experimental => "experimental",
29 Self::VisionOnlyExperimental => "vision_only_experimental",
30 Self::Quarantined => "quarantined",
31 Self::Unknown => "unknown",
32 }
33 }
34
35 pub fn requires_probe_gate(&self) -> bool {
36 !matches!(self, Self::Preferred | Self::Unknown)
37 }
38}
39
/// Description of how one runtime is expected to behave for a model family.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuntimeProfile {
    /// Support level of this runtime for the model family.
    pub status: RuntimeProfileStatus,
    /// Probe names listed for this profile. `evaluate_runtime_profile_gate`
    /// only enforces them when `status.requires_probe_gate()` is true.
    pub requires: Vec<String>,
    /// Suggested context length, if the profile has one. May be overridden by
    /// `adjust_profile_for_host` when host memory information is supplied.
    pub recommended_num_ctx: Option<u64>,
    /// Identifiers of known failure modes for this pairing.
    pub known_risks: Vec<String>,
    /// Human-readable mitigations for the known risks.
    pub workarounds: Vec<String>,
    /// Free-form remarks about the profile.
    pub notes: Vec<String>,
}
49
/// Result of looking up the runtime profile for one model/provider pair.
///
/// Built by `local_runtime_profile_report_for_host`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocalRuntimeProfileReport {
    /// Alias the model was requested under, when one is known.
    pub alias: Option<String>,
    /// Model identifier the report was built for.
    pub model_id: String,
    /// Provider (runtime) name used to select the profile.
    pub provider: String,
    /// Family label derived from the alias and model id by `model_family`.
    pub model_family: String,
    /// Name of the selected runtime; populated with the same value as
    /// `provider`.
    pub selected_runtime: String,
    /// Copy of `selected.status`.
    pub selected_status: RuntimeProfileStatus,
    /// Cached result of `selected.status.requires_probe_gate()`.
    pub requires_probe_gate: bool,
    /// Profile chosen for `provider`, or the generic fallback profile when the
    /// family has no entry for that provider.
    pub selected: RuntimeProfile,
    /// Every known profile for the model family, keyed by runtime name.
    pub runtime_profiles: BTreeMap<String, RuntimeProfile>,
}
62
/// Host memory figures used to tune a profile's recommended context length.
///
/// Both values are optional; `recommended_context_from_local_memory` prefers
/// `accelerator_free_gib` and falls back to `system_available_gib`.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct RuntimeProfileHost {
    /// Available system memory, in GiB per the field name.
    pub system_available_gib: Option<f64>,
    /// Free accelerator memory, in GiB per the field name.
    pub accelerator_free_gib: Option<f64>,
}
68
/// Outcome of `evaluate_runtime_profile_gate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuntimeProfileGate {
    /// True when the caller forced the gate or no required probe is missing.
    pub allowed: bool,
    /// Echo of the `force` argument, regardless of whether forcing was needed.
    pub forced: bool,
    /// Status of the profile that was evaluated.
    pub selected_status: RuntimeProfileStatus,
    /// Required probes with no satisfying evidence; always empty when the
    /// status does not require the probe gate.
    pub missing_required_probes: Vec<String>,
    /// Probe names recorded as passed in the supplied evidence.
    pub passed_probes: Vec<String>,
    /// Human-readable summary of the decision.
    pub message: String,
}
78
/// Accumulated probe results used to satisfy a profile's required probes.
#[derive(Debug, Clone, Default)]
pub struct RuntimeProbeEvidence {
    /// Names of probes recorded as passed (kept sorted by the `BTreeSet`).
    passed: BTreeSet<String>,
    /// Tool conformance reports retained so that requirements can also be
    /// checked against them directly in `satisfies`.
    tool_reports: Vec<ToolConformanceReport>,
}
84
85impl RuntimeProbeEvidence {
86 pub fn new() -> Self {
87 Self::default()
88 }
89
90 pub fn add_passed(&mut self, probe: impl Into<String>) {
91 let probe = probe.into();
92 if !probe.trim().is_empty() {
93 self.passed.insert(probe);
94 }
95 }
96
97 pub fn add_tool_report(&mut self, report: ToolConformanceReport) {
98 if report_satisfies_required_probe(&report, "tool_probe") {
99 self.passed.insert("tool_probe".to_string());
100 self.passed.insert("tool_call_probe".to_string());
101 }
102 if report_satisfies_required_probe(&report, "native_tool_probe") {
103 self.passed.insert("native_tool_probe".to_string());
104 }
105 if report_satisfies_required_probe(&report, "streaming_tool_probe") {
106 self.passed.insert("streaming_tool_probe".to_string());
107 }
108 self.tool_reports.push(report);
109 }
110
111 pub fn passed(&self) -> Vec<String> {
112 self.passed.iter().cloned().collect()
113 }
114
115 fn satisfies(&self, requirement: &str) -> bool {
116 self.passed.contains(requirement)
117 || self
118 .tool_reports
119 .iter()
120 .any(|report| report_satisfies_required_probe(report, requirement))
121 }
122}
123
124pub fn local_runtime_profile_report(
125 selector: &str,
126 provider_override: Option<&str>,
127) -> LocalRuntimeProfileReport {
128 let resolved = llm_config::resolve_model_info(selector);
129 let provider = provider_override
130 .map(str::trim)
131 .filter(|provider| !provider.is_empty())
132 .map(str::to_string)
133 .unwrap_or_else(|| resolved.provider.clone());
134 local_runtime_profile_report_for(resolved.alias.as_deref(), &resolved.id, &provider)
135}
136
137pub fn local_runtime_profile_report_for(
138 alias: Option<&str>,
139 model_id: &str,
140 provider: &str,
141) -> LocalRuntimeProfileReport {
142 local_runtime_profile_report_for_host(alias, model_id, provider, None)
143}
144
145pub fn local_runtime_profile_report_for_host(
146 alias: Option<&str>,
147 model_id: &str,
148 provider: &str,
149 host: Option<RuntimeProfileHost>,
150) -> LocalRuntimeProfileReport {
151 let family = model_family(alias, model_id);
152 let catalog_model = llm_config::model_catalog_entry(model_id);
153 let runtime_profiles = profiles_for_family(family)
154 .into_iter()
155 .map(|(runtime, profile)| {
156 let adjusted = adjust_profile_for_host(
157 family,
158 &runtime,
159 catalog_model.as_ref(),
160 profile,
161 host.as_ref(),
162 );
163 (runtime, adjusted)
164 })
165 .collect::<BTreeMap<_, _>>();
166 let selected = runtime_profiles
167 .get(provider)
168 .cloned()
169 .unwrap_or_else(|| generic_profile(provider));
170 LocalRuntimeProfileReport {
171 alias: alias.map(str::to_string),
172 model_id: model_id.to_string(),
173 provider: provider.to_string(),
174 model_family: family.to_string(),
175 selected_runtime: provider.to_string(),
176 selected_status: selected.status.clone(),
177 requires_probe_gate: selected.status.requires_probe_gate(),
178 selected,
179 runtime_profiles,
180 }
181}
182
183pub fn evaluate_runtime_profile_gate(
184 report: &LocalRuntimeProfileReport,
185 evidence: &RuntimeProbeEvidence,
186 force: bool,
187) -> RuntimeProfileGate {
188 let missing: Vec<String> = if report.selected_status.requires_probe_gate() {
189 report
190 .selected
191 .requires
192 .iter()
193 .filter(|requirement| !evidence.satisfies(requirement))
194 .cloned()
195 .collect()
196 } else {
197 Vec::new()
198 };
199 let allowed = force || missing.is_empty();
200 let message = if force {
201 format!(
202 "{} via {} is {} but allowed by --force",
203 report.model_id,
204 report.provider,
205 report.selected_status.as_str()
206 )
207 } else if allowed {
208 format!(
209 "{} via {} is {}",
210 report.model_id,
211 report.provider,
212 report.selected_status.as_str()
213 )
214 } else {
215 format!(
216 "{} via {} is {}; required probes missing: {}",
217 report.model_id,
218 report.provider,
219 report.selected_status.as_str(),
220 missing.join(", ")
221 )
222 };
223 RuntimeProfileGate {
224 allowed,
225 forced: force,
226 selected_status: report.selected_status.clone(),
227 missing_required_probes: missing,
228 passed_probes: evidence.passed(),
229 message,
230 }
231}
232
/// Classifies a model into a runtime-profile family label.
///
/// Matching is ASCII case-insensitive and looks for a family marker in either
/// the alias (when present) or the model id. Qwen3.6 markers are checked
/// before Gemma4 markers; anything else is `"generic-local"`.
///
/// The result is always a `'static` label, so the two inputs need no shared
/// lifetime.
fn model_family(alias: Option<&str>, model_id: &str) -> &'static str {
    let alias = alias.unwrap_or_default().to_ascii_lowercase();
    let model_id = model_id.to_ascii_lowercase();
    // No marker contains a space, so checking each field on its own is
    // equivalent to searching a space-joined "alias model_id" string.
    let mentions = |marker: &str| alias.contains(marker) || model_id.contains(marker);

    if mentions("qwen3.6") || mentions("qwen36") {
        "qwen3.6-a3b-hybrid"
    } else if mentions("gemma4") || mentions("gemma-4") {
        "gemma4-hybrid-moe"
    } else {
        "generic-local"
    }
}
247
248fn profiles_for_family(family: &str) -> BTreeMap<String, RuntimeProfile> {
249 match family {
250 "qwen3.6-a3b-hybrid" => BTreeMap::from([
251 (
252 "ollama".to_string(),
253 profile(
254 RuntimeProfileStatus::Preferred,
255 &["tool_probe", "effective_context_probe"],
256 Some(32_768),
257 &[],
258 &[
259 "Use the text tool wire format unless a fresh native probe passes.",
260 "Keep an explicit num_ctx so the resident runner matches eval settings.",
261 ],
262 &["Best cheap local default on the 2026-05-13 Burin eval pass."],
263 ),
264 ),
265 (
266 "llamacpp".to_string(),
267 profile(
268 RuntimeProfileStatus::Experimental,
269 &["tool_probe"],
270 Some(65_536),
271 &["inflated_input_token_accounting_on_repeated_turns"],
272 &[
273 "Run a tool probe before write-heavy evals.",
274 "Record llama.cpp build, ctx, cache type, and prefix-cache telemetry in eval receipts.",
275 ],
276 &[
277 "Current llama.cpp builds reuse two-turn Qwen3.6 hybrid-cache prefixes; keep token accounting visible in receipts.",
278 ],
279 ),
280 ),
281 (
282 "mlx".to_string(),
283 profile(
284 RuntimeProfileStatus::VisionOnlyExperimental,
285 &[
286 "served_model_identity_probe",
287 "persistent_readiness_probe",
288 "tool_probe",
289 ],
290 None,
291 &[
292 "stale_or_default_v1_models_identity",
293 "hybrid_prefix_cache_reuse_gap",
294 ],
295 &[
296 "Probe /v1/models twice and send one minimal chat request before selection.",
297 "Record server flags for APC, context length, batching, and thinking mode.",
298 ],
299 &["Use only when MLX-specific throughput or vision support is needed."],
300 ),
301 ),
302 ]),
303 "gemma4-hybrid-moe" => BTreeMap::from([
304 (
305 "ollama".to_string(),
306 profile(
307 RuntimeProfileStatus::Quarantined,
308 &["tool_probe"],
309 Some(32_768),
310 &[
311 "raw_tool_tag_no_structured_calls",
312 "completion_prose_without_executable_tool_calls",
313 ],
314 &[
315 "Allow only after the one-tool probe returns native or parseable text calls.",
316 "Use text mode and corrective retry for write-required turns.",
317 ],
318 &[
319 "Gemma4 through Ollama has produced raw <tool_call> blocks and final prose in local evals.",
320 ],
321 ),
322 ),
323 (
324 "llamacpp".to_string(),
325 profile(
326 RuntimeProfileStatus::Experimental,
327 &["tool_probe", "two_turn_cache_probe"],
328 Some(32_768),
329 &[
330 "full_prompt_reprocess_on_hybrid_cache",
331 "parser_template_drift",
332 ],
333 &[
334 "Confirm the served template emits parseable calls before any write eval.",
335 "Treat final prose as insufficient when artifacts are unchanged.",
336 ],
337 &["Prefer as an eval candidate, not a default editing runtime."],
338 ),
339 ),
340 (
341 "mlx".to_string(),
342 profile(
343 RuntimeProfileStatus::Experimental,
344 &[
345 "served_model_identity_probe",
346 "persistent_readiness_probe",
347 "tool_probe",
348 ],
349 None,
350 &[
351 "raw_gemma_tool_markers_in_content",
352 "hybrid_prefix_cache_reuse_gap",
353 ],
354 &[
355 "Keep raw marker parser fixtures enabled in the Harn text parser.",
356 "Verify OpenAI-compatible tool_calls is non-empty before native mode.",
357 ],
358 &["Use explicit server flags instead of opaque defaults."],
359 ),
360 ),
361 (
362 "local".to_string(),
363 profile(
364 RuntimeProfileStatus::Experimental,
365 &["tool_probe"],
366 Some(32_768),
367 &["provider_specific_parser_required"],
368 &["Prefer text mode until native parser support is proven."],
369 &["Generic local Gemma endpoints vary by serving stack."],
370 ),
371 ),
372 ]),
373 _ => BTreeMap::new(),
374 }
375}
376
377fn adjust_profile_for_host(
378 family: &str,
379 runtime: &str,
380 model: Option<&ModelDef>,
381 mut profile: RuntimeProfile,
382 host: Option<&RuntimeProfileHost>,
383) -> RuntimeProfile {
384 if family == "qwen3.6-a3b-hybrid" && runtime == "llamacpp" {
385 if let (Some(model), Some(host)) = (model, host) {
386 if let Some(ctx) = recommended_context_from_local_memory(model, host) {
387 profile.recommended_num_ctx = Some(ctx);
388 }
389 }
390 }
391 profile
392}
393
394fn recommended_context_from_local_memory(
395 model: &ModelDef,
396 host: &RuntimeProfileHost,
397) -> Option<u64> {
398 let memory = model.local_memory.as_ref()?;
399 let available_gib = host
400 .accelerator_free_gib
401 .or(host.system_available_gib)
402 .filter(|available| *available > 0.0)?;
403 let base = memory.base_resident_gib?;
404 let kv_per_1k = scaled_kv_cache_gib_per_1k(memory)?;
405 let safety = memory.safety_margin_gib.unwrap_or(4.0);
406 let usable_for_kv = available_gib - base - safety;
407 if usable_for_kv <= 0.0 {
408 return Some(8_192);
409 }
410
411 let by_memory = ((usable_for_kv / kv_per_1k) * 1_000.0).floor() as u64;
412 let ceiling = memory
413 .max_recommended_context
414 .or(model.runtime_context_window)
415 .unwrap_or(model.context_window)
416 .min(model.context_window);
417 let floor = 65_536_u64.min(ceiling).min(model.context_window);
418 Some(round_context_down(by_memory.min(ceiling).max(floor)))
419}
420
421fn scaled_kv_cache_gib_per_1k(memory: &LocalMemoryDef) -> Option<f64> {
422 let base = memory.kv_cache_gib_per_1k_ctx?;
423 let multiplier = memory
424 .default_cache_type
425 .as_ref()
426 .and_then(|cache_type| memory.cache_type_multipliers.get(cache_type))
427 .copied()
428 .unwrap_or(1.0);
429 let scaled = base * multiplier;
430 (scaled > 0.0).then_some(scaled)
431}
432
/// Rounds `ctx` down to a multiple of 8,192, never returning less than 8,192.
///
/// Values below one step are therefore rounded *up* to 8,192.
fn round_context_down(ctx: u64) -> u64 {
    const STEP: u64 = 8_192;
    if ctx < STEP {
        STEP
    } else {
        ctx - ctx % STEP
    }
}
437
438fn generic_profile(provider: &str) -> RuntimeProfile {
439 RuntimeProfile {
440 status: RuntimeProfileStatus::Unknown,
441 requires: vec!["readiness_probe".to_string()],
442 recommended_num_ctx: None,
443 known_risks: Vec::new(),
444 workarounds: Vec::new(),
445 notes: vec![format!(
446 "No dedicated local runtime profile for provider `{provider}` and this model family."
447 )],
448 }
449}
450
451fn profile(
452 status: RuntimeProfileStatus,
453 requires: &[&str],
454 recommended_num_ctx: Option<u64>,
455 known_risks: &[&str],
456 workarounds: &[&str],
457 notes: &[&str],
458) -> RuntimeProfile {
459 RuntimeProfile {
460 status,
461 requires: requires.iter().map(|value| (*value).to_string()).collect(),
462 recommended_num_ctx,
463 known_risks: known_risks
464 .iter()
465 .map(|value| (*value).to_string())
466 .collect(),
467 workarounds: workarounds
468 .iter()
469 .map(|value| (*value).to_string())
470 .collect(),
471 notes: notes.iter().map(|value| (*value).to_string()).collect(),
472 }
473}
474
// Unit tests for profile selection, host-based context tuning, and the probe
// gate. Several expectations depend on what `llm_config` resolves for the
// selectors and model ids used here, which is not visible in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::tool_conformance::{classify_tool_conformance_fixture, ToolProbeMode};

    // Qwen3.6 is `Preferred` on ollama and `Experimental` on llamacpp, and the
    // llamacpp profile requires only `tool_probe`.
    #[test]
    fn qwen_ollama_profile_is_preferred_and_llamacpp_is_experimental() {
        let ollama = local_runtime_profile_report("local-qwen3.6", Some("ollama"));
        assert_eq!(ollama.model_family, "qwen3.6-a3b-hybrid");
        assert_eq!(ollama.selected_status, RuntimeProfileStatus::Preferred);

        let llamacpp = local_runtime_profile_report("local-qwen3.6", Some("llamacpp"));
        assert_eq!(llamacpp.selected_status, RuntimeProfileStatus::Experimental);
        assert_eq!(llamacpp.selected.requires, vec!["tool_probe".to_string()]);
        // This risk is listed on the Gemma4 llamacpp profile; it must not
        // appear on the Qwen3.6 one.
        assert!(!llamacpp
            .selected
            .known_risks
            .contains(&"full_prompt_reprocess_on_hybrid_cache".to_string()));
    }

    // With 32 GiB of free accelerator memory the recommendation rises above
    // the 65,536 table default.
    // NOTE(review): the exact value presumably comes from the catalog's
    // local-memory metadata for this model id; confirm against the catalog.
    #[test]
    fn qwen_llamacpp_profile_raises_context_when_accelerator_memory_fits() {
        let report = local_runtime_profile_report_for_host(
            Some("local-qwen3.6"),
            "qwen3.6-35b-a3b-ud-q4-k-xl",
            "llamacpp",
            Some(RuntimeProfileHost {
                system_available_gib: None,
                accelerator_free_gib: Some(32.0),
            }),
        );
        assert_eq!(report.selected.recommended_num_ctx, Some(262_144));
    }

    // With 24 GiB free the recommendation is smaller; 73,728 is a multiple of
    // the 8,192 rounding step.
    // NOTE(review): the exact value depends on catalog metadata; confirm.
    #[test]
    fn qwen_llamacpp_profile_keeps_conservative_context_when_memory_is_tight() {
        let report = local_runtime_profile_report_for_host(
            Some("local-qwen3.6"),
            "qwen3.6-35b-a3b-ud-q4-k-xl",
            "llamacpp",
            Some(RuntimeProfileHost {
                system_available_gib: None,
                accelerator_free_gib: Some(24.0),
            }),
        );
        assert_eq!(report.selected.recommended_num_ctx, Some(73_728));
    }

    // The Gemma4 ollama profile is blocked with no evidence and allowed once
    // a tool conformance report satisfying `tool_probe` is added.
    // NOTE(review): assumes the `ollama-gemma4` selector resolves to the
    // ollama provider; confirm against the catalog.
    #[test]
    fn gemma4_ollama_profile_is_quarantined_until_tool_probe_passes() {
        let report = local_runtime_profile_report("ollama-gemma4", None);
        assert_eq!(report.selected_status, RuntimeProfileStatus::Quarantined);
        let gate = evaluate_runtime_profile_gate(&report, &RuntimeProbeEvidence::new(), false);
        assert!(!gate.allowed);
        assert_eq!(gate.missing_required_probes, vec!["tool_probe".to_string()]);

        let mut evidence = RuntimeProbeEvidence::new();
        evidence.add_tool_report(classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            "harn_tool_probe_marker",
            r#"{"content":"echo_marker({ value: \"harn_tool_probe_marker\" })"}"#,
        ));
        let gate = evaluate_runtime_profile_gate(&report, &evidence, false);
        assert!(gate.allowed, "{gate:?}");
    }

    // `force` allows a gated profile even with no probe evidence, and the
    // gate records that it was forced.
    // NOTE(review): expects `local-qwen3.6` with no override to resolve to a
    // provider whose profile is `Experimental`; confirm against the catalog.
    #[test]
    fn force_allows_risky_profile_with_receipt() {
        let report = local_runtime_profile_report("local-qwen3.6", None);
        assert_eq!(report.selected_status, RuntimeProfileStatus::Experimental);
        let gate = evaluate_runtime_profile_gate(&report, &RuntimeProbeEvidence::new(), true);
        assert!(gate.allowed);
        assert!(gate.forced);
    }
}