Skip to main content

harn_vm/llm/
local_profiles.rs

1//! Data-driven local runtime risk profiles.
2//!
3//! This layer keeps local model selection explicit: the model family, runtime,
4//! known risks, required probes, and workarounds are all table data that CLI
5//! lifecycle commands can explain and enforce.
6
7use std::collections::{BTreeMap, BTreeSet};
8
9use serde::{Deserialize, Serialize};
10
11use super::tool_conformance::{report_satisfies_required_probe, ToolConformanceReport};
12use crate::llm_config;
13
/// Status of one model-family/runtime pairing in the local runtime profile table.
///
/// Serialized through serde with snake_case variant names (for example
/// `vision_only_experimental`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RuntimeProfileStatus {
    /// Not subject to the probe gate.
    Preferred,
    /// Subject to the probe gate.
    Experimental,
    /// Subject to the probe gate.
    /// NOTE(review): presumably only vetted for vision workloads — confirm.
    VisionOnlyExperimental,
    /// Subject to the probe gate.
    Quarantined,
    /// Status carried by the fallback profile used when the table has no entry
    /// for a provider; not subject to the probe gate.
    Unknown,
}
23
24impl RuntimeProfileStatus {
25    pub fn as_str(&self) -> &'static str {
26        match self {
27            Self::Preferred => "preferred",
28            Self::Experimental => "experimental",
29            Self::VisionOnlyExperimental => "vision_only_experimental",
30            Self::Quarantined => "quarantined",
31            Self::Unknown => "unknown",
32        }
33    }
34
35    pub fn requires_probe_gate(&self) -> bool {
36        !matches!(self, Self::Preferred | Self::Unknown)
37    }
38}
39
/// One row of the local runtime profile table: what is known about running a
/// model family on a particular runtime.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuntimeProfile {
    /// Status assigned to this family/runtime pairing.
    pub status: RuntimeProfileStatus,
    /// Names of the probes this profile lists as required. They are only
    /// enforced when `status` is subject to the probe gate.
    pub requires: Vec<String>,
    /// Recommended `num_ctx` setting, when the table provides one.
    /// NOTE(review): presumably a context length in tokens — confirm.
    pub recommended_num_ctx: Option<u64>,
    /// Identifiers of known risks for this pairing.
    pub known_risks: Vec<String>,
    /// Human-readable workaround guidance.
    pub workarounds: Vec<String>,
    /// Free-form notes about this pairing.
    pub notes: Vec<String>,
}
49
/// Profile lookup result for one model on one provider, together with every
/// profile known for the model's family.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocalRuntimeProfileReport {
    /// Alias the model was addressed by, if any.
    pub alias: Option<String>,
    /// Model identifier the report was built for.
    pub model_id: String,
    /// Provider the report was built for.
    pub provider: String,
    /// Family label derived from the alias and model id.
    pub model_family: String,
    /// Runtime whose profile was selected; populated with the same value as
    /// `provider` when the report is built.
    pub selected_runtime: String,
    /// Copy of `selected.status`.
    pub selected_status: RuntimeProfileStatus,
    /// Whether `selected_status` is subject to the probe gate.
    pub requires_probe_gate: bool,
    /// Profile chosen for `provider`, or the generic fallback profile when the
    /// family has no entry for that provider.
    pub selected: RuntimeProfile,
    /// Every profile known for the model family, keyed by runtime name.
    pub runtime_profiles: BTreeMap<String, RuntimeProfile>,
}
62
/// Outcome of checking a selected runtime profile against collected probe
/// evidence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuntimeProfileGate {
    /// True when the caller forced the gate or no required probe is missing.
    pub allowed: bool,
    /// Echo of the `force` flag passed to the gate evaluation.
    pub forced: bool,
    /// Status of the profile that was evaluated.
    pub selected_status: RuntimeProfileStatus,
    /// Required probes the evidence did not satisfy; empty when the status is
    /// not subject to the probe gate.
    pub missing_required_probes: Vec<String>,
    /// Names of the probes the evidence records as passed.
    pub passed_probes: Vec<String>,
    /// Human-readable summary of the decision.
    pub message: String,
}
72
/// Accumulated evidence about which probes have passed for a runtime.
#[derive(Debug, Clone, Default)]
pub struct RuntimeProbeEvidence {
    /// Names of probes recorded as passed.
    passed: BTreeSet<String>,
    /// Tool conformance reports that were added; kept so a requirement can
    /// also be checked directly against each report.
    tool_reports: Vec<ToolConformanceReport>,
}
78
79impl RuntimeProbeEvidence {
80    pub fn new() -> Self {
81        Self::default()
82    }
83
84    pub fn add_passed(&mut self, probe: impl Into<String>) {
85        let probe = probe.into();
86        if !probe.trim().is_empty() {
87            self.passed.insert(probe);
88        }
89    }
90
91    pub fn add_tool_report(&mut self, report: ToolConformanceReport) {
92        if report_satisfies_required_probe(&report, "tool_probe") {
93            self.passed.insert("tool_probe".to_string());
94            self.passed.insert("tool_call_probe".to_string());
95        }
96        if report_satisfies_required_probe(&report, "native_tool_probe") {
97            self.passed.insert("native_tool_probe".to_string());
98        }
99        if report_satisfies_required_probe(&report, "streaming_tool_probe") {
100            self.passed.insert("streaming_tool_probe".to_string());
101        }
102        self.tool_reports.push(report);
103    }
104
105    pub fn passed(&self) -> Vec<String> {
106        self.passed.iter().cloned().collect()
107    }
108
109    fn satisfies(&self, requirement: &str) -> bool {
110        self.passed.contains(requirement)
111            || self
112                .tool_reports
113                .iter()
114                .any(|report| report_satisfies_required_probe(report, requirement))
115    }
116}
117
118pub fn local_runtime_profile_report(
119    selector: &str,
120    provider_override: Option<&str>,
121) -> LocalRuntimeProfileReport {
122    let resolved = llm_config::resolve_model_info(selector);
123    let provider = provider_override
124        .map(str::trim)
125        .filter(|provider| !provider.is_empty())
126        .map(str::to_string)
127        .unwrap_or_else(|| resolved.provider.clone());
128    local_runtime_profile_report_for(resolved.alias.as_deref(), &resolved.id, &provider)
129}
130
131pub fn local_runtime_profile_report_for(
132    alias: Option<&str>,
133    model_id: &str,
134    provider: &str,
135) -> LocalRuntimeProfileReport {
136    let family = model_family(alias, model_id);
137    let runtime_profiles = profiles_for_family(family);
138    let selected = runtime_profiles
139        .get(provider)
140        .cloned()
141        .unwrap_or_else(|| generic_profile(provider));
142    LocalRuntimeProfileReport {
143        alias: alias.map(str::to_string),
144        model_id: model_id.to_string(),
145        provider: provider.to_string(),
146        model_family: family.to_string(),
147        selected_runtime: provider.to_string(),
148        selected_status: selected.status.clone(),
149        requires_probe_gate: selected.status.requires_probe_gate(),
150        selected,
151        runtime_profiles,
152    }
153}
154
155pub fn evaluate_runtime_profile_gate(
156    report: &LocalRuntimeProfileReport,
157    evidence: &RuntimeProbeEvidence,
158    force: bool,
159) -> RuntimeProfileGate {
160    let missing: Vec<String> = if report.selected_status.requires_probe_gate() {
161        report
162            .selected
163            .requires
164            .iter()
165            .filter(|requirement| !evidence.satisfies(requirement))
166            .cloned()
167            .collect()
168    } else {
169        Vec::new()
170    };
171    let allowed = force || missing.is_empty();
172    let message = if force {
173        format!(
174            "{} via {} is {} but allowed by --force",
175            report.model_id,
176            report.provider,
177            report.selected_status.as_str()
178        )
179    } else if allowed {
180        format!(
181            "{} via {} is {}",
182            report.model_id,
183            report.provider,
184            report.selected_status.as_str()
185        )
186    } else {
187        format!(
188            "{} via {} is {}; required probes missing: {}",
189            report.model_id,
190            report.provider,
191            report.selected_status.as_str(),
192            missing.join(", ")
193        )
194    };
195    RuntimeProfileGate {
196        allowed,
197        forced: force,
198        selected_status: report.selected_status.clone(),
199        missing_required_probes: missing,
200        passed_probes: evidence.passed(),
201        message,
202    }
203}
204
/// Maps a model alias and id to the label of a known local model family.
///
/// Matching is ASCII case-insensitive substring search over the alias (when
/// present) and the model id. Families are checked in table order and the
/// first hit wins; `"generic-local"` is returned when nothing matches.
fn model_family<'a>(alias: Option<&'a str>, model_id: &'a str) -> &'static str {
    // (markers, family label) pairs, checked in order.
    const FAMILY_MARKERS: [(&[&str], &str); 2] = [
        (&["qwen3.6", "qwen36"], "qwen3.6-a3b-hybrid"),
        (&["gemma4", "gemma-4"], "gemma4-hybrid-moe"),
    ];

    let alias_lower = alias.unwrap_or_default().to_ascii_lowercase();
    let id_lower = model_id.to_ascii_lowercase();
    let mentions = |marker: &str| alias_lower.contains(marker) || id_lower.contains(marker);

    FAMILY_MARKERS
        .iter()
        .find(|(markers, _)| markers.iter().any(|marker| mentions(*marker)))
        .map_or("generic-local", |(_, family)| *family)
}
219
220fn profiles_for_family(family: &str) -> BTreeMap<String, RuntimeProfile> {
221    match family {
222        "qwen3.6-a3b-hybrid" => BTreeMap::from([
223            (
224                "ollama".to_string(),
225                profile(
226                    RuntimeProfileStatus::Preferred,
227                    &["tool_probe", "effective_context_probe"],
228                    Some(32_768),
229                    &[],
230                    &[
231                        "Use the text tool wire format unless a fresh native probe passes.",
232                        "Keep an explicit num_ctx so the resident runner matches eval settings.",
233                    ],
234                    &["Best cheap local default on the 2026-05-13 Burin eval pass."],
235                ),
236            ),
237            (
238                "llamacpp".to_string(),
239                profile(
240                    RuntimeProfileStatus::Experimental,
241                    &["tool_probe", "two_turn_cache_probe"],
242                    Some(65_536),
243                    &[
244                        "full_prompt_reprocess_on_hybrid_cache",
245                        "inflated_input_token_accounting_on_repeated_turns",
246                    ],
247                    &[
248                        "Run a two-turn cache probe before write-heavy evals.",
249                        "Prefer short-lived scan/edit loops until cache telemetry is clean.",
250                    ],
251                    &[
252                        "Qwen3.6-family GGUF stacks can pass simple edits while still re-prefilling expensive prefixes.",
253                    ],
254                ),
255            ),
256            (
257                "mlx".to_string(),
258                profile(
259                    RuntimeProfileStatus::VisionOnlyExperimental,
260                    &[
261                        "served_model_identity_probe",
262                        "persistent_readiness_probe",
263                        "tool_probe",
264                    ],
265                    None,
266                    &[
267                        "stale_or_default_v1_models_identity",
268                        "hybrid_prefix_cache_reuse_gap",
269                    ],
270                    &[
271                        "Probe /v1/models twice and send one minimal chat request before selection.",
272                        "Record server flags for APC, context length, batching, and thinking mode.",
273                    ],
274                    &["Use only when MLX-specific throughput or vision support is needed."],
275                ),
276            ),
277        ]),
278        "gemma4-hybrid-moe" => BTreeMap::from([
279            (
280                "ollama".to_string(),
281                profile(
282                    RuntimeProfileStatus::Quarantined,
283                    &["tool_probe"],
284                    Some(32_768),
285                    &[
286                        "raw_tool_tag_no_structured_calls",
287                        "completion_prose_without_executable_tool_calls",
288                    ],
289                    &[
290                        "Allow only after the one-tool probe returns native or parseable text calls.",
291                        "Use text mode and corrective retry for write-required turns.",
292                    ],
293                    &[
294                        "Gemma4 through Ollama has produced raw <tool_call> blocks and final prose in local evals.",
295                    ],
296                ),
297            ),
298            (
299                "llamacpp".to_string(),
300                profile(
301                    RuntimeProfileStatus::Experimental,
302                    &["tool_probe", "two_turn_cache_probe"],
303                    Some(32_768),
304                    &[
305                        "full_prompt_reprocess_on_hybrid_cache",
306                        "parser_template_drift",
307                    ],
308                    &[
309                        "Confirm the served template emits parseable calls before any write eval.",
310                        "Treat final prose as insufficient when artifacts are unchanged.",
311                    ],
312                    &["Prefer as an eval candidate, not a default editing runtime."],
313                ),
314            ),
315            (
316                "mlx".to_string(),
317                profile(
318                    RuntimeProfileStatus::Experimental,
319                    &[
320                        "served_model_identity_probe",
321                        "persistent_readiness_probe",
322                        "tool_probe",
323                    ],
324                    None,
325                    &[
326                        "raw_gemma_tool_markers_in_content",
327                        "hybrid_prefix_cache_reuse_gap",
328                    ],
329                    &[
330                        "Keep raw marker parser fixtures enabled in the Harn text parser.",
331                        "Verify OpenAI-compatible tool_calls is non-empty before native mode.",
332                    ],
333                    &["Use explicit server flags instead of opaque defaults."],
334                ),
335            ),
336            (
337                "local".to_string(),
338                profile(
339                    RuntimeProfileStatus::Experimental,
340                    &["tool_probe"],
341                    Some(32_768),
342                    &["provider_specific_parser_required"],
343                    &["Prefer text mode until native parser support is proven."],
344                    &["Generic local Gemma endpoints vary by serving stack."],
345                ),
346            ),
347        ]),
348        _ => BTreeMap::new(),
349    }
350}
351
352fn generic_profile(provider: &str) -> RuntimeProfile {
353    RuntimeProfile {
354        status: RuntimeProfileStatus::Unknown,
355        requires: vec!["readiness_probe".to_string()],
356        recommended_num_ctx: None,
357        known_risks: Vec::new(),
358        workarounds: Vec::new(),
359        notes: vec![format!(
360            "No dedicated local runtime profile for provider `{provider}` and this model family."
361        )],
362    }
363}
364
365fn profile(
366    status: RuntimeProfileStatus,
367    requires: &[&str],
368    recommended_num_ctx: Option<u64>,
369    known_risks: &[&str],
370    workarounds: &[&str],
371    notes: &[&str],
372) -> RuntimeProfile {
373    RuntimeProfile {
374        status,
375        requires: requires.iter().map(|value| (*value).to_string()).collect(),
376        recommended_num_ctx,
377        known_risks: known_risks
378            .iter()
379            .map(|value| (*value).to_string())
380            .collect(),
381        workarounds: workarounds
382            .iter()
383            .map(|value| (*value).to_string())
384            .collect(),
385        notes: notes.iter().map(|value| (*value).to_string()).collect(),
386    }
387}
388
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::tool_conformance::{classify_tool_conformance_fixture, ToolProbeMode};

    /// The same Qwen3.6 selector is preferred on its resolved provider and
    /// experimental once the provider is overridden to llama.cpp.
    #[test]
    fn qwen_ollama_profile_is_preferred_and_llamacpp_is_experimental() {
        let default_report = local_runtime_profile_report("qwen3.6-coding", None);
        assert_eq!(default_report.model_family, "qwen3.6-a3b-hybrid");
        assert_eq!(
            default_report.selected_status,
            RuntimeProfileStatus::Preferred
        );

        let overridden = local_runtime_profile_report("qwen3.6-coding", Some("llamacpp"));
        assert_eq!(
            overridden.selected_status,
            RuntimeProfileStatus::Experimental
        );
        let lists_reprocess_risk = overridden
            .selected
            .known_risks
            .iter()
            .any(|risk| risk == "full_prompt_reprocess_on_hybrid_cache");
        assert!(lists_reprocess_risk);
    }

    /// A quarantined profile is blocked with no evidence and allowed once a
    /// tool conformance report satisfies `tool_probe`.
    #[test]
    fn gemma4_ollama_profile_is_quarantined_until_tool_probe_passes() {
        let report = local_runtime_profile_report("ollama-gemma4", None);
        assert_eq!(report.selected_status, RuntimeProfileStatus::Quarantined);

        let no_evidence = RuntimeProbeEvidence::new();
        let blocked = evaluate_runtime_profile_gate(&report, &no_evidence, false);
        assert!(!blocked.allowed);
        assert_eq!(blocked.missing_required_probes, ["tool_probe"]);

        let tool_report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            "harn_tool_probe_marker",
            r#"{"content":"echo_marker({ value: \"harn_tool_probe_marker\" })"}"#,
        );
        let mut evidence = RuntimeProbeEvidence::new();
        evidence.add_tool_report(tool_report);
        let gate = evaluate_runtime_profile_gate(&report, &evidence, false);
        assert!(gate.allowed, "{gate:?}");
    }

    /// Forcing the gate allows a probe-gated profile with no evidence and
    /// records that the force flag was used.
    #[test]
    fn force_allows_risky_profile_with_receipt() {
        let report = local_runtime_profile_report("local-qwen3.6", None);
        assert_eq!(report.selected_status, RuntimeProfileStatus::Experimental);

        let no_evidence = RuntimeProbeEvidence::new();
        let gate = evaluate_runtime_profile_gate(&report, &no_evidence, true);
        assert!(gate.allowed);
        assert!(gate.forced);
    }
}