harn_vm/llm/
resolved_dispatch.rs

//! ResolvedDispatch — one self-contained record of what an LLM call actually
//! dispatched, and what came back.
//!
//! WHY THIS EXISTS
//!
//! Answering "what provider/model/wire-format/thinking did this LLM call
//! actually use, where did each of those come from, and what did it return?"
//! used to require joining the `provider_call_request` and
//! `provider_call_response` transcript events by `call_id`, cross-referencing
//! `capabilities.toml` to learn the wire format, and reconstructing the
//! provenance of each field by reading scattered resolution layers. The
//! transcript needs to carry the *final resolved decision* so route debugging
//! does not depend on reconstructing state from adjacent events.
//!
//! `resolved_dispatch` collapses that into ONE append-only transcript event
//! per LLM call. It is:
//!   - self-contained (no join needed),
//!   - deterministic in its wire-format/base-url fields (derived from the
//!     capability registry, the single source of truth), and
//!   - provenance-bearing: each of provider/model/wire_format/thinking/
//!     tool_format records WHERE it came from. The high-signal value is
//!     `inherited_from_primary`, which flags silent route inheritance directly.
//!
//! This module is observability-only. It reads the request options + the
//! result and emits a record; it never feeds back into request construction,
//! so the model's next-turn payload is byte-identical with or without it.
27
28use super::api::{LlmCallOptions, ThinkingConfig};
29use super::capabilities::WireDialect;
30
/// Per-field origin annotations for one resolved dispatch: for each of
/// provider / model / wire_format / thinking / tool_format, an optional string
/// naming where the resolved value came from.
///
/// Carried on [`LlmCallOptions::dispatch_provenance`] and populated by the
/// pipeline resolver (Burin's smart-escalation / model-selection layers,
/// threaded through the agent-loop options). When no resolver annotated the
/// call — e.g. a raw `llm_call(...)` from script context — every field stays
/// `None`, which the serialized record renders as `"unknown"`.
///
/// The origin strings form a small, stable vocabulary so downstream tooling
/// (the harness-debugger `dispatch_trace` MCP tool) can filter on them:
/// - `operator_pin`: an explicit operator/env pin, such as
///   `BURIN_EVAL_SMART_PROVIDER`.
/// - `pipeline_input`: a `selected_*` field on the pipeline input.
/// - `escalation_override`: chosen by the smart-escalation resolver.
/// - `catalog_default`: filled from the provider catalog / capability registry.
/// - `inherited_from_primary`: no pin/override existed, so the value fell
///   through from the cheap primary model. THIS is the value that flags a
///   silent-inheritance bug.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DispatchProvenance {
    /// Origin of the resolved provider.
    pub provider: Option<String>,
    /// Origin of the resolved model.
    pub model: Option<String>,
    /// Origin of the resolved wire format.
    pub wire_format: Option<String>,
    /// Origin of the resolved thinking configuration.
    pub thinking: Option<String>,
    /// Origin of the resolved tool format.
    pub tool_format: Option<String>,
}
55
56impl DispatchProvenance {
57    /// Canonical provenance origin for a value that fell through from the
58    /// primary model with no pin or override. Named as a constant so the
59    /// resolver and the tests reference the same smoking-gun literal.
60    pub const INHERITED_FROM_PRIMARY: &'static str = "inherited_from_primary";
61    pub const OPERATOR_PIN: &'static str = "operator_pin";
62    pub const ESCALATION_OVERRIDE: &'static str = "escalation_override";
63    pub const PIPELINE_INPUT: &'static str = "pipeline_input";
64    pub const CATALOG_DEFAULT: &'static str = "catalog_default";
65
66    /// Parse a `dispatch_provenance` option dict supplied by the pipeline
67    /// resolver into the typed provenance. Each entry is a per-field origin
68    /// string (`operator_pin`, `escalation_override`, `inherited_from_primary`,
69    /// ...); absent entries stay `None` and surface as `"unknown"` in the
70    /// record. Returns `None` when the value is absent or not a dict, so a
71    /// non-annotating caller pays nothing.
72    pub fn from_vm_value(value: &crate::value::VmValue) -> Option<Self> {
73        let dict = value.as_dict()?;
74        let field = |key: &str| -> Option<String> {
75            dict.get(key)
76                .map(|v| v.as_str_cow().into_owned())
77                .filter(|s| !s.is_empty())
78        };
79        Some(Self {
80            provider: field("provider"),
81            model: field("model"),
82            wire_format: field("wire_format"),
83            thinking: field("thinking"),
84            tool_format: field("tool_format"),
85        })
86    }
87
88    fn origin_or_unknown(value: &Option<String>) -> &str {
89        value.as_deref().unwrap_or("unknown")
90    }
91
92    fn to_json(&self) -> serde_json::Value {
93        serde_json::json!({
94            "provider": Self::origin_or_unknown(&self.provider),
95            "model": Self::origin_or_unknown(&self.model),
96            "wire_format": Self::origin_or_unknown(&self.wire_format),
97            "thinking": Self::origin_or_unknown(&self.thinking),
98            "tool_format": Self::origin_or_unknown(&self.tool_format),
99        })
100    }
101}
102
/// Normalized result of one dispatched LLM call, built either from the
/// returned result (success) or from the thrown error (failure), so consumers
/// never need to pattern-match raw error strings.
///
/// The escalation guard hinges on telling three cases apart: `served`,
/// `empty_completion_transient_recovered`, and `empty_completion_terminal`.
/// An empty response that the runtime retried and recovered from is not a
/// dead lane; only a terminal, unrecovered empty is.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum DispatchOutcome {
    /// Committed content / a tool call / thinking came back on the first
    /// attempt.
    Served {
        completion_tokens: i64,
        content_len: usize,
    },
    /// One or more empty completions were emitted, retried, and RECOVERED
    /// from — the call ultimately served. The retry machinery did its job, so
    /// this is not a dead lane.
    EmptyCompletionTransientRecovered {
        completion_tokens: i64,
        content_len: usize,
        empty_retries: usize,
    },
    /// Output tokens were billed but nothing was committed, and the runtime
    /// ran out of retry budget (or surfaced the empty as a terminal error).
    /// THIS is the "escalation served empty" dead-lane signal.
    EmptyCompletionTerminal { completion_tokens: i64 },
    /// A usage / quota / rate limit was hit.
    UsageLimit,
    /// Any other provider-side failure, tagged with a short class label.
    ProviderError { class: String },
}
136
137impl DispatchOutcome {
138    /// Classify a successful [`super::api::LlmResult`], given how many
139    /// empty-completion retries preceded this (recovered) result. An empty
140    /// committed message with billed output that survives to this point is
141    /// TERMINAL (the retry budget was exhausted and the loop is returning the
142    /// empty result unchanged); a served result after >0 empty retries is a
143    /// transient-recovered flake; a clean first-attempt serve is `served`.
144    pub(crate) fn from_result(result: &super::api::LlmResult, empty_retries: usize) -> Self {
145        let content_len = result.text.len();
146        let committed_nothing = result.text.is_empty()
147            && result.tool_calls.is_empty()
148            && result
149                .thinking
150                .as_deref()
151                .map(str::is_empty)
152                .unwrap_or(true);
153        if committed_nothing && result.output_tokens > 0 {
154            return DispatchOutcome::EmptyCompletionTerminal {
155                completion_tokens: result.output_tokens,
156            };
157        }
158        if empty_retries > 0 {
159            return DispatchOutcome::EmptyCompletionTransientRecovered {
160                completion_tokens: result.output_tokens,
161                content_len,
162                empty_retries,
163            };
164        }
165        DispatchOutcome::Served {
166            completion_tokens: result.output_tokens,
167            content_len,
168        }
169    }
170
171    /// Classify a thrown TERMINAL error message into the outcome vocabulary.
172    /// Only called on the surfaced (non-retryable) error, so an empty-completion
173    /// error here is by definition terminal. Keyed on the same stable substrings
174    /// the retry/skip classifiers use, so the record agrees with the runtime's
175    /// own routing decisions.
176    pub(crate) fn from_error_message(message: &str) -> Self {
177        let lower = message.to_lowercase();
178        if lower.contains("completion_tokens=")
179            && (lower.contains("delivered no content")
180                || (lower.contains("no dispatchable tool call or answer")
181                    && lower.contains("upstream contract violation")))
182        {
183            // The token count is embedded in the message but is not needed for
184            // the class; downstream consumers read `completion_tokens` from the
185            // sibling `provider_call_response` when they need the exact value.
186            // A surfaced empty-completion error is terminal by construction.
187            return DispatchOutcome::EmptyCompletionTerminal {
188                completion_tokens: 0,
189            };
190        }
191        if lower.contains("rate limit")
192            || lower.contains("quota")
193            || lower.contains("usage limit")
194            || lower.contains("429")
195        {
196            return DispatchOutcome::UsageLimit;
197        }
198        DispatchOutcome::ProviderError {
199            class: provider_error_class(&lower),
200        }
201    }
202
203    /// Stable machine label. `dispatch_trace` filters on these.
204    pub(crate) fn label(&self) -> &'static str {
205        match self {
206            DispatchOutcome::Served { .. } => "served",
207            DispatchOutcome::EmptyCompletionTransientRecovered { .. } => {
208                "empty_completion_transient_recovered"
209            }
210            DispatchOutcome::EmptyCompletionTerminal { .. } => "empty_completion_terminal",
211            DispatchOutcome::UsageLimit => "usage_limit",
212            DispatchOutcome::ProviderError { .. } => "provider_error",
213        }
214    }
215
216    fn to_json(&self) -> serde_json::Value {
217        match self {
218            DispatchOutcome::Served {
219                completion_tokens,
220                content_len,
221            } => serde_json::json!({
222                "kind": "served",
223                "completion_tokens": completion_tokens,
224                "content_len": content_len,
225            }),
226            DispatchOutcome::EmptyCompletionTransientRecovered {
227                completion_tokens,
228                content_len,
229                empty_retries,
230            } => serde_json::json!({
231                "kind": "empty_completion_transient_recovered",
232                "completion_tokens": completion_tokens,
233                "content_len": content_len,
234                "empty_retries": empty_retries,
235            }),
236            DispatchOutcome::EmptyCompletionTerminal { completion_tokens } => serde_json::json!({
237                "kind": "empty_completion_terminal",
238                "completion_tokens": completion_tokens,
239                "content_len": 0,
240            }),
241            DispatchOutcome::UsageLimit => serde_json::json!({
242                "kind": "usage_limit",
243            }),
244            DispatchOutcome::ProviderError { class } => serde_json::json!({
245                "kind": "provider_error",
246                "class": class,
247            }),
248        }
249    }
250}
251
/// Bucket a provider error message into a coarse class label, so
/// `dispatch_trace` can group failures without exposing the full (potentially
/// secret-bearing) text.
///
/// `lower` must already be lowercased. The table is scanned top to bottom and
/// the first needle contained in `lower` wins, so row order is significant
/// (e.g. "api error: overloaded" classifies as `api_error`). Returns
/// `"unknown"` when no needle matches.
fn provider_error_class(lower: &str) -> String {
    const CLASS_BY_NEEDLE: &[(&str, &str)] = &[
        ("api error", "api_error"),
        ("timed out", "timeout"),
        ("timeout", "timeout"),
        ("connection", "connection"),
        ("missing content array", "malformed_response"),
        ("authentication", "auth"),
        ("unauthorized", "auth"),
        ("401", "auth"),
        ("not found", "not_found"),
        ("404", "not_found"),
        ("overloaded", "overloaded"),
        ("500", "server_error"),
        ("502", "server_error"),
        ("503", "server_error"),
    ];

    CLASS_BY_NEEDLE
        .iter()
        .find(|(needle, _)| lower.contains(*needle))
        .map_or("unknown", |(_, class)| *class)
        .to_string()
}
277
278/// The wire format an LLM call dispatched over, derived from the capability
279/// registry (the single source of truth). This is the field whose absence made
280/// the escalation incident hard to root-cause.
281pub fn wire_format_for(provider: &str, model: &str) -> &'static str {
282    match super::capabilities::lookup(provider, model).message_wire_format {
283        WireDialect::Anthropic => "anthropic_native",
284        WireDialect::OpenAiCompat => "openai_compat",
285        WireDialect::Ollama => "ollama",
286        WireDialect::Gemini => "gemini",
287    }
288}
289
290/// The host of the base URL the call went to (e.g. `api.anthropic.com`), so a
291/// misrouted call to a proxy / third-party rehost is visible at a glance. Falls
292/// back to the raw base URL when it has no parseable host.
293fn base_url_host(provider: &str) -> String {
294    let base_url = super::helpers::ResolvedProvider::resolve(provider).base_url;
295    base_url
296        .split("://")
297        .nth(1)
298        .and_then(|rest| rest.split('/').next())
299        .map(str::to_string)
300        .unwrap_or(base_url)
301}
302
303fn thinking_json(thinking: &ThinkingConfig) -> serde_json::Value {
304    match thinking {
305        ThinkingConfig::Disabled => serde_json::json!({"mode": "off", "enabled": false}),
306        ThinkingConfig::Enabled { budget_tokens } => serde_json::json!({
307            "mode": "enabled",
308            "enabled": true,
309            "budget_tokens": budget_tokens,
310        }),
311        ThinkingConfig::Adaptive => serde_json::json!({"mode": "adaptive", "enabled": true}),
312        ThinkingConfig::Effort { level } => serde_json::json!({
313            "mode": "effort",
314            "level": level.as_str(),
315            "enabled": !thinking.is_disabled(),
316        }),
317    }
318}
319
320/// Build the single self-contained `resolved_dispatch` transcript record for
321/// one LLM call. `iteration`/`call_id`/`span_id` correlate it with the sibling
322/// `provider_call_request` / `provider_call_response` events; the record itself
323/// carries everything a consumer needs to answer "what dispatched, from where,
324/// and what came back" without any join.
325pub(crate) fn build_record(
326    iteration: usize,
327    call_id: &str,
328    span_id: Option<u64>,
329    timestamp: String,
330    opts: &LlmCallOptions,
331    effective_tool_format: &str,
332    outcome: &DispatchOutcome,
333) -> serde_json::Value {
334    let provenance = opts.dispatch_provenance.clone().unwrap_or_default();
335    serde_json::json!({
336        "type": "resolved_dispatch",
337        "iteration": iteration,
338        "call_id": call_id,
339        "span_id": span_id,
340        "timestamp": timestamp,
341        "provider": opts.provider,
342        "model": opts.model,
343        "wire_format": wire_format_for(&opts.provider, &opts.model),
344        "thinking": thinking_json(&opts.thinking),
345        "tool_format": effective_tool_format,
346        "base_url_host": base_url_host(&opts.provider),
347        "provenance": provenance.to_json(),
348        "outcome": outcome.to_json(),
349        "outcome_kind": outcome.label(),
350    })
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// A claude-* model on the anthropic provider must resolve to the native
    /// wire format.
    #[test]
    fn wire_format_native_for_anthropic_claude() {
        let resolved = wire_format_for("anthropic", "claude-sonnet-4-6");
        assert_eq!(resolved, "anthropic_native");
    }

    #[test]
    fn wire_format_compat_for_openai_style() {
        let resolved = wire_format_for("openai", "gpt-4o");
        assert_eq!(resolved, "openai_compat");
    }

    #[test]
    fn wire_format_preserves_native_non_openai_dialects() {
        let gemini = wire_format_for("gemini", "gemini-2.5-pro");
        assert_eq!(gemini, "gemini");
        let ollama = wire_format_for("ollama", "llama3.2");
        assert_eq!(ollama, "ollama");
    }

    /// The transport error names the actual native wire style rather than a
    /// generic OpenAI-compatible path; it must still classify as a terminal
    /// empty completion.
    #[test]
    fn outcome_empty_completion_terminal_from_billed_no_content() {
        let msg = "anthropic-native model anthropic:claude-sonnet-4-6 reported \
                   completion_tokens=8 but delivered no content, reasoning, or tool calls";
        let outcome = DispatchOutcome::from_error_message(msg);
        // The embedded token count is not parsed; the class alone matters.
        assert_eq!(
            outcome,
            DispatchOutcome::EmptyCompletionTerminal {
                completion_tokens: 0
            }
        );
    }

    /// A recovered flake (>0 empty retries, but the call ultimately served)
    /// must not be reported as the dead-lane signal.
    #[test]
    fn transient_recovered_is_not_served_empty() {
        let recovered = DispatchOutcome::EmptyCompletionTransientRecovered {
            completion_tokens: 487,
            content_len: 1666,
            empty_retries: 3,
        };
        assert_eq!(recovered.label(), "empty_completion_transient_recovered");
        let is_terminal = matches!(recovered, DispatchOutcome::EmptyCompletionTerminal { .. });
        assert!(!is_terminal);
    }

    #[test]
    fn outcome_usage_limit_from_quota() {
        let outcome =
            DispatchOutcome::from_error_message("provider returned 429 rate limit exceeded");
        assert_eq!(outcome, DispatchOutcome::UsageLimit);
    }

    #[test]
    fn outcome_provider_error_class() {
        let outcome = DispatchOutcome::from_error_message("anthropic API error: overloaded");
        match &outcome {
            DispatchOutcome::ProviderError { class } => assert_eq!(class, "api_error"),
            other => panic!("expected provider_error, got {other:?}"),
        }
    }

    #[test]
    fn provenance_inherited_marker_is_stable() {
        assert_eq!(
            DispatchProvenance::INHERITED_FROM_PRIMARY,
            "inherited_from_primary"
        );
        let inherited_provider = DispatchProvenance {
            provider: Some(DispatchProvenance::INHERITED_FROM_PRIMARY.to_string()),
            ..Default::default()
        };
        let rendered = inherited_provider.to_json();
        assert_eq!(rendered["provider"], "inherited_from_primary");
        // Unset fields are written as the explicit "unknown" sentinel, so a
        // consumer never has to handle a missing key.
        assert_eq!(rendered["model"], "unknown");
    }
}
harn_vm/llm/resolved_dispatch.rs

harn_vm/llm/
resolved_dispatch.rs