Skip to main content

harn_vm/stdlib/template/
llm_context.rs

//! Ambient render-time LLM context exposed to `.harn.prompt` templates.
//!
//! When `render()` / `render_prompt()` / `render_string()` is invoked from
//! within an LLM-aware frame (`llm_call`, the default handler stack, or
//! `agent_loop`), the active provider/model/family/capabilities are
//! published as the reserved `llm` scope key so authors can write
//! capability-aware partials without manual plumbing:
//!
//! ```text
//! {{ if llm.capabilities.native_tools }}
//!   Call `finish_task` when done.
//! {{ else }}
//!   When done, output: `<<DONE>>`
//! {{ end }}
//! ```
//!
//! Bare `render()` calls outside any LLM frame leave `llm = nil`, so
//! templates branch on `{{ if llm }}` for the doc-gen / CI paths.
//!
//! The context lives in a thread-local stack so concurrent agent_loop
//! iterations on different threads stay isolated; nested
//! push/pop pairs (e.g. an inner `llm_call` from a middleware handler)
//! shadow the outer frame for the duration of the inner render.
24
25use std::cell::RefCell;
26use std::collections::BTreeMap;
27use std::rc::Rc;
28
29use crate::value::VmValue;
30
31/// Resolved provider/model identity plus the corresponding capability
32/// snapshot, materialized at LLM-frame entry and injected as the `llm`
33/// binding during any `render()` call inside that frame.
34#[derive(Debug, Clone)]
35pub struct LlmRenderContext {
36    pub provider: String,
37    pub model: String,
38    pub family: String,
39    /// Snapshot of `provider_capabilities(provider, model)` — a
40    /// `VmValue::Dict` shaped exactly like the builtin's return value.
41    pub capabilities: VmValue,
42}
43
44impl LlmRenderContext {
45    /// Build a context from resolved provider/model strings, looking up
46    /// the capability snapshot and deriving the canonical model family.
47    pub fn resolve(provider: &str, model: &str) -> Self {
48        let caps = crate::llm::capabilities::lookup(provider, model);
49        let capabilities =
50            crate::llm::config_builtins::capabilities_to_vm_value(provider, model, &caps);
51        Self {
52            provider: provider.to_string(),
53            model: model.to_string(),
54            family: derive_family(provider, model),
55            capabilities,
56        }
57    }
58
59    /// Materialize the context as the `llm` scope value:
60    /// `{provider, model, family, capabilities: <provider_capabilities dict>}`.
61    pub fn to_vm_value(&self) -> VmValue {
62        let mut dict = BTreeMap::new();
63        dict.insert(
64            "provider".to_string(),
65            VmValue::String(Rc::from(self.provider.as_str())),
66        );
67        dict.insert(
68            "model".to_string(),
69            VmValue::String(Rc::from(self.model.as_str())),
70        );
71        dict.insert(
72            "family".to_string(),
73            VmValue::String(Rc::from(self.family.as_str())),
74        );
75        dict.insert("capabilities".to_string(), self.capabilities.clone());
76        VmValue::Dict(Rc::new(dict))
77    }
78}
79
80thread_local! {
81    static LLM_RENDER_STACK: RefCell<Vec<LlmRenderContext>> = const { RefCell::new(Vec::new()) };
82}
83
84/// Push a frame onto the ambient render-context stack. Pair with
85/// `pop_llm_render_context` (or use `LlmRenderContextGuard`) so the
86/// stack stays balanced even on the unwind path.
87pub fn push_llm_render_context(ctx: LlmRenderContext) {
88    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().push(ctx));
89}
90
91/// Pop the most recently pushed frame. Returns `None` (rather than
92/// panicking) if the stack was empty, since the host may legitimately
93/// unwind through a balanced push/pop sequence.
94pub fn pop_llm_render_context() -> Option<LlmRenderContext> {
95    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().pop())
96}
97
98/// Return a clone of the active frame, or `None` if no LLM context is
99/// in scope. Render entry-points use this to decide whether to inject
100/// the `llm` binding.
101pub fn current_llm_render_context() -> Option<LlmRenderContext> {
102    LLM_RENDER_STACK.with(|stack| stack.borrow().last().cloned())
103}
104
105/// Reset the stack — wired into `reset_thread_local_state` so tests
106/// and serialized adapter sessions start clean.
107pub(crate) fn reset_llm_render_stack() {
108    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().clear());
109}
110
/// RAII guard that pushes a context on construction and pops on drop.
///
/// Use this in Rust hosts (e.g. `llm_call_impl`) so the stack stays
/// balanced across `?`-shortcircuits and panics.
///
/// The guard must be bound to a named variable for as long as the frame
/// should stay active: a guard that is discarded immediately pops its frame
/// right away, which is why the type is `#[must_use]`.
#[must_use = "the frame is popped as soon as the guard is dropped; bind it to a variable"]
pub struct LlmRenderContextGuard {
    /// Stack depth recorded immediately after this guard pushed its frame.
    /// Lets the drop path detect misuse (drop-order inversion across nested
    /// guards) instead of silently popping the wrong frame. The consistency
    /// check itself only runs in debug builds.
    expected_depth: usize,
}
120
121impl LlmRenderContextGuard {
122    pub fn enter(ctx: LlmRenderContext) -> Self {
123        push_llm_render_context(ctx);
124        let depth = LLM_RENDER_STACK.with(|stack| stack.borrow().len());
125        Self {
126            expected_depth: depth,
127        }
128    }
129}
130
131impl Drop for LlmRenderContextGuard {
132    fn drop(&mut self) {
133        let depth = LLM_RENDER_STACK.with(|stack| stack.borrow().len());
134        debug_assert_eq!(
135            depth, self.expected_depth,
136            "LlmRenderContextGuard nested-drop order violated",
137        );
138        pop_llm_render_context();
139    }
140}
141
/// Map a `(provider, model)` pair to a canonical model-family token
/// (`claude` / `gpt` / `gemini` / `qwen` / `llama` / `mistral` / ...).
///
/// This is distinct from `provider_family` in the capability matrix —
/// that walks sibling-provider lineage (e.g. `openrouter → openai`);
/// this walks model-id heuristics so authors can branch on the
/// underlying LLM family regardless of the routing provider.
///
/// Resolution order:
/// 1. The lowercased model id is scanned for known substrings; the first
///    family in `MARKERS` with a matching needle wins.
/// 2. Otherwise the provider is normalized (surrounding whitespace trimmed,
///    ASCII-lowercased) and mapped through known aliases.
/// 3. Otherwise the normalized provider itself is returned, or `"unknown"`
///    when it is empty.
fn derive_family(provider: &str, model: &str) -> String {
    // Families are tried in table order and the first hit wins, so a model
    // id containing needles from several families resolves to the earliest
    // entry.
    // NOTE(review): an id that is exactly `o1` / `o3` (no `-` suffix) does
    // not contain any needle below and falls through to the provider
    // fallback — confirm whether that is intended.
    const MARKERS: &[(&str, &[&str])] = &[
        ("claude", &["claude"]),
        ("gpt", &["gpt-", "gpt_", "o1-", "o3-", "o4-"]),
        ("gemini", &["gemini"]),
        ("qwen", &["qwen"]),
        ("llama", &["llama"]),
        ("mistral", &["mistral", "mixtral"]),
        ("deepseek", &["deepseek"]),
        ("phi", &["phi-", "phi_"]),
        ("grok", &["grok"]),
        ("command", &["command-", "command_"]),
    ];

    let model_lc = model.to_ascii_lowercase();
    let by_model = MARKERS
        .iter()
        .find(|(_, needles)| needles.iter().any(|needle| model_lc.contains(*needle)));
    if let Some((family, _)) = by_model {
        return (*family).to_string();
    }

    // Model id gave us nothing — fall back to the routing provider mapped
    // through known aliases so the family token stays stable when the same
    // model is reached via different providers. The provider is normalized
    // first so `"Anthropic"` and `" openai "` hit the same aliases as their
    // canonical spellings, mirroring the case-insensitive model match.
    let provider_norm = provider.trim().to_ascii_lowercase();
    match provider_norm.as_str() {
        "anthropic" | "bedrock" | "vertex-anthropic" => "claude".to_string(),
        "openai" | "azure" => "gpt".to_string(),
        "gemini" | "vertex" | "google" => "gemini".to_string(),
        "" => "unknown".to_string(),
        _ => provider_norm,
    }
}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Family token of the innermost frame, or `None` when the stack is empty.
    fn active_family() -> Option<String> {
        current_llm_render_context().map(|ctx| ctx.family)
    }

    /// A recognizable model id decides the family even when the routing
    /// provider is an aggregator or a local runner.
    #[test]
    fn family_from_model_id_takes_precedence() {
        let cases = [
            ("openrouter", "anthropic/claude-3-5-sonnet", "claude"),
            ("openrouter", "openai/gpt-4o", "gpt"),
            ("openrouter", "google/gemini-1.5-pro", "gemini"),
            ("ollama", "qwen3.6:35b-a3b-coding-nvfp4", "qwen"),
        ];
        for (provider, model, expected) in cases {
            assert_eq!(derive_family(provider, model), expected);
        }
    }

    /// With an unrecognized model id the provider alias (or the provider
    /// name itself) supplies the family.
    #[test]
    fn family_falls_back_to_provider_alias() {
        let cases = [
            ("anthropic", "unknown-future-model", "claude"),
            ("azure", "deployment-xyz", "gpt"),
            ("vertex", "model-xyz", "gemini"),
            ("local", "anonymous-snapshot", "local"),
            ("", "", "unknown"),
        ];
        for (provider, model, expected) in cases {
            assert_eq!(derive_family(provider, model), expected);
        }
    }

    /// Frames shadow each other in LIFO order and unwinding them restores
    /// the previous frame, then the empty state.
    #[test]
    fn push_pop_stack_round_trip() {
        reset_llm_render_stack();
        assert!(current_llm_render_context().is_none());

        push_llm_render_context(LlmRenderContext::resolve("anthropic", "claude-3-5-sonnet"));
        assert_eq!(active_family(), Some("claude".to_string()));

        push_llm_render_context(LlmRenderContext::resolve("openai", "gpt-4o"));
        assert_eq!(active_family(), Some("gpt".to_string()));

        pop_llm_render_context();
        assert_eq!(active_family(), Some("claude".to_string()));

        pop_llm_render_context();
        assert!(current_llm_render_context().is_none());
    }

    /// The guard's frame is visible while the guard lives and gone once it
    /// leaves scope.
    #[test]
    fn guard_pops_on_drop() {
        reset_llm_render_stack();
        {
            let ctx = LlmRenderContext::resolve("anthropic", "claude-3-5-sonnet");
            let _guard = LlmRenderContextGuard::enter(ctx);
            assert!(current_llm_render_context().is_some());
        }
        assert!(current_llm_render_context().is_none());
    }
}
247}