Skip to main content

harn_vm/stdlib/template/
llm_context.rs

1//! Ambient render-time LLM context exposed to `.harn.prompt` templates.
2//!
3//! When `render()` / `render_prompt()` / `render_string()` is invoked from
4//! within an LLM-aware frame (`llm_call`, the default handler stack, or
5//! `agent_loop`), the active provider/model/family/capabilities are
6//! published as the reserved `llm` scope key so authors can write
7//! capability-aware partials without manual plumbing:
8//!
9//! ```text
10//! {{ if llm.capabilities.native_tools }}
11//!   Call `finish_task` when done.
12//! {{ else }}
13//!   When done, output: `<<DONE>>`
14//! {{ end }}
15//! ```
16//!
17//! Bare `render()` calls outside any LLM frame leave `llm = nil`, so
18//! templates branch on `{{ if llm }}` for the doc-gen / CI paths.
19//!
20//! The context lives in a thread-local stack so concurrent agent_loop
21//! iterations on different threads stay isolated; nested
22//! push/pop pairs (e.g. an inner `llm_call` from a middleware handler)
23//! shadow the outer frame for the duration of the inner render.
24
25use crate::value::VmDictExt;
26use std::cell::RefCell;
27use std::collections::BTreeMap;
28
29use crate::value::VmValue;
30
31/// Resolved provider/model identity plus the corresponding capability
32/// snapshot, materialized at LLM-frame entry and injected as the `llm`
33/// binding during any `render()` call inside that frame.
34#[derive(Debug, Clone)]
35pub struct LlmRenderContext {
36    pub provider: String,
37    pub model: String,
38    pub family: String,
39    /// Snapshot of `provider_capabilities(provider, model)` — a
40    /// `VmValue::Dict` shaped exactly like the builtin's return value.
41    pub capabilities: VmValue,
42}
43
44impl LlmRenderContext {
45    /// Build a context from resolved provider/model strings, looking up
46    /// the capability snapshot and deriving the canonical model family.
47    pub fn resolve(provider: &str, model: &str) -> Self {
48        let caps = crate::llm::capabilities::lookup(provider, model);
49        let capabilities =
50            crate::llm::config_builtins::capabilities_to_vm_value(provider, model, &caps);
51        Self {
52            provider: provider.to_string(),
53            model: model.to_string(),
54            family: crate::llm_config::model_family(provider, model),
55            capabilities,
56        }
57    }
58
59    /// Materialize the context as the `llm` scope value:
60    /// `{provider, model, family, capabilities: <provider_capabilities dict>}`.
61    pub fn to_vm_value(&self) -> VmValue {
62        let mut dict = BTreeMap::new();
63        dict.put_str("provider", self.provider.as_str());
64        dict.put_str("model", self.model.as_str());
65        dict.put_str("family", self.family.as_str());
66        dict.insert("capabilities".to_string(), self.capabilities.clone());
67        VmValue::dict(dict)
68    }
69}
70
71thread_local! {
72    static LLM_RENDER_STACK: RefCell<Vec<LlmRenderContext>> = const { RefCell::new(Vec::new()) };
73}
74
75/// Push a frame onto the ambient render-context stack. Pair with
76/// `pop_llm_render_context` (or use `LlmRenderContextGuard`) so the
77/// stack stays balanced even on the unwind path.
78pub fn push_llm_render_context(ctx: LlmRenderContext) {
79    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().push(ctx));
80}
81
82/// Pop the most recently pushed frame. Returns `None` (rather than
83/// panicking) if the stack was empty, since the host may legitimately
84/// unwind through a balanced push/pop sequence.
85pub fn pop_llm_render_context() -> Option<LlmRenderContext> {
86    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().pop())
87}
88
89/// Return a clone of the active frame, or `None` if no LLM context is
90/// in scope. Render entry-points use this to decide whether to inject
91/// the `llm` binding.
92pub fn current_llm_render_context() -> Option<LlmRenderContext> {
93    LLM_RENDER_STACK.with(|stack| stack.borrow().last().cloned())
94}
95
96/// Reset the stack — wired into `reset_thread_local_state` so tests
97/// and serialized adapter sessions start clean.
98pub(crate) fn reset_llm_render_stack() {
99    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().clear());
100}
101
/// Scope guard for an ambient LLM render frame: `enter` pushes the frame
/// and dropping the guard removes it again.
///
/// Use this in Rust hosts (e.g. `llm_call_impl`) so the stack stays
/// balanced across `?` early returns and panics.
///
/// The guard must be bound to a named variable (`let _guard = ...`). A
/// guard that is not bound, or is bound to `_`, is dropped immediately and
/// the frame disappears before any render can observe it.
///
/// The guard operates on the thread-local stack of whichever thread drops
/// it, so it should be dropped on the thread that created it.
#[must_use = "the frame is popped as soon as the guard is dropped; bind it to a named variable"]
pub struct LlmRenderContextGuard {
    /// Stack depth recorded immediately after this guard's own push.
    /// Compared with the actual depth on drop so that out-of-order drops
    /// of nested guards are detected instead of going unnoticed.
    expected_depth: usize,
}
111
112impl LlmRenderContextGuard {
113    pub fn enter(ctx: LlmRenderContext) -> Self {
114        push_llm_render_context(ctx);
115        let depth = LLM_RENDER_STACK.with(|stack| stack.borrow().len());
116        Self {
117            expected_depth: depth,
118        }
119    }
120}
121
122impl Drop for LlmRenderContextGuard {
123    fn drop(&mut self) {
124        let depth = LLM_RENDER_STACK.with(|stack| stack.borrow().len());
125        debug_assert_eq!(
126            depth, self.expected_depth,
127            "LlmRenderContextGuard nested-drop order violated",
128        );
129        pop_llm_render_context();
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the family-derivation routine exercised below.
    fn derive_family(provider: &str, model: &str) -> String {
        crate::llm_config::model_family(provider, model)
    }

    /// Family of the innermost frame on this thread, if any.
    fn active_family() -> Option<String> {
        current_llm_render_context().map(|frame| frame.family)
    }

    #[test]
    fn family_from_model_id_takes_precedence() {
        let cases = [
            ("openrouter", "anthropic/claude-3-5-sonnet", "anthropic-claude"),
            ("openrouter", "openai/gpt-4o", "openai-gpt"),
            ("openrouter", "google/gemini-1.5-pro", "google-gemini"),
            ("llamacpp", "qwen3.6-35b-a3b", "qwen"),
        ];
        for (provider, model, expected) in cases {
            assert_eq!(
                derive_family(provider, model),
                expected,
                "provider={provider:?} model={model:?}",
            );
        }
    }

    #[test]
    fn family_falls_back_to_provider_alias() {
        let cases = [
            ("anthropic", "unknown-future-model", "anthropic-claude"),
            ("azure", "deployment-xyz", "openai-gpt"),
            ("vertex", "model-xyz", "google-gemini"),
            ("local", "anonymous-snapshot", "local"),
            ("", "", "unknown"),
        ];
        for (provider, model, expected) in cases {
            assert_eq!(
                derive_family(provider, model),
                expected,
                "provider={provider:?} model={model:?}",
            );
        }
    }

    #[test]
    fn push_pop_stack_round_trip() {
        reset_llm_render_stack();
        assert!(current_llm_render_context().is_none());

        // Outer frame becomes visible.
        push_llm_render_context(LlmRenderContext::resolve("anthropic", "claude-3-5-sonnet"));
        assert_eq!(active_family().as_deref(), Some("anthropic-claude"));

        // Inner frame shadows the outer one.
        push_llm_render_context(LlmRenderContext::resolve("openai", "gpt-4o"));
        assert_eq!(active_family().as_deref(), Some("openai-gpt"));

        // Popping the inner frame reveals the outer one again.
        pop_llm_render_context();
        assert_eq!(active_family().as_deref(), Some("anthropic-claude"));

        // Popping the outer frame leaves the stack empty.
        pop_llm_render_context();
        assert!(current_llm_render_context().is_none());
    }

    #[test]
    fn guard_pops_on_drop() {
        reset_llm_render_stack();
        {
            let ctx = LlmRenderContext::resolve("anthropic", "claude-3-5-sonnet");
            let _guard = LlmRenderContextGuard::enter(ctx);
            assert!(current_llm_render_context().is_some());
        }
        assert!(current_llm_render_context().is_none());
    }
}