harn_vm/stdlib/template/
llm_context.rs

1//! Ambient render-time LLM context exposed to `.harn.prompt` templates.
2//!
3//! When `render()` / `render_prompt()` / `render_string()` is invoked from
4//! within an LLM-aware frame (`llm_call`, the default handler stack, or
5//! `agent_loop`), the active provider/model/family/capabilities are
6//! published as the reserved `llm` scope key so authors can write
7//! capability-aware partials without manual plumbing:
8//!
9//! ```text
10//! {{ if llm.capabilities.native_tools }}
11//!   Call `finish_task` when done.
12//! {{ else }}
13//!   When done, output: `<<DONE>>`
14//! {{ end }}
15//! ```
16//!
17//! Bare `render()` calls outside any LLM frame leave `llm = nil`, so
18//! templates branch on `{{ if llm }}` for the doc-gen / CI paths.
19//!
20//! The context lives in a thread-local stack so concurrent agent_loop
21//! iterations on different threads stay isolated; nested
22//! push/pop pairs (e.g. an inner `llm_call` from a middleware handler)
23//! shadow the outer frame for the duration of the inner render.
24
25use std::cell::RefCell;
26use std::collections::BTreeMap;
27
28use crate::value::VmValue;
29
30/// Resolved provider/model identity plus the corresponding capability
31/// snapshot, materialized at LLM-frame entry and injected as the `llm`
32/// binding during any `render()` call inside that frame.
33#[derive(Debug, Clone)]
34pub struct LlmRenderContext {
35    pub provider: String,
36    pub model: String,
37    pub family: String,
38    /// Snapshot of `provider_capabilities(provider, model)` — a
39    /// `VmValue::Dict` shaped exactly like the builtin's return value.
40    pub capabilities: VmValue,
41}
42
43impl LlmRenderContext {
44    /// Build a context from resolved provider/model strings, looking up
45    /// the capability snapshot and deriving the canonical model family.
46    pub fn resolve(provider: &str, model: &str) -> Self {
47        let caps = crate::llm::capabilities::lookup(provider, model);
48        let capabilities =
49            crate::llm::config_builtins::capabilities_to_vm_value(provider, model, &caps);
50        Self {
51            provider: provider.to_string(),
52            model: model.to_string(),
53            family: crate::llm_config::model_family(provider, model),
54            capabilities,
55        }
56    }
57
58    /// Materialize the context as the `llm` scope value:
59    /// `{provider, model, family, capabilities: <provider_capabilities dict>}`.
60    pub fn to_vm_value(&self) -> VmValue {
61        let mut dict = BTreeMap::new();
62        dict.insert(
63            "provider".to_string(),
64            VmValue::String(std::sync::Arc::from(self.provider.as_str())),
65        );
66        dict.insert(
67            "model".to_string(),
68            VmValue::String(std::sync::Arc::from(self.model.as_str())),
69        );
70        dict.insert(
71            "family".to_string(),
72            VmValue::String(std::sync::Arc::from(self.family.as_str())),
73        );
74        dict.insert("capabilities".to_string(), self.capabilities.clone());
75        VmValue::Dict(std::sync::Arc::new(dict))
76    }
77}
78
79thread_local! {
80    static LLM_RENDER_STACK: RefCell<Vec<LlmRenderContext>> = const { RefCell::new(Vec::new()) };
81}
82
83/// Push a frame onto the ambient render-context stack. Pair with
84/// `pop_llm_render_context` (or use `LlmRenderContextGuard`) so the
85/// stack stays balanced even on the unwind path.
86pub fn push_llm_render_context(ctx: LlmRenderContext) {
87    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().push(ctx));
88}
89
90/// Pop the most recently pushed frame. Returns `None` (rather than
91/// panicking) if the stack was empty, since the host may legitimately
92/// unwind through a balanced push/pop sequence.
93pub fn pop_llm_render_context() -> Option<LlmRenderContext> {
94    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().pop())
95}
96
97/// Return a clone of the active frame, or `None` if no LLM context is
98/// in scope. Render entry-points use this to decide whether to inject
99/// the `llm` binding.
100pub fn current_llm_render_context() -> Option<LlmRenderContext> {
101    LLM_RENDER_STACK.with(|stack| stack.borrow().last().cloned())
102}
103
104/// Reset the stack — wired into `reset_thread_local_state` so tests
105/// and serialized adapter sessions start clean.
106pub(crate) fn reset_llm_render_stack() {
107    LLM_RENDER_STACK.with(|stack| stack.borrow_mut().clear());
108}
109
/// RAII guard that pushes a context on construction and pops on drop.
///
/// Use this in Rust hosts (e.g. `llm_call_impl`) so the stack stays
/// balanced across `?`-shortcircuits and panics.
///
/// The guard must be bound to a named variable (`let _guard = ...`).
/// Discarding the return value of `enter` drops the guard immediately,
/// which pops the frame before any render can observe it.
#[derive(Debug)]
#[must_use = "dropping the guard immediately pops the context it just pushed"]
pub struct LlmRenderContextGuard {
    /// Stack depth observed right after this guard's push. Compared
    /// against the live depth on drop so a misuse (drop-order inversion
    /// across nested guards) surfaces as a `debug_assert` instead of
    /// silently popping the wrong frame. The comparison is compiled out
    /// of release builds; only this one `usize` is carried around.
    expected_depth: usize,
}
119
120impl LlmRenderContextGuard {
121    pub fn enter(ctx: LlmRenderContext) -> Self {
122        push_llm_render_context(ctx);
123        let depth = LLM_RENDER_STACK.with(|stack| stack.borrow().len());
124        Self {
125            expected_depth: depth,
126        }
127    }
128}
129
130impl Drop for LlmRenderContextGuard {
131    fn drop(&mut self) {
132        let depth = LLM_RENDER_STACK.with(|stack| stack.borrow().len());
133        debug_assert_eq!(
134            depth, self.expected_depth,
135            "LlmRenderContextGuard nested-drop order violated",
136        );
137        pop_llm_render_context();
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Thin alias for the family derivation used by `LlmRenderContext::resolve`.
    fn derive_family(provider: &str, model: &str) -> String {
        crate::llm_config::model_family(provider, model)
    }

    /// Family of the innermost frame, or `None` when the stack is empty.
    fn current_family() -> Option<String> {
        current_llm_render_context().map(|c| c.family)
    }

    #[test]
    fn family_from_model_id_takes_precedence() {
        assert_eq!(
            derive_family("openrouter", "anthropic/claude-3-5-sonnet"),
            "anthropic-claude"
        );
        assert_eq!(derive_family("openrouter", "openai/gpt-4o"), "openai-gpt");
        assert_eq!(
            derive_family("openrouter", "google/gemini-1.5-pro"),
            "google-gemini"
        );
        assert_eq!(
            derive_family("ollama", "qwen3.6:35b-a3b-coding-nvfp4"),
            "qwen"
        );
    }

    #[test]
    fn family_falls_back_to_provider_alias() {
        assert_eq!(
            derive_family("anthropic", "unknown-future-model"),
            "anthropic-claude"
        );
        assert_eq!(derive_family("azure", "deployment-xyz"), "openai-gpt");
        assert_eq!(derive_family("vertex", "model-xyz"), "google-gemini");
        assert_eq!(derive_family("local", "anonymous-snapshot"), "local");
        assert_eq!(derive_family("", ""), "unknown");
    }

    #[test]
    fn push_pop_stack_round_trip() {
        reset_llm_render_stack();
        assert!(current_llm_render_context().is_none());
        push_llm_render_context(LlmRenderContext::resolve("anthropic", "claude-3-5-sonnet"));
        assert_eq!(current_family(), Some("anthropic-claude".to_string()));
        push_llm_render_context(LlmRenderContext::resolve("openai", "gpt-4o"));
        assert_eq!(current_family(), Some("openai-gpt".to_string()));
        pop_llm_render_context();
        assert_eq!(current_family(), Some("anthropic-claude".to_string()));
        pop_llm_render_context();
        assert!(current_llm_render_context().is_none());
    }

    #[test]
    fn pop_on_empty_stack_returns_none() {
        reset_llm_render_stack();
        assert!(pop_llm_render_context().is_none());
        assert!(current_llm_render_context().is_none());
    }

    #[test]
    fn guard_pops_on_drop() {
        reset_llm_render_stack();
        {
            let _guard = LlmRenderContextGuard::enter(LlmRenderContext::resolve(
                "anthropic",
                "claude-3-5-sonnet",
            ));
            assert!(current_llm_render_context().is_some());
        }
        assert!(current_llm_render_context().is_none());
    }

    #[test]
    fn nested_guards_shadow_then_restore_outer_frame() {
        reset_llm_render_stack();
        let outer = LlmRenderContextGuard::enter(LlmRenderContext::resolve(
            "anthropic",
            "claude-3-5-sonnet",
        ));
        {
            let _inner =
                LlmRenderContextGuard::enter(LlmRenderContext::resolve("openai", "gpt-4o"));
            // The inner frame shadows the outer one while it is alive.
            assert_eq!(current_family(), Some("openai-gpt".to_string()));
        }
        // Dropping the inner guard re-exposes the outer frame.
        assert_eq!(current_family(), Some("anthropic-claude".to_string()));
        drop(outer);
        assert!(current_llm_render_context().is_none());
    }

    #[test]
    fn to_vm_value_exposes_reserved_keys() {
        let ctx = LlmRenderContext::resolve("anthropic", "claude-3-5-sonnet");
        match ctx.to_vm_value() {
            VmValue::Dict(dict) => {
                let keys: Vec<&str> = dict.keys().map(String::as_str).collect();
                assert_eq!(keys, ["capabilities", "family", "model", "provider"]);
            }
            _ => panic!("to_vm_value must produce a dict"),
        }
    }
}
harn_vm/stdlib/template/llm_context.rs

harn_vm/stdlib/template/
llm_context.rs