harn-vm 0.7.27

Async bytecode virtual machine for the Harn programming language
//! Option and payload types for `llm_call`: `LlmCallOptions`,
//! `LlmRequestPayload`, plus the `tool_search` / `thinking` sub-configs.

use crate::value::VmValue;

/// Sender for streaming text deltas from an in-flight LLM call.
pub(crate) type DeltaSender = tokio::sync::mpsc::UnboundedSender<String>;
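
// Illustrative sketch (test-only, not part of the crate surface): the
// unbounded channel behind `DeltaSender` preserves delta order, so a
// consumer can rebuild the streamed text by concatenating chunks in
// receive order.
#[cfg(test)]
mod delta_sender_example {
    use super::DeltaSender;

    #[test]
    fn deltas_arrive_in_send_order() {
        let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<String>();
        let tx: DeltaSender = tx;
        tx.send("Hel".to_string()).unwrap();
        tx.send("lo".to_string()).unwrap();
        drop(tx); // sender dropped: receiver drains buffered deltas, then closes

        let mut text = String::new();
        while let Ok(chunk) = rx.try_recv() {
            text.push_str(&chunk);
        }
        assert_eq!(text, "Hello");
    }
}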

/// Extended thinking configuration.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) enum ThinkingConfig {
    /// Enable with provider defaults.
    Enabled,
    /// Enable with a specific token budget.
    WithBudget(i64),
}
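
// Test-only sketch: with the plain `Serialize` derive above, serde emits its
// default externally-tagged enum form. Providers may remap this shape into
// their own wire format; this only pins down the derived representation.
#[cfg(test)]
mod thinking_config_example {
    use super::ThinkingConfig;

    #[test]
    fn serializes_externally_tagged() {
        let enabled = serde_json::to_value(ThinkingConfig::Enabled).unwrap();
        assert_eq!(enabled, serde_json::json!("Enabled"));

        let budgeted = serde_json::to_value(ThinkingConfig::WithBudget(2048)).unwrap();
        assert_eq!(budgeted, serde_json::json!({"WithBudget": 2048}));
    }
}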

/// Which tool-search variant to use. Two shapes today, matching the two
/// Anthropic variants (also reused as the mental model for the OpenAI path
/// landing in harn#71). Scripts write the lower-case short name.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchVariant {
    /// BM25 / natural-language queries. Default when the user wrote just
    /// `tool_search: true` or omitted the variant.
    Bm25,
    /// Python-regex queries (more precise, less ergonomic).
    Regex,
}

impl ToolSearchVariant {
    pub(crate) fn as_short(self) -> &'static str {
        match self {
            ToolSearchVariant::Bm25 => "bm25",
            ToolSearchVariant::Regex => "regex",
        }
    }
}

/// Implementation of the client-executed tool-search fallback (harn#70).
/// Only consulted when `ToolSearchMode::Client` resolves (either
/// explicit or via auto-fallback when the provider lacks native
/// support). Orthogonal to `ToolSearchVariant`: a user can ask for
/// `variant: bm25` (the model sees the BM25-style tool) and
/// `strategy: semantic` (the host runs embedding search under the
/// hood).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchStrategy {
    /// In-tree BM25 over the deferred tool corpus. **Default.**
    Bm25,
    /// In-tree regex over the deferred tool corpus (case-insensitive).
    Regex,
    /// Delegated to the host via the `tool_search/query` bridge RPC so
    /// integrators can wire embeddings without Harn depending on ML
    /// crates.
    Semantic,
    /// Pure host-side implementation; the VM just round-trips the query
    /// and promotes whatever names the host returns.
    Host,
}

impl ToolSearchStrategy {
    pub(crate) fn as_short(self) -> &'static str {
        match self {
            ToolSearchStrategy::Bm25 => "bm25",
            ToolSearchStrategy::Regex => "regex",
            ToolSearchStrategy::Semantic => "semantic",
            ToolSearchStrategy::Host => "host",
        }
    }

    /// Whether this strategy runs entirely inside the VM (no bridge
    /// hop). Used by the dispatch path to decide between the sync
    /// in-tree index and the `tool_search/query` RPC.
    #[allow(dead_code)] // consumed by harness tests + future dispatch refactors
    pub(crate) fn is_in_tree(self) -> bool {
        matches!(self, ToolSearchStrategy::Bm25 | ToolSearchStrategy::Regex)
    }

    /// Map to the in-tree strategy enum used by
    /// [`crate::llm::tool_search::run_in_tree`]. Panics on non-in-tree
    /// strategies — callers must gate on `is_in_tree()`.
    pub(crate) fn as_in_tree(self) -> crate::llm::tool_search::InTreeStrategy {
        match self {
            ToolSearchStrategy::Bm25 => crate::llm::tool_search::InTreeStrategy::Bm25,
            ToolSearchStrategy::Regex => crate::llm::tool_search::InTreeStrategy::Regex,
            _ => unreachable!("as_in_tree called on {self:?}"),
        }
    }

    /// Default strategy for a given variant when the user did not
    /// specify one explicitly. Native-facing variant leaks into the
    /// client path as a sensible default: `variant: regex` users
    /// probably want regex semantics in the fallback too.
    pub(crate) fn default_for_variant(variant: ToolSearchVariant) -> Self {
        match variant {
            ToolSearchVariant::Bm25 => ToolSearchStrategy::Bm25,
            ToolSearchVariant::Regex => ToolSearchStrategy::Regex,
        }
    }
}
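
// Test-only sketch of the variant/strategy split described above: the
// variant picks the tool shape the model sees, the strategy picks the host
// implementation, and `is_in_tree` gates whether dispatch can stay inside
// the VM or must take the `tool_search/query` bridge hop.
#[cfg(test)]
mod strategy_example {
    use super::{ToolSearchStrategy, ToolSearchVariant};

    #[test]
    fn variant_defaults_and_in_tree_gate() {
        // An unset strategy falls back to the variant's flavor.
        assert_eq!(
            ToolSearchStrategy::default_for_variant(ToolSearchVariant::Regex),
            ToolSearchStrategy::Regex
        );
        // `variant: bm25` + `strategy: semantic` is a legal, orthogonal combo;
        // semantic search is host-delegated, so it is not in-tree.
        assert!(ToolSearchStrategy::Bm25.is_in_tree());
        assert!(!ToolSearchStrategy::Semantic.is_in_tree());
    }
}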

/// How to resolve `tool_search` against the active provider.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchMode {
    /// Auto-select: native if the provider supports it, client-executed
    /// fallback otherwise (harn#70). Default.
    Auto,
    /// Force the provider's native mechanism; error if unsupported.
    Native,
    /// Force client-executed fallback (harn#70) even when native is
    /// available.
    Client,
}
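
// Illustrative, test-only sketch of the resolution rules documented on the
// variants above, modeling the behavior with harn#70's fallback in place.
// `resolve` and `provider_supports_native` are hypothetical stand-ins; the
// real capability lookup lives in the options extractor.
#[cfg(test)]
mod mode_resolution_sketch {
    use super::ToolSearchMode;

    /// Ok(true) = native path, Ok(false) = client-executed fallback.
    fn resolve(
        mode: ToolSearchMode,
        provider_supports_native: bool,
    ) -> Result<bool, &'static str> {
        match mode {
            ToolSearchMode::Auto => Ok(provider_supports_native),
            ToolSearchMode::Native if provider_supports_native => Ok(true),
            ToolSearchMode::Native => Err("provider has no native tool_search"),
            ToolSearchMode::Client => Ok(false),
        }
    }

    #[test]
    fn auto_falls_back_when_native_is_missing() {
        assert_eq!(resolve(ToolSearchMode::Auto, true), Ok(true));
        assert_eq!(resolve(ToolSearchMode::Auto, false), Ok(false));
        assert!(resolve(ToolSearchMode::Native, false).is_err());
    }
}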

/// User-facing tool_search configuration. Parsed from the `tool_search`
/// option on `llm_call` / `agent_loop`. Absent means no deferred-loading
/// machinery is engaged — tools ship eagerly as always.
#[derive(Clone, Debug)]
pub(crate) struct ToolSearchConfig {
    pub variant: ToolSearchVariant,
    pub mode: ToolSearchMode,
    /// Tool names that must remain eager even when `defer_loading: true`
    /// is otherwise set on them. Useful as a "safety net" of tools a
    /// skill wants always available regardless of the tool-search
    /// index's decisions. Only consumed by the client-executed path —
    /// for the native Anthropic path, eagerness is already controlled
    /// per-tool via `defer_loading`.
    pub always_loaded: Vec<String>,
    /// Client-mode implementation strategy. When unset, defaults to
    /// `ToolSearchStrategy::default_for_variant(variant)`.
    pub strategy: Option<ToolSearchStrategy>,
    /// Soft cap, in tokens, on the deferred tool definitions the
    /// client-executed loop may promote into the eager set over the
    /// life of this call. Oldest-promoted tools are evicted when the
    /// cap is hit. `None` means no cap — rely on the per-search
    /// `max_results` instead.
    pub budget_tokens: Option<i64>,
    /// Override for the synthetic tool's name. Default
    /// `__harn_tool_search`. Lets skills with a brand-specific vocabulary
    /// name the tool something the model will understand out of the
    /// box (`find_tool`, `discover_tool`, etc.).
    pub name: Option<String>,
    /// When true, the client-mode loop includes a short stub line for
    /// each deferred tool (name + one-line summary) alongside the
    /// synthetic search tool so the model knows what's available
    /// without calling search first. Default: `false` — the Anthropic
    /// native path also ships no stubs.
    pub include_stub_listing: bool,
    /// Canonical native-shape JSON for every tool that had
    /// `defer_loading: true` at option-parse time, keyed by tool name.
    /// Populated by `apply_tool_search_client_injection` and later
    /// drained by `AgentLoopState::new` when it builds the per-loop
    /// client state. Never populated for native mode — the provider
    /// handles deferral server-side.
    pub deferred_bodies: std::collections::BTreeMap<String, serde_json::Value>,
}

impl ToolSearchConfig {
    /// Default when the user writes `tool_search: true` with no detail.
    pub(crate) fn default_bm25_auto() -> Self {
        Self {
            variant: ToolSearchVariant::Bm25,
            mode: ToolSearchMode::Auto,
            always_loaded: Vec::new(),
            strategy: None,
            budget_tokens: None,
            name: None,
            include_stub_listing: false,
            deferred_bodies: std::collections::BTreeMap::new(),
        }
    }

    /// Resolve the effective strategy, falling back to the variant
    /// default when the user left `strategy` unset.
    pub(crate) fn effective_strategy(&self) -> ToolSearchStrategy {
        self.strategy
            .unwrap_or_else(|| ToolSearchStrategy::default_for_variant(self.variant))
    }

    /// Resolve the synthetic tool's name. Default matches the spec's
    /// proposed `__harn_tool_search` sentinel.
    pub(crate) fn effective_name(&self) -> &str {
        self.name.as_deref().unwrap_or("__harn_tool_search")
    }
}
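
// Test-only sketch of how `tool_search: true` resolves: the bare-bool form
// yields BM25 + auto mode, the spec's `__harn_tool_search` sentinel name,
// and a strategy inherited from the variant unless explicitly overridden.
#[cfg(test)]
mod config_defaults_example {
    use super::{ToolSearchConfig, ToolSearchStrategy, ToolSearchVariant};

    #[test]
    fn bare_true_resolves_to_bm25_defaults() {
        let cfg = ToolSearchConfig::default_bm25_auto();
        assert_eq!(cfg.effective_name(), "__harn_tool_search");
        assert_eq!(cfg.effective_strategy(), ToolSearchStrategy::Bm25);

        // An explicit strategy overrides the variant default: the model
        // still sees the BM25-shaped tool, but the host runs `host` search.
        let cfg = ToolSearchConfig {
            strategy: Some(ToolSearchStrategy::Host),
            ..ToolSearchConfig::default_bm25_auto()
        };
        assert_eq!(cfg.variant, ToolSearchVariant::Bm25);
        assert_eq!(cfg.effective_strategy(), ToolSearchStrategy::Host);
    }
}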

/// All options for an LLM API call, extracted once from user-facing args.
#[derive(Clone)]
pub(crate) struct LlmCallOptions {
    // --- Routing ---
    pub provider: String,
    pub model: String,
    pub api_key: String,

    // --- Conversation ---
    pub messages: Vec<serde_json::Value>,
    pub system: Option<String>,
    /// Optional short summary string prepended to the system prompt.
    /// Populated by auto-compaction at mid-loop boundaries; callers
    /// typically leave this `None`.
    pub transcript_summary: Option<String>,

    // --- Generation ---
    pub max_tokens: i64,
    pub temperature: Option<f64>,
    pub top_p: Option<f64>,
    pub top_k: Option<i64>,
    pub stop: Option<Vec<String>>,
    pub seed: Option<i64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,

    // --- Structured output ---
    pub response_format: Option<String>,
    pub json_schema: Option<serde_json::Value>,
    pub output_schema: Option<serde_json::Value>,
    pub output_validation: Option<String>,

    // --- Thinking ---
    pub thinking: Option<ThinkingConfig>,

    // --- Tools ---
    pub tools: Option<VmValue>,
    pub native_tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    /// Progressive-disclosure configuration. When set, the options
    /// extractor resolves this against the active provider's capability
    /// matrix and, for native-supporting providers, prepends a
    /// `tool_search_tool_*_20251119` meta-tool to `native_tools`. For
    /// client-executed mode (harn#70) this carries the config forward
    /// into the agent-loop fallback. See [`ToolSearchConfig`].
    // Consumed by the options extractor; persisted for transcript/replay
    // fidelity and harn#70's client-executed loop.
    #[allow(dead_code)]
    pub tool_search: Option<ToolSearchConfig>,

    // --- Caching ---
    pub cache: bool,

    // --- Transport ---
    pub timeout: Option<u64>,
    /// Per-chunk idle timeout for streaming responses (seconds).
    pub idle_timeout: Option<u64>,
    /// When true, use streaming SSE transport (token-by-token deltas).
    /// When false, use synchronous request/response. Default: true.
    pub stream: bool,

    // --- Provider-specific overrides ---
    pub provider_overrides: Option<serde_json::Value>,

    // --- Assistant prefill ---
    /// Optional prefill string. When set, providers append a final
    /// `role: "assistant"` message with this content so the model
    /// continues from there. Cleared by the agent loop after each turn.
    /// See `llm::providers::anthropic` and `llm::providers::openai_compat`
    /// for provider-specific plumbing.
    pub prefill: Option<String>,
    /// Optional prompt-structure transform applied immediately before
    /// each provider call.
    pub structural_experiment:
        Option<crate::llm::structural_experiments::StructuralExperimentConfig>,
    /// Metadata for the transform actually applied to this call.
    pub applied_structural_experiment:
        Option<crate::llm::structural_experiments::AppliedStructuralExperiment>,
}
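
// Illustrative, test-only sketch of the prefill plumbing described on the
// `prefill` field: providers append a final assistant message carrying the
// prefill text. `apply_prefill` is a hypothetical stand-in for the real
// provider-side logic in `llm::providers::*`.
#[cfg(test)]
mod prefill_sketch {
    fn apply_prefill(
        mut messages: Vec<serde_json::Value>,
        prefill: Option<&str>,
    ) -> Vec<serde_json::Value> {
        if let Some(text) = prefill {
            // The model continues generating from this partial assistant turn.
            messages.push(serde_json::json!({"role": "assistant", "content": text}));
        }
        messages
    }

    #[test]
    fn prefill_appends_assistant_message() {
        let msgs = apply_prefill(
            vec![serde_json::json!({"role": "user", "content": "List three fruits."})],
            Some("1."),
        );
        assert_eq!(msgs.last().unwrap()["role"], "assistant");
        assert_eq!(msgs.last().unwrap()["content"], "1.");
    }
}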

/// Resolve effective request timeout: explicit value > `HARN_LLM_TIMEOUT` env > 120s default.
fn resolve_timeout(explicit: Option<u64>) -> u64 {
    explicit.unwrap_or_else(|| {
        std::env::var("HARN_LLM_TIMEOUT")
            .ok()
            .and_then(|v| v.parse::<u64>().ok())
            .unwrap_or(120)
    })
}

impl LlmCallOptions {
    pub(crate) fn resolve_timeout(&self) -> u64 {
        resolve_timeout(self.timeout)
    }
}
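
// Test-only sketch of the precedence chain: an explicit `timeout` wins over
// the `HARN_LLM_TIMEOUT` env var, which wins over the 120s default. Only
// the explicit leg is asserted here; mutating process env in tests is
// race-prone, so the env and default legs are left to the comments above.
#[cfg(test)]
mod timeout_example {
    use super::resolve_timeout;

    #[test]
    fn explicit_timeout_wins() {
        // An explicit value short-circuits before the env/default fallbacks.
        assert_eq!(resolve_timeout(Some(5)), 5);
    }
}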

/// Send-safe subset of `LlmCallOptions` used for provider transport.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) struct LlmRequestPayload {
    pub provider: String,
    pub model: String,
    pub api_key: String,
    pub messages: Vec<serde_json::Value>,
    pub system: Option<String>,
    pub max_tokens: i64,
    pub temperature: Option<f64>,
    pub top_p: Option<f64>,
    pub top_k: Option<i64>,
    pub stop: Option<Vec<String>>,
    pub seed: Option<i64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    pub response_format: Option<String>,
    pub json_schema: Option<serde_json::Value>,
    pub thinking: Option<ThinkingConfig>,
    pub native_tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    pub cache: bool,
    pub timeout: Option<u64>,
    pub stream: bool,
    pub provider_overrides: Option<serde_json::Value>,
    pub prefill: Option<String>,
}

impl LlmRequestPayload {
    pub(crate) fn resolve_timeout(&self) -> u64 {
        resolve_timeout(self.timeout)
    }
}

impl From<&LlmCallOptions> for LlmRequestPayload {
    fn from(opts: &LlmCallOptions) -> Self {
        Self {
            provider: opts.provider.clone(),
            model: opts.model.clone(),
            api_key: opts.api_key.clone(),
            messages: opts.messages.clone(),
            system: opts.system.clone(),
            max_tokens: opts.max_tokens,
            temperature: opts.temperature,
            top_p: opts.top_p,
            top_k: opts.top_k,
            stop: opts.stop.clone(),
            seed: opts.seed,
            frequency_penalty: opts.frequency_penalty,
            presence_penalty: opts.presence_penalty,
            response_format: opts.response_format.clone(),
            json_schema: opts.json_schema.clone(),
            thinking: opts.thinking.clone(),
            native_tools: opts.native_tools.clone(),
            tool_choice: opts.tool_choice.clone(),
            cache: opts.cache,
            timeout: opts.timeout,
            stream: opts.stream,
            provider_overrides: opts.provider_overrides.clone(),
            prefill: opts.prefill.clone(),
        }
    }
}

#[cfg(test)]
pub(super) fn base_opts(provider: &str) -> LlmCallOptions {
    use std::rc::Rc;
    LlmCallOptions {
        provider: provider.to_string(),
        model: "test-model".to_string(),
        api_key: String::new(),
        messages: vec![serde_json::json!({"role": "user", "content": "hello"})],
        system: None,
        transcript_summary: Some("summary".to_string()),
        max_tokens: 64,
        temperature: Some(0.2),
        top_p: Some(0.8),
        top_k: Some(40),
        stop: Some(vec!["STOP".to_string()]),
        seed: Some(7),
        frequency_penalty: Some(0.1),
        presence_penalty: Some(0.2),
        response_format: Some("json".to_string()),
        json_schema: Some(serde_json::json!({"type": "object"})),
        output_schema: Some(serde_json::json!({"type": "object"})),
        output_validation: Some("error".to_string()),
        thinking: None,
        tools: Some(VmValue::String(Rc::from("vm-local-tools"))),
        native_tools: Some(vec![
            serde_json::json!({"type": "function", "function": {"name": "tool"}}),
        ]),
        tool_choice: Some(serde_json::json!({
            "type": "function",
            "function": {"name": "tool"}
        })),
        tool_search: None,
        cache: true,
        stream: true,
        timeout: Some(5),
        idle_timeout: None,
        provider_overrides: Some(serde_json::json!({"custom_flag": true})),
        prefill: None,
        structural_experiment: None,
        applied_structural_experiment: None,
    }
}

#[cfg(test)]
mod tests {
    use super::{base_opts, LlmRequestPayload};

    fn assert_send<T: Send>() {}

    #[test]
    fn request_payload_is_send_safe_and_drops_vm_local_fields() {
        let payload = LlmRequestPayload::from(&base_opts("openai"));
        assert_send::<LlmRequestPayload>();
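        // VM-local fields (`tools`, `output_schema`, `output_validation`,
        // `idle_timeout`, etc.) simply do not exist on the payload type, so
        // "dropping" them is enforced at compile time rather than asserted here.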
        assert_eq!(payload.provider, "openai");
        assert_eq!(payload.model, "test-model");
        assert!(payload.native_tools.is_some());
        assert!(payload.tool_choice.is_some());
        assert_eq!(
            payload.provider_overrides,
            Some(serde_json::json!({"custom_flag": true}))
        );
    }
}