/** std/llm/handlers — small middleware helpers for LLM call handlers. */
import { cache_get, cache_put } from "std/cache"
/** Names of the circuits this module has already registered through circuit_breaker. */
var __llm_handler_circuits = []
/**
 * Report whether `value` can be used as a handler or predicate.
 *
 * A value qualifies when `type_of` names it "function", "closure", or "fn".
 */
fn __llm_handler_is_callable(value) -> bool {
  let callable_kinds = ["function", "closure", "fn"]
  return contains(callable_kinds, type_of(value))
}
/**
 * Build the per-call circuit name "llm:<provider>:<model>".
 *
 * Provider and model are read from `call.opts`; whichever is missing is
 * rendered as "<unset>". A nil `call` or a call without `opts` is tolerated.
 */
fn __circuit_name_for(call) -> string {
  let call_opts = call?.opts ?? {}
  let provider_label = to_string(call_opts?.provider ?? "<unset>")
  let model_label = to_string(call_opts?.model ?? "<unset>")
  let prefix = "llm:" + provider_label
  return prefix + ":" + model_label
}
/**
 * Register the circuit `name` with circuit_breaker the first time it is seen.
 *
 * Registered names are remembered in the module-level list, so later calls
 * with the same name do nothing; the `threshold` and `reset_ms` passed on
 * those later calls are not applied.
 */
fn __ensure_llm_handler_circuit(name, threshold, reset_ms) {
  let already_registered = contains(__llm_handler_circuits, name)
  if !already_registered {
    circuit_breaker(name, threshold, reset_ms)
    __llm_handler_circuits = __llm_handler_circuits + [name]
  }
}
/**
 * Build the error value thrown when a call is rejected because its circuit
 * is open.
 *
 * The dict carries kind "terminal", reason and category "circuit_open", a
 * human-readable message, the call's provider and model ("<unset>" when
 * absent), and the circuit name.
 */
fn __circuit_open_error(call, name) {
  let call_opts = call?.opts ?? {}
  let provider = call_opts?.provider ?? "<unset>"
  let model = call_opts?.model ?? "<unset>"
  let message = "circuit open: " + name
  return {
    kind: "terminal",
    reason: "circuit_open",
    category: "circuit_open",
    message: message,
    provider: provider,
    model: model,
    circuit: name,
  }
}
/**
 * Decide whether a call should bypass the cache.
 *
 * - No `skip_when` option: bypass exactly when `opts.tools` is set.
 * - Callable `skip_when`: invoke it with `{prompt, system, options}` and use
 *   the truthiness of its result.
 * - Any other `skip_when`: use the truthiness of the value itself.
 */
fn __llm_handler_should_skip_cache(prompt, system, opts) -> bool {
  let skip_rule = opts?.skip_when
  if skip_rule == nil {
    return opts?.tools != nil
  }
  if !__llm_handler_is_callable(skip_rule) {
    return skip_rule ? true : false
  }
  let verdict = skip_rule({prompt: prompt, system: system, options: opts})
  return verdict ? true : false
}
/**
 * Project the caller's options onto the option dict handed to the cache.
 *
 * Defaults: store "llm.with_cache", max_entries 256, and ttl "10m" — the ttl
 * default applies only when none of `ttl`, `ttl_seconds`, or
 * `max_age_seconds` was supplied. Every other field is passed through as-is
 * (nil when absent).
 */
fn __llm_handler_cache_options(opts) -> dict {
  let requested_ttl = opts?.ttl
  let ttl_seconds = opts?.ttl_seconds
  let max_age_seconds = opts?.max_age_seconds
  let has_expiry = requested_ttl != nil || ttl_seconds != nil || max_age_seconds != nil
  let ttl = has_expiry ? requested_ttl : "10m"
  return {
    store: opts?.store ?? "llm.with_cache",
    backend: opts?.backend,
    namespace: opts?.namespace,
    name: opts?.name,
    path: opts?.path,
    cache_dir: opts?.cache_dir,
    ttl: ttl,
    ttl_seconds: ttl_seconds,
    max_age_seconds: max_age_seconds,
    max_entries: opts?.max_entries ?? 256,
  }
}
/**
 * Wrap an LLM call handler with circuit-breaker protection.
 *
 * Unless `options.name` is given, the circuit is chosen per invocation from
 * the call's `(opts.provider, opts.model)` pair, so failures against one
 * upstream do not trip the circuit used for other models behind the same
 * wrapper. Supplying `name` deliberately shares a single circuit.
 *
 * Options: `threshold` (default 5), `reset_ms` (default 30000), `name`.
 * Throws immediately if `handler` is not callable. The returned handler
 * throws a "circuit_open" error dict when the circuit is open; otherwise it
 * runs `handler`, records the success or failure, and returns the result or
 * rethrows the handler's error.
 */
pub fn with_circuit_breaker(handler, options = nil) {
  if !__llm_handler_is_callable(handler) {
    throw "with_circuit_breaker: handler must be callable"
  }
  let settings = options ?? {}
  let failure_threshold = settings?.threshold ?? 5
  let reset_after_ms = settings?.reset_ms ?? 30000
  return fn(call) {
    let circuit = settings?.name ?? __circuit_name_for(call)
    __ensure_llm_handler_circuit(circuit, failure_threshold, reset_after_ms)
    if circuit_check(circuit) == "open" {
      throw __circuit_open_error(call, circuit)
    }
    // Capture the handler's outcome so it can be recorded before propagating.
    let attempt = try {
      handler(call)
    }
    if !is_err(attempt) {
      let value = unwrap(attempt)
      circuit_record_success(circuit)
      return value
    }
    circuit_record_failure(circuit)
    throw unwrap_err(attempt)
  }
}
/**
 * llm_cache_key returns the canonical sha256 cache key used by with_cache.
 *
 * A nil `options` is treated as an empty dict before the key is computed.
 */
pub fn llm_cache_key(prompt, system = nil, options = nil) -> string {
  let key_options = options ?? {}
  return __llm_cache_key(prompt, system, key_options)
}
/**
 * with_cache runs llm_call behind a persistent content-addressed cache.
 *
 * The cache key comes from llm_cache_key; per the module's contract it
 * covers prompt, system, provider, model, temperature, top_p, and
 * max_tokens. Calls that carry tools bypass the cache by default; the
 * `skip_when` option (a value or a predicate) overrides that rule.
 *
 * On a hit the stored value is returned without calling the model; on a
 * miss the fresh result is stored and then returned.
 */
pub fn with_cache(prompt, system = nil, options = nil) -> dict {
  let opts = options ?? {}
  let bypass = __llm_handler_should_skip_cache(prompt, system, opts)
  if bypass {
    return llm_call(prompt, system, opts)
  }
  let cache_key = llm_cache_key(prompt, system, opts)
  let store_options = __llm_handler_cache_options(opts)
  let lookup = cache_get(cache_key, store_options)
  if lookup.hit {
    return lookup.value
  }
  let fresh = llm_call(prompt, system, opts)
  cache_put(cache_key, fresh, store_options)
  return fresh
}