harn-stdlib 0.7.62

/** std/llm/handlers — small middleware helpers for LLM call handlers. */
import { cache_get, cache_put } from "std/cache"

/**
 * Names of the circuits this module has already passed to circuit_breaker.
 * __ensure_llm_handler_circuit appends to it so each name is registered once.
 */
var __llm_handler_circuits = []

/**
 * Reports whether `value` can be invoked as a handler or predicate.
 *
 * Returns true when type_of(value) is "function", "closure", or "fn";
 * false for every other type tag.
 */
fn __llm_handler_is_callable(value) -> bool {
  let tag = type_of(value)
  if tag == "function" {
    return true
  }
  if tag == "closure" {
    return true
  }
  return tag == "fn"
}

/**
 * Derives the default circuit name for one handler invocation.
 *
 * The name has the form "llm:<provider>:<model>", read from `call.opts`.
 * A missing `call`, `opts`, `provider`, or `model` yields the "<unset>"
 * placeholder in the corresponding position.
 */
fn __circuit_name_for(call) -> string {
  let call_opts = call?.opts ?? {}
  let provider_part = to_string(call_opts?.provider ?? "<unset>")
  let model_part = to_string(call_opts?.model ?? "<unset>")
  let prefix = "llm:" + provider_part
  return prefix + ":" + model_part
}

/**
 * Registers the circuit `name` the first time it is seen.
 *
 * When `name` is not yet in __llm_handler_circuits, calls
 * circuit_breaker(name, threshold, reset_ms) and records the name. Later
 * calls with the same name do nothing, so the `threshold` and `reset_ms`
 * passed on those calls are not applied.
 */
fn __ensure_llm_handler_circuit(name, threshold, reset_ms) {
  let already_registered = contains(__llm_handler_circuits, name)
  if !already_registered {
    circuit_breaker(name, threshold, reset_ms)
    __llm_handler_circuits = __llm_handler_circuits + [name]
  }
}

/**
 * Builds the error value thrown when a call is rejected by an open circuit.
 *
 * The result is a dict with kind "terminal", reason and category
 * "circuit_open", a message naming the circuit, the call's provider and
 * model ("<unset>" when absent), and the circuit name itself.
 */
fn __circuit_open_error(call, name) {
  let call_opts = call?.opts ?? {}
  let message = "circuit open: " + name
  let provider = call_opts?.provider ?? "<unset>"
  let model = call_opts?.model ?? "<unset>"
  return {
    kind: "terminal",
    reason: "circuit_open",
    category: "circuit_open",
    message: message,
    provider: provider,
    model: model,
    circuit: name,
  }
}

/**
 * Decides whether with_cache should bypass the cache for this call.
 *
 * `opts.skip_when` controls the decision:
 *   - a callable is invoked with {prompt, system, options} and its result
 *     is coerced to a bool;
 *   - any other non-nil value is coerced to a bool directly;
 *   - when it is nil, the cache is skipped exactly when `opts.tools` is set.
 */
fn __llm_handler_should_skip_cache(prompt, system, opts) -> bool {
  let rule = opts?.skip_when
  if rule != nil {
    if __llm_handler_is_callable(rule) {
      let verdict = rule({prompt: prompt, system: system, options: opts})
      return verdict ? true : false
    }
    return rule ? true : false
  }
  // No explicit rule: the default is to skip caching for calls with tools.
  return opts?.tools != nil
}

/**
 * Projects with_cache options onto the option dict passed to cache_get and
 * cache_put.
 *
 * Defaults: `store` is "llm.with_cache" and `max_entries` is 256. `ttl` is
 * "10m" only when none of `ttl`, `ttl_seconds`, or `max_age_seconds` is
 * supplied; otherwise the caller's `ttl` (possibly nil) is passed through.
 * All other fields are copied as given.
 */
fn __llm_handler_cache_options(opts) -> dict {
  let explicit_ttl = opts?.ttl
  let ttl_seconds = opts?.ttl_seconds
  let max_age_seconds = opts?.max_age_seconds
  let no_expiry_given = explicit_ttl == nil && ttl_seconds == nil && max_age_seconds == nil
  let ttl = no_expiry_given ? "10m" : explicit_ttl
  return {
    store: opts?.store ?? "llm.with_cache",
    backend: opts?.backend,
    namespace: opts?.namespace,
    name: opts?.name,
    path: opts?.path,
    cache_dir: opts?.cache_dir,
    ttl: ttl,
    ttl_seconds: ttl_seconds,
    max_age_seconds: max_age_seconds,
    max_entries: opts?.max_entries ?? 256,
  }
}

/**
 * Wrap an LLM call handler so that every invocation goes through a circuit
 * breaker.
 *
 * Unless `options.name` is given, the circuit is chosen per call from the
 * call's `(opts.provider, opts.model)` pair, which keeps a failing upstream
 * from tripping the circuit of other models served by the same wrapper.
 * Supplying `name` deliberately shares a single circuit across all calls.
 *
 * Options:
 *   - name: fixed circuit name (default: derived from the call).
 *   - threshold: passed to circuit_breaker as the threshold (default 5).
 *   - reset_ms: passed to circuit_breaker as the reset interval
 *     (default 30000).
 *
 * Throws a string when `handler` is not callable. The returned handler
 * throws a "circuit_open" error dict when the circuit is open, and otherwise
 * rethrows whatever the wrapped handler threw after recording the failure.
 */
pub fn with_circuit_breaker(handler, options = nil) {
  if !__llm_handler_is_callable(handler) {
    throw "with_circuit_breaker: handler must be callable"
  }
  let settings = options ?? {}
  let failure_threshold = settings?.threshold ?? 5
  let reset_after_ms = settings?.reset_ms ?? 30000
  return fn(call) {
    let circuit = settings?.name ?? __circuit_name_for(call)
    __ensure_llm_handler_circuit(circuit, failure_threshold, reset_after_ms)
    if circuit_check(circuit) == "open" {
      throw __circuit_open_error(call, circuit)
    }
    // Capture the handler's outcome so it can be recorded before propagating.
    let attempt = try {
      handler(call)
    }
    if !is_err(attempt) {
      let value = unwrap(attempt)
      circuit_record_success(circuit)
      return value
    }
    circuit_record_failure(circuit)
    throw unwrap_err(attempt)
  }
}

/**
 * llm_cache_key returns the canonical sha256 cache key used by with_cache.
 *
 * A nil `options` is treated as an empty dict before the key is computed.
 */
pub fn llm_cache_key(prompt, system = nil, options = nil) -> string {
  let key_options = options ?? {}
  return __llm_cache_key(prompt, system, key_options)
}

/**
 * with_cache runs llm_call behind a persistent content-addressed cache.
 *
 * The cache identity covers prompt, system, provider, model, temperature,
 * top_p, and max_tokens. Calls that carry tools bypass the cache by default;
 * `options.skip_when` overrides that decision. On a cache hit the stored
 * value is returned without calling llm_call; on a miss the fresh result is
 * stored under the key and then returned.
 */
pub fn with_cache(prompt, system = nil, options = nil) -> dict {
  let opts = options ?? {}
  let bypass = __llm_handler_should_skip_cache(prompt, system, opts)
  if !bypass {
    let key = llm_cache_key(prompt, system, opts)
    let store_options = __llm_handler_cache_options(opts)
    let lookup = cache_get(key, store_options)
    if lookup.hit {
      return lookup.value
    }
    let fresh = llm_call(prompt, system, opts)
    cache_put(key, fresh, store_options)
    return fresh
  }
  return llm_call(prompt, system, opts)
}