Skip to main content

axon/backends/
mod.rs

1//! Native Rust LLM backends — Fase 24.
2//!
3//! Per-provider async backends consumed by the algebraic-effects
4//! runtime (Fase 23.f) and, in upcoming Fase 25+, the general flow
5//! executor. Each provider lives in its own submodule:
6//!
7//!   * [`anthropic`]   — Claude Messages API (Fase 24.c)
8//!   * [`openai`]      — GPT chat/completions (Fase 24.d)
9//!   * [`gemini`]      — Google generateContent (Fase 24.e)
10//!   * [`kimi`]        — Moonshot K2.x (Fase 24.f, locked params)
11//!   * [`glm`]         — Zhipu GLM-4.x (Fase 24.g)
12//!   * [`ollama`]      — local LLMs via REST (Fase 24.h)
13//!   * [`openrouter`]  — multi-provider gateway (Fase 24.i)
14//!
15//! Shared infrastructure ships in 24.b alongside the trait + Registry:
16//!
17//!   * [`error`]            — typed transport errors named per failure mode
18//!   * [`retry`]            — retry policy + `Retry-After` parsing
19//!   * [`observability`]    — tracing span helpers per call lifecycle
20//!   * [`locked_model`]     — locked-parameter dispatch (Kimi K2.x / o1 / o3)
21//!   * [`tokens`]           — unified `count_tokens` dispatch by model prefix
22//!
23//! Adopter usage (post-24.k):
24//!
25//! ```ignore
26//! use axon::backends::{Registry, ChatRequest, Message, Role};
27//!
28//! let registry = Registry::production();
29//! let backend = registry.get("anthropic").expect("anthropic registered");
30//!
31//! let req = ChatRequest {
32//!     model: "claude-sonnet-4-5".into(),
33//!     messages: vec![Message::user("Hello!")],
34//!     ..Default::default()
35//! };
36//! let response = backend.complete(req).await?;
37//! println!("{}", response.content);
38//! ```
39//!
40//! # Architecture decisions (see docs/fase/fase_24_native_rust_backends.md)
41//!
42//! * **D1** — `async_trait` over native async-fn-in-trait so `dyn Backend`
43//!   stays object-safe (Registry uses `HashMap<String, Box<dyn Backend>>`).
44//! * **D6** — the legacy [`crate::backend`] module stays in place during
45//!   24.b–24.i to avoid touching 200+ call sites; in 24.j it becomes a
46//!   thin re-export shim that delegates here.
47//! * **D7** — Python `axon/backends/*.py` is untouched; flows running on
48//!   the Python runtime keep using it.
49
50#![allow(dead_code)]
51
52use std::collections::HashMap;
53use std::pin::Pin;
54
55use async_trait::async_trait;
56use futures::Stream;
57
58pub mod anthropic;
59pub mod error;
60pub mod gemini;
61pub mod glm;
62pub mod kimi;
63pub mod locked_model;
64pub mod observability;
65pub mod ollama;
66pub mod openai;
67pub mod openai_compat;
68pub mod openrouter;
69pub mod retry;
70pub mod sse_streaming;
71/// §Fase 33.x.b — `StubBackend` implementing the [`Backend`] trait so
72/// the production async streaming path resolves "stub" through the
73/// uniform [`Registry`] surface (no special-cased branches in the
74/// runtime). Excluded from the Fase 24.j cross-stack drift gate
75/// SHARED_INFRA_MODULES because it is not a real provider.
76pub mod stub;
77pub mod tokens;
78pub(crate) mod transport;
79
80pub use anthropic::AnthropicBackend;
81pub use error::{categorise_http, BackendError};
82pub use gemini::GeminiBackend;
83pub use glm::GLMBackend;
84pub use kimi::KimiBackend;
85pub use ollama::OllamaBackend;
86pub use openai::OpenAIBackend;
87pub use openai_compat::{OpenAICompatConfig, OpenAICompatibleBackend};
88pub use openrouter::OpenRouterBackend;
89pub use stub::{StubBackend, STUB_CONTENT, STUB_DEFAULT_MODEL, STUB_PROVIDER_NAME};
90
91// ────────────────────────────────────────────────────────────────────
92//  Request / Response types — the wire shape every backend speaks
93// ────────────────────────────────────────────────────────────────────
94
/// Speaker of a single message within a chat conversation.
///
/// Follows the OpenAI ChatML role set, extended with the
/// provider-neutral `Tool` role that carries tool-call results. Each
/// per-provider adapter converts this enum into the wire encoding its
/// provider expects (e.g. Anthropic lifts `system` into a top-level
/// field instead of a message; Gemini speaks `user`/`model`/`function`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Role {
    System,
    User,
    Assistant,
    Tool,
}

impl Role {
    /// Canonical lower-case name of the role (`"system"`, `"user"`,
    /// `"assistant"` or `"tool"`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            Role::Tool => "tool",
            Role::Assistant => "assistant",
            Role::User => "user",
            Role::System => "system",
        }
    }
}
120
121/// One chat message in a conversation.
122#[derive(Debug, Clone, PartialEq, Eq)]
123pub struct Message {
124    pub role: Role,
125    pub content: String,
126    /// Optional tool-call identifier when role == Tool. Per-provider
127    /// adapters thread this back to the correct tool call ID.
128    pub tool_call_id: Option<String>,
129}
130
131impl Message {
132    pub fn user(content: impl Into<String>) -> Self {
133        Self { role: Role::User, content: content.into(), tool_call_id: None }
134    }
135    pub fn assistant(content: impl Into<String>) -> Self {
136        Self { role: Role::Assistant, content: content.into(), tool_call_id: None }
137    }
138    pub fn system(content: impl Into<String>) -> Self {
139        Self { role: Role::System, content: content.into(), tool_call_id: None }
140    }
141}
142
/// Description of a tool the model may invoke while responding.
///
/// `parameters_json` holds the JSON Schema for the tool's parameters as
/// a raw string; each provider wraps it in its own request envelope.
///
/// Derives `Eq` alongside `PartialEq`: every field is a `String`, so
/// equality is total, and this matches the derive set on [`Message`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolSpec {
    /// Tool name as exposed to the model.
    pub name: String,
    /// Human-readable description of what the tool does.
    pub description: String,
    /// JSON Schema (serialised) describing the parameter shape.
    pub parameters_json: String,
}
153
/// Feature flags a backend can advertise through [`Backend::supports`].
///
/// Lets adopters ask "can this backend do X for this model?" instead of
/// pattern-matching on model strings themselves.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Capability {
    Streaming,
    ToolUse,
    Vision,
    /// Prompt caching on Anthropic (`cache_control` breakpoints).
    PromptCaching,
    /// `safetySettings` in the Gemini request body.
    SafetySettings,
    /// Structured outputs on OpenAI (`response_format=json_schema`).
    StructuredOutput,
    /// Sampling parameters are fixed by the provider (Kimi K2.x, o1, o3).
    LockedParams,
}
171
172/// One canonical chat request — provider-neutral. Per-provider adapters
173/// translate to the wire JSON the provider expects.
174#[derive(Debug, Clone, Default)]
175pub struct ChatRequest {
176    /// Empty string → backend uses its `default_model()`.
177    pub model: String,
178    pub messages: Vec<Message>,
179    /// System prompt — Anthropic puts it in a top-level field; OpenAI &
180    /// compats prepend a system message to the messages array.
181    pub system: Option<String>,
182    pub max_tokens: Option<u32>,
183    /// Temperature; ignored when the resolved model is locked-params.
184    pub temperature: Option<f64>,
185    pub top_p: Option<f64>,
186    pub tools: Vec<ToolSpec>,
187    /// `false` → call `complete()`. `true` → call `stream()` and consume
188    /// the chunk stream incrementally.
189    pub stream: bool,
190    /// Trace ID propagated from the calling flow step. Surfaces in
191    /// tracing spans so log lines correlate.
192    pub trace_id: Option<String>,
193    /// §Fase 33.x.e — Cancellation flag observed INSIDE the reqwest
194    /// body. Each per-provider `Backend::stream()` impl wraps its
195    /// returned chunk stream with `sse_streaming::cancel_aware`
196    /// so the next-chunk poll races a `cancel.cancelled()` future
197    /// against the upstream HTTP body — when the flag fires the
198    /// stream returns `None` within ≤100ms p95 + the dropped
199    /// reqwest Response aborts the upstream HTTP request body.
200    ///
201    /// `Default` is an uncancelled flag; adopters that don't supply
202    /// one get pre-33.x.e semantics (the stream runs to completion).
203    /// Cloning is cheap (`Arc`-backed inside).
204    pub cancel: crate::cancel_token::CancellationFlag,
205}
206
/// Why the model stopped generating.
///
/// Provider-specific finish-reason strings are folded into this closed
/// enum so callers can `match` on it. Strings with no mapping are kept
/// verbatim in `Other(s)` for diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FinishReason {
    /// Generation ended naturally (Anthropic `end_turn`, OpenAI `stop`,
    /// Gemini `STOP`).
    Stop,
    /// The `max_tokens` budget was exhausted (Anthropic `max_tokens`,
    /// OpenAI `length`, Gemini `MAX_TOKENS`).
    Length,
    /// The model called a tool (Anthropic `tool_use`, OpenAI
    /// `tool_calls`).
    ToolUse,
    /// Output was blocked by the provider's content filter (OpenAI
    /// `content_filter`, Gemini `SAFETY`, Anthropic empty + `end_turn`).
    SafetyBreach,
    /// Unrecognised reason; holds the raw provider string.
    Other(String),
}

impl FinishReason {
    /// Translate a provider's raw finish-reason string into the enum.
    ///
    /// Matching is ASCII-case-insensitive (Gemini emits upper-case
    /// slugs). `end_turn`, `stop_sequence` and `tool_use` are only
    /// recognised when `provider` is `"anthropic"`; all remaining slugs
    /// are recognised for every provider. Anything unrecognised —
    /// including the empty string — is returned as `Other` carrying
    /// `raw` unchanged.
    pub fn from_provider(provider: &str, raw: &str) -> Self {
        let slug = raw.to_ascii_lowercase();

        // Slugs that only Anthropic's stop_reason vocabulary defines.
        if provider == "anthropic" {
            match slug.as_str() {
                "end_turn" | "stop_sequence" => return Self::Stop,
                "tool_use" => return Self::ToolUse,
                _ => {}
            }
        }

        // Slugs shared across providers (OpenAI-style plus Gemini's,
        // already lower-cased above).
        match slug.as_str() {
            "stop" => Self::Stop,
            "length" | "max_tokens" => Self::Length,
            "tool_calls" | "function_call" => Self::ToolUse,
            "content_filter" | "safety" => Self::SafetyBreach,
            _ => Self::Other(raw.to_owned()),
        }
    }

    /// Whether this reason is the provider's safety classifier blocking
    /// output. Used when lifting into `BackendError::SafetyBreach`.
    pub fn is_safety_breach(&self) -> bool {
        *self == Self::SafetyBreach
    }
}
256
/// Token accounting reported by the provider for one call.
///
/// The three core counters use canonical names (input / output /
/// total). Provider-specific extras — Anthropic cache reads and writes,
/// o1/o3 reasoning tokens — get their own fields so dashboards that
/// aggregate across providers stay coherent.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Usage {
    /// Tokens consumed by the prompt.
    pub input_tokens: u32,
    /// Tokens produced by the model.
    pub output_tokens: u32,
    /// Total as reported for the call.
    pub total_tokens: u32,
    /// Prompt-cache hit on Anthropic (`cache_read_input_tokens`).
    pub cache_read_tokens: u32,
    /// Prompt-cache write on Anthropic (`cache_creation_input_tokens`).
    pub cache_creation_tokens: u32,
    /// Reasoning-token allocation on OpenAI o1/o3 (`reasoning_tokens`).
    pub reasoning_tokens: u32,
}
273
274/// A complete chat response from a non-streaming `complete()` call.
275#[derive(Debug, Clone)]
276pub struct ChatResponse {
277    pub content: String,
278    /// Resolved model slug — what the provider actually returned the
279    /// response from (may differ from request when an alias was sent).
280    pub model_name: String,
281    /// Provider name (`"anthropic"`, `"openai"`, etc.).
282    pub provider_name: String,
283    pub finish_reason: FinishReason,
284    pub usage: Usage,
285    /// Number of retries that fired before this success. 0 on a clean
286    /// first-attempt response.
287    pub retry_count: u32,
288    /// Trace ID echoed back from the request (or auto-generated if the
289    /// request omitted one).
290    pub trace_id: String,
291}
292
293/// One delta in a streaming response.
294///
295/// `delta` is the incremental text fragment for this chunk. `finish_reason`
296/// + `usage` are populated only on the final chunk so consumers can
297/// compute totals without keeping a running tally.
298#[derive(Debug, Clone, Default)]
299pub struct ChatChunk {
300    pub delta: String,
301    pub finish_reason: Option<FinishReason>,
302    pub usage: Option<Usage>,
303}
304
305/// Pinned, boxed stream alias — the concrete return type of
306/// [`Backend::stream`]. Adopters consume via `futures::StreamExt`.
307pub type ChatStream =
308    Pin<Box<dyn Stream<Item = Result<ChatChunk, BackendError>> + Send>>;
309
310// ────────────────────────────────────────────────────────────────────
311//  Backend trait — the per-provider contract
312// ────────────────────────────────────────────────────────────────────
313
314/// One LLM provider's native Rust client.
315///
316/// Implementors live in `axon-rs/src/backends/<provider>.rs` and are
317/// registered into [`Registry`] at process startup. The trait is
318/// object-safe (D1 — uses `async_trait`) so registries can hold
319/// `Box<dyn Backend>` for runtime dispatch by name.
320#[async_trait]
321pub trait Backend: Send + Sync {
322    /// Short provider name used as the registry key.
323    /// E.g. `"anthropic"`, `"openai"`, `"kimi"`.
324    fn name(&self) -> &str;
325
326    /// Default model used when [`ChatRequest::model`] is empty.
327    fn default_model(&self) -> &str;
328
329    /// Synchronous-result chat completion (non-streaming).
330    async fn complete(&self, request: ChatRequest) -> Result<ChatResponse, BackendError>;
331
332    /// Streaming chat completion. Adopter consumes the returned stream;
333    /// per-chunk text arrives in `ChatChunk::delta`, finish reason +
334    /// usage in the final chunk.
335    async fn stream(&self, request: ChatRequest) -> Result<ChatStream, BackendError>;
336
337    /// Best-effort token count for `text` against a specific model on
338    /// this provider. Default impl delegates to the unified
339    /// [`tokens::count_tokens`] dispatch; per-provider overrides may
340    /// consult the provider's HTTP `count_tokens` endpoint when an
341    /// exact answer is required + a network round-trip is acceptable.
342    fn count_tokens(&self, model: &str, text: &str) -> usize {
343        tokens::count_tokens(model, text).count
344    }
345
346    /// Capability discovery — does this backend support `capability`
347    /// for the given model? Default returns `false` for everything;
348    /// per-provider impls override.
349    #[allow(unused_variables)]
350    fn supports(&self, capability: Capability, model: &str) -> bool {
351        false
352    }
353}
354
355// ────────────────────────────────────────────────────────────────────
356//  Registry — string-keyed dispatch by provider name
357// ────────────────────────────────────────────────────────────────────
358
359/// Process-wide registry of registered backends.
360///
361/// Backends are registered by their canonical short name (the same
362/// string the Python `BACKEND_REGISTRY` uses — verified by the
363/// Fase 24.j drift gate). Lookup is `O(1)` HashMap.
364pub struct Registry {
365    backends: HashMap<String, Box<dyn Backend>>,
366}
367
368impl Registry {
369    /// Empty registry — useful for tests that want to register only
370    /// stub backends.
371    pub fn empty() -> Self {
372        Self { backends: HashMap::new() }
373    }
374
375    /// Production registry — populated with all 7 native backends.
376    ///
377    /// Every backend is constructed via its `from_env()` factory — i.e.
378    /// API keys are read at registry-construction time from the
379    /// per-provider env vars (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`,
380    /// `GEMINI_API_KEY`, `KIMI_API_KEY`, `GLM_API_KEY`, `OPENROUTER_API_KEY`,
381    /// plus `OLLAMA_HOST` / `OLLAMA_API_KEY` for the local daemon).
382    /// Backends whose env var is missing still construct successfully;
383    /// the auth check fires on the first `complete()` call instead.
384    ///
385    /// The registry's `provider_names()` returns the sorted list of all
386    /// 7 keys: `["anthropic", "gemini", "glm", "kimi", "ollama",
387    /// "openai", "openrouter"]`. The Fase 24.j drift gate
388    /// (`tests/test_fase24_backend_parity.py`) asserts this set
389    /// matches Python's `BACKEND_REGISTRY` keys exactly.
390    pub fn production() -> Self {
391        let mut registry = Self::empty();
392        registry.register(Box::new(anthropic::AnthropicBackend::from_env()));
393        registry.register(Box::new(gemini::GeminiBackend::from_env()));
394        registry.register(Box::new(glm::GLMBackend::from_env()));
395        registry.register(Box::new(kimi::KimiBackend::from_env()));
396        registry.register(Box::new(ollama::OllamaBackend::from_env()));
397        registry.register(Box::new(openai::OpenAIBackend::from_env()));
398        registry.register(Box::new(openrouter::OpenRouterBackend::from_env()));
399        registry
400    }
401
402    /// §Fase 33.x.b — Production registry PLUS the `stub` backend.
403    ///
404    /// Used by the server streaming path so dispatch through the
405    /// uniform `Registry` surface includes the stub. The 7 canonical
406    /// production backends are unchanged; `stub` is added as an 8th
407    /// entry. The Fase 24.j cross-stack drift gate continues to pin
408    /// the 7 canonical entries exactly via filesystem enumeration of
409    /// `axon-rs/src/backends/*.rs` minus the `SHARED_INFRA_MODULES`
410    /// set (which includes `stub`).
411    ///
412    /// Adopters who call [`Registry::production()`] directly do not
413    /// see the stub — it surfaces only on the streaming-path
414    /// dispatcher, where its content matches the legacy synchronous
415    /// stub-mode output byte-for-byte (D4 wire byte-compat).
416    pub fn production_with_stub() -> Self {
417        let mut registry = Self::production();
418        registry.register(Box::new(stub::StubBackend::new()));
419        registry
420    }
421}
422
423/// §Fase 33.x.b — Owned-backend resolver for the streaming dispatch
424/// path.
425///
426/// Returns `Some(Box<dyn Backend>)` for the 7 canonical production
427/// providers plus `"stub"`. Returns `None` for any other name (e.g.
428/// `"auto"` after upstream resolution failed, or an unknown name
429/// the adopter supplied).
430///
431/// The dispatch set MUST match [`Registry::production_with_stub`]
432/// exactly — adding a backend here without adding it to the
433/// registry (or vice versa) is caught by the
434/// `resolve_streaming_backend_dispatch_set_matches_production_with_stub`
435/// drift test below.
436///
437/// Each lookup constructs a fresh backend via `from_env()` so the
438/// returned `Box` owns its own reqwest client + retry policy.
439/// Async tasks own their backend for the duration of one flow
440/// (the trait is `Send + Sync` but not `Clone`, so per-task
441/// ownership keeps the dispatch path simple).
442pub fn resolve_streaming_backend(name: &str) -> Option<Box<dyn Backend>> {
443    match name {
444        "anthropic" => Some(Box::new(anthropic::AnthropicBackend::from_env())),
445        "openai" => Some(Box::new(openai::OpenAIBackend::from_env())),
446        "gemini" => Some(Box::new(gemini::GeminiBackend::from_env())),
447        "kimi" => Some(Box::new(kimi::KimiBackend::from_env())),
448        "glm" => Some(Box::new(glm::GLMBackend::from_env())),
449        "ollama" => Some(Box::new(ollama::OllamaBackend::from_env())),
450        "openrouter" => Some(Box::new(openrouter::OpenRouterBackend::from_env())),
451        "stub" => Some(Box::new(stub::StubBackend::new())),
452        _ => None,
453    }
454}
455
/// Every name [`resolve_streaming_backend`] recognises, in sorted
/// order: the 7 canonical providers followed by `"stub"`. Pinned by the
/// drift tests below.
pub const STREAMING_BACKEND_NAMES: &[&str] = &[
    "anthropic", "gemini", "glm", "kimi", "ollama", "openai", "openrouter", "stub",
];
468
/// §Fase 33.x.i — the canonical 7-provider set exposed to adopters.
///
/// Same entries as [`STREAMING_BACKEND_NAMES`] without `"stub"`, which
/// is a test/internal backend rather than an adopter-facing provider.
/// This constant is the SINGLE SOURCE OF TRUTH for "which providers
/// does axon support natively"; the legacy
/// [`crate::backend::SUPPORTED_BACKENDS`] mono-file constant is now a
/// `pub use` re-export of it.
///
/// Drift-gated by
/// `resolver_tests::canonical_providers_equals_legacy_supported`
/// (byte-equality with the legacy constant) and by
/// `tests/fase33x_i_mono_file_retirement.rs` (the same check plus the
/// count and the canonical-vs-stub-removed invariant).
pub const CANONICAL_PROVIDERS: &[&str] = &[
    "anthropic", "gemini", "glm", "kimi", "ollama", "openai", "openrouter",
];
491
492/// §Fase 36.c — Canonical providers with a usable API key present in
493/// the environment, in `CANONICAL_PROVIDERS` priority order.
494///
495/// Feeds the `env_available` rung of the Backend Resolution Contract
496/// (D1 / D6): when the operator-tuned `backend_registry` is empty,
497/// `auto` resolution picks the first canonical provider whose
498/// `<PROVIDER>_API_KEY` is set — so a server started with one provider
499/// key "just works" without a `PUT /v1/backends` registration dance.
500///
501/// `ollama` (whose key is optional for the local daemon) is included
502/// ONLY when `OLLAMA_API_KEY` is explicitly set to a non-empty value —
503/// a local-ollama deployment declares `backend: ollama` explicitly (or
504/// sets the var) rather than the resolver probing a network port.
505/// `stub` is not a canonical provider, so it is never returned here.
506pub fn env_available_backends() -> Vec<String> {
507    CANONICAL_PROVIDERS
508        .iter()
509        .filter(|p| get_api_key(p).map(|k| !k.is_empty()).unwrap_or(false))
510        .map(|p| p.to_string())
511        .collect()
512}
513
514/// §Fase 33.x.i — Canonical API-key env-var resolution.
515///
516/// Same semantics as the legacy `crate::backend::get_api_key`:
517///   - For known providers, reads `<PROVIDER>_API_KEY` from the
518///     environment.
519///   - For `"ollama"`, missing key is permitted (local daemon).
520///   - For all other providers, returns an error with adopter-
521///     actionable hint when the env var is unset.
522///   - For unknown provider names, returns an error listing
523///     [`CANONICAL_PROVIDERS`].
524///
525/// This is the SINGLE SOURCE OF TRUTH for the legacy
526/// `crate::backend::get_api_key` shim. The legacy shim wraps this
527/// with the legacy `crate::backend::BackendError` struct shape;
528/// callers using the trait `Backend` surface read keys via their
529/// per-provider `from_env` factory instead.
530pub fn get_api_key(provider: &str) -> Result<String, String> {
531    let env_var = match provider {
532        "anthropic" => "ANTHROPIC_API_KEY",
533        "openai" => "OPENAI_API_KEY",
534        "gemini" => "GEMINI_API_KEY",
535        "kimi" => "KIMI_API_KEY",
536        "glm" => "GLM_API_KEY",
537        "openrouter" => "OPENROUTER_API_KEY",
538        "ollama" => "OLLAMA_API_KEY", // local: missing key permitted
539        _ => {
540            return Err(format!(
541                "Unknown backend '{provider}'. Supported: {}",
542                CANONICAL_PROVIDERS.join(", ")
543            ));
544        }
545    };
546    if provider == "ollama" {
547        return Ok(std::env::var(env_var).unwrap_or_default());
548    }
549    std::env::var(env_var).map_err(|_| {
550        format!(
551            "{env_var} not set. Required for backend '{provider}'.\n\
552             hint: export {env_var}=<your-api-key>"
553        )
554    })
555}
556
#[cfg(test)]
mod resolver_tests {
    use super::*;

    /// Names outside the dispatch set resolve to `None`.
    #[test]
    fn resolve_streaming_backend_returns_none_for_unknown_name() {
        for unknown in ["does-not-exist", "", "auto"] {
            assert!(resolve_streaming_backend(unknown).is_none());
        }
    }

    /// Every pinned name resolves, and the backend reports that name.
    #[test]
    fn resolve_streaming_backend_returns_some_for_each_streaming_name() {
        for &name in STREAMING_BACKEND_NAMES {
            match resolve_streaming_backend(name) {
                Some(backend) => assert_eq!(backend.name(), name),
                None => panic!("resolver should return Some for {name:?}"),
            }
        }
    }

    /// The pinned name list and the stub-inclusive registry agree.
    #[test]
    fn resolve_streaming_backend_dispatch_set_matches_production_with_stub() {
        let registry_names = Registry::production_with_stub().provider_names();

        let mut resolver_names = Vec::with_capacity(STREAMING_BACKEND_NAMES.len());
        for name in STREAMING_BACKEND_NAMES {
            resolver_names.push(name.to_string());
        }
        resolver_names.sort();

        assert_eq!(
            registry_names, resolver_names,
            "resolve_streaming_backend() and Registry::production_with_stub() \
             must dispatch the same set of backends — drift here breaks the \
             D1 contract that Backend::stream() is the only production path \
             for Stream<T>"
        );
    }

    #[test]
    fn streaming_backend_names_pins_eight_entries() {
        // Seven canonical providers plus `stub`. A ninth entry means
        // touching the resolver match AND the
        // `Registry::production_with_stub()` constructor, then
        // re-running the drift test above.
        assert_eq!(STREAMING_BACKEND_NAMES.len(), 8);
    }

    #[test]
    fn streaming_backend_names_are_sorted() {
        let sorted = {
            let mut names = STREAMING_BACKEND_NAMES.to_vec();
            names.sort_unstable();
            names
        };
        assert_eq!(sorted.as_slice(), STREAMING_BACKEND_NAMES);
    }

    #[test]
    fn canonical_providers_equals_legacy_supported_backends() {
        // §Fase 33.x.i drift gate: the consolidated
        // `crate::backends::CANONICAL_PROVIDERS` MUST match the legacy
        // `crate::backend::SUPPORTED_BACKENDS` byte-for-byte. Because
        // the legacy constant is now a `pub use` re-export of the new
        // one, equality holds by construction; the test pins that
        // invariant and trips if a separate const is ever
        // re-introduced.
        assert_eq!(
            CANONICAL_PROVIDERS,
            crate::backend::SUPPORTED_BACKENDS,
            "33.x.i drift: canonical providers must equal legacy SUPPORTED_BACKENDS"
        );
    }

    #[test]
    fn canonical_providers_is_streaming_minus_stub() {
        // §Fase 33.x.i invariant: dropping `stub` from the 8-entry
        // streaming dispatch set yields the canonical 7-provider set.
        // A provider added to only one of the two sets surfaces here.
        let mut canonical_sorted: Vec<&str> = CANONICAL_PROVIDERS.to_vec();
        canonical_sorted.sort();

        let mut streaming_without_stub: Vec<&str> = STREAMING_BACKEND_NAMES.to_vec();
        streaming_without_stub.retain(|name| *name != "stub");

        assert_eq!(canonical_sorted, streaming_without_stub);
    }

    #[test]
    fn get_api_key_unknown_provider_returns_error() {
        let message = get_api_key("does-not-exist").unwrap_err();
        for needle in ["Unknown backend", "Supported:"] {
            assert!(message.contains(needle));
        }
    }

    #[test]
    fn get_api_key_ollama_permits_missing_key() {
        // Ollama runs as a local daemon, so no key is required.
        // The variable is saved and restored to limit cross-test
        // interference.
        const VAR: &str = "OLLAMA_API_KEY";
        let saved = std::env::var(VAR).ok();
        std::env::remove_var(VAR);

        let result = get_api_key("ollama");
        assert!(
            result.is_ok(),
            "ollama MUST permit missing API key for local daemon"
        );
        assert_eq!(result.unwrap(), "");

        if let Some(value) = saved {
            std::env::set_var(VAR, value);
        }
    }

    /// The resolved stub emits exactly one chunk holding `STUB_CONTENT`.
    #[tokio::test]
    async fn resolved_stub_streams_one_canonical_chunk() {
        use futures::StreamExt;

        let backend = resolve_streaming_backend("stub").expect("stub resolves");
        let mut stream = backend
            .stream(ChatRequest::default())
            .await
            .expect("stub streams");

        let first = stream.next().await.expect("one chunk").expect("ok");
        assert_eq!(first.delta, stub::STUB_CONTENT);

        let second = stream.next().await;
        assert!(second.is_none(), "single-chunk semantics");
    }
}
677
678impl Registry {
679    /// Internal marker reserved for future expansion of the
680    /// streaming-resolver dispatch surface. Currently a no-op; kept
681    /// as a public-crate anchor so future Fase 33.x sub-fases can
682    /// extend the dispatch table without re-opening the parent impl
683    /// block. Untyped const is a zero-cost marker in monomorphisation.
684    #[doc(hidden)]
685    pub(crate) const __FASE_33X_B_RESOLVER_BOUNDARY: () = ();
686
687    /// Register `backend` under the key `backend.name()`. Replaces any
688    /// existing entry with the same name (last-write-wins).
689    pub fn register(&mut self, backend: Box<dyn Backend>) {
690        self.backends.insert(backend.name().to_string(), backend);
691    }
692
693    /// Look up a backend by name. Returns `None` if not registered.
694    pub fn get(&self, name: &str) -> Option<&dyn Backend> {
695        self.backends.get(name).map(|b| b.as_ref())
696    }
697
698    /// All registered provider names, sorted alphabetically. Used by
699    /// the cross-stack drift gate (Fase 24.j) to verify the Rust set
700    /// equals the Python `BACKEND_REGISTRY` set.
701    pub fn provider_names(&self) -> Vec<String> {
702        let mut names: Vec<String> = self.backends.keys().cloned().collect();
703        names.sort();
704        names
705    }
706
707    pub fn len(&self) -> usize {
708        self.backends.len()
709    }
710
711    pub fn is_empty(&self) -> bool {
712        self.backends.is_empty()
713    }
714}
715
716impl Default for Registry {
717    fn default() -> Self {
718        Self::production()
719    }
720}
721
722// ────────────────────────────────────────────────────────────────────
723//  Tests — trait + types + Registry
724// ────────────────────────────────────────────────────────────────────
725
#[cfg(test)]
mod tests {
    use super::*;
    use futures::StreamExt;

    /// In-memory stand-in for a provider: it answers with canned data so
    /// the trait and the Registry can be tested without a real provider.
    struct StubBackend {
        name: String,
    }

    #[async_trait]
    impl Backend for StubBackend {
        fn name(&self) -> &str {
            self.name.as_str()
        }

        fn default_model(&self) -> &str {
            "stub-model"
        }

        /// Ignores the request and returns a fixed successful response
        /// whose `provider_name` is this stub's name.
        async fn complete(
            &self,
            _request: ChatRequest,
        ) -> Result<ChatResponse, BackendError> {
            let canned = ChatResponse {
                content: "stubbed".into(),
                model_name: "stub-model".into(),
                provider_name: self.name.clone(),
                finish_reason: FinishReason::Stop,
                usage: Usage::default(),
                retry_count: 0,
                trace_id: "stub".into(),
            };
            Ok(canned)
        }

        /// Ignores the request and yields two chunks: a plain delta, then
        /// a final delta carrying the finish reason and usage numbers.
        async fn stream(
            &self,
            _request: ChatRequest,
        ) -> Result<ChatStream, BackendError> {
            let opening = ChatChunk { delta: "hi ".into(), ..Default::default() };
            let closing = ChatChunk {
                delta: "world".into(),
                finish_reason: Some(FinishReason::Stop),
                usage: Some(Usage {
                    input_tokens: 1,
                    output_tokens: 2,
                    total_tokens: 3,
                    ..Default::default()
                }),
            };
            let scripted = futures::stream::iter(vec![Ok(opening), Ok(closing)]);
            Ok(Box::pin(scripted))
        }

        /// The stub advertises streaming and nothing else, for any model.
        fn supports(&self, capability: Capability, _model: &str) -> bool {
            matches!(capability, Capability::Streaming)
        }
    }

    /// Boxes a `StubBackend` named `name` as a trait object.
    fn stub(name: &str) -> Box<dyn Backend> {
        let backend = StubBackend { name: name.to_owned() };
        Box::new(backend)
    }

    /// Every role has a non-empty string form, and `User` is `"user"`.
    #[test]
    fn role_round_trips_via_as_str() {
        let every_role = [Role::System, Role::User, Role::Assistant, Role::Tool];
        for role in every_role {
            assert!(!role.as_str().is_empty());
        }
        assert_eq!(Role::User.as_str(), "user");
    }

    /// Each `Message` constructor stamps the matching role.
    #[test]
    fn message_helpers_set_role() {
        let from_user = Message::user("a");
        let from_assistant = Message::assistant("b");
        let from_system = Message::system("c");
        assert_eq!(from_user.role, Role::User);
        assert_eq!(from_assistant.role, Role::Assistant);
        assert_eq!(from_system.role, Role::System);
    }

    /// A defaulted request has no model, messages or tools, and does not stream.
    #[test]
    fn chat_request_default_is_empty() {
        let request = ChatRequest::default();
        assert!(request.model.is_empty());
        assert!(request.messages.is_empty());
        assert!(request.tools.is_empty());
        assert!(!request.stream);
    }

    /// Anthropic stop reasons map onto the unified `FinishReason`.
    #[test]
    fn finish_reason_anthropic_mapping() {
        let expectations = [
            ("end_turn", FinishReason::Stop),
            ("max_tokens", FinishReason::Length),
            ("tool_use", FinishReason::ToolUse),
            ("stop_sequence", FinishReason::Stop),
        ];
        for (raw, expected) in expectations {
            assert_eq!(FinishReason::from_provider("anthropic", raw), expected);
        }
    }

    /// OpenAI finish reasons map onto the unified `FinishReason`.
    #[test]
    fn finish_reason_openai_mapping() {
        let expectations = [
            ("stop", FinishReason::Stop),
            ("length", FinishReason::Length),
            ("tool_calls", FinishReason::ToolUse),
            ("content_filter", FinishReason::SafetyBreach),
        ];
        for (raw, expected) in expectations {
            assert_eq!(FinishReason::from_provider("openai", raw), expected);
        }
    }

    /// Gemini reports its reasons in uppercase (STOP / MAX_TOKENS / SAFETY);
    /// those spellings must still map correctly.
    #[test]
    fn finish_reason_gemini_mapping_uppercase() {
        let expectations = [
            ("STOP", FinishReason::Stop),
            ("MAX_TOKENS", FinishReason::Length),
            ("SAFETY", FinishReason::SafetyBreach),
        ];
        for (raw, expected) in expectations {
            assert_eq!(FinishReason::from_provider("gemini", raw), expected);
        }
    }

    /// An unrecognised reason is kept verbatim inside `FinishReason::Other`.
    #[test]
    fn finish_reason_unknown_preserves_raw() {
        let expected = FinishReason::Other("weird_signal".into());
        let mapped = FinishReason::from_provider("openai", "weird_signal");
        assert_eq!(mapped, expected);
    }

    /// Only `SafetyBreach` answers `true` to `is_safety_breach`.
    #[test]
    fn finish_reason_safety_breach_predicate() {
        let breach = FinishReason::SafetyBreach;
        let normal_stop = FinishReason::Stop;
        let unknown = FinishReason::Other("anything".into());
        assert!(breach.is_safety_breach());
        assert!(!normal_stop.is_safety_breach());
        assert!(!unknown.is_safety_breach());
    }

    /// An empty registry grows by one on register and resolves only that name.
    #[test]
    fn registry_empty_then_register() {
        let mut registry = Registry::empty();
        assert_eq!(registry.len(), 0);

        registry.register(stub("anthropic"));
        assert_eq!(registry.len(), 1);

        let hit = registry.get("anthropic");
        assert!(hit.is_some());
        let miss = registry.get("openai");
        assert!(miss.is_none());
    }

    /// `provider_names` is sorted regardless of registration order.
    #[test]
    fn registry_provider_names_sorted() {
        let mut registry = Registry::empty();
        for name in ["openai", "anthropic", "gemini"] {
            registry.register(stub(name));
        }
        let expected: Vec<String> = ["anthropic", "gemini", "openai"]
            .iter()
            .map(|name| name.to_string())
            .collect();
        assert_eq!(registry.provider_names(), expected);
    }

    /// Registering the same name twice leaves a single entry.
    #[test]
    fn registry_replace_on_duplicate_register() {
        let mut registry = Registry::empty();
        for _ in 0..2 {
            registry.register(stub("anthropic"));
        }
        // last-write-wins
        assert_eq!(registry.len(), 1);
    }

    /// `complete` on the stub returns its canned response.
    #[tokio::test]
    async fn stub_complete_returns_response() {
        let backend = StubBackend { name: "stub".into() };
        let reply = backend.complete(ChatRequest::default()).await.unwrap();
        assert_eq!(reply.content, "stubbed");
        assert_eq!(reply.provider_name, "stub");
        assert_eq!(reply.finish_reason, FinishReason::Stop);
    }

    /// `stream` on the stub yields exactly its two scripted chunks, in order.
    #[tokio::test]
    async fn stub_stream_yields_chunks() {
        let backend = StubBackend { name: "stub".into() };
        let collected = backend
            .stream(ChatRequest::default())
            .await
            .unwrap()
            .collect::<Vec<_>>()
            .await;
        assert_eq!(collected.len(), 2);

        let head = collected[0].as_ref().unwrap();
        assert_eq!(head.delta, "hi ");
        assert!(head.finish_reason.is_none());

        let tail = collected[1].as_ref().unwrap();
        assert_eq!(tail.delta, "world");
        assert_eq!(tail.finish_reason, Some(FinishReason::Stop));
        let reported = tail.usage.as_ref().unwrap();
        assert_eq!(reported.total_tokens, 3);
    }

    /// Looking a backend up by name returns the one registered under it.
    #[tokio::test]
    async fn registry_dispatches_to_correct_backend() {
        let mut registry = Registry::empty();
        for name in ["anthropic", "openai"] {
            registry.register(stub(name));
        }
        let chosen = registry.get("openai").expect("openai registered");
        let reply = chosen.complete(ChatRequest::default()).await.unwrap();
        assert_eq!(reply.provider_name, "openai");
    }

    /// A backend that does not override `supports` reports no capabilities.
    #[test]
    fn supports_capability_default_false() {
        // Implements only the required methods so that the trait's
        // provided `supports` is the one under test.
        struct RequiredOnly;

        #[async_trait]
        impl Backend for RequiredOnly {
            fn name(&self) -> &str {
                "default"
            }

            fn default_model(&self) -> &str {
                ""
            }

            async fn complete(
                &self,
                _request: ChatRequest,
            ) -> Result<ChatResponse, BackendError> {
                unreachable!()
            }

            async fn stream(
                &self,
                _request: ChatRequest,
            ) -> Result<ChatStream, BackendError> {
                unreachable!()
            }
        }

        let backend = RequiredOnly;
        for capability in [Capability::Streaming, Capability::ToolUse] {
            assert!(!backend.supports(capability, "anything"));
        }
    }

    /// The provided `count_tokens` yields a positive count for non-empty text.
    #[test]
    fn count_tokens_default_uses_unified_dispatch() {
        // StubBackend leaves `count_tokens` alone, so this exercises the
        // trait default, which delegates to tokens::count_tokens and so
        // uses the same model dispatch as the standalone function.
        let backend = StubBackend { name: "stub".into() };
        let token_count = backend.count_tokens("gpt-4o-mini", "hello world");
        assert!(token_count > 0);
    }
}