axon/backends/mod.rs
1//! Native Rust LLM backends — Fase 24.
2//!
3//! Per-provider async backends consumed by the algebraic-effects
4//! runtime (Fase 23.f) and, in upcoming Fase 25+, the general flow
5//! executor. Each provider lives in its own submodule:
6//!
7//! * [`anthropic`] — Claude Messages API (Fase 24.c)
8//! * [`openai`] — GPT chat/completions (Fase 24.d)
9//! * [`gemini`] — Google generateContent (Fase 24.e)
10//! * [`kimi`] — Moonshot K2.x (Fase 24.f, locked params)
11//! * [`glm`] — Zhipu GLM-4.x (Fase 24.g)
12//! * [`ollama`] — local LLMs via REST (Fase 24.h)
13//! * [`openrouter`] — multi-provider gateway (Fase 24.i)
14//!
15//! Shared infrastructure ships in 24.b alongside the trait + Registry:
16//!
17//! * [`error`] — typed transport errors named per failure mode
18//! * [`retry`] — retry policy + `Retry-After` parsing
19//! * [`observability`] — tracing span helpers per call lifecycle
20//! * [`locked_model`] — locked-parameter dispatch (Kimi K2.x / o1 / o3)
21//! * [`tokens`] — unified `count_tokens` dispatch by model prefix
22//!
23//! Adopter usage (post-24.k):
24//!
25//! ```ignore
26//! use axon::backends::{Registry, ChatRequest, Message, Role};
27//!
28//! let registry = Registry::production();
29//! let backend = registry.get("anthropic").expect("anthropic registered");
30//!
31//! let req = ChatRequest {
32//! model: "claude-sonnet-4-5".into(),
33//! messages: vec![Message::user("Hello!")],
34//! ..Default::default()
35//! };
36//! let response = backend.complete(req).await?;
37//! println!("{}", response.content);
38//! ```
39//!
40//! # Architecture decisions (see docs/fase/fase_24_native_rust_backends.md)
41//!
42//! * **D1** — `async_trait` over native async-fn-in-trait so `dyn Backend`
43//! stays object-safe (Registry uses `HashMap<String, Box<dyn Backend>>`).
44//! * **D6** — the legacy [`crate::backend`] module stays in place during
45//! 24.b–24.i to avoid touching 200+ call sites; in 24.j it becomes a
46//! thin re-export shim that delegates here.
47//! * **D7** — Python `axon/backends/*.py` is untouched; flows running on
48//! the Python runtime keep using it.
49
50#![allow(dead_code)]
51
52use std::collections::HashMap;
53use std::pin::Pin;
54
55use async_trait::async_trait;
56use futures::Stream;
57
58pub mod anthropic;
59pub mod error;
60pub mod gemini;
61pub mod glm;
62pub mod kimi;
63pub mod locked_model;
64pub mod observability;
65pub mod ollama;
66pub mod openai;
67pub mod openai_compat;
68pub mod openrouter;
69pub mod retry;
70pub mod sse_streaming;
71/// §Fase 33.x.b — `StubBackend` implementing the [`Backend`] trait so
72/// the production async streaming path resolves "stub" through the
73/// uniform [`Registry`] surface (no special-cased branches in the
74/// runtime). Excluded from the Fase 24.j cross-stack drift gate
75/// SHARED_INFRA_MODULES because it is not a real provider.
76pub mod stub;
77pub mod tokens;
78pub(crate) mod transport;
79
80pub use anthropic::AnthropicBackend;
81pub use error::{categorise_http, BackendError};
82pub use gemini::GeminiBackend;
83pub use glm::GLMBackend;
84pub use kimi::KimiBackend;
85pub use ollama::OllamaBackend;
86pub use openai::OpenAIBackend;
87pub use openai_compat::{OpenAICompatConfig, OpenAICompatibleBackend};
88pub use openrouter::OpenRouterBackend;
89pub use stub::{StubBackend, STUB_CONTENT, STUB_DEFAULT_MODEL, STUB_PROVIDER_NAME};
90
91// ────────────────────────────────────────────────────────────────────
92// Request / Response types — the wire shape every backend speaks
93// ────────────────────────────────────────────────────────────────────
94
/// Speaker role attached to each chat [`Message`].
///
/// Follows the OpenAI ChatML role set, plus one provider-neutral
/// extra (`Tool`) for messages that carry a tool-call result. Turning
/// the enum into a provider's wire encoding is the job of the
/// per-provider adapters (e.g. Anthropic lifts `system` into a
/// top-level field instead of a message; Gemini speaks
/// `user`/`model`/`function`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Role {
    System,
    User,
    Assistant,
    Tool,
}

impl Role {
    /// Canonical lower-case name of the role: `"system"`, `"user"`,
    /// `"assistant"` or `"tool"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Role::System => "system",
            Role::User => "user",
            Role::Assistant => "assistant",
            Role::Tool => "tool",
        }
    }
}
120
121/// One chat message in a conversation.
122#[derive(Debug, Clone, PartialEq, Eq)]
123pub struct Message {
124 pub role: Role,
125 pub content: String,
126 /// Optional tool-call identifier when role == Tool. Per-provider
127 /// adapters thread this back to the correct tool call ID.
128 pub tool_call_id: Option<String>,
129}
130
131impl Message {
132 pub fn user(content: impl Into<String>) -> Self {
133 Self { role: Role::User, content: content.into(), tool_call_id: None }
134 }
135 pub fn assistant(content: impl Into<String>) -> Self {
136 Self { role: Role::Assistant, content: content.into(), tool_call_id: None }
137 }
138 pub fn system(content: impl Into<String>) -> Self {
139 Self { role: Role::System, content: content.into(), tool_call_id: None }
140 }
141}
142
/// A tool the model may invoke during the response.
///
/// `parameters_json` is the JSON Schema describing the parameter shape;
/// each provider serialises it with its own envelope.
///
/// All fields are plain `String`s, so equality is total and the type
/// derives `Eq` alongside `PartialEq` (matching [`Message`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolSpec {
    /// Name the model uses to refer to the tool.
    pub name: String,
    /// Human-readable description of the tool.
    pub description: String,
    /// JSON Schema text for the tool's parameters. It is stored as an
    /// unparsed string; nothing in this type validates it.
    pub parameters_json: String,
}
153
/// Provider-feature discovery enum — used by [`Backend::supports`] so
/// adopters can ask "does this backend support X for this model?"
/// without parsing model strings themselves.
///
/// The enum is `Copy` and `Hash`, so values can be passed by value and
/// used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Capability {
    /// Incremental (chunked) responses — presumably those served by
    /// [`Backend::stream`]; confirm against the per-provider impls.
    Streaming,
    /// Tool / function calling — presumably driven by
    /// [`ChatRequest::tools`]; confirm against the per-provider impls.
    ToolUse,
    /// Image input. NOTE(review): [`ChatRequest`] carries no image
    /// field in this module — confirm how adapters use this flag.
    Vision,
    /// Anthropic prompt caching (cache_control breakpoints).
    PromptCaching,
    /// Gemini safetySettings on the request body.
    SafetySettings,
    /// OpenAI structured outputs (response_format=json_schema).
    StructuredOutput,
    /// Provider hard-codes sampling parameters (Kimi K2.x, o1, o3).
    LockedParams,
}
171
/// One canonical chat request — provider-neutral. Per-provider adapters
/// translate to the wire JSON the provider expects.
///
/// `Default` produces an empty request: empty `model`, no messages, no
/// tools, `stream == false`, every optional field `None`.
#[derive(Debug, Clone, Default)]
pub struct ChatRequest {
    /// Empty string → backend uses its `default_model()`.
    pub model: String,
    /// The conversation messages to send.
    pub messages: Vec<Message>,
    /// System prompt — Anthropic puts it in a top-level field; OpenAI &
    /// compats prepend a system message to the messages array.
    pub system: Option<String>,
    /// Optional output-token budget. NOTE(review): presumably the limit
    /// behind [`FinishReason::Length`]; what `None` means is left to
    /// each adapter — confirm.
    pub max_tokens: Option<u32>,
    /// Temperature; ignored when the resolved model is locked-params.
    pub temperature: Option<f64>,
    /// Optional `top_p` sampling parameter. NOTE(review): presumably
    /// ignored for locked-params models like `temperature` — confirm.
    pub top_p: Option<f64>,
    /// Tools offered to the model; empty when the request offers none.
    pub tools: Vec<ToolSpec>,
    /// `false` → call `complete()`. `true` → call `stream()` and consume
    /// the chunk stream incrementally.
    pub stream: bool,
    /// Trace ID propagated from the calling flow step. Surfaces in
    /// tracing spans so log lines correlate.
    pub trace_id: Option<String>,
    /// §Fase 33.x.e — Cancellation flag observed INSIDE the reqwest
    /// body. Each per-provider `Backend::stream()` impl wraps its
    /// returned chunk stream with `sse_streaming::cancel_aware`
    /// so the next-chunk poll races a `cancel.cancelled()` future
    /// against the upstream HTTP body — when the flag fires the
    /// stream returns `None` within ≤100ms p95 + the dropped
    /// reqwest Response aborts the upstream HTTP request body.
    ///
    /// `Default` is an uncancelled flag; adopters that don't supply
    /// one get pre-33.x.e semantics (the stream runs to completion).
    /// Cloning is cheap (`Arc`-backed inside).
    pub cancel: crate::cancel_token::CancellationFlag,
}
206
/// Reason the model stopped generating.
///
/// Provider-specific finish-reason strings are folded into this closed
/// enum so callers can `match` on it. A string with no mapping is kept
/// verbatim in `Other(s)`, so the raw value stays available for
/// diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FinishReason {
    /// Generation ended naturally (Anthropic `end_turn`, OpenAI `stop`,
    /// Gemini `STOP`).
    Stop,
    /// The `max_tokens` budget ran out (Anthropic `max_tokens`, OpenAI
    /// `length`, Gemini `MAX_TOKENS`).
    Length,
    /// The model called a tool (Anthropic `tool_use`, OpenAI
    /// `tool_calls`).
    ToolUse,
    /// The provider's content filter blocked the output (OpenAI
    /// `content_filter`, Gemini `SAFETY`, Anthropic empty + `end_turn`).
    /// The Anthropic case cannot be detected by
    /// [`FinishReason::from_provider`], which only sees the raw string.
    SafetyBreach,
    /// Any other value; holds the raw provider string.
    Other(String),
}

impl FinishReason {
    /// Translate a raw provider finish-reason string into the enum.
    ///
    /// `raw` is matched ASCII-case-insensitively (Gemini emits
    /// upper-case slugs); `provider` is matched exactly. The
    /// Anthropic-only slugs `end_turn`, `stop_sequence` and `tool_use`
    /// are honoured only when `provider == "anthropic"`. Anything
    /// unrecognised — including the empty string — comes back as
    /// `Other` carrying `raw` with its original casing.
    pub fn from_provider(provider: &str, raw: &str) -> Self {
        let folded = raw.to_ascii_lowercase();
        let slug = folded.as_str();

        if provider == "anthropic" {
            match slug {
                "end_turn" | "stop_sequence" => return Self::Stop,
                "tool_use" => return Self::ToolUse,
                // `max_tokens` is handled by the shared table below.
                _ => {}
            }
        }

        // Slugs accepted from every provider.
        match slug {
            "stop" => Self::Stop,
            "length" | "max_tokens" => Self::Length,
            "tool_calls" | "function_call" => Self::ToolUse,
            "content_filter" | "safety" => Self::SafetyBreach,
            _ => Self::Other(raw.to_string()),
        }
    }

    /// Whether this finish reason means the provider's safety
    /// classifier blocked the output. Used by
    /// `BackendError::SafetyBreach` lifting.
    pub fn is_safety_breach(&self) -> bool {
        *self == Self::SafetyBreach
    }
}
256
/// Token-usage breakdown returned by the provider. Field naming is
/// canonical (input/output/total); per-provider deltas (cache reads on
/// Anthropic, reasoning tokens on o1/o3) live in dedicated fields so
/// aggregating dashboards across providers stay coherent.
///
/// `Default` yields all-zero counters.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Usage {
    /// Tokens on the request (input) side.
    pub input_tokens: u32,
    /// Tokens on the response (output) side.
    pub output_tokens: u32,
    /// Overall token count. NOTE(review): presumably
    /// `input_tokens + output_tokens`; whether adapters compute it or
    /// copy the provider's own figure is not visible here — confirm.
    pub total_tokens: u32,
    /// Anthropic prompt-cache hit (`cache_read_input_tokens`).
    pub cache_read_tokens: u32,
    /// Anthropic prompt-cache write (`cache_creation_input_tokens`).
    pub cache_creation_tokens: u32,
    /// OpenAI o1/o3 reasoning-token allocation (`reasoning_tokens`).
    pub reasoning_tokens: u32,
}
273
/// A complete chat response from a non-streaming `complete()` call.
#[derive(Debug, Clone)]
pub struct ChatResponse {
    /// The response text.
    pub content: String,
    /// Resolved model slug — what the provider actually returned the
    /// response from (may differ from request when an alias was sent).
    pub model_name: String,
    /// Provider name (`"anthropic"`, `"openai"`, etc.).
    pub provider_name: String,
    /// Why generation stopped, normalised to [`FinishReason`].
    pub finish_reason: FinishReason,
    /// Token accounting for this call (see [`Usage`]).
    pub usage: Usage,
    /// Number of retries that fired before this success. 0 on a clean
    /// first-attempt response.
    pub retry_count: u32,
    /// Trace ID echoed back from the request (or auto-generated if the
    /// request omitted one).
    pub trace_id: String,
}
292
/// One delta in a streaming response.
///
/// `delta` is the incremental text fragment for this chunk. `finish_reason`
/// + `usage` are populated only on the final chunk so consumers can
/// compute totals without keeping a running tally.
///
/// `Default` is an empty delta with both optional fields `None`.
#[derive(Debug, Clone, Default)]
pub struct ChatChunk {
    /// Incremental text fragment carried by this chunk.
    pub delta: String,
    /// `Some` only on the final chunk of the stream.
    pub finish_reason: Option<FinishReason>,
    /// `Some` only on the final chunk of the stream.
    pub usage: Option<Usage>,
}
304
/// Pinned, boxed stream alias — the concrete return type of
/// [`Backend::stream`]. Adopters consume via `futures::StreamExt`.
///
/// Every item is a `Result`: a [`ChatChunk`] on success or a
/// [`BackendError`] on failure. The trait object is `Send`, so the
/// stream can be moved to another thread or task.
pub type ChatStream =
    Pin<Box<dyn Stream<Item = Result<ChatChunk, BackendError>> + Send>>;
309
310// ────────────────────────────────────────────────────────────────────
311// Backend trait — the per-provider contract
312// ────────────────────────────────────────────────────────────────────
313
/// One LLM provider's native Rust client.
///
/// Implementors live in `axon-rs/src/backends/<provider>.rs` and are
/// registered into [`Registry`] at process startup. The trait is
/// object-safe (D1 — uses `async_trait`) so registries can hold
/// `Box<dyn Backend>` for runtime dispatch by name.
///
/// Only [`Backend::name`], [`Backend::default_model`],
/// [`Backend::complete`] and [`Backend::stream`] are required;
/// [`Backend::count_tokens`] and [`Backend::supports`] ship with
/// default bodies.
#[async_trait]
pub trait Backend: Send + Sync {
    /// Short provider name used as the registry key.
    /// E.g. `"anthropic"`, `"openai"`, `"kimi"`.
    ///
    /// [`Registry::register`] stores the backend under exactly this
    /// string.
    fn name(&self) -> &str;

    /// Default model used when [`ChatRequest::model`] is empty.
    fn default_model(&self) -> &str;

    /// Synchronous-result chat completion (non-streaming).
    ///
    /// Takes the request by value and resolves to the full
    /// [`ChatResponse`], or a [`BackendError`] on failure.
    async fn complete(&self, request: ChatRequest) -> Result<ChatResponse, BackendError>;

    /// Streaming chat completion. Adopter consumes the returned stream;
    /// per-chunk text arrives in `ChatChunk::delta`, finish reason +
    /// usage in the final chunk.
    ///
    /// Errors can surface twice: as the outer `Result` (the stream
    /// could not be opened) or as an `Err` item inside the stream.
    async fn stream(&self, request: ChatRequest) -> Result<ChatStream, BackendError>;

    /// Best-effort token count for `text` against a specific model on
    /// this provider. Default impl delegates to the unified
    /// [`tokens::count_tokens`] dispatch; per-provider overrides may
    /// consult the provider's HTTP `count_tokens` endpoint when an
    /// exact answer is required + a network round-trip is acceptable.
    fn count_tokens(&self, model: &str, text: &str) -> usize {
        // Only the `count` field of the dispatch result is surfaced.
        tokens::count_tokens(model, text).count
    }

    /// Capability discovery — does this backend support `capability`
    /// for the given model? Default returns `false` for everything;
    /// per-provider impls override.
    // The default body ignores both parameters; they keep their plain
    // names for the trait's documentation, hence the lint allowance.
    #[allow(unused_variables)]
    fn supports(&self, capability: Capability, model: &str) -> bool {
        false
    }
}
354
355// ────────────────────────────────────────────────────────────────────
356// Registry — string-keyed dispatch by provider name
357// ────────────────────────────────────────────────────────────────────
358
/// Process-wide registry of registered backends.
///
/// Backends are registered by their canonical short name (the same
/// string the Python `BACKEND_REGISTRY` uses — verified by the
/// Fase 24.j drift gate). Lookup is `O(1)` HashMap.
pub struct Registry {
    // Keyed by `Backend::name()`; the registry owns each backend as a
    // boxed trait object.
    backends: HashMap<String, Box<dyn Backend>>,
}
367
368impl Registry {
369 /// Empty registry — useful for tests that want to register only
370 /// stub backends.
371 pub fn empty() -> Self {
372 Self { backends: HashMap::new() }
373 }
374
375 /// Production registry — populated with all 7 native backends.
376 ///
377 /// Every backend is constructed via its `from_env()` factory — i.e.
378 /// API keys are read at registry-construction time from the
379 /// per-provider env vars (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`,
380 /// `GEMINI_API_KEY`, `KIMI_API_KEY`, `GLM_API_KEY`, `OPENROUTER_API_KEY`,
381 /// plus `OLLAMA_HOST` / `OLLAMA_API_KEY` for the local daemon).
382 /// Backends whose env var is missing still construct successfully;
383 /// the auth check fires on the first `complete()` call instead.
384 ///
385 /// The registry's `provider_names()` returns the sorted list of all
386 /// 7 keys: `["anthropic", "gemini", "glm", "kimi", "ollama",
387 /// "openai", "openrouter"]`. The Fase 24.j drift gate
388 /// (`tests/test_fase24_backend_parity.py`) asserts this set
389 /// matches Python's `BACKEND_REGISTRY` keys exactly.
390 pub fn production() -> Self {
391 let mut registry = Self::empty();
392 registry.register(Box::new(anthropic::AnthropicBackend::from_env()));
393 registry.register(Box::new(gemini::GeminiBackend::from_env()));
394 registry.register(Box::new(glm::GLMBackend::from_env()));
395 registry.register(Box::new(kimi::KimiBackend::from_env()));
396 registry.register(Box::new(ollama::OllamaBackend::from_env()));
397 registry.register(Box::new(openai::OpenAIBackend::from_env()));
398 registry.register(Box::new(openrouter::OpenRouterBackend::from_env()));
399 registry
400 }
401
402 /// §Fase 33.x.b — Production registry PLUS the `stub` backend.
403 ///
404 /// Used by the server streaming path so dispatch through the
405 /// uniform `Registry` surface includes the stub. The 7 canonical
406 /// production backends are unchanged; `stub` is added as an 8th
407 /// entry. The Fase 24.j cross-stack drift gate continues to pin
408 /// the 7 canonical entries exactly via filesystem enumeration of
409 /// `axon-rs/src/backends/*.rs` minus the `SHARED_INFRA_MODULES`
410 /// set (which includes `stub`).
411 ///
412 /// Adopters who call [`Registry::production()`] directly do not
413 /// see the stub — it surfaces only on the streaming-path
414 /// dispatcher, where its content matches the legacy synchronous
415 /// stub-mode output byte-for-byte (D4 wire byte-compat).
416 pub fn production_with_stub() -> Self {
417 let mut registry = Self::production();
418 registry.register(Box::new(stub::StubBackend::new()));
419 registry
420 }
421}
422
423/// §Fase 33.x.b — Owned-backend resolver for the streaming dispatch
424/// path.
425///
426/// Returns `Some(Box<dyn Backend>)` for the 7 canonical production
427/// providers plus `"stub"`. Returns `None` for any other name (e.g.
428/// `"auto"` after upstream resolution failed, or an unknown name
429/// the adopter supplied).
430///
431/// The dispatch set MUST match [`Registry::production_with_stub`]
432/// exactly — adding a backend here without adding it to the
433/// registry (or vice versa) is caught by the
434/// `resolve_streaming_backend_dispatch_set_matches_production_with_stub`
435/// drift test below.
436///
437/// Each lookup constructs a fresh backend via `from_env()` so the
438/// returned `Box` owns its own reqwest client + retry policy.
439/// Async tasks own their backend for the duration of one flow
440/// (the trait is `Send + Sync` but not `Clone`, so per-task
441/// ownership keeps the dispatch path simple).
442pub fn resolve_streaming_backend(name: &str) -> Option<Box<dyn Backend>> {
443 match name {
444 "anthropic" => Some(Box::new(anthropic::AnthropicBackend::from_env())),
445 "openai" => Some(Box::new(openai::OpenAIBackend::from_env())),
446 "gemini" => Some(Box::new(gemini::GeminiBackend::from_env())),
447 "kimi" => Some(Box::new(kimi::KimiBackend::from_env())),
448 "glm" => Some(Box::new(glm::GLMBackend::from_env())),
449 "ollama" => Some(Box::new(ollama::OllamaBackend::from_env())),
450 "openrouter" => Some(Box::new(openrouter::OpenRouterBackend::from_env())),
451 "stub" => Some(Box::new(stub::StubBackend::new())),
452 _ => None,
453 }
454}
455
/// Names recognised by [`resolve_streaming_backend`]. Sorted.
/// Pinned by the drift test below.
///
/// Specifically, `resolver_tests` checks that the list has 8 entries
/// (`streaming_backend_names_pins_eight_entries`), is sorted
/// (`streaming_backend_names_are_sorted`), resolves name-by-name
/// (`resolve_streaming_backend_returns_some_for_each_streaming_name`)
/// and equals the keys of [`Registry::production_with_stub`]
/// (`resolve_streaming_backend_dispatch_set_matches_production_with_stub`).
pub const STREAMING_BACKEND_NAMES: &[&str] = &[
    "anthropic",
    "gemini",
    "glm",
    "kimi",
    "ollama",
    "openai",
    "openrouter",
    "stub",
];
468
/// §Fase 33.x.i — Canonical 7-provider set surfaced to adopters.
///
/// Identical to [`STREAMING_BACKEND_NAMES`] minus `"stub"` (which is
/// a test/internal backend, not an adopter-facing provider). This is
/// the SINGLE SOURCE OF TRUTH for "which providers does axon
/// support natively"; the legacy
/// [`crate::backend::SUPPORTED_BACKENDS`] mono-file constant is now
/// a `pub use` re-export of this.
///
/// Drift-gated by
/// `resolver_tests::canonical_providers_equals_legacy_supported_backends`
/// (asserts byte-equality with the legacy constant),
/// `resolver_tests::canonical_providers_is_streaming_minus_stub`
/// (asserts the canonical-vs-stub-removed invariant) and
/// `tests/fase33x_i_mono_file_retirement.rs` (asserts the same plus
/// the count).
///
/// The order of this list is also the priority order used by
/// [`env_available_backends`], which iterates it front to back.
pub const CANONICAL_PROVIDERS: &[&str] = &[
    "anthropic",
    "gemini",
    "glm",
    "kimi",
    "ollama",
    "openai",
    "openrouter",
];
491
492/// §Fase 36.c — Canonical providers with a usable API key present in
493/// the environment, in `CANONICAL_PROVIDERS` priority order.
494///
495/// Feeds the `env_available` rung of the Backend Resolution Contract
496/// (D1 / D6): when the operator-tuned `backend_registry` is empty,
497/// `auto` resolution picks the first canonical provider whose
498/// `<PROVIDER>_API_KEY` is set — so a server started with one provider
499/// key "just works" without a `PUT /v1/backends` registration dance.
500///
501/// `ollama` (whose key is optional for the local daemon) is included
502/// ONLY when `OLLAMA_API_KEY` is explicitly set to a non-empty value —
503/// a local-ollama deployment declares `backend: ollama` explicitly (or
504/// sets the var) rather than the resolver probing a network port.
505/// `stub` is not a canonical provider, so it is never returned here.
506pub fn env_available_backends() -> Vec<String> {
507 CANONICAL_PROVIDERS
508 .iter()
509 .filter(|p| get_api_key(p).map(|k| !k.is_empty()).unwrap_or(false))
510 .map(|p| p.to_string())
511 .collect()
512}
513
514/// §Fase 33.x.i — Canonical API-key env-var resolution.
515///
516/// Same semantics as the legacy `crate::backend::get_api_key`:
517/// - For known providers, reads `<PROVIDER>_API_KEY` from the
518/// environment.
519/// - For `"ollama"`, missing key is permitted (local daemon).
520/// - For all other providers, returns an error with adopter-
521/// actionable hint when the env var is unset.
522/// - For unknown provider names, returns an error listing
523/// [`CANONICAL_PROVIDERS`].
524///
525/// This is the SINGLE SOURCE OF TRUTH for the legacy
526/// `crate::backend::get_api_key` shim. The legacy shim wraps this
527/// with the legacy `crate::backend::BackendError` struct shape;
528/// callers using the trait `Backend` surface read keys via their
529/// per-provider `from_env` factory instead.
530pub fn get_api_key(provider: &str) -> Result<String, String> {
531 let env_var = match provider {
532 "anthropic" => "ANTHROPIC_API_KEY",
533 "openai" => "OPENAI_API_KEY",
534 "gemini" => "GEMINI_API_KEY",
535 "kimi" => "KIMI_API_KEY",
536 "glm" => "GLM_API_KEY",
537 "openrouter" => "OPENROUTER_API_KEY",
538 "ollama" => "OLLAMA_API_KEY", // local: missing key permitted
539 _ => {
540 return Err(format!(
541 "Unknown backend '{provider}'. Supported: {}",
542 CANONICAL_PROVIDERS.join(", ")
543 ));
544 }
545 };
546 if provider == "ollama" {
547 return Ok(std::env::var(env_var).unwrap_or_default());
548 }
549 std::env::var(env_var).map_err(|_| {
550 format!(
551 "{env_var} not set. Required for backend '{provider}'.\n\
552 hint: export {env_var}=<your-api-key>"
553 )
554 })
555}
556
#[cfg(test)]
mod resolver_tests {
    //! Drift gates for the streaming resolver, the provider-name
    //! constants and `get_api_key`.

    use super::*;

    /// Puts an environment variable back to its captured state when
    /// dropped, so a failing assertion cannot leak a modified
    /// environment into other tests running in the same process.
    struct EnvVarRestore {
        key: &'static str,
        // `OsString` so a non-UTF-8 value survives the round trip.
        previous: Option<std::ffi::OsString>,
    }

    impl EnvVarRestore {
        /// Snapshot the current value of `key` (or its absence).
        fn capture(key: &'static str) -> Self {
            Self { key, previous: std::env::var_os(key) }
        }
    }

    impl Drop for EnvVarRestore {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    #[test]
    fn resolve_streaming_backend_returns_none_for_unknown_name() {
        assert!(resolve_streaming_backend("does-not-exist").is_none());
        assert!(resolve_streaming_backend("").is_none());
        assert!(resolve_streaming_backend("auto").is_none());
    }

    #[test]
    fn resolve_streaming_backend_returns_some_for_each_streaming_name() {
        for name in STREAMING_BACKEND_NAMES {
            let backend = resolve_streaming_backend(name)
                .unwrap_or_else(|| panic!("resolver should return Some for {name:?}"));
            // The backend must report the same name it was resolved by.
            assert_eq!(backend.name(), *name);
        }
    }

    #[test]
    fn resolve_streaming_backend_dispatch_set_matches_production_with_stub() {
        let registry = Registry::production_with_stub();
        let registry_names = registry.provider_names();
        let mut resolver_names: Vec<String> =
            STREAMING_BACKEND_NAMES.iter().map(|s| s.to_string()).collect();
        resolver_names.sort();
        assert_eq!(
            registry_names, resolver_names,
            "resolve_streaming_backend() and Registry::production_with_stub() \
             must dispatch the same set of backends — drift here breaks the \
             D1 contract that Backend::stream() is the only production path \
             for Stream<T>"
        );
    }

    #[test]
    fn streaming_backend_names_pins_eight_entries() {
        // 7 canonical providers + stub. Adding a ninth requires
        // updating both the resolver match and the
        // `Registry::production_with_stub()` constructor — and
        // re-running the drift test above.
        assert_eq!(STREAMING_BACKEND_NAMES.len(), 8);
    }

    #[test]
    fn streaming_backend_names_are_sorted() {
        let mut sorted = STREAMING_BACKEND_NAMES.to_vec();
        sorted.sort();
        assert_eq!(sorted.as_slice(), STREAMING_BACKEND_NAMES);
    }

    #[test]
    fn canonical_providers_equals_legacy_supported_backends() {
        // §Fase 33.x.i drift gate: the new
        // `crate::backends::CANONICAL_PROVIDERS` (consolidated
        // single source of truth) MUST equal the legacy
        // `crate::backend::SUPPORTED_BACKENDS` byte-for-byte. The
        // legacy constant is now a `pub use` re-export of the new
        // one, so byte-equality is by-construction; this test pins
        // the invariant + catches accidental drift if someone
        // re-introduces a separate const.
        assert_eq!(
            CANONICAL_PROVIDERS,
            crate::backend::SUPPORTED_BACKENDS,
            "33.x.i drift: canonical providers must equal legacy SUPPORTED_BACKENDS"
        );
    }

    #[test]
    fn canonical_providers_is_streaming_minus_stub() {
        // §Fase 33.x.i invariant: the canonical 7-provider set
        // equals the 8-entry streaming dispatch set with `stub`
        // removed. Drift here surfaces if a new provider is added
        // to one set but not the other.
        let mut canonical_sorted: Vec<&str> = CANONICAL_PROVIDERS.to_vec();
        canonical_sorted.sort();
        // `STREAMING_BACKEND_NAMES` is already sorted (pinned by
        // `streaming_backend_names_are_sorted`), so only the canonical
        // side needs sorting before the comparison.
        let streaming_without_stub: Vec<&str> = STREAMING_BACKEND_NAMES
            .iter()
            .copied()
            .filter(|n| *n != "stub")
            .collect();
        assert_eq!(canonical_sorted, streaming_without_stub);
    }

    #[test]
    fn get_api_key_unknown_provider_returns_error() {
        let err = get_api_key("does-not-exist").unwrap_err();
        assert!(err.contains("Unknown backend"));
        assert!(err.contains("Supported:"));
    }

    #[test]
    fn get_api_key_ollama_permits_missing_key() {
        // Ollama is a local daemon — missing key is allowed.
        // The guard restores the variable when it goes out of scope,
        // including when one of the assertions below panics.
        let _restore = EnvVarRestore::capture("OLLAMA_API_KEY");
        std::env::remove_var("OLLAMA_API_KEY");
        let result = get_api_key("ollama");
        assert!(
            result.is_ok(),
            "ollama MUST permit missing API key for local daemon"
        );
        assert_eq!(result.unwrap(), "");
    }

    #[tokio::test]
    async fn resolved_stub_streams_one_canonical_chunk() {
        let backend = resolve_streaming_backend("stub").expect("stub resolves");
        let req = ChatRequest::default();
        let mut stream = backend.stream(req).await.expect("stub streams");
        use futures::StreamExt;
        let chunk = stream.next().await.expect("one chunk").expect("ok");
        assert_eq!(chunk.delta, stub::STUB_CONTENT);
        assert!(stream.next().await.is_none(), "single-chunk semantics");
    }
}
677
678impl Registry {
679 /// Internal marker reserved for future expansion of the
680 /// streaming-resolver dispatch surface. Currently a no-op; kept
681 /// as a public-crate anchor so future Fase 33.x sub-fases can
682 /// extend the dispatch table without re-opening the parent impl
683 /// block. Untyped const is a zero-cost marker in monomorphisation.
684 #[doc(hidden)]
685 pub(crate) const __FASE_33X_B_RESOLVER_BOUNDARY: () = ();
686
687 /// Register `backend` under the key `backend.name()`. Replaces any
688 /// existing entry with the same name (last-write-wins).
689 pub fn register(&mut self, backend: Box<dyn Backend>) {
690 self.backends.insert(backend.name().to_string(), backend);
691 }
692
693 /// Look up a backend by name. Returns `None` if not registered.
694 pub fn get(&self, name: &str) -> Option<&dyn Backend> {
695 self.backends.get(name).map(|b| b.as_ref())
696 }
697
698 /// All registered provider names, sorted alphabetically. Used by
699 /// the cross-stack drift gate (Fase 24.j) to verify the Rust set
700 /// equals the Python `BACKEND_REGISTRY` set.
701 pub fn provider_names(&self) -> Vec<String> {
702 let mut names: Vec<String> = self.backends.keys().cloned().collect();
703 names.sort();
704 names
705 }
706
707 pub fn len(&self) -> usize {
708 self.backends.len()
709 }
710
711 pub fn is_empty(&self) -> bool {
712 self.backends.is_empty()
713 }
714}
715
impl Default for Registry {
    /// Same as [`Registry::production`]: the default registry is the
    /// fully populated production set, not an empty one (use
    /// [`Registry::empty`] for that).
    fn default() -> Self {
        Self::production()
    }
}
721
722// ────────────────────────────────────────────────────────────────────
723// Tests — trait + types + Registry
724// ────────────────────────────────────────────────────────────────────
725
#[cfg(test)]
mod tests {
    //! Unit tests for the request/response types, the `Backend` trait
    //! defaults and `Registry`, driven by an in-memory stub backend.

    use super::*;
    use futures::StreamExt;

    /// Test-only stub that lets us exercise the Registry + trait without
    /// hitting a real provider.
    // This local definition takes precedence over the `StubBackend`
    // that `use super::*` would otherwise bring in.
    struct StubBackend {
        name: String,
    }

    #[async_trait]
    impl Backend for StubBackend {
        fn name(&self) -> &str {
            &self.name
        }
        fn default_model(&self) -> &str {
            "stub-model"
        }
        // Always succeeds with a fixed body; the request is ignored.
        async fn complete(
            &self,
            _request: ChatRequest,
        ) -> Result<ChatResponse, BackendError> {
            Ok(ChatResponse {
                content: "stubbed".into(),
                model_name: "stub-model".into(),
                provider_name: self.name.clone(),
                finish_reason: FinishReason::Stop,
                usage: Usage::default(),
                retry_count: 0,
                trace_id: "stub".into(),
            })
        }
        // Yields two chunks; only the last one carries finish reason
        // and usage, matching the `ChatChunk` contract.
        async fn stream(
            &self,
            _request: ChatRequest,
        ) -> Result<ChatStream, BackendError> {
            let chunks = vec![
                Ok(ChatChunk { delta: "hi ".into(), ..Default::default() }),
                Ok(ChatChunk {
                    delta: "world".into(),
                    finish_reason: Some(FinishReason::Stop),
                    usage: Some(Usage { input_tokens: 1, output_tokens: 2, total_tokens: 3, ..Default::default() }),
                }),
            ];
            Ok(Box::pin(futures::stream::iter(chunks)))
        }
        // Advertises streaming only, for every model.
        fn supports(&self, capability: Capability, _model: &str) -> bool {
            matches!(capability, Capability::Streaming)
        }
    }

    /// Boxed stub backend registered under `name`.
    fn stub(name: &str) -> Box<dyn Backend> {
        Box::new(StubBackend { name: name.to_string() })
    }

    // Every role has a non-empty string form.
    #[test]
    fn role_round_trips_via_as_str() {
        for r in [Role::System, Role::User, Role::Assistant, Role::Tool] {
            assert!(!r.as_str().is_empty());
        }
        assert_eq!(Role::User.as_str(), "user");
    }

    #[test]
    fn message_helpers_set_role() {
        assert_eq!(Message::user("a").role, Role::User);
        assert_eq!(Message::assistant("b").role, Role::Assistant);
        assert_eq!(Message::system("c").role, Role::System);
    }

    #[test]
    fn chat_request_default_is_empty() {
        let r = ChatRequest::default();
        assert!(r.model.is_empty());
        assert!(r.messages.is_empty());
        assert!(r.tools.is_empty());
        assert!(!r.stream);
    }

    #[test]
    fn finish_reason_anthropic_mapping() {
        assert_eq!(FinishReason::from_provider("anthropic", "end_turn"), FinishReason::Stop);
        assert_eq!(FinishReason::from_provider("anthropic", "max_tokens"), FinishReason::Length);
        assert_eq!(FinishReason::from_provider("anthropic", "tool_use"), FinishReason::ToolUse);
        assert_eq!(FinishReason::from_provider("anthropic", "stop_sequence"), FinishReason::Stop);
    }

    #[test]
    fn finish_reason_openai_mapping() {
        assert_eq!(FinishReason::from_provider("openai", "stop"), FinishReason::Stop);
        assert_eq!(FinishReason::from_provider("openai", "length"), FinishReason::Length);
        assert_eq!(FinishReason::from_provider("openai", "tool_calls"), FinishReason::ToolUse);
        assert_eq!(FinishReason::from_provider("openai", "content_filter"), FinishReason::SafetyBreach);
    }

    #[test]
    fn finish_reason_gemini_mapping_uppercase() {
        // Gemini emits SAFETY / MAX_TOKENS / STOP — case-folded.
        assert_eq!(FinishReason::from_provider("gemini", "STOP"), FinishReason::Stop);
        assert_eq!(FinishReason::from_provider("gemini", "MAX_TOKENS"), FinishReason::Length);
        assert_eq!(FinishReason::from_provider("gemini", "SAFETY"), FinishReason::SafetyBreach);
    }

    // Unmapped strings come back verbatim inside `Other`.
    #[test]
    fn finish_reason_unknown_preserves_raw() {
        let r = FinishReason::from_provider("openai", "weird_signal");
        assert_eq!(r, FinishReason::Other("weird_signal".into()));
    }

    #[test]
    fn finish_reason_safety_breach_predicate() {
        assert!(FinishReason::SafetyBreach.is_safety_breach());
        assert!(!FinishReason::Stop.is_safety_breach());
        assert!(!FinishReason::Other("anything".into()).is_safety_breach());
    }

    #[test]
    fn registry_empty_then_register() {
        let mut r = Registry::empty();
        assert_eq!(r.len(), 0);
        r.register(stub("anthropic"));
        assert_eq!(r.len(), 1);
        assert!(r.get("anthropic").is_some());
        assert!(r.get("openai").is_none());
    }

    // Names come back sorted regardless of registration order.
    #[test]
    fn registry_provider_names_sorted() {
        let mut r = Registry::empty();
        r.register(stub("openai"));
        r.register(stub("anthropic"));
        r.register(stub("gemini"));
        assert_eq!(
            r.provider_names(),
            vec!["anthropic".to_string(), "gemini".to_string(), "openai".to_string()]
        );
    }

    #[test]
    fn registry_replace_on_duplicate_register() {
        let mut r = Registry::empty();
        r.register(stub("anthropic"));
        r.register(stub("anthropic"));
        assert_eq!(r.len(), 1); // last-write-wins
    }

    #[tokio::test]
    async fn stub_complete_returns_response() {
        let b = StubBackend { name: "stub".into() };
        let resp = b.complete(ChatRequest::default()).await.unwrap();
        assert_eq!(resp.content, "stubbed");
        assert_eq!(resp.provider_name, "stub");
        assert_eq!(resp.finish_reason, FinishReason::Stop);
    }

    // Drains the whole stream, then checks that only the final chunk
    // carries finish reason + usage.
    #[tokio::test]
    async fn stub_stream_yields_chunks() {
        let b = StubBackend { name: "stub".into() };
        let stream = b.stream(ChatRequest::default()).await.unwrap();
        let chunks: Vec<_> = stream.collect().await;
        assert_eq!(chunks.len(), 2);
        let first = chunks[0].as_ref().unwrap();
        assert_eq!(first.delta, "hi ");
        assert!(first.finish_reason.is_none());
        let last = chunks[1].as_ref().unwrap();
        assert_eq!(last.delta, "world");
        assert!(matches!(last.finish_reason, Some(FinishReason::Stop)));
        let usage = last.usage.as_ref().unwrap();
        assert_eq!(usage.total_tokens, 3);
    }

    // The stub echoes its own name as `provider_name`, which proves the
    // lookup returned the backend registered under that key.
    #[tokio::test]
    async fn registry_dispatches_to_correct_backend() {
        let mut r = Registry::empty();
        r.register(stub("anthropic"));
        r.register(stub("openai"));
        let b = r.get("openai").expect("openai registered");
        let resp = b.complete(ChatRequest::default()).await.unwrap();
        assert_eq!(resp.provider_name, "openai");
    }

    #[test]
    fn supports_capability_default_false() {
        // Minimal impl that overrides nothing optional, so the trait's
        // default `supports` is what gets exercised.
        struct DefaultBackend;
        #[async_trait]
        impl Backend for DefaultBackend {
            fn name(&self) -> &str {
                "default"
            }
            fn default_model(&self) -> &str {
                ""
            }
            async fn complete(
                &self,
                _r: ChatRequest,
            ) -> Result<ChatResponse, BackendError> {
                unreachable!()
            }
            async fn stream(
                &self,
                _r: ChatRequest,
            ) -> Result<ChatStream, BackendError> {
                unreachable!()
            }
        }
        let b = DefaultBackend;
        assert!(!b.supports(Capability::Streaming, "anything"));
        assert!(!b.supports(Capability::ToolUse, "anything"));
    }

    #[test]
    fn count_tokens_default_uses_unified_dispatch() {
        let b = StubBackend { name: "stub".into() };
        // The stub doesn't override count_tokens, so the trait default
        // delegates to tokens::count_tokens — same model dispatch as
        // the standalone function.
        let n = b.count_tokens("gpt-4o-mini", "hello world");
        assert!(n > 0);
    }
}
946}