axon-lang 1.21.1

AXON v1.5.1 — first crates.io publication of the AXON language full-stack runtime. Lexer/parser/type-checker/IR generator (re-exported from axon-frontend) plus the native Rust runtime: typed channels (TypedEventBus with QoS×5, π-calculus mobility, capability extrusion via shield D8 — Fase 13.f.2), Free Monad CPS handlers (Fase 2), lease kernel + reconcile loop (Fase 3+5), Epistemic Security Kernel (ESK Fase 6), Trust Types + ReplayLog (Fase 11.a+11.c), Stateful PEM over WebSocket (Fase 11.d), Ontological Tool Synthesis (Fase 11.e), Mobile Typed Channels (Fase 13). Crate publishes as `axon-lang` to mirror the Python PyPI package; library import remains `use axon::*` so existing call sites keep working unchanged.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
//! Native Rust LLM backends — Fase 24.
//!
//! Per-provider async backends consumed by the algebraic-effects
//! runtime (Fase 23.f) and, in upcoming Fase 25+, the general flow
//! executor. Each provider lives in its own submodule:
//!
//!   * [`anthropic`]   — Claude Messages API (Fase 24.c)
//!   * [`openai`]      — GPT chat/completions (Fase 24.d)
//!   * [`gemini`]      — Google generateContent (Fase 24.e)
//!   * [`kimi`]        — Moonshot K2.x (Fase 24.f, locked params)
//!   * [`glm`]         — Zhipu GLM-4.x (Fase 24.g)
//!   * [`ollama`]      — local LLMs via REST (Fase 24.h)
//!   * [`openrouter`]  — multi-provider gateway (Fase 24.i)
//!
//! Shared infrastructure ships in 24.b alongside the trait + Registry:
//!
//!   * [`error`]            — typed transport errors named per failure mode
//!   * [`retry`]            — retry policy + `Retry-After` parsing
//!   * [`observability`]    — tracing span helpers per call lifecycle
//!   * [`locked_model`]     — locked-parameter dispatch (Kimi K2.x / o1 / o3)
//!   * [`tokens`]           — unified `count_tokens` dispatch by model prefix
//!
//! Adopter usage (post-24.k):
//!
//! ```ignore
//! use axon::backends::{Registry, ChatRequest, Message, Role};
//!
//! let registry = Registry::production();
//! let backend = registry.get("anthropic").expect("anthropic registered");
//!
//! let req = ChatRequest {
//!     model: "claude-sonnet-4-5".into(),
//!     messages: vec![Message::user("Hello!")],
//!     ..Default::default()
//! };
//! let response = backend.complete(req).await?;
//! println!("{}", response.content);
//! ```
//!
//! # Architecture decisions (see docs/fase_24_native_rust_backends.md)
//!
//! * **D1** — `async_trait` over native async-fn-in-trait so `dyn Backend`
//!   stays object-safe (Registry uses `HashMap<String, Box<dyn Backend>>`).
//! * **D6** — the legacy [`crate::backend`] module stays in place during
//!   24.b–24.i to avoid touching 200+ call sites; in 24.j it becomes a
//!   thin re-export shim that delegates here.
//! * **D7** — Python `axon/backends/*.py` is untouched; flows running on
//!   the Python runtime keep using it.

// NOTE(review): blanket `dead_code` suppression for the whole module —
// presumably kept while the Fase 24 migration leaves parts of this API
// without in-crate callers. Confirm, and narrow it to specific items once
// the call sites land.
#![allow(dead_code)]

use std::collections::HashMap;
use std::pin::Pin;

use async_trait::async_trait;
use futures::Stream;

pub mod anthropic;
pub mod error;
pub mod gemini;
pub mod glm;
pub mod kimi;
pub mod locked_model;
pub mod observability;
pub mod ollama;
pub mod openai;
pub mod openai_compat;
pub mod openrouter;
pub mod retry;
pub mod tokens;
pub(crate) mod transport;

pub use anthropic::AnthropicBackend;
pub use error::{categorise_http, BackendError};
pub use gemini::GeminiBackend;
pub use glm::GLMBackend;
pub use kimi::KimiBackend;
pub use ollama::OllamaBackend;
pub use openai::OpenAIBackend;
pub use openai_compat::{OpenAICompatConfig, OpenAICompatibleBackend};
pub use openrouter::OpenRouterBackend;

// ────────────────────────────────────────────────────────────────────
//  Request / Response types — the wire shape every backend speaks
// ────────────────────────────────────────────────────────────────────

/// Who authored a message within a chat conversation.
///
/// Follows the OpenAI ChatML role set, plus one provider-neutral extra
/// (`Tool`) for messages that carry a tool-call result. Each provider
/// adapter translates this enum into the encoding its wire format
/// expects (e.g. Anthropic's `system` becomes a top-level field, not a
/// message; Gemini uses `user`/`model`/`function`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Role {
    System,
    User,
    Assistant,
    Tool,
}

impl Role {
    /// Canonical lower-case name of the role: `"system"`, `"user"`,
    /// `"assistant"` or `"tool"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Role::System => "system",
            Role::User => "user",
            Role::Assistant => "assistant",
            Role::Tool => "tool",
        }
    }
}

/// One chat message in a conversation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Message {
    /// Author of the message.
    pub role: Role,
    /// Text payload of the message.
    pub content: String,
    /// Optional tool-call identifier when role == Tool. Per-provider
    /// adapters thread this back to the correct tool call ID.
    pub tool_call_id: Option<String>,
}

impl Message {
    /// Single construction path shared by the per-role helpers, so the
    /// struct literal is spelled out in exactly one place.
    fn with_role(role: Role, content: impl Into<String>, tool_call_id: Option<String>) -> Self {
        Self { role, content: content.into(), tool_call_id }
    }

    /// Builds a [`Role::User`] message with no tool-call id.
    pub fn user(content: impl Into<String>) -> Self {
        Self::with_role(Role::User, content, None)
    }

    /// Builds a [`Role::Assistant`] message with no tool-call id.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self::with_role(Role::Assistant, content, None)
    }

    /// Builds a [`Role::System`] message with no tool-call id.
    pub fn system(content: impl Into<String>) -> Self {
        Self::with_role(Role::System, content, None)
    }

    /// Builds a [`Role::Tool`] message carrying a tool-call result.
    ///
    /// `content` is the result text; `tool_call_id` identifies the tool
    /// call this message answers and is stored in
    /// [`Message::tool_call_id`]. Without this helper a `Tool` message
    /// could only be built with a hand-written struct literal.
    pub fn tool(content: impl Into<String>, tool_call_id: impl Into<String>) -> Self {
        Self::with_role(Role::Tool, content, Some(tool_call_id.into()))
    }
}

/// A tool the model may invoke during the response.
///
/// `parameters_json` is the JSON Schema describing the parameter shape;
/// each provider serialises it with its own envelope.
///
/// Equality is field-wise string equality. `Eq` is derived alongside
/// `PartialEq` (every field is a `String`), matching the other plain
/// value types in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolSpec {
    /// Name of the tool.
    pub name: String,
    /// Description of the tool.
    pub description: String,
    /// JSON Schema for the tool's parameters, kept as raw JSON text.
    /// Nothing in this type validates or parses it.
    pub parameters_json: String,
}

/// Provider-feature discovery enum — used by [`Backend::supports`] so
/// adopters can ask "does this backend support X for this model?"
/// without parsing model strings themselves.
///
/// The derives make the enum usable by value (`Copy`) and as a hashed
/// key (`Eq + Hash`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Capability {
    /// Incremental (chunked) responses — presumably the
    /// [`Backend::stream`] path; confirm per provider.
    Streaming,
    /// Tool / function calling — presumably requests that populate
    /// [`ChatRequest::tools`]; confirm per provider.
    ToolUse,
    /// NOTE(review): image input, going by the name; nothing in this
    /// module exercises it — confirm against the provider adapters.
    Vision,
    /// Anthropic prompt caching (cache_control breakpoints).
    PromptCaching,
    /// Gemini safetySettings on the request body.
    SafetySettings,
    /// OpenAI structured outputs (response_format=json_schema).
    StructuredOutput,
    /// Provider hard-codes sampling parameters (Kimi K2.x, o1, o3).
    LockedParams,
}

/// One canonical chat request — provider-neutral. Per-provider adapters
/// translate to the wire JSON the provider expects.
///
/// `Default` yields an empty model string, no messages, no tools, every
/// optional field `None` and `stream == false`, so call sites can use
/// struct-update syntax (`..Default::default()`).
#[derive(Debug, Clone, Default)]
pub struct ChatRequest {
    /// Empty string → backend uses its `default_model()`.
    pub model: String,
    /// Conversation messages (see [`Message`]).
    pub messages: Vec<Message>,
    /// System prompt — Anthropic puts it in a top-level field; OpenAI &
    /// compats prepend a system message to the messages array.
    pub system: Option<String>,
    /// Optional token budget for the response. How an adapter treats
    /// `None` is not visible from this module.
    pub max_tokens: Option<u32>,
    /// Temperature; ignored when the resolved model is locked-params.
    pub temperature: Option<f64>,
    /// Optional `top_p` sampling parameter.
    /// NOTE(review): presumably also ignored for locked-params models,
    /// like `temperature` — confirm in `locked_model`.
    pub top_p: Option<f64>,
    /// Tools offered to the model for this request; empty by default.
    pub tools: Vec<ToolSpec>,
    /// `false` → call `complete()`. `true` → call `stream()` and consume
    /// the chunk stream incrementally.
    pub stream: bool,
    /// Trace ID propagated from the calling flow step. Surfaces in
    /// tracing spans so log lines correlate.
    pub trace_id: Option<String>,
}

/// How the model decided to stop generating.
///
/// Maps the provider-specific finish-reason strings to a closed enum
/// callers can `match` on. Unmapped values land in `Other(s)` so the
/// raw string is still recoverable for diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FinishReason {
    /// Natural end of generation (Anthropic `end_turn` / `stop_sequence`,
    /// OpenAI `stop`, Gemini `STOP`).
    Stop,
    /// Hit `max_tokens` budget (Anthropic `max_tokens`, OpenAI `length`,
    /// Gemini `MAX_TOKENS`).
    Length,
    /// Model invoked a tool (Anthropic `tool_use`, OpenAI `tool_calls` /
    /// legacy `function_call`).
    ToolUse,
    /// Provider's content filter blocked output (OpenAI `content_filter`;
    /// Gemini `SAFETY`, `BLOCKLIST`, `PROHIBITED_CONTENT`, `SPII`,
    /// `IMAGE_SAFETY`; Anthropic `refusal`, or empty + `end_turn`, which
    /// is detected by the caller rather than by this mapping).
    SafetyBreach,
    /// Anything else; carries the raw provider string.
    Other(String),
}

impl FinishReason {
    /// Map a raw provider string into the enum.
    ///
    /// * `provider` — registry name of the provider (`"anthropic"`,
    ///   `"openai"`, …). Only Anthropic has provider-gated slugs; every
    ///   other arm applies to all providers.
    /// * `raw` — the finish/stop reason exactly as the provider sent it.
    ///   Matching is ASCII-case-insensitive.
    ///
    /// Unknown values (including the empty string) are returned as
    /// `Other(raw)` with the original casing preserved.
    pub fn from_provider(provider: &str, raw: &str) -> Self {
        // Fold case once so Gemini's upper-case slugs (`STOP`, `SAFETY`, …)
        // share arms with the lower-case OpenAI / Anthropic ones.
        let lc = raw.to_ascii_lowercase();
        match (provider, lc.as_str()) {
            // Slugs only Anthropic emits; gated so another provider's
            // identically named value is not silently reinterpreted.
            ("anthropic", "end_turn" | "stop_sequence") => Self::Stop,
            ("anthropic", "tool_use") => Self::ToolUse,
            ("anthropic", "refusal") => Self::SafetyBreach,
            // Provider-independent slugs.
            (_, "stop") => Self::Stop,
            (_, "length" | "max_tokens") => Self::Length,
            (_, "tool_calls" | "function_call") => Self::ToolUse,
            // Every documented "output was blocked" slug must end up here,
            // otherwise `is_safety_breach()` misses a blocked response.
            (
                _,
                "content_filter" | "safety" | "blocklist" | "prohibited_content" | "spii"
                | "image_safety",
            ) => Self::SafetyBreach,
            // Includes the empty string: `Other("")`.
            _ => Self::Other(raw.to_string()),
        }
    }

    /// True iff this finish reason is the provider's safety classifier
    /// blocking output. Used by `BackendError::SafetyBreach` lifting.
    pub fn is_safety_breach(&self) -> bool {
        matches!(self, Self::SafetyBreach)
    }
}

/// Token-usage breakdown returned by the provider. Field naming is
/// canonical (input/output/total); per-provider deltas (cache reads on
/// Anthropic, reasoning tokens on o1/o3) live in dedicated fields so
/// aggregating dashboards across providers stay coherent.
///
/// Every counter is `0` in the derived `Default`, so adapters only need
/// to fill in the fields their provider reports.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Usage {
    /// Tokens counted against the request (prompt side).
    pub input_tokens: u32,
    /// Tokens counted against the response (completion side).
    pub output_tokens: u32,
    /// Total token count.
    /// NOTE(review): whether this is provider-reported or computed as
    /// input + output is decided by each adapter — not visible here.
    pub total_tokens: u32,
    /// Anthropic prompt-cache hit (`cache_read_input_tokens`).
    pub cache_read_tokens: u32,
    /// Anthropic prompt-cache write (`cache_creation_input_tokens`).
    pub cache_creation_tokens: u32,
    /// OpenAI o1/o3 reasoning-token allocation (`reasoning_tokens`).
    pub reasoning_tokens: u32,
}

/// A complete chat response from a non-streaming `complete()` call.
#[derive(Debug, Clone)]
pub struct ChatResponse {
    /// Text of the model's reply.
    pub content: String,
    /// Resolved model slug — what the provider actually returned the
    /// response from (may differ from request when an alias was sent).
    pub model_name: String,
    /// Provider name (`"anthropic"`, `"openai"`, etc.).
    pub provider_name: String,
    /// Why generation stopped, normalised to [`FinishReason`].
    pub finish_reason: FinishReason,
    /// Token accounting for this call (see [`Usage`]).
    pub usage: Usage,
    /// Number of retries that fired before this success. 0 on a clean
    /// first-attempt response.
    pub retry_count: u32,
    /// Trace ID echoed back from the request (or auto-generated if the
    /// request omitted one).
    pub trace_id: String,
}

/// One delta in a streaming response.
///
/// `delta` is the incremental text fragment for this chunk. `finish_reason`
/// + `usage` are populated only on the final chunk so consumers can
/// compute totals without keeping a running tally.
///
/// The derived `Default` is an empty delta with both optional fields
/// `None`, i.e. the shape of a non-final chunk.
#[derive(Debug, Clone, Default)]
pub struct ChatChunk {
    /// Incremental text fragment carried by this chunk.
    pub delta: String,
    /// `Some` only on the final chunk.
    pub finish_reason: Option<FinishReason>,
    /// `Some` only on the final chunk.
    pub usage: Option<Usage>,
}

/// Pinned, boxed stream alias — the concrete return type of
/// [`Backend::stream`]. Adopters consume via `futures::StreamExt`.
///
/// Each item is a `Result`: `Ok` carries one [`ChatChunk`], `Err` a
/// [`BackendError`]. The trait object is `Send`, so the stream can be
/// moved to another thread.
pub type ChatStream =
    Pin<Box<dyn Stream<Item = Result<ChatChunk, BackendError>> + Send>>;

// ────────────────────────────────────────────────────────────────────
//  Backend trait — the per-provider contract
// ────────────────────────────────────────────────────────────────────

/// One LLM provider's native Rust client.
///
/// Implementors live in `axon-rs/src/backends/<provider>.rs` and are
/// registered into [`Registry`] at process startup. The trait is
/// object-safe (D1 — uses `async_trait`) so registries can hold
/// `Box<dyn Backend>` for runtime dispatch by name.
///
/// The `Send + Sync` supertraits are part of the contract: every
/// implementor must be shareable across threads.
#[async_trait]
pub trait Backend: Send + Sync {
    /// Short provider name used as the registry key.
    /// E.g. `"anthropic"`, `"openai"`, `"kimi"`.
    fn name(&self) -> &str;

    /// Default model used when [`ChatRequest::model`] is empty.
    fn default_model(&self) -> &str;

    /// Synchronous-result chat completion (non-streaming).
    ///
    /// Takes the request by value and resolves to the full
    /// [`ChatResponse`], or to a [`BackendError`] when the call fails.
    async fn complete(&self, request: ChatRequest) -> Result<ChatResponse, BackendError>;

    /// Streaming chat completion. Adopter consumes the returned stream;
    /// per-chunk text arrives in `ChatChunk::delta`, finish reason +
    /// usage in the final chunk.
    ///
    /// Failure can surface at two levels: as the outer `Err` returned by
    /// this call, or as an `Err` item inside the returned [`ChatStream`].
    async fn stream(&self, request: ChatRequest) -> Result<ChatStream, BackendError>;

    /// Best-effort token count for `text` against a specific model on
    /// this provider. Default impl delegates to the unified
    /// [`tokens::count_tokens`] dispatch; per-provider overrides may
    /// consult the provider's HTTP `count_tokens` endpoint when an
    /// exact answer is required + a network round-trip is acceptable.
    ///
    /// The default returns only the `count` field of the value produced
    /// by [`tokens::count_tokens`].
    fn count_tokens(&self, model: &str, text: &str) -> usize {
        tokens::count_tokens(model, text).count
    }

    /// Capability discovery — does this backend support `capability`
    /// for the given model? Default returns `false` for everything;
    /// per-provider impls override.
    // Both parameters are intentionally unused by the default body; the
    // `allow` keeps their documented names instead of `_`-prefixing them.
    #[allow(unused_variables)]
    fn supports(&self, capability: Capability, model: &str) -> bool {
        false
    }
}

// ────────────────────────────────────────────────────────────────────
//  Registry — string-keyed dispatch by provider name
// ────────────────────────────────────────────────────────────────────

/// Process-wide registry of registered backends.
///
/// Backends are registered by their canonical short name (the same
/// string the Python `BACKEND_REGISTRY` uses — verified by the
/// Fase 24.j drift gate). Lookup is `O(1)` HashMap.
///
/// NOTE(review): nothing in this file installs a global instance —
/// `Registry` is an ordinary owned value. "Process-wide" presumably
/// describes how callers are expected to use it; confirm at the call
/// sites.
pub struct Registry {
    /// Backends keyed by the string their [`Backend::name`] returned at
    /// registration time.
    backends: HashMap<String, Box<dyn Backend>>,
}

impl Registry {
    /// Empty registry — useful for tests that want to register only
    /// stub backends.
    pub fn empty() -> Self {
        Self { backends: HashMap::new() }
    }

    /// Production registry — populated with all 7 native backends.
    ///
    /// Every backend is constructed via its `from_env()` factory — i.e.
    /// API keys are read at registry-construction time from the
    /// per-provider env vars (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`,
    /// `GEMINI_API_KEY`, `KIMI_API_KEY`, `GLM_API_KEY`, `OPENROUTER_API_KEY`,
    /// plus `OLLAMA_HOST` / `OLLAMA_API_KEY` for the local daemon).
    /// Backends whose env var is missing still construct successfully;
    /// the auth check fires on the first `complete()` call instead.
    ///
    /// The registry's `provider_names()` returns the sorted list of all
    /// 7 keys: `["anthropic", "gemini", "glm", "kimi", "ollama",
    /// "openai", "openrouter"]`. The Fase 24.j drift gate
    /// (`tests/test_fase24_backend_parity.py`) asserts this set
    /// matches Python's `BACKEND_REGISTRY` keys exactly.
    pub fn production() -> Self {
        let mut registry = Self::empty();
        registry.register(Box::new(anthropic::AnthropicBackend::from_env()));
        registry.register(Box::new(gemini::GeminiBackend::from_env()));
        registry.register(Box::new(glm::GLMBackend::from_env()));
        registry.register(Box::new(kimi::KimiBackend::from_env()));
        registry.register(Box::new(ollama::OllamaBackend::from_env()));
        registry.register(Box::new(openai::OpenAIBackend::from_env()));
        registry.register(Box::new(openrouter::OpenRouterBackend::from_env()));
        registry
    }

    /// Register `backend` under the key `backend.name()`. Replaces any
    /// existing entry with the same name (last-write-wins).
    pub fn register(&mut self, backend: Box<dyn Backend>) {
        self.backends.insert(backend.name().to_string(), backend);
    }

    /// Look up a backend by name. Returns `None` if not registered.
    pub fn get(&self, name: &str) -> Option<&dyn Backend> {
        self.backends.get(name).map(|b| b.as_ref())
    }

    /// All registered provider names, sorted alphabetically. Used by
    /// the cross-stack drift gate (Fase 24.j) to verify the Rust set
    /// equals the Python `BACKEND_REGISTRY` set.
    pub fn provider_names(&self) -> Vec<String> {
        let mut names: Vec<String> = self.backends.keys().cloned().collect();
        names.sort();
        names
    }

    pub fn len(&self) -> usize {
        self.backends.len()
    }

    pub fn is_empty(&self) -> bool {
        self.backends.is_empty()
    }
}

impl Default for Registry {
    fn default() -> Self {
        Self::production()
    }
}

// ────────────────────────────────────────────────────────────────────
//  Tests — trait + types + Registry
// ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use futures::StreamExt;

    /// Test-only stub that lets us exercise the Registry + trait without
    /// hitting a real provider.
    ///
    /// It reports whatever `name` it was built with, always answers
    /// `complete()` with the fixed text `"stubbed"`, and streams exactly
    /// two chunks (`"hi "`, then `"world"` with finish reason + usage).
    struct StubBackend {
        name: String,
    }

    #[async_trait]
    impl Backend for StubBackend {
        fn name(&self) -> &str {
            &self.name
        }
        fn default_model(&self) -> &str {
            "stub-model"
        }
        async fn complete(
            &self,
            _request: ChatRequest,
        ) -> Result<ChatResponse, BackendError> {
            Ok(ChatResponse {
                content: "stubbed".into(),
                model_name: "stub-model".into(),
                provider_name: self.name.clone(),
                finish_reason: FinishReason::Stop,
                usage: Usage::default(),
                retry_count: 0,
                trace_id: "stub".into(),
            })
        }
        async fn stream(
            &self,
            _request: ChatRequest,
        ) -> Result<ChatStream, BackendError> {
            // Only the last chunk carries finish_reason + usage, matching
            // the contract documented on `ChatChunk`.
            let chunks = vec![
                Ok(ChatChunk { delta: "hi ".into(), ..Default::default() }),
                Ok(ChatChunk {
                    delta: "world".into(),
                    finish_reason: Some(FinishReason::Stop),
                    usage: Some(Usage { input_tokens: 1, output_tokens: 2, total_tokens: 3, ..Default::default() }),
                }),
            ];
            Ok(Box::pin(futures::stream::iter(chunks)))
        }
        // Overrides the trait default: the stub claims Streaming only.
        fn supports(&self, capability: Capability, _model: &str) -> bool {
            matches!(capability, Capability::Streaming)
        }
    }

    /// Boxes a `StubBackend` with the given registry name.
    fn stub(name: &str) -> Box<dyn Backend> {
        Box::new(StubBackend { name: name.to_string() })
    }

    // NOTE(review): despite the name there is no string → Role parse in
    // this module; the test only checks that every role has a non-empty
    // name and spot-checks `User`.
    #[test]
    fn role_round_trips_via_as_str() {
        for r in [Role::System, Role::User, Role::Assistant, Role::Tool] {
            assert!(!r.as_str().is_empty());
        }
        assert_eq!(Role::User.as_str(), "user");
    }

    // Each constructor helper must stamp its own role.
    #[test]
    fn message_helpers_set_role() {
        assert_eq!(Message::user("a").role, Role::User);
        assert_eq!(Message::assistant("b").role, Role::Assistant);
        assert_eq!(Message::system("c").role, Role::System);
    }

    // The derived Default is the "nothing set" request.
    #[test]
    fn chat_request_default_is_empty() {
        let r = ChatRequest::default();
        assert!(r.model.is_empty());
        assert!(r.messages.is_empty());
        assert!(r.tools.is_empty());
        assert!(!r.stream);
    }

    // Anthropic stop_reason slugs.
    #[test]
    fn finish_reason_anthropic_mapping() {
        assert_eq!(FinishReason::from_provider("anthropic", "end_turn"), FinishReason::Stop);
        assert_eq!(FinishReason::from_provider("anthropic", "max_tokens"), FinishReason::Length);
        assert_eq!(FinishReason::from_provider("anthropic", "tool_use"), FinishReason::ToolUse);
        assert_eq!(FinishReason::from_provider("anthropic", "stop_sequence"), FinishReason::Stop);
    }

    // OpenAI finish_reason slugs.
    #[test]
    fn finish_reason_openai_mapping() {
        assert_eq!(FinishReason::from_provider("openai", "stop"), FinishReason::Stop);
        assert_eq!(FinishReason::from_provider("openai", "length"), FinishReason::Length);
        assert_eq!(FinishReason::from_provider("openai", "tool_calls"), FinishReason::ToolUse);
        assert_eq!(FinishReason::from_provider("openai", "content_filter"), FinishReason::SafetyBreach);
    }

    #[test]
    fn finish_reason_gemini_mapping_uppercase() {
        // Gemini emits SAFETY / MAX_TOKENS / STOP — case-folded.
        assert_eq!(FinishReason::from_provider("gemini", "STOP"), FinishReason::Stop);
        assert_eq!(FinishReason::from_provider("gemini", "MAX_TOKENS"), FinishReason::Length);
        assert_eq!(FinishReason::from_provider("gemini", "SAFETY"), FinishReason::SafetyBreach);
    }

    // Unmapped slugs must come back verbatim inside `Other`.
    #[test]
    fn finish_reason_unknown_preserves_raw() {
        let r = FinishReason::from_provider("openai", "weird_signal");
        assert_eq!(r, FinishReason::Other("weird_signal".into()));
    }

    // Only the `SafetyBreach` variant satisfies the predicate.
    #[test]
    fn finish_reason_safety_breach_predicate() {
        assert!(FinishReason::SafetyBreach.is_safety_breach());
        assert!(!FinishReason::Stop.is_safety_breach());
        assert!(!FinishReason::Other("anything".into()).is_safety_breach());
    }

    // Registration makes exactly the registered name resolvable.
    #[test]
    fn registry_empty_then_register() {
        let mut r = Registry::empty();
        assert_eq!(r.len(), 0);
        r.register(stub("anthropic"));
        assert_eq!(r.len(), 1);
        assert!(r.get("anthropic").is_some());
        assert!(r.get("openai").is_none());
    }

    // Names come back alphabetically regardless of registration order.
    #[test]
    fn registry_provider_names_sorted() {
        let mut r = Registry::empty();
        r.register(stub("openai"));
        r.register(stub("anthropic"));
        r.register(stub("gemini"));
        assert_eq!(
            r.provider_names(),
            vec!["anthropic".to_string(), "gemini".to_string(), "openai".to_string()]
        );
    }

    // Re-registering a name replaces the entry instead of adding one.
    #[test]
    fn registry_replace_on_duplicate_register() {
        let mut r = Registry::empty();
        r.register(stub("anthropic"));
        r.register(stub("anthropic"));
        assert_eq!(r.len(), 1); // last-write-wins
    }

    // Calls the stub directly (no registry) through the async trait method.
    #[tokio::test]
    async fn stub_complete_returns_response() {
        let b = StubBackend { name: "stub".into() };
        let resp = b.complete(ChatRequest::default()).await.unwrap();
        assert_eq!(resp.content, "stubbed");
        assert_eq!(resp.provider_name, "stub");
        assert_eq!(resp.finish_reason, FinishReason::Stop);
    }

    // Drains the stream and checks the final-chunk-only contract for
    // finish_reason + usage.
    #[tokio::test]
    async fn stub_stream_yields_chunks() {
        let b = StubBackend { name: "stub".into() };
        let stream = b.stream(ChatRequest::default()).await.unwrap();
        let chunks: Vec<_> = stream.collect().await;
        assert_eq!(chunks.len(), 2);
        let first = chunks[0].as_ref().unwrap();
        assert_eq!(first.delta, "hi ");
        assert!(first.finish_reason.is_none());
        let last = chunks[1].as_ref().unwrap();
        assert_eq!(last.delta, "world");
        assert!(matches!(last.finish_reason, Some(FinishReason::Stop)));
        let usage = last.usage.as_ref().unwrap();
        assert_eq!(usage.total_tokens, 3);
    }

    // The stub echoes its own name as `provider_name`, which proves the
    // lookup returned the backend registered under that key.
    #[tokio::test]
    async fn registry_dispatches_to_correct_backend() {
        let mut r = Registry::empty();
        r.register(stub("anthropic"));
        r.register(stub("openai"));
        let b = r.get("openai").expect("openai registered");
        let resp = b.complete(ChatRequest::default()).await.unwrap();
        assert_eq!(resp.provider_name, "openai");
    }

    // A backend that does not override `supports` must report `false`
    // for every capability (trait default).
    #[test]
    fn supports_capability_default_false() {
        struct DefaultBackend;
        #[async_trait]
        impl Backend for DefaultBackend {
            fn name(&self) -> &str {
                "default"
            }
            fn default_model(&self) -> &str {
                ""
            }
            // Never called by this test; only `supports` is exercised.
            async fn complete(
                &self,
                _r: ChatRequest,
            ) -> Result<ChatResponse, BackendError> {
                unreachable!()
            }
            async fn stream(
                &self,
                _r: ChatRequest,
            ) -> Result<ChatStream, BackendError> {
                unreachable!()
            }
        }
        let b = DefaultBackend;
        assert!(!b.supports(Capability::Streaming, "anything"));
        assert!(!b.supports(Capability::ToolUse, "anything"));
    }

    #[test]
    fn count_tokens_default_uses_unified_dispatch() {
        let b = StubBackend { name: "stub".into() };
        // The stub doesn't override count_tokens, so the trait default
        // delegates to tokens::count_tokens — same model dispatch as
        // the standalone function.
        let n = b.count_tokens("gpt-4o-mini", "hello world");
        assert!(n > 0);
    }
}