entelix-core 0.5.4

entelix DAG root — IR, codecs, transports, Tool trait + ToolRegistry, auth, ExecutionContext, ModelInvocation/ToolInvocation Service spine, StreamAggregator
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
//! `TokenCounter` — operator-supplied token-count surface.
//!
//! Tokens are the unit budget caps and chunk boundaries are denominated
//! in. The vendor's wire-level usage report (the `Usage` block on a
//! `ModelResponse`) gives the *post-flight* count. `TokenCounter` is
//! the *pre-flight* counterpart — what operators reach for when:
//!
//! - **`RunBudget` pre-flight check** — refuse a request whose
//!   estimated input would already exceed the configured input-token
//!   cap, before paying the round-trip cost.
//! - **RAG chunking** — `entelix-rag::TokenCountSplitter` slices a
//!   document so each chunk lands under the model's per-message
//!   ceiling.
//! - **Content-economy budgeting** — system prompt + tools + history
//!   sum estimation when assembling a request.
//!
//! Vendor-accurate counters live in companion crates —
//! [`entelix-tokenizer-tiktoken`](https://docs.rs/entelix-tokenizer-tiktoken)
//! for the OpenAI BPE family (`cl100k_base`, `o200k_base`,
//! `p50k_base`, `r50k_base`) and
//! [`entelix-tokenizer-hf`](https://docs.rs/entelix-tokenizer-hf)
//! for HuggingFace tokenizer.json sources (Llama, Qwen, Mistral,
//! DeepSeek, Gemma, Phi, …). Future companions cover locale-aware
//! morphological accuracy for Korean and Japanese.
//! [`ByteCountTokenCounter`] ships in core as a zero-dependency
//! conservative default — accurate enough for development scaffolding,
//! never for production budgeting on non-English content.

use std::sync::Arc;

use crate::ir::Message;

/// Counts tokens for budget enforcement, splitter sizing, and
/// content-economy estimation.
///
/// Synchronous by contract — counters that need IO (remote tokenizer
/// service, lazy file-backed model) should pre-load eagerly at
/// construction or wrap the slow path in
/// `tokio::task::spawn_blocking` at the *call* site rather than
/// hiding `.await` inside the trait. Mirrors the
/// [`crate::time::Clock`] discipline: low-level primitives stay
/// pure so they compose with locks and hot paths.
pub trait TokenCounter: Send + Sync + std::fmt::Debug {
    /// Count the tokens in `text` under this counter's encoding.
    fn count(&self, text: &str) -> u64;

    /// Sum the token count across every text-bearing content part
    /// of a message slice. The default impl walks
    /// [`crate::ir::ContentPart::Text`] parts; non-text parts (image,
    /// tool-use, tool-result blocks) are vendor-specific in their
    /// token cost — counters that need an exact tally for those
    /// shapes override this method.
    fn count_messages(&self, msgs: &[Message]) -> u64 {
        msgs.iter()
            .flat_map(|m| m.content.iter())
            .filter_map(|part| match part {
                crate::ir::ContentPart::Text { text, .. } => Some(text.as_str()),
                _ => None,
            })
            .map(|t| self.count(t))
            .sum()
    }

    /// Vendor-published encoding name (`"cl100k_base"`,
    /// `"o200k_base"`, `"claude"`, `"gemini-tokenizer"`, …) — surfaced
    /// on OTel `gen_ai.tokenizer.name` and operator diagnostics.
    fn encoding_name(&self) -> &'static str;
}

impl<T: TokenCounter + ?Sized> TokenCounter for Arc<T> {
    fn count(&self, text: &str) -> u64 {
        (**self).count(text)
    }
    fn count_messages(&self, msgs: &[Message]) -> u64 {
        (**self).count_messages(msgs)
    }
    fn encoding_name(&self) -> &'static str {
        (**self).encoding_name()
    }
}

/// Zero-dependency conservative counter — `bytes.div_ceil(4)`.
///
/// Approximates English at the ~4-bytes-per-token rule of thumb that
/// tiktoken's `cl100k_base` is built around. **Systematically
/// inaccurate** for CJK, Devanagari, Arabic, and other scripts whose
/// UTF-8 byte cost diverges from typical token boundaries — operators
/// shipping multilingual workloads inject a vendor-accurate counter
/// (`entelix-tokenizer-tiktoken`, `entelix-tokenizer-hf`, locale-aware
/// companions) at `ChatModel::with_token_counter(...)` time.
///
/// The bias direction is deliberate: `div_ceil` rounds up, so the
/// estimate skews *over* the real count on average. Pre-flight
/// `RunBudget` checks built on top remain conservative — a
/// near-budget call is more likely refused than admitted, which is
/// the correct error direction for budget enforcement.
#[derive(Clone, Copy, Debug, Default)]
pub struct ByteCountTokenCounter;

impl ByteCountTokenCounter {
    /// Construct the counter. Stateless — every call to [`Self::new`]
    /// returns the same logical instance.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}

impl TokenCounter for ByteCountTokenCounter {
    fn count(&self, text: &str) -> u64 {
        // `usize::div_ceil` only stabilised in 1.73 — the workspace
        // pins 1.95 so the direct call is fine. `u64::from` over an
        // `as` cast keeps the lossless-conversion lint happy.
        u64::from(u32::try_from(text.len().div_ceil(4)).unwrap_or(u32::MAX))
    }

    fn encoding_name(&self) -> &'static str {
        "byte-count-naive"
    }
}

/// Routing table from `(provider, model)` pairs to a vendor-accurate
/// [`TokenCounter`].
///
/// Operators registering one counter per `(provider, model_prefix)`
/// pair drive the [`Self::resolve`] lookup; gateways (one process
/// fronting many model handles) read every chat-side dispatch
/// through one shared registry. The facade ships
/// `default_token_counter_registry()` factory (feature-gated on
/// `tokenizer-tiktoken` / `tokenizer-hf`) with the OpenAI BPE
/// pre-populated; operators extend it with their HuggingFace
/// `tokenizer.json` bytes.
///
/// ## Matching algorithm
///
/// Entries match when the provider name is **exactly equal** and the
/// model name **starts with** the registered prefix. Among all
/// matching entries, the one with the longest prefix wins — so
/// registering both `"gpt-4"` and `"gpt-4o"` routes
/// `"gpt-4o-mini"` to the `"gpt-4o"` entry without depending on
/// registration order. Ties on prefix length resolve to the
/// last-registered entry (operator-overridable). Misses fall through
/// to the registry's fallback counter.
///
/// ## Why prefix matching
///
/// Vendors version models with stable family prefixes (`gpt-4o-*`,
/// `claude-sonnet-*`, `gemini-1.5-*`). Exact-name matching forces
/// the operator to update the registry on every minor model release;
/// prefix matching absorbs new patch revisions silently into the
/// same tokenizer mapping the family uses. Regex was considered and
/// rejected — too expressive, and the typical mistake is *missing* a
/// model, which prefix matching handles by falling through to the
/// fallback rather than misrouting silently.
pub struct TokenCounterRegistry {
    entries: Vec<RegistryEntry>,
    fallback: Arc<dyn TokenCounter>,
}

struct RegistryEntry {
    provider: &'static str,
    model_prefix: &'static str,
    counter: Arc<dyn TokenCounter>,
}

impl TokenCounterRegistry {
    /// Construct an empty registry with [`ByteCountTokenCounter`] as
    /// the fallback. Add entries with [`Self::register`]; replace
    /// the fallback with [`Self::with_default`].
    #[must_use]
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
            fallback: Arc::new(ByteCountTokenCounter::new()),
        }
    }

    /// Replace the fallback counter used when no entry matches.
    /// Default is [`ByteCountTokenCounter`] (conservative — biased
    /// to over-count so pre-flight budget checks fail closed).
    #[must_use]
    pub fn with_default(mut self, fallback: Arc<dyn TokenCounter>) -> Self {
        self.fallback = fallback;
        self
    }

    /// Append a `(provider, model_prefix) → counter` entry. Multiple
    /// entries for the same provider partition the model space by
    /// prefix; the longest-prefix match wins at lookup time.
    /// Ties on prefix length resolve to the last-registered entry,
    /// so a later `register` call overrides an earlier one for the
    /// same `(provider, model_prefix)` pair.
    #[must_use]
    pub fn register(
        mut self,
        provider: &'static str,
        model_prefix: &'static str,
        counter: Arc<dyn TokenCounter>,
    ) -> Self {
        self.entries.push(RegistryEntry {
            provider,
            model_prefix,
            counter,
        });
        self
    }

    /// Resolve `(provider, model)` to a counter. The returned
    /// [`Resolution`] makes match vs. fallback visible at the type
    /// level so callers branch on the miss without inspecting
    /// `encoding_name()` after the fact (invariant 15 — silent
    /// fallback is not an option). See [`Resolution`] for the
    /// pattern-match shape and the deliberate "accept fallback"
    /// idiom.
    #[must_use]
    pub fn resolve(&self, provider: &str, model: &str) -> Resolution {
        let mut best: Option<&RegistryEntry> = None;
        for entry in &self.entries {
            if entry.provider != provider {
                continue;
            }
            if !model.starts_with(entry.model_prefix) {
                continue;
            }
            match best {
                Some(prev) if prev.model_prefix.len() > entry.model_prefix.len() => {}
                _ => best = Some(entry),
            }
        }
        match best {
            Some(entry) => Resolution::Matched(Arc::clone(&entry.counter)),
            None => Resolution::Fallback(Arc::clone(&self.fallback)),
        }
    }

    /// Number of registered `(provider, model_prefix)` entries.
    /// Excludes the fallback. Operators wiring a `tracing::info!` on
    /// boot read this to confirm the table is the expected size.
    #[must_use]
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Whether the registry has no registered entries (the fallback
    /// is always present).
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}

impl Default for TokenCounterRegistry {
    fn default() -> Self {
        Self::new()
    }
}

/// Outcome of [`TokenCounterRegistry::resolve`] — surfaces whether a
/// registered entry actually matched `(provider, model)` or the
/// registry's fallback counter was returned because no entry matched.
///
/// Making fallback visible at the type level lets production cost
/// estimation paths log / alert on unknown models without inspecting
/// `encoding_name()` after the fact (invariant 15 — silent fallback is
/// not an option). Callers consume the resolution via pattern match —
/// there is no "give me the counter regardless" shortcut, because
/// such a shortcut would re-enable silent absorption of the fallback.
///
/// ```ignore
/// let counter = match registry.resolve("openai", "gpt-9") {
///     Resolution::Matched(c) => c,
///     Resolution::Fallback(c) => {
///         tracing::warn!("unknown model — using fallback counter");
///         c
///     }
/// };
/// ```
///
/// Operators that intentionally accept the fallback bind both arms to
/// the same name (`Resolution::Matched(c) | Resolution::Fallback(c) => c`).
/// The pattern is short, and the call site signals the deliberate
/// choice in source.
///
/// ## Future variants
///
/// `Resolution` is `#[non_exhaustive]`. The two predicates
/// [`Self::is_match`] and [`Self::is_fallback`] each return `true`
/// only for their own variant — `!is_match()` and `is_fallback()`
/// are **not** equivalent under future variant additions. Operators
/// that want to alert on the no-match branch should match on
/// `Self::Fallback` explicitly.
#[derive(Clone)]
#[non_exhaustive]
pub enum Resolution {
    /// A registered `(provider, model_prefix)` entry matched.
    Matched(Arc<dyn TokenCounter>),
    /// No entry matched; the registry's fallback counter is returned.
    Fallback(Arc<dyn TokenCounter>),
}

impl Resolution {
    /// Borrow the resolved counter regardless of which branch matched.
    /// Operators that need the owned `Arc` consume the enum through a
    /// pattern match.
    #[must_use]
    pub fn counter(&self) -> &Arc<dyn TokenCounter> {
        match self {
            Self::Matched(c) | Self::Fallback(c) => c,
        }
    }

    /// `true` when a registered entry matched.
    #[must_use]
    pub const fn is_match(&self) -> bool {
        matches!(self, Self::Matched(_))
    }

    /// `true` when no entry matched and the fallback was returned.
    /// Distinct from `!is_match()` — `Resolution` is `non_exhaustive`,
    /// so a future variant could make both predicates return `false`.
    #[must_use]
    pub const fn is_fallback(&self) -> bool {
        matches!(self, Self::Fallback(_))
    }
}

impl std::fmt::Debug for Resolution {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (kind, encoding) = match self {
            Self::Matched(c) => ("Matched", c.encoding_name()),
            Self::Fallback(c) => ("Fallback", c.encoding_name()),
        };
        f.debug_struct(kind).field("encoding", &encoding).finish()
    }
}

impl std::fmt::Debug for TokenCounterRegistry {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let entries: Vec<(&'static str, &'static str, &'static str)> = self
            .entries
            .iter()
            .map(|e| (e.provider, e.model_prefix, e.counter.encoding_name()))
            .collect();
        f.debug_struct("TokenCounterRegistry")
            .field("entries", &entries)
            .field("fallback", &self.fallback.encoding_name())
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{ContentPart, Role};

    #[test]
    fn byte_count_rounds_up() {
        let c = ByteCountTokenCounter::new();
        assert_eq!(c.count(""), 0, "empty string is zero");
        assert_eq!(c.count("a"), 1, "one byte rounds up to one token");
        assert_eq!(c.count("abcd"), 1, "exactly four bytes is one token");
        assert_eq!(c.count("abcde"), 2, "five bytes rounds up to two");
        assert_eq!(c.count("abcdefgh"), 2, "exactly eight bytes is two");
    }

    #[test]
    fn byte_count_handles_multibyte_utf8_at_byte_granularity() {
        // Korean "안녕" — 2 chars, 6 UTF-8 bytes → 2 tokens by the
        // div_ceil(4) heuristic. Real cl100k_base would tokenise
        // these as 2-3 tokens; the naive counter is documented as
        // approximate, not exact.
        let c = ByteCountTokenCounter::new();
        assert_eq!(c.count("안녕"), 2);
    }

    #[test]
    fn count_messages_sums_text_parts_only() {
        let counter = ByteCountTokenCounter::new();
        let msg = Message::new(
            Role::User,
            vec![
                ContentPart::text("hello world!"), // 12 bytes → 3 tokens
                ContentPart::text("xyz"),          // 3 bytes  → 1 token
            ],
        );
        assert_eq!(counter.count_messages(std::slice::from_ref(&msg)), 4);
    }

    #[test]
    fn count_messages_default_impl_skips_non_text_parts() {
        // Tool-use blocks etc. carry vendor-specific tokens; the
        // default counter walks Text parts only. A counter that
        // wants exact counting for tool-use shapes overrides
        // count_messages.
        let counter = ByteCountTokenCounter::new();
        let msg = Message::new(
            Role::Assistant,
            vec![
                ContentPart::text("hi"), // 2 bytes → 1 token
                ContentPart::ToolUse {
                    id: "call_1".into(),
                    name: "tool".into(),
                    input: serde_json::json!({}),
                    provider_echoes: Vec::new(),
                },
            ],
        );
        assert_eq!(counter.count_messages(std::slice::from_ref(&msg)), 1);
    }

    #[test]
    fn encoding_name_surfaces_for_otel_attribute() {
        assert_eq!(
            ByteCountTokenCounter::new().encoding_name(),
            "byte-count-naive"
        );
    }

    #[test]
    fn arc_blanket_impl_forwards() {
        let c: Arc<dyn TokenCounter> = Arc::new(ByteCountTokenCounter::new());
        assert_eq!(c.count("abcd"), 1);
        assert_eq!(c.encoding_name(), "byte-count-naive");
    }

    #[derive(Debug)]
    struct LabelledCounter(&'static str, u64);
    impl TokenCounter for LabelledCounter {
        fn count(&self, _text: &str) -> u64 {
            self.1
        }
        fn encoding_name(&self) -> &'static str {
            self.0
        }
    }

    fn labelled(name: &'static str, fixed: u64) -> Arc<dyn TokenCounter> {
        Arc::new(LabelledCounter(name, fixed))
    }

    #[test]
    fn registry_returns_fallback_when_empty() {
        let reg = TokenCounterRegistry::new();
        let resolution = reg.resolve("openai", "gpt-5");
        assert!(
            resolution.is_fallback(),
            "empty registry should fall through"
        );
        assert_eq!(resolution.counter().encoding_name(), "byte-count-naive");
    }

    #[test]
    fn registry_resolves_exact_provider_and_prefix() {
        let reg = TokenCounterRegistry::new().register("openai", "gpt-4o", labelled("o200k", 1));
        let resolution = reg.resolve("openai", "gpt-4o-mini");
        assert!(resolution.is_match(), "registered prefix should match");
        assert_eq!(resolution.counter().encoding_name(), "o200k");
    }

    #[test]
    fn registry_ignores_wrong_provider() {
        let reg =
            TokenCounterRegistry::new().register("anthropic", "claude", labelled("anthropic", 2));
        let resolution = reg.resolve("openai", "claude-clone");
        // Provider mismatch — fall through to fallback.
        assert!(resolution.is_fallback());
        assert_eq!(resolution.counter().encoding_name(), "byte-count-naive");
    }

    #[test]
    fn registry_longest_prefix_wins_regardless_of_registration_order() {
        // Register "gpt-4" first, then "gpt-4o" — longest-prefix wins
        // on "gpt-4o-mini" regardless of order.
        let reg = TokenCounterRegistry::new()
            .register("openai", "gpt-4", labelled("cl100k", 1))
            .register("openai", "gpt-4o", labelled("o200k", 1));
        assert_eq!(
            reg.resolve("openai", "gpt-4o-mini")
                .counter()
                .encoding_name(),
            "o200k"
        );

        // Reverse registration order — same outcome.
        let reg = TokenCounterRegistry::new()
            .register("openai", "gpt-4o", labelled("o200k", 1))
            .register("openai", "gpt-4", labelled("cl100k", 1));
        assert_eq!(
            reg.resolve("openai", "gpt-4o-mini")
                .counter()
                .encoding_name(),
            "o200k"
        );
    }

    #[test]
    fn registry_falls_through_to_fallback_on_non_matching_model() {
        let reg = TokenCounterRegistry::new().register("openai", "gpt-4o", labelled("o200k", 1));
        // Same provider, prefix doesn't match — fall through.
        let resolution = reg.resolve("openai", "davinci");
        assert!(resolution.is_fallback());
        assert_eq!(resolution.counter().encoding_name(), "byte-count-naive");
    }

    #[test]
    fn registry_last_wins_on_tie() {
        let reg = TokenCounterRegistry::new()
            .register("openai", "gpt-4", labelled("first", 1))
            .register("openai", "gpt-4", labelled("second", 1));
        assert_eq!(
            reg.resolve("openai", "gpt-4-turbo")
                .counter()
                .encoding_name(),
            "second"
        );
    }

    #[test]
    fn registry_with_default_replaces_fallback() {
        let reg = TokenCounterRegistry::new().with_default(labelled("custom-fb", 0));
        let resolution = reg.resolve("any", "x");
        assert!(resolution.is_fallback());
        assert_eq!(resolution.counter().encoding_name(), "custom-fb");
    }

    #[test]
    fn registry_len_excludes_fallback() {
        let reg = TokenCounterRegistry::new()
            .register("openai", "gpt-4", labelled("a", 1))
            .register("openai", "gpt-4o", labelled("b", 1));
        assert_eq!(reg.len(), 2);
        assert!(!reg.is_empty());
    }

    #[test]
    fn resolution_pattern_match_yields_counter() {
        let reg = TokenCounterRegistry::new().register("openai", "gpt-4o", labelled("o200k", 1));
        let counter = match reg.resolve("openai", "gpt-4o") {
            Resolution::Matched(c) | Resolution::Fallback(c) => c,
        };
        assert_eq!(counter.encoding_name(), "o200k");
    }

    #[test]
    fn resolution_match_and_fallback_are_distinguishable() {
        let reg = TokenCounterRegistry::new().register("openai", "gpt-4o", labelled("o200k", 1));
        let matched = reg.resolve("openai", "gpt-4o-mini");
        let fallback = reg.resolve("openai", "davinci-002");
        assert!(matched.is_match() && !matched.is_fallback());
        assert!(fallback.is_fallback() && !fallback.is_match());
    }
}