Skip to main content

axon/
runtime_flags.rs

1//! §Fase 33.x.h — Process-wide runtime opt-in flags.
2//!
3//! Adopter-tunable runtime behaviors that DEFAULT to OFF (to
4//! preserve v1.24.0 wire byte-compat) and can be flipped ON for
5//! production-time experimentation or vertical-aware enterprise
6//! enhancements.
7//!
8//! # Why not on `ServerConfig`?
9//!
10//! ServerConfig is constructed in 29+ call sites across the test
11//! suite; adding fields there means a one-time-but-broad churn.
12//! Process-wide flags are simpler for OSS opt-in features that
13//! don't affect the wire format or the auth surface. The
14//! `std::sync::Mutex<bool>` indirection serializes read+write so
15//! there's no torn-write under concurrent test access.
16//!
17//! # D9 contract (Fase 33.x cycle)
18//!
19//! [`tokenizer_fallback_enabled`] gates the BPE-tokenized chunking
20//! that replaces the legacy whitespace 3-word grouping on the SSE
21//! LEGACY path. Defaults to OFF — the wire body stays byte-
22//! identical with v1.24.0 + with 33.x.b-g for adopters that
23//! don't opt in.
24//!
25//! When ON + the LEGACY path activates (flow shape unsupported,
26//! backend unknown, etc.), each step's full output goes through
27//! `axon_csys::tokens::cl100k_base()` and one StepToken event is
28//! emitted per BPE-token-decode-boundary. Adopter sees ~1-token
29//! granularity that matches real provider chunk size on English
30//! prose; non-English degrades to UTF-8-replacement chars at
31//! invalid token-boundary slices (rare in practice).
32//!
33//! # Test isolation
34//!
//! Tests that toggle the flag should use the [`TokenizerFallbackGuard`]
//! RAII helper (`let _guard = TokenizerFallbackGuard::set(true);`).
//! The guard restores the previous flag value when it is dropped, so
//! a test that panics mid-body and unwinds doesn't leak state into
//! the next test.
39
40use std::sync::Mutex;
41
/// Process-wide flag — OFF by default.
///
/// Kept in a `std::sync::Mutex<bool>`; every read and every
/// set-and-return-previous goes through `flag_guard`, so each is a
/// single critical section.
static TOKENIZER_FALLBACK: Mutex<bool> = Mutex::new(false);

/// Lock the flag, recovering the guard if the mutex is poisoned.
///
/// A `bool` has no invariant that a panicking lock holder could
/// leave half-updated, so the stored value is always safe to use.
/// Recovering (instead of `expect`-panicking) also means
/// [`TokenizerFallbackGuard`]'s `Drop` cannot panic while a failing
/// test is already unwinding — a double panic aborts the process.
fn flag_guard() -> std::sync::MutexGuard<'static, bool> {
    TOKENIZER_FALLBACK
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}

/// Read the current flag value. Cheap — single Mutex acquisition.
/// Called once per `run_streaming_legacy_path` chunking decision
/// (per-flow, not per-token), so the lock contention is
/// negligible.
///
/// Never panics: a poisoned mutex is recovered and its stored value
/// returned.
pub fn tokenizer_fallback_enabled() -> bool {
    *flag_guard()
}

/// Set the flag explicitly. Returns the previous value so callers
/// can restore it (the [`TokenizerFallbackGuard`] RAII helper does
/// this automatically).
///
/// The read of the old value and the write of the new one happen
/// under one lock acquisition. Never panics: a poisoned mutex is
/// recovered.
pub fn set_tokenizer_fallback(enabled: bool) -> bool {
    let mut flag = flag_guard();
    std::mem::replace(&mut *flag, enabled)
}
68
/// RAII guard that restores the flag to its previous value when
/// dropped. Use in tests to scope a flag mutation to a single
/// `#[tokio::test]` body:
///
/// ```ignore
/// let _guard = TokenizerFallbackGuard::set(true);
/// // ... test body with flag enabled ...
/// // guard drops here → flag restored.
/// ```
///
/// Bind the guard to a *named* variable (`_guard`), never to `_`:
/// `let _ = TokenizerFallbackGuard::set(true);` or an unbound call
/// drops the guard immediately, which restores the flag before the
/// test body runs. `#[must_use]` warns on the unbound-call form.
#[derive(Debug)]
#[must_use = "the flag is restored as soon as the guard is dropped; bind it with `let _guard = ...`"]
pub struct TokenizerFallbackGuard {
    /// Flag value observed when the guard was created; written back
    /// on drop.
    previous: bool,
}
81
82impl TokenizerFallbackGuard {
83    /// Set the flag to `enabled` and capture the previous value
84    /// for restoration on drop.
85    pub fn set(enabled: bool) -> Self {
86        let previous = set_tokenizer_fallback(enabled);
87        Self { previous }
88    }
89}
90
91impl Drop for TokenizerFallbackGuard {
92    fn drop(&mut self) {
93        set_tokenizer_fallback(self.previous);
94    }
95}
96
97// ────────────────────────────────────────────────────────────────────
98//  §Fase 33.z.e — Streaming-via-dispatcher flag RETIRED
99// ────────────────────────────────────────────────────────────────────
100//
101// Pre-33.z.e this module exposed `streaming_via_dispatcher_enabled()`
102// + `set_streaming_via_dispatcher(bool)` + `StreamingViaDispatcherGuard`
103// for the feature-flagged dispatcher graft (33.z.b alpha, 33.z.c
104// stable default-on). 33.z.e DELETES all three symbols — the
105// dispatcher is the unconditional production path; there is no
106// opt-out.
107//
108// Any downstream crate that called `set_streaming_via_dispatcher(...)`
109// hits an explicit compile error at the v1.26.0 → v1.27.0 upgrade —
110// the intended failure shape for the deprecation cycle started in
111// 33.y.l and closed here.
112
113// ────────────────────────────────────────────────────────────────────
114//  Tokenizer-aware chunking helper
115// ────────────────────────────────────────────────────────────────────
116
117/// §Fase 33.x.h — Tokenize `text` into BPE chunks via
118/// `axon_csys::tokens::cl100k_base()` and return one `String` per
119/// token (or per safe UTF-8 boundary group when a single token
120/// produces non-UTF-8 bytes).
121///
122/// # When this fires
123///
124/// Called from `run_streaming_legacy_path` ONLY when
125/// [`tokenizer_fallback_enabled`] returns `true`. The default
126/// behavior (flag OFF) preserves the v1.24.0 whitespace 3-word
127/// chunking — adopters that don't opt in see identical wire
128/// behavior.
129///
130/// # Fallback semantics
131///
132/// If tokenizer construction or encoding fails (cl100k_base()
133/// returns a `BpeError`), the function returns an empty Vec; the
134/// caller falls back to whitespace chunking. NO panic, NO silent
135/// success on a bad tokenizer — the adopter sees the same wire
136/// shape as flag-off.
137///
138/// # UTF-8 boundary safety
139///
140/// BPE tokens can split mid-codepoint (e.g., a single Chinese
141/// character may take multiple tokens). For each token's decoded
142/// bytes we use `String::from_utf8_lossy` which substitutes
143/// U+FFFD for invalid sequences. Adopters on non-Latin scripts
144/// may see replacement chars when tokens land mid-codepoint;
145/// for English prose this never happens in practice.
146pub fn bpe_chunk_text(text: &str) -> Vec<String> {
147    if text.is_empty() {
148        return Vec::new();
149    }
150    let tokenizer = match axon_csys::tokens::cl100k_base() {
151        Ok(t) => t,
152        Err(_) => return Vec::new(),
153    };
154    let token_ids = match tokenizer.encode_ordinary(text) {
155        Ok(ids) => ids,
156        Err(_) => return Vec::new(),
157    };
158    let mut chunks = Vec::with_capacity(token_ids.len());
159    for id in &token_ids {
160        let bytes = match tokenizer.decode_bytes(&[*id]) {
161            Ok(b) => b,
162            Err(_) => continue,
163        };
164        // `String::from_utf8_lossy` substitutes U+FFFD for invalid
165        // UTF-8 sequences (mid-codepoint token splits). For most
166        // English prose tokens are entire words or word-fragments,
167        // never split codepoints.
168        let s = String::from_utf8_lossy(&bytes).to_string();
169        if !s.is_empty() {
170            chunks.push(s);
171        }
172    }
173    chunks
174}
175
176// ────────────────────────────────────────────────────────────────────
177//  Tests
178// ────────────────────────────────────────────────────────────────────
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize all flag-mutating tests via a shared Mutex.
    /// The lock is held for the duration of the test body so the
    /// flag's value during one test isn't observed by another
    /// flag-mutating test running in parallel.
    static FLAG_TEST_LOCK: Mutex<()> = Mutex::new(());

    /// Test-only scope: holds [`FLAG_TEST_LOCK`] and guarantees the
    /// flag is OFF both when the test body starts and when it ends,
    /// including when the body panics on a failed assertion.
    struct FlagScope {
        _serial: std::sync::MutexGuard<'static, ()>,
    }

    impl FlagScope {
        /// Take the serialization lock (recovering it if an earlier
        /// test panicked while holding it) and reset the flag to OFF.
        fn enter() -> Self {
            let serial = FLAG_TEST_LOCK
                .lock()
                .unwrap_or_else(|p| p.into_inner());
            set_tokenizer_fallback(false);
            Self { _serial: serial }
        }
    }

    impl Drop for FlagScope {
        fn drop(&mut self) {
            // `drop` runs before the fields are dropped, so the reset
            // happens while the serialization lock is still held.
            set_tokenizer_fallback(false);
        }
    }

    #[test]
    fn flag_default_is_off() {
        let _scope = FlagScope::enter();
        assert!(!tokenizer_fallback_enabled());
    }

    #[test]
    fn set_returns_previous_value() {
        let _scope = FlagScope::enter();
        let prev = set_tokenizer_fallback(true);
        assert!(!prev);
        let prev = set_tokenizer_fallback(false);
        assert!(prev);
    }

    #[test]
    fn guard_restores_flag_on_drop() {
        let _scope = FlagScope::enter();
        {
            let _g = TokenizerFallbackGuard::set(true);
            assert!(tokenizer_fallback_enabled());
        }
        assert!(!tokenizer_fallback_enabled(), "guard must restore on drop");
    }

    #[test]
    fn guard_restores_to_previous_not_default() {
        let _scope = FlagScope::enter();
        set_tokenizer_fallback(true);
        {
            let _g = TokenizerFallbackGuard::set(false);
            assert!(!tokenizer_fallback_enabled());
        }
        assert!(
            tokenizer_fallback_enabled(),
            "guard restores to PREVIOUS (true), not default (false)"
        );
        // Cleanup happens in `FlagScope::drop`, even if the assertion
        // above fails.
    }

    #[test]
    fn bpe_chunk_empty_text_returns_empty_vec() {
        let chunks = bpe_chunk_text("");
        assert!(chunks.is_empty());
    }

    #[test]
    fn bpe_chunk_english_produces_token_level_granularity() {
        // "Hello world" via cl100k_base ⇒ ~2 tokens
        // ("Hello" + " world"). Compare to whitespace chunking
        // (which would emit 1 chunk for "Hello world" via
        // chunks(3) of [Hello, world]).
        let chunks = bpe_chunk_text("Hello world");
        // BPE for English usually yields 1 token per word; we
        // assert ≥1 to remain robust against tokenizer-vocab
        // updates that may merge or split.
        assert!(
            !chunks.is_empty(),
            "BPE on 'Hello world' must produce ≥1 chunk"
        );
        // Concat round-trip preserves content.
        let joined: String = chunks.join("");
        assert_eq!(joined, "Hello world");
    }

    #[test]
    fn bpe_chunk_finer_than_whitespace_for_long_text() {
        // Long English prose: whitespace chunks(3) groups 3 words
        // at a time; BPE chunks ≥1 token per word. BPE should
        // produce strictly more chunks for non-trivial text.
        let text = "The quick brown fox jumps over the lazy dog repeatedly.";
        let word_chunk_count = text.split_whitespace().count().div_ceil(3);
        let bpe_chunks = bpe_chunk_text(text);
        assert!(
            bpe_chunks.len() > word_chunk_count,
            "BPE ({}) must be finer than whitespace chunks-of-3 ({})",
            bpe_chunks.len(),
            word_chunk_count
        );
        // Round-trip content preservation.
        let joined: String = bpe_chunks.join("");
        assert_eq!(joined, text);
    }

    #[test]
    fn bpe_chunk_round_trip_preserves_content() {
        // Round-trip pin: joining all BPE chunks reconstructs the
        // original text.
        let text = "axon for axon — four-pillar streaming language.";
        let chunks = bpe_chunk_text(text);
        let joined: String = chunks.join("");
        assert_eq!(joined, text);
    }
}