axon/runtime_flags.rs
1//! §Fase 33.x.h — Process-wide runtime opt-in flags.
2//!
3//! Adopter-tunable runtime behaviors that DEFAULT to OFF (to
4//! preserve v1.24.0 wire byte-compat) and can be flipped ON for
5//! production-time experimentation or vertical-aware enterprise
6//! enhancements.
7//!
8//! # Why not on `ServerConfig`?
9//!
10//! ServerConfig is constructed in 29+ call sites across the test
11//! suite; adding fields there means a one-time-but-broad churn.
12//! Process-wide flags are simpler for OSS opt-in features that
13//! don't affect the wire format or the auth surface. The
14//! `std::sync::Mutex<bool>` indirection serializes read+write so
15//! there's no torn-write under concurrent test access.
16//!
17//! # D9 contract (Fase 33.x cycle)
18//!
19//! [`tokenizer_fallback_enabled`] gates the BPE-tokenized chunking
20//! that replaces the legacy whitespace 3-word grouping on the SSE
21//! LEGACY path. Defaults to OFF — the wire body stays byte-
22//! identical with v1.24.0 + with 33.x.b-g for adopters that
23//! don't opt in.
24//!
25//! When ON + the LEGACY path activates (flow shape unsupported,
26//! backend unknown, etc.), each step's full output goes through
27//! `axon_csys::tokens::cl100k_base()` and one StepToken event is
28//! emitted per BPE-token-decode-boundary. Adopter sees ~1-token
29//! granularity that matches real provider chunk size on English
30//! prose; non-English degrades to UTF-8-replacement chars at
31//! invalid token-boundary slices (rare in practice).
32//!
33//! # Test isolation
34//!
//! Tests that toggle the flag use the [`TokenizerFallbackGuard`]
//! RAII helper. It restores the previous flag value on drop, so a
//! test that panics mid-body doesn't leak state into the next test.
//! The flag is process-wide, so this module's flag-mutating tests
//! additionally serialize on a shared lock (`FLAG_TEST_LOCK`).
39
40use std::sync::Mutex;
41
/// Backing store for the tokenizer-fallback opt-in. Starts out `false`
/// (OFF).
///
/// Held in a `std::sync::Mutex<bool>` so that [`set_tokenizer_fallback`]
/// reads the old value and writes the new one under a single lock
/// acquisition; that read-then-write is what lets
/// [`TokenizerFallbackGuard`] capture the previous value on creation and
/// put it back on drop.
static TOKENIZER_FALLBACK: Mutex<bool> = Mutex::new(false);
46
47/// Read the current flag value. Cheap — single Mutex acquisition.
48/// Called once per `run_streaming_legacy_path` chunking decision
49/// (per-flow, not per-token), so the lock contention is
50/// negligible.
51pub fn tokenizer_fallback_enabled() -> bool {
52 *TOKENIZER_FALLBACK
53 .lock()
54 .expect("tokenizer_fallback flag mutex poisoned")
55}
56
57/// Set the flag explicitly. Returns the previous value so callers
58/// can restore it (the [`TokenizerFallbackGuard`] RAII helper does
59/// this automatically).
60pub fn set_tokenizer_fallback(enabled: bool) -> bool {
61 let mut g = TOKENIZER_FALLBACK
62 .lock()
63 .expect("tokenizer_fallback flag mutex poisoned");
64 let prev = *g;
65 *g = enabled;
66 prev
67}
68
69/// RAII guard that restores the flag to its previous value when
70/// dropped. Use in tests to scope a flag mutation to a single
71/// `#[tokio::test]` body:
72///
73/// ```ignore
74/// let _guard = TokenizerFallbackGuard::set(true);
75/// // ... test body with flag enabled ...
76/// // guard drops here → flag restored.
77/// ```
78pub struct TokenizerFallbackGuard {
79 previous: bool,
80}
81
82impl TokenizerFallbackGuard {
83 /// Set the flag to `enabled` and capture the previous value
84 /// for restoration on drop.
85 pub fn set(enabled: bool) -> Self {
86 let previous = set_tokenizer_fallback(enabled);
87 Self { previous }
88 }
89}
90
91impl Drop for TokenizerFallbackGuard {
92 fn drop(&mut self) {
93 set_tokenizer_fallback(self.previous);
94 }
95}
96
97// ────────────────────────────────────────────────────────────────────
98// §Fase 33.z.e — Streaming-via-dispatcher flag RETIRED
99// ────────────────────────────────────────────────────────────────────
100//
101// Pre-33.z.e this module exposed `streaming_via_dispatcher_enabled()`
102// + `set_streaming_via_dispatcher(bool)` + `StreamingViaDispatcherGuard`
103// for the feature-flagged dispatcher graft (33.z.b alpha, 33.z.c
104// stable default-on). 33.z.e DELETES all three symbols — the
105// dispatcher is the unconditional production path; there is no
106// opt-out.
107//
108// Any downstream crate that called `set_streaming_via_dispatcher(...)`
109// hits an explicit compile error at the v1.26.0 → v1.27.0 upgrade —
110// the intended failure shape for the deprecation cycle started in
111// 33.y.l and closed here.
112
113// ────────────────────────────────────────────────────────────────────
114// Tokenizer-aware chunking helper
115// ────────────────────────────────────────────────────────────────────
116
117/// §Fase 33.x.h — Tokenize `text` into BPE chunks via
118/// `axon_csys::tokens::cl100k_base()` and return one `String` per
119/// token (or per safe UTF-8 boundary group when a single token
120/// produces non-UTF-8 bytes).
121///
122/// # When this fires
123///
124/// Called from `run_streaming_legacy_path` ONLY when
125/// [`tokenizer_fallback_enabled`] returns `true`. The default
126/// behavior (flag OFF) preserves the v1.24.0 whitespace 3-word
127/// chunking — adopters that don't opt in see identical wire
128/// behavior.
129///
130/// # Fallback semantics
131///
132/// If tokenizer construction or encoding fails (cl100k_base()
133/// returns a `BpeError`), the function returns an empty Vec; the
134/// caller falls back to whitespace chunking. NO panic, NO silent
135/// success on a bad tokenizer — the adopter sees the same wire
136/// shape as flag-off.
137///
138/// # UTF-8 boundary safety
139///
140/// BPE tokens can split mid-codepoint (e.g., a single Chinese
141/// character may take multiple tokens). For each token's decoded
142/// bytes we use `String::from_utf8_lossy` which substitutes
143/// U+FFFD for invalid sequences. Adopters on non-Latin scripts
144/// may see replacement chars when tokens land mid-codepoint;
145/// for English prose this never happens in practice.
146pub fn bpe_chunk_text(text: &str) -> Vec<String> {
147 if text.is_empty() {
148 return Vec::new();
149 }
150 let tokenizer = match axon_csys::tokens::cl100k_base() {
151 Ok(t) => t,
152 Err(_) => return Vec::new(),
153 };
154 let token_ids = match tokenizer.encode_ordinary(text) {
155 Ok(ids) => ids,
156 Err(_) => return Vec::new(),
157 };
158 let mut chunks = Vec::with_capacity(token_ids.len());
159 for id in &token_ids {
160 let bytes = match tokenizer.decode_bytes(&[*id]) {
161 Ok(b) => b,
162 Err(_) => continue,
163 };
164 // `String::from_utf8_lossy` substitutes U+FFFD for invalid
165 // UTF-8 sequences (mid-codepoint token splits). For most
166 // English prose tokens are entire words or word-fragments,
167 // never split codepoints.
168 let s = String::from_utf8_lossy(&bytes).to_string();
169 if !s.is_empty() {
170 chunks.push(s);
171 }
172 }
173 chunks
174}
175
176// ────────────────────────────────────────────────────────────────────
177// Tests
178// ────────────────────────────────────────────────────────────────────
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared lock that serializes every test which mutates the
    /// process-wide flag. Each such test holds it for its whole body
    /// so parallel tests never observe a value another test set.
    /// Tests that leave the flag alone don't take it —
    /// `tokenizer_fallback_enabled()` returns the default `false`
    /// outside flag-mutation scopes.
    static FLAG_TEST_LOCK: Mutex<()> = Mutex::new(());

    /// Take [`FLAG_TEST_LOCK`], recovering it if an earlier test
    /// panicked while holding it (the lock protects no data).
    fn serialize_flag_tests() -> std::sync::MutexGuard<'static, ()> {
        FLAG_TEST_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[test]
    fn flag_default_is_off() {
        let _serial = serialize_flag_tests();
        // Defensive reset: start from a known OFF state regardless of
        // what an earlier test left behind.
        set_tokenizer_fallback(false);
        assert!(!tokenizer_fallback_enabled());
    }

    #[test]
    fn set_returns_previous_value() {
        let _serial = serialize_flag_tests();
        set_tokenizer_fallback(false);
        // OFF → ON reports the old OFF.
        assert!(!set_tokenizer_fallback(true));
        // ON → OFF reports the old ON.
        assert!(set_tokenizer_fallback(false));
    }

    #[test]
    fn guard_restores_flag_on_drop() {
        let _serial = serialize_flag_tests();
        set_tokenizer_fallback(false);

        let guard = TokenizerFallbackGuard::set(true);
        assert!(tokenizer_fallback_enabled());
        drop(guard);

        assert!(!tokenizer_fallback_enabled(), "guard must restore on drop");
    }

    #[test]
    fn guard_restores_to_previous_not_default() {
        let _serial = serialize_flag_tests();
        set_tokenizer_fallback(true);

        let guard = TokenizerFallbackGuard::set(false);
        assert!(!tokenizer_fallback_enabled());
        drop(guard);

        assert!(
            tokenizer_fallback_enabled(),
            "guard restores to PREVIOUS (true), not default (false)"
        );
        // Leave the flag OFF for whichever test runs next.
        set_tokenizer_fallback(false);
    }

    #[test]
    fn bpe_chunk_empty_text_returns_empty_vec() {
        assert!(bpe_chunk_text("").is_empty());
    }

    #[test]
    fn bpe_chunk_english_produces_token_level_granularity() {
        // cl100k_base is expected to split "Hello world" into about
        // two tokens ("Hello" + " world"), whereas whitespace
        // chunks-of-3 would emit the two words as a single chunk.
        let input = "Hello world";
        let pieces = bpe_chunk_text(input);
        // Only require ≥1 chunk so the test stays robust against
        // tokenizer-vocab updates that merge or split differently.
        assert!(
            !pieces.is_empty(),
            "BPE on 'Hello world' must produce ≥1 chunk"
        );
        // Concatenating the chunks gives back the input.
        assert_eq!(pieces.concat(), input);
    }

    #[test]
    fn bpe_chunk_finer_than_whitespace_for_long_text() {
        // Whitespace chunking groups three words per chunk; BPE emits
        // at least one token per word, so it must yield strictly more
        // chunks on non-trivial English prose.
        let text = "The quick brown fox jumps over the lazy dog repeatedly.";
        let whitespace_chunks = text.split_whitespace().count().div_ceil(3);
        let pieces = bpe_chunk_text(text);
        assert!(
            pieces.len() > whitespace_chunks,
            "BPE ({}) must be finer than whitespace chunks-of-3 ({})",
            pieces.len(),
            whitespace_chunks
        );
        // Concatenating the chunks gives back the input.
        assert_eq!(pieces.concat(), text);
    }

    #[test]
    fn bpe_chunk_round_trip_preserves_content() {
        // Round-trip pin: the concatenated chunks reconstruct the
        // original text byte-for-byte (modulo non-UTF-8 tokens,
        // which substitute U+FFFD).
        let text = "axon for axon — four-pillar streaming language.";
        let rebuilt = bpe_chunk_text(text).concat();
        assert_eq!(rebuilt, text);
    }
}
300}