aprender-serve 0.50.0

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//! GGUF utility functions
//!
//! Private helpers used across GGUF modules for parsing, tensor manipulation,
//! and inference operations.

use std::sync::OnceLock;

// ============================================================================
// Verbose mode helper
// ============================================================================

/// Reports whether verbose output was requested through the environment.
///
/// Verbose mode is on when `REALIZAR_VERBOSE` is set to any valid-Unicode
/// value; only the variable's presence is inspected, so `REALIZAR_VERBOSE=0`
/// enables it just like `REALIZAR_VERBOSE=1`. When the variable is missing
/// (or holds non-Unicode data) the answer is `false`, i.e. quiet by default.
///
/// The environment is probed on the first call only; the answer is cached for
/// the rest of the process, so later changes to the variable are not observed.
#[allow(dead_code)]
pub(crate) fn verbose() -> bool {
    static ENABLED: OnceLock<bool> = OnceLock::new();
    let probe_env = || std::env::var("REALIZAR_VERBOSE").is_ok();
    *ENABLED.get_or_init(probe_env)
}

// ============================================================================
// GPT-2 BPE Unicode utilities
// ============================================================================

/// Maps one character of a GPT-2 byte-level BPE token back to the raw byte it
/// stands for.
///
/// GPT-2's byte-level BPE alphabet is laid out as follows:
/// - Printable ASCII (0x21-0x7E) and Latin-1 (0xA1-0xAC, 0xAE-0xFF) are
///   represented by the character with the same code point.
/// - The remaining bytes are moved onto a "staircase" starting at U+0100, in
///   ascending byte order: 0x00-0x20 -> U+0100-U+0120, 0x7F -> U+0121,
///   0x80-0xA0 -> U+0122-U+0142, 0xAD -> U+0143.
///
/// Returns `Some(byte)` for any character in U+0000-U+0143 and `None` for
/// everything above that.
///
/// Note that decoding is lenient: *every* code point up to U+00FF is passed
/// through as the byte of the same value, including the ones (such as a raw
/// space, U+0020) that the GPT-2 alphabet itself never produces.
#[inline]
pub(crate) fn gpt2_unicode_to_byte(c: char) -> Option<u8> {
    // First code point of the remapped ("staircase") region.
    const STAIR_BASE: u32 = 0x0100;

    let code = u32::from(c);
    let byte = match code {
        // Direct region: the code point is the byte value.
        0x0000..=0x00FF => code,
        // Staircase steps 0..=32 -> bytes 0x00-0x20.
        0x0100..=0x0120 => code - STAIR_BASE,
        // Staircase step 33 -> DEL.
        0x0121 => 0x7F,
        // Staircase steps 34..=66 -> bytes 0x80-0xA0.
        0x0122..=0x0142 => 0x80 + (code - 0x0122),
        // Staircase step 67 -> soft hyphen.
        0x0143 => 0xAD,
        // Anything else is not part of the byte-level alphabet.
        _ => return None,
    };

    // Every arm above yields a value in 0..=0xFF, so this narrowing is lossless.
    Some(byte as u8)
}

/// Maps a raw byte to the character that represents it in GPT-2 style
/// byte-level BPE (the HuggingFace/GPT-2 `bytes_to_unicode` table).
///
/// This is the inverse of [`gpt2_unicode_to_byte`]: decoding the returned
/// character yields `b` again for every byte value. Per the PMAT-855 note,
/// it serves the ENCODE direction so that the emitted glyphs match the ones
/// the vocab/merges were trained on, rather than the SentencePiece/GGUF
/// `<0xNN>` byte-fallback spelling, which a GPT-2/Qwen vocab does not contain.
///
/// Layout of the table:
/// - Printable ASCII `0x21-0x7E` and Latin-1 `0xA1-0xAC`, `0xAE-0xFF` become
///   the character with the same code point.
/// - All other bytes become `U+0100 + n`, where `n` is the byte's rank among
///   those "other" bytes in ascending order:
///   `0x00-0x20` → n=0..32, `0x7F` → n=33, `0x80-0xA0` → n=34..66, `0xAD` → n=67.
#[inline]
pub(crate) fn gpt2_byte_to_unicode(b: u8) -> char {
    // Rank of `b` on the U+0100 staircase, or `None` when it maps to itself.
    let stair_index: Option<u32> = match b {
        0x00..=0x20 => Some(u32::from(b)),
        0x7F => Some(33),
        0x80..=0xA0 => Some(34 + u32::from(b - 0x80)),
        0xAD => Some(67),
        _ => None,
    };

    let code_point = stair_index.map_or(u32::from(b), |n| 0x0100 + n);

    // Every code point produced above is at most U+0143, hence a valid scalar
    // value; the replacement character is only a defensive fallback.
    char::from_u32(code_point).unwrap_or('\u{FFFD}')
}

/// Decode a GPT-2 style byte-level BPE token to raw bytes.
///
/// Each character in the token may represent either a direct byte (printable ASCII/Latin-1)
/// or an encoded byte (using Unicode codepoints U+0100-U+0143).
#[allow(dead_code)]
pub(crate) fn decode_gpt2_token_to_bytes(token: &str) -> Vec<u8> {
    token.chars().filter_map(gpt2_unicode_to_byte).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `char` from a raw code point; every call site passes a valid
    /// Unicode scalar value.
    fn glyph_at(code_point: u32) -> char {
        char::from_u32(code_point).expect("c")
    }

    #[test]
    fn test_verbose_default_false() {
        // The answer depends on whether REALIZAR_VERBOSE is set in the test
        // environment, so this is only a smoke test that the call succeeds.
        let _flag: bool = verbose();
    }

    #[test]
    fn test_gpt2_unicode_to_byte_printable() {
        // Printable ASCII decodes to its own code point.
        for &(glyph, byte) in &[('A', 0x41u8), ('z', 0x7A), ('!', 0x21)] {
            assert_eq!(gpt2_unicode_to_byte(glyph), Some(byte));
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_special() {
        // Remapped glyphs: NUL, space (offset 32) and DEL (offset 33).
        let cases = [
            ('\u{0100}', 0x00u8),
            ('\u{0120}', 0x20),
            ('\u{0121}', 0x7F),
        ];
        for &(glyph, byte) in &cases {
            assert_eq!(gpt2_unicode_to_byte(glyph), Some(byte));
        }
    }

    #[test]
    fn test_decode_gpt2_token() {
        assert_eq!(decode_gpt2_token_to_bytes("Hello"), b"Hello".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_with_special() {
        // U+0120 is the encoded space, so the token decodes to "A B".
        assert_eq!(decode_gpt2_token_to_bytes("A\u{0120}B"), b"A B".to_vec());
    }

    #[test]
    fn test_gpt2_unicode_to_byte_null() {
        // U+0100 stands for NUL (0x00).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0100)), Some(0x00));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_tab() {
        // U+0109 stands for TAB (0x09).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0109)), Some(b'\t'));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_newline() {
        // U+010A stands for LF (0x0A).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x010A)), Some(b'\n'));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_carriage_return() {
        // U+010D stands for CR (0x0D).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x010D)), Some(b'\r'));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_latin1() {
        // Latin-1 glyphs in 0xA1-0xAC / 0xAE-0xFF decode to themselves:
        // inverted exclamation mark, n with tilde, y with diaeresis.
        let cases = [
            ('\u{00A1}', 0xA1u8),
            ('\u{00F1}', 0xF1),
            ('\u{00FF}', 0xFF),
        ];
        for &(glyph, byte) in &cases {
            assert_eq!(gpt2_unicode_to_byte(glyph), Some(byte));
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_extended_special() {
        // Both ends of the remapped 0x80-0xA0 run (offsets 34 and 66).
        for &(code_point, byte) in &[(0x0122u32, 0x80u8), (0x0142, 0xA0)] {
            assert_eq!(gpt2_unicode_to_byte(glyph_at(code_point)), Some(byte));
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_soft_hyphen() {
        // U+0143 stands for the soft hyphen (0xAD).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0143)), Some(0xAD));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_out_of_range() {
        // Nothing above U+0143 belongs to the alphabet (the last entry is the
        // party-popper emoji).
        for &glyph in &['\u{0200}', '\u{1000}', '\u{1F389}'] {
            assert_eq!(gpt2_unicode_to_byte(glyph), None);
        }
    }

    #[test]
    fn test_decode_gpt2_token_empty() {
        assert_eq!(decode_gpt2_token_to_bytes(""), Vec::<u8>::new());
    }

    #[test]
    fn test_decode_gpt2_token_numbers() {
        assert_eq!(decode_gpt2_token_to_bytes("12345"), b"12345".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_punctuation() {
        assert_eq!(decode_gpt2_token_to_bytes("!@#$%"), b"!@#$%".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_mixed() {
        // Printable ASCII around an encoded newline.
        let decoded = decode_gpt2_token_to_bytes("Hello\u{010A}World");
        assert_eq!(decoded, b"Hello\nWorld".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_filters_invalid() {
        // The emoji cannot be decoded and is dropped; 'A' and 'B' survive.
        let decoded = decode_gpt2_token_to_bytes("A\u{1F389}B");
        assert_eq!(decoded, b"AB".to_vec());
    }

    #[test]
    fn test_gpt2_unicode_to_byte_all_printable_ascii() {
        // Sweep the whole printable ASCII range 0x21-0x7E.
        (0x21u8..=0x7E).for_each(|byte| {
            assert_eq!(gpt2_unicode_to_byte(char::from(byte)), Some(byte));
        });
    }

    #[test]
    fn test_verbose_returns_consistent() {
        // Repeated calls must agree with each other.
        assert_eq!(verbose(), verbose());
    }

    // =========================================================================
    // PMAT-855: gpt2_byte_to_unicode is the exact inverse of gpt2_unicode_to_byte
    // =========================================================================

    /// PMAT-855 falsifier (round-trip): encoding a byte and decoding the
    /// resulting glyph must give the byte back, for all 256 byte values.
    /// Discharges C-GPT2BPE-ENC-001.
    #[test]
    fn falsify_gpt2_byte_to_unicode_roundtrip_all_bytes() {
        for byte in u8::MIN..=u8::MAX {
            let glyph = gpt2_byte_to_unicode(byte);
            let decoded = gpt2_unicode_to_byte(glyph);
            assert_eq!(
                decoded,
                Some(byte),
                "byte 0x{:02X} -> '{}' (U+{:04X}) did not invert back to itself",
                byte,
                glyph,
                u32::from(glyph),
            );
        }
    }

    #[test]
    fn test_gpt2_byte_to_unicode_known_points() {
        let cases = [
            // Latin-1 self-mapped: the UTF-8 bytes of e-acute, [0xC3, 0xA9].
            (0xC3u8, '\u{00C3}'),
            (0xA9, '\u{00A9}'),
            // Printable ASCII self-maps.
            (b'A', 'A'),
            (b'!', '!'),
            // Staircase boundaries and well-known control characters.
            (0x00, '\u{0100}'),  // NUL
            (b' ', '\u{0120}'),  // space
            (b'\n', '\u{010A}'), // newline
            (b'\t', '\u{0109}'), // tab
            (0x7F, '\u{0121}'),  // DEL
            (0x80, '\u{0122}'),
            (0xA0, '\u{0142}'),
            (0xAD, '\u{0143}'), // soft hyphen
        ];
        for &(byte, glyph) in &cases {
            assert_eq!(gpt2_byte_to_unicode(byte), glyph);
        }
    }

    // =========================================================================
    // gpt2_unicode_to_byte: exhaustive branch coverage
    // =========================================================================

    #[test]
    fn test_gpt2_unicode_to_byte_all_special_offsets_0_to_32() {
        // Staircase offsets 0..=32 decode to bytes 0x00..=0x20.
        for offset in 0u8..=32 {
            let glyph = glyph_at(0x0100 + u32::from(offset));
            assert_eq!(
                gpt2_unicode_to_byte(glyph),
                Some(offset),
                "Offset {} should map to byte 0x{:02X}",
                offset,
                offset
            );
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_offset_33_is_del() {
        // Staircase offset 33 decodes to DEL (0x7F).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0100 + 33)), Some(0x7F));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_offsets_34_to_66() {
        // Staircase offsets 34..=66 decode to bytes 0x80..=0xA0, pairwise.
        for (offset, expected) in (34u32..=66).zip(0x80u8..=0xA0) {
            let glyph = glyph_at(0x0100 + offset);
            assert_eq!(
                gpt2_unicode_to_byte(glyph),
                Some(expected),
                "Offset {} should map to byte 0x{:02X}",
                offset,
                expected
            );
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_offset_67_is_soft_hyphen() {
        // Staircase offset 67 (U+0143) decodes to the soft hyphen (0xAD).
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0100 + 67)), Some(0xAD));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_boundary_at_0x20() {
        // Space is offset 32, the last step of the 0..=32 run.
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0120)), Some(0x20));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_boundary_at_0x80() {
        // 0x80 is offset 34, the first step of the 34..=66 run.
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0122)), Some(0x80));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_boundary_at_0xa0() {
        // 0xA0 is offset 66, the last step of the 34..=66 run.
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0142)), Some(0xA0));
    }

    #[test]
    fn test_gpt2_unicode_to_byte_direct_low_ascii() {
        // '!' (0x21) and '~' (0x7E) are the first and last directly mapped
        // printable ASCII characters.
        for &(glyph, byte) in &[('!', 0x21u8), ('~', 0x7E)] {
            assert_eq!(gpt2_unicode_to_byte(glyph), Some(byte));
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_latin1_extended_range() {
        // End points of the two directly mapped Latin-1 runs:
        // 0xA1-0xAC and 0xAE-0xFF.
        for &byte in &[0xA1u8, 0xAC, 0xAE, 0xFF] {
            assert_eq!(gpt2_unicode_to_byte(char::from(byte)), Some(byte));
        }
    }

    #[test]
    fn test_gpt2_unicode_to_byte_just_above_range() {
        // U+0144 is one past the staircase and above 0xFF, so it is rejected.
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x0144)), None);
    }

    #[test]
    fn test_gpt2_unicode_to_byte_just_below_special_range() {
        // U+00FF is the last code point of the direct-mapping region.
        assert_eq!(gpt2_unicode_to_byte(glyph_at(0x00FF)), Some(0xFF));
    }

    // =========================================================================
    // decode_gpt2_token_to_bytes: additional patterns
    // =========================================================================

    #[test]
    fn test_decode_gpt2_token_all_special() {
        // Encoded NUL, TAB, LF, CR and space, in that order.
        let decoded = decode_gpt2_token_to_bytes("\u{0100}\u{0109}\u{010A}\u{010D}\u{0120}");
        assert_eq!(decoded, b"\0\t\n\r ".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_extended_special_bytes() {
        // Encoded 0x80 followed by encoded 0xA0.
        let decoded = decode_gpt2_token_to_bytes("\u{0122}\u{0142}");
        assert_eq!(decoded, [0x80u8, 0xA0].to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_soft_hyphen() {
        let decoded = decode_gpt2_token_to_bytes("A\u{0143}B");
        assert_eq!(decoded, [0x41u8, 0xAD, 0x42].to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_del_character() {
        // 'X', encoded DEL, 'Y'.
        let decoded = decode_gpt2_token_to_bytes("X\u{0121}Y");
        assert_eq!(decoded, b"X\x7FY".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_only_invalid() {
        // Every character lies above U+0143, so nothing is decoded.
        let decoded = decode_gpt2_token_to_bytes("\u{0200}\u{0300}\u{0400}");
        assert_eq!(decoded, Vec::<u8>::new());
    }

    #[test]
    fn test_decode_gpt2_token_latin1_direct() {
        // n-tilde and e-acute are directly mapped Latin-1 characters.
        let decoded = decode_gpt2_token_to_bytes("\u{00F1}\u{00E9}");
        assert_eq!(decoded, [0xF1u8, 0xE9].to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_single_char() {
        assert_eq!(decode_gpt2_token_to_bytes("A"), b"A".to_vec());
    }

    #[test]
    fn test_decode_gpt2_token_whitespace_and_text() {
        // Encoded space, plain text, encoded newline.
        let decoded = decode_gpt2_token_to_bytes("\u{0120}Hello\u{010A}");
        assert_eq!(decoded, b" Hello\n".to_vec());
    }
}