base_d/
bench.rs

1//! Benchmarking utilities for comparing encoding paths.
2//!
3//! This module exposes internal encoding paths for performance comparison:
4//! - Scalar: Pure Rust, no SIMD
5//! - LUT: SIMD with runtime lookup tables
6//! - Specialized: Hardcoded SIMD for known dictionaries
7//!
8//! # Example
9//!
10//! ```ignore
11//! use base_d::bench::{EncodingPath, encode_with_path, detect_available_paths};
12//!
13//! let dict = get_dictionary("base64");
14//! let paths = detect_available_paths(&dict);
15//!
16//! for path in paths {
17//!     let result = encode_with_path(data, &dict, path);
18//! }
19//! ```
20
21use crate::EncodingMode;
22use crate::core::dictionary::Dictionary;
23use crate::encoders::algorithms::{DecodeError, byte_range, radix};
24
25#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
26use crate::simd;
27
/// The encoder implementations the benchmark harness can choose between.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EncodingPath {
    /// Plain Rust implementation that never uses SIMD.
    Scalar,
    /// SIMD implementation whose lookup tables are constructed at runtime.
    Lut,
    /// SIMD implementation hardcoded for known RFC dictionaries.
    Specialized,
}

/// Renders the short label of a path: `Scalar`, `LUT` or `Specialized`.
impl std::fmt::Display for EncodingPath {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            EncodingPath::Scalar => "Scalar",
            EncodingPath::Lut => "LUT",
            EncodingPath::Specialized => "Specialized",
        };
        f.write_str(label)
    }
}
48
/// SIMD capabilities of the machine the code is running on.
#[derive(Debug, Clone)]
pub struct PlatformInfo {
    /// Architecture name; `detect` fills it from `std::env::consts::ARCH`.
    pub arch: &'static str,
    /// Display labels of the SIMD feature sets found by `detect`.
    pub simd_features: Vec<&'static str>,
}

impl PlatformInfo {
    /// Probe the current platform.
    ///
    /// On `x86_64` the CPU is queried at runtime for AVX-512 VBMI, AVX2 and SSSE3 (reported
    /// in that order). On `aarch64` NEON is always reported. Every other architecture yields
    /// an empty feature list.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        let simd_features: Vec<&'static str> = [
            (is_x86_feature_detected!("avx512vbmi"), "AVX-512 VBMI"),
            (is_x86_feature_detected!("avx2"), "AVX2"),
            (is_x86_feature_detected!("ssse3"), "SSSE3"),
        ]
        .iter()
        .filter(|(present, _)| *present)
        .map(|&(_, label)| label)
        .collect();

        // NEON is always available on aarch64, so no runtime probe is needed.
        #[cfg(target_arch = "aarch64")]
        let simd_features: Vec<&'static str> = vec!["NEON"];

        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let simd_features: Vec<&'static str> = Vec::new();

        Self {
            arch: std::env::consts::ARCH,
            simd_features,
        }
    }

    /// Human-readable summary: the bare architecture name when no SIMD features are listed,
    /// otherwise `"<arch> (<feature>, <feature>, ...)"`.
    pub fn display(&self) -> String {
        match self.simd_features.as_slice() {
            [] => self.arch.to_string(),
            features => format!("{} ({})", self.arch, features.join(", ")),
        }
    }
}
96
97/// Information about a dictionary's benchmark capabilities.
98#[derive(Debug, Clone)]
99pub struct DictionaryBenchInfo {
100    pub name: String,
101    pub base: usize,
102    pub mode: EncodingMode,
103    pub available_paths: Vec<EncodingPath>,
104    pub supports_streaming: bool,
105}
106
107/// Detect which encoding paths are available for a dictionary.
108pub fn detect_available_paths(dict: &Dictionary) -> Vec<EncodingPath> {
109    let mut paths = vec![EncodingPath::Scalar]; // Scalar always available
110
111    #[cfg(feature = "simd")]
112    {
113        let base = dict.base();
114        let mode = dict.mode();
115
116        // Check if LUT path is available (power-of-2 base, ASCII chars)
117        if base.is_power_of_two() && base <= 256 {
118            // Check if all chars are ASCII
119            let all_ascii = (0..base).all(|i| {
120                dict.encode_digit(i)
121                    .map(|c| (c as u32) < 128)
122                    .unwrap_or(false)
123            });
124
125            if all_ascii && matches!(mode, EncodingMode::Chunked) {
126                paths.push(EncodingPath::Lut);
127            }
128        }
129
130        // Check if specialized path is available
131        if is_specialized_available(dict) {
132            paths.push(EncodingPath::Specialized);
133        }
134    }
135
136    paths
137}
138
139/// Check if a specialized SIMD path exists for this dictionary.
140#[cfg(feature = "simd")]
141fn is_specialized_available(dict: &Dictionary) -> bool {
142    use crate::simd::variants::{identify_base32_variant, identify_base64_variant};
143
144    let base = dict.base();
145
146    match base {
147        16 => {
148            // Check if it's standard hex (uppercase or lowercase)
149            let first_char = dict.encode_digit(10); // 'A' or 'a' position
150            matches!(first_char, Some('A') | Some('a'))
151        }
152        32 => identify_base32_variant(dict).is_some(),
153        64 => identify_base64_variant(dict).is_some(),
154        256 => matches!(dict.mode(), EncodingMode::Chunked | EncodingMode::ByteRange),
155        _ => false,
156    }
157}
158
159#[cfg(not(feature = "simd"))]
160fn is_specialized_available(_dict: &Dictionary) -> bool {
161    false
162}
163
164/// Encode using a specific path (for benchmarking).
165///
166/// Returns `None` if the path is not available for this dictionary.
167pub fn encode_with_path(data: &[u8], dict: &Dictionary, path: EncodingPath) -> Option<String> {
168    match path {
169        EncodingPath::Scalar => Some(encode_scalar(data, dict)),
170        EncodingPath::Lut => encode_lut(data, dict),
171        EncodingPath::Specialized => encode_specialized(data, dict),
172    }
173}
174
175/// Decode using a specific path (for benchmarking).
176///
177/// Returns `None` if the path is not available for this dictionary.
178pub fn decode_with_path(encoded: &str, dict: &Dictionary, path: EncodingPath) -> Option<Vec<u8>> {
179    match path {
180        EncodingPath::Scalar => decode_scalar(encoded, dict).ok(),
181        EncodingPath::Lut => decode_lut(encoded, dict),
182        EncodingPath::Specialized => decode_specialized(encoded, dict),
183    }
184}
185
186/// Pure scalar encoding (no SIMD).
187fn encode_scalar(data: &[u8], dict: &Dictionary) -> String {
188    match dict.mode() {
189        EncodingMode::Radix => radix::encode(data, dict),
190        EncodingMode::Chunked => encode_chunked_scalar(data, dict),
191        EncodingMode::ByteRange => byte_range::encode_byte_range(data, dict),
192    }
193}
194
195/// Pure scalar decoding (no SIMD).
196fn decode_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
197    match dict.mode() {
198        EncodingMode::Radix => radix::decode(encoded, dict),
199        EncodingMode::Chunked => decode_chunked_scalar(encoded, dict),
200        EncodingMode::ByteRange => byte_range::decode_byte_range(encoded, dict),
201    }
202}
203
204/// Scalar chunked encoding (bypasses SIMD).
205fn encode_chunked_scalar(data: &[u8], dict: &Dictionary) -> String {
206    let base = dict.base();
207    let bits_per_char = (base as f64).log2() as usize;
208
209    if bits_per_char == 0 || base & (base - 1) != 0 {
210        // Non-power-of-2, fall back to radix
211        return radix::encode(data, dict);
212    }
213
214    let mut result = String::new();
215    let mut bit_buffer: u64 = 0;
216    let mut bits_in_buffer = 0;
217
218    for &byte in data {
219        bit_buffer = (bit_buffer << 8) | byte as u64;
220        bits_in_buffer += 8;
221
222        while bits_in_buffer >= bits_per_char {
223            bits_in_buffer -= bits_per_char;
224            let index = ((bit_buffer >> bits_in_buffer) & ((1 << bits_per_char) - 1)) as usize;
225            if let Some(ch) = dict.encode_digit(index) {
226                result.push(ch);
227            }
228        }
229    }
230
231    // Handle remaining bits
232    if bits_in_buffer > 0 {
233        let index = ((bit_buffer << (bits_per_char - bits_in_buffer)) & ((1 << bits_per_char) - 1))
234            as usize;
235        if let Some(ch) = dict.encode_digit(index) {
236            result.push(ch);
237        }
238    }
239
240    // Add padding if needed
241    if let Some(pad) = dict.padding() {
242        let output_block_size = match bits_per_char {
243            6 => 4, // base64
244            5 => 8, // base32
245            4 => 2, // base16
246            _ => 1,
247        };
248        while !result.len().is_multiple_of(output_block_size) {
249            result.push(pad);
250        }
251    }
252
253    result
254}
255
256/// Scalar chunked decoding (bypasses SIMD).
257fn decode_chunked_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
258    let base = dict.base();
259    let bits_per_char = (base as f64).log2() as usize;
260
261    if bits_per_char == 0 || base & (base - 1) != 0 {
262        return radix::decode(encoded, dict);
263    }
264
265    // Strip padding
266    let padding = dict.padding();
267    let encoded = if let Some(pad) = padding {
268        encoded.trim_end_matches(pad)
269    } else {
270        encoded
271    };
272
273    let mut result = Vec::new();
274    let mut bit_buffer: u64 = 0;
275    let mut bits_in_buffer = 0;
276
277    for ch in encoded.chars() {
278        let value = dict.decode_char(ch).ok_or(DecodeError::InvalidCharacter {
279            char: ch,
280            position: 0,
281            input: String::new(),
282            valid_chars: String::new(),
283        })?;
284        bit_buffer = (bit_buffer << bits_per_char) | value as u64;
285        bits_in_buffer += bits_per_char;
286
287        while bits_in_buffer >= 8 {
288            bits_in_buffer -= 8;
289            result.push((bit_buffer >> bits_in_buffer) as u8);
290        }
291    }
292
293    Ok(result)
294}
295
296/// LUT-based SIMD encoding (uses runtime LUT construction, not hardcoded tables).
297#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
298fn encode_lut(data: &[u8], dict: &Dictionary) -> Option<String> {
299    let base = dict.base();
300
301    // Skip specialized paths - force LUT-based codecs only
302    // 1. Try GenericSimdCodec for sequential power-of-2 dictionaries
303    if let Some(codec) = simd::GenericSimdCodec::from_dictionary(dict) {
304        return codec.encode(data, dict);
305    }
306
307    // 2. Try GappedSequentialCodec for near-sequential dictionaries
308    if let Some(codec) = simd::GappedSequentialCodec::from_dictionary(dict) {
309        return codec.encode(data, dict);
310    }
311
312    // 3. Try SmallLutCodec for small arbitrary dictionaries (≤16 chars)
313    if base <= 16
314        && base.is_power_of_two()
315        && let Some(codec) = simd::SmallLutCodec::from_dictionary(dict)
316    {
317        return codec.encode(data, dict);
318    }
319
320    // 4. Try Base64LutCodec for larger arbitrary dictionaries (17-64 chars)
321    if (17..=64).contains(&base)
322        && base.is_power_of_two()
323        && let Some(codec) = simd::Base64LutCodec::from_dictionary(dict)
324    {
325        return codec.encode(data, dict);
326    }
327
328    None
329}
330
331#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
332fn encode_lut(_data: &[u8], _dict: &Dictionary) -> Option<String> {
333    None
334}
335
336/// LUT-based SIMD decoding (uses runtime LUT construction, not hardcoded tables).
337#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
338fn decode_lut(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
339    let base = dict.base();
340
341    // Skip specialized paths - force LUT-based codecs only
342    // 1. Try GenericSimdCodec for sequential power-of-2 dictionaries
343    if let Some(codec) = simd::GenericSimdCodec::from_dictionary(dict) {
344        return codec.decode(encoded, dict);
345    }
346
347    // 2. Try GappedSequentialCodec for near-sequential dictionaries
348    if let Some(codec) = simd::GappedSequentialCodec::from_dictionary(dict) {
349        return codec.decode(encoded, dict);
350    }
351
352    // 3. Try SmallLutCodec for small arbitrary dictionaries (≤16 chars)
353    if base <= 16
354        && base.is_power_of_two()
355        && let Some(codec) = simd::SmallLutCodec::from_dictionary(dict)
356    {
357        return codec.decode(encoded, dict);
358    }
359
360    // 4. Try Base64LutCodec for larger arbitrary dictionaries (17-64 chars)
361    if (17..=64).contains(&base)
362        && base.is_power_of_two()
363        && let Some(codec) = simd::Base64LutCodec::from_dictionary(dict)
364    {
365        return codec.decode(encoded, dict);
366    }
367
368    None
369}
370
371#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
372fn decode_lut(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
373    None
374}
375
376/// Specialized SIMD encoding (for known RFC dictionaries).
377#[cfg(all(feature = "simd", target_arch = "x86_64"))]
378fn encode_specialized(data: &[u8], dict: &Dictionary) -> Option<String> {
379    use crate::simd::{
380        encode_base16_simd, encode_base32_simd, encode_base64_simd, encode_base256_simd,
381    };
382
383    match dict.base() {
384        16 => encode_base16_simd(data, dict),
385        32 => encode_base32_simd(data, dict),
386        64 => encode_base64_simd(data, dict),
387        256 => encode_base256_simd(data, dict),
388        _ => None,
389    }
390}
391
392#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
393fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
394    // ARM doesn't have specialized paths yet (uses LUT)
395    None
396}
397
398#[cfg(not(feature = "simd"))]
399fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
400    None
401}
402
403/// Specialized SIMD decoding (for known RFC dictionaries).
404#[cfg(all(feature = "simd", target_arch = "x86_64"))]
405fn decode_specialized(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
406    use crate::simd::{
407        decode_base16_simd, decode_base32_simd, decode_base64_simd, decode_base256_simd,
408    };
409
410    match dict.base() {
411        16 => decode_base16_simd(encoded, dict),
412        32 => decode_base32_simd(encoded, dict),
413        64 => decode_base64_simd(encoded, dict),
414        256 => decode_base256_simd(encoded, dict),
415        _ => None,
416    }
417}
418
419#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
420fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
421    None
422}
423
424#[cfg(not(feature = "simd"))]
425fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
426    None
427}
428
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DictionaryRegistry;

    /// Build the named dictionary from the default registry, carrying over its characters,
    /// mode and (when configured) the first character of its padding string.
    fn get_test_dict(name: &str) -> Dictionary {
        let registry = DictionaryRegistry::load_default().unwrap();
        let entry = registry.get_dictionary(name).unwrap();
        let alphabet: Vec<char> = entry.effective_chars().unwrap().chars().collect();

        let builder = Dictionary::builder()
            .chars(alphabet)
            .mode(entry.effective_mode());
        let builder = match entry.padding.as_ref().and_then(|s| s.chars().next()) {
            Some(pad) => builder.padding(pad),
            None => builder,
        };
        builder.build().unwrap()
    }

    #[test]
    fn test_platform_detection() {
        let platform = PlatformInfo::detect();
        assert!(!platform.arch.is_empty());
        println!("Platform: {}", platform.display());
    }

    #[test]
    fn test_path_detection_base64() {
        let dict = get_test_dict("base64");
        let paths = detect_available_paths(&dict);

        assert!(paths.contains(&EncodingPath::Scalar));
        #[cfg(feature = "simd")]
        {
            let has_simd_path = [EncodingPath::Lut, EncodingPath::Specialized]
                .iter()
                .any(|candidate| paths.contains(candidate));
            assert!(has_simd_path);
        }
    }

    #[test]
    fn test_scalar_round_trip() {
        let dict = get_test_dict("base64");
        let data = b"Hello, World!";

        let encoded = encode_with_path(data, &dict, EncodingPath::Scalar).unwrap();
        let decoded = decode_with_path(&encoded, &dict, EncodingPath::Scalar).unwrap();

        assert_eq!(&decoded[..], &data[..]);
    }

    #[test]
    fn test_paths_produce_same_output() {
        let dict = get_test_dict("base64");
        let data = b"The quick brown fox jumps over the lazy dog";

        // Keep only the paths that actually produced output.
        let results: Vec<(EncodingPath, String)> = detect_available_paths(&dict)
            .iter()
            .filter_map(|&path| encode_with_path(data, &dict, path).map(|encoded| (path, encoded)))
            .collect();

        let Some((_, scalar_encoded)) = results
            .iter()
            .find(|(path, _)| *path == EncodingPath::Scalar)
        else {
            return;
        };

        // Padding is stripped before comparing: LUT codecs don't add padding, while the
        // specialized RFC implementations do.
        let scalar_stripped = scalar_encoded.trim_end_matches('=');
        let simd_results = results
            .iter()
            .filter(|(path, _)| *path != EncodingPath::Scalar);
        for (path, encoded) in simd_results {
            assert_eq!(
                scalar_stripped,
                encoded.trim_end_matches('='),
                "{:?} output differs from Scalar (ignoring padding)",
                path
            );
        }
    }
}