Skip to main content

base_d/
bench.rs

1//! Benchmarking utilities for comparing encoding paths.
2//!
3//! This module exposes internal encoding paths for performance comparison:
4//! - Scalar: Pure Rust, no SIMD
5//! - LUT: SIMD with runtime lookup tables
6//! - Specialized: Hardcoded SIMD for known dictionaries
7//!
8//! # Example
9//!
10//! ```ignore
11//! use base_d::bench::{EncodingPath, encode_with_path, detect_available_paths};
12//!
13//! let dict = get_dictionary("base64");
14//! let paths = detect_available_paths(&dict);
15//!
16//! for path in paths {
17//!     let result = encode_with_path(data, &dict, path);
18//! }
19//! ```
20
21use crate::EncodingMode;
22use crate::core::dictionary::Dictionary;
23use crate::encoders::algorithms::{DecodeError, byte_range, radix};
24
25#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
26use crate::simd;
27
/// Implementation strategies that a benchmark run can select between.
///
/// The `Display` form is a short label: `Scalar`, `LUT` or `Specialized`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EncodingPath {
    /// Plain Rust implementation that does not use SIMD.
    Scalar,
    /// SIMD implementation whose lookup tables are constructed at runtime.
    Lut,
    /// SIMD implementation hardcoded for known RFC dictionaries.
    Specialized,
}

impl std::fmt::Display for EncodingPath {
    /// Writes the short label of the path, ignoring width and fill flags.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            EncodingPath::Scalar => "Scalar",
            EncodingPath::Lut => "LUT",
            EncodingPath::Specialized => "Specialized",
        };
        f.write_str(label)
    }
}
48
/// CPU architecture name plus the SIMD extensions found on the running machine.
#[derive(Debug, Clone)]
pub struct PlatformInfo {
    /// Architecture name; `detect` fills it from `std::env::consts::ARCH`.
    pub arch: &'static str,
    /// Human-readable labels of the SIMD extensions that were detected.
    pub simd_features: Vec<&'static str>,
}

impl PlatformInfo {
    /// Probe the current platform.
    ///
    /// On x86_64 the AVX-512 VBMI, AVX2 and SSSE3 extensions are checked at
    /// runtime and listed in that order. On aarch64 `NEON` is always listed.
    /// On every other architecture the feature list is empty.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        let simd_features: Vec<&'static str> = [
            ("AVX-512 VBMI", is_x86_feature_detected!("avx512vbmi")),
            ("AVX2", is_x86_feature_detected!("avx2")),
            ("SSSE3", is_x86_feature_detected!("ssse3")),
        ]
        .iter()
        .filter(|probe| probe.1)
        .map(|probe| probe.0)
        .collect();

        // NEON is always available on aarch64, so no runtime probe is needed.
        #[cfg(target_arch = "aarch64")]
        let simd_features: Vec<&'static str> = vec!["NEON"];

        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let simd_features: Vec<&'static str> = Vec::new();

        PlatformInfo {
            arch: std::env::consts::ARCH,
            simd_features,
        }
    }

    /// Render as `arch` alone, or `arch (feat1, feat2, ...)` when features exist.
    pub fn display(&self) -> String {
        match self.simd_features.as_slice() {
            [] => self.arch.to_string(),
            features => format!("{} ({})", self.arch, features.join(", ")),
        }
    }
}
96
97/// Information about a dictionary's benchmark capabilities.
98#[derive(Debug, Clone)]
99pub struct DictionaryBenchInfo {
100    pub name: String,
101    pub base: usize,
102    pub mode: EncodingMode,
103    pub available_paths: Vec<EncodingPath>,
104    pub supports_streaming: bool,
105}
106
107/// Detect which encoding paths are available for a dictionary.
108pub fn detect_available_paths(dict: &Dictionary) -> Vec<EncodingPath> {
109    let mut paths = vec![EncodingPath::Scalar]; // Scalar always available
110
111    #[cfg(feature = "simd")]
112    {
113        let base = dict.base();
114        let mode = dict.mode();
115
116        // Check if LUT path is available (power-of-2 base, ASCII chars)
117        if base.is_power_of_two() && base <= 256 {
118            // Check if all chars are ASCII
119            let all_ascii = (0..base).all(|i| {
120                dict.encode_digit(i)
121                    .map(|c| (c as u32) < 128)
122                    .unwrap_or(false)
123            });
124
125            if all_ascii && matches!(mode, EncodingMode::Chunked) {
126                paths.push(EncodingPath::Lut);
127            }
128        }
129
130        // Check if specialized path is available
131        if is_specialized_available(dict) {
132            paths.push(EncodingPath::Specialized);
133        }
134    }
135
136    paths
137}
138
139/// Check if a specialized SIMD path exists for this dictionary.
140#[cfg(feature = "simd")]
141fn is_specialized_available(dict: &Dictionary) -> bool {
142    use crate::simd::variants::{identify_base32_variant, identify_base64_variant};
143
144    let base = dict.base();
145
146    match base {
147        16 => {
148            // Check if it's standard hex (uppercase or lowercase)
149            let first_char = dict.encode_digit(10); // 'A' or 'a' position
150            matches!(first_char, Some('A') | Some('a'))
151        }
152        32 => identify_base32_variant(dict).is_some(),
153        64 => identify_base64_variant(dict).is_some(),
154        256 => matches!(dict.mode(), EncodingMode::Chunked | EncodingMode::ByteRange),
155        _ => false,
156    }
157}
158
159#[cfg(not(feature = "simd"))]
160fn is_specialized_available(_dict: &Dictionary) -> bool {
161    false
162}
163
164/// Encode using a specific path (for benchmarking).
165///
166/// Returns `None` if the path is not available for this dictionary.
167pub fn encode_with_path(data: &[u8], dict: &Dictionary, path: EncodingPath) -> Option<String> {
168    match path {
169        EncodingPath::Scalar => Some(encode_scalar(data, dict)),
170        EncodingPath::Lut => encode_lut(data, dict),
171        EncodingPath::Specialized => encode_specialized(data, dict),
172    }
173}
174
175/// Decode using a specific path (for benchmarking).
176///
177/// Returns `None` if the path is not available for this dictionary.
178pub fn decode_with_path(encoded: &str, dict: &Dictionary, path: EncodingPath) -> Option<Vec<u8>> {
179    match path {
180        EncodingPath::Scalar => decode_scalar(encoded, dict).ok(),
181        EncodingPath::Lut => decode_lut(encoded, dict),
182        EncodingPath::Specialized => decode_specialized(encoded, dict),
183    }
184}
185
186/// Pure scalar encoding (no SIMD).
187fn encode_scalar(data: &[u8], dict: &Dictionary) -> String {
188    match dict.mode() {
189        EncodingMode::Radix => radix::encode(data, dict),
190        EncodingMode::Chunked => encode_chunked_scalar(data, dict),
191        EncodingMode::ByteRange => byte_range::encode_byte_range(data, dict).expect(
192            "ByteRange encode failed: dictionary should have been validated at construction time",
193        ),
194    }
195}
196
197/// Pure scalar decoding (no SIMD).
198fn decode_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
199    match dict.mode() {
200        EncodingMode::Radix => radix::decode(encoded, dict),
201        EncodingMode::Chunked => decode_chunked_scalar(encoded, dict),
202        EncodingMode::ByteRange => byte_range::decode_byte_range(encoded, dict),
203    }
204}
205
206/// Scalar chunked encoding (bypasses SIMD).
207fn encode_chunked_scalar(data: &[u8], dict: &Dictionary) -> String {
208    let base = dict.base();
209    let bits_per_char = (base as f64).log2() as usize;
210
211    if bits_per_char == 0 || base & (base - 1) != 0 {
212        // Non-power-of-2, fall back to radix
213        return radix::encode(data, dict);
214    }
215
216    let mut result = String::new();
217    let mut bit_buffer: u64 = 0;
218    let mut bits_in_buffer = 0;
219
220    for &byte in data {
221        bit_buffer = (bit_buffer << 8) | byte as u64;
222        bits_in_buffer += 8;
223
224        while bits_in_buffer >= bits_per_char {
225            bits_in_buffer -= bits_per_char;
226            let index = ((bit_buffer >> bits_in_buffer) & ((1 << bits_per_char) - 1)) as usize;
227            if let Some(ch) = dict.encode_digit(index) {
228                result.push(ch);
229            }
230        }
231    }
232
233    // Handle remaining bits
234    if bits_in_buffer > 0 {
235        let index = ((bit_buffer << (bits_per_char - bits_in_buffer)) & ((1 << bits_per_char) - 1))
236            as usize;
237        if let Some(ch) = dict.encode_digit(index) {
238            result.push(ch);
239        }
240    }
241
242    // Add padding if needed
243    if let Some(pad) = dict.padding() {
244        let output_block_size = match bits_per_char {
245            6 => 4, // base64
246            5 => 8, // base32
247            4 => 2, // base16
248            _ => 1,
249        };
250        while !result.len().is_multiple_of(output_block_size) {
251            result.push(pad);
252        }
253    }
254
255    result
256}
257
258/// Scalar chunked decoding (bypasses SIMD).
259fn decode_chunked_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
260    let base = dict.base();
261    let bits_per_char = (base as f64).log2() as usize;
262
263    if bits_per_char == 0 || base & (base - 1) != 0 {
264        return radix::decode(encoded, dict);
265    }
266
267    // Strip padding
268    let padding = dict.padding();
269    let encoded = if let Some(pad) = padding {
270        encoded.trim_end_matches(pad)
271    } else {
272        encoded
273    };
274
275    let mut result = Vec::new();
276    let mut bit_buffer: u64 = 0;
277    let mut bits_in_buffer = 0;
278
279    for ch in encoded.chars() {
280        let value = dict.decode_char(ch).ok_or(DecodeError::InvalidCharacter {
281            char: ch,
282            position: 0,
283            input: String::new(),
284            valid_chars: String::new(),
285        })?;
286        bit_buffer = (bit_buffer << bits_per_char) | value as u64;
287        bits_in_buffer += bits_per_char;
288
289        while bits_in_buffer >= 8 {
290            bits_in_buffer -= 8;
291            result.push((bit_buffer >> bits_in_buffer) as u8);
292        }
293    }
294
295    Ok(result)
296}
297
298/// LUT-based SIMD encoding (uses runtime LUT construction, not hardcoded tables).
299#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
300fn encode_lut(data: &[u8], dict: &Dictionary) -> Option<String> {
301    let base = dict.base();
302
303    // Skip specialized paths - force LUT-based codecs only
304    // 1. Try GenericSimdCodec for sequential power-of-2 dictionaries
305    if let Some(codec) = simd::GenericSimdCodec::from_dictionary(dict) {
306        return codec.encode(data, dict);
307    }
308
309    // 2. Try GappedSequentialCodec for near-sequential dictionaries
310    if let Some(codec) = simd::GappedSequentialCodec::from_dictionary(dict) {
311        return codec.encode(data, dict);
312    }
313
314    // 3. Try SmallLutCodec for small arbitrary dictionaries (≤16 chars)
315    if base <= 16
316        && base.is_power_of_two()
317        && let Some(codec) = simd::SmallLutCodec::from_dictionary(dict)
318    {
319        return codec.encode(data, dict);
320    }
321
322    // 4. Try Base64LutCodec for larger arbitrary dictionaries (17-64 chars)
323    if (17..=64).contains(&base)
324        && base.is_power_of_two()
325        && let Some(codec) = simd::Base64LutCodec::from_dictionary(dict)
326    {
327        return codec.encode(data, dict);
328    }
329
330    None
331}
332
333#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
334fn encode_lut(_data: &[u8], _dict: &Dictionary) -> Option<String> {
335    None
336}
337
338/// LUT-based SIMD decoding (uses runtime LUT construction, not hardcoded tables).
339#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
340fn decode_lut(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
341    let base = dict.base();
342
343    // Skip specialized paths - force LUT-based codecs only
344    // 1. Try GenericSimdCodec for sequential power-of-2 dictionaries
345    if let Some(codec) = simd::GenericSimdCodec::from_dictionary(dict) {
346        return codec.decode(encoded, dict);
347    }
348
349    // 2. Try GappedSequentialCodec for near-sequential dictionaries
350    if let Some(codec) = simd::GappedSequentialCodec::from_dictionary(dict) {
351        return codec.decode(encoded, dict);
352    }
353
354    // 3. Try SmallLutCodec for small arbitrary dictionaries (≤16 chars)
355    if base <= 16
356        && base.is_power_of_two()
357        && let Some(codec) = simd::SmallLutCodec::from_dictionary(dict)
358    {
359        return codec.decode(encoded, dict);
360    }
361
362    // 4. Try Base64LutCodec for larger arbitrary dictionaries (17-64 chars)
363    if (17..=64).contains(&base)
364        && base.is_power_of_two()
365        && let Some(codec) = simd::Base64LutCodec::from_dictionary(dict)
366    {
367        return codec.decode(encoded, dict);
368    }
369
370    None
371}
372
373#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
374fn decode_lut(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
375    None
376}
377
378/// Specialized SIMD encoding (for known RFC dictionaries).
379#[cfg(all(feature = "simd", target_arch = "x86_64"))]
380fn encode_specialized(data: &[u8], dict: &Dictionary) -> Option<String> {
381    use crate::simd::{
382        encode_base16_simd, encode_base32_simd, encode_base64_simd, encode_base256_simd,
383    };
384
385    match dict.base() {
386        16 => encode_base16_simd(data, dict),
387        32 => encode_base32_simd(data, dict),
388        64 => encode_base64_simd(data, dict),
389        256 => encode_base256_simd(data, dict),
390        _ => None,
391    }
392}
393
394#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
395fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
396    // ARM doesn't have specialized paths yet (uses LUT)
397    None
398}
399
400#[cfg(not(feature = "simd"))]
401fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
402    None
403}
404
405/// Specialized SIMD decoding (for known RFC dictionaries).
406#[cfg(all(feature = "simd", target_arch = "x86_64"))]
407fn decode_specialized(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
408    use crate::simd::{
409        decode_base16_simd, decode_base32_simd, decode_base64_simd, decode_base256_simd,
410    };
411
412    match dict.base() {
413        16 => decode_base16_simd(encoded, dict),
414        32 => decode_base32_simd(encoded, dict),
415        64 => decode_base64_simd(encoded, dict),
416        256 => decode_base256_simd(encoded, dict),
417        _ => None,
418    }
419}
420
421#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
422fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
423    None
424}
425
426#[cfg(not(feature = "simd"))]
427fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
428    None
429}
430
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DictionaryRegistry;

    /// Build a `Dictionary` from the named entry of the default registry.
    fn get_test_dict(name: &str) -> Dictionary {
        let registry = DictionaryRegistry::load_default().unwrap();
        let entry = registry.get_dictionary(name).unwrap();
        let alphabet: Vec<char> = entry.effective_chars().unwrap().chars().collect();
        // Only the first character of the configured padding string is used.
        let pad_char = entry.padding.as_ref().and_then(|s| s.chars().next());

        let builder = Dictionary::builder()
            .chars(alphabet)
            .mode(entry.effective_mode());
        let builder = match pad_char {
            Some(p) => builder.padding(p),
            None => builder,
        };
        builder.build().unwrap()
    }

    /// Detection must always report a non-empty architecture name.
    #[test]
    fn test_platform_detection() {
        let platform = PlatformInfo::detect();
        assert!(!platform.arch.is_empty());
        println!("Platform: {}", platform.display());
    }

    /// base64 always has the scalar path and, with SIMD, at least one SIMD path.
    #[test]
    fn test_path_detection_base64() {
        let available = detect_available_paths(&get_test_dict("base64"));

        assert!(available.contains(&EncodingPath::Scalar));
        #[cfg(feature = "simd")]
        {
            let has_simd_path = available
                .iter()
                .any(|p| matches!(p, EncodingPath::Lut | EncodingPath::Specialized));
            assert!(has_simd_path);
        }
    }

    /// Scalar encode followed by scalar decode returns the original bytes.
    #[test]
    fn test_scalar_round_trip() {
        let dict = get_test_dict("base64");
        let original = b"Hello, World!";

        let text = encode_with_path(original, &dict, EncodingPath::Scalar).unwrap();
        let bytes = decode_with_path(&text, &dict, EncodingPath::Scalar).unwrap();

        assert_eq!(&bytes[..], &original[..]);
    }

    /// Every path that produces output must agree with the scalar path.
    #[test]
    fn test_paths_produce_same_output() {
        let dict = get_test_dict("base64");
        let data = b"The quick brown fox jumps over the lazy dog";

        // Paths that return `None` are simply left out of the comparison.
        let results: Vec<(EncodingPath, String)> = detect_available_paths(&dict)
            .into_iter()
            .filter_map(|path| encode_with_path(data, &dict, path).map(|out| (path, out)))
            .collect();

        let Some((_, scalar_encoded)) = results.iter().find(|(p, _)| *p == EncodingPath::Scalar)
        else {
            return;
        };

        // Padding is stripped for a fair comparison: LUT codecs don't add
        // padding, specialized RFC implementations do.
        let scalar_stripped = scalar_encoded.trim_end_matches('=');
        for (path, encoded) in results.iter().filter(|(p, _)| *p != EncodingPath::Scalar) {
            let stripped = encoded.trim_end_matches('=');
            assert_eq!(
                scalar_stripped, stripped,
                "{:?} output differs from Scalar (ignoring padding)",
                path
            );
        }
    }
}