1use crate::EncodingMode;
22use crate::core::dictionary::Dictionary;
23use crate::encoders::algorithms::{DecodeError, byte_range, radix};
24
25#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
26use crate::simd;
27
/// Implementation strategy that can be selected for an encode/decode run.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EncodingPath {
    /// Plain scalar implementation.
    Scalar,
    /// Lookup-table driven implementation (displayed as `LUT`).
    Lut,
    /// Implementation dedicated to one particular base.
    Specialized,
}

impl std::fmt::Display for EncodingPath {
    /// Writes the human-readable label of the path; formatter flags such as
    /// width are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Scalar => "Scalar",
            Self::Lut => "LUT",
            Self::Specialized => "Specialized",
        };
        f.write_str(label)
    }
}
48
/// Snapshot of the CPU architecture and the SIMD extensions reported for it.
#[derive(Debug, Clone)]
pub struct PlatformInfo {
    /// Architecture name, taken from `std::env::consts::ARCH`.
    pub arch: &'static str,
    /// Labels of the SIMD feature sets found, in detection order.
    pub simd_features: Vec<&'static str>,
}

impl PlatformInfo {
    /// Probes the current platform.
    ///
    /// On x86_64 the features are detected at runtime and listed in the order
    /// AVX-512 VBMI, AVX2, SSSE3. On aarch64 `NEON` is always listed without a
    /// runtime check. Every other architecture yields an empty list.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        let simd_features: Vec<&'static str> = [
            ("AVX-512 VBMI", is_x86_feature_detected!("avx512vbmi")),
            ("AVX2", is_x86_feature_detected!("avx2")),
            ("SSSE3", is_x86_feature_detected!("ssse3")),
        ]
        .into_iter()
        .filter_map(|(label, present)| present.then_some(label))
        .collect();

        #[cfg(target_arch = "aarch64")]
        let simd_features: Vec<&'static str> = vec!["NEON"];

        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let simd_features: Vec<&'static str> = Vec::new();

        Self {
            arch: std::env::consts::ARCH,
            simd_features,
        }
    }

    /// Renders `arch` alone, or `arch (feat1, feat2, ...)` when at least one
    /// SIMD feature is recorded.
    pub fn display(&self) -> String {
        match self.simd_features.as_slice() {
            [] => self.arch.to_string(),
            features => format!("{} ({})", self.arch, features.join(", ")),
        }
    }
}
96
97#[derive(Debug, Clone)]
99pub struct DictionaryBenchInfo {
100 pub name: String,
101 pub base: usize,
102 pub mode: EncodingMode,
103 pub available_paths: Vec<EncodingPath>,
104 pub supports_streaming: bool,
105}
106
107pub fn detect_available_paths(dict: &Dictionary) -> Vec<EncodingPath> {
109 let mut paths = vec![EncodingPath::Scalar]; #[cfg(feature = "simd")]
112 {
113 let base = dict.base();
114 let mode = dict.mode();
115
116 if base.is_power_of_two() && base <= 256 {
118 let all_ascii = (0..base).all(|i| {
120 dict.encode_digit(i)
121 .map(|c| (c as u32) < 128)
122 .unwrap_or(false)
123 });
124
125 if all_ascii && matches!(mode, EncodingMode::Chunked) {
126 paths.push(EncodingPath::Lut);
127 }
128 }
129
130 if is_specialized_available(dict) {
132 paths.push(EncodingPath::Specialized);
133 }
134 }
135
136 paths
137}
138
/// Reports whether `dict` matches one of the base-specific SIMD variants.
///
/// - base 16: digit 10 must encode to `A` or `a`;
/// - base 32 / 64: the matching `identify_*_variant` helper must recognise it;
/// - base 256: the mode must be `Chunked` or `ByteRange`;
/// - any other base: never.
#[cfg(feature = "simd")]
fn is_specialized_available(dict: &Dictionary) -> bool {
    use crate::simd::variants::{identify_base32_variant, identify_base64_variant};

    match dict.base() {
        16 => matches!(dict.encode_digit(10), Some('A' | 'a')),
        32 => identify_base32_variant(dict).is_some(),
        64 => identify_base64_variant(dict).is_some(),
        256 => matches!(
            dict.mode(),
            EncodingMode::Chunked | EncodingMode::ByteRange
        ),
        _ => false,
    }
}
158
159#[cfg(not(feature = "simd"))]
160fn is_specialized_available(_dict: &Dictionary) -> bool {
161 false
162}
163
164pub fn encode_with_path(data: &[u8], dict: &Dictionary, path: EncodingPath) -> Option<String> {
168 match path {
169 EncodingPath::Scalar => Some(encode_scalar(data, dict)),
170 EncodingPath::Lut => encode_lut(data, dict),
171 EncodingPath::Specialized => encode_specialized(data, dict),
172 }
173}
174
175pub fn decode_with_path(encoded: &str, dict: &Dictionary, path: EncodingPath) -> Option<Vec<u8>> {
179 match path {
180 EncodingPath::Scalar => decode_scalar(encoded, dict).ok(),
181 EncodingPath::Lut => decode_lut(encoded, dict),
182 EncodingPath::Specialized => decode_specialized(encoded, dict),
183 }
184}
185
186fn encode_scalar(data: &[u8], dict: &Dictionary) -> String {
188 match dict.mode() {
189 EncodingMode::Radix => radix::encode(data, dict),
190 EncodingMode::Chunked => encode_chunked_scalar(data, dict),
191 EncodingMode::ByteRange => byte_range::encode_byte_range(data, dict),
192 }
193}
194
195fn decode_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
197 match dict.mode() {
198 EncodingMode::Radix => radix::decode(encoded, dict),
199 EncodingMode::Chunked => decode_chunked_scalar(encoded, dict),
200 EncodingMode::ByteRange => byte_range::decode_byte_range(encoded, dict),
201 }
202}
203
204fn encode_chunked_scalar(data: &[u8], dict: &Dictionary) -> String {
206 let base = dict.base();
207 let bits_per_char = (base as f64).log2() as usize;
208
209 if bits_per_char == 0 || base & (base - 1) != 0 {
210 return radix::encode(data, dict);
212 }
213
214 let mut result = String::new();
215 let mut bit_buffer: u64 = 0;
216 let mut bits_in_buffer = 0;
217
218 for &byte in data {
219 bit_buffer = (bit_buffer << 8) | byte as u64;
220 bits_in_buffer += 8;
221
222 while bits_in_buffer >= bits_per_char {
223 bits_in_buffer -= bits_per_char;
224 let index = ((bit_buffer >> bits_in_buffer) & ((1 << bits_per_char) - 1)) as usize;
225 if let Some(ch) = dict.encode_digit(index) {
226 result.push(ch);
227 }
228 }
229 }
230
231 if bits_in_buffer > 0 {
233 let index = ((bit_buffer << (bits_per_char - bits_in_buffer)) & ((1 << bits_per_char) - 1))
234 as usize;
235 if let Some(ch) = dict.encode_digit(index) {
236 result.push(ch);
237 }
238 }
239
240 if let Some(pad) = dict.padding() {
242 let output_block_size = match bits_per_char {
243 6 => 4, 5 => 8, 4 => 2, _ => 1,
247 };
248 while !result.len().is_multiple_of(output_block_size) {
249 result.push(pad);
250 }
251 }
252
253 result
254}
255
256fn decode_chunked_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
258 let base = dict.base();
259 let bits_per_char = (base as f64).log2() as usize;
260
261 if bits_per_char == 0 || base & (base - 1) != 0 {
262 return radix::decode(encoded, dict);
263 }
264
265 let padding = dict.padding();
267 let encoded = if let Some(pad) = padding {
268 encoded.trim_end_matches(pad)
269 } else {
270 encoded
271 };
272
273 let mut result = Vec::new();
274 let mut bit_buffer: u64 = 0;
275 let mut bits_in_buffer = 0;
276
277 for ch in encoded.chars() {
278 let value = dict.decode_char(ch).ok_or(DecodeError::InvalidCharacter {
279 char: ch,
280 position: 0,
281 input: String::new(),
282 valid_chars: String::new(),
283 })?;
284 bit_buffer = (bit_buffer << bits_per_char) | value as u64;
285 bits_in_buffer += bits_per_char;
286
287 while bits_in_buffer >= 8 {
288 bits_in_buffer -= 8;
289 result.push((bit_buffer >> bits_in_buffer) as u8);
290 }
291 }
292
293 Ok(result)
294}
295
/// Encodes through the first LUT codec that can be built for `dict`.
///
/// Codecs are tried in this order: generic SIMD, gapped sequential, then
/// (for power-of-two bases only) the small LUT codec for bases up to 16 or
/// the base64 LUT codec for bases 17..=64. Once a codec has been built its
/// result is returned as-is, even if that result is `None`.
#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
fn encode_lut(data: &[u8], dict: &Dictionary) -> Option<String> {
    if let Some(generic) = simd::GenericSimdCodec::from_dictionary(dict) {
        return generic.encode(data, dict);
    }

    if let Some(gapped) = simd::GappedSequentialCodec::from_dictionary(dict) {
        return gapped.encode(data, dict);
    }

    let base = dict.base();
    if !base.is_power_of_two() {
        return None;
    }

    match base {
        0..=16 => simd::SmallLutCodec::from_dictionary(dict)?.encode(data, dict),
        17..=64 => simd::Base64LutCodec::from_dictionary(dict)?.encode(data, dict),
        _ => None,
    }
}
330
331#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
332fn encode_lut(_data: &[u8], _dict: &Dictionary) -> Option<String> {
333 None
334}
335
/// Decodes through the first LUT codec that can be built for `dict`.
///
/// Codecs are tried in this order: generic SIMD, gapped sequential, then
/// (for power-of-two bases only) the small LUT codec for bases up to 16 or
/// the base64 LUT codec for bases 17..=64. Once a codec has been built its
/// result is returned as-is, even if that result is `None`.
#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
fn decode_lut(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
    if let Some(generic) = simd::GenericSimdCodec::from_dictionary(dict) {
        return generic.decode(encoded, dict);
    }

    if let Some(gapped) = simd::GappedSequentialCodec::from_dictionary(dict) {
        return gapped.decode(encoded, dict);
    }

    let base = dict.base();
    if !base.is_power_of_two() {
        return None;
    }

    match base {
        0..=16 => simd::SmallLutCodec::from_dictionary(dict)?.decode(encoded, dict),
        17..=64 => simd::Base64LutCodec::from_dictionary(dict)?.decode(encoded, dict),
        _ => None,
    }
}
370
371#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
372fn decode_lut(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
373 None
374}
375
/// Encodes with the SIMD routine dedicated to the dictionary's base
/// (16, 32, 64 or 256); any other base yields `None`.
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn encode_specialized(data: &[u8], dict: &Dictionary) -> Option<String> {
    let base = dict.base();
    match base {
        16 => crate::simd::encode_base16_simd(data, dict),
        32 => crate::simd::encode_base32_simd(data, dict),
        64 => crate::simd::encode_base64_simd(data, dict),
        256 => crate::simd::encode_base256_simd(data, dict),
        _ => None,
    }
}
391
392#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
393fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
394 None
396}
397
398#[cfg(not(feature = "simd"))]
399fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
400 None
401}
402
/// Decodes with the SIMD routine dedicated to the dictionary's base
/// (16, 32, 64 or 256); any other base yields `None`.
#[cfg(all(feature = "simd", target_arch = "x86_64"))]
fn decode_specialized(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
    let base = dict.base();
    match base {
        16 => crate::simd::decode_base16_simd(encoded, dict),
        32 => crate::simd::decode_base32_simd(encoded, dict),
        64 => crate::simd::decode_base64_simd(encoded, dict),
        256 => crate::simd::decode_base256_simd(encoded, dict),
        _ => None,
    }
}
418
419#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
420fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
421 None
422}
423
424#[cfg(not(feature = "simd"))]
425fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
426 None
427}
428
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DictionaryRegistry;

    /// Builds a `Dictionary` from the named entry of the default registry,
    /// using the first character of its padding string (if any) as padding.
    fn get_test_dict(name: &str) -> Dictionary {
        let registry = DictionaryRegistry::load_default().unwrap();
        let entry = registry.get_dictionary(name).unwrap();
        let alphabet: Vec<char> = entry.effective_chars().unwrap().chars().collect();
        let pad_char = entry.padding.as_ref().and_then(|s| s.chars().next());

        let unpadded = Dictionary::builder()
            .chars(alphabet)
            .mode(entry.effective_mode());
        let builder = match pad_char {
            Some(p) => unpadded.padding(p),
            None => unpadded,
        };
        builder.build().unwrap()
    }

    /// The detected platform always carries a non-empty architecture name.
    #[test]
    fn test_platform_detection() {
        let platform = PlatformInfo::detect();
        assert!(!platform.arch.is_empty());
        println!("Platform: {}", platform.display());
    }

    /// base64 always offers the scalar path and, with `simd`, an accelerated one.
    #[test]
    fn test_path_detection_base64() {
        let base64 = get_test_dict("base64");
        let detected = detect_available_paths(&base64);

        assert!(detected.contains(&EncodingPath::Scalar));
        #[cfg(feature = "simd")]
        {
            assert!(
                detected.contains(&EncodingPath::Lut)
                    || detected.contains(&EncodingPath::Specialized)
            );
        }
    }

    /// Scalar encode followed by scalar decode returns the original bytes.
    #[test]
    fn test_scalar_round_trip() {
        let base64 = get_test_dict("base64");
        let input = b"Hello, World!";

        let text = encode_with_path(input, &base64, EncodingPath::Scalar).unwrap();
        let bytes = decode_with_path(&text, &base64, EncodingPath::Scalar).unwrap();

        assert_eq!(&bytes[..], &input[..]);
    }

    /// Every path that produces output agrees with the scalar path once
    /// trailing `=` padding is ignored.
    #[test]
    fn test_paths_produce_same_output() {
        let base64 = get_test_dict("base64");
        let input = b"The quick brown fox jumps over the lazy dog";

        let outputs: Vec<(EncodingPath, String)> = detect_available_paths(&base64)
            .into_iter()
            .filter_map(|path| encode_with_path(input, &base64, path).map(|text| (path, text)))
            .collect();

        let Some((_, scalar_text)) = outputs
            .iter()
            .find(|(path, _)| *path == EncodingPath::Scalar)
        else {
            return;
        };
        let expected = scalar_text.trim_end_matches('=');

        for (path, text) in outputs
            .iter()
            .filter(|(path, _)| *path != EncodingPath::Scalar)
        {
            assert_eq!(
                expected,
                text.trim_end_matches('='),
                "{:?} output differs from Scalar (ignoring padding)",
                path
            );
        }
    }
}