1use crate::EncodingMode;
22use crate::core::dictionary::Dictionary;
23use crate::encoders::algorithms::{DecodeError, byte_range, radix};
24
25#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
26use crate::simd;
27
/// Selects which encode/decode implementation a call is routed to.
///
/// `encode_with_path` and `decode_with_path` dispatch on this value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum EncodingPath {
    /// The scalar implementation; `detect_available_paths` always lists it.
    Scalar,
    /// The lookup-table codecs (handled by `encode_lut` / `decode_lut`).
    Lut,
    /// The per-base routines (handled by `encode_specialized` / `decode_specialized`).
    Specialized,
}

impl std::fmt::Display for EncodingPath {
    /// Writes the human-readable label of the path ("Scalar", "LUT" or "Specialized").
    ///
    /// Width/alignment flags on the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            EncodingPath::Scalar => "Scalar",
            EncodingPath::Lut => "LUT",
            EncodingPath::Specialized => "Specialized",
        };
        f.write_str(label)
    }
}
48
/// CPU architecture name plus the SIMD feature labels reported for it.
#[derive(Clone, Debug)]
pub struct PlatformInfo {
    /// Architecture string, taken from `std::env::consts::ARCH` by [`PlatformInfo::detect`].
    pub arch: &'static str,
    /// Labels of the SIMD features found, in the order they were probed.
    pub simd_features: Vec<&'static str>,
}

impl PlatformInfo {
    /// Probes the current platform.
    ///
    /// On x86_64 the runtime checks for AVX-512 VBMI, AVX2 and SSSE3 are made (in that order) and
    /// each positive result contributes a label. On aarch64 the single label "NEON" is always
    /// reported. On any other architecture the feature list is empty.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        let simd_features: Vec<&'static str> = [
            (is_x86_feature_detected!("avx512vbmi"), "AVX-512 VBMI"),
            (is_x86_feature_detected!("avx2"), "AVX2"),
            (is_x86_feature_detected!("ssse3"), "SSSE3"),
        ]
        .into_iter()
        .filter_map(|(present, label)| present.then_some(label))
        .collect();

        #[cfg(target_arch = "aarch64")]
        let simd_features: Vec<&'static str> = vec!["NEON"];

        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        let simd_features: Vec<&'static str> = Vec::new();

        Self {
            arch: std::env::consts::ARCH,
            simd_features,
        }
    }

    /// Renders the platform as `arch`, or `arch (feat1, feat2, ...)` when features are present.
    pub fn display(&self) -> String {
        match self.simd_features.as_slice() {
            [] => self.arch.to_string(),
            features => format!("{} ({})", self.arch, features.join(", ")),
        }
    }
}
96
97#[derive(Debug, Clone)]
99pub struct DictionaryBenchInfo {
100 pub name: String,
101 pub base: usize,
102 pub mode: EncodingMode,
103 pub available_paths: Vec<EncodingPath>,
104 pub supports_streaming: bool,
105}
106
107pub fn detect_available_paths(dict: &Dictionary) -> Vec<EncodingPath> {
109 let mut paths = vec![EncodingPath::Scalar]; #[cfg(feature = "simd")]
112 {
113 let base = dict.base();
114 let mode = dict.mode();
115
116 if base.is_power_of_two() && base <= 256 {
118 let all_ascii = (0..base).all(|i| {
120 dict.encode_digit(i)
121 .map(|c| (c as u32) < 128)
122 .unwrap_or(false)
123 });
124
125 if all_ascii && matches!(mode, EncodingMode::Chunked) {
126 paths.push(EncodingPath::Lut);
127 }
128 }
129
130 if is_specialized_available(dict) {
132 paths.push(EncodingPath::Specialized);
133 }
134 }
135
136 paths
137}
138
139#[cfg(feature = "simd")]
141fn is_specialized_available(dict: &Dictionary) -> bool {
142 use crate::simd::variants::{identify_base32_variant, identify_base64_variant};
143
144 let base = dict.base();
145
146 match base {
147 16 => {
148 let first_char = dict.encode_digit(10); matches!(first_char, Some('A') | Some('a'))
151 }
152 32 => identify_base32_variant(dict).is_some(),
153 64 => identify_base64_variant(dict).is_some(),
154 256 => matches!(dict.mode(), EncodingMode::Chunked | EncodingMode::ByteRange),
155 _ => false,
156 }
157}
158
159#[cfg(not(feature = "simd"))]
160fn is_specialized_available(_dict: &Dictionary) -> bool {
161 false
162}
163
164pub fn encode_with_path(data: &[u8], dict: &Dictionary, path: EncodingPath) -> Option<String> {
168 match path {
169 EncodingPath::Scalar => Some(encode_scalar(data, dict)),
170 EncodingPath::Lut => encode_lut(data, dict),
171 EncodingPath::Specialized => encode_specialized(data, dict),
172 }
173}
174
175pub fn decode_with_path(encoded: &str, dict: &Dictionary, path: EncodingPath) -> Option<Vec<u8>> {
179 match path {
180 EncodingPath::Scalar => decode_scalar(encoded, dict).ok(),
181 EncodingPath::Lut => decode_lut(encoded, dict),
182 EncodingPath::Specialized => decode_specialized(encoded, dict),
183 }
184}
185
186fn encode_scalar(data: &[u8], dict: &Dictionary) -> String {
188 match dict.mode() {
189 EncodingMode::Radix => radix::encode(data, dict),
190 EncodingMode::Chunked => encode_chunked_scalar(data, dict),
191 EncodingMode::ByteRange => byte_range::encode_byte_range(data, dict).expect(
192 "ByteRange encode failed: dictionary should have been validated at construction time",
193 ),
194 }
195}
196
197fn decode_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
199 match dict.mode() {
200 EncodingMode::Radix => radix::decode(encoded, dict),
201 EncodingMode::Chunked => decode_chunked_scalar(encoded, dict),
202 EncodingMode::ByteRange => byte_range::decode_byte_range(encoded, dict),
203 }
204}
205
206fn encode_chunked_scalar(data: &[u8], dict: &Dictionary) -> String {
208 let base = dict.base();
209 let bits_per_char = (base as f64).log2() as usize;
210
211 if bits_per_char == 0 || base & (base - 1) != 0 {
212 return radix::encode(data, dict);
214 }
215
216 let mut result = String::new();
217 let mut bit_buffer: u64 = 0;
218 let mut bits_in_buffer = 0;
219
220 for &byte in data {
221 bit_buffer = (bit_buffer << 8) | byte as u64;
222 bits_in_buffer += 8;
223
224 while bits_in_buffer >= bits_per_char {
225 bits_in_buffer -= bits_per_char;
226 let index = ((bit_buffer >> bits_in_buffer) & ((1 << bits_per_char) - 1)) as usize;
227 if let Some(ch) = dict.encode_digit(index) {
228 result.push(ch);
229 }
230 }
231 }
232
233 if bits_in_buffer > 0 {
235 let index = ((bit_buffer << (bits_per_char - bits_in_buffer)) & ((1 << bits_per_char) - 1))
236 as usize;
237 if let Some(ch) = dict.encode_digit(index) {
238 result.push(ch);
239 }
240 }
241
242 if let Some(pad) = dict.padding() {
244 let output_block_size = match bits_per_char {
245 6 => 4, 5 => 8, 4 => 2, _ => 1,
249 };
250 while !result.len().is_multiple_of(output_block_size) {
251 result.push(pad);
252 }
253 }
254
255 result
256}
257
258fn decode_chunked_scalar(encoded: &str, dict: &Dictionary) -> Result<Vec<u8>, crate::DecodeError> {
260 let base = dict.base();
261 let bits_per_char = (base as f64).log2() as usize;
262
263 if bits_per_char == 0 || base & (base - 1) != 0 {
264 return radix::decode(encoded, dict);
265 }
266
267 let padding = dict.padding();
269 let encoded = if let Some(pad) = padding {
270 encoded.trim_end_matches(pad)
271 } else {
272 encoded
273 };
274
275 let mut result = Vec::new();
276 let mut bit_buffer: u64 = 0;
277 let mut bits_in_buffer = 0;
278
279 for ch in encoded.chars() {
280 let value = dict.decode_char(ch).ok_or(DecodeError::InvalidCharacter {
281 char: ch,
282 position: 0,
283 input: String::new(),
284 valid_chars: String::new(),
285 })?;
286 bit_buffer = (bit_buffer << bits_per_char) | value as u64;
287 bits_in_buffer += bits_per_char;
288
289 while bits_in_buffer >= 8 {
290 bits_in_buffer -= 8;
291 result.push((bit_buffer >> bits_in_buffer) as u8);
292 }
293 }
294
295 Ok(result)
296}
297
298#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
300fn encode_lut(data: &[u8], dict: &Dictionary) -> Option<String> {
301 let base = dict.base();
302
303 if let Some(codec) = simd::GenericSimdCodec::from_dictionary(dict) {
306 return codec.encode(data, dict);
307 }
308
309 if let Some(codec) = simd::GappedSequentialCodec::from_dictionary(dict) {
311 return codec.encode(data, dict);
312 }
313
314 if base <= 16
316 && base.is_power_of_two()
317 && let Some(codec) = simd::SmallLutCodec::from_dictionary(dict)
318 {
319 return codec.encode(data, dict);
320 }
321
322 if (17..=64).contains(&base)
324 && base.is_power_of_two()
325 && let Some(codec) = simd::Base64LutCodec::from_dictionary(dict)
326 {
327 return codec.encode(data, dict);
328 }
329
330 None
331}
332
333#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
334fn encode_lut(_data: &[u8], _dict: &Dictionary) -> Option<String> {
335 None
336}
337
338#[cfg(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64")))]
340fn decode_lut(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
341 let base = dict.base();
342
343 if let Some(codec) = simd::GenericSimdCodec::from_dictionary(dict) {
346 return codec.decode(encoded, dict);
347 }
348
349 if let Some(codec) = simd::GappedSequentialCodec::from_dictionary(dict) {
351 return codec.decode(encoded, dict);
352 }
353
354 if base <= 16
356 && base.is_power_of_two()
357 && let Some(codec) = simd::SmallLutCodec::from_dictionary(dict)
358 {
359 return codec.decode(encoded, dict);
360 }
361
362 if (17..=64).contains(&base)
364 && base.is_power_of_two()
365 && let Some(codec) = simd::Base64LutCodec::from_dictionary(dict)
366 {
367 return codec.decode(encoded, dict);
368 }
369
370 None
371}
372
373#[cfg(not(all(feature = "simd", any(target_arch = "x86_64", target_arch = "aarch64"))))]
374fn decode_lut(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
375 None
376}
377
378#[cfg(all(feature = "simd", target_arch = "x86_64"))]
380fn encode_specialized(data: &[u8], dict: &Dictionary) -> Option<String> {
381 use crate::simd::{
382 encode_base16_simd, encode_base32_simd, encode_base64_simd, encode_base256_simd,
383 };
384
385 match dict.base() {
386 16 => encode_base16_simd(data, dict),
387 32 => encode_base32_simd(data, dict),
388 64 => encode_base64_simd(data, dict),
389 256 => encode_base256_simd(data, dict),
390 _ => None,
391 }
392}
393
394#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
395fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
396 None
398}
399
400#[cfg(not(feature = "simd"))]
401fn encode_specialized(_data: &[u8], _dict: &Dictionary) -> Option<String> {
402 None
403}
404
405#[cfg(all(feature = "simd", target_arch = "x86_64"))]
407fn decode_specialized(encoded: &str, dict: &Dictionary) -> Option<Vec<u8>> {
408 use crate::simd::{
409 decode_base16_simd, decode_base32_simd, decode_base64_simd, decode_base256_simd,
410 };
411
412 match dict.base() {
413 16 => decode_base16_simd(encoded, dict),
414 32 => decode_base32_simd(encoded, dict),
415 64 => decode_base64_simd(encoded, dict),
416 256 => decode_base256_simd(encoded, dict),
417 _ => None,
418 }
419}
420
421#[cfg(all(feature = "simd", not(target_arch = "x86_64")))]
422fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
423 None
424}
425
426#[cfg(not(feature = "simd"))]
427fn decode_specialized(_encoded: &str, _dict: &Dictionary) -> Option<Vec<u8>> {
428 None
429}
430
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DictionaryRegistry;

    /// Builds the named dictionary from the default registry, applying its padding character
    /// when one is configured. Panics if any lookup or build step fails.
    fn get_test_dict(name: &str) -> Dictionary {
        let registry = DictionaryRegistry::load_default().unwrap();
        let entry = registry.get_dictionary(name).unwrap();
        let alphabet: Vec<char> = entry.effective_chars().unwrap().chars().collect();
        let pad_char = entry.padding.as_ref().and_then(|s| s.chars().next());

        let base_builder = Dictionary::builder()
            .chars(alphabet)
            .mode(entry.effective_mode());
        let builder = match pad_char {
            Some(p) => base_builder.padding(p),
            None => base_builder,
        };
        builder.build().unwrap()
    }

    /// Detection must always report a non-empty architecture name.
    #[test]
    fn test_platform_detection() {
        let platform = PlatformInfo::detect();
        assert!(!platform.arch.is_empty());
        println!("Platform: {}", platform.display());
    }

    /// base64 always offers the scalar path and, with `simd`, at least one accelerated path.
    #[test]
    fn test_path_detection_base64() {
        let available = detect_available_paths(&get_test_dict("base64"));

        assert!(available.contains(&EncodingPath::Scalar));
        #[cfg(feature = "simd")]
        {
            assert!(
                available.contains(&EncodingPath::Lut)
                    || available.contains(&EncodingPath::Specialized)
            );
        }
    }

    /// Scalar encode followed by scalar decode must reproduce the input.
    #[test]
    fn test_scalar_round_trip() {
        let dict = get_test_dict("base64");
        let data = b"Hello, World!";

        let text = encode_with_path(data, &dict, EncodingPath::Scalar).unwrap();
        let bytes = decode_with_path(&text, &dict, EncodingPath::Scalar).unwrap();

        assert_eq!(&bytes[..], &data[..]);
    }

    /// Every path that produces output must agree with the scalar path, ignoring `=` padding.
    #[test]
    fn test_paths_produce_same_output() {
        let dict = get_test_dict("base64");
        let data = b"The quick brown fox jumps over the lazy dog";

        let outputs: Vec<(EncodingPath, String)> = detect_available_paths(&dict)
            .into_iter()
            .filter_map(|path| encode_with_path(data, &dict, path).map(|text| (path, text)))
            .collect();

        let Some((_, scalar_encoded)) = outputs
            .iter()
            .find(|(path, _)| *path == EncodingPath::Scalar)
        else {
            return;
        };
        let scalar_stripped = scalar_encoded.trim_end_matches('=');

        for (path, encoded) in outputs
            .iter()
            .filter(|(path, _)| *path != EncodingPath::Scalar)
        {
            let stripped = encoded.trim_end_matches('=');
            assert_eq!(
                scalar_stripped, stripped,
                "{:?} output differs from Scalar (ignoring padding)",
                path
            );
        }
    }
}