Skip to main content

base58_turbo/
lib.rs

1//! # Base58 Turbo
2//!
3//! [![Crates.io](https://img.shields.io/crates/v/base58-turbo.svg)](https://crates.io/crates/base58-turbo)
4//! [![Documentation](https://docs.rs/base58-turbo/badge.svg)](https://docs.rs/base58-turbo)
5//! [![License](https://img.shields.io/github/license/hacer-bark/base58-turbo)](https://github.com/hacer-bark/base58-turbo/blob/main/LICENSE)
6//! [![MIRI Verified](https://img.shields.io/github/actions/workflow/status/hacer-bark/base58-turbo/miri.yml?label=MIRI%20Verified)](https://github.com/hacer-bark/base58-turbo/actions/workflows/miri.yml)
7//! [![Logic Tests](https://img.shields.io/github/actions/workflow/status/hacer-bark/base58-turbo/tests.yml?label=Logic%20Tests)](https://github.com/hacer-bark/base58-turbo/actions/workflows/tests.yml)
8//!
9//! A high-performance Base58 encoder/decoder for Rust, optimized for high-throughput systems.
10//!
11//! This crate provides highly optimized scalar kernels for encoding and decoding,
12//! supporting `no_std` environments and zero-allocation processing.
13//!
14//! ## Usage
15//!
16//! Add this to your `Cargo.toml`:
17//!
18//! ```toml
19//! [dependencies]
20//! base58-turbo = "0.1"
21//! ```
22//!
23//! ### Basic API (Allocating)
24//!
25//! Standard usage for general applications. Requires the `std` feature (enabled by default).
26//!
27//! ```rust
28//! use base58_turbo::BITCOIN;
29//!
30//! let data = b"Hello World";
31//! let encoded = BITCOIN.encode(data).unwrap();
32//! assert_eq!(encoded, "JxF12TrwUP45BMd");
33//!
34//! let decoded = BITCOIN.decode(&encoded).unwrap();
35//! assert_eq!(decoded, data);
36//! ```
37//!
38//! ### Zero-Allocation API (Slice-based)
39//!
40//! For low-latency scenarios or `no_std` environments where heap allocation is undesirable.
41//! These methods write directly into a user-provided mutable slice.
42//!
43//! ```rust
44//! use base58_turbo::BITCOIN;
45//!
46//! let data = b"Hello World";
47//! let mut output = [0u8; 32];
48//!
49//! let len = BITCOIN.encode_into(data, &mut output).unwrap();
//! let encoded = core::str::from_utf8(&output[..len]).unwrap();
51//! assert_eq!(encoded, "JxF12TrwUP45BMd");
52//! ```
53//!
54//! ## Feature Flags
55//!
56//! This crate is lightweight and configurable via Cargo features:
57//!
58//! | Feature | Default | Description |
59//! |---------|---------|-------------|
60//! | **`serde`** | **No** | Enables `serde` serialization/deserialization for Config and Engine. |
61//! | **`std`** | **Yes** | Enables `String` and `Vec` support. Disable this for `no_std` environments. |
62//!
63//! ## Safety & Verification
64//!
65//! This crate utilizes `unsafe` code for pointer arithmetic and optimized kernels to achieve maximum performance.
66//!
67//! *   **MIRI Tests:** Core logic and fallbacks are verified with **MIRI** (Undefined Behavior checker) in CI.
68//! *   **MSan Audited:** MemorySanitizer confirms no logic is ever performed on uninitialized memory.
69//! *   **Fuzzing:** The codebase is continuously fuzz-tested via `cargo-fuzz`.
70//!
71//! **[Learn More](https://github.com/hacer-bark/base58-turbo/blob/main/docs/verification.md)**: Details on our threat model and strict verification strategy.
72
73#![cfg_attr(not(any(feature = "std", test)), no_std)]
74#![doc(issue_tracker_base_url = "https://github.com/hacer-bark/base58-turbo/issues/")]
75#![deny(unsafe_op_in_unsafe_fn)]
76#![warn(missing_docs)]
77#![warn(rust_2018_idioms)]
78#![warn(unused_qualifications)]
79#![cfg_attr(docsrs, feature(doc_cfg))]
80
81// Use `serde` when enabled
82#[cfg(feature = "serde")]
83pub mod serde;
84
85mod decode;
86mod encode;
87use decode::decode_slice_unsafe;
88use encode::encode_slice_unsafe;
89
90// ======================================================================
91// Errors
92// ======================================================================
93
/// Failure modes reported by alphabet construction, encoding and decoding.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Error {
    /// The input contains a byte that is not part of the engine's alphabet.
    InvalidCharacter,
    /// The caller-supplied output buffer cannot hold the worst-case result.
    BufferTooSmall,
    /// The input exceeds the supported size: 1024 bytes when encoding,
    /// 2048 bytes when decoding.
    InputTooBig,
    /// The alphabet was rejected because a character occurs more than once.
    WrongAlphabet,
}
106
107impl core::fmt::Display for Error {
108    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
109        match self {
110            Error::InvalidCharacter => write!(f, "invalid character in base58 string"),
111            Error::BufferTooSmall => write!(f, "output buffer too small"),
112            Error::InputTooBig => write!(f, "input data too big"),
113            Error::WrongAlphabet => write!(f, "input alphabet has duplicate chars"),
114        }
115    }
116}
117
// `std::error::Error` only exists with the standard library, so the impl is
// gated on the `std` feature. The default trait methods are sufficient.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
121
122// ======================================================================
123// Configuration & Types
124// ======================================================================
125
/// An alphabet together with the lookup tables derived from it.
///
/// Use [`Config::new`] to build one; it fills both tables from the alphabet.
#[derive(Clone, Copy, Debug)]
pub struct Config {
    /// The 58 output characters; index `d` holds the character for digit `d`.
    pub alphabet: [u8; 58],
    /// Reverse lookup from input byte to digit value. `Config::new` stores
    /// `255` for bytes that are not part of the alphabet.
    pub decode_map: [u8; 256],
    /// Two-character lookup table with one entry per digit pair
    /// (58 * 58 = 3364 entries), used by the encoder.
    pub lut_58_squared: [u16; 3364],
}
136
137impl Config {
138    /// Creates a new configuration from a 58-byte alphabet.
139    /// Checks that all characters are unique.
140    pub const fn new(alphabet: &[u8; 58]) -> Result<Self, Error> {
141        // 1. Generate Decode Map & Check Uniqueness
142        let mut map = [255u8; 256];
143        let mut i = 0;
144
145        while i < 58 {
146            let byte = alphabet[i];
147
148            // Uniqueness Check:
149            // If the map position is not 255, it means we already saw this byte.
150            if map[byte as usize] != 255 {
151                return Err(Error::WrongAlphabet);
152            }
153
154            map[byte as usize] = i as u8;
155            i += 1;
156        }
157
158        // 2. Return valid Config
159        Ok(Self {
160            alphabet: *alphabet,
161            decode_map: map,
162            lut_58_squared: gen_lut_squared(alphabet),
163        })
164    }
165}
166
167/// A Base58 Encoder/Decoder Engine.
168#[derive(Debug, Clone, Copy)]
169pub struct Engine {
170    config: Config,
171}
172
// Manual serde implementations: only the alphabet is (de)serialized.
#[cfg(feature = "serde")]
impl ::serde::Serialize for Config {
    /// Serializes the configuration as its alphabet, written as a string.
    ///
    /// The lookup tables are not serialized. An alphabet that is not valid
    /// UTF-8 is reported as a serialization error.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // A plain string keeps the value readable in JSON/TOML.
        match core::str::from_utf8(&self.alphabet) {
            Ok(text) => serializer.serialize_str(text),
            Err(utf8_err) => Err(<S::Error as ::serde::ser::Error>::custom(utf8_err)),
        }
    }
}
187
#[cfg(feature = "serde")]
impl<'de> ::serde::Deserialize<'de> for Config {
    /// Deserializes a configuration from a string holding the alphabet.
    ///
    /// The string must be exactly 58 bytes long. The lookup tables are rebuilt
    /// through [`Config::new`], whose error is forwarded as a custom
    /// deserialization error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: ::serde::Deserializer<'de>,
    {
        /// Visitor that accepts a string and turns it into a [`Config`].
        struct AlphabetVisitor;

        impl<'de> ::serde::de::Visitor<'de> for AlphabetVisitor {
            type Value = Config;

            fn expecting(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                formatter.write_str("a 58-character Base58 alphabet string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: ::serde::de::Error,
            {
                let raw = v.as_bytes();
                // The length is measured in bytes, not in `char`s.
                if raw.len() != 58 {
                    return Err(E::custom("expected exactly 58-byte alphabet"));
                }

                let mut alphabet = [0u8; 58];
                alphabet.copy_from_slice(raw);

                // Validation and table generation both live in `Config::new`.
                match Config::new(&alphabet) {
                    Ok(config) => Ok(config),
                    Err(err) => Err(E::custom(err)),
                }
            }
        }

        deserializer.deserialize_str(AlphabetVisitor)
    }
}
223
#[cfg(feature = "serde")]
impl ::serde::Serialize for Engine {
    /// Serializes the engine exactly like its [`Config`].
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // Fully-qualified call: the `Serialize` trait is not imported in this
        // file, so method-call syntax on `self.config` would not resolve.
        ::serde::Serialize::serialize(&self.config, serializer)
    }
}
233
#[cfg(feature = "serde")]
impl<'de> ::serde::Deserialize<'de> for Engine {
    /// Deserializes a [`Config`] and wraps it in an engine.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: ::serde::Deserializer<'de>,
    {
        // Fully-qualified call: the `Deserialize` trait is not imported in
        // this file, so `Config::deserialize` would not resolve by path.
        <Config as ::serde::Deserialize<'de>>::deserialize(deserializer)
            .map(|config| Engine { config })
    }
}
243
244// ======================================================================
245// Pre-defined Engines
246// ======================================================================
247
248/// Standard Bitcoin Base58 Engine.
249pub const BITCOIN: Engine =
250    match Engine::new(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz") {
251        Ok(e) => e,
252        Err(_) => panic!("Invalid Bitcoin alphabet definition"),
253    };
254
255/// Monero Base58 Engine.
256pub const MONERO: Engine =
257    match Engine::new(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz") {
258        Ok(e) => e,
259        Err(_) => panic!("Invalid Monero alphabet definition"),
260    };
261
262/// Ripple Base58 Engine.
263pub const RIPPLE: Engine =
264    match Engine::new(b"rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz") {
265        Ok(e) => e,
266        Err(_) => panic!("Invalid Ripple alphabet definition"),
267    };
268
269/// Flickr Base58 Engine.
270pub const FLICKR: Engine =
271    match Engine::new(b"123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ") {
272        Ok(e) => e,
273        Err(_) => panic!("Invalid Flickr alphabet definition"),
274    };
275
276// ======================================================================
277// Const Table Generators
278// ======================================================================
279
/// Builds the two-character lookup table for `alphabet`.
///
/// Entry `hi * 58 + lo` holds `alphabet[hi]` in the upper byte and
/// `alphabet[lo]` in the lower byte of the `u16`.
const fn gen_lut_squared(alphabet: &[u8; 58]) -> [u16; 3364] {
    let mut lut = [0u16; 3364];
    let mut hi = 0;
    while hi < 58 {
        let mut lo = 0;
        while lo < 58 {
            // Big-endian packing: first character in the most significant byte.
            lut[hi * 58 + lo] = u16::from_be_bytes([alphabet[hi], alphabet[lo]]);
            lo += 1;
        }
        hi += 1;
    }
    lut
}
292
293// ======================================================================
294// Engine Implementation
295// ======================================================================
296
297impl Engine {
298    /// Constructs a new Engine with a custom alphabet.
299    /// Returns Error::WrongAlphabet if the alphabet contains duplicates.
300    pub const fn new(alphabet: &[u8; 58]) -> Result<Self, Error> {
301        match Config::new(alphabet) {
302            Ok(c) => Ok(Self { config: c }),
303            Err(e) => Err(e),
304        }
305    }
306
307    /// Returns the internal configuration.
308    #[inline(always)]
309    pub const fn config(&self) -> &Config {
310        &self.config
311    }
312
313    // ======================================================================
314    // Length Calculators
315    // ======================================================================
316
317    /// Returns the maximum possible length of the encoded data.
318    /// Base58 expansion is ~137%. We add padding for safety.
319    #[inline]
320    #[must_use]
321    pub const fn encoded_len(&self, input_len: usize) -> usize {
322        (input_len.saturating_mul(137) / 100).saturating_add(1)
323    }
324
325    /// Returns the maximum possible length of the decoded data.
326    /// Base58 '1's map 1:1 to bytes. We cannot assume compression.
327    /// The worst-case decoded size is equal to the input string length.
328    #[inline]
329    #[must_use]
330    pub const fn decoded_len(&self, input_len: usize) -> usize {
331        input_len
332    }
333
334    // ======================================================================
335    // Zero-Allocation APIs
336    // ======================================================================
337
338    /// Encodes `input` into the `output` buffer.
339    /// Returns the actual number of bytes written.
340    #[inline]
341    pub fn encode_into<T: AsRef<[u8]>>(&self, input: T, output: &mut [u8]) -> Result<usize, Error> {
342        let input = input.as_ref();
343        if input.is_empty() {
344            return Ok(0);
345        }
346        if input.len() > 1024 {
347            return Err(Error::InputTooBig);
348        }
349
350        let req_len = self.encoded_len(input.len());
351        if output.len() < req_len {
352            return Err(Error::BufferTooSmall);
353        }
354
355        // SAFETY:
356        // 1. We checked output has sufficient capacity above.
357        // 2. We assume `encode_slice_unsafe` respects the pointer limits.
358        // 3. We assume `encode_slice_unsafe` uses `self.config` for the alphabet.
359        let actual_len = unsafe { encode_slice_unsafe(input, output.as_mut_ptr(), &self.config) };
360
361        Ok(actual_len)
362    }
363
364    /// Decodes `input` into the `output` buffer.
365    /// Returns the actual number of bytes written.
366    #[inline]
367    pub fn decode_into<T: AsRef<[u8]>>(&self, input: T, output: &mut [u8]) -> Result<usize, Error> {
368        let input = input.as_ref();
369        if input.is_empty() {
370            return Ok(0);
371        }
372        if input.len() > 2048 {
373            return Err(Error::InputTooBig);
374        }
375
376        // While decoding implies shrinking, we must ensure buffer is enough for the worst case.
377        // However, standard usage usually provides a buffer size == input size or calculated decoded_len.
378        // The safest check is:
379        let req_len = self.decoded_len(input.len());
380        if output.len() < req_len {
381            return Err(Error::BufferTooSmall);
382        }
383
384        // SAFETY:
385        // 1. `decode_slice_unsafe` performs bounds checks internally or logic ensures it.
386        // 2. We pass the slice `output` via mutable reference, guaranteeing validity.
387        unsafe { decode_slice_unsafe(input, output, &self.config) }
388    }
389
390    // ========================================================================
391    // Allocating APIs (std)
392    // ========================================================================
393
394    /// Encodes `input` into the newly allocated `String`.
395    /// Returns the `String`.
396    #[inline]
397    #[cfg(feature = "std")]
398    pub fn encode<T: AsRef<[u8]>>(&self, input: T) -> Result<String, Error> {
399        let input = input.as_ref();
400        if input.is_empty() {
401            return Ok(String::new());
402        }
403        if input.len() > 1024 {
404            return Err(Error::InputTooBig);
405        }
406
407        let max_len = self.encoded_len(input.len());
408        let mut out = Vec::with_capacity(max_len);
409
410        // SAFETY:
411        // We set the length to `max_len` to allow the unsafe kernel to write into the uninitialized capacity.
412        // We MUST successfully overwrite or truncate this before returning.
413        #[allow(clippy::uninit_vec)]
414        unsafe {
415            out.set_len(max_len);
416        }
417
418        match self.encode_into(input, &mut out) {
419            Ok(actual_len) => {
420                // SAFETY: The kernel reported `actual_len` bytes were written.
421                // Truncate the vector to remove the remaining uninitialized tail.
422                unsafe {
423                    out.set_len(actual_len);
424                }
425
426                // SAFETY: Base58 is always valid ASCII, which is valid UTF-8.
427                unsafe { Ok(String::from_utf8_unchecked(out)) }
428            }
429            Err(_) => {
430                // This branch should technically be unreachable if `encoded_len` is correct
431                // and `Vec::with_capacity` succeeded.
432                // Prevent returning uninitialized memory if logic fails.
433                unsafe {
434                    out.set_len(0);
435                }
436                panic!("Base58 encoding failed due to insufficient buffer (logic error).");
437            }
438        }
439    }
440
441    /// Decodes `input` into the newly allocated `Vec<u8>`.
442    /// Returns the `Vec<u8>`.
443    #[inline]
444    #[cfg(feature = "std")]
445    pub fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, Error> {
446        let input = input.as_ref();
447        if input.is_empty() {
448            return Ok(Vec::new());
449        }
450        if input.len() > 2048 {
451            return Err(Error::InputTooBig);
452        }
453
454        let max_len = self.decoded_len(input.len());
455        let mut out = Vec::with_capacity(max_len);
456
457        // SAFETY: Expose uninitialized buffer to the decoder.
458        #[allow(clippy::uninit_vec)]
459        unsafe {
460            out.set_len(max_len);
461        }
462
463        match self.decode_into(input, &mut out) {
464            Ok(actual_len) => {
465                // SAFETY: Success. Truncate to actual size.
466                unsafe {
467                    out.set_len(actual_len);
468                }
469                Ok(out)
470            }
471            Err(e) => {
472                // SAFETY: Failure. Clear length to prevent access to junk data.
473                unsafe {
474                    out.set_len(0);
475                }
476                Err(e)
477            }
478        }
479    }
480}
481
// Smoke tests that are only compiled when running the test suite under Miri.
#[cfg(all(test, miri))]
mod lib_miri_coverage {
    use super::*;

    /// Round-trips a short payload through an engine built at runtime.
    #[test]
    fn miri_engine_lifecycle() {
        let engine =
            Engine::new(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz").unwrap();

        let payload = b"Miri Test Data";
        let text = engine.encode(payload).unwrap();
        let round_trip = engine.decode(&text).unwrap();

        assert_eq!(payload, round_trip.as_slice());
    }

    /// Round-trips the same payload through every predefined engine.
    #[test]
    fn miri_all_predefined_engines() {
        let payload = b"test";
        for engine in [BITCOIN, MONERO, RIPPLE, FLICKR].iter() {
            let text = engine.encode(payload).unwrap();
            let round_trip = engine.decode(&text).unwrap();
            assert_eq!(payload, round_trip.as_slice());
        }
    }

    /// An alphabet made of one repeated character must be rejected.
    #[test]
    fn miri_config_errors() {
        let repeated = [b'a'; 58];
        assert!(Config::new(&repeated).is_err());
    }
}
511        let alphabet = [b'a'; 58];
512        // Duplicate chars should fail
513        assert!(Config::new(&alphabet).is_err());
514    }
515}