base58_turbo/lib.rs
1//! # Base58 Turbo
2//!
//! [crates.io](https://crates.io/crates/base58-turbo)
//! [docs.rs](https://docs.rs/base58-turbo)
//! [License](https://github.com/hacer-bark/base58-turbo/blob/main/LICENSE)
//! [Miri workflow](https://github.com/hacer-bark/base58-turbo/actions/workflows/miri.yml)
//! [Tests workflow](https://github.com/hacer-bark/base58-turbo/actions/workflows/tests.yml)
8//!
9//! A high-performance Base58 encoder/decoder for Rust, optimized for high-throughput systems.
10//!
11//! This crate provides highly optimized scalar kernels for encoding and decoding,
12//! supporting `no_std` environments and zero-allocation processing.
13//!
14//! ## Usage
15//!
16//! Add this to your `Cargo.toml`:
17//!
18//! ```toml
19//! [dependencies]
20//! base58-turbo = "0.1"
21//! ```
22//!
23//! ### Basic API (Allocating)
24//!
25//! Standard usage for general applications. Requires the `std` feature (enabled by default).
26//!
27//! ```rust
28//! use base58_turbo::BITCOIN;
29//!
30//! let data = b"Hello World";
31//! let encoded = BITCOIN.encode(data).unwrap();
32//! assert_eq!(encoded, "JxF12TrwUP45BMd");
33//!
34//! let decoded = BITCOIN.decode(&encoded).unwrap();
35//! assert_eq!(decoded, data);
36//! ```
37//!
38//! ### Zero-Allocation API (Slice-based)
39//!
40//! For low-latency scenarios or `no_std` environments where heap allocation is undesirable.
41//! These methods write directly into a user-provided mutable slice.
42//!
43//! ```rust
44//! use base58_turbo::BITCOIN;
45//!
46//! let data = b"Hello World";
47//! let mut output = [0u8; 32];
48//!
49//! let len = BITCOIN.encode_into(data, &mut output).unwrap();
50//! let encoded = std::str::from_utf8(&output[..len]).unwrap();
51//! assert_eq!(encoded, "JxF12TrwUP45BMd");
52//! ```
53//!
54//! ## Feature Flags
55//!
56//! This crate is lightweight and configurable via Cargo features:
57//!
58//! | Feature | Default | Description |
59//! |---------|---------|-------------|
60//! | **`serde`** | **No** | Enables `serde` serialization/deserialization for Config and Engine. |
61//! | **`std`** | **Yes** | Enables `String` and `Vec` support. Disable this for `no_std` environments. |
62//!
63//! ## Safety & Verification
64//!
65//! This crate utilizes `unsafe` code for pointer arithmetic and optimized kernels to achieve maximum performance.
66//!
67//! * **MIRI Tests:** Core logic and fallbacks are verified with **MIRI** (Undefined Behavior checker) in CI.
68//! * **MSan Audited:** MemorySanitizer confirms no logic is ever performed on uninitialized memory.
69//! * **Fuzzing:** The codebase is continuously fuzz-tested via `cargo-fuzz`.
70//!
71//! **[Learn More](https://github.com/hacer-bark/base58-turbo/blob/main/docs/verification.md)**: Details on our threat model and strict verification strategy.
72
73#![cfg_attr(not(any(feature = "std", test)), no_std)]
74#![doc(issue_tracker_base_url = "https://github.com/hacer-bark/base58-turbo/issues/")]
75#![deny(unsafe_op_in_unsafe_fn)]
76#![warn(missing_docs)]
77#![warn(rust_2018_idioms)]
78#![warn(unused_qualifications)]
79#![cfg_attr(docsrs, feature(doc_cfg))]
80
81// Use `serde` when enabled
82#[cfg(feature = "serde")]
83pub mod serde;
84
85mod decode;
86mod encode;
87use decode::decode_slice_unsafe;
88use encode::encode_slice_unsafe;
89
90// ======================================================================
91// Errors
92// ======================================================================
93
/// Everything that can go wrong while building an alphabet, encoding, or decoding.
///
/// The type is a plain, field-less enum: it is `Copy`, comparable with `==`,
/// and implements [`core::fmt::Display`] with a short human-readable message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The input contained a byte that is not part of the engine's alphabet.
    InvalidCharacter,
    /// The caller-supplied output buffer cannot hold the worst-case result.
    BufferTooSmall,
    /// The input exceeds the supported size: 1024 bytes when encoding,
    /// 2048 bytes when decoding.
    InputTooBig,
    /// The alphabet was rejected (it contains duplicate characters).
    WrongAlphabet,
}
106
107impl core::fmt::Display for Error {
108 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
109 match self {
110 Error::InvalidCharacter => write!(f, "invalid character in base58 string"),
111 Error::BufferTooSmall => write!(f, "output buffer too small"),
112 Error::InputTooBig => write!(f, "input data too big"),
113 Error::WrongAlphabet => write!(f, "input alphabet has duplicate chars"),
114 }
115 }
116}
117
// `std::error::Error` only exists with the standard library, so the impl is
// gated on the `std` feature. The trait's default methods are sufficient.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
121
122// ======================================================================
123// Configuration & Types
124// ======================================================================
125
/// Pre-computed lookup tables for one Base58 alphabet.
///
/// Values built by `Config::new` keep the three tables consistent with each
/// other. The fields are public, so a hand-assembled `Config` carries no such
/// guarantee.
#[derive(Debug, Clone, Copy)]
pub struct Config {
    /// The 58 alphabet bytes; the byte at index `d` represents digit value `d`.
    pub alphabet: [u8; 58],
    /// Reverse table: input byte -> digit value (`0..=57`).
    /// Bytes that are not in the alphabet map to `255`.
    pub decode_map: [u8; 256],
    /// Two-digit table with `58 * 58 = 3364` entries. Entry `i` packs
    /// `alphabet[i / 58]` into the high byte and `alphabet[i % 58]` into the
    /// low byte of a `u16`.
    pub lut_58_squared: [u16; 3364],
}
136
137impl Config {
138 /// Creates a new configuration from a 58-byte alphabet.
139 /// Checks that all characters are unique.
140 pub const fn new(alphabet: &[u8; 58]) -> Result<Self, Error> {
141 // 1. Generate Decode Map & Check Uniqueness
142 let mut map = [255u8; 256];
143 let mut i = 0;
144
145 while i < 58 {
146 let byte = alphabet[i];
147
148 // Uniqueness Check:
149 // If the map position is not 255, it means we already saw this byte.
150 if map[byte as usize] != 255 {
151 return Err(Error::WrongAlphabet);
152 }
153
154 map[byte as usize] = i as u8;
155 i += 1;
156 }
157
158 // 2. Return valid Config
159 Ok(Self {
160 alphabet: *alphabet,
161 decode_map: map,
162 lut_58_squared: gen_lut_squared(alphabet),
163 })
164 }
165}
166
167/// A Base58 Encoder/Decoder Engine.
168#[derive(Debug, Clone, Copy)]
169pub struct Engine {
170 config: Config,
171}
172
// Manual Serde implementations: `Config` and `Engine` are (de)serialized as the
// alphabet string; the lookup tables are rebuilt on deserialization.
#[cfg(feature = "serde")]
impl ::serde::Serialize for Config {
    /// Serializes the configuration as its alphabet, written as a string.
    ///
    /// Only the alphabet is emitted; the derived tables are not serialized.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // A string keeps the value readable in JSON/TOML. The conversion is
        // checked, so an alphabet that is not valid UTF-8 surfaces as a
        // serializer error instead of producing an invalid `str`.
        match core::str::from_utf8(&self.alphabet) {
            Ok(text) => serializer.serialize_str(text),
            Err(err) => Err(<S::Error as ::serde::ser::Error>::custom(err)),
        }
    }
}
187
#[cfg(feature = "serde")]
impl<'de> ::serde::Deserialize<'de> for Config {
    /// Reads an alphabet string and rebuilds the full configuration from it.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: ::serde::Deserializer<'de>,
    {
        /// Visitor that accepts a string of exactly 58 bytes.
        struct AlphabetVisitor;

        impl<'de> ::serde::de::Visitor<'de> for AlphabetVisitor {
            type Value = Config;

            fn expecting(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                formatter.write_str("a 58-character Base58 alphabet string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: ::serde::de::Error,
            {
                // The slice-to-array conversion succeeds only for exactly 58
                // bytes, so it covers the length check and the copy in one step.
                match <[u8; 58] as core::convert::TryFrom<&[u8]>>::try_from(v.as_bytes()) {
                    // The tables are never read from the input; they are
                    // always recomputed (and validated) by `Config::new`.
                    Ok(alphabet) => Config::new(&alphabet).map_err(E::custom),
                    Err(_) => Err(E::custom("expected exactly 58-byte alphabet")),
                }
            }
        }

        deserializer.deserialize_str(AlphabetVisitor)
    }
}
223
#[cfg(feature = "serde")]
impl ::serde::Serialize for Engine {
    /// Serializes the engine exactly like its [`Config`].
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // Fully qualified call: method syntax (`self.config.serialize(..)`)
        // only resolves when the `Serialize` trait is imported, and this file
        // refers to it solely by path.
        ::serde::Serialize::serialize(&self.config, serializer)
    }
}
233
#[cfg(feature = "serde")]
impl<'de> ::serde::Deserialize<'de> for Engine {
    /// Deserializes a [`Config`] and wraps it in an engine.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: ::serde::Deserializer<'de>,
    {
        // Fully qualified call: `Config::deserialize(..)` only resolves when
        // the `Deserialize` trait is imported, and this file refers to it
        // solely by path.
        let config = <Config as ::serde::Deserialize<'de>>::deserialize(deserializer)?;
        Ok(Engine { config })
    }
}
243
244// ======================================================================
245// Pre-defined Engines
246// ======================================================================
247
248/// Standard Bitcoin Base58 Engine.
249pub const BITCOIN: Engine =
250 match Engine::new(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz") {
251 Ok(e) => e,
252 Err(_) => panic!("Invalid Bitcoin alphabet definition"),
253 };
254
255/// Monero Base58 Engine.
256pub const MONERO: Engine =
257 match Engine::new(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz") {
258 Ok(e) => e,
259 Err(_) => panic!("Invalid Monero alphabet definition"),
260 };
261
262/// Ripple Base58 Engine.
263pub const RIPPLE: Engine =
264 match Engine::new(b"rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz") {
265 Ok(e) => e,
266 Err(_) => panic!("Invalid Ripple alphabet definition"),
267 };
268
269/// Flickr Base58 Engine.
270pub const FLICKR: Engine =
271 match Engine::new(b"123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ") {
272 Ok(e) => e,
273 Err(_) => panic!("Invalid Flickr alphabet definition"),
274 };
275
276// ======================================================================
277// Const Table Generators
278// ======================================================================
279
/// Builds the table of all two-character combinations of `alphabet`.
///
/// Entry `hi * 58 + lo` holds `alphabet[hi]` in the upper byte and
/// `alphabet[lo]` in the lower byte of a `u16`, giving `58 * 58 = 3364`
/// entries in total.
const fn gen_lut_squared(alphabet: &[u8; 58]) -> [u16; 3364] {
    let mut pairs = [0u16; 3364];

    // `while` loops because iterators are not usable in `const fn`.
    let mut hi = 0;
    while hi < 58 {
        // The leading character is the same for a whole row of 58 entries.
        let lead = (alphabet[hi] as u16) << 8;

        let mut lo = 0;
        while lo < 58 {
            pairs[hi * 58 + lo] = lead | alphabet[lo] as u16;
            lo += 1;
        }
        hi += 1;
    }

    pairs
}
292
293// ======================================================================
294// Engine Implementation
295// ======================================================================
296
297impl Engine {
298 /// Constructs a new Engine with a custom alphabet.
299 /// Returns Error::WrongAlphabet if the alphabet contains duplicates.
300 pub const fn new(alphabet: &[u8; 58]) -> Result<Self, Error> {
301 match Config::new(alphabet) {
302 Ok(c) => Ok(Self { config: c }),
303 Err(e) => Err(e),
304 }
305 }
306
307 /// Returns the internal configuration.
308 #[inline(always)]
309 pub const fn config(&self) -> &Config {
310 &self.config
311 }
312
313 // ======================================================================
314 // Length Calculators
315 // ======================================================================
316
317 /// Returns the maximum possible length of the encoded data.
318 /// Base58 expansion is ~137%. We add padding for safety.
319 #[inline]
320 #[must_use]
321 pub const fn encoded_len(&self, input_len: usize) -> usize {
322 (input_len.saturating_mul(137) / 100).saturating_add(1)
323 }
324
325 /// Returns the maximum possible length of the decoded data.
326 /// Base58 '1's map 1:1 to bytes. We cannot assume compression.
327 /// The worst-case decoded size is equal to the input string length.
328 #[inline]
329 #[must_use]
330 pub const fn decoded_len(&self, input_len: usize) -> usize {
331 input_len
332 }
333
334 // ======================================================================
335 // Zero-Allocation APIs
336 // ======================================================================
337
338 /// Encodes `input` into the `output` buffer.
339 /// Returns the actual number of bytes written.
340 #[inline]
341 pub fn encode_into<T: AsRef<[u8]>>(&self, input: T, output: &mut [u8]) -> Result<usize, Error> {
342 let input = input.as_ref();
343 if input.is_empty() {
344 return Ok(0);
345 }
346 if input.len() > 1024 {
347 return Err(Error::InputTooBig);
348 }
349
350 let req_len = self.encoded_len(input.len());
351 if output.len() < req_len {
352 return Err(Error::BufferTooSmall);
353 }
354
355 // SAFETY:
356 // 1. We checked output has sufficient capacity above.
357 // 2. We assume `encode_slice_unsafe` respects the pointer limits.
358 // 3. We assume `encode_slice_unsafe` uses `self.config` for the alphabet.
359 let actual_len = unsafe { encode_slice_unsafe(input, output.as_mut_ptr(), &self.config) };
360
361 Ok(actual_len)
362 }
363
364 /// Decodes `input` into the `output` buffer.
365 /// Returns the actual number of bytes written.
366 #[inline]
367 pub fn decode_into<T: AsRef<[u8]>>(&self, input: T, output: &mut [u8]) -> Result<usize, Error> {
368 let input = input.as_ref();
369 if input.is_empty() {
370 return Ok(0);
371 }
372 if input.len() > 2048 {
373 return Err(Error::InputTooBig);
374 }
375
376 // While decoding implies shrinking, we must ensure buffer is enough for the worst case.
377 // However, standard usage usually provides a buffer size == input size or calculated decoded_len.
378 // The safest check is:
379 let req_len = self.decoded_len(input.len());
380 if output.len() < req_len {
381 return Err(Error::BufferTooSmall);
382 }
383
384 // SAFETY:
385 // 1. `decode_slice_unsafe` performs bounds checks internally or logic ensures it.
386 // 2. We pass the slice `output` via mutable reference, guaranteeing validity.
387 unsafe { decode_slice_unsafe(input, output, &self.config) }
388 }
389
390 // ========================================================================
391 // Allocating APIs (std)
392 // ========================================================================
393
394 /// Encodes `input` into the newly allocated `String`.
395 /// Returns the `String`.
396 #[inline]
397 #[cfg(feature = "std")]
398 pub fn encode<T: AsRef<[u8]>>(&self, input: T) -> Result<String, Error> {
399 let input = input.as_ref();
400 if input.is_empty() {
401 return Ok(String::new());
402 }
403 if input.len() > 1024 {
404 return Err(Error::InputTooBig);
405 }
406
407 let max_len = self.encoded_len(input.len());
408 let mut out = Vec::with_capacity(max_len);
409
410 // SAFETY:
411 // We set the length to `max_len` to allow the unsafe kernel to write into the uninitialized capacity.
412 // We MUST successfully overwrite or truncate this before returning.
413 #[allow(clippy::uninit_vec)]
414 unsafe {
415 out.set_len(max_len);
416 }
417
418 match self.encode_into(input, &mut out) {
419 Ok(actual_len) => {
420 // SAFETY: The kernel reported `actual_len` bytes were written.
421 // Truncate the vector to remove the remaining uninitialized tail.
422 unsafe {
423 out.set_len(actual_len);
424 }
425
426 // SAFETY: Base58 is always valid ASCII, which is valid UTF-8.
427 unsafe { Ok(String::from_utf8_unchecked(out)) }
428 }
429 Err(_) => {
430 // This branch should technically be unreachable if `encoded_len` is correct
431 // and `Vec::with_capacity` succeeded.
432 // Prevent returning uninitialized memory if logic fails.
433 unsafe {
434 out.set_len(0);
435 }
436 panic!("Base58 encoding failed due to insufficient buffer (logic error).");
437 }
438 }
439 }
440
441 /// Decodes `input` into the newly allocated `Vec<u8>`.
442 /// Returns the `Vec<u8>`.
443 #[inline]
444 #[cfg(feature = "std")]
445 pub fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, Error> {
446 let input = input.as_ref();
447 if input.is_empty() {
448 return Ok(Vec::new());
449 }
450 if input.len() > 2048 {
451 return Err(Error::InputTooBig);
452 }
453
454 let max_len = self.decoded_len(input.len());
455 let mut out = Vec::with_capacity(max_len);
456
457 // SAFETY: Expose uninitialized buffer to the decoder.
458 #[allow(clippy::uninit_vec)]
459 unsafe {
460 out.set_len(max_len);
461 }
462
463 match self.decode_into(input, &mut out) {
464 Ok(actual_len) => {
465 // SAFETY: Success. Truncate to actual size.
466 unsafe {
467 out.set_len(actual_len);
468 }
469 Ok(out)
470 }
471 Err(e) => {
472 // SAFETY: Failure. Clear length to prevent access to junk data.
473 unsafe {
474 out.set_len(0);
475 }
476 Err(e)
477 }
478 }
479 }
480}
481
/// Smoke tests that only build when the test suite runs under Miri.
#[cfg(all(test, miri))]
mod lib_miri_coverage {
    use super::*;

    /// Round-trips one payload through an engine built at runtime.
    #[test]
    fn miri_engine_lifecycle() {
        let engine =
            Engine::new(b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz").unwrap();

        let payload = b"Miri Test Data";
        let text = engine.encode(payload).unwrap();
        let bytes = engine.decode(&text).unwrap();

        assert_eq!(payload, bytes.as_slice());
    }

    /// Round-trips a short payload through every predefined engine.
    #[test]
    fn miri_all_predefined_engines() {
        let payload = b"test";
        for engine in [BITCOIN, MONERO, RIPPLE, FLICKR].iter() {
            let text = engine.encode(payload).unwrap();
            let bytes = engine.decode(&text).unwrap();
            assert_eq!(payload, bytes.as_slice());
        }
    }

    /// An alphabet made of one repeated byte must be rejected.
    #[test]
    fn miri_config_errors() {
        let repeated = [b'a'; 58];
        assert!(Config::new(&repeated).is_err());
    }
}
515}