base_d/
lib.rs

1//! # base-d
2//!
3//! A universal, multi-dictionary encoding library for Rust.
4//!
5//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
6//! emoji, playing cards, and more. Supports three encoding modes: radix (true base
7//! conversion), RFC 4648 chunked encoding, and direct byte-range mapping.
8//!
9//! ## Quick Start
10//!
11//! ```
12//! use base_d::{DictionaryRegistry, Dictionary, encode, decode};
13//!
14//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
15//! // Load built-in dictionaries
16//! let config = DictionaryRegistry::load_default()?;
17//! let base64_config = config.get_dictionary("base64").unwrap();
18//!
19//! // Create dictionary
20//! let chars: Vec<char> = base64_config.chars.chars().collect();
21//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
22//! let mut builder = Dictionary::builder()
23//!     .chars(chars)
24//!     .mode(base64_config.effective_mode());
25//! if let Some(p) = padding {
26//!     builder = builder.padding(p);
27//! }
28//! let dictionary = builder.build()?;
29//!
30//! // Encode and decode
31//! let data = b"Hello, World!";
32//! let encoded = encode(data, &dictionary);
33//! let decoded = decode(&encoded, &dictionary)?;
34//! assert_eq!(data, &decoded[..]);
35//! # Ok(())
36//! # }
37//! ```
38//!
39//! ## Features
40//!
41//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
42//! - **3 Encoding Modes**: Radix, chunked (RFC-compliant), byte-range
43//! - **Streaming Support**: Memory-efficient processing for large files
44//! - **Custom Dictionaries**: Define your own via TOML configuration
45//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
46//! - **SIMD Acceleration**: AVX2/SSSE3 on x86_64, NEON on aarch64 (enabled by default)
47//!
48//! ## Cargo Features
49//!
50//! - `simd` (default): Enable SIMD acceleration for encoding/decoding.
51//!   Disable with `--no-default-features` for scalar-only builds.
52//!
53//! ## Encoding Modes
54//!
55//! ### Radix Base Conversion
56//!
57//! True base conversion treating data as a large number. Works with any dictionary size.
58//!
59//! ```
60//! use base_d::{Dictionary, EncodingMode, encode};
61//!
62//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
63//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
64//! let dictionary = Dictionary::builder()
65//!     .chars(chars)
66//!     .mode(EncodingMode::Radix)
67//!     .build()?;
68//!
69//! let encoded = encode(b"Hi", &dictionary);
70//! # Ok(())
71//! # }
72//! ```
73//!
74//! ### Chunked Mode (RFC 4648)
75//!
76//! Fixed-size bit groups, compatible with standard base64/base32.
77//!
78//! ```
79//! use base_d::{Dictionary, EncodingMode, encode};
80//!
81//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
82//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
83//!     .chars().collect();
84//! let dictionary = Dictionary::builder()
85//!     .chars(chars)
86//!     .mode(EncodingMode::Chunked)
87//!     .padding('=')
88//!     .build()?;
89//!
90//! let encoded = encode(b"Hello", &dictionary);
91//! assert_eq!(encoded, "SGVsbG8=");
92//! # Ok(())
93//! # }
94//! ```
95//!
96//! ### Byte Range Mode
97//!
98//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
99//!
100//! ```
101//! use base_d::{Dictionary, EncodingMode, encode};
102//!
103//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
104//! let dictionary = Dictionary::builder()
105//!     .mode(EncodingMode::ByteRange)
106//!     .start_codepoint(127991)  // U+1F3F7
107//!     .build()?;
108//!
109//! let data = b"Hi";
110//! let encoded = encode(data, &dictionary);
111//! assert_eq!(encoded.chars().count(), 2);  // 1:1 mapping
112//! # Ok(())
113//! # }
114//! ```
115//!
116//! ## Streaming
117//!
118//! For large files, use streaming to avoid loading the entire file into memory:
119//!
120//! ```no_run
121//! use base_d::{DictionaryRegistry, StreamingEncoder};
122//! use std::fs::File;
123//!
124//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
125//! let config = DictionaryRegistry::load_default()?;
126//! let dictionary_config = config.get_dictionary("base64").unwrap();
127//!
128//! // ... create dictionary from config
129//! # let chars: Vec<char> = dictionary_config.chars.chars().collect();
130//! # let padding = dictionary_config.padding.as_ref().and_then(|s| s.chars().next());
131//! # let mut builder = base_d::Dictionary::builder().chars(chars).mode(dictionary_config.effective_mode());
132//! # if let Some(p) = padding { builder = builder.padding(p); }
133//! # let dictionary = builder.build()?;
134//!
135//! let mut input = File::open("large_file.bin")?;
136//! let output = File::create("encoded.txt")?;
137//!
138//! let mut encoder = StreamingEncoder::new(&dictionary, output);
139//! encoder.encode(&mut input)?;
140//! # Ok(())
141//! # }
142//! ```
143
144mod core;
145mod encoders;
146mod features;
147
148#[cfg(feature = "simd")]
149mod simd;
150
151pub use core::config::{
152    CompressionConfig, DictionaryConfig, DictionaryRegistry, EncodingMode, Settings,
153};
154pub use core::dictionary::{Dictionary, DictionaryBuilder};
155pub use encoders::algorithms::{DecodeError, DictionaryNotFoundError, find_closest_dictionary};
156pub use encoders::streaming::{StreamingDecoder, StreamingEncoder};
157pub use features::{
158    CompressionAlgorithm, DictionaryDetector, DictionaryMatch, HashAlgorithm, XxHashConfig,
159    compress, decompress, detect_dictionary, hash, hash_with_config,
160};
161
162/// Encodes binary data using the specified dictionary.
163///
164/// Automatically selects the appropriate encoding strategy based on the
165/// dictionary's mode (Radix, Chunked, or ByteRange).
166///
167/// # Arguments
168///
169/// * `data` - The binary data to encode
170/// * `dictionary` - The dictionary to use for encoding
171///
172/// # Returns
173///
174/// A string containing the encoded data
175///
176/// # Examples
177///
178/// ```
179/// use base_d::{Dictionary, EncodingMode};
180///
181/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
182/// let chars: Vec<char> = "01".chars().collect();
183/// let dictionary = Dictionary::builder()
184///     .chars(chars)
185///     .mode(EncodingMode::Radix)
186///     .build()?;
187/// let encoded = base_d::encode(b"Hi", &dictionary);
188/// # Ok(())
189/// # }
190/// ```
191pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
192    match dictionary.mode() {
193        EncodingMode::Radix => encoders::algorithms::radix::encode(data, dictionary),
194        EncodingMode::Chunked => encoders::algorithms::chunked::encode_chunked(data, dictionary),
195        EncodingMode::ByteRange => {
196            encoders::algorithms::byte_range::encode_byte_range(data, dictionary)
197        }
198    }
199}
200
201/// Decodes a string back to binary data using the specified dictionary.
202///
203/// Automatically selects the appropriate decoding strategy based on the
204/// dictionary's mode (Radix, Chunked, or ByteRange).
205///
206/// # Arguments
207///
208/// * `encoded` - The encoded string to decode
209/// * `dictionary` - The dictionary used for encoding
210///
211/// # Returns
212///
213/// A `Result` containing the decoded binary data, or a `DecodeError` if
214/// the input is invalid
215///
216/// # Errors
217///
218/// Returns `DecodeError` if:
219/// - The input contains invalid characters
220/// - The input is empty
221/// - The padding is invalid (for chunked mode)
222///
223/// # Examples
224///
225/// ```
226/// use base_d::{Dictionary, EncodingMode, encode, decode};
227///
228/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
229/// let chars: Vec<char> = "01".chars().collect();
230/// let dictionary = Dictionary::builder()
231///     .chars(chars)
232///     .mode(EncodingMode::Radix)
233///     .build()?;
234/// let data = b"Hi";
235/// let encoded = encode(data, &dictionary);
236/// let decoded = decode(&encoded, &dictionary)?;
237/// assert_eq!(data, &decoded[..]);
238/// # Ok(())
239/// # }
240/// ```
241pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
242    match dictionary.mode() {
243        EncodingMode::Radix => encoders::algorithms::radix::decode(encoded, dictionary),
244        EncodingMode::Chunked => encoders::algorithms::chunked::decode_chunked(encoded, dictionary),
245        EncodingMode::ByteRange => {
246            encoders::algorithms::byte_range::decode_byte_range(encoded, dictionary)
247        }
248    }
249}
250
251#[cfg(test)]
252mod tests;