base_d/
lib.rs

1#![allow(clippy::should_implement_trait)]
2#![allow(clippy::derivable_impls)]
3#![allow(clippy::manual_div_ceil)]
4#![allow(clippy::io_other_error)]
5#![allow(clippy::unnecessary_cast)]
6#![allow(clippy::collapsible_if)]
7#![allow(clippy::manual_range_contains)]
8#![allow(clippy::iter_nth_zero)]
9#![allow(clippy::map_identity)]
10#![allow(clippy::large_enum_variant)]
11#![allow(clippy::redundant_locals)]
12#![allow(clippy::manual_is_multiple_of)]
13#![allow(clippy::doc_lazy_continuation)]
14#![allow(clippy::collapsible_else_if)]
15#![allow(clippy::explicit_iter_loop)]
16#![allow(clippy::needless_range_loop)]
17
18//! # base-d
19//!
20//! A universal, multi-dictionary encoding library for Rust.
21//!
22//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
23//! emoji, playing cards, and more. Supports three encoding modes: mathematical
24//! base conversion, RFC 4648 chunked encoding, and direct byte-range mapping.
25//!
26//! ## Quick Start
27//!
28//! ```
29//! use base_d::{DictionaryRegistry, Dictionary, encode, decode};
30//!
31//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
32//! // Load built-in dictionaries
33//! let config = DictionaryRegistry::load_default()?;
34//! let base64_config = config.get_dictionary("base64").unwrap();
35//!
36//! // Create dictionary
37//! let chars: Vec<char> = base64_config.chars.chars().collect();
38//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
39//! let dictionary = Dictionary::new_with_mode(
40//!     chars,
41//!     base64_config.mode.clone(),
42//!     padding
43//! )?;
44//!
45//! // Encode and decode
46//! let data = b"Hello, World!";
47//! let encoded = encode(data, &dictionary);
48//! let decoded = decode(&encoded, &dictionary)?;
49//! assert_eq!(data, &decoded[..]);
50//! # Ok(())
51//! # }
52//! ```
53//!
54//! ## Features
55//!
56//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
57//! - **3 Encoding Modes**: Mathematical, chunked (RFC-compliant), byte-range
58//! - **Streaming Support**: Memory-efficient processing for large files
59//! - **Custom Dictionaries**: Define your own via TOML configuration
60//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
61//!
62//! ## Encoding Modes
63//!
64//! ### Mathematical Base Conversion
65//!
66//! Treats data as a large number. Works with any dictionary size.
67//!
68//! ```
69//! use base_d::{Dictionary, EncodingMode, encode};
70//!
71//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
72//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
73//! let dictionary = Dictionary::new_with_mode(
74//!     chars,
75//!     EncodingMode::BaseConversion,
76//!     None
77//! )?;
78//!
79//! let encoded = encode(b"Hi", &dictionary);
80//! # Ok(())
81//! # }
82//! ```
83//!
84//! ### Chunked Mode (RFC 4648)
85//!
86//! Fixed-size bit groups, compatible with standard base64/base32.
87//!
88//! ```
89//! use base_d::{Dictionary, EncodingMode, encode};
90//!
91//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
92//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
93//!     .chars().collect();
94//! let dictionary = Dictionary::new_with_mode(
95//!     chars,
96//!     EncodingMode::Chunked,
97//!     Some('=')
98//! )?;
99//!
100//! let encoded = encode(b"Hello", &dictionary);
101//! assert_eq!(encoded, "SGVsbG8=");
102//! # Ok(())
103//! # }
104//! ```
105//!
106//! ### Byte Range Mode
107//!
108//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
109//!
110//! ```
111//! use base_d::{Dictionary, EncodingMode, encode};
112//!
113//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
114//! let dictionary = Dictionary::new_with_mode_and_range(
115//!     Vec::new(),
116//!     EncodingMode::ByteRange,
117//!     None,
118//!     Some(127991)  // U+1F3F7
119//! )?;
120//!
121//! let data = b"Hi";
122//! let encoded = encode(data, &dictionary);
123//! assert_eq!(encoded.chars().count(), 2);  // 1:1 mapping
124//! # Ok(())
125//! # }
126//! ```
127//!
128//! ## Streaming
129//!
//! For large files, use streaming to avoid loading the entire file into memory:
131//!
132//! ```no_run
133//! use base_d::{DictionaryRegistry, StreamingEncoder};
134//! use std::fs::File;
135//!
136//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
137//! let config = DictionaryRegistry::load_default()?;
138//! let dictionary_config = config.get_dictionary("base64").unwrap();
139//!
140//! // ... create dictionary from config
141//! # let chars: Vec<char> = dictionary_config.chars.chars().collect();
142//! # let padding = dictionary_config.padding.as_ref().and_then(|s| s.chars().next());
143//! # let dictionary = base_d::Dictionary::new_with_mode(chars, dictionary_config.mode.clone(), padding)?;
144//!
145//! let mut input = File::open("large_file.bin")?;
146//! let output = File::create("encoded.txt")?;
147//!
148//! let mut encoder = StreamingEncoder::new(&dictionary, output);
149//! encoder.encode(&mut input)?;
150//! # Ok(())
151//! # }
152//! ```
153
154mod core;
155mod encoders;
156mod features;
157
158mod simd;
159
160pub use core::config::{
161    CompressionConfig, DictionaryConfig, DictionaryRegistry, EncodingMode, Settings,
162};
163pub use core::dictionary::Dictionary;
164pub use encoders::algorithms::DecodeError;
165pub use encoders::streaming::{StreamingDecoder, StreamingEncoder};
166pub use features::{
167    compress, decompress, detect_dictionary, hash, hash_with_config, CompressionAlgorithm,
168    DictionaryDetector, DictionaryMatch, HashAlgorithm, XxHashConfig,
169};
170
171/// Encodes binary data using the specified dictionary.
172///
173/// Automatically selects the appropriate encoding strategy based on the
174/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
175///
176/// # Arguments
177///
178/// * `data` - The binary data to encode
179/// * `dictionary` - The dictionary to use for encoding
180///
181/// # Returns
182///
183/// A string containing the encoded data
184///
185/// # Examples
186///
187/// ```
188/// use base_d::{Dictionary, EncodingMode};
189///
190/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
191/// let chars: Vec<char> = "01".chars().collect();
192/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
193/// let encoded = base_d::encode(b"Hi", &dictionary);
194/// # Ok(())
195/// # }
196/// ```
197pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
198    match dictionary.mode() {
199        EncodingMode::BaseConversion => encoders::algorithms::math::encode(data, dictionary),
200        EncodingMode::Chunked => encoders::algorithms::chunked::encode_chunked(data, dictionary),
201        EncodingMode::ByteRange => {
202            encoders::algorithms::byte_range::encode_byte_range(data, dictionary)
203        }
204    }
205}
206
207/// Decodes a string back to binary data using the specified dictionary.
208///
209/// Automatically selects the appropriate decoding strategy based on the
210/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
211///
212/// # Arguments
213///
214/// * `encoded` - The encoded string to decode
215/// * `dictionary` - The dictionary used for encoding
216///
217/// # Returns
218///
219/// A `Result` containing the decoded binary data, or a `DecodeError` if
220/// the input is invalid
221///
222/// # Errors
223///
224/// Returns `DecodeError` if:
225/// - The input contains invalid characters
226/// - The input is empty
227/// - The padding is invalid (for chunked mode)
228///
229/// # Examples
230///
231/// ```
232/// use base_d::{Dictionary, EncodingMode, encode, decode};
233///
234/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
235/// let chars: Vec<char> = "01".chars().collect();
236/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
237/// let data = b"Hi";
238/// let encoded = encode(data, &dictionary);
239/// let decoded = decode(&encoded, &dictionary)?;
240/// assert_eq!(data, &decoded[..]);
241/// # Ok(())
242/// # }
243/// ```
244pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
245    match dictionary.mode() {
246        EncodingMode::BaseConversion => encoders::algorithms::math::decode(encoded, dictionary),
247        EncodingMode::Chunked => encoders::algorithms::chunked::decode_chunked(encoded, dictionary),
248        EncodingMode::ByteRange => {
249            encoders::algorithms::byte_range::decode_byte_range(encoded, dictionary)
250        }
251    }
252}
253
254#[cfg(test)]
255mod tests;