base_d/lib.rs
1#![allow(clippy::should_implement_trait)]
2#![allow(clippy::derivable_impls)]
3#![allow(clippy::manual_div_ceil)]
4#![allow(clippy::io_other_error)]
5#![allow(clippy::unnecessary_cast)]
6#![allow(clippy::collapsible_if)]
7#![allow(clippy::manual_range_contains)]
8#![allow(clippy::iter_nth_zero)]
9#![allow(clippy::map_identity)]
10#![allow(clippy::large_enum_variant)]
11#![allow(clippy::redundant_locals)]
12#![allow(clippy::manual_is_multiple_of)]
13#![allow(clippy::doc_lazy_continuation)]
14#![allow(clippy::collapsible_else_if)]
15#![allow(clippy::explicit_iter_loop)]
16#![allow(clippy::needless_range_loop)]
17
18//! # base-d
19//!
20//! A universal, multi-dictionary encoding library for Rust.
21//!
22//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
23//! emoji, playing cards, and more. Supports three encoding modes: mathematical
24//! base conversion, RFC 4648 chunked encoding, and direct byte-range mapping.
25//!
26//! ## Quick Start
27//!
28//! ```
29//! use base_d::{DictionariesConfig, Dictionary, encode, decode};
30//!
31//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
32//! // Load built-in dictionaries
33//! let config = DictionariesConfig::load_default()?;
34//! let base64_config = config.get_dictionary("base64").unwrap();
35//!
36//! // Create dictionary
37//! let chars: Vec<char> = base64_config.chars.chars().collect();
38//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
39//! let dictionary = Dictionary::new_with_mode(
40//! chars,
41//! base64_config.mode.clone(),
42//! padding
43//! )?;
44//!
45//! // Encode and decode
46//! let data = b"Hello, World!";
47//! let encoded = encode(data, &dictionary);
48//! let decoded = decode(&encoded, &dictionary)?;
49//! assert_eq!(data, &decoded[..]);
50//! # Ok(())
51//! # }
52//! ```
53//!
54//! ## Features
55//!
//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
//! - **3 Encoding Modes**: Mathematical, chunked (RFC-compliant), byte-range
//! - **Streaming Support**: Memory-efficient processing for large files
//! - **Custom Dictionaries**: Define your own via TOML configuration
60//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
61//!
62//! ## Encoding Modes
63//!
64//! ### Mathematical Base Conversion
65//!
66//! Treats data as a large number. Works with any dictionary size.
67//!
68//! ```
69//! use base_d::{Dictionary, EncodingMode, encode};
70//!
71//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
72//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
73//! let dictionary = Dictionary::new_with_mode(
74//! chars,
75//! EncodingMode::BaseConversion,
76//! None
77//! )?;
78//!
79//! let encoded = encode(b"Hi", &dictionary);
80//! # Ok(())
81//! # }
82//! ```
83//!
84//! ### Chunked Mode (RFC 4648)
85//!
86//! Fixed-size bit groups, compatible with standard base64/base32.
87//!
88//! ```
89//! use base_d::{Dictionary, EncodingMode, encode};
90//!
91//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
92//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
93//! .chars().collect();
94//! let dictionary = Dictionary::new_with_mode(
95//! chars,
96//! EncodingMode::Chunked,
97//! Some('=')
98//! )?;
99//!
100//! let encoded = encode(b"Hello", &dictionary);
101//! assert_eq!(encoded, "SGVsbG8=");
102//! # Ok(())
103//! # }
104//! ```
105//!
106//! ### Byte Range Mode
107//!
108//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
109//!
110//! ```
111//! use base_d::{Dictionary, EncodingMode, encode};
112//!
113//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
114//! let dictionary = Dictionary::new_with_mode_and_range(
115//! Vec::new(),
116//! EncodingMode::ByteRange,
117//! None,
118//! Some(127991) // U+1F3F7
119//! )?;
120//!
121//! let data = b"Hi";
122//! let encoded = encode(data, &dictionary);
123//! assert_eq!(encoded.chars().count(), 2); // 1:1 mapping
124//! # Ok(())
125//! # }
126//! ```
127//!
128//! ## Streaming
129//!
//! For large files, use streaming to avoid loading the entire file into memory:
131//!
132//! ```no_run
133//! use base_d::{DictionariesConfig, StreamingEncoder};
134//! use std::fs::File;
135//!
136//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
137//! let config = DictionariesConfig::load_default()?;
138//! let alphabet_config = config.get_dictionary("base64").unwrap();
139//!
140//! // ... create dictionary from config
141//! # let chars: Vec<char> = alphabet_config.chars.chars().collect();
142//! # let padding = alphabet_config.padding.as_ref().and_then(|s| s.chars().next());
143//! # let dictionary = base_d::Dictionary::new_with_mode(chars, alphabet_config.mode.clone(), padding)?;
144//!
145//! let mut input = File::open("large_file.bin")?;
146//! let output = File::create("encoded.txt")?;
147//!
148//! let mut encoder = StreamingEncoder::new(&dictionary, output);
149//! encoder.encode(&mut input)?;
150//! # Ok(())
151//! # }
152//! ```
153
154mod compression;
155mod core;
156mod detection;
157mod encoders;
158mod hashing;
159
160mod simd;
161
162pub use compression::{compress, decompress, CompressionAlgorithm};
163pub use core::config::{
164 CompressionConfig, DictionariesConfig, DictionaryConfig, EncodingMode, Settings,
165};
166pub use core::dictionary::Dictionary;
167pub use detection::{detect_dictionary, DictionaryDetector, DictionaryMatch};
168pub use encoders::encoding::DecodeError;
169pub use encoders::streaming::{StreamingDecoder, StreamingEncoder};
170pub use hashing::{hash, hash_with_config, HashAlgorithm, XxHashConfig};
171
172/// Encodes binary data using the specified dictionary.
173///
174/// Automatically selects the appropriate encoding strategy based on the
175/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
176///
177/// # Arguments
178///
179/// * `data` - The binary data to encode
180/// * `dictionary` - The dictionary to use for encoding
181///
182/// # Returns
183///
184/// A string containing the encoded data
185///
186/// # Examples
187///
188/// ```
189/// use base_d::{Dictionary, EncodingMode};
190///
191/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
192/// let chars: Vec<char> = "01".chars().collect();
193/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
194/// let encoded = base_d::encode(b"Hi", &dictionary);
195/// # Ok(())
196/// # }
197/// ```
198pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
199 match dictionary.mode() {
200 EncodingMode::BaseConversion => encoders::encoding::encode(data, dictionary),
201 EncodingMode::Chunked => encoders::chunked::encode_chunked(data, dictionary),
202 EncodingMode::ByteRange => encoders::byte_range::encode_byte_range(data, dictionary),
203 }
204}
205
206/// Decodes a string back to binary data using the specified dictionary.
207///
208/// Automatically selects the appropriate decoding strategy based on the
209/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
210///
211/// # Arguments
212///
213/// * `encoded` - The encoded string to decode
214/// * `dictionary` - The dictionary used for encoding
215///
216/// # Returns
217///
218/// A `Result` containing the decoded binary data, or a `DecodeError` if
219/// the input is invalid
220///
221/// # Errors
222///
223/// Returns `DecodeError` if:
224/// - The input contains invalid characters
225/// - The input is empty
226/// - The padding is invalid (for chunked mode)
227///
228/// # Examples
229///
230/// ```
231/// use base_d::{Dictionary, EncodingMode, encode, decode};
232///
233/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
234/// let chars: Vec<char> = "01".chars().collect();
235/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
236/// let data = b"Hi";
237/// let encoded = encode(data, &dictionary);
238/// let decoded = decode(&encoded, &dictionary)?;
239/// assert_eq!(data, &decoded[..]);
240/// # Ok(())
241/// # }
242/// ```
243pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
244 match dictionary.mode() {
245 EncodingMode::BaseConversion => encoders::encoding::decode(encoded, dictionary),
246 EncodingMode::Chunked => encoders::chunked::decode_chunked(encoded, dictionary),
247 EncodingMode::ByteRange => encoders::byte_range::decode_byte_range(encoded, dictionary),
248 }
249}
250
251#[cfg(test)]
252mod tests;