base_d/lib.rs
1#![allow(dead_code)]
2#![allow(clippy::should_implement_trait)]
3#![allow(clippy::derivable_impls)]
4#![allow(clippy::manual_div_ceil)]
5#![allow(clippy::io_other_error)]
6#![allow(clippy::unnecessary_cast)]
7#![allow(clippy::collapsible_if)]
8#![allow(clippy::manual_range_contains)]
9#![allow(clippy::iter_nth_zero)]
10#![allow(clippy::map_identity)]
11#![allow(clippy::large_enum_variant)]
12#![allow(clippy::redundant_locals)]
13#![allow(clippy::manual_is_multiple_of)]
14#![allow(clippy::doc_lazy_continuation)]
15#![allow(clippy::collapsible_else_if)]
16#![allow(clippy::explicit_iter_loop)]
17#![allow(clippy::needless_range_loop)]
18
19//! # base-d
20//!
21//! A universal, multi-dictionary encoding library for Rust.
22//!
23//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
24//! emoji, playing cards, and more. Supports three encoding modes: mathematical
25//! base conversion, RFC 4648 chunked encoding, and direct byte-range mapping.
26//!
27//! ## Quick Start
28//!
29//! ```
30//! use base_d::{DictionaryRegistry, Dictionary, encode, decode};
31//!
32//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
33//! // Load built-in dictionaries
34//! let config = DictionaryRegistry::load_default()?;
35//! let base64_config = config.get_dictionary("base64").unwrap();
36//!
37//! // Create dictionary
38//! let chars: Vec<char> = base64_config.chars.chars().collect();
39//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
40//! let dictionary = Dictionary::new_with_mode(
41//! chars,
42//! base64_config.mode.clone(),
43//! padding
44//! )?;
45//!
46//! // Encode and decode
47//! let data = b"Hello, World!";
48//! let encoded = encode(data, &dictionary);
49//! let decoded = decode(&encoded, &dictionary)?;
50//! assert_eq!(data, &decoded[..]);
51//! # Ok(())
52//! # }
53//! ```
54//!
55//! ## Features
56//!
57//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
58//! - **3 Encoding Modes**: Mathematical, chunked (RFC-compliant), byte-range
59//! - **Streaming Support**: Memory-efficient processing for large files
60//! - **Custom Dictionaries**: Define your own via TOML configuration
61//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
62//!
63//! ## Encoding Modes
64//!
65//! ### Mathematical Base Conversion
66//!
67//! Treats data as a large number. Works with any dictionary size.
68//!
69//! ```
70//! use base_d::{Dictionary, EncodingMode, encode};
71//!
72//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
73//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
74//! let dictionary = Dictionary::new_with_mode(
75//! chars,
76//! EncodingMode::BaseConversion,
77//! None
78//! )?;
79//!
80//! let encoded = encode(b"Hi", &dictionary);
81//! # Ok(())
82//! # }
83//! ```
84//!
85//! ### Chunked Mode (RFC 4648)
86//!
87//! Fixed-size bit groups, compatible with standard base64/base32.
88//!
89//! ```
90//! use base_d::{Dictionary, EncodingMode, encode};
91//!
92//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
93//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
94//! .chars().collect();
95//! let dictionary = Dictionary::new_with_mode(
96//! chars,
97//! EncodingMode::Chunked,
98//! Some('=')
99//! )?;
100//!
101//! let encoded = encode(b"Hello", &dictionary);
102//! assert_eq!(encoded, "SGVsbG8=");
103//! # Ok(())
104//! # }
105//! ```
106//!
107//! ### Byte Range Mode
108//!
109//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
110//!
111//! ```
112//! use base_d::{Dictionary, EncodingMode, encode};
113//!
114//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
115//! let dictionary = Dictionary::new_with_mode_and_range(
116//! Vec::new(),
117//! EncodingMode::ByteRange,
118//! None,
119//! Some(127991) // U+1F3F7
120//! )?;
121//!
122//! let data = b"Hi";
123//! let encoded = encode(data, &dictionary);
124//! assert_eq!(encoded.chars().count(), 2); // 1:1 mapping
125//! # Ok(())
126//! # }
127//! ```
128//!
129//! ## Streaming
130//!
//! For large files, use streaming to avoid loading the entire file into memory:
132//!
133//! ```no_run
134//! use base_d::{DictionaryRegistry, StreamingEncoder};
135//! use std::fs::File;
136//!
137//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
138//! let config = DictionaryRegistry::load_default()?;
139//! let dictionary_config = config.get_dictionary("base64").unwrap();
140//!
141//! // ... create dictionary from config
142//! # let chars: Vec<char> = dictionary_config.chars.chars().collect();
143//! # let padding = dictionary_config.padding.as_ref().and_then(|s| s.chars().next());
144//! # let dictionary = base_d::Dictionary::new_with_mode(chars, dictionary_config.mode.clone(), padding)?;
145//!
146//! let mut input = File::open("large_file.bin")?;
147//! let output = File::create("encoded.txt")?;
148//!
149//! let mut encoder = StreamingEncoder::new(&dictionary, output);
150//! encoder.encode(&mut input)?;
151//! # Ok(())
152//! # }
153//! ```
154
155mod core;
156mod encoders;
157mod features;
158
159mod simd;
160
161pub use core::config::{
162 CompressionConfig, DictionaryConfig, DictionaryRegistry, EncodingMode, Settings,
163};
164pub use core::dictionary::Dictionary;
165pub use encoders::algorithms::{find_closest_dictionary, DecodeError, DictionaryNotFoundError};
166pub use encoders::streaming::{StreamingDecoder, StreamingEncoder};
167pub use features::{
168 compress, decompress, detect_dictionary, hash, hash_with_config, CompressionAlgorithm,
169 DictionaryDetector, DictionaryMatch, HashAlgorithm, XxHashConfig,
170};
171
172/// Encodes binary data using the specified dictionary.
173///
174/// Automatically selects the appropriate encoding strategy based on the
175/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
176///
177/// # Arguments
178///
179/// * `data` - The binary data to encode
180/// * `dictionary` - The dictionary to use for encoding
181///
182/// # Returns
183///
184/// A string containing the encoded data
185///
186/// # Examples
187///
188/// ```
189/// use base_d::{Dictionary, EncodingMode};
190///
191/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
192/// let chars: Vec<char> = "01".chars().collect();
193/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
194/// let encoded = base_d::encode(b"Hi", &dictionary);
195/// # Ok(())
196/// # }
197/// ```
198pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
199 match dictionary.mode() {
200 EncodingMode::BaseConversion => encoders::algorithms::math::encode(data, dictionary),
201 EncodingMode::Chunked => encoders::algorithms::chunked::encode_chunked(data, dictionary),
202 EncodingMode::ByteRange => {
203 encoders::algorithms::byte_range::encode_byte_range(data, dictionary)
204 }
205 }
206}
207
208/// Decodes a string back to binary data using the specified dictionary.
209///
210/// Automatically selects the appropriate decoding strategy based on the
211/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
212///
213/// # Arguments
214///
215/// * `encoded` - The encoded string to decode
216/// * `dictionary` - The dictionary used for encoding
217///
218/// # Returns
219///
220/// A `Result` containing the decoded binary data, or a `DecodeError` if
221/// the input is invalid
222///
223/// # Errors
224///
225/// Returns `DecodeError` if:
226/// - The input contains invalid characters
227/// - The input is empty
228/// - The padding is invalid (for chunked mode)
229///
230/// # Examples
231///
232/// ```
233/// use base_d::{Dictionary, EncodingMode, encode, decode};
234///
235/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
236/// let chars: Vec<char> = "01".chars().collect();
237/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
238/// let data = b"Hi";
239/// let encoded = encode(data, &dictionary);
240/// let decoded = decode(&encoded, &dictionary)?;
241/// assert_eq!(data, &decoded[..]);
242/// # Ok(())
243/// # }
244/// ```
245pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
246 match dictionary.mode() {
247 EncodingMode::BaseConversion => encoders::algorithms::math::decode(encoded, dictionary),
248 EncodingMode::Chunked => encoders::algorithms::chunked::decode_chunked(encoded, dictionary),
249 EncodingMode::ByteRange => {
250 encoders::algorithms::byte_range::decode_byte_range(encoded, dictionary)
251 }
252 }
253}
254
255#[cfg(test)]
256mod tests;