base_d/lib.rs
1#![allow(dead_code)]
2#![allow(deprecated)]
3#![allow(clippy::should_implement_trait)]
4#![allow(clippy::derivable_impls)]
5#![allow(clippy::manual_div_ceil)]
6#![allow(clippy::io_other_error)]
7#![allow(clippy::unnecessary_cast)]
8#![allow(clippy::collapsible_if)]
9#![allow(clippy::manual_range_contains)]
10#![allow(clippy::iter_nth_zero)]
11#![allow(clippy::map_identity)]
12#![allow(clippy::large_enum_variant)]
13#![allow(clippy::redundant_locals)]
14#![allow(clippy::manual_is_multiple_of)]
15#![allow(clippy::doc_lazy_continuation)]
16#![allow(clippy::collapsible_else_if)]
17#![allow(clippy::explicit_iter_loop)]
18#![allow(clippy::needless_range_loop)]
19
20//! # base-d
21//!
22//! A universal, multi-dictionary encoding library for Rust.
23//!
24//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
25//! emoji, playing cards, and more. Supports three encoding modes: mathematical
26//! base conversion, RFC 4648 chunked encoding, and direct byte-range mapping.
27//!
28//! ## Quick Start
29//!
30//! ```
31//! use base_d::{DictionaryRegistry, Dictionary, encode, decode};
32//!
33//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
34//! // Load built-in dictionaries
35//! let config = DictionaryRegistry::load_default()?;
36//! let base64_config = config.get_dictionary("base64").unwrap();
37//!
38//! // Create dictionary
39//! let chars: Vec<char> = base64_config.chars.chars().collect();
40//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
41//! let dictionary = Dictionary::new_with_mode(
42//! chars,
43//! base64_config.mode.clone(),
44//! padding
45//! )?;
46//!
47//! // Encode and decode
48//! let data = b"Hello, World!";
49//! let encoded = encode(data, &dictionary);
50//! let decoded = decode(&encoded, &dictionary)?;
51//! assert_eq!(data, &decoded[..]);
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! ## Features
57//!
58//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
59//! - **3 Encoding Modes**: Mathematical, chunked (RFC-compliant), byte-range
60//! - **Streaming Support**: Memory-efficient processing for large files
61//! - **Custom Dictionaries**: Define your own via TOML configuration
62//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
63//!
64//! ## Encoding Modes
65//!
66//! ### Mathematical Base Conversion
67//!
68//! Treats data as a large number. Works with any dictionary size.
69//!
70//! ```
71//! use base_d::{Dictionary, EncodingMode, encode};
72//!
73//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
74//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
75//! let dictionary = Dictionary::new_with_mode(
76//! chars,
77//! EncodingMode::BaseConversion,
78//! None
79//! )?;
80//!
81//! let encoded = encode(b"Hi", &dictionary);
82//! # Ok(())
83//! # }
84//! ```
85//!
86//! ### Chunked Mode (RFC 4648)
87//!
88//! Fixed-size bit groups, compatible with standard base64/base32.
89//!
90//! ```
91//! use base_d::{Dictionary, EncodingMode, encode};
92//!
93//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
94//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
95//! .chars().collect();
96//! let dictionary = Dictionary::new_with_mode(
97//! chars,
98//! EncodingMode::Chunked,
99//! Some('=')
100//! )?;
101//!
102//! let encoded = encode(b"Hello", &dictionary);
103//! assert_eq!(encoded, "SGVsbG8=");
104//! # Ok(())
105//! # }
106//! ```
107//!
108//! ### Byte Range Mode
109//!
110//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
111//!
112//! ```
113//! use base_d::{Dictionary, EncodingMode, encode};
114//!
115//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
116//! let dictionary = Dictionary::new_with_mode_and_range(
117//! Vec::new(),
118//! EncodingMode::ByteRange,
119//! None,
120//! Some(127991) // U+1F3F7
121//! )?;
122//!
123//! let data = b"Hi";
124//! let encoded = encode(data, &dictionary);
125//! assert_eq!(encoded.chars().count(), 2); // 1:1 mapping
126//! # Ok(())
127//! # }
128//! ```
129//!
130//! ## Streaming
131//!
//! For large files, use streaming to avoid loading the entire file into memory:
133//!
134//! ```no_run
135//! use base_d::{DictionaryRegistry, StreamingEncoder};
136//! use std::fs::File;
137//!
138//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
139//! let config = DictionaryRegistry::load_default()?;
140//! let dictionary_config = config.get_dictionary("base64").unwrap();
141//!
142//! // ... create dictionary from config
143//! # let chars: Vec<char> = dictionary_config.chars.chars().collect();
144//! # let padding = dictionary_config.padding.as_ref().and_then(|s| s.chars().next());
145//! # let dictionary = base_d::Dictionary::new_with_mode(chars, dictionary_config.mode.clone(), padding)?;
146//!
147//! let mut input = File::open("large_file.bin")?;
148//! let output = File::create("encoded.txt")?;
149//!
150//! let mut encoder = StreamingEncoder::new(&dictionary, output);
151//! encoder.encode(&mut input)?;
152//! # Ok(())
153//! # }
154//! ```
155
156mod core;
157mod encoders;
158mod features;
159
160mod simd;
161
162pub use core::config::{
163 CompressionConfig, DictionaryConfig, DictionaryRegistry, EncodingMode, Settings,
164};
165pub use core::dictionary::{Dictionary, DictionaryBuilder};
166pub use encoders::algorithms::{find_closest_dictionary, DecodeError, DictionaryNotFoundError};
167pub use encoders::streaming::{StreamingDecoder, StreamingEncoder};
168pub use features::{
169 compress, decompress, detect_dictionary, hash, hash_with_config, CompressionAlgorithm,
170 DictionaryDetector, DictionaryMatch, HashAlgorithm, XxHashConfig,
171};
172
173/// Encodes binary data using the specified dictionary.
174///
175/// Automatically selects the appropriate encoding strategy based on the
176/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
177///
178/// # Arguments
179///
180/// * `data` - The binary data to encode
181/// * `dictionary` - The dictionary to use for encoding
182///
183/// # Returns
184///
185/// A string containing the encoded data
186///
187/// # Examples
188///
189/// ```
190/// use base_d::{Dictionary, EncodingMode};
191///
192/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
193/// let chars: Vec<char> = "01".chars().collect();
194/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
195/// let encoded = base_d::encode(b"Hi", &dictionary);
196/// # Ok(())
197/// # }
198/// ```
199pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
200 match dictionary.mode() {
201 EncodingMode::BaseConversion => encoders::algorithms::math::encode(data, dictionary),
202 EncodingMode::Chunked => encoders::algorithms::chunked::encode_chunked(data, dictionary),
203 EncodingMode::ByteRange => {
204 encoders::algorithms::byte_range::encode_byte_range(data, dictionary)
205 }
206 }
207}
208
209/// Decodes a string back to binary data using the specified dictionary.
210///
211/// Automatically selects the appropriate decoding strategy based on the
212/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
213///
214/// # Arguments
215///
216/// * `encoded` - The encoded string to decode
217/// * `dictionary` - The dictionary used for encoding
218///
219/// # Returns
220///
221/// A `Result` containing the decoded binary data, or a `DecodeError` if
222/// the input is invalid
223///
224/// # Errors
225///
226/// Returns `DecodeError` if:
227/// - The input contains invalid characters
228/// - The input is empty
229/// - The padding is invalid (for chunked mode)
230///
231/// # Examples
232///
233/// ```
234/// use base_d::{Dictionary, EncodingMode, encode, decode};
235///
236/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
237/// let chars: Vec<char> = "01".chars().collect();
238/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
239/// let data = b"Hi";
240/// let encoded = encode(data, &dictionary);
241/// let decoded = decode(&encoded, &dictionary)?;
242/// assert_eq!(data, &decoded[..]);
243/// # Ok(())
244/// # }
245/// ```
246pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
247 match dictionary.mode() {
248 EncodingMode::BaseConversion => encoders::algorithms::math::decode(encoded, dictionary),
249 EncodingMode::Chunked => encoders::algorithms::chunked::decode_chunked(encoded, dictionary),
250 EncodingMode::ByteRange => {
251 encoders::algorithms::byte_range::decode_byte_range(encoded, dictionary)
252 }
253 }
254}
255
256#[cfg(test)]
257mod tests;