base_d/lib.rs
1//! # base-d
2//!
3//! A universal, multi-dictionary encoding library for Rust.
4//!
5//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
6//! emoji, playing cards, and more. Supports three encoding modes: mathematical
7//! base conversion, RFC 4648 chunked encoding, and direct byte-range mapping.
8//!
9//! ## Quick Start
10//!
11//! ```
12//! use base_d::{DictionaryRegistry, Dictionary, encode, decode};
13//!
14//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
15//! // Load built-in dictionaries
16//! let config = DictionaryRegistry::load_default()?;
17//! let base64_config = config.get_dictionary("base64").unwrap();
18//!
19//! // Create dictionary
20//! let chars: Vec<char> = base64_config.chars.chars().collect();
21//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
22//! let mut builder = Dictionary::builder()
23//! .chars(chars)
24//! .mode(base64_config.mode.clone());
25//! if let Some(p) = padding {
26//! builder = builder.padding(p);
27//! }
28//! let dictionary = builder.build()?;
29//!
30//! // Encode and decode
31//! let data = b"Hello, World!";
32//! let encoded = encode(data, &dictionary);
33//! let decoded = decode(&encoded, &dictionary)?;
34//! assert_eq!(data, &decoded[..]);
35//! # Ok(())
36//! # }
37//! ```
38//!
39//! ## Features
40//!
41//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
42//! - **3 Encoding Modes**: Mathematical, chunked (RFC-compliant), byte-range
43//! - **Streaming Support**: Memory-efficient processing for large files
44//! - **Custom Dictionaries**: Define your own via TOML configuration
45//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
46//!
47//! ## Encoding Modes
48//!
49//! ### Mathematical Base Conversion
50//!
51//! Treats data as a large number. Works with any dictionary size.
52//!
53//! ```
54//! use base_d::{Dictionary, EncodingMode, encode};
55//!
56//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
57//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
58//! let dictionary = Dictionary::builder()
59//! .chars(chars)
60//! .mode(EncodingMode::BaseConversion)
61//! .build()?;
62//!
63//! let encoded = encode(b"Hi", &dictionary);
64//! # Ok(())
65//! # }
66//! ```
67//!
68//! ### Chunked Mode (RFC 4648)
69//!
70//! Fixed-size bit groups, compatible with standard base64/base32.
71//!
72//! ```
73//! use base_d::{Dictionary, EncodingMode, encode};
74//!
75//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
76//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
77//! .chars().collect();
78//! let dictionary = Dictionary::builder()
79//! .chars(chars)
80//! .mode(EncodingMode::Chunked)
81//! .padding('=')
82//! .build()?;
83//!
84//! let encoded = encode(b"Hello", &dictionary);
85//! assert_eq!(encoded, "SGVsbG8=");
86//! # Ok(())
87//! # }
88//! ```
89//!
90//! ### Byte Range Mode
91//!
92//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
93//!
94//! ```
95//! use base_d::{Dictionary, EncodingMode, encode};
96//!
97//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
98//! let dictionary = Dictionary::builder()
99//! .mode(EncodingMode::ByteRange)
100//! .start_codepoint(127991) // U+1F3F7
101//! .build()?;
102//!
103//! let data = b"Hi";
104//! let encoded = encode(data, &dictionary);
105//! assert_eq!(encoded.chars().count(), 2); // 1:1 mapping
106//! # Ok(())
107//! # }
108//! ```
109//!
110//! ## Streaming
111//!
//! For large files, use streaming to avoid loading the entire file into memory:
113//!
114//! ```no_run
115//! use base_d::{DictionaryRegistry, StreamingEncoder};
116//! use std::fs::File;
117//!
118//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
119//! let config = DictionaryRegistry::load_default()?;
120//! let dictionary_config = config.get_dictionary("base64").unwrap();
121//!
122//! // ... create dictionary from config
123//! # let chars: Vec<char> = dictionary_config.chars.chars().collect();
124//! # let padding = dictionary_config.padding.as_ref().and_then(|s| s.chars().next());
125//! # let mut builder = base_d::Dictionary::builder().chars(chars).mode(dictionary_config.mode.clone());
126//! # if let Some(p) = padding { builder = builder.padding(p); }
127//! # let dictionary = builder.build()?;
128//!
129//! let mut input = File::open("large_file.bin")?;
130//! let output = File::create("encoded.txt")?;
131//!
132//! let mut encoder = StreamingEncoder::new(&dictionary, output);
133//! encoder.encode(&mut input)?;
134//! # Ok(())
135//! # }
136//! ```
137
138mod core;
139mod encoders;
140mod features;
141
142mod simd;
143
144pub use core::config::{
145 CompressionConfig, DictionaryConfig, DictionaryRegistry, EncodingMode, Settings,
146};
147pub use core::dictionary::{Dictionary, DictionaryBuilder};
148pub use encoders::algorithms::{DecodeError, DictionaryNotFoundError, find_closest_dictionary};
149pub use encoders::streaming::{StreamingDecoder, StreamingEncoder};
150pub use features::{
151 CompressionAlgorithm, DictionaryDetector, DictionaryMatch, HashAlgorithm, XxHashConfig,
152 compress, decompress, detect_dictionary, hash, hash_with_config,
153};
154
155/// Encodes binary data using the specified dictionary.
156///
157/// Automatically selects the appropriate encoding strategy based on the
158/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
159///
160/// # Arguments
161///
162/// * `data` - The binary data to encode
163/// * `dictionary` - The dictionary to use for encoding
164///
165/// # Returns
166///
167/// A string containing the encoded data
168///
169/// # Examples
170///
171/// ```
172/// use base_d::{Dictionary, EncodingMode};
173///
174/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
175/// let chars: Vec<char> = "01".chars().collect();
176/// let dictionary = Dictionary::builder()
177/// .chars(chars)
178/// .mode(EncodingMode::BaseConversion)
179/// .build()?;
180/// let encoded = base_d::encode(b"Hi", &dictionary);
181/// # Ok(())
182/// # }
183/// ```
184pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
185 match dictionary.mode() {
186 EncodingMode::BaseConversion => encoders::algorithms::math::encode(data, dictionary),
187 EncodingMode::Chunked => encoders::algorithms::chunked::encode_chunked(data, dictionary),
188 EncodingMode::ByteRange => {
189 encoders::algorithms::byte_range::encode_byte_range(data, dictionary)
190 }
191 }
192}
193
194/// Decodes a string back to binary data using the specified dictionary.
195///
196/// Automatically selects the appropriate decoding strategy based on the
197/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
198///
199/// # Arguments
200///
201/// * `encoded` - The encoded string to decode
202/// * `dictionary` - The dictionary used for encoding
203///
204/// # Returns
205///
206/// A `Result` containing the decoded binary data, or a `DecodeError` if
207/// the input is invalid
208///
209/// # Errors
210///
211/// Returns `DecodeError` if:
212/// - The input contains invalid characters
213/// - The input is empty
214/// - The padding is invalid (for chunked mode)
215///
216/// # Examples
217///
218/// ```
219/// use base_d::{Dictionary, EncodingMode, encode, decode};
220///
221/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
222/// let chars: Vec<char> = "01".chars().collect();
223/// let dictionary = Dictionary::builder()
224/// .chars(chars)
225/// .mode(EncodingMode::BaseConversion)
226/// .build()?;
227/// let data = b"Hi";
228/// let encoded = encode(data, &dictionary);
229/// let decoded = decode(&encoded, &dictionary)?;
230/// assert_eq!(data, &decoded[..]);
231/// # Ok(())
232/// # }
233/// ```
234pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
235 match dictionary.mode() {
236 EncodingMode::BaseConversion => encoders::algorithms::math::decode(encoded, dictionary),
237 EncodingMode::Chunked => encoders::algorithms::chunked::decode_chunked(encoded, dictionary),
238 EncodingMode::ByteRange => {
239 encoders::algorithms::byte_range::decode_byte_range(encoded, dictionary)
240 }
241 }
242}
243
244#[cfg(test)]
245mod tests;