base_d/lib.rs
1//! # base-d
2//!
3//! A universal, multi-dictionary encoding library for Rust.
4//!
5//! Encode binary data using numerous dictionaries including RFC standards, ancient scripts,
6//! emoji, playing cards, and more. Supports three encoding modes: mathematical
7//! base conversion, RFC 4648 chunked encoding, and direct byte-range mapping.
8//!
9//! ## Quick Start
10//!
11//! ```
12//! use base_d::{DictionariesConfig, Dictionary, encode, decode};
13//!
14//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
15//! // Load built-in dictionaries
16//! let config = DictionariesConfig::load_default()?;
17//! let base64_config = config.get_dictionary("base64").unwrap();
18//!
19//! // Create dictionary
20//! let chars: Vec<char> = base64_config.chars.chars().collect();
21//! let padding = base64_config.padding.as_ref().and_then(|s| s.chars().next());
22//! let dictionary = Dictionary::new_with_mode(
23//! chars,
24//! base64_config.mode.clone(),
25//! padding
26//! )?;
27//!
28//! // Encode and decode
29//! let data = b"Hello, World!";
30//! let encoded = encode(data, &dictionary);
31//! let decoded = decode(&encoded, &dictionary)?;
32//! assert_eq!(data, &decoded[..]);
33//! # Ok(())
34//! # }
35//! ```
36//!
37//! ## Features
38//!
//! - **33 Built-in Dictionaries**: RFC standards, emoji, ancient scripts, and more
40//! - **3 Encoding Modes**: Mathematical, chunked (RFC-compliant), byte-range
41//! - **Streaming Support**: Memory-efficient processing for large files
//! - **Custom Dictionaries**: Define your own via TOML configuration
43//! - **User Configuration**: Load dictionaries from `~/.config/base-d/dictionaries.toml`
44//!
45//! ## Encoding Modes
46//!
47//! ### Mathematical Base Conversion
48//!
49//! Treats data as a large number. Works with any dictionary size.
50//!
51//! ```
52//! use base_d::{Dictionary, EncodingMode, encode};
53//!
54//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
55//! let chars: Vec<char> = "😀😁😂🤣😃😄😅😆".chars().collect();
56//! let dictionary = Dictionary::new_with_mode(
57//! chars,
58//! EncodingMode::BaseConversion,
59//! None
60//! )?;
61//!
62//! let encoded = encode(b"Hi", &dictionary);
63//! # Ok(())
64//! # }
65//! ```
66//!
67//! ### Chunked Mode (RFC 4648)
68//!
69//! Fixed-size bit groups, compatible with standard base64/base32.
70//!
71//! ```
72//! use base_d::{Dictionary, EncodingMode, encode};
73//!
74//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
75//! let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
76//! .chars().collect();
77//! let dictionary = Dictionary::new_with_mode(
78//! chars,
79//! EncodingMode::Chunked,
80//! Some('=')
81//! )?;
82//!
83//! let encoded = encode(b"Hello", &dictionary);
84//! assert_eq!(encoded, "SGVsbG8=");
85//! # Ok(())
86//! # }
87//! ```
88//!
89//! ### Byte Range Mode
90//!
91//! Direct 1:1 byte-to-emoji mapping. Zero encoding overhead.
92//!
93//! ```
94//! use base_d::{Dictionary, EncodingMode, encode};
95//!
96//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
97//! let dictionary = Dictionary::new_with_mode_and_range(
98//! Vec::new(),
99//! EncodingMode::ByteRange,
100//! None,
101//! Some(127991) // U+1F3F7
102//! )?;
103//!
104//! let data = b"Hi";
105//! let encoded = encode(data, &dictionary);
106//! assert_eq!(encoded.chars().count(), 2); // 1:1 mapping
107//! # Ok(())
108//! # }
109//! ```
110//!
111//! ## Streaming
112//!
//! For large files, use streaming to avoid loading the entire file into memory:
114//!
115//! ```no_run
116//! use base_d::{DictionariesConfig, StreamingEncoder};
117//! use std::fs::File;
118//!
119//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
120//! let config = DictionariesConfig::load_default()?;
121//! let alphabet_config = config.get_dictionary("base64").unwrap();
122//!
123//! // ... create dictionary from config
124//! # let chars: Vec<char> = alphabet_config.chars.chars().collect();
125//! # let padding = alphabet_config.padding.as_ref().and_then(|s| s.chars().next());
126//! # let dictionary = base_d::Dictionary::new_with_mode(chars, alphabet_config.mode.clone(), padding)?;
127//!
128//! let mut input = File::open("large_file.bin")?;
129//! let output = File::create("encoded.txt")?;
130//!
131//! let mut encoder = StreamingEncoder::new(&dictionary, output);
132//! encoder.encode(&mut input)?;
133//! # Ok(())
134//! # }
135//! ```
136
137mod core;
138mod encoders;
139mod compression;
140mod detection;
141mod hashing;
142
143#[cfg(target_arch = "x86_64")]
144mod simd;
145
146pub use core::dictionary::Dictionary;
147pub use core::config::{DictionariesConfig, DictionaryConfig, EncodingMode, CompressionConfig, Settings};
148pub use encoders::streaming::{StreamingEncoder, StreamingDecoder};
149pub use encoders::encoding::DecodeError;
150pub use compression::{CompressionAlgorithm, compress, decompress};
151pub use detection::{DictionaryDetector, DictionaryMatch, detect_dictionary};
152pub use hashing::{HashAlgorithm, hash, hash_with_config, XxHashConfig};
153
154/// Encodes binary data using the specified dictionary.
155///
156/// Automatically selects the appropriate encoding strategy based on the
157/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
158///
159/// # Arguments
160///
161/// * `data` - The binary data to encode
162/// * `dictionary` - The dictionary to use for encoding
163///
164/// # Returns
165///
166/// A string containing the encoded data
167///
168/// # Examples
169///
170/// ```
171/// use base_d::{Dictionary, EncodingMode};
172///
173/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
174/// let chars: Vec<char> = "01".chars().collect();
175/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
176/// let encoded = base_d::encode(b"Hi", &dictionary);
177/// # Ok(())
178/// # }
179/// ```
180pub fn encode(data: &[u8], dictionary: &Dictionary) -> String {
181 match dictionary.mode() {
182 EncodingMode::BaseConversion => encoders::encoding::encode(data, dictionary),
183 EncodingMode::Chunked => encoders::chunked::encode_chunked(data, dictionary),
184 EncodingMode::ByteRange => encoders::byte_range::encode_byte_range(data, dictionary),
185 }
186}
187
188/// Decodes a string back to binary data using the specified dictionary.
189///
190/// Automatically selects the appropriate decoding strategy based on the
191/// dictionary's mode (BaseConversion, Chunked, or ByteRange).
192///
193/// # Arguments
194///
195/// * `encoded` - The encoded string to decode
196/// * `dictionary` - The dictionary used for encoding
197///
198/// # Returns
199///
200/// A `Result` containing the decoded binary data, or a `DecodeError` if
201/// the input is invalid
202///
203/// # Errors
204///
205/// Returns `DecodeError` if:
206/// - The input contains invalid characters
207/// - The input is empty
208/// - The padding is invalid (for chunked mode)
209///
210/// # Examples
211///
212/// ```
213/// use base_d::{Dictionary, EncodingMode, encode, decode};
214///
215/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
216/// let chars: Vec<char> = "01".chars().collect();
217/// let dictionary = Dictionary::new_with_mode(chars, EncodingMode::BaseConversion, None)?;
218/// let data = b"Hi";
219/// let encoded = encode(data, &dictionary);
220/// let decoded = decode(&encoded, &dictionary)?;
221/// assert_eq!(data, &decoded[..]);
222/// # Ok(())
223/// # }
224/// ```
225pub fn decode(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
226 match dictionary.mode() {
227 EncodingMode::BaseConversion => encoders::encoding::decode(encoded, dictionary),
228 EncodingMode::Chunked => encoders::chunked::decode_chunked(encoded, dictionary),
229 EncodingMode::ByteRange => encoders::byte_range::decode_byte_range(encoded, dictionary),
230 }
231}
232
233#[cfg(test)]
234mod tests;