jeb/
lib.rs

1#![warn(
2    clippy::std_instead_of_core,
3    clippy::pedantic,
4    clippy::cargo,
5    clippy::nursery,
6    clippy::allow_attributes,
7    clippy::arbitrary_source_item_ordering
8)]
9#![expect(
10    missing_docs,
11    clippy::missing_errors_doc,
12    clippy::redundant_else,
13    clippy::needless_continue,
14    clippy::manual_assert,
15    clippy::cast_sign_loss,
16    clippy::cast_possible_truncation,
17    clippy::default_constructed_unit_structs,
18    clippy::too_long_first_doc_paragraph,
19    clippy::arbitrary_source_item_ordering,
20    clippy::missing_panics_doc
21)]
22#![allow(
23    clippy::unnecessary_wraps,
24    clippy::use_self,
25    mismatched_lifetime_syntaxes,
26    dead_code
27)]
28#![doc = include_str!("../README.md")]
29// cSpell:ignoreRegExp b"(\\?.){5}"
30
31pub mod byte_ranges;
32pub mod const_checked;
33pub mod errors;
34pub mod model;
35pub mod nodes;
36pub mod shell_tokenizer;
37
38
39pub use crate::{byte_ranges::*, const_checked::*, errors::*};
40
41
42
// Aliases for compatibility with the slop crate.
/// Alias of `Z85`, the 85-byte digit table that `encode_z85_block` indexes by
/// digit value.
pub const Z85_ALPHABET: &[u8; 85] = Z85;
/// Alias of `Z85_LUT`, the 256-entry table that `decode_z85_block` indexes by
/// digit byte.
pub const Z85_DECODE: [u8; 256] = Z85_LUT;
/// Alias of [`TARGET_RAW_BYTES`].
pub const MAX_TEXT_SIZE: usize = TARGET_RAW_BYTES;
47
48// MARK: encoding constants
49
/// This encoding uses base 85 for binary data.
pub const BASE_85: usize = 85;
/// This encoding works in 4-byte (32-bit) blocks.
pub const BLOCK_BYTES_4: usize = 4;
/// This encoding represents each block with 5 digits.
pub const BLOCK_DIGITS_5: usize = 5;

/// The prefix byte preceding raw data.
pub const RAW_PREFIX: u8 = b'|';
/// The default padding byte repeated after raw data to align following blocks.
pub const RAW_PADDING: u8 = b'.';

/// This encoding allows a maximum of roughly 200 MiB of raw data per raw chunk.
pub const MAX_RAW_BYTES: usize = usize_eq(208_802_508, MAX_RAW_BLOCKS * BLOCK_BYTES_4);
/// This encoding's number of raw blocks in a raw chunk is limited by the
/// maximum raw prefix size value that can fit in the initial block with
/// `RAW_PREFIX`.
pub const MAX_RAW_BLOCKS: usize = usize_eq(52_200_627, 2 + pow(BASE_85, BLOCK_DIGITS_5 - 1));

/// The number of digits required to encode a given number of bytes, indexed
/// by the byte count (`0..=BLOCK_BYTES_4`).
pub const BLOCK_DIGITS_BY_BYTES: [usize; BLOCK_BYTES_4 + 1] = [0, 2, 3, 4, 5];
/// The number of bytes encoded by a given number of digits, indexed by the
/// digit count (`0..=BLOCK_DIGITS_5`).
///
/// The entry for a single digit is `-1` cast to `usize`, i.e. `usize::MAX`.
/// Presumably this marks a lone digit as an invalid block length — TODO
/// confirm against callers.
pub const BLOCK_BYTES_BY_DIGITS: [usize; BLOCK_DIGITS_5 + 1] = [0, -1 as _, 1, 2, 3, 4];

/// When this encoding is used to convert binary data into lines of text, our
/// implementation limits each line to 80 digits.
pub const TARGET_LINE_SIZE_DIGITS: usize = 80;
/// When this encoding is split into 80 digit lines, each line contains 64 bytes
/// of data, which has a good chance of some alignment with binary data.
pub const TARGET_LINE_SIZE_BYTES: usize = usize_eq(
    64,
    div_exact(TARGET_LINE_SIZE_DIGITS * BLOCK_BYTES_4, BLOCK_DIGITS_5),
);

/// We encode a maximum of 64 KiB of raw data per raw chunk.
pub const TARGET_RAW_BYTES: usize = usize_eq(65_536, 64 * 1024);
/// We encode a maximum of 16 Ki blocks per raw chunk.
pub const TARGET_RAW_BLOCKS: usize = usize_eq(16_384, div_exact(TARGET_RAW_BYTES, BLOCK_BYTES_4));
88
89// MARK: high-level interface
90
/// Stateless entry point for encoding; obtain one with [`Encoder::default`].
#[derive(Debug, Default)]
pub struct Encoder;

impl Encoder {
    /// Encodes `_bytes` into its textual form.
    ///
    /// This is still a stub: the body is `unimplemented!()`, so every call
    /// panics and no value is ever returned.
    #[must_use]
    pub fn encode_bytes(&self, _bytes: &[u8]) -> Vec<u8> {
        unimplemented!()
    }
}
99
100#[derive(Default)]
101pub struct Decoder;
102
103impl Decoder {
104    pub fn decode_bytes(&self, _encoded: &[u8]) -> Result<Vec<u8>, Panic> {
105        unimplemented!()
106    }
107}
108
109#[must_use]
110pub fn encode(bytes: &[u8]) -> Vec<u8> {
111    Encoder::default().encode_bytes(bytes)
112}
113
114pub fn decode(encoded: &[u8]) -> Result<Vec<u8>, Panic> {
115    Decoder::default().decode_bytes(encoded)
116}
117
118// MARK: Z85 block ser/de
119
120/// Encodes a 4-byte (32-bit) binary block into a 5-digit Z85 block.
121#[must_use]
122pub const fn encode_z85_block(bytes: [u8; BLOCK_BYTES_4]) -> [u8; BLOCK_DIGITS_5] {
123    let mut encoded = [0u8; BLOCK_DIGITS_5];
124
125    let mut value = u32::from_be_bytes(bytes) as usize;
126
127    let mut encoded_index = BLOCK_DIGITS_5 - 1;
128    loop {
129        let digit_value = value % BASE_85;
130        value /= BASE_85;
131
132        let digit = Z85[digit_value];
133        encoded[encoded_index] = digit;
134
135        if encoded_index > 0 {
136            encoded_index -= 1;
137            continue;
138        } else {
139            break;
140        }
141    }
142
143    encoded
144}
145
146/// Decodes a 5-digit Z85 block into a 4-byte (32-bit) binary block.
147///
148/// Errors with `Panic` if an invalid digit is encountered or the value
149/// overflows.
150pub const fn decode_z85_block(
151    encoded: [u8; BLOCK_DIGITS_5],
152) -> Result<[u8; BLOCK_BYTES_4], &'static str> {
153    let mut value: u32 = 0;
154
155    let mut encoded_index = 0;
156    loop {
157        value = match value.checked_mul(BASE_85 as u32) {
158            Some(value) => value,
159            None => return Err("decode_z85_block failed: invalid overflowing leading digit"),
160        };
161
162        let digit = encoded[encoded_index];
163        let digit_value = Z85_LUT[digit as usize] as usize;
164
165        if digit_value >= BASE_85 {
166            return Err("decode_z85_block failed: invalid digit");
167        }
168
169        value = match value.checked_add(digit_value as u32) {
170            Some(value) => value,
171            None => return Err("decode_z85_block failed: invalid overflowing value"),
172        };
173
174        if encoded_index < BLOCK_DIGITS_5 - 1 {
175            encoded_index += 1;
176            continue;
177        } else {
178            break;
179        }
180    }
181
182    let bytes = value.to_be_bytes();
183
184    Ok(bytes)
185}
186
187/// Decodes a Z85 block, panicking on error.
188///
189/// # Panics
190///
191/// Panics if the encoded block contains invalid digits or the value overflows.
192#[must_use]
193pub const fn decode_z85_block_or_panic(encoded: [u8; BLOCK_DIGITS_5]) -> [u8; BLOCK_BYTES_4] {
194    match decode_z85_block(encoded) {
195        Ok(bytes) => bytes,
196        Err(err) => panic!("{}", err),
197    }
198}
199
// Checks `encode_z85_block` / `decode_z85_block` against one shared table of
// cases, once inside a `const` item and once with ordinary runtime asserts.
#[cfg(test)]
#[test]
#[expect(clippy::trivially_copy_pass_by_ref)]
fn test_z85_blocks() {
    // The case table. `expect` and `reject` are plain function names, so each
    // expansion below picks up whichever pair is defined in its own scope.
    macro_rules! assertions {
        () => {
            // Values on either side of each point where a digit rolls over,
            // followed by blocks around the largest 32-bit value.
            expect(b"00000", b"\x00\x00\x00\x00");
            expect(b"00001", b"\x00\x00\x00\x01");
            expect(b"0000#", b"\x00\x00\x00\x54");
            expect(b"00010", b"\x00\x00\x00\x55");
            expect(b"000##", b"\x00\x00\x1c\x38");
            expect(b"00100", b"\x00\x00\x1C\x39");
            expect(b"00###", b"\x00\x09\x5E\xEC");
            expect(b"01000", b"\x00\x09\x5E\xED");
            expect(b"0####", b"\x03\x1C\x84\xB0");
            expect(b"10000", b"\x03\x1C\x84\xB1");
            reject(b"#####");
            reject(b"#0000");
            reject(b"$0000");
            expect(b"%0000", b"\xFF\x22\x80\xB2");
            reject(b"%%%%%");
            expect(b"%nSc0", b"\xFF\xFF\xFF\xFF");
            reject(b"%nSc1");
            expect(b"%nSb#", b"\xFF\xFF\xFF\xFE");

            // Seventeen blocks of five digits each.
            expect(b"01234", b"\x00\x09\x98\x62");
            expect(b"56789", b"\x0F\xC7\x99\x43");
            expect(b"abcde", b"\x1F\x85\x9A\x24");
            expect(b"fghij", b"\x2F\x43\x9B\x05");
            expect(b"klmno", b"\x3F\x01\x9B\xE6");
            expect(b"pqrst", b"\x4E\xBF\x9C\xC7");
            expect(b"uvwxy", b"\x5E\x7D\x9D\xA8");
            expect(b"zABCD", b"\x6E\x3B\x9E\x89");
            expect(b"EFGHI", b"\x7D\xF9\x9F\x6A");
            expect(b"JKLMN", b"\x8D\xB7\xA0\x4B");
            expect(b"OPQRS", b"\x9D\x75\xA1\x2C");
            expect(b"TUVWX", b"\xAD\x33\xA2\x0D");
            expect(b"YZ.-:", b"\xBC\xF1\xA2\xEE");
            expect(b"+=^!/", b"\xCC\xAF\xA3\xCF");
            expect(b"*?&<>", b"\xDC\x6D\xA4\xB0");
            expect(b"()[]{", b"\xEC\x2B\xA5\x91");
            expect(b"}@%$#", b"\xFB\xE9\xA6\x72");

            // Blocks containing a byte that must not decode as a digit.
            reject(b"     ");
            reject(b" 0000");
            reject(b"0 000");
            reject(b"00 00");
            reject(b"00|00");
            reject(b"00_00");
            reject(b"00,00");
            reject(b"00;00");
            reject(b"00~00");
            reject(b"00`00");
            reject(b"00'00");
            reject(b"00\"00");
            reject(b"00\\00");
            reject(b"000 0");
            reject(b"0000 ");
            reject(b"\0\0\0\0\0");
            reject(b"\n\n\n\n\n");
            reject(b"\xFF\xFF\xFF\xFF\xFF");
        };
    }

    // Compile-time pass: the table runs inside a `const` item, so a panic in
    // any case is reported while building the test.
    const _: () = {
        // Round-trips one case in both directions.
        // NOTE(review): `bytes_eq` is defined elsewhere; presumably it panics
        // when its arguments differ — confirm.
        const fn expect(encoded: &[u8; BLOCK_DIGITS_5], bytes: &[u8; BLOCK_BYTES_4]) {
            bytes_eq(bytes, &decode_z85_block_or_panic(*encoded));
            bytes_eq(encoded, &encode_z85_block(*bytes));
        }

        // Requires decoding to fail for this block.
        const fn reject(encoded: &[u8; BLOCK_DIGITS_5]) {
            if decode_z85_block(*encoded).is_ok() {
                panic!("expected error decoding invalid z85 block, but it succeeded")
            }
        }

        assertions!();
    };

    // Runtime pass: the same table, checked with the standard assert macros.
    {
        // Round-trips one case in both directions.
        fn expect(encoded: &[u8; BLOCK_DIGITS_5], bytes: &[u8; BLOCK_BYTES_4]) {
            assert_eq!(Ok(bytes), decode_z85_block(*encoded).as_ref());
            assert_eq!(encoded, &encode_z85_block(*bytes));
        }

        // Requires decoding to fail for this block.
        fn reject(encoded: &[u8; BLOCK_DIGITS_5]) {
            assert!(decode_z85_block(*encoded).is_err());
        }

        assertions!();
    }
}
292
293#[must_use]
294pub const fn encoded_z85_length(byte_length: usize) -> usize {
295    let full_blocks = byte_length / BLOCK_BYTES_4;
296    let remaining_bytes = byte_length % BLOCK_BYTES_4;
297
298    let full_block_digits = full_blocks * BLOCK_DIGITS_5;
299    let remaining_block_digits = BLOCK_DIGITS_BY_BYTES[remaining_bytes];
300
301    full_block_digits + remaining_block_digits
302}
303
304#[must_use]
305pub const fn decoded_z85_length(digit_length: usize) -> usize {
306    let full_blocks = digit_length / BLOCK_DIGITS_5;
307    let remaining_digits = digit_length % BLOCK_DIGITS_5;
308
309    let full_block_bytes = full_blocks * BLOCK_BYTES_4;
310    let remaining_block_bytes = BLOCK_BYTES_BY_DIGITS[remaining_digits];
311
312    full_block_bytes + remaining_block_bytes
313}
314
315#[must_use]
316pub fn encode_z85(bytes: &[u8]) -> Vec<u8> {
317    let encoded_length = encoded_z85_length(bytes.len());
318    let mut output = Vec::with_capacity(encoded_length);
319
320    for bytes in bytes.chunks(BLOCK_BYTES_4) {
321        let byte_length = bytes.len();
322        let mut byte_block = [0x00; BLOCK_BYTES_4];
323        byte_block[..byte_length].copy_from_slice(bytes);
324
325        let encoded_length = BLOCK_DIGITS_BY_BYTES[bytes.len()];
326        let encoded_block = encode_z85_block(byte_block);
327        let encoded = &encoded_block[..encoded_length];
328
329        output.extend_from_slice(encoded);
330    }
331
332    debug_assert!(output.len() == encoded_length);
333
334    output
335}
336
337#[must_use]
338pub fn encode_jeb85(bytes: &[u8]) -> Vec<u8> {
339    let encoded_length = encoded_z85_length(bytes.len());
340    let mut output = Vec::with_capacity(encoded_length);
341
342    let mut raw_buffer = Vec::<u8>::new();
343
344    for bytes in bytes.chunks(BLOCK_BYTES_4) {
345        if bytes.iter().all(|b| ASCII_INLINE_TEXT_LUT[*b as usize]) {
346            raw_buffer.extend_from_slice(bytes);
347            continue;
348        }
349
350        if !raw_buffer.is_empty() {
351            let raw_block_count = raw_buffer.len() / BLOCK_BYTES_4;
352
353            if raw_block_count == 1 {
354                output.extend([RAW_PREFIX]);
355                output.extend(&raw_buffer);
356            } else {
357                let block_count_prefix_value = raw_block_count - 2;
358                let block_count_prefix_block = encode_z85_block(
359                    u32::try_from(block_count_prefix_value)
360                        .unwrap()
361                        .to_be_bytes(),
362                );
363                let mut block_count_prefix = &block_count_prefix_block[..];
364                while block_count_prefix.first() == Some(&b'0') {
365                    block_count_prefix = &block_count_prefix[1..];
366                }
367                let mut block_prefix = block_count_prefix.to_vec();
368                block_prefix.extend([RAW_PREFIX]);
369
370                let raw_block_digits = raw_block_count * BLOCK_DIGITS_5;
371                let padding_needed = raw_block_digits - block_prefix.len() - raw_buffer.len();
372
373                let mut padding = vec![RAW_PADDING; padding_needed];
374
375                let mut cosmetic_padding = Vec::new();
376                for byte in bytes {
377                    if ASCII_INLINE_TEXT_LUT[*byte as usize] {
378                        cosmetic_padding.push(*byte);
379                    } else {
380                        break;
381                    }
382                }
383                cosmetic_padding.push(RAW_PREFIX);
384
385                let available_len = padding.len().min(cosmetic_padding.len());
386                padding[..available_len].copy_from_slice(&cosmetic_padding[..available_len]);
387
388
389                output.extend(&block_prefix);
390                output.extend(&raw_buffer);
391                output.extend(&padding);
392            }
393
394            raw_buffer.clear();
395        }
396
397        let byte_length = bytes.len();
398        let mut byte_block = [0x00; BLOCK_BYTES_4];
399        byte_block[..byte_length].copy_from_slice(bytes);
400
401        let encoded_length = BLOCK_DIGITS_BY_BYTES[bytes.len()];
402        let encoded_block = encode_z85_block(byte_block);
403        let encoded = &encoded_block[..encoded_length];
404
405        output.extend_from_slice(encoded);
406    }
407
408    if !raw_buffer.is_empty() {
409        if raw_buffer.len() <= BLOCK_BYTES_4 {
410            output.push(RAW_PREFIX);
411        } else {
412            output.extend([RAW_PREFIX; 2]);
413        }
414        output.extend_from_slice(&raw_buffer);
415        raw_buffer.clear();
416    }
417
418    debug_assert!(output.len() <= encoded_length);
419
420    output
421}