Skip to main content

base_d/encoders/algorithms/schema/
frame.rs

1use super::super::errors::safe_truncate;
2use super::display96;
3use super::types::SchemaError;
4use num_bigint::BigUint;
5use num_integer::Integer;
6use num_traits::Zero;
7
/// Frame delimiters: a pair of Egyptian hieroglyph "quotation marks" that act
/// as parser-inert markers around encoded content.
///
/// They were picked because they:
/// - are visually distinctive,
/// - are unlikely to appear in parsers/syntax highlighters,
/// - clearly denote "special encoded content".
///
/// Opening delimiter `𓍹` (U+13379 EGYPTIAN HIEROGLYPH V011A).
pub const FRAME_START: char = '\u{13379}';
/// Closing delimiter `𓍺` (U+1337A EGYPTIAN HIEROGLYPH V011B).
pub const FRAME_END: char = '\u{1337A}';
15
16/// Encode binary data with display96 and wrap in frame delimiters
17///
18/// # Algorithm
19/// 1. Convert binary bytes to base-96 using display96 alphabet
20/// 2. Prepend FRAME_START delimiter
21/// 3. Append FRAME_END delimiter
22///
23/// # Example
24/// ```ignore
25/// let binary = vec![0x01, 0x02, 0x03];
26/// let framed = encode_framed(&binary);
27/// // Returns: "𓍹{base96_encoded_content}𓍺"
28/// ```
29pub fn encode_framed(binary: &[u8]) -> String {
30    let encoded = encode_base96(binary);
31    format!("{}{}{}", FRAME_START, encoded, FRAME_END)
32}
33
34/// Remove frame delimiters and decode display96 back to binary
35///
36/// # Errors
37/// - Returns `InvalidFrame` if delimiters are missing or malformed
38/// - Returns `InvalidCharacter` if non-alphabet chars are found
39///
40/// # Example
41/// ```ignore
42/// let framed = "𓍹{base96_content}𓍺";
43/// let binary = decode_framed(framed)?;
44/// ```
45pub fn decode_framed(encoded: &str) -> Result<Vec<u8>, SchemaError> {
46    // Show preview of what was received (char-safe truncation)
47    let preview = safe_truncate(encoded, 40);
48
49    // Validate frame delimiters
50    if !encoded.starts_with(FRAME_START) {
51        return Err(SchemaError::InvalidFrame(format!(
52            "Missing start delimiter '{}' (U+{:04X}).\n\
53             Expected encoded data starting with {}...{}, but received:\n  {}\n\
54             Hint: To encode JSON, omit the -d flag.",
55            FRAME_START, FRAME_START as u32, FRAME_START, FRAME_END, preview
56        )));
57    }
58
59    if !encoded.ends_with(FRAME_END) {
60        return Err(SchemaError::InvalidFrame(format!(
61            "Missing end delimiter '{}' (U+{:04X}).\n\
62             Expected encoded data ending with {}, but received:\n  {}\n\
63             The data may be truncated or corrupted.",
64            FRAME_END, FRAME_END as u32, FRAME_END, preview
65        )));
66    }
67
68    // Strip delimiters
69    let start_len = FRAME_START.len_utf8();
70    let end_len = FRAME_END.len_utf8();
71    let content = &encoded[start_len..encoded.len() - end_len];
72
73    // Decode base96
74    decode_base96(content)
75}
76
77/// Encode bytes to display96 string using base-96 radix conversion
78///
79/// Uses BigUint for arbitrary precision, similar to radix.rs approach.
80fn encode_base96(data: &[u8]) -> String {
81    if data.is_empty() {
82        return String::new();
83    }
84
85    // Count leading zeros for efficient handling
86    let leading_zeros = data.iter().take_while(|&&b| b == 0).count();
87
88    // If all zeros, return early
89    if leading_zeros == data.len() {
90        return display96::char_at(0)
91            .unwrap()
92            .to_string()
93            .repeat(data.len());
94    }
95
96    let base = 96u32;
97    let mut num = BigUint::from_bytes_be(&data[leading_zeros..]);
98
99    // Pre-allocate result vector
100    let max_digits =
101        ((data.len() - leading_zeros) * 8 * 1000) / (base as f64).log2() as usize / 1000 + 1;
102    let mut result = Vec::with_capacity(max_digits + leading_zeros);
103
104    let base_big = BigUint::from(base);
105
106    while !num.is_zero() {
107        let (quotient, remainder) = num.div_rem(&base_big);
108        let digit = remainder.to_u64_digits();
109        let digit_val = if digit.is_empty() {
110            0
111        } else {
112            digit[0] as usize
113        };
114        result.push(display96::char_at(digit_val).unwrap());
115        num = quotient;
116    }
117
118    // Add leading zeros
119    for _ in 0..leading_zeros {
120        result.push(display96::char_at(0).unwrap());
121    }
122
123    result.reverse();
124    result.into_iter().collect()
125}
126
127/// Decode display96 string to bytes using base-96 radix conversion
128fn decode_base96(encoded: &str) -> Result<Vec<u8>, SchemaError> {
129    if encoded.is_empty() {
130        return Ok(Vec::new());
131    }
132
133    let base = 96u32;
134    let mut num = BigUint::from(0u8);
135    let base_big = BigUint::from(base);
136
137    let chars: Vec<char> = encoded.chars().collect();
138    let mut leading_zeros = 0;
139
140    for (pos, &c) in chars.iter().enumerate() {
141        let digit = display96::index_of(c).ok_or_else(|| {
142            // Detect if this looks like common wrong encodings
143            let hint = if c.is_ascii_alphanumeric() {
144                " (looks like Base64/hex - this is not the correct encoding)"
145            } else if c.is_ascii() {
146                " (ASCII characters are not valid in display96)"
147            } else {
148                ""
149            };
150
151            SchemaError::InvalidCharacter(format!(
152                "Invalid character '{}' (U+{:04X}) at position {} of {}{}.\n\
153                 Expected only display96 alphabet characters (box drawing, blocks, geometric shapes).",
154                c, c as u32, pos, chars.len(), hint
155            ))
156        })?;
157
158        if num.is_zero() && digit == 0 {
159            leading_zeros += 1;
160        } else {
161            num *= &base_big;
162            num += BigUint::from(digit);
163        }
164    }
165
166    // Handle all-zero case
167    if num.is_zero() && leading_zeros > 0 {
168        return Ok(vec![0u8; leading_zeros]);
169    }
170
171    let bytes = num.to_bytes_be();
172
173    // Construct result with leading zeros
174    let mut result = Vec::with_capacity(leading_zeros + bytes.len());
175    result.resize(leading_zeros, 0u8);
176    result.extend_from_slice(&bytes);
177
178    Ok(result)
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Frames `input`, decodes the frame again and checks that the original
    /// bytes come back unchanged.
    fn assert_framed_roundtrip(input: &[u8]) {
        let framed = encode_framed(input);
        let decoded = decode_framed(&framed).unwrap();
        assert_eq!(decoded, input);
    }

    #[test]
    fn test_frame_delimiters() {
        assert_eq!(FRAME_START as u32, 0x13379);
        assert_eq!(FRAME_END as u32, 0x1337A);
    }

    #[test]
    fn test_encode_decode_empty() {
        let binary: Vec<u8> = Vec::new();

        // An empty payload still produces both delimiters.
        let framed = encode_framed(&binary);
        assert_eq!(framed, format!("{}{}", FRAME_START, FRAME_END));

        let decoded = decode_framed(&framed).unwrap();
        assert_eq!(decoded, binary);
    }

    #[test]
    fn test_encode_decode_single_byte() {
        let binary: Vec<u8> = vec![42];

        let framed = encode_framed(&binary);
        assert!(framed.starts_with(FRAME_START));
        assert!(framed.ends_with(FRAME_END));

        let decoded = decode_framed(&framed).unwrap();
        assert_eq!(decoded, binary);
    }

    #[test]
    fn test_encode_decode_multiple_bytes() {
        assert_framed_roundtrip(&[0x01, 0x02, 0x03, 0x04, 0x05]);
    }

    #[test]
    fn test_encode_decode_zeros() {
        assert_framed_roundtrip(&[0x00, 0x00, 0x00]);
    }

    #[test]
    fn test_encode_decode_leading_zeros() {
        assert_framed_roundtrip(&[0x00, 0x00, 0x42, 0xFF]);
    }

    #[test]
    fn test_encode_decode_large_values() {
        // 32 bytes of 0xFF
        assert_framed_roundtrip(&[0xFF; 32]);
    }

    #[test]
    fn test_decode_missing_start_delimiter() {
        let encoded = format!("test{}", FRAME_END);
        assert!(matches!(
            decode_framed(&encoded),
            Err(SchemaError::InvalidFrame(_))
        ));
    }

    #[test]
    fn test_decode_missing_end_delimiter() {
        let encoded = format!("{}test", FRAME_START);
        assert!(matches!(
            decode_framed(&encoded),
            Err(SchemaError::InvalidFrame(_))
        ));
    }

    #[test]
    fn test_decode_invalid_character() {
        let encoded = format!("{}ABC{}", FRAME_START, FRAME_END);
        assert!(matches!(
            decode_framed(&encoded),
            Err(SchemaError::InvalidCharacter(_))
        ));
    }

    #[test]
    fn test_base96_roundtrip() {
        let test_cases: Vec<Vec<u8>> = vec![
            vec![0x00],
            vec![0x01],
            vec![0xFF],
            vec![0x01, 0x02, 0x03],
            vec![0x00, 0x42],
            vec![0x42, 0x00],
            (0..=255).collect(),
        ];

        for binary in &test_cases {
            let encoded = encode_base96(binary);
            let decoded = decode_base96(&encoded).unwrap();
            assert_eq!(&decoded, binary, "Failed for input: {:02X?}", binary);
        }
    }

    #[test]
    fn test_visual_output() {
        let test_data = b"Hello, world!";
        let framed = encode_framed(test_data);

        println!("Input: {:02X?}", test_data);
        println!("Framed output: {}", framed);
        println!("Length: {} chars", framed.chars().count());

        let decoded = decode_framed(&framed).unwrap();
        assert_eq!(decoded, test_data);
    }
}