base_d/encoders/algorithms/schema/
frame.rs

1use super::display96;
2use super::types::SchemaError;
3use num_bigint::BigUint;
4use num_integer::Integer;
5use num_traits::Zero;
6
/// Opening frame delimiter: U+13379 EGYPTIAN HIEROGLYPH V011A (`𓍹`).
///
/// Together with [`FRAME_END`] this hieroglyph pair acts as parser-inert
/// "quotation marks" around encoded content. The pair was picked because it is:
/// - visually distinctive,
/// - unlikely to carry meaning for parsers or syntax highlighters,
/// - a clear signal that the enclosed text is special encoded content.
pub const FRAME_START: char = '\u{13379}';

/// Closing frame delimiter: U+1337A EGYPTIAN HIEROGLYPH V011B (`𓍺`).
///
/// Counterpart of [`FRAME_START`]; see that constant for the rationale.
pub const FRAME_END: char = '\u{1337A}';
14
15/// Encode binary data with display96 and wrap in frame delimiters
16///
17/// # Algorithm
18/// 1. Convert binary bytes to base-96 using display96 alphabet
19/// 2. Prepend FRAME_START delimiter
20/// 3. Append FRAME_END delimiter
21///
22/// # Example
23/// ```ignore
24/// let binary = vec![0x01, 0x02, 0x03];
25/// let framed = encode_framed(&binary);
26/// // Returns: "𓍹{base96_encoded_content}𓍺"
27/// ```
28pub fn encode_framed(binary: &[u8]) -> String {
29    let encoded = encode_base96(binary);
30    format!("{}{}{}", FRAME_START, encoded, FRAME_END)
31}
32
33/// Remove frame delimiters and decode display96 back to binary
34///
35/// # Errors
36/// - Returns `InvalidFrame` if delimiters are missing or malformed
37/// - Returns `InvalidCharacter` if non-alphabet chars are found
38///
39/// # Example
40/// ```ignore
41/// let framed = "𓍹{base96_content}𓍺";
42/// let binary = decode_framed(framed)?;
43/// ```
44pub fn decode_framed(encoded: &str) -> Result<Vec<u8>, SchemaError> {
45    // Show preview of what was received
46    let preview = if encoded.len() > 40 {
47        format!("{}...", &encoded.chars().take(40).collect::<String>())
48    } else {
49        encoded.to_string()
50    };
51
52    // Validate frame delimiters
53    if !encoded.starts_with(FRAME_START) {
54        return Err(SchemaError::InvalidFrame(format!(
55            "Missing start delimiter '{}' (U+{:04X}).\n\
56             Expected encoded data starting with {}...{}, but received:\n  {}\n\
57             Hint: To encode JSON, omit the -d flag.",
58            FRAME_START, FRAME_START as u32, FRAME_START, FRAME_END, preview
59        )));
60    }
61
62    if !encoded.ends_with(FRAME_END) {
63        return Err(SchemaError::InvalidFrame(format!(
64            "Missing end delimiter '{}' (U+{:04X}).\n\
65             Expected encoded data ending with {}, but received:\n  {}\n\
66             The data may be truncated or corrupted.",
67            FRAME_END, FRAME_END as u32, FRAME_END, preview
68        )));
69    }
70
71    // Strip delimiters
72    let start_len = FRAME_START.len_utf8();
73    let end_len = FRAME_END.len_utf8();
74    let content = &encoded[start_len..encoded.len() - end_len];
75
76    // Decode base96
77    decode_base96(content)
78}
79
80/// Encode bytes to display96 string using base-96 radix conversion
81///
82/// Uses BigUint for arbitrary precision, similar to radix.rs approach.
83fn encode_base96(data: &[u8]) -> String {
84    if data.is_empty() {
85        return String::new();
86    }
87
88    // Count leading zeros for efficient handling
89    let leading_zeros = data.iter().take_while(|&&b| b == 0).count();
90
91    // If all zeros, return early
92    if leading_zeros == data.len() {
93        return display96::char_at(0)
94            .unwrap()
95            .to_string()
96            .repeat(data.len());
97    }
98
99    let base = 96u32;
100    let mut num = BigUint::from_bytes_be(&data[leading_zeros..]);
101
102    // Pre-allocate result vector
103    let max_digits =
104        ((data.len() - leading_zeros) * 8 * 1000) / (base as f64).log2() as usize / 1000 + 1;
105    let mut result = Vec::with_capacity(max_digits + leading_zeros);
106
107    let base_big = BigUint::from(base);
108
109    while !num.is_zero() {
110        let (quotient, remainder) = num.div_rem(&base_big);
111        let digit = remainder.to_u64_digits();
112        let digit_val = if digit.is_empty() {
113            0
114        } else {
115            digit[0] as usize
116        };
117        result.push(display96::char_at(digit_val).unwrap());
118        num = quotient;
119    }
120
121    // Add leading zeros
122    for _ in 0..leading_zeros {
123        result.push(display96::char_at(0).unwrap());
124    }
125
126    result.reverse();
127    result.into_iter().collect()
128}
129
130/// Decode display96 string to bytes using base-96 radix conversion
131fn decode_base96(encoded: &str) -> Result<Vec<u8>, SchemaError> {
132    if encoded.is_empty() {
133        return Ok(Vec::new());
134    }
135
136    let base = 96u32;
137    let mut num = BigUint::from(0u8);
138    let base_big = BigUint::from(base);
139
140    let chars: Vec<char> = encoded.chars().collect();
141    let mut leading_zeros = 0;
142
143    for (pos, &c) in chars.iter().enumerate() {
144        let digit = display96::index_of(c).ok_or_else(|| {
145            // Detect if this looks like common wrong encodings
146            let hint = if c.is_ascii_alphanumeric() {
147                " (looks like Base64/hex - this is not the correct encoding)"
148            } else if c.is_ascii() {
149                " (ASCII characters are not valid in display96)"
150            } else {
151                ""
152            };
153
154            SchemaError::InvalidCharacter(format!(
155                "Invalid character '{}' (U+{:04X}) at position {} of {}{}.\n\
156                 Expected only display96 alphabet characters (box drawing, blocks, geometric shapes).",
157                c, c as u32, pos, chars.len(), hint
158            ))
159        })?;
160
161        if num.is_zero() && digit == 0 {
162            leading_zeros += 1;
163        } else {
164            num *= &base_big;
165            num += BigUint::from(digit);
166        }
167    }
168
169    // Handle all-zero case
170    if num.is_zero() && leading_zeros > 0 {
171        return Ok(vec![0u8; leading_zeros]);
172    }
173
174    let bytes = num.to_bytes_be();
175
176    // Construct result with leading zeros
177    let mut result = Vec::with_capacity(leading_zeros + bytes.len());
178    result.resize(leading_zeros, 0u8);
179    result.extend_from_slice(&bytes);
180
181    Ok(result)
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `framed` and asserts that it yields exactly `expected`.
    fn assert_decodes_to(framed: &str, expected: &[u8]) {
        let decoded = decode_framed(framed).unwrap();
        assert_eq!(decoded.as_slice(), expected);
    }

    /// Frames `bytes` and asserts that decoding restores them.
    fn assert_framed_roundtrip(bytes: &[u8]) {
        let framed = encode_framed(bytes);
        assert_decodes_to(&framed, bytes);
    }

    // The delimiters must stay on their documented code points.
    #[test]
    fn test_frame_delimiters() {
        assert_eq!(FRAME_START as u32, 0x13379);
        assert_eq!(FRAME_END as u32, 0x1337A);
    }

    // Empty input frames to the bare delimiter pair and decodes back to empty.
    #[test]
    fn test_encode_decode_empty() {
        let binary: Vec<u8> = Vec::new();
        let framed = encode_framed(&binary);
        assert_eq!(framed, format!("{}{}", FRAME_START, FRAME_END));

        assert_decodes_to(&framed, &binary);
    }

    #[test]
    fn test_encode_decode_single_byte() {
        let binary = [42u8];
        let framed = encode_framed(&binary);
        assert!(framed.starts_with(FRAME_START));
        assert!(framed.ends_with(FRAME_END));

        assert_decodes_to(&framed, &binary);
    }

    #[test]
    fn test_encode_decode_multiple_bytes() {
        assert_framed_roundtrip(&[0x01, 0x02, 0x03, 0x04, 0x05]);
    }

    // Input consisting only of zero bytes must keep its length.
    #[test]
    fn test_encode_decode_zeros() {
        assert_framed_roundtrip(&[0x00, 0x00, 0x00]);
    }

    // Zero bytes in front of a non-zero value must survive the roundtrip.
    #[test]
    fn test_encode_decode_leading_zeros() {
        assert_framed_roundtrip(&[0x00, 0x00, 0x42, 0xFF]);
    }

    #[test]
    fn test_encode_decode_large_values() {
        // 32 bytes of 0xFF
        assert_framed_roundtrip(&[0xFF; 32]);
    }

    #[test]
    fn test_decode_missing_start_delimiter() {
        let encoded = format!("test{}", FRAME_END);
        assert!(matches!(
            decode_framed(&encoded),
            Err(SchemaError::InvalidFrame(_))
        ));
    }

    #[test]
    fn test_decode_missing_end_delimiter() {
        let encoded = format!("{}test", FRAME_START);
        assert!(matches!(
            decode_framed(&encoded),
            Err(SchemaError::InvalidFrame(_))
        ));
    }

    // Plain ASCII letters between valid delimiters are not display96 digits.
    #[test]
    fn test_decode_invalid_character() {
        let encoded = format!("{}ABC{}", FRAME_START, FRAME_END);
        assert!(matches!(
            decode_framed(&encoded),
            Err(SchemaError::InvalidCharacter(_))
        ));
    }

    // Exercises the unframed codec directly, including zero placement cases.
    #[test]
    fn test_base96_roundtrip() {
        let test_cases: [Vec<u8>; 7] = [
            vec![0x00],
            vec![0x01],
            vec![0xFF],
            vec![0x01, 0x02, 0x03],
            vec![0x00, 0x42],
            vec![0x42, 0x00],
            (0u8..=255).collect(),
        ];

        for binary in &test_cases {
            let encoded = encode_base96(binary);
            let decoded = decode_base96(&encoded).unwrap();
            assert_eq!(&decoded, binary, "Failed for input: {:02X?}", binary);
        }
    }

    // Prints the framed form of a short message for eyeballing, then verifies it.
    #[test]
    fn test_visual_output() {
        let test_data = b"Hello, world!";
        let framed = encode_framed(test_data);

        println!("Input: {:02X?}", test_data);
        println!("Framed output: {}", framed);
        println!("Length: {} chars", framed.chars().count());

        assert_decodes_to(&framed, test_data);
    }
}