1#![no_std]
30
31extern crate alloc;
32use alloc::string::String;
33use alloc::vec::Vec;
34use core::ptr::copy_nonoverlapping;
35
36const ILLEGALS: [u8; 6] = [0, 10, 13, 34, 38, 92];
37const SHORTENED: u8 = 0b111;
38
39const ILLEGAL_IDX_DIRECT: [u8; 128] = {
40 let mut arr = [0xFF_u8; 128];
41 arr[0] = 0;
42 arr[10] = 1;
43 arr[13] = 2;
44 arr[34] = 3;
45 arr[38] = 4;
46 arr[92] = 5;
47 arr
48};
49
50const ILLEGAL_FLAG: [u8; 128] = {
51 let mut arr = [0_u8; 128];
52 arr[0] = 1;
53 arr[10] = 1;
54 arr[13] = 1;
55 arr[34] = 1;
56 arr[38] = 1;
57 arr[92] = 1;
58 arr
59};
60
61const ESCAPE_B1: [u8; 16] = {
62 let mut arr = [0_u8; 16];
63 let mut i = 0;
64 while i < 8 {
65 arr[i * 2] = 0b1100_0010 | ((i as u8) << 2);
66 arr[i * 2 + 1] = 0b1100_0010 | ((i as u8) << 2) | 1;
67 i += 1;
68 }
69 arr
70};
71
72#[inline(always)]
73unsafe fn emit_escape_idx(out_ptr: *mut u8, out_pos: &mut usize, idx: u8, next: u8) {
74 let key = ((idx as usize) << 1) | ((next >> 6) as usize);
75 unsafe {
76 *out_ptr.add(*out_pos) = *ESCAPE_B1.get_unchecked(key);
77 *out_ptr.add(*out_pos + 1) = 0x80 | (next & 0x3F);
78 }
79 *out_pos += 2;
80}
81
82#[inline(always)]
83unsafe fn emit_shortened(out_ptr: *mut u8, out_pos: &mut usize, bits: u8) {
84 let key = ((SHORTENED as usize) << 1) | ((bits >> 6) as usize);
85 unsafe {
86 *out_ptr.add(*out_pos) = *ESCAPE_B1.get_unchecked(key);
87 *out_ptr.add(*out_pos + 1) = 0x80 | (bits & 0x3F);
88 }
89 *out_pos += 2;
90}
91
92#[inline(always)]
93fn pull7_tail(tail: &[u8], pos: &mut usize, acc: &mut u64, acc_bits: &mut u32) -> Option<u8> {
94 while *acc_bits < 7 && *pos < tail.len() {
95 *acc = (*acc << 8) | tail[*pos] as u64;
96 *pos += 1;
97 *acc_bits += 8;
98 }
99 if *acc_bits >= 7 {
100 *acc_bits -= 7;
101 let bits = ((*acc >> *acc_bits) & 0x7F) as u8;
102 if *acc_bits == 0 {
103 *acc = 0;
104 } else {
105 *acc &= (1_u64 << *acc_bits) - 1;
106 }
107 Some(bits)
108 } else if *acc_bits > 0 {
109 let bits = ((*acc << (7 - *acc_bits)) & 0x7F) as u8;
110 *acc = 0;
111 *acc_bits = 0;
112 Some(bits)
113 } else {
114 None
115 }
116}
117
118#[inline(always)]
119fn split7(b: [u8; 7]) -> [u8; 8] {
120 [
121 b[0] >> 1,
122 ((b[0] & 0x01) << 6) | (b[1] >> 2),
123 ((b[1] & 0x03) << 5) | (b[2] >> 3),
124 ((b[2] & 0x07) << 4) | (b[3] >> 4),
125 ((b[3] & 0x0F) << 3) | (b[4] >> 5),
126 ((b[4] & 0x1F) << 2) | (b[5] >> 6),
127 ((b[5] & 0x3F) << 1) | (b[6] >> 7),
128 b[6] & 0x7F,
129 ]
130}
131
132#[inline(always)]
133fn encoded_capacity(input_len: usize) -> usize {
134 if input_len == 0 {
135 0
136 } else {
137 (input_len.saturating_mul(8).saturating_add(6) / 7).saturating_add(1)
138 }
139}
140
141#[inline(always)]
142fn decoded_capacity(encoded_len: usize) -> usize {
143 (encoded_len.saturating_mul(7).saturating_add(7) / 8).saturating_add(8)
144}
145
146#[inline(always)]
147unsafe fn process_groups8(
148 groups: &[u8; 8],
149 out_ptr: *mut u8,
150 out_pos: &mut usize,
151 pending_illegal_bits: &mut u8,
152 has_pending_illegal: &mut bool,
153) {
154 let mut j = 0usize;
155 if *has_pending_illegal {
156 unsafe {
157 emit_escape_idx(
158 out_ptr,
159 out_pos,
160 *ILLEGAL_IDX_DIRECT.get_unchecked(*pending_illegal_bits as usize),
161 groups[0],
162 )
163 };
164 *has_pending_illegal = false;
165 j = 1;
166 }
167 while j < 8 {
168 let cur = groups[j];
169 let idx = *unsafe { ILLEGAL_IDX_DIRECT.get_unchecked(cur as usize) };
170 if idx == 0xFF {
171 unsafe { *out_ptr.add(*out_pos) = cur };
172 *out_pos += 1;
173 j += 1;
174 } else if j + 1 < 8 {
175 unsafe { emit_escape_idx(out_ptr, out_pos, idx, groups[j + 1]) };
176 j += 2;
177 } else {
178 *pending_illegal_bits = cur;
179 *has_pending_illegal = true;
180 break;
181 }
182 }
183}
184
185pub fn encode(data: &[u8]) -> String {
207 if data.is_empty() {
208 return String::new();
209 }
210 let max_out = encoded_capacity(data.len());
211 let mut out = Vec::<u8>::with_capacity(max_out);
212 let out_ptr = out.as_mut_ptr();
213 let mut out_pos = 0usize;
214 let len = data.len();
215 let ptr = data.as_ptr();
216 let mut i = 0usize;
217 let mut pending_illegal_bits = 0u8;
218 let mut has_pending_illegal = false;
219
220 while i + 7 <= len {
221 let b = unsafe {
222 [
223 *ptr.add(i),
224 *ptr.add(i + 1),
225 *ptr.add(i + 2),
226 *ptr.add(i + 3),
227 *ptr.add(i + 4),
228 *ptr.add(i + 5),
229 *ptr.add(i + 6),
230 ]
231 };
232 let groups = split7(b);
233
234 if !has_pending_illegal {
235 let illegal = unsafe {
236 *ILLEGAL_FLAG.get_unchecked(groups[0] as usize)
237 | *ILLEGAL_FLAG.get_unchecked(groups[1] as usize)
238 | *ILLEGAL_FLAG.get_unchecked(groups[2] as usize)
239 | *ILLEGAL_FLAG.get_unchecked(groups[3] as usize)
240 | *ILLEGAL_FLAG.get_unchecked(groups[4] as usize)
241 | *ILLEGAL_FLAG.get_unchecked(groups[5] as usize)
242 | *ILLEGAL_FLAG.get_unchecked(groups[6] as usize)
243 | *ILLEGAL_FLAG.get_unchecked(groups[7] as usize)
244 };
245 if illegal == 0 {
246 unsafe {
247 copy_nonoverlapping(groups.as_ptr(), out_ptr.add(out_pos), 8);
248 }
249 out_pos += 8;
250 i += 7;
251 continue;
252 }
253 }
254 unsafe {
255 process_groups8(
256 &groups,
257 out_ptr,
258 &mut out_pos,
259 &mut pending_illegal_bits,
260 &mut has_pending_illegal,
261 );
262 }
263 i += 7;
264 }
265
266 let tail = &data[i..];
267 let mut tail_pos = 0usize;
268 let mut acc = 0u64;
269 let mut acc_bits = 0u32;
270
271 if has_pending_illegal {
272 if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
273 unsafe {
274 emit_escape_idx(
275 out_ptr,
276 &mut out_pos,
277 *ILLEGAL_IDX_DIRECT.get_unchecked(pending_illegal_bits as usize),
278 nb,
279 );
280 }
281 } else {
282 unsafe { emit_shortened(out_ptr, &mut out_pos, pending_illegal_bits) }
283 }
284 }
285 while let Some(cur) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
286 let idx = unsafe { *ILLEGAL_IDX_DIRECT.get_unchecked(cur as usize) };
287 if idx == 0xFF {
288 unsafe { *out_ptr.add(out_pos) = cur };
289 out_pos += 1;
290 } else if let Some(nb) = pull7_tail(tail, &mut tail_pos, &mut acc, &mut acc_bits) {
291 unsafe { emit_escape_idx(out_ptr, &mut out_pos, idx, nb) };
292 } else {
293 unsafe { emit_shortened(out_ptr, &mut out_pos, cur) };
294 break;
295 }
296 }
297 unsafe {
298 out.set_len(out_pos);
299 String::from_utf8_unchecked(out)
300 }
301}
302
303#[inline(always)]
304unsafe fn unpack8groups_scalar(
305 groups_ptr: *const u8,
306 out_ptr: *mut u8,
307 out_pos: &mut usize,
308 acc: &mut u64,
309 acc_bits: &mut u32,
310) {
311 let g0 = unsafe { *groups_ptr } as u64;
312 let g1 = unsafe { *groups_ptr.add(1) } as u64;
313 let g2 = unsafe { *groups_ptr.add(2) } as u64;
314 let g3 = unsafe { *groups_ptr.add(3) } as u64;
315 let g4 = unsafe { *groups_ptr.add(4) } as u64;
316 let g5 = unsafe { *groups_ptr.add(5) } as u64;
317 let g6 = unsafe { *groups_ptr.add(6) } as u64;
318 let g7 = unsafe { *groups_ptr.add(7) } as u64;
319
320 let bits56 = (g0 << 49)
321 | (g1 << 42)
322 | (g2 << 35)
323 | (g3 << 28)
324 | (g4 << 21)
325 | (g5 << 14)
326 | (g6 << 7)
327 | g7;
328
329 let combined = (*acc << 56) | bits56;
330 let k = *acc_bits as usize;
331 unsafe {
332 *out_ptr.add(*out_pos) = (combined >> (k + 48)) as u8;
333 *out_ptr.add(*out_pos + 1) = (combined >> (k + 40)) as u8;
334 *out_ptr.add(*out_pos + 2) = (combined >> (k + 32)) as u8;
335 *out_ptr.add(*out_pos + 3) = (combined >> (k + 24)) as u8;
336 *out_ptr.add(*out_pos + 4) = (combined >> (k + 16)) as u8;
337 *out_ptr.add(*out_pos + 5) = (combined >> (k + 8)) as u8;
338 *out_ptr.add(*out_pos + 6) = (combined >> k) as u8;
339 }
340 *out_pos += 7;
341
342 *acc = if *acc_bits == 0 {
343 0
344 } else {
345 combined & ((1u64 << *acc_bits) - 1)
346 };
347}
348
349#[inline(always)]
350unsafe fn push7_scalar(
351 out_ptr: *mut u8,
352 out_pos: &mut usize,
353 acc: &mut u64,
354 acc_bits: &mut u32,
355 bits: u8,
356) {
357 *acc = (*acc << 7) | ((bits & 0x7F) as u64);
358 *acc_bits += 7;
359 if *acc_bits >= 8 {
360 *acc_bits -= 8;
361 unsafe { *out_ptr.add(*out_pos) = ((*acc >> *acc_bits) & 0xFF) as u8 };
362 *out_pos += 1;
363 if *acc_bits == 0 {
364 *acc = 0;
365 } else {
366 *acc &= (1_u64 << *acc_bits) - 1;
367 }
368 }
369}
370
371pub fn decode(encoded: &str) -> Result<Vec<u8>, &'static str> {
394 if encoded.is_empty() {
395 return Ok(Vec::new());
396 }
397 let bytes = encoded.as_bytes();
398 let len = bytes.len();
399 let estimated = decoded_capacity(len);
400 let mut out = Vec::<u8>::with_capacity(estimated);
401 let out_ptr = out.as_mut_ptr();
402 let mut out_pos = 0usize;
403 let mut acc = 0u64;
404 let mut acc_bits = 0u32;
405 let ptr = bytes.as_ptr();
406 let mut i = 0usize;
407
408 while i < len {
409 while i + 8 <= len {
410 let chunk = u64::from_le(unsafe { (ptr.add(i) as *const u64).read_unaligned() });
411 if (chunk & 0x8080_8080_8080_8080u64) != 0 {
412 break;
413 }
414 unsafe {
415 unpack8groups_scalar(ptr.add(i), out_ptr, &mut out_pos, &mut acc, &mut acc_bits);
416 }
417 i += 8;
418 }
419
420 while i < len {
421 let b = unsafe { *ptr.add(i) };
422 if b >= 128 {
423 break;
424 }
425 i += 1;
426 unsafe { push7_scalar(out_ptr, &mut out_pos, &mut acc, &mut acc_bits, b) };
427 }
428
429 if i >= len {
430 break;
431 }
432
433 let b1 = unsafe { *ptr.add(i) };
434 if (b1 & 0xE0) != 0xC0 {
435 return Err("Invalid lead byte");
436 }
437
438 if i + 1 >= len {
439 return Err("Unexpected end of input");
440 }
441
442 let b2 = unsafe { *ptr.add(i + 1) };
443 if (b2 & 0xC0) != 0x80 {
444 return Err("Invalid continuation byte");
445 }
446 i += 2;
447
448 let illegal_index = (b1 >> 2) & 7;
449 let first_bit = b1 & 1;
450 let low6 = b2 & 0x3F;
451
452 if illegal_index != SHORTENED {
453 if illegal_index as usize >= ILLEGALS.len() {
454 return Err("Illegal index out of bounds");
455 }
456 unsafe {
457 push7_scalar(
458 out_ptr,
459 &mut out_pos,
460 &mut acc,
461 &mut acc_bits,
462 ILLEGALS[illegal_index as usize],
463 );
464 }
465 }
466 unsafe {
467 push7_scalar(
468 out_ptr,
469 &mut out_pos,
470 &mut acc,
471 &mut acc_bits,
472 (first_bit << 6) | low6,
473 );
474 }
475 }
476
477 unsafe { out.set_len(out_pos) };
478 Ok(out)
479}
480
481#[cfg(test)]
482mod tests {
483 use alloc::{format, vec};
484
485 use super::*;
486
487 #[test]
488 fn test_empty() {
489 assert_eq!(encode(b""), "");
490 assert_eq!(decode("").unwrap(), b"");
491 }
492
493 #[test]
494 fn test_hello_world() {
495 let data = b"hello world";
496 let enc = encode(data);
497 let dec = decode(&enc).expect("decoding failed");
498 assert_eq!(dec, data);
499 }
500
501 #[test]
502 fn test_single_byte_values() {
503 for b in 0..=255u8 {
504 let data = vec![b];
505 let enc = encode(&data);
506 let dec = decode(&enc).expect(&format!("decoding failed for byte {}", b));
507 assert_eq!(dec, data, "failed for byte {}", b);
508 }
509 }
510
511 #[test]
512 fn test_various_lengths_roundtrip() {
513 for len in [
514 0, 1, 2, 3, 6, 7, 8, 9, 14, 15, 16, 17, 31, 32, 33, 100, 255, 256, 511, 512,
515 ] {
516 let data: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();
517 let enc = encode(&data);
518 let dec = decode(&enc).expect("decoding failed");
519 assert_eq!(dec, data, "roundtrip failed for length {}", len);
520 }
521 }
522
523 #[test]
524 fn test_all_illegal_bytes_handling() {
525 let data = b"\x00\x0A\x0D\x22\x26\x5C";
526 let enc = encode(data);
527 let dec = decode(&enc).expect("decoding failed");
528 assert_eq!(dec, data.as_ref());
529 }
530
531 #[test]
532 fn test_mixed_content() {
533 let data: Vec<u8> = (0..=255).collect();
534 let enc = encode(&data);
535 let dec = decode(&enc).expect("decoding failed");
536 assert_eq!(dec, data);
537 }
538
539 #[test]
540 fn test_repeated_illegal_bytes() {
541 let data = vec![0u8; 100];
542 let enc = encode(&data);
543 let dec = decode(&enc).expect("decoding failed");
544 assert_eq!(dec, data);
545 }
546
547 #[test]
548 fn test_decode_invalid_lead_byte() {
549 let invalid = vec![0x80u8];
550 let s = unsafe { String::from_utf8_unchecked(invalid) };
551 assert!(decode(&s).is_err());
552 let invalid2 = vec![0xFFu8];
553 let s2 = unsafe { String::from_utf8_unchecked(invalid2) };
554 assert!(decode(&s2).is_err());
555 }
556
557 #[test]
558 fn test_decode_truncated_escape() {
559 let mut data = vec![0xC0u8];
560 let s = unsafe { String::from_utf8_unchecked(data.clone()) };
561 assert!(decode(&s).is_err());
562 data.push(0x40);
563 let s2 = unsafe { String::from_utf8_unchecked(data) };
564 assert!(decode(&s2).is_err());
565 }
566
567 #[test]
568 fn test_decode_invalid_continuation_byte() {
569 let data = vec![0xC2u8, 0xFF];
570 let s = unsafe { String::from_utf8_unchecked(data) };
571 assert!(decode(&s).is_err());
572 }
573
574 #[test]
575 fn test_shortened_at_end() {
576 let data = vec![0u8];
577 let enc = encode(&data);
578 let dec = decode(&enc).expect("decode failed");
579 assert_eq!(dec, data);
580 }
581
582 #[test]
583 fn test_very_long_input() {
584 let data = vec![0x55u8; 100000];
585 let enc = encode(&data);
586 let dec = decode(&enc).expect("decode failed");
587 assert_eq!(dec, data);
588 }
589}