#![feature(array_chunks, core_intrinsics, int_roundings, test, portable_simd)]
#![no_std]
extern crate alloc;
use core::{
mem::{transmute, transmute_copy},
simd::Which::*,
simd::*,
slice,
};
use alloc::{string::String, vec};
/// Base64 encoder parameterised by a 64-entry output alphabet.
///
/// Build one with [`Encoder::new`] for the standard alphabet or
/// [`Encoder::with_encode_table`] for a custom one.
#[derive(Debug, Clone)]
pub struct Encoder {
    /// Output byte emitted for each 6-bit value, indexed by that value.
    encode_table: [u8; 64],
}
impl Encoder {
    /// The standard Base64 alphabet (RFC 4648, table 1).
    const STANDARD_TABLE: [u8; 64] =
        *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /// Creates an encoder that uses the standard Base64 alphabet
    /// (`A-Z`, `a-z`, `0-9`, `+`, `/`).
    pub const fn new() -> Self {
        Self {
            encode_table: Self::STANDARD_TABLE,
        }
    }

    /// Creates an encoder that emits `encode_table[v]` for every 6-bit value `v`.
    ///
    /// If the table contains non-ASCII bytes the encoded output may not be
    /// valid UTF-8; see the `# Panics` / `# Safety` sections of the encode
    /// methods.
    pub const fn with_encode_table(encode_table: [u8; 64]) -> Self {
        Self { encode_table }
    }

    /// Number of output bytes for `input_len` input bytes when the result is
    /// padded with `=` to a multiple of four.
    fn padded_len(input_len: usize) -> usize {
        input_len.div_ceil(3) * 4
    }

    /// Number of output bytes for `input_len` input bytes when no padding is
    /// emitted: four per full 3-byte group, plus 2 or 3 for a 1- or 2-byte tail.
    fn unpadded_len(input_len: usize) -> usize {
        let tail = match input_len % 3 {
            0 => 0,
            1 => 2,
            _ => 3,
        };
        input_len / 3 * 4 + tail
    }

    /// Encodes `buf` into the front of `out`.
    ///
    /// Input is consumed in 12-byte blocks (16 output bytes each, using SIMD
    /// to split the bytes into 6-bit values), then in 3-byte groups (4 output
    /// bytes each), and finally a 1- or 2-byte tail (2 or 3 output bytes).
    /// Bytes of `out` beyond the encoded data are left untouched, so callers
    /// that want padding pre-fill `out` with `=`.
    ///
    /// `out` must hold at least `unpadded_len(buf.len())` bytes; the tail
    /// write panics otherwise.
    #[rustfmt::skip]
    fn internal_encode(&self, buf: &[u8], out: &mut [u8]) {
        /// Splits the first 12 bytes of `input` into sixteen 6-bit values,
        /// one per output lane. Lanes 12..16 of `input` are ignored (the
        /// swizzle below never selects them).
        fn unpack_with_bswap(input: u8x16) -> u8x16 {
            let in_u8 = simd_swizzle!(input, [1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10,]);
            // SAFETY: `u8x16`, `u16x8` and `u32x4` are all 16 bytes wide and
            // every bit pattern is a valid value for each of them, so
            // reinterpreting one as another is sound.
            unsafe {
                let in_u32: u32x4 = transmute(in_u8);
                let t0 = in_u32 & u32x4::splat(0x0fc0fc00);
                let t0_u16 = transmute::<_, u16x8>(t0);
                // Even 16-bit lanes are shifted right by 10, odd lanes by 6.
                let t1 = simd_swizzle!(
                    t0_u16 >> Simd::splat(10),
                    t0_u16 >> Simd::splat(6),
                    [
                        First(0),
                        Second(1),
                        First(2),
                        Second(3),
                        First(4),
                        Second(5),
                        First(6),
                        Second(7),
                    ]
                );
                let t2 = in_u32 & u32x4::splat(0x003f03f0);
                // Multiplying by 0x10 / 0x100 is a left shift by 4 / 8.
                let t3 = transmute::<_, u16x8>(t2)
                    * u16x8::from_array([
                        0x0010, 0x0100, 0x0010, 0x0100, 0x0010, 0x0100, 0x0010, 0x0100,
                    ]);
                transmute(t1 | t3)
            }
        }

        /// Maps 6-bit values to characters of the *standard* alphabet by
        /// adding a per-range offset: +65 for 0..=25 (`A-Z`), +71 for
        /// 26..=51 (`a-z`), -4 for 52..=61 (`0-9`), -19 for 62 (`+`) and
        /// -16 for 63 (`/`).
        fn enc_translate(input: u8x16) -> u8x16 {
            let offsets: Simd<i8, 16> = Simd::from_array([
                65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
            ]);
            // 0 for values up to 51, otherwise 1..=12.
            let reduced = input.saturating_sub(Simd::splat(51));
            // 0xFF in lanes holding a value above 25, 0 elsewhere.
            let above_upper = input
                .simd_gt(Simd::splat(25))
                .to_int()
                .cast::<u8>();
            // Subtracting 0xFF wraps around, i.e. adds one to the offset index.
            let lut_index = reduced - above_upper;
            let shifted = input.cast::<i8>() + swizzle_dyn(offsets.cast::<u8>(), lut_index).cast::<i8>();
            shifted.cast::<u8>()
        }

        debug_assert!(out.len() >= Self::unpadded_len(buf.len()));

        let table = &self.encode_table;
        // `enc_translate` hard-codes the standard alphabet, so custom tables
        // take a per-byte table lookup after the SIMD unpack instead.
        let is_standard = *table == Self::STANDARD_TABLE;
        let mut written = 0;

        let blocks = buf.chunks_exact(12);
        let tail = blocks.remainder();
        for (block, dst) in blocks.zip(out.chunks_exact_mut(16)) {
            // Copy into a zero-padded buffer rather than reading 16 bytes
            // straight from `buf`: the last block may sit at the very end of
            // the input, where a 16-byte load would run out of bounds.
            let mut padded = [0u8; 16];
            padded[..12].copy_from_slice(block);
            let sextets = unpack_with_bswap(u8x16::from_array(padded));
            let encoded = if is_standard {
                enc_translate(sextets).to_array()
            } else {
                sextets.to_array().map(|v| table[usize::from(v)])
            };
            dst.copy_from_slice(&encoded);
            written += 16;
        }

        let triples = tail.chunks_exact(3);
        let tail = triples.remainder();
        for (src, dst) in triples.zip(out[written..].chunks_exact_mut(4)) {
            let group = u32::from_be_bytes([0, src[0], src[1], src[2]]);
            dst.copy_from_slice(&[
                table[((group >> 18) & 0x3f) as usize],
                table[((group >> 12) & 0x3f) as usize],
                table[((group >> 6) & 0x3f) as usize],
                table[(group & 0x3f) as usize],
            ]);
            written += 4;
        }

        let dst = &mut out[written..];
        match *tail {
            [a] => {
                dst[0] = table[usize::from(a >> 2)];
                dst[1] = table[usize::from((a & 0b0000_0011) << 4)];
            }
            [a, b] => {
                let group = u16::from_be_bytes([a, b]);
                dst[0] = table[usize::from(group >> 10)];
                dst[1] = table[usize::from((group >> 4) & 0x3f)];
                dst[2] = table[usize::from((group << 2) & 0x3f)];
            }
            _ => {}
        }
    }

    /// Encodes `bytes` as Base64, padding the result with `=` to a multiple
    /// of four characters.
    ///
    /// # Panics
    ///
    /// Panics if the encoded bytes are not valid UTF-8, which can only
    /// happen with a custom table containing non-ASCII bytes.
    pub fn encode<T>(&self, bytes: T) -> String
    where
        T: AsRef<[u8]>,
    {
        let buf = bytes.as_ref();
        // Pre-fill with '=' so that whatever the encoder leaves untouched is padding.
        let mut out = vec![b'='; Self::padded_len(buf.len())];
        self.internal_encode(buf, &mut out);
        String::from_utf8(out).expect("encode table produced invalid UTF-8")
    }

    /// Like [`Encoder::encode`], but skips the UTF-8 validation of the result.
    ///
    /// # Safety
    ///
    /// The bytes produced from the encoder's table must form valid UTF-8.
    /// This always holds for [`Encoder::new`] and for any table made up of
    /// ASCII bytes only.
    pub unsafe fn encode_unchecked<T>(&self, bytes: T) -> String
    where
        T: AsRef<[u8]>,
    {
        let buf = bytes.as_ref();
        let mut out = vec![b'='; Self::padded_len(buf.len())];
        self.internal_encode(buf, &mut out);
        // SAFETY: `out` holds only table bytes and ASCII '='; the caller
        // guarantees the table bytes form valid UTF-8.
        unsafe { String::from_utf8_unchecked(out) }
    }

    /// Encodes `bytes` as Base64 without trailing `=` padding.
    ///
    /// The result is exactly as long as the encoded data: a 1-byte tail
    /// yields 2 characters and a 2-byte tail yields 3.
    ///
    /// # Panics
    ///
    /// Panics if the encoded bytes are not valid UTF-8, which can only
    /// happen with a custom table containing non-ASCII bytes.
    pub fn encode_without_padding<T>(&self, bytes: T) -> String
    where
        T: AsRef<[u8]>,
    {
        let buf = bytes.as_ref();
        // Sized exactly, so every byte is overwritten and no filler remains.
        let mut out = vec![0; Self::unpadded_len(buf.len())];
        self.internal_encode(buf, &mut out);
        String::from_utf8(out).expect("encode table produced invalid UTF-8")
    }

    /// Like [`Encoder::encode_without_padding`], but skips the UTF-8
    /// validation of the result.
    ///
    /// # Safety
    ///
    /// The bytes produced from the encoder's table must form valid UTF-8.
    /// This always holds for [`Encoder::new`] and for any table made up of
    /// ASCII bytes only.
    pub unsafe fn encode_unchecked_without_padding<T>(&self, bytes: T) -> String
    where
        T: AsRef<[u8]>,
    {
        let buf = bytes.as_ref();
        let mut out = vec![0; Self::unpadded_len(buf.len())];
        self.internal_encode(buf, &mut out);
        // SAFETY: `out` is sized exactly, so it holds only table bytes; the
        // caller guarantees those form valid UTF-8.
        unsafe { String::from_utf8_unchecked(out) }
    }
}
#[inline]
pub fn swizzle_dyn<const N: usize>(val: Simd<u8, N>, idxs: Simd<u8, N>) -> Simd<u8, N>
where
LaneCount<N>: SupportedLaneCount,
{
#![allow(unused_imports, unused_unsafe)]
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
use core::arch::aarch64::{uint8x8_t, vqtbl1q_u8, vtbl1_u8};
#[cfg(all(target_arch = "arm", target_feature = "v7", target_endian = "little"))]
use core::arch::arm::{uint8x8_t, vtbl1_u8};
#[cfg(target_arch = "wasm32")]
use core::arch::wasm32 as wasm;
#[cfg(target_arch = "x86")]
use core::arch::x86;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as x86;
unsafe {
#[cfg(target_feature = "ssse3")]
return transize(x86::_mm_shuffle_epi8, val, idxs);
#[cfg(target_feature = "simd128")]
return transize(wasm::i8x16_swizzle, val, idxs);
#[cfg(all(
target_arch = "aarch64",
target_feature = "neon",
target_endian = "little"
))]
return transize(vqtbl1q_u8, val, idxs);
}
}
/// Applies the two-operand intrinsic `f` to `bytes` and `idxs` by
/// reinterpreting each `Simd<u8, N>` as the intrinsic's operand type `T`
/// and the result back again. `idxs` is first passed through
/// `zeroing_idxs`.
///
/// # Safety
///
/// - `f` must be safe to call on the current CPU with arbitrary operand
///   bits (i.e. the target feature it requires must be available).
/// - `T` must be a plain-data vector type for which every bit pattern is
///   valid.
///
/// # Panics
///
/// Panics if `T` and `Simd<u8, N>` differ in size, since the byte-wise
/// reinterpretation would otherwise read out of bounds.
#[allow(dead_code)]
#[inline(always)]
unsafe fn transize<T, const N: usize>(
    f: unsafe fn(T, T) -> T,
    bytes: Simd<u8, N>,
    idxs: Simd<u8, N>,
) -> Simd<u8, N>
where
    LaneCount<N>: SupportedLaneCount,
{
    // Both sizes are compile-time constants, so this check disappears in
    // every valid instantiation.
    assert!(
        core::mem::size_of::<T>() == core::mem::size_of::<Simd<u8, N>>(),
        "transize: operand type and SIMD vector differ in size"
    );
    let idxs = zeroing_idxs(idxs);
    // SAFETY: the sizes were checked to be equal above, so each
    // `transmute_copy` reads exactly the bytes of its source; the caller
    // guarantees that `f` may be called and that `T` accepts any bit pattern.
    unsafe { transmute_copy(&f(transmute_copy(&bytes), transmute_copy(&idxs))) }
}
/// Normalises shuffle indices before they are handed to a native shuffle.
///
/// On x86 / x86_64 every index that is `>= N` is replaced by `0xFF`; on all
/// other architectures the indices are returned unchanged.
///
/// NOTE(review): the x86 rewrite is presumably needed because
/// `_mm_shuffle_epi8` only zeroes a lane when the index has its top bit set
/// — confirm against the intrinsic's documentation.
#[inline(always)]
fn zeroing_idxs<const N: usize>(idxs: Simd<u8, N>) -> Simd<u8, N>
where
    LaneCount<N>: SupportedLaneCount,
{
    if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
        let lane_count = Simd::splat(N as u8);
        let in_range = idxs.simd_lt(lane_count);
        in_range.select(idxs, Simd::splat(u8::MAX))
    } else {
        idxs
    }
}
/// Correctness test and benchmarks comparing [`Encoder`] with the `base64`
/// crate's standard engine.
#[cfg(test)]
mod test {
    extern crate test;
    use alloc::{string::String, vec};
    use core::intrinsics::black_box;
    use test::Bencher;
    use base64::{engine::general_purpose, Engine};
    use rand::{
        distributions::{Alphanumeric, DistString},
        thread_rng,
    };
    use super::Encoder;

    /// Number of random strings encoded per benchmark iteration.
    const BENCH_INPUTS: usize = 10000;
    /// Length in bytes of every benchmark input.
    const BENCH_INPUT_LEN: usize = 1924;
    /// Longest input exercised by the correctness test.
    const MAX_TEST_LEN: usize = 64;
    /// Random samples drawn per input length in the correctness test.
    const SAMPLES_PER_LEN: usize = 1500;

    /// Builds the shared benchmark fixture: random alphanumeric strings.
    fn bench_inputs() -> vec::Vec<String> {
        let mut rng = thread_rng();
        (0..BENCH_INPUTS)
            .map(|_| Alphanumeric.sample_string(&mut rng, BENCH_INPUT_LEN))
            .collect()
    }

    /// Checks padded encoding against the reference implementation for every
    /// input length from 0 to `MAX_TEST_LEN`, so that the empty input, the
    /// 1- and 2-byte tails, the 3-byte groups and the 12-byte SIMD blocks
    /// (including exact multiples of 12) are all exercised.
    #[test]
    fn b64() {
        let mut rng = thread_rng();
        let encoder = Encoder::new();
        for len in 0..=MAX_TEST_LEN {
            for _ in 0..SAMPLES_PER_LEN {
                let string = Alphanumeric.sample_string(&mut rng, len);
                let b64_encoded = general_purpose::STANDARD.encode(&string);
                let my_encoded = encoder.encode(&string);
                if !my_encoded.eq(&b64_encoded) {
                    panic!("{my_encoded:?} != {b64_encoded:?}")
                }
            }
        }
    }

    /// Benchmarks this crate's encoder.
    #[bench]
    fn my_b64(b: &mut Bencher) {
        let strings = black_box(bench_inputs());
        let encoder = Encoder::new();
        b.iter(|| {
            for string in &strings {
                black_box(encoder.encode(string));
            }
        })
    }

    /// Benchmarks the reference `base64` engine on the same kind of input.
    #[bench]
    fn real_b64(b: &mut Bencher) {
        let strings = black_box(bench_inputs());
        b.iter(|| {
            for string in &strings {
                // Keep the result observable, exactly as `my_b64` does, so
                // both benchmarks measure the same amount of work.
                black_box(general_purpose::STANDARD.encode(string));
            }
        })
    }
}