use std::mem::MaybeUninit;
use crate::column::Parts;
use crate::offset::Offset;
use crate::types;
/// Number of readable bytes a dictionary buffer must provide past its last
/// logical offset before `dict_has_decoder_padding` treats it as padded.
///
/// Defined as one less than `types::MAX_TOKEN_SIZE`.
pub const DECOMPRESS_BUFFER_PADDING: usize = types::MAX_TOKEN_SIZE - 1;
/// Location of one dictionary token packed into a single `u64`.
///
/// The byte offset occupies the low 32 bits and the byte length occupies the
/// high 32 bits.
#[derive(Copy, Clone, Debug)]
pub struct DecodeEntry(u64);

impl DecodeEntry {
    /// Bit position at which the length field begins.
    const LEN_SHIFT: u32 = 32;

    /// Packs `offset` (low half) and `len` (high half) into one entry.
    #[inline]
    fn new(offset: u32, len: u32) -> Self {
        let high = u64::from(len) << Self::LEN_SHIFT;
        let low = u64::from(offset);
        Self(high | low)
    }

    /// Byte offset of the token, taken from the low 32 bits.
    #[inline]
    fn offset(self) -> usize {
        (self.0 & u64::from(u32::MAX)) as usize
    }

    /// Byte length of the token, taken from the high 32 bits.
    #[inline]
    fn len(self) -> usize {
        (self.0 >> Self::LEN_SHIFT) as usize
    }
}
/// Returns the `(begin, end)` pair stored for `row` in two consecutive slots
/// of `parts.code_boundaries`, converted to `usize`.
///
/// Callers in this file use the pair as a half-open range into `parts.codes`.
///
/// # Panics
///
/// Panics if `row + 1` is not a valid index into `code_boundaries`, or if a
/// boundary value cannot be represented as `usize`.
#[inline]
fn row_code_range<O: Offset>(parts: Parts<'_, O>, row: usize) -> (usize, usize) {
    let boundary_at = |index: usize| -> usize {
        parts.code_boundaries[index]
            .to_usize()
            .expect("code boundary fits usize")
    };
    let begin = boundary_at(row);
    let end = boundary_at(row + 1);
    (begin, end)
}
/// Returns the `(start, end)` byte offsets recorded for dictionary entry
/// `code` in `parts.dict_offsets`.
///
/// # Panics
///
/// Panics if `code + 1` is not a valid index into `dict_offsets`, or if the
/// end offset is smaller than the start offset.
#[inline]
fn code_byte_range<O: Offset>(parts: Parts<'_, O>, code: u16) -> (usize, usize) {
    let index = usize::from(code);
    let start = parts.dict_offsets[index] as usize;
    let end = parts.dict_offsets[index + 1] as usize;
    assert!(start <= end, "dictionary offsets must be nondecreasing");
    (start, end)
}
/// Byte length of the dictionary token referenced by `code`.
///
/// Panics under the same conditions as `code_byte_range`.
#[inline]
fn code_len<O: Offset>(parts: Parts<'_, O>, code: u16) -> usize {
    let range = code_byte_range(parts, code);
    range.1 - range.0
}
/// Reports whether `parts.dict_bytes` extends at least
/// `DECOMPRESS_BUFFER_PADDING` bytes beyond the last entry of
/// `parts.dict_offsets`.
///
/// Returns `false` when `dict_offsets` is empty or when the padded length
/// would overflow `usize`.
#[inline]
fn dict_has_decoder_padding<O: Offset>(parts: Parts<'_, O>) -> bool {
    match parts.dict_offsets.last() {
        None => false,
        Some(&logical_len) => {
            match (logical_len as usize).checked_add(DECOMPRESS_BUFFER_PADDING) {
                Some(required) => parts.dict_bytes.len() >= required,
                None => false,
            }
        }
    }
}
/// Copies exactly 16 bytes from `src` to `dst`.
///
/// On `aarch64` this is a single 128-bit NEON load/store pair; elsewhere it
/// is two unaligned 64-bit load/store pairs.
///
/// # Safety
///
/// `src` must be valid for reads of 16 bytes and `dst` must be valid for
/// writes of 16 bytes. The accesses are unaligned, so no alignment beyond
/// that of `u8` is required.
#[inline(always)]
unsafe fn copy_16_token_bytes(src: *const u8, dst: *mut u8) {
    #[cfg(target_arch = "aarch64")]
    unsafe {
        use std::arch::aarch64::{vld1q_u8, vst1q_u8};
        let lanes = vld1q_u8(src);
        vst1q_u8(dst, lanes);
    }
    #[cfg(not(target_arch = "aarch64"))]
    unsafe {
        // Low half first, then high half, each as one unaligned u64.
        let low = src.cast::<u64>().read_unaligned();
        dst.cast::<u64>().write_unaligned(low);
        let high = src.add(8).cast::<u64>().read_unaligned();
        dst.add(8).cast::<u64>().write_unaligned(high);
    }
}
/// Copies exactly `len` bytes from `src` to `dst`, dispatching on `len` to
/// fixed-width unaligned loads and stores.
///
/// Lengths 2..=15 are copied as two same-width accesses: one at the start of
/// the range and one ending at `len`. The two accesses overlap, and together
/// they cover exactly `len` bytes. Length 16 is delegated to
/// `copy_16_token_bytes`; anything longer falls back to
/// `std::ptr::copy_nonoverlapping`.
///
/// # Safety
///
/// `src` must be valid for reads of `len` bytes and `dst` must be valid for
/// writes of `len` bytes. For `len > 16` the two ranges must additionally not
/// overlap, as `copy_nonoverlapping` requires.
#[inline(always)]
unsafe fn copy_token_bytes(src: *const u8, dst: *mut u8, len: usize) {
unsafe {
match len {
// Nothing to copy.
0 => {}
1 => dst.write(src.read()),
// Two u16 accesses: bytes [0, 2) and [len - 2, len).
2 | 3 => {
dst.cast::<u16>()
.write_unaligned(src.cast::<u16>().read_unaligned());
dst.add(len - 2)
.cast::<u16>()
.write_unaligned(src.add(len - 2).cast::<u16>().read_unaligned());
}
// Two u32 accesses: bytes [0, 4) and [len - 4, len).
4..=7 => {
dst.cast::<u32>()
.write_unaligned(src.cast::<u32>().read_unaligned());
dst.add(len - 4)
.cast::<u32>()
.write_unaligned(src.add(len - 4).cast::<u32>().read_unaligned());
}
// Two u64 accesses: bytes [0, 8) and [len - 8, len).
8..=15 => {
dst.cast::<u64>()
.write_unaligned(src.cast::<u64>().read_unaligned());
dst.add(len - 8)
.cast::<u64>()
.write_unaligned(src.add(len - 8).cast::<u64>().read_unaligned());
}
16 => copy_16_token_bytes(src, dst),
// Longer than 16 bytes: generic non-overlapping copy.
_ => std::ptr::copy_nonoverlapping(src, dst, len),
}
}
}
/// Copies a fixed 16 bytes from `src` to `dst` through `copy_16_token_bytes`,
/// independent of the token's real length.
///
/// The callers in this file advance their write cursor by the token's real
/// length afterwards, so any surplus bytes are overwritten by later copies.
///
/// # Safety
///
/// `src` must be valid for reads of 16 bytes and `dst` must be valid for
/// writes of 16 bytes, even when the token itself is shorter.
#[inline(always)]
unsafe fn copy_padded_token_bytes(src: *const u8, dst: *mut u8) {
unsafe {
copy_16_token_bytes(src, dst);
}
}
/// Appends the dictionary token for `code` to the output buffer at position
/// `*written` and advances `*written` by the token length.
///
/// `out_ptr` and `out_len` describe the output buffer; the callers in this
/// file pass the pointer and length of one `&mut [MaybeUninit<u8>]`.
///
/// # Panics
///
/// Panics if `code` is out of range for `dict_offsets`, if its offsets are
/// decreasing, if its byte range does not fit inside `dict_bytes`, or if the
/// token does not fit in the remaining output space.
#[inline]
fn write_code<O: Offset>(
    parts: Parts<'_, O>,
    code: u16,
    out_ptr: *mut u8,
    out_len: usize,
    written: &mut usize,
) {
    let (start, end) = code_byte_range(parts, code);
    let token = parts
        .dict_bytes
        .get(start..end)
        .expect("dictionary offset range fits dictionary bytes");
    let token_len = token.len();
    let remaining = out_len.saturating_sub(*written);
    assert!(
        token_len <= remaining,
        "output buffer too small for decompressed bytes"
    );
    // SAFETY: `token` is a live slice of `token_len` bytes, and the assertion
    // above guarantees `*written + token_len <= out_len`, so the destination
    // range stays inside the buffer described by `out_ptr`/`out_len`.
    unsafe {
        copy_token_bytes(token.as_ptr(), out_ptr.add(*written), token_len);
    }
    *written += token_len;
}
/// Returns the number of bytes that `row` decodes to.
///
/// The length is the sum of the dictionary token lengths of every code in the
/// row's code range.
///
/// # Panics
///
/// Panics if `row` is out of range for `code_boundaries`, if the row's code
/// range is not a valid range of `codes`, if a code is out of range for
/// `dict_offsets` or has decreasing offsets, or if the total length does not
/// fit in `usize`.
pub fn decompressed_row_len<O: Offset>(parts: Parts<'_, O>, row: usize) -> usize {
    let (begin, end) = row_code_range(parts, row);
    // Checked accumulation: a plain `sum()` wraps silently when overflow
    // checks are disabled, and callers size output buffers from this value.
    parts.codes[begin..end]
        .iter()
        .try_fold(0usize, |total, &code| {
            total.checked_add(code_len(parts, code))
        })
        .expect("decompressed row length fits usize")
}
/// Returns the number of bytes the whole column decodes to.
///
/// The length is the sum of the dictionary token lengths of every code in
/// `parts.codes`.
///
/// # Panics
///
/// Panics if a code is out of range for `dict_offsets` or has decreasing
/// offsets, or if the total length does not fit in `usize`.
pub fn decompressed_len<O: Offset>(parts: Parts<'_, O>) -> usize {
    // Checked accumulation: a plain `sum()` wraps silently when overflow
    // checks are disabled, and `decompress` sizes the buffer that unchecked
    // decoders write into from this value.
    parts
        .codes
        .iter()
        .try_fold(0usize, |total, &code| {
            total.checked_add(code_len(parts, code))
        })
        .expect("decompressed length fits usize")
}
pub fn decode_entries<O: Offset>(parts: Parts<'_, O>) -> Vec<DecodeEntry> {
let len = parts.dict_offsets.len().saturating_sub(1);
(0..len)
.map(|i| {
let s = parts.dict_offsets[i];
let e = parts.dict_offsets[i + 1];
assert!(e > s, "dictionary tokens must be nonempty");
DecodeEntry::new(s, e - s)
})
.collect()
}
/// Decodes every code in `parts.codes` into `out` through the bounds-checked
/// `write_code` path and returns the number of bytes written.
///
/// Panics under the same conditions as `write_code`.
#[inline]
fn decompress_into_checked<O: Offset>(parts: Parts<'_, O>, out: &mut [MaybeUninit<u8>]) -> usize {
    let capacity = out.len();
    let dst = out.as_mut_ptr().cast::<u8>();
    let mut written = 0;
    parts
        .codes
        .iter()
        .for_each(|&code| write_code(parts, code, dst, capacity, &mut written));
    written
}
/// Decodes a single `row` into the caller-provided buffer `out` and returns
/// the number of bytes written.
///
/// Every token is copied through the bounds-checked `write_code` path.
///
/// # Panics
///
/// Panics if `row` is out of range for `code_boundaries`, if the row's code
/// range is not a valid range of `codes`, if a code or its dictionary range
/// is invalid, or if `out` is too small for the decoded row.
pub fn decompress_row_into<O: Offset>(
    parts: Parts<'_, O>,
    row: usize,
    out: &mut [MaybeUninit<u8>],
) -> usize {
    let (begin, end) = row_code_range(parts, row);
    let row_codes = &parts.codes[begin..end];
    let capacity = out.len();
    let dst = out.as_mut_ptr().cast::<u8>();
    let mut written = 0;
    row_codes
        .iter()
        .copied()
        .for_each(|code| write_code(parts, code, dst, capacity, &mut written));
    written
}
/// Decodes the whole column into the caller-provided buffer `out` and returns
/// the number of bytes written.
///
/// Dictionaries without decoder padding go through the fully checked
/// per-token path. Padded dictionaries get a decode-entry table and use the
/// padded decoder, which still checks the output buffer.
///
/// # Panics
///
/// Panics if `out` is too small for the decoded bytes, or if the dictionary
/// is rejected by `decode_entries` (padded path) or `write_code` (unpadded
/// path).
pub fn decompress_into<O: Offset>(parts: Parts<'_, O>, out: &mut [MaybeUninit<u8>]) -> usize {
    if !dict_has_decoder_padding(parts) {
        return decompress_into_checked(parts, out);
    }
    let entries = decode_entries(parts);
    // SAFETY: `entries` was just derived from this dictionary's offsets with
    // every token verified nonempty, and the padding check above passed.
    // NOTE(review): this call does not validate that every code is below
    // `entries.len()`; confirm the callee bounds-checks codes or that `Parts`
    // guarantees it.
    unsafe { decompress_into_checked_padded_with_entries(parts, &entries, out) }
}
/// Decodes every code in `parts.codes` into `out` without any bounds checks
/// and returns the number of bytes written.
///
/// # Safety
///
/// The caller must guarantee all of the following:
///
/// - for every code `c`, `c + 1` is a valid index into `parts.dict_offsets`;
/// - for every code `c`, `dict_offsets[c] <= dict_offsets[c + 1]` and that
///   byte range lies inside `parts.dict_bytes`;
/// - `out` is at least as long as the sum of all decoded token lengths.
pub unsafe fn decompress_into_unchecked<O: Offset>(
    parts: Parts<'_, O>,
    out: &mut [MaybeUninit<u8>],
) -> usize {
    let offset_table = parts.dict_offsets.as_ptr();
    let dict_base = parts.dict_bytes.as_ptr();
    let dst_base = out.as_mut_ptr().cast::<u8>();
    let mut cursor = 0;
    for &code in parts.codes {
        let index = usize::from(code);
        // SAFETY: upheld by the caller per the contract above.
        unsafe {
            let start = offset_table.add(index).read() as usize;
            let end = offset_table.add(index + 1).read() as usize;
            let token_len = end - start;
            copy_token_bytes(dict_base.add(start), dst_base.add(cursor), token_len);
            cursor += token_len;
        }
    }
    cursor
}
/// Decodes every code in `parts.codes` into `out` without bounds checks,
/// using fixed 16-byte block copies for all but the trailing codes.
///
/// The last `types::MAX_TOKEN_SIZE` codes are copied with their exact
/// lengths; every earlier code is copied as a 16-byte block, after which the
/// write cursor advances by the token's real length. Returns the number of
/// bytes written.
///
/// # Safety
///
/// The caller must guarantee all of the following:
///
/// - for every code `c`, `c + 1` is a valid index into `parts.dict_offsets`
///   and `dict_offsets[c] <= dict_offsets[c + 1]`;
/// - for every block-copied code, 16 bytes are readable from `dict_bytes`
///   starting at the token's offset and 16 bytes are writable in `out`
///   starting at the current write position;
/// - for every exactly-copied code, the token's byte range lies inside
///   `dict_bytes` and fits in the remaining space of `out`.
pub unsafe fn decompress_into_unchecked_padded<O: Offset>(
    parts: Parts<'_, O>,
    out: &mut [MaybeUninit<u8>],
) -> usize {
    let offset_table = parts.dict_offsets.as_ptr();
    let dict_base = parts.dict_bytes.as_ptr();
    let dst_base = out.as_mut_ptr().cast::<u8>();
    let tail_start = parts.codes.len().saturating_sub(types::MAX_TOKEN_SIZE);
    let (fast_codes, exact_codes) = parts.codes.split_at(tail_start);
    let mut cursor = 0;
    for &code in fast_codes {
        let index = usize::from(code);
        // SAFETY: upheld by the caller per the contract above.
        unsafe {
            let start = offset_table.add(index).read() as usize;
            let end = offset_table.add(index + 1).read() as usize;
            copy_padded_token_bytes(dict_base.add(start), dst_base.add(cursor));
            cursor += end - start;
        }
    }
    for &code in exact_codes {
        let index = usize::from(code);
        // SAFETY: upheld by the caller per the contract above.
        unsafe {
            let start = offset_table.add(index).read() as usize;
            let end = offset_table.add(index + 1).read() as usize;
            let token_len = end - start;
            copy_token_bytes(dict_base.add(start), dst_base.add(cursor), token_len);
            cursor += token_len;
        }
    }
    cursor
}
/// Decodes every code in `parts.codes` into `out` without bounds checks,
/// looking tokens up in a prebuilt `entries` table instead of `dict_offsets`.
///
/// The last `types::MAX_TOKEN_SIZE` codes are copied with their exact
/// lengths; every earlier code is copied as a 16-byte block, after which the
/// write cursor advances by the entry's length. Returns the number of bytes
/// written.
///
/// # Safety
///
/// The caller must guarantee all of the following:
///
/// - every code is a valid index into `entries`;
/// - for every block-copied code, 16 bytes are readable from `dict_bytes`
///   starting at the entry's offset and 16 bytes are writable in `out`
///   starting at the current write position;
/// - for every exactly-copied code, the entry's byte range lies inside
///   `dict_bytes` and fits in the remaining space of `out`.
pub unsafe fn decompress_into_unchecked_padded_with_entries<O: Offset>(
    parts: Parts<'_, O>,
    entries: &[DecodeEntry],
    out: &mut [MaybeUninit<u8>],
) -> usize {
    let entry_table = entries.as_ptr();
    let dict_base = parts.dict_bytes.as_ptr();
    let dst_base = out.as_mut_ptr().cast::<u8>();
    let tail_start = parts.codes.len().saturating_sub(types::MAX_TOKEN_SIZE);
    let (fast_codes, exact_codes) = parts.codes.split_at(tail_start);
    let mut cursor = 0;
    for &code in fast_codes {
        // SAFETY: upheld by the caller per the contract above.
        unsafe {
            let entry = entry_table.add(usize::from(code)).read();
            copy_padded_token_bytes(dict_base.add(entry.offset()), dst_base.add(cursor));
            cursor += entry.len();
        }
    }
    for &code in exact_codes {
        // SAFETY: upheld by the caller per the contract above.
        unsafe {
            let entry = entry_table.add(usize::from(code)).read();
            let token_len = entry.len();
            copy_token_bytes(dict_base.add(entry.offset()), dst_base.add(cursor), token_len);
            cursor += token_len;
        }
    }
    cursor
}
/// Decodes every code in `parts.codes` into `out` using the `entries` table,
/// checking codes and output space but trusting the entries' dictionary
/// ranges.
///
/// While at least 16 bytes of output remain past the write cursor, tokens are
/// copied as fixed 16-byte blocks. The remaining codes are copied with their
/// exact lengths after an explicit space check. Returns the number of bytes
/// written.
///
/// # Panics
///
/// Panics if a code is not a valid index into `entries`, or if `out` is too
/// small for the decoded bytes.
///
/// # Safety
///
/// The caller must guarantee, for every entry in `entries`, that 16 bytes are
/// readable from `parts.dict_bytes` starting at the entry's offset and that
/// the entry's `offset..offset + len` range lies inside `dict_bytes`. Entries
/// longer than the 16-byte block width are not fully copied by the block
/// path.
unsafe fn decompress_into_checked_padded_with_entries<O: Offset>(
    parts: Parts<'_, O>,
    entries: &[DecodeEntry],
    out: &mut [MaybeUninit<u8>],
) -> usize {
    let dict = parts.dict_bytes.as_ptr();
    let out_ptr = out.as_mut_ptr().cast::<u8>();
    let out_len = out.len();
    let mut written = 0;
    let mut codes = parts.codes.iter();
    // Largest write position (exclusive) that still leaves room for a full
    // block copy.
    let fast_end = out_len.saturating_sub(types::MAX_TOKEN_SIZE - 1);
    while written < fast_end {
        let Some(&code) = codes.next() else {
            break;
        };
        // Bounds-checked lookup: this function is reachable from safe code,
        // so an out-of-range code must panic instead of reading past
        // `entries`.
        let entry = entries[code as usize];
        // SAFETY: the caller guarantees the 16-byte read at this entry's
        // offset; `written < fast_end` keeps the 16-byte write inside `out`.
        unsafe {
            copy_padded_token_bytes(dict.add(entry.offset()), out_ptr.add(written));
        }
        written += entry.len();
    }
    for &code in codes {
        let entry = entries[code as usize];
        assert!(
            written <= out_len,
            "output buffer too small for decompressed bytes"
        );
        let remaining = out_len - written;
        assert!(
            entry.len() <= remaining,
            "output buffer too small for decompressed bytes"
        );
        // SAFETY: the caller guarantees the entry's range lies inside
        // `dict_bytes`; the assertions above keep the write inside `out`.
        unsafe {
            copy_token_bytes(dict.add(entry.offset()), out_ptr.add(written), entry.len());
        }
        written += entry.len();
    }
    // The block path advances by the entry length without comparing it to the
    // remaining space, so verify the final cursor before reporting it.
    assert!(
        written <= out_len,
        "output buffer too small for decompressed bytes"
    );
    written
}
/// Decodes the whole column into a freshly allocated `Vec<u8>`.
///
/// The output is sized with `decompressed_len` and then filled by one of the
/// unchecked decoders: the padded, entry-table variant when the dictionary
/// carries decoder padding, otherwise the exact-length variant.
///
/// # Panics
///
/// Panics if a code is out of range for `dict_offsets` or has decreasing
/// offsets, if a padded dictionary is rejected by `decode_entries`, or if an
/// unpadded dictionary has an offset pointing past `dict_bytes`.
pub fn decompress<O: Offset>(parts: Parts<'_, O>) -> Vec<u8> {
    // Sizing pass. It indexes `dict_offsets` for every code and asserts each
    // used offset pair is nondecreasing, so both facts hold from here on.
    let decoded_len = decompressed_len(parts);
    let mut out: Vec<u8> = Vec::with_capacity(decoded_len);
    let len = if dict_has_decoder_padding(parts) {
        let entries = decode_entries(parts);
        // SAFETY: codes were range-checked by the sizing pass and `entries`
        // has one slot per offset pair. `decode_entries` proved the offsets
        // strictly increasing, so with the padding check every entry offset
        // leaves room for a block read, assuming the 16-byte block width does
        // not exceed `MAX_TOKEN_SIZE`. Only codes before the final
        // `MAX_TOKEN_SIZE` are block-copied, and those trailing tokens are
        // nonempty, so each block write ends inside `decoded_len`.
        // NOTE(review): a token longer than the block width would leave bytes
        // unwritten; this relies on the dictionary never containing one.
        unsafe {
            decompress_into_unchecked_padded_with_entries(parts, &entries, out.spare_capacity_mut())
        }
    } else {
        // The unchecked decoder reads `dict_bytes` through raw pointers, and
        // the sizing pass never compares offsets with the dictionary length.
        // Validate once up front so a bad offset panics instead of reading
        // out of bounds.
        let dict_len = parts.dict_bytes.len();
        assert!(
            parts
                .dict_offsets
                .iter()
                .all(|&offset| offset as usize <= dict_len),
            "dictionary offset range fits dictionary bytes"
        );
        // SAFETY: codes were range-checked and their offset pairs verified
        // nondecreasing by the sizing pass; the assertion above keeps every
        // token range inside `dict_bytes`; `out` has `decoded_len` capacity.
        unsafe { decompress_into_unchecked(parts, out.spare_capacity_mut()) }
    };
    debug_assert_eq!(len, decoded_len);
    // SAFETY: the decoder wrote the `len` bytes it reports, and `len` equals
    // the capacity requested above.
    unsafe { out.set_len(len) };
    out
}
// Unit tests for the public decoder entry points.
#[cfg(test)]
mod tests {
use crate::{DEFAULT_CONFIG, Parts, compress};
use super::*;
// Round-trips a multi-row input (including an empty row) through `compress`
// and `decompress_into`, decoding into a caller-owned uninitialized buffer.
#[test]
fn decompress_into_uses_caller_buffer() {
let rows: &[&[u8]] = &[b"alpha", b"", b"beta beta", b"gamma"];
// Concatenate the rows and record the running end offset of each one.
let mut bytes = Vec::new();
let mut offsets = vec![0u32];
for row in rows {
bytes.extend_from_slice(row);
offsets.push(bytes.len() as u32);
}
let col = compress(&bytes, &offsets, DEFAULT_CONFIG).unwrap();
// With padding present, `decompress_into` takes its padded branch.
assert!(
dict_has_decoder_padding(col.as_parts()),
"compressed columns include decoder padding"
);
let mut decoded = Vec::with_capacity(bytes.len());
let len = decompress_into(col.as_parts(), decoded.spare_capacity_mut());
// The decoder reports how many bytes of the spare capacity it filled.
unsafe { decoded.set_len(len) };
assert_eq!(decoded, bytes);
}
// Hand-built `Parts` whose dictionary has no trailing padding, so
// `decompress` must take its non-padded branch.
#[test]
fn decompress_falls_back_for_unpadded_parts() {
let offsets = [0u32, 1, 2];
let boundaries = [0u32, 2];
let codes = [0u16, 1];
let parts = Parts {
dict_bytes: b"ab",
dict_offsets: &offsets,
bits: 1,
codes: &codes,
code_boundaries: &boundaries,
};
assert!(!dict_has_decoder_padding(parts));
assert_eq!(decompress(parts), b"ab");
}
// Decodes each row on its own through `decompress_row_into` and compares it
// with the original row, including the empty one.
#[test]
fn decompress_row_into_uses_caller_buffer() {
let rows: &[&[u8]] = &[b"short", b"longer-row", b"", b"tail"];
let mut bytes = Vec::new();
let mut offsets = vec![0u32];
for row in rows {
bytes.extend_from_slice(row);
offsets.push(bytes.len() as u32);
}
let col = compress(&bytes, &offsets, DEFAULT_CONFIG).unwrap();
for (row, expected) in rows.iter().enumerate() {
// Buffer sized to exactly the expected row length.
let mut decoded = Vec::with_capacity(expected.len());
let len = decompress_row_into(col.as_parts(), row, decoded.spare_capacity_mut());
unsafe { decoded.set_len(len) };
assert_eq!(decoded, *expected);
}
}
}