use minarrow::Bitmask;
use crate::error::IoError;
/// Appends every `i32` in `data` to `out` as four little-endian bytes.
///
/// Values are written in slice order with no header, padding, or separator;
/// bytes already present in `out` are left untouched.
pub fn encode_int32_plain(data: &[i32], out: &mut Vec<u8>) {
    data.iter()
        .map(|value| value.to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
/// Appends every `i64` in `data` to `out` as eight little-endian bytes.
///
/// Values are written in slice order with no header, padding, or separator;
/// bytes already present in `out` are left untouched.
pub fn encode_int64_plain(data: &[i64], out: &mut Vec<u8>) {
    data.iter()
        .map(|value| value.to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
/// Appends every `u32` in `data` to `out` as four little-endian bytes.
///
/// The bit pattern is written unchanged: no range check or sign conversion
/// is applied, so values above `i32::MAX` keep their top bit set.
pub fn encode_uint32_as_int32_plain(data: &[u32], out: &mut Vec<u8>) {
    data.iter()
        .map(|value| value.to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
/// Appends every `u64` in `data` to `out` as eight little-endian bytes.
///
/// The bit pattern is written unchanged: no range check or sign conversion
/// is applied, so values above `i64::MAX` keep their top bit set.
pub fn encode_uint64_as_int64_plain(data: &[u64], out: &mut Vec<u8>) {
    data.iter()
        .map(|value| value.to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
#[cfg(feature = "extended_numeric_types")]
/// Widens every `u8` in `data` to 32 bits (zero-extended) and appends it to
/// `out` as four little-endian bytes.
pub fn encode_uint8_as_int32_plain(data: &[u8], out: &mut Vec<u8>) {
    data.iter()
        .map(|&narrow| u32::from(narrow).to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
#[cfg(feature = "extended_numeric_types")]
/// Widens every `u16` in `data` to 32 bits (zero-extended) and appends it to
/// `out` as four little-endian bytes.
pub fn encode_uint16_as_int32_plain(data: &[u16], out: &mut Vec<u8>) {
    data.iter()
        .map(|&narrow| u32::from(narrow).to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
/// Appends every `f32` in `data` to `out` as the four little-endian bytes of
/// its bit pattern, in slice order.
pub fn encode_float32_plain(data: &[f32], out: &mut Vec<u8>) {
    data.iter()
        .map(|value| value.to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
/// Appends every `f64` in `data` to `out` as the eight little-endian bytes of
/// its bit pattern, in slice order.
pub fn encode_float64_plain(data: &[f64], out: &mut Vec<u8>) {
    data.iter()
        .map(|value| value.to_le_bytes())
        .for_each(|bytes| out.extend_from_slice(&bytes));
}
/// Packs the first `len` booleans of `values` into bytes, eight rows per
/// byte, and appends them to `out`.
///
/// Row `i` lands in bit `i % 8` of byte `i / 8`, so the first row of each
/// byte is its least-significant bit. When `null_mask` is supplied, a row
/// whose mask bit is unset is written as `0` and `values` is not consulted
/// for it. A trailing partial byte keeps its unused high bits at zero.
pub fn encode_bool_bitpacked(
    values: &Bitmask,
    null_mask: Option<&Bitmask>,
    len: usize,
    out: &mut Vec<u8>,
) {
    // Resolves one row to the bit that gets written: null rows read as false.
    let row_bit = |row: usize| match null_mask {
        Some(mask) if !mask.get(row) => false,
        _ => values.get(row),
    };

    let mut chunk_start = 0;
    while chunk_start < len {
        // The final chunk may hold fewer than eight rows.
        let chunk_len = (len - chunk_start).min(8);
        let packed = (0..chunk_len).fold(0u8, |acc, bit| {
            if row_bit(chunk_start + bit) {
                acc | (1u8 << bit)
            } else {
                acc
            }
        });
        out.push(packed);
        chunk_start += chunk_len;
    }
}
pub fn encode_string_plain(
offsets: &[u32],
values: &[u8],
null_mask: Option<&Bitmask>,
len: usize,
out: &mut Vec<u8>,
) -> Result<(), IoError> {
for i in 0..len {
let valid = null_mask.map_or(true, |m| m.get(i));
let start = offsets[i] as usize;
let end = offsets[i + 1] as usize;
let s_len = if valid { end - start } else { 0 };
out.extend_from_slice(&(s_len as u32).to_le_bytes());
if valid {
out.extend_from_slice(&values[start..end]);
}
}
Ok(())
}
/// Appends `len` strings addressed by 64-bit offsets to `out`, each as a
/// 4-byte little-endian byte length followed by the string's raw bytes.
///
/// Row `i` occupies `values[offsets[i]..offsets[i + 1]]`, so `offsets` must
/// hold at least `len + 1` entries. When `null_mask` is supplied, a row whose
/// mask bit is unset is written as a zero length with no payload and its
/// offsets are not validated.
///
/// # Errors
///
/// Returns [`IoError::InputDataError`] when `offsets` has fewer than
/// `len + 1` entries, or when a valid row's offsets decrease, describe a
/// string longer than `u32::MAX` bytes (the length prefix is only 32 bits
/// wide), or point past the end of `values`. A bad row is reported after the
/// rows before it have already been appended to `out`.
#[cfg(feature = "large_string")]
pub fn encode_large_string_plain(
    offsets: &[u64],
    values: &[u8],
    null_mask: Option<&Bitmask>,
    len: usize,
    out: &mut Vec<u8>,
) -> Result<(), IoError> {
    // `len` rows need `len + 1` offsets; zero rows need none at all.
    if len != 0 && offsets.len() <= len {
        return Err(IoError::InputDataError(format!(
            "string offsets too short: {} rows need more than {} offsets",
            len,
            offsets.len()
        )));
    }

    for (i, bounds) in offsets.windows(2).take(len).enumerate() {
        let valid = null_mask.map_or(true, |m| m.get(i));
        if !valid {
            out.extend_from_slice(&0u32.to_le_bytes());
            continue;
        }

        let (start, end) = (bounds[0], bounds[1]);
        // All range arithmetic stays in `u64` so nothing is truncated on
        // targets where `usize` is narrower than the offsets.
        let byte_len = end.checked_sub(start).ok_or_else(|| {
            IoError::InputDataError(format!(
                "string offsets for row {} run backwards ({} > {})",
                i, start, end
            ))
        })?;
        if byte_len > u64::from(u32::MAX) {
            return Err(IoError::InputDataError(format!(
                "string >4 GiB ({} bytes)",
                byte_len
            )));
        }
        if end > values.len() as u64 {
            return Err(IoError::InputDataError(format!(
                "string offsets {}..{} for row {} exceed the {}-byte values buffer",
                start,
                end,
                i,
                values.len()
            )));
        }

        // `start <= end <= values.len()`, so both casts below are lossless,
        // and `byte_len <= u32::MAX` was checked above.
        out.extend_from_slice(&(byte_len as u32).to_le_bytes());
        out.extend_from_slice(&values[start as usize..end as usize]);
    }
    Ok(())
}
/// Encodes 32-bit datetime values by forwarding them unchanged to
/// [`encode_int32_plain`]: each value is appended to `out` as four
/// little-endian bytes. No unit conversion or validation happens here.
pub fn encode_datetime32_plain(data: &[i32], out: &mut Vec<u8>) {
encode_int32_plain(data, out)
}
/// Encodes 64-bit datetime values by forwarding them unchanged to
/// [`encode_int64_plain`]: each value is appended to `out` as eight
/// little-endian bytes. No unit conversion or validation happens here.
pub fn encode_datetime64_plain(data: &[i64], out: &mut Vec<u8>) {
encode_int64_plain(data, out)
}
/// Encodes dictionary indices with the Parquet RLE / bit-packed hybrid
/// encoding and appends the result to `out`.
///
/// Layout written:
///
/// 1. One byte holding the bit width: the number of bits needed for the
///    largest index. It is `0` when `indices` is empty or all indices are 0.
/// 2. A sequence of runs, each introduced by a ULEB128 header:
///    * **RLE run**, header `run_len << 1`: used for 8 or more consecutive
///      equal indices. The repeated value follows in `ceil(bit_width / 8)`
///      little-endian bytes.
///    * **Bit-packed run**, header `(groups << 1) | 1`: `groups * 8` values
///      follow, packed back to back at `bit_width` bits each, filling every
///      byte from its least-significant bit upwards.
///
/// A bit-packed run only ends on an 8-value boundary or at the end of the
/// input, so the zero values used to fill the last group can only appear
/// after the final real index; a reader that knows the value count simply
/// ignores them.
///
/// For empty input only the single bit-width byte `0` is written.
///
/// # Errors
///
/// The current implementation never fails; the `Result` is part of the
/// signature shared with the other encoders.
pub fn encode_dictionary_indices_rle(indices: &[u32], out: &mut Vec<u8>) -> Result<(), IoError> {
    // Values per bit-packed group; also the shortest repeat stored as RLE.
    const GROUP_LEN: usize = 8;

    // Appends `v` as an unsigned LEB128 varint (7 payload bits per byte).
    fn write_uleb128(mut v: u64, o: &mut Vec<u8>) {
        loop {
            let byte = (v & 0x7f) as u8;
            v >>= 7;
            if v == 0 {
                o.push(byte);
                break;
            }
            o.push(byte | 0x80);
        }
    }

    // True when `tail` opens with at least `GROUP_LEN` equal values.
    fn opens_with_rle_run(tail: &[u32]) -> bool {
        match tail.get(..GROUP_LEN) {
            Some(head) => head.iter().all(|&x| x == head[0]),
            None => false,
        }
    }

    // Packs `values`, followed by `padding` zeros, at `bit_width` bits each.
    // The caller guarantees the total count is a multiple of `GROUP_LEN`, so
    // the output always ends on a byte boundary.
    fn write_bit_packed(values: &[u32], padding: usize, bit_width: u8, o: &mut Vec<u8>) {
        let width = u32::from(bit_width);
        let mut acc = 0u64;
        let mut pending = 0u32;
        let padded = values
            .iter()
            .copied()
            .chain(std::iter::repeat(0).take(padding));
        for value in padded {
            // `pending < 8` and `value < 2^32`, so 64 bits never overflow.
            acc |= u64::from(value) << pending;
            pending += width;
            while pending >= 8 {
                // Truncation is intended: emit the low byte, keep the rest.
                o.push(acc as u8);
                acc >>= 8;
                pending -= 8;
            }
        }
        debug_assert_eq!(pending, 0);
    }

    let max_index = match indices.iter().max() {
        Some(&max_index) => max_index,
        None => {
            out.push(0);
            return Ok(());
        }
    };

    // Width 0 is legal: it occurs whenever every index is 0.
    let bit_width = (32 - max_index.leading_zeros()) as u8;
    debug_assert!(bit_width <= 32);
    out.push(bit_width);
    let bytes_per_val = (usize::from(bit_width) + 7) / 8;

    let mut i = 0;
    while i < indices.len() {
        let v = indices[i];
        let run = indices[i..].iter().take_while(|&&x| x == v).count();
        if run >= GROUP_LEN {
            write_uleb128((run as u64) << 1, out);
            out.extend_from_slice(&v.to_le_bytes()[..bytes_per_val]);
            i += run;
            continue;
        }

        // Grow the bit-packed run one whole group at a time. It stops at the
        // end of the input, or on a group boundary where an RLE-worthy
        // repeat begins. Stopping anywhere else would put padding values in
        // the middle of the stream.
        let remaining = indices.len() - i;
        let mut n = 0;
        loop {
            n = (n + GROUP_LEN).min(remaining);
            if n == remaining || opens_with_rle_run(&indices[i + n..]) {
                break;
            }
        }

        let groups = (n + GROUP_LEN - 1) / GROUP_LEN;
        write_uleb128(((groups as u64) << 1) | 1, out);
        write_bit_packed(&indices[i..i + n], groups * GROUP_LEN - n, bit_width, out);
        i += n;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use minarrow::vec64;

    use super::*;

    // Generates one round-trip test for a fixed-width encoder: the three
    // sample values are encoded, the buffer is read back as little-endian
    // `$ty` values, and the result must equal the input.
    macro_rules! plain_round_trip_test {
        ($name:ident, $encode:ident, $ty:ty, $data:expr) => {
            #[test]
            fn $name() {
                let input: [$ty; 3] = $data;
                let mut encoded = Vec::new();
                $encode(&input, &mut encoded);
                let decoded: Vec<$ty> = encoded
                    .chunks_exact(std::mem::size_of::<$ty>())
                    .map(|chunk| <$ty>::from_le_bytes(chunk.try_into().unwrap()))
                    .collect();
                assert_eq!(decoded, input);
            }
        };
    }

    plain_round_trip_test!(
        test_encode_int32_plain,
        encode_int32_plain,
        i32,
        [1, -1, 0x12345678]
    );
    plain_round_trip_test!(
        test_encode_int64_plain,
        encode_int64_plain,
        i64,
        [1, -1, 0x1122334455667788]
    );
    plain_round_trip_test!(
        test_encode_uint32_as_int32_plain,
        encode_uint32_as_int32_plain,
        u32,
        [0, 1, 0xdeadbeef]
    );
    plain_round_trip_test!(
        test_encode_uint64_as_int64_plain,
        encode_uint64_as_int64_plain,
        u64,
        [0, 1, 0xabcdef0123456789]
    );
    plain_round_trip_test!(
        test_encode_float32_plain,
        encode_float32_plain,
        f32,
        [0.0, -1.5, 3.14159]
    );
    plain_round_trip_test!(
        test_encode_float64_plain,
        encode_float64_plain,
        f64,
        [0.0, -1.5, 2.718281828]
    );

    /// Ten bits without a null mask fill one full byte plus a partial one,
    /// least-significant bit first.
    #[test]
    fn test_encode_bool_bitpacked_no_nulls() {
        let bits = [
            true, false, true, true, false, false, false, true, true, false,
        ];
        let mask = Bitmask::from_bools(&bits);
        let mut buf = Vec::new();
        encode_bool_bitpacked(&mask, None, bits.len(), &mut buf);
        assert_eq!(buf.len(), 2);

        let unpacked: Vec<bool> = (0..bits.len())
            .map(|idx| (buf[idx / 8] >> (idx % 8)) & 1 == 1)
            .collect();
        assert_eq!(unpacked, bits);
    }

    /// Rows flagged as null must be written as 0 even when the value bit
    /// underneath is set.
    #[test]
    fn test_encode_bool_bitpacked_with_nulls() {
        let values = vec64![true, true, true, false];
        let mut nulls = Bitmask::new_set_all(values.len(), true);
        nulls.set_false(1);
        nulls.set_false(3);
        let mut buf = Vec::new();
        encode_bool_bitpacked(
            &Bitmask::from_bools(&values),
            Some(&nulls),
            values.len(),
            &mut buf,
        );

        let low_bits: Vec<u8> = (0..4).map(|shift| (buf[0] >> shift) & 1).collect();
        assert_eq!(low_bits, [1, 0, 1, 0]);
    }

    /// Every string is emitted as a 4-byte little-endian length followed by
    /// its bytes, and nothing is left over afterwards.
    #[test]
    fn test_encode_string_plain() {
        let slices = ["foo", "bar", "", "rust"];
        let mut values = Vec::new();
        let mut offsets = vec![0u32];
        for text in slices.iter() {
            values.extend_from_slice(text.as_bytes());
            offsets.push(values.len() as u32);
        }

        let mut buf = Vec::new();
        encode_string_plain(&offsets, &values, None, slices.len(), &mut buf).unwrap();

        let mut rest: &[u8] = &buf;
        for text in slices.iter() {
            let (len_bytes, tail) = rest.split_at(4);
            let byte_len = u32::from_le_bytes(len_bytes.try_into().unwrap()) as usize;
            let (payload, tail) = tail.split_at(byte_len);
            assert_eq!(payload, text.as_bytes());
            rest = tail;
        }
        assert!(rest.is_empty());
    }

    /// A single string one byte longer than `u32::MAX` cannot be described
    /// by the 32-bit length prefix and must be rejected.
    #[cfg(feature = "large_string")]
    #[test]
    fn test_encode_large_string_plain_errors_on_too_long() {
        let offsets = [0u64, (u32::MAX as u64) + 1];
        let values = vec![0u8; 10];
        let mut sink = Vec::new();
        let outcome = encode_large_string_plain(&offsets, &values, None, 1, &mut sink);
        assert!(matches!(outcome, Err(IoError::InputDataError(_))));
    }

    /// The datetime encoders must produce exactly the bytes of the integer
    /// encoders they forward to.
    #[test]
    fn test_encode_datetime_aliases() {
        let data32 = [10i32, -20, 30];
        let data64 = [100i64, -200, 300];

        let (mut via_alias32, mut via_int32) = (Vec::new(), Vec::new());
        encode_datetime32_plain(&data32, &mut via_alias32);
        encode_int32_plain(&data32, &mut via_int32);
        assert_eq!(via_alias32, via_int32);

        let (mut via_alias64, mut via_int64) = (Vec::new(), Vec::new());
        encode_datetime64_plain(&data64, &mut via_alias64);
        encode_int64_plain(&data64, &mut via_int64);
        assert_eq!(via_alias64, via_int64);
    }

    /// Empty input yields only the bit-width byte, which is 0.
    #[test]
    fn test_encode_dictionary_indices_rle_empty() {
        let mut buf = Vec::new();
        encode_dictionary_indices_rle(&[], &mut buf).unwrap();
        assert_eq!(buf, [0]);
    }

    /// Five distinct small indices need 3 bits each and at least a header
    /// plus payload after the bit-width byte.
    #[test]
    fn test_encode_dictionary_indices_rle_short_bitpacked() {
        let indices = vec![1, 2, 3, 4, 5];
        let mut buf = Vec::new();
        encode_dictionary_indices_rle(&indices, &mut buf).unwrap();
        assert_eq!(buf[0], 3);
        assert!(buf.len() >= 3);
    }

    /// Eight equal indices become one RLE run: width 3, header `8 << 1`,
    /// then the repeated value.
    #[test]
    fn test_encode_dictionary_indices_rle_rle_run() {
        let indices = vec![7u32; 8];
        let mut buf = Vec::new();
        encode_dictionary_indices_rle(&indices, &mut buf).unwrap();
        assert_eq!(buf[0], 3);
        assert_eq!(buf[1], 16);
        assert_eq!(buf[2], 7);
    }

    /// A mix of short and long repeats must encode to something non-empty.
    #[test]
    fn test_encode_dictionary_indices_rle_mixed() {
        let indices: Vec<u32> = std::iter::repeat(10)
            .take(5)
            .chain(std::iter::repeat(2).take(8))
            .chain(std::iter::repeat(3).take(3))
            .collect();
        let mut buf = Vec::new();
        encode_dictionary_indices_rle(&indices, &mut buf).unwrap();
        assert!(!buf.is_empty());
    }
}