use crate::constants::*;
use crate::error::{Result, SpssError};
/// Little-endian byte image of `SYSMIS_BITS`; `decompress_row_raw` copies it
/// into the output slot when it meets the `COMPRESS_SYSMIS` code.
const SYSMIS_RAW: [u8; 8] = SYSMIS_BITS.to_le_bytes();

/// Eight ASCII space bytes; `decompress_row_raw` copies them into the output
/// slot when it meets the `COMPRESS_EIGHT_SPACES` code.
const SPACES_RAW: [u8; 8] = [b' '; 8];
/// One decoded 8-byte slot of a compressed row, as produced by the test-only
/// `BytecodeDecompressor::decompress_row`.
///
/// `PartialEq` is derived so values can be compared directly with
/// `assert_eq!` instead of being destructured by hand.
#[derive(Debug, Clone, Copy, PartialEq)]
// The only code in this file that constructs these variants is compiled under
// `#[cfg(test)]`, so non-test builds would otherwise warn about unused variants.
#[allow(dead_code)]
pub enum SlotValue {
    /// A number decoded from a bias code: the control code minus the bias.
    Numeric(f64),
    /// Eight bytes copied verbatim from the input after a "raw follows" code.
    Raw([u8; 8]),
    /// The "eight spaces" control code.
    Spaces,
    /// The "system-missing" control code.
    Sysmis,
    /// End-of-file marker.
    ///
    /// NOTE(review): nothing in this file constructs this variant; end of
    /// file is reported through `BytecodeDecompressor::is_eof` instead.
    EndOfFile,
}
/// Streaming decoder state for bytecode-compressed rows.
///
/// The input is consumed as 8-byte control blocks (one code per byte),
/// interleaved with 8-byte raw payloads. The struct remembers where it is in
/// the input and inside the current control block, so a row may start and end
/// in the middle of a block.
pub struct BytecodeDecompressor {
    /// Value subtracted from control codes `1..=251` to obtain the number.
    /// Only read directly by the test-only decoder; the production path uses
    /// `bias_lut`.
    #[cfg_attr(not(test), allow(dead_code))]
    bias: f64,
    /// Byte offset in the input of the next unread control block or payload.
    pos: usize,
    /// The control block currently being consumed.
    control_bytes: [u8; 8],
    /// Index of the next unread code in `control_bytes`; `8` means the block
    /// is used up and a new one must be fetched.
    control_idx: usize,
    /// Set once the end-of-file code has been decoded.
    eof: bool,
    /// Little-endian bytes of `code - bias`, indexed by control code and
    /// filled in by `new` for codes `1..=251` (other entries stay zeroed).
    bias_lut: [[u8; 8]; 256],
}
/// Snapshot of a `BytecodeDecompressor`'s mutable decoding state.
///
/// Produced by `BytecodeDecompressor::checkpoint` and applied again with
/// `BytecodeDecompressor::restore`. The bias and its lookup table are not
/// part of the snapshot.
///
/// The type is `Copy`, so a single checkpoint can be restored any number of
/// times even though `restore` takes it by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecompressorCheckpoint {
    /// Input byte offset at the time of the snapshot.
    pos: usize,
    /// Control block that was being consumed.
    control_bytes: [u8; 8],
    /// Index of the next unread code within `control_bytes`.
    control_idx: usize,
    /// Whether the end-of-file code had already been seen.
    eof: bool,
}
impl BytecodeDecompressor {
    /// Size in bytes of every unit in the stream: a control block carries this
    /// many codes, and each raw payload or decoded slot is this wide.
    const BLOCK_LEN: usize = 8;

    /// Creates a decompressor positioned at input offset 0.
    ///
    /// `bias` is the value subtracted from control codes `1..=251` to obtain
    /// the number they stand for. The little-endian bytes of every such result
    /// are precomputed so the raw decoding path is a table lookup.
    pub fn new(bias: f64) -> Self {
        let mut bias_lut = [[0u8; 8]; 256];
        for code in 1u8..=251 {
            bias_lut[usize::from(code)] = (f64::from(code) - bias).to_le_bytes();
        }
        BytecodeDecompressor {
            bias,
            pos: 0,
            control_bytes: [0u8; 8],
            // Start with the (empty) control block marked as used up so the
            // first decode call fetches a real one from the input.
            control_idx: Self::BLOCK_LEN,
            eof: false,
            bias_lut,
        }
    }

    /// Returns the input byte offset of the next unread block.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Moves the read cursor to `new_pos`.
    ///
    /// Only the cursor changes; a partially consumed control block stays
    /// buffered and the end-of-file flag is left as it was.
    pub fn set_pos(&mut self, new_pos: usize) {
        self.pos = new_pos;
    }

    /// Returns `true` once the end-of-file code has been decoded.
    pub fn is_eof(&self) -> bool {
        self.eof
    }

    /// Captures the cursor, the buffered control block and the end-of-file
    /// flag so decoding can later be resumed from this exact point.
    pub fn checkpoint(&self) -> DecompressorCheckpoint {
        DecompressorCheckpoint {
            pos: self.pos,
            control_bytes: self.control_bytes,
            control_idx: self.control_idx,
            eof: self.eof,
        }
    }

    /// Rewinds (or fast-forwards) to a state captured by [`Self::checkpoint`].
    pub fn restore(&mut self, cp: DecompressorCheckpoint) {
        self.pos = cp.pos;
        self.control_bytes = cp.control_bytes;
        self.control_idx = cp.control_idx;
        self.eof = cp.eof;
    }

    /// Decodes up to `slots_per_row` slots into `slots` as typed values.
    ///
    /// `slots` is cleared first. Decoding stops early, still returning
    /// `Ok(())`, when the end-of-file code is met or when the input ends
    /// where a control block was expected; the caller can tell by comparing
    /// `slots.len()` with `slots_per_row`.
    ///
    /// # Errors
    ///
    /// Returns `SpssError::TruncatedFile` when a "raw follows" code is not
    /// followed by eight payload bytes.
    #[cfg(test)]
    pub fn decompress_row(
        &mut self,
        input: &[u8],
        slots_per_row: usize,
        slots: &mut Vec<SlotValue>,
    ) -> Result<()> {
        slots.clear();
        if self.eof {
            return Ok(());
        }
        while slots.len() < slots_per_row {
            let code = match self.next_code(input) {
                Some(code) => code,
                None => return Ok(()),
            };
            let value = match code {
                // Padding code: produces no slot.
                COMPRESS_SKIP => continue,
                COMPRESS_END_OF_FILE => {
                    self.eof = true;
                    return Ok(());
                }
                COMPRESS_RAW_FOLLOWS => SlotValue::Raw(self.read_raw(input)?),
                COMPRESS_EIGHT_SPACES => SlotValue::Spaces,
                COMPRESS_SYSMIS => SlotValue::Sysmis,
                1..=251 => SlotValue::Numeric(f64::from(code) - self.bias),
            };
            slots.push(value);
        }
        Ok(())
    }

    /// Decodes one row of `slots_per_row` slots straight into `output`,
    /// writing eight bytes per slot starting at byte `out_offset`.
    ///
    /// Returns `Ok(true)` when the whole row was written. Returns `Ok(false)`
    /// when decoding had already reached end of file, when the end-of-file
    /// code is met, or when the input ends where a control block was
    /// expected; in the latter two cases the slots decoded so far have
    /// already been written to `output`.
    ///
    /// # Errors
    ///
    /// Returns `SpssError::TruncatedFile` when a "raw follows" code is not
    /// followed by eight payload bytes.
    ///
    /// # Panics
    ///
    /// Panics if `output` cannot hold `slots_per_row * 8` bytes at
    /// `out_offset`. This is checked in every build profile: a size mistake
    /// by the caller must never turn into an out-of-bounds write.
    pub fn decompress_row_raw(
        &mut self,
        input: &[u8],
        slots_per_row: usize,
        output: &mut [u8],
        out_offset: usize,
    ) -> Result<bool> {
        if self.eof {
            return Ok(false);
        }

        // Validate the destination window once, with overflow-checked
        // arithmetic, then hand out fixed-size chunks of it below.
        let available = output.len();
        let row_end = slots_per_row
            .checked_mul(Self::BLOCK_LEN)
            .and_then(|row_len| out_offset.checked_add(row_len))
            .filter(|&end| end <= available);
        let row = match row_end {
            Some(end) => &mut output[out_offset..end],
            None => panic!(
                "output buffer too small: need {} bytes at offset {}, have {}",
                slots_per_row.saturating_mul(Self::BLOCK_LEN),
                out_offset,
                available
            ),
        };

        for dest in row.chunks_exact_mut(Self::BLOCK_LEN) {
            // Padding codes produce no value, so keep reading codes until
            // this slot has been filled.
            loop {
                let code = match self.next_code(input) {
                    Some(code) => code,
                    None => return Ok(false),
                };
                match code {
                    COMPRESS_SKIP => continue,
                    COMPRESS_END_OF_FILE => {
                        self.eof = true;
                        return Ok(false);
                    }
                    COMPRESS_RAW_FOLLOWS => dest.copy_from_slice(&self.read_raw(input)?),
                    COMPRESS_EIGHT_SPACES => dest.copy_from_slice(&SPACES_RAW),
                    COMPRESS_SYSMIS => dest.copy_from_slice(&SYSMIS_RAW),
                    1..=251 => dest.copy_from_slice(&self.bias_lut[usize::from(code)]),
                }
                break;
            }
        }
        Ok(true)
    }

    /// Returns the next control code, fetching a fresh control block from
    /// `input` when the buffered one is used up.
    ///
    /// Returns `None`, leaving the state untouched, when a new block is needed
    /// but fewer than eight bytes remain at the cursor.
    fn next_code(&mut self, input: &[u8]) -> Option<u8> {
        if self.control_idx >= Self::BLOCK_LEN {
            self.control_bytes = self.read_block(input)?;
            self.control_idx = 0;
        }
        let code = self.control_bytes[self.control_idx];
        self.control_idx += 1;
        Some(code)
    }

    /// Reads the eight bytes at the cursor and advances past them.
    ///
    /// Returns `None`, without moving the cursor, when the input does not
    /// hold eight more bytes or the cursor is so large that the end offset
    /// would overflow.
    fn read_block(&mut self, input: &[u8]) -> Option<[u8; 8]> {
        let end = self.pos.checked_add(Self::BLOCK_LEN)?;
        let mut block = [0u8; 8];
        block.copy_from_slice(input.get(self.pos..end)?);
        self.pos = end;
        Some(block)
    }

    /// Reads the payload of a "raw follows" code, turning a short input into
    /// the truncated-file error.
    fn read_raw(&mut self, input: &[u8]) -> Result<[u8; 8]> {
        // `read_block` leaves the cursor alone on failure, so the offset
        // captured here is the one the error must report.
        let start = self.pos;
        self.read_block(input)
            .ok_or_else(|| truncated_err(start.saturating_add(Self::BLOCK_LEN), input.len()))
    }
}
/// Builds the error returned when a raw payload would run past the end of the
/// input: `expected` is the input length the payload needs, `actual` the
/// length that is really there.
///
/// Kept out of line and marked `#[cold]` because it is only reached on the
/// error path of the decode loops.
#[cold]
fn truncated_err(expected: usize, actual: usize) -> SpssError {
    SpssError::TruncatedFile { expected, actual }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bias used by every test, chosen so that code `100 + n` decodes to `n`.
    const BIAS: f64 = 100.0;

    /// Unwraps a `SlotValue::Numeric`, failing the test for any other variant.
    fn numeric(slot: SlotValue) -> f64 {
        match slot {
            SlotValue::Numeric(v) => v,
            other => panic!("expected Numeric, got {:?}", other),
        }
    }

    /// Asserts that `slot` is numeric and equal to `expected`.
    fn assert_numeric(slot: SlotValue, expected: f64) {
        let actual = numeric(slot);
        assert!(
            (actual - expected).abs() < f64::EPSILON,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_numeric_bias_codes() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let mut slots = Vec::with_capacity(8);
        let input: Vec<u8> = vec![101, 102, 0, 0, 0, 0, 0, 0];
        decompressor.decompress_row(&input, 2, &mut slots).unwrap();
        assert_eq!(slots.len(), 2);
        assert_numeric(slots[0], 1.0);
        assert_numeric(slots[1], 2.0);
    }

    #[test]
    fn test_sysmis_and_spaces() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let mut slots = Vec::with_capacity(8);
        let input: Vec<u8> = vec![255, 254, 0, 0, 0, 0, 0, 0];
        decompressor.decompress_row(&input, 2, &mut slots).unwrap();
        assert_eq!(slots.len(), 2);
        assert!(matches!(slots[0], SlotValue::Sysmis));
        assert!(matches!(slots[1], SlotValue::Spaces));
    }

    #[test]
    fn test_raw_follows() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let mut slots = Vec::with_capacity(8);
        // Any exactly representable value works here; avoid literals that
        // approximate a standard constant (clippy::approx_constant).
        let payload = 2.5_f64.to_le_bytes();
        let mut input = Vec::new();
        input.extend_from_slice(&[253, 0, 0, 0, 0, 0, 0, 0]);
        input.extend_from_slice(&payload);
        decompressor.decompress_row(&input, 1, &mut slots).unwrap();
        assert_eq!(slots.len(), 1);
        match slots[0] {
            SlotValue::Raw(bytes) => assert_eq!(bytes, payload),
            other => panic!("expected Raw, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_follows_truncated() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let mut slots = Vec::new();
        // A "raw follows" code with no payload bytes after the control block.
        let input = [253u8, 0, 0, 0, 0, 0, 0, 0];
        assert!(decompressor.decompress_row(&input, 1, &mut slots).is_err());
    }

    #[test]
    fn test_cross_block_rows() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let mut slots = Vec::with_capacity(8);
        // Two three-slot rows share a single control block.
        let input: Vec<u8> = vec![101, 102, 103, 104, 105, 106, 0, 0];
        decompressor.decompress_row(&input, 3, &mut slots).unwrap();
        assert_eq!(slots.len(), 3);
        assert_numeric(slots[0], 1.0);
        decompressor.decompress_row(&input, 3, &mut slots).unwrap();
        assert_eq!(slots.len(), 3);
        assert_numeric(slots[0], 4.0);
    }

    #[test]
    fn test_end_of_file_code() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let mut slots = Vec::new();
        let input = [101u8, COMPRESS_END_OF_FILE, 0, 0, 0, 0, 0, 0];
        decompressor.decompress_row(&input, 2, &mut slots).unwrap();
        // The row is cut short by the end-of-file code.
        assert_eq!(slots.len(), 1);
        assert!(decompressor.is_eof());
        // Once at end of file, further calls yield nothing.
        decompressor.decompress_row(&input, 2, &mut slots).unwrap();
        assert!(slots.is_empty());
    }

    #[test]
    fn test_raw_path_writes_slot_bytes() {
        let mut decompressor = BytecodeDecompressor::new(BIAS);
        let input = [101u8, 254, 255, 0, 0, 0, 0, 0];
        let mut output = [0u8; 24];
        let complete = decompressor
            .decompress_row_raw(&input, 3, &mut output, 0)
            .unwrap();
        assert!(complete);
        assert_eq!(&output[..8], &1.0_f64.to_le_bytes()[..]);
        assert_eq!(&output[8..16], &SPACES_RAW[..]);
        assert_eq!(&output[16..], &SYSMIS_RAW[..]);
    }
}