Skip to main content

nodedb_codec/fastlanes/
block.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Block-level encode and decode for the FastLanes FOR + bit-packing codec.
4
5use super::bits::{pack_bits, unpack_bits};
6use crate::error::CodecError;
7
8/// Per-block header: 2 bytes count + 1 byte bit_width + 8 bytes min_value.
9pub(super) const BLOCK_HEADER_SIZE: usize = 11;
10
11/// Encode a single block (up to 1024 values).
12pub(super) fn encode_block(values: &[i64], out: &mut Vec<u8>) {
13    let count = values.len() as u16;
14
15    // Find min/max for FOR.
16    let mut min_val = values[0];
17    let mut max_val = values[0];
18    for &v in &values[1..] {
19        if v < min_val {
20            min_val = v;
21        }
22        if v > max_val {
23            max_val = v;
24        }
25    }
26
27    // Compute residuals and bit width.
28    let range = (max_val as u128).wrapping_sub(min_val as u128) as u64;
29    let bit_width = if range == 0 {
30        0u8
31    } else {
32        64 - range.leading_zeros() as u8
33    };
34
35    // Block header.
36    out.extend_from_slice(&count.to_le_bytes());
37    out.push(bit_width);
38    out.extend_from_slice(&min_val.to_le_bytes());
39
40    if bit_width == 0 {
41        // All values identical — no packed data needed.
42        return;
43    }
44
45    // Bit-pack residuals.
46    // This loop is structured for auto-vectorization: simple operations on
47    // contiguous arrays, no branches in the inner loop, predictable access.
48    let packed_bytes = (count as usize * bit_width as usize).div_ceil(8);
49    let pack_start = out.len();
50    out.resize(pack_start + packed_bytes, 0);
51    let packed = &mut out[pack_start..];
52
53    let bw = bit_width as u64;
54    let mask = if bw == 64 { u64::MAX } else { (1u64 << bw) - 1 };
55
56    // Pack values into the byte array, bit by bit.
57    let mut bit_offset: usize = 0;
58    for &val in values {
59        let residual = (val.wrapping_sub(min_val) as u64) & mask;
60        pack_bits(packed, bit_offset, residual, bit_width);
61        bit_offset += bit_width as usize;
62    }
63}
64
65/// Decode a single block from the byte stream.
66///
67/// Returns the new offset after this block.
68pub(super) fn decode_block(
69    data: &[u8],
70    offset: usize,
71    values: &mut Vec<i64>,
72    block_idx: usize,
73) -> Result<usize, CodecError> {
74    if offset + BLOCK_HEADER_SIZE > data.len() {
75        return Err(CodecError::Truncated {
76            expected: offset + BLOCK_HEADER_SIZE,
77            actual: data.len(),
78        });
79    }
80
81    let count = u16::from_le_bytes([data[offset], data[offset + 1]]) as usize;
82    let bit_width = data[offset + 2];
83    let min_val = i64::from_le_bytes([
84        data[offset + 3],
85        data[offset + 4],
86        data[offset + 5],
87        data[offset + 6],
88        data[offset + 7],
89        data[offset + 8],
90        data[offset + 9],
91        data[offset + 10],
92    ]);
93
94    let mut pos = offset + BLOCK_HEADER_SIZE;
95
96    if bit_width == 0 {
97        // All values are min_val.
98        values.extend(std::iter::repeat_n(min_val, count));
99        return Ok(pos);
100    }
101
102    if bit_width > 64 {
103        return Err(CodecError::Corrupt {
104            detail: format!("block {block_idx}: invalid bit_width {bit_width}"),
105        });
106    }
107
108    let packed_bytes = (count * bit_width as usize).div_ceil(8);
109    if pos + packed_bytes > data.len() {
110        return Err(CodecError::Truncated {
111            expected: pos + packed_bytes,
112            actual: data.len(),
113        });
114    }
115
116    let packed = &data[pos..pos + packed_bytes];
117    let mask: u64 = if bit_width == 64 {
118        u64::MAX
119    } else {
120        (1u64 << bit_width) - 1
121    };
122
123    // Unpack residuals and add min_val.
124    let mut bit_offset: usize = 0;
125    for _ in 0..count {
126        let residual = unpack_bits(packed, bit_offset, bit_width) & mask;
127        values.push(min_val.wrapping_add(residual as i64));
128        bit_offset += bit_width as usize;
129    }
130
131    pos += packed_bytes;
132    Ok(pos)
133}
134
135/// Skip a block without decoding, returning the next byte offset.
136pub(super) fn skip_block(
137    data: &[u8],
138    offset: usize,
139    block_idx: usize,
140) -> Result<usize, CodecError> {
141    if offset + BLOCK_HEADER_SIZE > data.len() {
142        return Err(CodecError::Truncated {
143            expected: offset + BLOCK_HEADER_SIZE,
144            actual: data.len(),
145        });
146    }
147    let count = u16::from_le_bytes([data[offset], data[offset + 1]]) as usize;
148    let bit_width = data[offset + 2];
149    if bit_width > 64 {
150        return Err(CodecError::Corrupt {
151            detail: format!("block {block_idx}: invalid bit_width {bit_width}"),
152        });
153    }
154    let packed_bytes = if bit_width == 0 {
155        0
156    } else {
157        (count * bit_width as usize).div_ceil(8)
158    };
159    Ok(offset + BLOCK_HEADER_SIZE + packed_bytes)
160}
161
162/// Compute the minimum number of bits needed to represent the range of values.
163///
164/// Useful for external callers that want to estimate compression ratio.
165pub fn bit_width_for_range(min: i64, max: i64) -> u8 {
166    let range = (max as u128).wrapping_sub(min as u128) as u64;
167    if range == 0 {
168        0
169    } else {
170        64 - range.leading_zeros() as u8
171    }
172}