lance_encoding/encodings/logical/primitive/
fullzip.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Routines for encoding and decoding full-zip data
5//!
6//! Full-zip is one of the two structural encodings in Lance 2.1.
7//! In this approach the various compressed buffers are zipped
8//! together so that all parts of a value are stored contiguously in memory.
9//!
10//! This requires transparent compression and is most suitable for
11//! large data types.
12
13use crate::{
14    data::{DataBlock, FixedWidthDataBlock, VariableWidthBlock},
15    format::pb21::CompressiveEncoding,
16};
17
18use lance_core::Result;
19
20/// Per-value compression must either:
21///
22/// A single buffer of fixed-width values
23/// A single buffer of value data and a buffer of offsets
24///
25/// TODO: In the future we may allow metadata buffers
26#[derive(Debug)]
27pub enum PerValueDataBlock {
28    Fixed(FixedWidthDataBlock),
29    Variable(VariableWidthBlock),
30}
31
32impl PerValueDataBlock {
33    pub fn data_size(&self) -> u64 {
34        match self {
35            Self::Fixed(fixed) => fixed.data_size(),
36            Self::Variable(variable) => variable.data_size(),
37        }
38    }
39}
40
41/// Trait for compression algorithms that are suitable for use in the zipped structural encoding
42///
43/// This compression must return either a FixedWidthDataBlock or a VariableWidthBlock.  This is because
44/// we need to zip the data and those are the only two blocks we know how to zip today.
45///
46/// In addition, the compressed data must be able to be decompressed in a random-access fashion.
47/// This means that the decompression algorithm must be able to decompress any value without
48/// decompressing all values before it.
49pub trait PerValueCompressor: std::fmt::Debug + Send + Sync {
50    /// Compress the data into a single buffer
51    ///
52    /// Also returns a description of the compression that can be used to decompress when reading the data back
53    fn compress(&self, data: DataBlock) -> Result<(PerValueDataBlock, CompressiveEncoding)>;
54}