lance_encoding/previous/encodings/physical/
block.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use arrow_schema::DataType;
5
6use crate::{
7    data::{BlockInfo, DataBlock, OpaqueBlock},
8    encodings::physical::block::{CompressedBufferEncoder, CompressionConfig, CompressionScheme},
9    format::ProtobufUtils,
10    previous::encoder::{ArrayEncoder, EncodedArray},
11};
12
13use lance_core::Result;
14
15impl ArrayEncoder for CompressedBufferEncoder {
16    fn encode(
17        &self,
18        data: DataBlock,
19        _data_type: &DataType,
20        buffer_index: &mut u32,
21    ) -> Result<EncodedArray> {
22        let uncompressed_data = data.as_fixed_width().unwrap();
23
24        let mut compressed_buf = Vec::with_capacity(uncompressed_data.data.len());
25        self.compressor
26            .compress(&uncompressed_data.data, &mut compressed_buf)?;
27
28        let compressed_data = DataBlock::Opaque(OpaqueBlock {
29            buffers: vec![compressed_buf.into()],
30            num_values: uncompressed_data.num_values,
31            block_info: BlockInfo::new(),
32        });
33
34        let comp_buf_index = *buffer_index;
35        *buffer_index += 1;
36
37        let encoding = ProtobufUtils::flat_encoding(
38            uncompressed_data.bits_per_value,
39            comp_buf_index,
40            Some(CompressionConfig::new(CompressionScheme::Zstd, None)),
41        );
42
43        Ok(EncodedArray {
44            data: compressed_data,
45            encoding,
46        })
47    }
48}
49
#[cfg(test)]
mod tests {
    use crate::{buffer::LanceBuffer, data::FixedWidthDataBlock};

    use super::*;

    /// Encodes eight 64-bit values with the default encoder and checks that
    /// the result is a single buffer that still reports eight values.
    #[test]
    fn test_compressed_buffer_encoder() {
        let encoder = CompressedBufferEncoder::default();
        let data = DataBlock::FixedWidth(FixedWidthDataBlock {
            bits_per_value: 64,
            // The literals are typed as i64 so the buffer really holds
            // 8 x 8 bytes, matching `bits_per_value` and `num_values`.
            // Unsuffixed literals would default to i32 and produce a
            // buffer half the declared size.
            data: LanceBuffer::reinterpret_vec(vec![0_i64, 1, 2, 3, 4, 5, 6, 7]),
            num_values: 8,
            block_info: BlockInfo::new(),
        });

        let mut buffer_index = 0;
        let encoded_array_result = encoder.encode(data, &DataType::Int64, &mut buffer_index);
        assert!(encoded_array_result.is_ok(), "{:?}", encoded_array_result);
        let encoded_array = encoded_array_result.unwrap();
        assert_eq!(encoded_array.data.num_values(), 8);
        let buffers = encoded_array.data.into_buffers();
        assert_eq!(buffers.len(), 1);
        // Upper bound on the compressed size; the uncompressed input is
        // 64 bytes.
        assert!(buffers[0].len() < 64 * 8);
    }
}