lance_encoding/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::ops::Range;
5
6use bytes::Bytes;
7use futures::{future::BoxFuture, FutureExt, TryFutureExt};
8
9use lance_core::Result;
10
11pub mod buffer;
12pub mod compression;
13pub mod compression_algo;
14pub mod compression_config;
15pub mod constants;
16pub mod data;
17pub mod decoder;
18pub mod encoder;
19pub mod encodings;
20pub mod format;
21pub mod previous;
22pub mod repdef;
23pub mod statistics;
24#[cfg(test)]
25pub mod testing;
26pub mod utils;
27pub mod version;
28
// Big-endian targets are rejected at compile time.  Supporting them is possible in the future,
// but it is not a priority: it would require extensive testing (probably through emulation) to
// confirm that every encoding still produces correct results.
#[cfg(not(target_endian = "little"))]
compile_error!("Lance encodings only support little-endian systems.");
34
35/// A trait for an I/O service
36///
37/// This represents the I/O API that the encoders and decoders need in order to operate.
38/// We specify this as a trait so that lance-encodings does not need to depend on lance-io
39///
40/// In general, it is assumed that this trait will be implemented by some kind of "file reader"
41/// or "file scheduler".  The encodings here are all limited to accessing a single file.
42pub trait EncodingsIo: std::fmt::Debug + Send + Sync {
43    /// Submit an I/O request
44    ///
45    /// The response must contain a `Bytes` object for each range requested even if the underlying
46    /// I/O was coalesced into fewer actual requests.
47    ///
48    /// # Arguments
49    ///
50    /// * `ranges` - the byte ranges to request
51    /// * `priority` - the priority of the request
52    ///
53    /// Priority should be set to the lowest row number that this request is delivering data for.
54    /// This is important in cases where indirect I/O causes high priority requests to be submitted
55    /// after low priority requests.  We want to fulfill the indirect I/O more quickly so that we
56    /// can decode as quickly as possible.
57    ///
58    /// The implementation should be able to handle empty ranges, and should return an empty
59    /// byte buffer for each empty range.
60    fn submit_request(
61        &self,
62        range: Vec<Range<u64>>,
63        priority: u64,
64    ) -> BoxFuture<'static, Result<Vec<Bytes>>>;
65
66    /// Submit an I/O request with a single range
67    ///
68    /// This is just a utitliy function that wraps [`EncodingsIo::submit_request`] for the common
69    /// case of a single range request.
70    fn submit_single(
71        &self,
72        range: std::ops::Range<u64>,
73        priority: u64,
74    ) -> BoxFuture<'static, lance_core::Result<bytes::Bytes>> {
75        self.submit_request(vec![range], priority)
76            .map_ok(|mut v| v.pop().unwrap())
77            .boxed()
78    }
79}
80
81/// An implementation of EncodingsIo that serves data from an in-memory buffer
82#[derive(Debug)]
83pub struct BufferScheduler {
84    data: Bytes,
85}
86
87impl BufferScheduler {
88    pub fn new(data: Bytes) -> Self {
89        Self { data }
90    }
91
92    fn satisfy_request(&self, req: Range<u64>) -> Bytes {
93        self.data.slice(req.start as usize..req.end as usize)
94    }
95}
96
97impl EncodingsIo for BufferScheduler {
98    fn submit_request(
99        &self,
100        ranges: Vec<Range<u64>>,
101        _priority: u64,
102    ) -> BoxFuture<'static, Result<Vec<Bytes>>> {
103        std::future::ready(Ok(ranges
104            .into_iter()
105            .map(|range| self.satisfy_request(range))
106            .collect::<Vec<_>>()))
107        .boxed()
108    }
109}