lance_encoding/lib.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::ops::Range;
5
6use bytes::Bytes;
7use futures::{future::BoxFuture, FutureExt, TryFutureExt};
8
9use lance_core::Result;
10
11pub mod buffer;
12pub mod compression;
13pub mod compression_algo;
14pub mod data;
15pub mod decoder;
16pub mod encoder;
17pub mod encodings;
18pub mod format;
19pub mod repdef;
20pub mod statistics;
21#[cfg(test)]
22pub mod testing;
23pub mod utils;
24pub mod v2;
25pub mod version;
26
// Big-endian targets are rejected at compile time.
//
// We can definitely add support for big-endian machines someday.  However, it's not a priority
// and would involve extensive testing (probably through emulation) to ensure that the encodings
// are correct.
#[cfg(not(target_endian = "little"))]
compile_error!("Lance encodings only support little-endian systems.");
32
33/// A trait for an I/O service
34///
35/// This represents the I/O API that the encoders and decoders need in order to operate.
36/// We specify this as a trait so that lance-encodings does not need to depend on lance-io
37///
38/// In general, it is assumed that this trait will be implemented by some kind of "file reader"
39/// or "file scheduler".  The encodings here are all limited to accessing a single file.
40pub trait EncodingsIo: std::fmt::Debug + Send + Sync {
41    /// Submit an I/O request
42    ///
43    /// The response must contain a `Bytes` object for each range requested even if the underlying
44    /// I/O was coalesced into fewer actual requests.
45    ///
46    /// # Arguments
47    ///
48    /// * `ranges` - the byte ranges to request
49    /// * `priority` - the priority of the request
50    ///
51    /// Priority should be set to the lowest row number that this request is delivering data for.
52    /// This is important in cases where indirect I/O causes high priority requests to be submitted
53    /// after low priority requests.  We want to fulfill the indirect I/O more quickly so that we
54    /// can decode as quickly as possible.
55    ///
56    /// The implementation should be able to handle empty ranges, and should return an empty
57    /// byte buffer for each empty range.
58    fn submit_request(
59        &self,
60        range: Vec<Range<u64>>,
61        priority: u64,
62    ) -> BoxFuture<'static, Result<Vec<Bytes>>>;
63
64    /// Submit an I/O request with a single range
65    ///
66    /// This is just a utitliy function that wraps [`EncodingsIo::submit_request`] for the common
67    /// case of a single range request.
68    fn submit_single(
69        &self,
70        range: std::ops::Range<u64>,
71        priority: u64,
72    ) -> BoxFuture<'static, lance_core::Result<bytes::Bytes>> {
73        self.submit_request(vec![range], priority)
74            .map_ok(|mut v| v.pop().unwrap())
75            .boxed()
76    }
77}
78
/// An implementation of EncodingsIo that serves data from an in-memory buffer
///
/// Requested byte ranges are interpreted as offsets into the wrapped buffer.
#[derive(Debug)]
pub struct BufferScheduler {
    // The buffer that every request is sliced from.
    data: Bytes,
}
84
85impl BufferScheduler {
86    pub fn new(data: Bytes) -> Self {
87        Self { data }
88    }
89
90    fn satisfy_request(&self, req: Range<u64>) -> Bytes {
91        self.data.slice(req.start as usize..req.end as usize)
92    }
93}
94
95impl EncodingsIo for BufferScheduler {
96    fn submit_request(
97        &self,
98        ranges: Vec<Range<u64>>,
99        _priority: u64,
100    ) -> BoxFuture<'static, Result<Vec<Bytes>>> {
101        std::future::ready(Ok(ranges
102            .into_iter()
103            .map(|range| self.satisfy_request(range))
104            .collect::<Vec<_>>()))
105        .boxed()
106    }
107}