summavy_common/
file_slice.rs

1use std::ops::{Deref, Range, RangeBounds};
2use std::sync::Arc;
3use std::{fmt, io};
4
5use async_trait::async_trait;
6use ownedbytes::{OwnedBytes, StableDeref};
7
8use crate::HasLen;
9
10/// Objects that represents files sections in tantivy.
11///
12/// By contract, whatever happens to the directory file, as long as a FileHandle
13/// is alive, the data associated with it cannot be altered or destroyed.
14///
15/// The underlying behavior is therefore specific to the `Directory` that
16/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
17/// on the filesystem.
18
19#[async_trait]
20pub trait FileHandle: 'static + Send + Sync + HasLen + fmt::Debug {
21    /// Reads a slice of bytes.
22    ///
23    /// This method may panic if the range requested is invalid.
24    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes>;
25
26    #[doc(hidden)]
27    async fn read_bytes_async(&self, _byte_range: Range<usize>) -> io::Result<OwnedBytes> {
28        Err(io::Error::new(
29            io::ErrorKind::Unsupported,
30            "Async read is not supported.",
31        ))
32    }
33}
34
35#[async_trait]
36impl FileHandle for &'static [u8] {
37    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
38        let bytes = &self[range];
39        Ok(OwnedBytes::new(bytes))
40    }
41
42    async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
43        Ok(self.read_bytes(byte_range)?)
44    }
45}
46
47impl<B> From<B> for FileSlice
48where B: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync
49{
50    fn from(bytes: B) -> FileSlice {
51        FileSlice::new(Arc::new(OwnedBytes::new(bytes)))
52    }
53}
54
55/// Logical slice of read only file in tantivy.
56///
57/// It can be cloned and sliced cheaply.
58#[derive(Clone)]
59pub struct FileSlice {
60    data: Arc<dyn FileHandle>,
61    range: Range<usize>,
62}
63
64impl fmt::Debug for FileSlice {
65    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
66        write!(f, "FileSlice({:?}, {:?})", &self.data, self.range)
67    }
68}
69
/// Resolves `rel_range`, expressed relative to the start of `orig_range`,
/// into an absolute `Range`.
///
/// In other words, it returns the sub-range of `orig_range` obtained by
/// applying the `RangeBounds` object `rel_range` to it.
///
/// For instance, `combine_ranges(2..11, 5..=7)` returns `7..10`: the
/// sub-range starts at offset 5 of `2..11` (absolute position 7) and ends at
/// offset 7 included (absolute position 9, hence the exclusive end 10).
///
/// # Panics
///
/// Panics if the resulting range is inverted, if it is not contained in
/// `orig_range`, or if computing one of its bounds overflows `usize`.
fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R) -> Range<usize> {
    use std::ops::Bound;

    // Shifts a relative offset (plus `extra`, used to turn an excluded start
    // or an included end into its half-open equivalent) by the start of the
    // original range. Checked arithmetic is required here: a wrapping addition
    // in release builds could otherwise slip past the bound assertions below.
    let to_absolute = |rel_offset: usize, extra: usize| -> usize {
        rel_offset
            .checked_add(extra)
            .and_then(|offset| orig_range.start.checked_add(offset))
            .expect("range bound overflows usize")
    };

    let start: usize = match rel_range.start_bound() {
        Bound::Included(&rel_start) => to_absolute(rel_start, 0),
        Bound::Excluded(&rel_start) => to_absolute(rel_start, 1),
        Bound::Unbounded => orig_range.start,
    };
    assert!(start <= orig_range.end);

    let end: usize = match rel_range.end_bound() {
        Bound::Included(&rel_end) => to_absolute(rel_end, 1),
        Bound::Excluded(&rel_end) => to_absolute(rel_end, 0),
        Bound::Unbounded => orig_range.end,
    };
    assert!(end >= start);
    assert!(end <= orig_range.end);

    start..end
}
97
98impl FileSlice {
99    /// Wraps a FileHandle.
100    pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
101        let num_bytes = file_handle.len();
102        FileSlice::new_with_num_bytes(file_handle, num_bytes)
103    }
104
105    /// Wraps a FileHandle.
106    #[doc(hidden)]
107    #[must_use]
108    pub fn new_with_num_bytes(file_handle: Arc<dyn FileHandle>, num_bytes: usize) -> Self {
109        FileSlice {
110            data: file_handle,
111            range: 0..num_bytes,
112        }
113    }
114
115    /// Creates a fileslice that is just a view over a slice of the data.
116    ///
117    /// # Panics
118    ///
119    /// Panics if `byte_range.end` exceeds the filesize.
120    #[must_use]
121    #[inline]
122    pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
123        FileSlice {
124            data: self.data.clone(),
125            range: combine_ranges(self.range.clone(), byte_range),
126        }
127    }
128
129    /// Creates an empty FileSlice
130    pub fn empty() -> FileSlice {
131        const EMPTY_SLICE: &[u8] = &[];
132        FileSlice::from(EMPTY_SLICE)
133    }
134
135    /// Returns a `OwnedBytes` with all of the data in the `FileSlice`.
136    ///
137    /// The behavior is strongly dependent on the implementation of the underlying
138    /// `Directory` and the `FileSliceTrait` it creates.
139    /// In particular, it is  up to the `Directory` implementation
140    /// to handle caching if needed.
141    pub fn read_bytes(&self) -> io::Result<OwnedBytes> {
142        self.data.read_bytes(self.range.clone())
143    }
144
145    #[doc(hidden)]
146    pub async fn read_bytes_async(&self) -> io::Result<OwnedBytes> {
147        self.data.read_bytes_async(self.range.clone()).await
148    }
149
150    /// Reads a specific slice of data.
151    ///
152    /// This is equivalent to running `file_slice.slice(from, to).read_bytes()`.
153    pub fn read_bytes_slice(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
154        assert!(
155            range.end <= self.len(),
156            "end of requested range exceeds the fileslice length ({} > {})",
157            range.end,
158            self.len()
159        );
160        self.data
161            .read_bytes(self.range.start + range.start..self.range.start + range.end)
162    }
163
164    #[doc(hidden)]
165    pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
166        assert!(
167            self.range.start + byte_range.end <= self.range.end,
168            "`to` exceeds the fileslice length"
169        );
170        self.data
171            .read_bytes_async(
172                self.range.start + byte_range.start..self.range.start + byte_range.end,
173            )
174            .await
175    }
176
177    /// Splits the FileSlice at the given offset and return two file slices.
178    /// `file_slice[..split_offset]` and `file_slice[split_offset..]`.
179    ///
180    /// This operation is cheap and must not copy any underlying data.
181    pub fn split(self, left_len: usize) -> (FileSlice, FileSlice) {
182        let left = self.slice_to(left_len);
183        let right = self.slice_from(left_len);
184        (left, right)
185    }
186
187    /// Splits the file slice at the given offset and return two file slices.
188    /// `file_slice[..split_offset]` and `file_slice[split_offset..]`.
189    pub fn split_from_end(self, right_len: usize) -> (FileSlice, FileSlice) {
190        let left_len = self.len() - right_len;
191        self.split(left_len)
192    }
193
194    /// Like `.slice(...)` but enforcing only the `from`
195    /// boundary.
196    ///
197    /// Equivalent to `.slice(from_offset, self.len())`
198    #[must_use]
199    pub fn slice_from(&self, from_offset: usize) -> FileSlice {
200        self.slice(from_offset..self.len())
201    }
202
203    /// Returns a slice from the end.
204    ///
205    /// Equivalent to `.slice(self.len() - from_offset, self.len())`
206    #[must_use]
207    pub fn slice_from_end(&self, from_offset: usize) -> FileSlice {
208        self.slice(self.len() - from_offset..self.len())
209    }
210
211    /// Like `.slice(...)` but enforcing only the `to`
212    /// boundary.
213    ///
214    /// Equivalent to `.slice(0, to_offset)`
215    #[must_use]
216    pub fn slice_to(&self, to_offset: usize) -> FileSlice {
217        self.slice(0..to_offset)
218    }
219}
220
221#[async_trait]
222impl FileHandle for FileSlice {
223    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
224        self.read_bytes_slice(range)
225    }
226
227    async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
228        self.read_bytes_slice_async(byte_range).await
229    }
230}
231
232impl HasLen for FileSlice {
233    fn len(&self) -> usize {
234        self.range.len()
235    }
236}
237
238#[async_trait]
239impl FileHandle for OwnedBytes {
240    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
241        Ok(self.slice(range))
242    }
243
244    async fn read_bytes_async(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
245        self.read_bytes(range)
246    }
247}
248
#[cfg(test)]
mod tests {
    use std::io;
    use std::ops::Bound;
    use std::sync::Arc;

    use super::{FileHandle, FileSlice};
    use crate::file_slice::combine_ranges;
    use crate::HasLen;

    /// Builds the `FileSlice` over `b"abcdef"` shared by most tests.
    fn abcdef() -> FileSlice {
        FileSlice::new(Arc::new(&b"abcdef"[..]))
    }

    /// Reads the whole content of `file_slice` into a `Vec<u8>`.
    fn content(file_slice: &FileSlice) -> io::Result<Vec<u8>> {
        let bytes = file_slice.read_bytes()?;
        Ok(bytes.as_slice().to_vec())
    }

    #[test]
    fn test_file_slice() -> io::Result<()> {
        let file_slice = abcdef();
        assert_eq!(file_slice.len(), 6);

        // Plain prefix / suffix slicing, including chained slicing.
        assert_eq!(content(&file_slice.slice_from(2))?, b"cdef");
        assert_eq!(content(&file_slice.slice_to(2))?, b"ab");
        assert_eq!(content(&file_slice.slice_from(1).slice_to(2))?, b"bc");

        // Splitting, counting from the start.
        let (left, right) = file_slice.clone().split(0);
        assert_eq!(content(&left)?, b"");
        assert_eq!(content(&right)?, b"abcdef");

        let (left, right) = file_slice.clone().split(2);
        assert_eq!(content(&left)?, b"ab");
        assert_eq!(content(&right)?, b"cdef");

        // Splitting, counting from the end.
        let (left, right) = file_slice.clone().split_from_end(0);
        assert_eq!(content(&left)?, b"abcdef");
        assert_eq!(content(&right)?, b"");

        let (left, right) = file_slice.split_from_end(2);
        assert_eq!(content(&left)?, b"abcd");
        assert_eq!(content(&right)?, b"ef");

        Ok(())
    }

    #[test]
    fn test_file_slice_trait_slice_len() {
        let handle: Box<dyn FileHandle> = Box::new(&b"abc"[..]);
        assert_eq!(handle.len(), 3);
    }

    #[test]
    fn test_slice_simple_read() -> io::Result<()> {
        let slice = abcdef();
        assert_eq!(slice.len(), 6);
        assert_eq!(content(&slice)?, b"abcdef");
        assert_eq!(content(&slice.slice(1..4))?, b"bcd");
        Ok(())
    }

    #[test]
    fn test_slice_read_slice() -> io::Result<()> {
        let slice_deref = abcdef();
        let bytes = slice_deref.read_bytes_slice(1..4)?;
        assert_eq!(bytes.as_slice(), b"bcd");
        Ok(())
    }

    #[test]
    #[should_panic(expected = "end of requested range exceeds the fileslice length (10 > 6)")]
    fn test_slice_read_slice_invalid_range_exceeds() {
        // The call itself is expected to panic: its result is never inspected.
        let slice_deref = abcdef();
        let _ = slice_deref.read_bytes_slice(0..10);
    }

    #[test]
    fn test_combine_range() {
        assert_eq!(1..2, combine_ranges(1..3, 0..1));
        assert_eq!(2..3, combine_ranges(1..3, 1..));
        assert_eq!(1..3, combine_ranges(1..4, ..2));
        assert_eq!(5..8, combine_ranges(3..10, 2..5));
        assert_eq!(7..10, combine_ranges(2..11, 5..=7));

        let after_fifth = (Bound::Excluded(5), Bound::Unbounded);
        assert_eq!(8..11, combine_ranges(2..11, after_fifth));
    }

    #[test]
    #[should_panic]
    fn test_combine_range_panics() {
        // 1..4 relative to 3..5 would end at 7, past the original range.
        let _ = combine_ranges(3..5, 1..4);
    }
}