Skip to main content

symbolic_common/
byteview.rs

1//! A wrapper type providing direct memory access to binary data.
2//!
3//! See the [`ByteView`] struct for more documentation.
4//!
5//! [`ByteView`]: struct.ByteView.html
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io;
10use std::ops::Deref;
11use std::path::Path;
12use std::sync::Arc;
13
14#[cfg(not(target_arch = "wasm32"))]
15use memmap2::Mmap;
16
17use crate::cell::StableDeref;
18
/// The owner of data behind a ByteView.
///
/// This can either be an mmapped file, an owned buffer or a borrowed binary slice.
#[derive(Debug)]
enum ByteViewBacking<'a> {
    /// In-memory bytes: either borrowed for `'a` or owned as a `Vec<u8>`.
    Buf(Cow<'a, [u8]>),
    /// A memory-mapped file. This variant is compiled out on `wasm32`, where `memmap2::Mmap`
    /// is not imported.
    #[cfg(not(target_arch = "wasm32"))]
    Mmap(Mmap),
}
28
29impl Deref for ByteViewBacking<'_> {
30    type Target = [u8];
31
32    fn deref(&self) -> &Self::Target {
33        match *self {
34            ByteViewBacking::Buf(ref buf) => buf,
35            #[cfg(not(target_arch = "wasm32"))]
36            ByteViewBacking::Mmap(ref mmap) => mmap,
37        }
38    }
39}
40
/// A smart pointer for byte data.
///
/// This type can be used to uniformly access bytes that were created either from mmapping in a
/// path, a vector or a borrowed slice. A `ByteView` dereferences into a `&[u8]` and guarantees
/// random access to the underlying buffer or file.
///
/// A `ByteView` can be constructed from borrowed slices, vectors or memory mapped from the file
/// system directly.
///
/// Cloning a `ByteView` does not copy the bytes: all clones share one backing storage.
///
/// # Example
///
/// The most common way to use `ByteView` is to construct it from a file handle. The handle is
/// consumed by `ByteView::map_file`, and the memory map created from it is kept alive until the
/// last clone of the `ByteView` is dropped:
///
/// ```
/// use std::io::Write;
/// use symbolic_common::ByteView;
///
/// fn main() -> Result<(), std::io::Error> {
///     let mut file = tempfile::tempfile()?;
///     file.write_all(b"1234")?;
///
///     let view = ByteView::map_file(file)?;
///     assert_eq!(view.as_slice(), b"1234");
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug)]
pub struct ByteView<'a> {
    // Reference-counted so that the derived `Clone` shares the storage instead of copying it.
    backing: Arc<ByteViewBacking<'a>>,
}
72
73impl<'a> ByteView<'a> {
74    fn with_backing(backing: ByteViewBacking<'a>) -> Self {
75        ByteView {
76            backing: Arc::new(backing),
77        }
78    }
79
80    /// Constructs a `ByteView` from a `Cow`.
81    ///
82    /// # Example
83    ///
84    /// ```
85    /// use std::borrow::Cow;
86    /// use symbolic_common::ByteView;
87    ///
88    /// let cow = Cow::Borrowed(&b"1234"[..]);
89    /// let view = ByteView::from_cow(cow);
90    /// ```
91    pub fn from_cow(cow: Cow<'a, [u8]>) -> Self {
92        ByteView::with_backing(ByteViewBacking::Buf(cow))
93    }
94
95    /// Constructs a `ByteView` from a byte slice.
96    ///
97    /// # Example
98    ///
99    /// ```
100    /// use symbolic_common::ByteView;
101    ///
102    /// let view = ByteView::from_slice(b"1234");
103    /// ```
104    pub fn from_slice(buffer: &'a [u8]) -> Self {
105        ByteView::from_cow(Cow::Borrowed(buffer))
106    }
107
108    /// Constructs a `ByteView` from a vector of bytes.
109    ///
110    /// # Example
111    ///
112    /// ```
113    /// use symbolic_common::ByteView;
114    ///
115    /// let vec = b"1234".to_vec();
116    /// let view = ByteView::from_vec(vec);
117    /// ```
118    pub fn from_vec(buffer: Vec<u8>) -> Self {
119        ByteView::from_cow(Cow::Owned(buffer))
120    }
121
122    /// Constructs a `ByteView` from an open file handle by memory mapping the file.
123    ///
124    /// See [`ByteView::map_file_ref`] for a non-consuming version of this constructor.
125    ///
126    /// # Example
127    ///
128    /// ```
129    /// use std::io::Write;
130    /// use symbolic_common::ByteView;
131    ///
132    /// fn main() -> Result<(), std::io::Error> {
133    ///     let mut file = tempfile::tempfile()?;
134    ///     let view = ByteView::map_file(file)?;
135    ///     Ok(())
136    /// }
137    /// ```
138    #[cfg(not(target_arch = "wasm32"))]
139    pub fn map_file(file: File) -> Result<Self, io::Error> {
140        Self::map_file_ref(&file)
141    }
142
143    /// Constructs a `ByteView` from an open file handle by memory mapping the file.
144    ///
145    /// The main difference with [`ByteView::map_file`] is that this takes the [`File`] by
146    /// reference rather than consuming it.
147    ///
148    /// # Example
149    ///
150    /// ```
151    /// use std::io::Write;
152    /// use symbolic_common::ByteView;
153    ///
154    /// fn main() -> Result<(), std::io::Error> {
155    ///     let mut file = tempfile::tempfile()?;
156    ///     let view = ByteView::map_file_ref(&file)?;
157    ///     Ok(())
158    /// }
159    /// ```
160    #[cfg(not(target_arch = "wasm32"))]
161    pub fn map_file_ref(file: &File) -> Result<Self, io::Error> {
162        let backing = match unsafe { Mmap::map(file) } {
163            Ok(mmap) => ByteViewBacking::Mmap(mmap),
164            Err(err) => {
165                // this is raised on empty mmaps which we want to ignore. The 1006 Windows error
166                // looks like "The volume for a file has been externally altered so that the opened
167                // file is no longer valid."
168                if err.kind() == io::ErrorKind::InvalidInput
169                    || (cfg!(windows) && err.raw_os_error() == Some(1006))
170                {
171                    ByteViewBacking::Buf(Cow::Borrowed(b""))
172                } else {
173                    return Err(err);
174                }
175            }
176        };
177
178        Ok(ByteView::with_backing(backing))
179    }
180
181    /// Constructs a `ByteView` from any `std::io::Reader`.
182    ///
183    /// **Note**: This currently consumes the entire reader and stores its data in an internal
184    /// buffer. Prefer [`open`] when reading from the file system or [`from_slice`] / [`from_vec`]
185    /// for in-memory operations. This behavior might change in the future.
186    ///
187    /// # Example
188    ///
189    /// ```
190    /// use std::io::Cursor;
191    /// use symbolic_common::ByteView;
192    ///
193    /// fn main() -> Result<(), std::io::Error> {
194    ///     let reader = Cursor::new(b"1234");
195    ///     let view = ByteView::read(reader)?;
196    ///     Ok(())
197    /// }
198    /// ```
199    ///
200    /// [`open`]: struct.ByteView.html#method.open
201    /// [`from_slice`]: struct.ByteView.html#method.from_slice
202    /// [`from_vec`]: struct.ByteView.html#method.from_vec
203    pub fn read<R: io::Read>(mut reader: R) -> Result<Self, io::Error> {
204        let mut buffer = vec![];
205        reader.read_to_end(&mut buffer)?;
206        Ok(ByteView::from_vec(buffer))
207    }
208
209    /// Constructs a `ByteView` from a file path by memory mapping the file.
210    ///
211    /// # Example
212    ///
213    /// ```no_run
214    /// use symbolic_common::ByteView;
215    ///
216    /// fn main() -> Result<(), std::io::Error> {
217    ///     let view = ByteView::open("test.txt")?;
218    ///     Ok(())
219    /// }
220    /// ```
221    #[cfg(not(target_arch = "wasm32"))]
222    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
223        let file = File::open(path)?;
224        Self::map_file(file)
225    }
226
227    /// Constructs a `ByteView` from a file path by reading the entire file.
228    ///
229    /// On `wasm32` targets there is no `mmap`, so the file is read into an
230    /// owned buffer instead.
231    #[cfg(target_arch = "wasm32")]
232    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
233        let file = File::open(path)?;
234        Self::read(file)
235    }
236
237    /// Returns a slice of the underlying data.
238    ///
239    ///
240    /// # Example
241    ///
242    /// ```
243    /// use symbolic_common::ByteView;
244    ///
245    /// let view = ByteView::from_slice(b"1234");
246    /// let data = view.as_slice();
247    /// ```
248    #[inline(always)]
249    pub fn as_slice(&self) -> &[u8] {
250        self.backing.deref()
251    }
252
253    /// Applies a [`AccessPattern`] hint to the backing storage.
254    ///
255    /// A hint can be applied when the predominantly access pattern
256    /// for this byte view is known.
257    ///
258    /// Applying the wrong hint may have significant effects on performance.
259    ///
260    /// Hints are applied on best effort basis, not all platforms
261    /// support the same hints, not all backing storages support
262    /// hints.
263    ///
264    /// # Example
265    ///
266    /// ```
267    /// use std::io::Write;
268    /// use symbolic_common::{ByteView, AccessPattern};
269    ///
270    /// fn main() -> Result<(), std::io::Error> {
271    ///     let mut file = tempfile::tempfile()?;
272    ///     let view = ByteView::map_file_ref(&file)?;
273    ///     let _ = view.hint(AccessPattern::Random);
274    ///     Ok(())
275    /// }
276    /// ```
277    pub fn hint(&self, hint: AccessPattern) -> Result<(), io::Error> {
278        let _hint = hint; // silence unused lint
279        match self.backing.deref() {
280            ByteViewBacking::Buf(_) => Ok(()),
281            #[cfg(unix)]
282            ByteViewBacking::Mmap(mmap) => mmap.advise(_hint.to_madvise()),
283            #[cfg(all(not(unix), not(target_arch = "wasm32")))]
284            ByteViewBacking::Mmap(_) => Ok(()),
285        }
286    }
287}
288
289impl AsRef<[u8]> for ByteView<'_> {
290    #[inline(always)]
291    fn as_ref(&self) -> &[u8] {
292        self.as_slice()
293    }
294}
295
296impl Deref for ByteView<'_> {
297    type Target = [u8];
298
299    #[inline(always)]
300    fn deref(&self) -> &Self::Target {
301        self.as_slice()
302    }
303}
304
// SAFETY: presumably `StableDeref` requires the dereferenced slice to keep its address when the
// `ByteView` value itself is moved — TODO confirm against the trait's contract in `crate::cell`.
// The bytes are reached through the `Arc` stored in `ByteView::backing`, so moving a `ByteView`
// moves only that pointer and not the data it refers to.
unsafe impl StableDeref for ByteView<'_> {}
306
/// Values supported by [`ByteView::hint`].
///
/// This is largely an abstraction over [`madvise(2)`] and [`fadvise(2)`].
///
/// The type is a plain value: it can be copied, compared for equality and used as a key in
/// hashed collections.
///
/// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html
/// [`fadvise(2)`]: https://man7.org/linux/man-pages/man2/posix_fadvise.2.html
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AccessPattern {
    /// No special treatment.
    ///
    /// The operating system is in full control of the buffer,
    /// a generally good default.
    ///
    /// This is the default.
    #[default]
    Normal,
    /// Expect access to be random.
    ///
    /// Read ahead might be less useful than normally.
    Random,
    /// Expect access to be in sequential order.
    ///
    /// Read ahead might be very useful. After reading data there is a high chance it will not
    /// be accessed again and can be aggressively freed.
    Sequential,
}
332
333impl AccessPattern {
334    #[cfg(unix)]
335    fn to_madvise(self) -> memmap2::Advice {
336        match self {
337            AccessPattern::Normal => memmap2::Advice::Normal,
338            AccessPattern::Random => memmap2::Advice::Random,
339            AccessPattern::Sequential => memmap2::Advice::Sequential,
340        }
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    use std::io::{Read, Seek, Write};

    use similar_asserts::assert_eq;
    use tempfile::NamedTempFile;

    /// Opening a zero-length file must succeed and yield an empty view.
    #[test]
    fn test_open_empty_file() -> Result<(), std::io::Error> {
        let tmp = NamedTempFile::new()?;

        let view = ByteView::open(tmp.path())?;
        assert_eq!(&*view, b"");

        Ok(())
    }

    /// Opening a file by path must expose exactly the bytes written to it.
    #[test]
    fn test_open_file() -> Result<(), std::io::Error> {
        let mut tmp = NamedTempFile::new()?;

        tmp.write_all(b"1234")?;

        let view = ByteView::open(tmp.path())?;
        assert_eq!(&*view, b"1234");

        Ok(())
    }

    /// Mapping a file by reference must leave the handle usable, and the view must stay
    /// readable after the file was deleted on disk and the handle was dropped.
    #[test]
    fn test_mmap_fd_reuse() -> Result<(), std::io::Error> {
        let mut tmp = NamedTempFile::new()?;
        tmp.write_all(b"1234")?;

        let view = ByteView::map_file_ref(tmp.as_file())?;

        // `into_file` deletes the file on disk; remember the path first so that its absence can
        // be checked afterwards.
        let _path = tmp.path().to_path_buf();
        let mut file = tmp.into_file();
        #[cfg(not(windows))]
        {
            assert!(!_path.exists());
        }

        // Ensure we can still read from the file after mmapping and deleting it on disk.
        let mut buf = Vec::new();
        file.rewind()?;
        file.read_to_end(&mut buf)?;
        assert_eq!(buf, b"1234");
        drop(file);

        // Ensure the byteview can still read the file as well.
        assert_eq!(&*view, b"1234");

        Ok(())
    }
}