symbolic_common/byteview.rs
1//! A wrapper type providing direct memory access to binary data.
2//!
3//! See the [`ByteView`] struct for more documentation.
4//!
5//! [`ByteView`]: struct.ByteView.html
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io;
10use std::ops::Deref;
11use std::path::Path;
12use std::sync::Arc;
13
14#[cfg(not(target_arch = "wasm32"))]
15use memmap2::Mmap;
16
17use crate::cell::StableDeref;
18
19/// The owner of data behind a ByteView.
20///
21/// This can either be an mmapped file, an owned buffer or a borrowed binary slice.
22#[derive(Debug)]
23enum ByteViewBacking<'a> {
24 Buf(Cow<'a, [u8]>),
25 #[cfg(not(target_arch = "wasm32"))]
26 Mmap(Mmap),
27}
28
29impl Deref for ByteViewBacking<'_> {
30 type Target = [u8];
31
32 fn deref(&self) -> &Self::Target {
33 match *self {
34 ByteViewBacking::Buf(ref buf) => buf,
35 #[cfg(not(target_arch = "wasm32"))]
36 ByteViewBacking::Mmap(ref mmap) => mmap,
37 }
38 }
39}
40
41/// A smart pointer for byte data.
42///
43/// This type can be used to uniformly access bytes that were created either from mmapping in a
44/// path, a vector or a borrowed slice. A `ByteView` dereferences into a `&[u8]` and guarantees
45/// random access to the underlying buffer or file.
46///
47/// A `ByteView` can be constructed from borrowed slices, vectors or memory mapped from the file
48/// system directly.
49///
50/// # Example
51///
52/// The most common way to use `ByteView` is to construct it from a file handle. This will own the
53/// underlying file handle until the `ByteView` is dropped:
54///
55/// ```
56/// use std::io::Write;
57/// use symbolic_common::ByteView;
58///
59/// fn main() -> Result<(), std::io::Error> {
60/// let mut file = tempfile::tempfile()?;
61/// file.write_all(b"1234");
62///
63/// let view = ByteView::map_file(file)?;
64/// assert_eq!(view.as_slice(), b"1234");
65/// Ok(())
66/// }
67/// ```
68#[derive(Clone, Debug)]
69pub struct ByteView<'a> {
70 backing: Arc<ByteViewBacking<'a>>,
71}
72
73impl<'a> ByteView<'a> {
74 fn with_backing(backing: ByteViewBacking<'a>) -> Self {
75 ByteView {
76 backing: Arc::new(backing),
77 }
78 }
79
80 /// Constructs a `ByteView` from a `Cow`.
81 ///
82 /// # Example
83 ///
84 /// ```
85 /// use std::borrow::Cow;
86 /// use symbolic_common::ByteView;
87 ///
88 /// let cow = Cow::Borrowed(&b"1234"[..]);
89 /// let view = ByteView::from_cow(cow);
90 /// ```
91 pub fn from_cow(cow: Cow<'a, [u8]>) -> Self {
92 ByteView::with_backing(ByteViewBacking::Buf(cow))
93 }
94
95 /// Constructs a `ByteView` from a byte slice.
96 ///
97 /// # Example
98 ///
99 /// ```
100 /// use symbolic_common::ByteView;
101 ///
102 /// let view = ByteView::from_slice(b"1234");
103 /// ```
104 pub fn from_slice(buffer: &'a [u8]) -> Self {
105 ByteView::from_cow(Cow::Borrowed(buffer))
106 }
107
108 /// Constructs a `ByteView` from a vector of bytes.
109 ///
110 /// # Example
111 ///
112 /// ```
113 /// use symbolic_common::ByteView;
114 ///
115 /// let vec = b"1234".to_vec();
116 /// let view = ByteView::from_vec(vec);
117 /// ```
118 pub fn from_vec(buffer: Vec<u8>) -> Self {
119 ByteView::from_cow(Cow::Owned(buffer))
120 }
121
122 /// Constructs a `ByteView` from an open file handle by memory mapping the file.
123 ///
124 /// See [`ByteView::map_file_ref`] for a non-consuming version of this constructor.
125 ///
126 /// # Example
127 ///
128 /// ```
129 /// use std::io::Write;
130 /// use symbolic_common::ByteView;
131 ///
132 /// fn main() -> Result<(), std::io::Error> {
133 /// let mut file = tempfile::tempfile()?;
134 /// let view = ByteView::map_file(file)?;
135 /// Ok(())
136 /// }
137 /// ```
138 #[cfg(not(target_arch = "wasm32"))]
139 pub fn map_file(file: File) -> Result<Self, io::Error> {
140 Self::map_file_ref(&file)
141 }
142
143 /// Constructs a `ByteView` from an open file handle by memory mapping the file.
144 ///
145 /// The main difference with [`ByteView::map_file`] is that this takes the [`File`] by
146 /// reference rather than consuming it.
147 ///
148 /// # Example
149 ///
150 /// ```
151 /// use std::io::Write;
152 /// use symbolic_common::ByteView;
153 ///
154 /// fn main() -> Result<(), std::io::Error> {
155 /// let mut file = tempfile::tempfile()?;
156 /// let view = ByteView::map_file_ref(&file)?;
157 /// Ok(())
158 /// }
159 /// ```
160 #[cfg(not(target_arch = "wasm32"))]
161 pub fn map_file_ref(file: &File) -> Result<Self, io::Error> {
162 let backing = match unsafe { Mmap::map(file) } {
163 Ok(mmap) => ByteViewBacking::Mmap(mmap),
164 Err(err) => {
165 // this is raised on empty mmaps which we want to ignore. The 1006 Windows error
166 // looks like "The volume for a file has been externally altered so that the opened
167 // file is no longer valid."
168 if err.kind() == io::ErrorKind::InvalidInput
169 || (cfg!(windows) && err.raw_os_error() == Some(1006))
170 {
171 ByteViewBacking::Buf(Cow::Borrowed(b""))
172 } else {
173 return Err(err);
174 }
175 }
176 };
177
178 Ok(ByteView::with_backing(backing))
179 }
180
181 /// Constructs a `ByteView` from any `std::io::Reader`.
182 ///
183 /// **Note**: This currently consumes the entire reader and stores its data in an internal
184 /// buffer. Prefer [`open`] when reading from the file system or [`from_slice`] / [`from_vec`]
185 /// for in-memory operations. This behavior might change in the future.
186 ///
187 /// # Example
188 ///
189 /// ```
190 /// use std::io::Cursor;
191 /// use symbolic_common::ByteView;
192 ///
193 /// fn main() -> Result<(), std::io::Error> {
194 /// let reader = Cursor::new(b"1234");
195 /// let view = ByteView::read(reader)?;
196 /// Ok(())
197 /// }
198 /// ```
199 ///
200 /// [`open`]: struct.ByteView.html#method.open
201 /// [`from_slice`]: struct.ByteView.html#method.from_slice
202 /// [`from_vec`]: struct.ByteView.html#method.from_vec
203 pub fn read<R: io::Read>(mut reader: R) -> Result<Self, io::Error> {
204 let mut buffer = vec![];
205 reader.read_to_end(&mut buffer)?;
206 Ok(ByteView::from_vec(buffer))
207 }
208
209 /// Constructs a `ByteView` from a file path by memory mapping the file.
210 ///
211 /// # Example
212 ///
213 /// ```no_run
214 /// use symbolic_common::ByteView;
215 ///
216 /// fn main() -> Result<(), std::io::Error> {
217 /// let view = ByteView::open("test.txt")?;
218 /// Ok(())
219 /// }
220 /// ```
221 #[cfg(not(target_arch = "wasm32"))]
222 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
223 let file = File::open(path)?;
224 Self::map_file(file)
225 }
226
227 /// Constructs a `ByteView` from a file path by reading the entire file.
228 ///
229 /// On `wasm32` targets there is no `mmap`, so the file is read into an
230 /// owned buffer instead.
231 #[cfg(target_arch = "wasm32")]
232 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
233 let file = File::open(path)?;
234 Self::read(file)
235 }
236
237 /// Returns a slice of the underlying data.
238 ///
239 ///
240 /// # Example
241 ///
242 /// ```
243 /// use symbolic_common::ByteView;
244 ///
245 /// let view = ByteView::from_slice(b"1234");
246 /// let data = view.as_slice();
247 /// ```
248 #[inline(always)]
249 pub fn as_slice(&self) -> &[u8] {
250 self.backing.deref()
251 }
252
253 /// Applies a [`AccessPattern`] hint to the backing storage.
254 ///
255 /// A hint can be applied when the predominantly access pattern
256 /// for this byte view is known.
257 ///
258 /// Applying the wrong hint may have significant effects on performance.
259 ///
260 /// Hints are applied on best effort basis, not all platforms
261 /// support the same hints, not all backing storages support
262 /// hints.
263 ///
264 /// # Example
265 ///
266 /// ```
267 /// use std::io::Write;
268 /// use symbolic_common::{ByteView, AccessPattern};
269 ///
270 /// fn main() -> Result<(), std::io::Error> {
271 /// let mut file = tempfile::tempfile()?;
272 /// let view = ByteView::map_file_ref(&file)?;
273 /// let _ = view.hint(AccessPattern::Random);
274 /// Ok(())
275 /// }
276 /// ```
277 pub fn hint(&self, hint: AccessPattern) -> Result<(), io::Error> {
278 let _hint = hint; // silence unused lint
279 match self.backing.deref() {
280 ByteViewBacking::Buf(_) => Ok(()),
281 #[cfg(unix)]
282 ByteViewBacking::Mmap(mmap) => mmap.advise(_hint.to_madvise()),
283 #[cfg(all(not(unix), not(target_arch = "wasm32")))]
284 ByteViewBacking::Mmap(_) => Ok(()),
285 }
286 }
287}
288
289impl AsRef<[u8]> for ByteView<'_> {
290 #[inline(always)]
291 fn as_ref(&self) -> &[u8] {
292 self.as_slice()
293 }
294}
295
296impl Deref for ByteView<'_> {
297 type Target = [u8];
298
299 #[inline(always)]
300 fn deref(&self) -> &Self::Target {
301 self.as_slice()
302 }
303}
304
305unsafe impl StableDeref for ByteView<'_> {}
306
/// The access patterns that can be passed to [`ByteView::hint`].
///
/// This is largely an abstraction over [`madvise(2)`] and [`fadvise(2)`].
///
/// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html
/// [`fadvise(2)`]: https://man7.org/linux/man-pages/man2/posix_fadvise.2.html
#[derive(Clone, Copy, Debug, Default)]
pub enum AccessPattern {
    /// No special treatment; this is the default.
    ///
    /// The operating system stays in full control of the buffer, which is
    /// generally a good choice.
    #[default]
    Normal,
    /// Accesses are expected to happen at random offsets.
    ///
    /// Read ahead might be less useful than it normally is.
    Random,
    /// Accesses are expected to happen in sequential order, so read ahead might be very useful.
    /// Once data has been read there is a high chance it will not be accessed again, so it can
    /// be freed aggressively.
    Sequential,
}
332
333impl AccessPattern {
334 #[cfg(unix)]
335 fn to_madvise(self) -> memmap2::Advice {
336 match self {
337 AccessPattern::Normal => memmap2::Advice::Normal,
338 AccessPattern::Random => memmap2::Advice::Random,
339 AccessPattern::Sequential => memmap2::Advice::Sequential,
340 }
341 }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    use std::io::{Read, Seek, Write};

    use similar_asserts::assert_eq;
    use tempfile::NamedTempFile;

    /// Opening a zero-length file must produce an empty view rather than an error.
    #[test]
    fn test_open_empty_file() -> Result<(), std::io::Error> {
        let empty_file = NamedTempFile::new()?;

        let bytes = ByteView::open(empty_file.path())?;
        assert_eq!(&*bytes, b"");

        Ok(())
    }

    /// Opening a file with content exposes exactly that content.
    #[test]
    fn test_open_file() -> Result<(), std::io::Error> {
        let mut named = NamedTempFile::new()?;

        named.write_all(b"1234")?;

        let bytes = ByteView::open(named.path())?;
        assert_eq!(&*bytes, b"1234");

        Ok(())
    }

    /// The file handle stays usable after mapping it by reference, and the view stays readable
    /// after the handle is closed.
    #[test]
    fn test_mmap_fd_reuse() -> Result<(), std::io::Error> {
        let mut named = NamedTempFile::new()?;
        named.write_all(b"1234")?;

        let bytes = ByteView::map_file_ref(named.as_file())?;

        // Converting into a plain file deletes the file on disk.
        let _path = named.path().to_path_buf();
        let mut handle = named.into_file();
        #[cfg(not(windows))]
        {
            assert!(!_path.exists());
        }

        // The handle must still be readable after mapping the file and deleting it on disk.
        let mut contents = Vec::new();
        handle.rewind()?;
        handle.read_to_end(&mut contents)?;
        assert_eq!(contents, b"1234");
        drop(handle);

        // The view must still be readable as well, even with the handle closed.
        assert_eq!(&*bytes, b"1234");

        Ok(())
    }
}
402}