epserde/deser/
mod.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Deserialization traits and types
9//!
10//! [`Deserialize`] is the main deserialization trait, providing methods
11//! [`Deserialize::deserialize_eps`] and [`Deserialize::deserialize_full`] which
12//! implement ε-copy and full-copy deserialization, respectively. The
13//! implementation of this trait is based on [`DeserInner`], which is
14//! automatically derived with `#[derive(Deserialize)]`.
15
16use crate::ser::SerInner;
17use crate::traits::*;
18use crate::{MAGIC, MAGIC_REV, VERSION};
19use core::hash::Hasher;
20use core::{mem::MaybeUninit, ptr::addr_of_mut};
21
22pub mod helpers;
23pub use helpers::*;
24pub mod mem_case;
25pub use mem_case::*;
26pub mod read;
27pub use read::*;
28pub mod reader_with_pos;
29pub use reader_with_pos::*;
30pub mod slice_with_pos;
31pub use slice_with_pos::*;
32
33#[cfg(not(feature = "std"))]
34use alloc::{
35    string::{String, ToString},
36    vec::Vec,
37};
38#[cfg(feature = "std")]
39use std::{io::BufReader, path::Path};
40
/// Result type used throughout deserialization: a [`core::result::Result`]
/// whose error type is the deserialization [`Error`] defined in this module.
pub type Result<T> = core::result::Result<T, Error>;
42
/// A shorthand for the [deserialization type associated with a deserializable
/// type](DeserInner::DeserType).
///
/// `DeserType<'a, T>` expands to `<T as DeserInner>::DeserType<'a>`.
pub type DeserType<'a, T> = <T as DeserInner>::DeserType<'a>;
46
47/// Main deserialization trait. It is separated from [`DeserInner`] to
48/// avoid that the user modify its behavior, and hide internal serialization
49/// methods.
50///
51/// It provides several convenience methods to load or map into memory
52/// structures that have been previously serialized. See, for example,
53/// [`Deserialize::load_full`], [`Deserialize::load_mem`], and
54/// [`Deserialize::mmap`].
55///
56/// # Safety
57///
58/// All deserialization methods are unsafe.
59///
60/// - No validation is performed on zero-copy types. For example, by altering a
61///   serialized form you can deserialize a vector of
62///   [`NonZeroUsize`](core::num::NonZeroUsize) containing zeros.
63/// - The code assume that the [`read_exact`](ReadNoStd::read_exact) method of
64///   the backend does not read the buffer. If the method reads the buffer, it
65///   will cause undefined behavior. This is a general issue with Rust as the
66///   I/O traits were written before [`MaybeUninit`] was stabilized.
67/// - Malicious [`TypeHash`]/[`AlignHash`] implementations maybe lead to read
68///   incompatible structures using the same code, or cause undefined behavior
69///   by loading data with an incorrect alignment.
70/// - Memory-mapped files might be modified externally.
71/// - If you use a method coupling a deserialized structure with its serialized
72///   support using [`MemCase`] (e.g., [`Deserialize::mmap`]),
73///   [`DeserInner::DeserType`] must be covariant (i.e., behave like a
74///   structure, not a closure with a generic argument)
75pub trait Deserialize: DeserInner {
76    /// Fully deserializes a structure of this type from the given backend.
77    ///
78    /// # Safety
79    ///
80    /// See the [trait documentation](Deserialize).
81    unsafe fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self>;
82    /// ε-copy deserializes a structure of this type from the given backend.
83    ///
84    /// # Safety
85    ///
86    /// See the [trait documentation](Deserialize).
87    unsafe fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>>;
88
89    /// Convenience method to fully deserialize from a file.
90    ///
91    /// # Safety
92    ///
93    /// See the [trait documentation](Deserialize).
94    #[cfg(feature = "std")]
95    unsafe fn load_full(path: impl AsRef<Path>) -> anyhow::Result<Self> {
96        let file = std::fs::File::open(path).map_err(Error::FileOpenError)?;
97        let mut buf_reader = BufReader::new(file);
98        unsafe { Self::deserialize_full(&mut buf_reader).map_err(|e| e.into()) }
99    }
100
101    /// Reads data from a reader into heap-allocated memory and ε-deserialize a
102    /// data structure from it, returning a [`MemCase`] containing the data
103    /// structure and the memory. Excess bytes are zeroed out.
104    ///
105    /// The allocated memory will have [`MemoryAlignment`] as alignment: types
106    /// with a higher alignment requirement will cause an [alignment
107    /// error](`Error::AlignmentError`).
108    ///
109    /// For a version using a file path, see [`load_mem`](Self::load_mem).
110    ///
111    /// # Examples
112    ///
113    /// ```rust
114    /// use epserde::prelude::*;
115    /// let data = vec![1, 2, 3, 4, 5];
116    /// let mut buffer = Vec::new();
117    /// unsafe { data.serialize(&mut buffer)? };
118    ///
119    /// let cursor = <AlignedCursor>::from_slice(&buffer);
120    /// let mem_case = unsafe { <Vec<i32>>::read_mem(cursor, buffer.len())? };
121    /// assert_eq!(data, **mem_case.uncase());
122    /// # Ok::<(), Box<dyn std::error::Error>>(())
123    /// ```
124    ///
125    /// # Safety
126    ///
127    /// See the [trait documentation](Deserialize).
128    unsafe fn read_mem(mut read: impl ReadNoStd, size: usize) -> anyhow::Result<MemCase<Self>> {
129        let align_to = align_of::<MemoryAlignment>();
130        if align_of::<Self>() > align_to {
131            return Err(Error::AlignmentError.into());
132        }
133        // Round up to u128 size
134        let capacity = size + crate::pad_align_to(size, align_to);
135
136        let mut uninit: MaybeUninit<MemCase<Self>> = MaybeUninit::uninit();
137        let ptr = uninit.as_mut_ptr();
138
139        // SAFETY: the entire vector will be filled with data read from the reader,
140        // or with zeroes if the reader provides less data than expected.
141        #[allow(invalid_value)]
142        let mut aligned_vec = unsafe {
143            #[cfg(not(feature = "std"))]
144            let alloc_func = alloc::alloc::alloc;
145            #[cfg(feature = "std")]
146            let alloc_func = std::alloc::alloc;
147
148            <Vec<MemoryAlignment>>::from_raw_parts(
149                alloc_func(core::alloc::Layout::from_size_align(capacity, align_to)?)
150                    as *mut MemoryAlignment,
151                capacity / align_to,
152                capacity / align_to,
153            )
154        };
155
156        let bytes = unsafe {
157            core::slice::from_raw_parts_mut(aligned_vec.as_mut_ptr() as *mut u8, capacity)
158        };
159
160        read.read_exact(&mut bytes[..size])?;
161        // Fixes the last few bytes to guarantee zero-extension semantics
162        // for bit vectors and full-vector initialization.
163        bytes[size..].fill(0);
164
165        // SAFETY: the vector is aligned to 16 bytes.
166        let backend = MemBackend::Memory(aligned_vec.into_boxed_slice());
167
168        // store the backend inside the MemCase
169        unsafe {
170            addr_of_mut!((*ptr).1).write(backend);
171        }
172        // deserialize the data structure
173        let mem = unsafe { (*ptr).1.as_ref().unwrap() };
174        let s = unsafe { Self::deserialize_eps(mem) }?;
175        // write the deserialized struct in the MemCase
176        unsafe {
177            addr_of_mut!((*ptr).0).write(s);
178        }
179        // finish init
180        Ok(unsafe { uninit.assume_init() })
181    }
182
183    /// Loads a file into heap-allocated memory and ε-deserialize a data
184    /// structure from it, returning a [`MemCase`] containing the data structure
185    /// and the memory. Excess bytes are zeroed out.
186    ///
187    /// The allocated memory will have [`MemoryAlignment`] as alignment: types
188    /// with a higher alignment requirement will cause an [alignment
189    /// error](`Error::AlignmentError`).
190    ///
191    /// For a version using a generic [`std::io::Read`], see
192    /// [`read_mem`](Self::read_mem).
193    ///
194    /// # Safety
195    ///
196    /// See the [trait documentation](Deserialize).
197    #[cfg(feature = "std")]
198    unsafe fn load_mem(path: impl AsRef<Path>) -> anyhow::Result<MemCase<Self>> {
199        let file_len = path.as_ref().metadata()?.len() as usize;
200        let file = std::fs::File::open(path)?;
201        unsafe { Self::read_mem(file, file_len) }
202    }
203
204    /// Reads data from a reader into `mmap()`-allocated memory and ε-deserialize
205    /// a data structure from it, returning a [`MemCase`] containing the data
206    /// structure and the memory. Excess bytes are zeroed out.
207    ///
208    /// The behavior of `mmap()` can be modified by passing some [`Flags`];
209    /// otherwise, just pass `Flags::empty()`.
210    ///
211    /// For a version using a file path, see [`load_mmap`](Self::load_mmap).
212    ///
213    /// Requires the `mmap` feature.
214    ///
215    /// # Example
216    ///
217    /// ```rust
218    /// # #[cfg(feature = "mmap")]
219    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
220    /// # use epserde::prelude::*;
221    /// # use std::io::Cursor;
222    /// let data = vec![1, 2, 3, 4, 5];
223    /// let mut buffer = Vec::new();
224    /// unsafe { data.serialize(&mut buffer)? };
225    ///
226    /// let cursor = Cursor::new(&buffer);
227    /// let mmap_case = unsafe { <Vec<i32>>::read_mmap(cursor, buffer.len(), Flags::empty())? };
228    /// assert_eq!(data, **mmap_case.uncase());
229    /// # Ok(())
230    /// # }
231    /// ```
232    ///
233    /// # Safety
234    ///
235    /// See the [trait documentation](Deserialize).
236    #[cfg(feature = "mmap")]
237    unsafe fn read_mmap(
238        mut read: impl ReadNoStd,
239        size: usize,
240        flags: Flags,
241    ) -> anyhow::Result<MemCase<Self>> {
242        let capacity = size + crate::pad_align_to(size, 16);
243
244        let mut uninit: MaybeUninit<MemCase<Self>> = MaybeUninit::uninit();
245        let ptr = uninit.as_mut_ptr();
246
247        let mut mmap = mmap_rs::MmapOptions::new(capacity)?
248            .with_flags(flags.mmap_flags())
249            .map_mut()?;
250        read.read_exact(&mut mmap[..size])?;
251        // Fixes the last few bytes to guarantee zero-extension semantics
252        // for bit vectors.
253        mmap[size..].fill(0);
254
255        let backend = MemBackend::Mmap(mmap.make_read_only().map_err(|(_, err)| err)?);
256
257        // store the backend inside the MemCase
258        unsafe {
259            addr_of_mut!((*ptr).1).write(backend);
260        }
261        // deserialize the data structure
262        let mem = unsafe { (*ptr).1.as_ref().unwrap() };
263        let s = unsafe { Self::deserialize_eps(mem) }?;
264        // write the deserialized struct in the MemCase
265        unsafe {
266            addr_of_mut!((*ptr).0).write(s);
267        }
268        // finish init
269        Ok(unsafe { uninit.assume_init() })
270    }
271
272    /// Loads a file into `mmap()`-allocated memory and ε-deserialize a data
273    /// structure from it, returning a [`MemCase`] containing the data structure
274    /// and the memory. Excess bytes are zeroed out.
275    ///
276    /// The behavior of `mmap()` can be modified by passing some [`Flags`];
277    /// otherwise, just pass `Flags::empty()`.
278    ///
279    /// For a version using a generic [`std::io::Read`], see
280    /// [`read_mmap`](Self::read_mmap).
281    ///
282    /// Requires the `mmap` feature.
283    ///
284    /// # Safety
285    ///
286    /// See the [trait documentation](Deserialize) and [mmap's `with_file`'s
287    /// documentation](mmap_rs::MmapOptions::with_file).
288    #[cfg(all(feature = "mmap", feature = "std"))]
289    unsafe fn load_mmap(path: impl AsRef<Path>, flags: Flags) -> anyhow::Result<MemCase<Self>> {
290        let file_len = path.as_ref().metadata()?.len() as usize;
291        let file = std::fs::File::open(path)?;
292        unsafe { Self::read_mmap(file, file_len, flags) }
293    }
294
295    /// Memory maps a file and ε-deserializes a data structure from it,
296    /// returning a [`MemCase`] containing the data structure and the
297    /// memory mapping.
298    ///
299    /// The behavior of `mmap()` can be modified by passing some [`Flags`]; otherwise,
300    /// just pass `Flags::empty()`.
301    ///
302    /// Requires the `mmap` feature.
303    ///
304    /// # Safety
305    ///
306    /// See the [trait documentation](Deserialize) and [mmap's `with_file`'s documentation](mmap_rs::MmapOptions::with_file).
307    #[cfg(all(feature = "mmap", feature = "std"))]
308    unsafe fn mmap(path: impl AsRef<Path>, flags: Flags) -> anyhow::Result<MemCase<Self>> {
309        let file_len = path.as_ref().metadata()?.len();
310        let file = std::fs::File::open(path)?;
311
312        let mut uninit: MaybeUninit<MemCase<Self>> = MaybeUninit::uninit();
313        let ptr = uninit.as_mut_ptr();
314
315        let mmap = unsafe {
316            mmap_rs::MmapOptions::new(file_len as _)?
317                .with_flags(flags.mmap_flags())
318                .with_file(&file, 0)
319                .map()?
320        };
321
322        // store the backend inside the MemCase
323        unsafe {
324            addr_of_mut!((*ptr).1).write(MemBackend::Mmap(mmap));
325        }
326
327        let mmap = unsafe { (*ptr).1.as_ref().unwrap() };
328        // deserialize the data structure
329        let s = unsafe { Self::deserialize_eps(mmap) }?;
330        // write the deserialized struct in the MemCase
331        unsafe {
332            addr_of_mut!((*ptr).0).write(s);
333        }
334        // finish init
335        Ok(unsafe { uninit.assume_init() })
336    }
337}
338
339#[allow(clippy::missing_safety_doc)] // Clippy bug
340/// Inner trait to implement deserialization of a type. This trait exists to
341/// separate the user-facing [`Deserialize`] trait from the low-level
342/// deserialization mechanisms of [`DeserInner::_deser_full_inner`]
343/// and [`DeserInner::_deser_eps_inner`]. Moreover, it makes it
344/// possible to behave slightly differently at the top of the recursion tree
345/// (e.g., to check the endianness marker), and to prevent the user from
346/// modifying the methods in [`Deserialize`].
347///
348/// The user should not implement this trait directly, but rather derive it.
349///
350/// # Safety
351///
352/// See [`Deserialize`].
353pub trait DeserInner: Sized {
354    /// The deserialization type associated with this type. It can be retrieved
355    /// conveniently with the alias [`DeserType`].
356    type DeserType<'a>;
357
358    /// # Safety
359    ///
360    /// See the documentation of [`Deserialize`].
361    unsafe fn _deser_full_inner(backend: &mut impl ReadWithPos) -> Result<Self>;
362
363    /// # Safety
364    ///
365    /// See the documentation of [`Deserialize`].
366    unsafe fn _deser_eps_inner<'a>(backend: &mut SliceWithPos<'a>) -> Result<Self::DeserType<'a>>;
367}
368
369/// Blanket implementation that prevents the user from overwriting the
370/// methods in [`Deserialize`].
371///
372/// This implementation [checks the header](`check_header`) written
373/// by the blanket implementation of [`crate::ser::Serialize`] and then delegates to
374/// [`DeserInner::_deser_full_inner`] or
375/// [`DeserInner::_deser_eps_inner`].
376impl<T: SerInner<SerType: TypeHash + AlignHash> + DeserInner> Deserialize for T {
377    /// # Safety
378    ///
379    /// See the documentation of [`Deserialize`].
380    unsafe fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self> {
381        let mut backend = ReaderWithPos::new(backend);
382        check_header::<Self>(&mut backend)?;
383        unsafe { Self::_deser_full_inner(&mut backend) }
384    }
385
386    /// # Safety
387    ///
388    /// See the documentation of [`Deserialize`].
389    unsafe fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>> {
390        let mut backend = SliceWithPos::new(backend);
391        check_header::<Self>(&mut backend)?;
392        unsafe { Self::_deser_eps_inner(&mut backend) }
393    }
394}
395
396/// Common header check code for both ε-copy and full-copy deserialization.
397///
398/// Must be kept in sync with [`crate::ser::write_header`].
399pub fn check_header<T: SerInner<SerType: TypeHash + AlignHash>>(
400    backend: &mut impl ReadWithPos,
401) -> Result<()> {
402    let self_type_name = core::any::type_name::<T>().to_string();
403    let self_ser_type_name = core::any::type_name::<T::SerType>().to_string();
404    let mut type_hasher = xxhash_rust::xxh3::Xxh3::new();
405    T::SerType::type_hash(&mut type_hasher);
406    let self_type_hash = type_hasher.finish();
407
408    let mut align_hasher = xxhash_rust::xxh3::Xxh3::new();
409    let mut offset_of = 0;
410    T::SerType::align_hash(&mut align_hasher, &mut offset_of);
411    let self_align_hash = align_hasher.finish();
412
413    let magic = unsafe { u64::_deser_full_inner(backend)? };
414    match magic {
415        MAGIC => Ok(()),
416        MAGIC_REV => Err(Error::EndiannessError),
417        magic => Err(Error::MagicCookieError(magic)),
418    }?;
419
420    let major = unsafe { u16::_deser_full_inner(backend)? };
421    if major != VERSION.0 {
422        return Err(Error::MajorVersionMismatch(major));
423    }
424    let minor = unsafe { u16::_deser_full_inner(backend)? };
425    if minor > VERSION.1 {
426        return Err(Error::MinorVersionMismatch(minor));
427    };
428
429    let usize_size = unsafe { u8::_deser_full_inner(backend)? };
430    let usize_size = usize_size as usize;
431    let native_usize_size = core::mem::size_of::<usize>();
432    if usize_size != native_usize_size {
433        return Err(Error::UsizeSizeMismatch(usize_size));
434    };
435
436    let ser_type_hash = unsafe { u64::_deser_full_inner(backend)? };
437    let ser_align_hash = unsafe { u64::_deser_full_inner(backend)? };
438    let ser_type_name = unsafe { String::_deser_full_inner(backend)? }.to_string();
439
440    if ser_type_hash != self_type_hash {
441        return Err(Error::WrongTypeHash {
442            ser_type_name,
443            ser_type_hash,
444            self_type_name,
445            self_ser_type_name,
446            self_type_hash,
447        });
448    }
449    if ser_align_hash != self_align_hash {
450        return Err(Error::WrongAlignHash {
451            ser_type_name,
452            ser_align_hash,
453            self_type_name,
454            self_ser_type_name,
455            self_align_hash,
456        });
457    }
458
459    Ok(())
460}
461
462/// A helper trait that makes it possible to implement differently
463/// deserialization for [`crate::traits::ZeroCopy`] and [`crate::traits::DeepCopy`] types.
464/// See [`crate::traits::CopyType`] for more information.
465pub trait DeserHelper<T: CopySelector> {
466    type FullType;
467    type DeserType<'a>;
468
469    /// # Safety
470    ///
471    /// See the documentation of [`Deserialize`].
472    unsafe fn _deser_full_inner_impl(backend: &mut impl ReadWithPos) -> Result<Self::FullType>;
473
474    /// # Safety
475    ///
476    /// See the documentation of [`Deserialize`].
477    unsafe fn _deser_eps_inner_impl<'a>(
478        backend: &mut SliceWithPos<'a>,
479    ) -> Result<Self::DeserType<'a>>;
480}
481
482#[derive(thiserror::Error, Debug)]
483/// Errors that can happen during deserialization.
484pub enum Error {
485    #[error("Error reading stats for file during ε-serde deserialization: {0}")]
486    /// [`Deserialize::load_full`] could not open the provided file.
487    #[cfg(feature = "std")]
488    FileOpenError(std::io::Error),
489    #[error("Read error during ε-serde deserialization")]
490    /// The underlying reader returned an error.
491    ReadError,
492    /// The file is from ε-serde but the endianness is wrong.
493    #[cfg_attr(
494        target_endian = "big",
495        error("The current arch is big-endian but the data is little-endian.")
496    )]
497    #[cfg_attr(
498        target_endian = "little",
499        error("The current arch is little-endian but the data is big-endian.")
500    )]
501    EndiannessError,
502    #[error(
503        "Alignment error. Most likely you are deserializing from a memory region with insufficient alignment."
504    )]
505    /// Some fields are not properly aligned.
506    AlignmentError,
507    #[error("Major version mismatch. Expected {major} but got {0}.", major = VERSION.0)]
508    /// The file was serialized with a version of ε-serde that is not compatible.
509    MajorVersionMismatch(u16),
510    #[error("Minor version mismatch. Expected {minor} but got {0}.", minor = VERSION.1)]
511    /// The file was serialized with a compatible, but too new version of ε-serde
512    /// so we might be missing features.
513    MinorVersionMismatch(u16),
514    #[error("The file was serialized on an architecture where a usize has size {0}, but on the current architecture it has size {size}.", size = core::mem::size_of::<usize>())]
515    /// The pointer width of the serialized file is different from the pointer
516    /// width of the current architecture. For example, the file was serialized
517    /// on a 64-bit machine and we are trying to deserialize it on a 32-bit
518    /// machine.
519    UsizeSizeMismatch(usize),
520    #[error("Wrong magic cookie 0x{0:016x}. The byte stream does not come from ε-serde.")]
521    /// The magic cookie is wrong. The byte sequence does not come from ε-serde.
522    MagicCookieError(u64),
523    #[error("Invalid tag: 0x{0:02x}")]
524    /// A tag is wrong (e.g., for [`Option`]).
525    InvalidTag(usize),
526    #[error(
527        r#"Wrong type hash
528Actual: 0x{ser_type_hash:016x}; expected: 0x{self_type_hash:016x}.
529
530The serialized type is
531    '{ser_type_name}',
532but the deserializable type on which the deserialization method was invoked is
533    '{self_type_name}',
534which has serialization type
535    {self_ser_type_name}.
536
537You are trying to deserialize a file with the wrong type. You might also be
538trying to deserialize a tuple of mixed zero-copy types, which is no longer
539supported since 0.8.0, an instance containing tuples, whose type hash was fixed
540in 0.9.0, or an instance containing a vector or a string that was serialized
541before 0.10.0."#
542    )]
543    /// The type hash is wrong. Probably the user is trying to deserialize a
544    /// file with the wrong type.
545    WrongTypeHash {
546        // The name of the type that was serialized.
547        ser_type_name: String,
548        // The [`TypeHash`] of the type that was serialized.
549        ser_type_hash: u64,
550        // The name of the type on which the deserialization method was called.
551        self_type_name: String,
552        // The name of the serialization type of `self_type_name`.
553        self_ser_type_name: String,
554        // The [`TypeHash`] of the type on which the deserialization method was called.
555        self_type_hash: u64,
556    },
557    #[error(
558        r#"Wrong alignment hash
559Actual: 0x{ser_align_hash:016x}; expected: 0x{self_align_hash:016x}.
560
561The serialized type is
562    '{ser_type_name}',
563but the deserializable type on which the deserialization method was invoked is
564    '{self_type_name}',
565which has serialization type
566    {self_ser_type_name}.
567
568You might be trying to deserialize a file that was serialized on an
569architecture with different alignment requirements, or some of the fields of
570the type might have changed their copy type (zero or deep). You might also be
571trying to deserialize an array, whose alignment hash has been fixed in 0.8.0.
572It is also possible that you are trying to deserialize a file serialized before
573version 0.10.0 in which repr attributes were not sorted lexicographically."#
574    )]
575    /// The type representation hash is wrong. Probably the user is trying to
576    /// deserialize a file with some zero-copy type that has different
577    /// in-memory representations on the serialization arch and on the current one,
578    /// usually because of alignment issues. There are also some backward-compatibility
579    /// issues discussed in the error message.
580    WrongAlignHash {
581        // The name of the type that was serialized.
582        ser_type_name: String,
583        // The [`AlignHash`] of the type that was serialized.
584        ser_align_hash: u64,
585        // The name of the type on which the deserialization method was called.
586        self_type_name: String,
587        // The name of the serialization type of `self_type_name`.
588        self_ser_type_name: String,
589        // The [`AlignHash`] of the type on which the deserialization method was called.
590        self_align_hash: u64,
591    },
592}