epserde/deser/
mod.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8/*!
9
10Deserialization traits and types
11
12[`Deserialize`] is the main deserialization trait, providing methods
13[`Deserialize::deserialize_eps`] and [`Deserialize::deserialize_full`]
14which implement ε-copy and full-copy deserialization, respectively.
15The implementation of this trait is based on [`DeserializeInner`],
16which is automatically derived with `#[derive(Deserialize)]`.
17
18*/
19
20use crate::traits::*;
21use crate::{MAGIC, MAGIC_REV, VERSION};
22use core::mem::align_of;
23use core::ptr::addr_of_mut;
24use core::{hash::Hasher, mem::MaybeUninit};
25use std::{io::BufReader, path::Path};
26
27pub mod helpers;
28pub use helpers::*;
29pub mod mem_case;
30pub use mem_case::*;
31pub mod read;
32pub use read::*;
33pub mod reader_with_pos;
34pub use reader_with_pos::*;
35pub mod slice_with_pos;
36pub use slice_with_pos::*;
37
38pub type Result<T> = core::result::Result<T, Error>;
39
40/// A shorthand for the [deserialized type associated with a type](DeserializeInner::DeserType).
41pub type DeserType<'a, T> = <T as DeserializeInner>::DeserType<'a>;
42
43/// Main deserialization trait. It is separated from [`DeserializeInner`] to
44/// avoid that the user modify its behavior, and hide internal serialization
45/// methods.
46///
47/// It provides several convenience methods to load or map into memory
48/// structures that have been previously serialized. See, for example,
49/// [`Deserialize::load_full`], [`Deserialize::load_mem`], and [`Deserialize::mmap`].
50pub trait Deserialize: DeserializeInner {
51    /// Fully deserialize a structure of this type from the given backend.
52    fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self>;
53    /// ε-copy deserialize a structure of this type from the given backend.
54    fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>>;
55
56    /// Convenience method to fully deserialize from a file.
57    fn load_full(path: impl AsRef<Path>) -> anyhow::Result<Self> {
58        let file = std::fs::File::open(path).map_err(Error::FileOpenError)?;
59        let mut buf_reader = BufReader::new(file);
60        Self::deserialize_full(&mut buf_reader).map_err(|e| e.into())
61    }
62
63    /// Load a file into heap-allocated memory and ε-deserialize a data structure from it,
64    /// returning a [`MemCase`] containing the data structure and the
65    /// memory. Excess bytes are zeroed out.
66    ///
67    /// The allocated memory will have [`MemoryAlignment`] as alignment: types with
68    /// a higher alignment requirement will cause an [alignment error](`Error::AlignmentError`).
69    fn load_mem<'a>(
70        path: impl AsRef<Path>,
71    ) -> anyhow::Result<MemCase<<Self as DeserializeInner>::DeserType<'a>>> {
72        let align_to = align_of::<MemoryAlignment>();
73        if align_of::<Self>() > align_to {
74            return Err(Error::AlignmentError.into());
75        }
76        let file_len = path.as_ref().metadata()?.len() as usize;
77        let mut file = std::fs::File::open(path)?;
78        // Round up to u128 size
79        let capacity = file_len + crate::pad_align_to(file_len, align_to);
80
81        let mut uninit: MaybeUninit<MemCase<<Self as DeserializeInner>::DeserType<'_>>> =
82            MaybeUninit::uninit();
83        let ptr = uninit.as_mut_ptr();
84
85        // SAFETY: the entire vector will be filled with data read from the file,
86        // or with zeroes if the file is shorter than the vector.
87        #[allow(invalid_value)]
88        let mut aligned_vec = unsafe {
89            <Vec<MemoryAlignment>>::from_raw_parts(
90                std::alloc::alloc(std::alloc::Layout::from_size_align(capacity, align_to)?)
91                    as *mut MemoryAlignment,
92                capacity / align_to,
93                capacity / align_to,
94            )
95        };
96
97        let bytes = unsafe {
98            core::slice::from_raw_parts_mut(aligned_vec.as_mut_ptr() as *mut u8, capacity)
99        };
100
101        file.read_exact(&mut bytes[..file_len])?;
102        // Fixes the last few bytes to guarantee zero-extension semantics
103        // for bit vectors and full-vector initialization.
104        bytes[file_len..].fill(0);
105
106        // SAFETY: the vector is aligned to 16 bytes.
107        let backend = MemBackend::Memory(aligned_vec.into_boxed_slice());
108
109        // store the backend inside the MemCase
110        unsafe {
111            addr_of_mut!((*ptr).1).write(backend);
112        }
113        // deserialize the data structure
114        let mem = unsafe { (*ptr).1.as_ref().unwrap() };
115        let s = Self::deserialize_eps(mem)?;
116        // write the deserialized struct in the memcase
117        unsafe {
118            addr_of_mut!((*ptr).0).write(s);
119        }
120        // finish init
121        Ok(unsafe { uninit.assume_init() })
122    }
123
124    /// Load a file into `mmap()`-allocated memory and ε-deserialize a data structure from it,
125    /// returning a [`MemCase`] containing the data structure and the
126    /// memory. Excess bytes are zeroed out.
127    ///
128    /// The behavior of `mmap()` can be modified by passing some [`Flags`]; otherwise,
129    /// just pass `Flags::empty()`.
130    ///
131    /// Requires the `mmap` feature.
132    #[cfg(feature = "mmap")]
133    #[allow(clippy::uninit_vec)]
134    fn load_mmap<'a>(
135        path: impl AsRef<Path>,
136        flags: Flags,
137    ) -> anyhow::Result<MemCase<<Self as DeserializeInner>::DeserType<'a>>> {
138        let file_len = path.as_ref().metadata()?.len() as usize;
139        let mut file = std::fs::File::open(path)?;
140        let capacity = file_len + crate::pad_align_to(file_len, 16);
141
142        let mut uninit: MaybeUninit<MemCase<<Self as DeserializeInner>::DeserType<'_>>> =
143            MaybeUninit::uninit();
144        let ptr = uninit.as_mut_ptr();
145
146        let mut mmap = mmap_rs::MmapOptions::new(capacity)?
147            .with_flags(flags.mmap_flags())
148            .map_mut()?;
149        file.read_exact(&mut mmap[..file_len])?;
150        // Fixes the last few bytes to guarantee zero-extension semantics
151        // for bit vectors.
152        mmap[file_len..].fill(0);
153
154        let backend = MemBackend::Mmap(mmap.make_read_only().map_err(|(_, err)| err)?);
155
156        // store the backend inside the MemCase
157        unsafe {
158            addr_of_mut!((*ptr).1).write(backend);
159        }
160        // deserialize the data structure
161        let mem = unsafe { (*ptr).1.as_ref().unwrap() };
162        let s = Self::deserialize_eps(mem)?;
163        // write the deserialized struct in the MemCase
164        unsafe {
165            addr_of_mut!((*ptr).0).write(s);
166        }
167        // finish init
168        Ok(unsafe { uninit.assume_init() })
169    }
170
171    /// Memory map a file and ε-deserialize a data structure from it,
172    /// returning a [`MemCase`] containing the data structure and the
173    /// memory mapping.
174    ///
175    /// The behavior of `mmap()` can be modified by passing some [`Flags`]; otherwise,
176    /// just pass `Flags::empty()`.
177    ///
178    /// Requires the `mmap` feature.
179    #[cfg(feature = "mmap")]
180    #[allow(clippy::uninit_vec)]
181    fn mmap<'a>(
182        path: impl AsRef<Path>,
183        flags: Flags,
184    ) -> anyhow::Result<MemCase<<Self as DeserializeInner>::DeserType<'a>>> {
185        let file_len = path.as_ref().metadata()?.len();
186        let file = std::fs::File::open(path)?;
187
188        let mut uninit: MaybeUninit<MemCase<<Self as DeserializeInner>::DeserType<'_>>> =
189            MaybeUninit::uninit();
190        let ptr = uninit.as_mut_ptr();
191
192        let mmap = unsafe {
193            mmap_rs::MmapOptions::new(file_len as _)?
194                .with_flags(flags.mmap_flags())
195                .with_file(&file, 0)
196                .map()?
197        };
198
199        // store the backend inside the MemCase
200        unsafe {
201            addr_of_mut!((*ptr).1).write(MemBackend::Mmap(mmap));
202        }
203
204        let mmap = unsafe { (*ptr).1.as_ref().unwrap() };
205        // deserialize the data structure
206        let s = Self::deserialize_eps(mmap)?;
207        // write the deserialized struct in the MemCase
208        unsafe {
209            addr_of_mut!((*ptr).0).write(s);
210        }
211        // finish init
212        Ok(unsafe { uninit.assume_init() })
213    }
214}
215
216/// Inner trait to implement deserialization of a type. This trait exists
217/// to separate the user-facing [`Deserialize`] trait from the low-level
218/// deserialization mechanisms of [`DeserializeInner::_deserialize_full_inner`]
219/// and [`DeserializeInner::_deserialize_eps_inner`]. Moreover,
220/// it makes it possible to behave slighly differently at the top
221/// of the recursion tree (e.g., to check the endianness marker), and to prevent
222/// the user from modifying the methods in [`Deserialize`].
223///
224/// The user should not implement this trait directly, but rather derive it.
225pub trait DeserializeInner: Sized {
226    /// The deserialization type associated with this type. It can be
227    /// retrieved conveniently with the alias [`DeserType`].
228    type DeserType<'a>;
229    fn _deserialize_full_inner(backend: &mut impl ReadWithPos) -> Result<Self>;
230
231    fn _deserialize_eps_inner<'a>(backend: &mut SliceWithPos<'a>) -> Result<Self::DeserType<'a>>;
232}
233
234/// Blanket implementation that prevents the user from overwriting the
235/// methods in [`Deserialize`].
236///
237/// This implementation [checks the header](`check_header`) written
238/// by the blanket implementation of [`crate::ser::Serialize`] and then delegates to
239/// [`DeserializeInner::_deserialize_full_inner`] or
240/// [`DeserializeInner::_deserialize_eps_inner`].
241impl<T: TypeHash + AlignHash + DeserializeInner> Deserialize for T {
242    fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self> {
243        let mut backend = ReaderWithPos::new(backend);
244        check_header::<Self>(&mut backend)?;
245        Self::_deserialize_full_inner(&mut backend)
246    }
247
248    fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>> {
249        let mut backend = SliceWithPos::new(backend);
250        check_header::<Self>(&mut backend)?;
251        Self::_deserialize_eps_inner(&mut backend)
252    }
253}
254
255/// Common header check code for both ε-copy and full-copy deserialization.
256///
257/// Must be kept in sync with [`crate::ser::write_header`].
258pub fn check_header<T: Deserialize + TypeHash + AlignHash>(
259    backend: &mut impl ReadWithPos,
260) -> Result<()> {
261    let self_type_name = core::any::type_name::<T>().to_string();
262
263    let mut type_hasher = xxhash_rust::xxh3::Xxh3::new();
264    T::type_hash(&mut type_hasher);
265    let self_type_hash = type_hasher.finish();
266
267    let mut align_hasher = xxhash_rust::xxh3::Xxh3::new();
268    let mut offset_of = 0;
269    T::align_hash(&mut align_hasher, &mut offset_of);
270    let self_align_hash = align_hasher.finish();
271
272    let magic = u64::_deserialize_full_inner(backend)?;
273    match magic {
274        MAGIC => Ok(()),
275        MAGIC_REV => Err(Error::EndiannessError),
276        magic => Err(Error::MagicCookieError(magic)),
277    }?;
278
279    let major = u16::_deserialize_full_inner(backend)?;
280    if major != VERSION.0 {
281        return Err(Error::MajorVersionMismatch(major));
282    }
283    let minor = u16::_deserialize_full_inner(backend)?;
284    if minor > VERSION.1 {
285        return Err(Error::MinorVersionMismatch(minor));
286    };
287
288    let usize_size = u8::_deserialize_full_inner(backend)?;
289    let usize_size = usize_size as usize;
290    let native_usize_size = core::mem::size_of::<usize>();
291    if usize_size != native_usize_size {
292        return Err(Error::UsizeSizeMismatch(usize_size));
293    };
294
295    let ser_type_hash = u64::_deserialize_full_inner(backend)?;
296    let ser_align_hash = u64::_deserialize_full_inner(backend)?;
297    let ser_type_name = String::_deserialize_full_inner(backend)?;
298
299    if ser_type_hash != self_type_hash {
300        return Err(Error::WrongTypeHash {
301            got_type_name: self_type_name,
302            got: self_type_hash,
303            expected_type_name: ser_type_name,
304            expected: ser_type_hash,
305        });
306    }
307    if ser_align_hash != self_align_hash {
308        return Err(Error::WrongTypeAlignHash {
309            got_type_name: self_type_name,
310            got: self_align_hash,
311            expected_type_name: ser_type_name,
312            expected: ser_align_hash,
313        });
314    }
315
316    Ok(())
317}
318
319/// A helper trait that makes it possible to implement differently
320/// deserialization for [`crate::traits::ZeroCopy`] and [`crate::traits::DeepCopy`] types.
321/// See [`crate::traits::CopyType`] for more information.
322pub trait DeserializeHelper<T: CopySelector> {
323    type FullType;
324    type DeserType<'a>;
325
326    fn _deserialize_full_inner_impl(backend: &mut impl ReadWithPos) -> Result<Self::FullType>;
327
328    fn _deserialize_eps_inner_impl<'a>(
329        backend: &mut SliceWithPos<'a>,
330    ) -> Result<Self::DeserType<'a>>;
331}
332
333#[derive(thiserror::Error, Debug)]
334/// Errors that can happen during deserialization.
335pub enum Error {
336    #[error("Error reading stats for file during ε-serde deserialization: {0}")]
337    /// [`Deserialize::load_full`] could not open the provided file.
338    FileOpenError(std::io::Error),
339    #[error("Read error during ε-serde deserialization")]
340    /// The underlying reader returned an error.
341    ReadError,
342    /// The file is from ε-serde but the endianess is wrong.
343    #[cfg_attr(
344        target_endian = "big",
345        error("The current arch is big-endian but the data is little-endian.")
346    )]
347    #[cfg_attr(
348        target_endian = "little",
349        error("The current arch is little-endian but the data is big-endian.")
350    )]
351    EndiannessError,
352    #[error("Alignment error. Most likely you are deserializing from a memory region with insufficient alignment.")]
353    /// Some fields are not properly aligned.
354    AlignmentError,
355    #[error("Major version mismatch. Expected {major} but got {0}.", major = VERSION.0)]
356    /// The file was serialized with a version of ε-serde that is not compatible.
357    MajorVersionMismatch(u16),
358    #[error("Minor version mismatch. Expected {minor} but got {0}.", minor = VERSION.1)]
359    /// The file was serialized with a compatible, but too new version of ε-serde
360    /// so we might be missing features.
361    MinorVersionMismatch(u16),
362    #[error("The file was serialized on an architecture where a usize has size {0}, but on the current architecture it has size {size}.", size = core::mem::size_of::<usize>())]
363    /// The the `pointer_width` of the serialized file is different from the
364    /// `pointer_width` of the current architecture.
365    /// For example, the file was serialized on a 64-bit machine and we are trying to
366    /// deserialize it on a 32-bit machine.
367    UsizeSizeMismatch(usize),
368    #[error("Wrong magic cookie 0x{0:016x}. The byte stream does not come from ε-serde.")]
369    /// The magic coookie is wrong. The byte sequence does not come from ε-serde.
370    MagicCookieError(u64),
371    #[error("Invalid tag: 0x{0:02x}")]
372    /// A tag is wrong (e.g., for [`Option`]).
373    InvalidTag(usize),
374    #[error(
375        r#"Wrong type hash. Expected: 0x{expected:016x} Actual: 0x{got:016x}.
376You are trying to deserialize a file with the wrong type.
377The serialized type is '{expected_type_name}' and the deserialized type is '{got_type_name}'."#
378    )]
379    /// The type hash is wrong. Probably the user is trying to deserialize a
380    /// file with the wrong type.
381    WrongTypeHash {
382        got_type_name: String,
383        expected_type_name: String,
384        expected: u64,
385        got: u64,
386    },
387    #[error(
388r#"Wrong alignment hash. Expected: 0x{expected:016x} Actual: 0x{got:016x}.
389You might be trying to deserialize a file that was serialized on an architecture with different alignment requirements, or some of the fields of the type have changed their copy type (zero or deep).
390You might also be trying to deserialize a tuple of mixed zero-copy types, which is no longer supported since 0.8.0, or to deserialize an array, whose alignment hash has been fixed in 0.8.0. 
391The serialized type is '{expected_type_name}' and the deserialized type is '{got_type_name}'."#
392    )]
393    /// The type representation hash is wrong. Probabliy the user is trying to
394    /// deserialize a file with some zero-copy type that has different
395    /// in-memory representations on the serialization arch and on the current one,
396    /// usually because of alignment issues.
397    WrongTypeAlignHash {
398        got_type_name: String,
399        expected_type_name: String,
400        expected: u64,
401        got: u64,
402    },
403}