epserde/deser/mod.rs
1/*
2 * SPDX-FileCopyrightText: 2023 Inria
3 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Deserialization traits and types
9//!
10//! [`Deserialize`] is the main deserialization trait, providing methods
11//! [`Deserialize::deserialize_eps`] and [`Deserialize::deserialize_full`] which
12//! implement ε-copy and full-copy deserialization, respectively. The
13//! implementation of this trait is based on [`DeserInner`], which is
14//! automatically derived with `#[derive(Deserialize)]`.
15
16use crate::ser::SerInner;
17use crate::traits::*;
18use crate::{MAGIC, MAGIC_REV, VERSION};
19use core::hash::Hasher;
20use core::{mem::MaybeUninit, ptr::addr_of_mut};
21
22pub mod helpers;
23pub use helpers::*;
24pub mod mem_case;
25pub use mem_case::*;
26pub mod read;
27pub use read::*;
28pub mod reader_with_pos;
29pub use reader_with_pos::*;
30pub mod slice_with_pos;
31pub use slice_with_pos::*;
32
33#[cfg(not(feature = "std"))]
34use alloc::{
35 string::{String, ToString},
36 vec::Vec,
37};
38#[cfg(feature = "std")]
39use std::{io::BufReader, path::Path};
40
41pub type Result<T> = core::result::Result<T, Error>;
42
43/// A shorthand for the [deserialization type associated with a deserializable
44/// type](DeserInner::DeserType).
45pub type DeserType<'a, T> = <T as DeserInner>::DeserType<'a>;
46
47/// Main deserialization trait. It is separated from [`DeserInner`] to
48/// avoid that the user modify its behavior, and hide internal serialization
49/// methods.
50///
51/// It provides several convenience methods to load or map into memory
52/// structures that have been previously serialized. See, for example,
53/// [`Deserialize::load_full`], [`Deserialize::load_mem`], and
54/// [`Deserialize::mmap`].
55///
56/// # Safety
57///
58/// All deserialization methods are unsafe.
59///
60/// - No validation is performed on zero-copy types. For example, by altering a
61/// serialized form you can deserialize a vector of
62/// [`NonZeroUsize`](core::num::NonZeroUsize) containing zeros.
63/// - The code assume that the [`read_exact`](ReadNoStd::read_exact) method of
64/// the backend does not read the buffer. If the method reads the buffer, it
65/// will cause undefined behavior. This is a general issue with Rust as the
66/// I/O traits were written before [`MaybeUninit`] was stabilized.
67/// - Malicious [`TypeHash`]/[`AlignHash`] implementations maybe lead to read
68/// incompatible structures using the same code, or cause undefined behavior
69/// by loading data with an incorrect alignment.
70/// - Memory-mapped files might be modified externally.
71/// - If you use a method coupling a deserialized structure with its serialized
72/// support using [`MemCase`] (e.g., [`Deserialize::mmap`]),
73/// [`DeserInner::DeserType`] must be covariant (i.e., behave like a
74/// structure, not a closure with a generic argument)
75pub trait Deserialize: DeserInner {
76 /// Fully deserializes a structure of this type from the given backend.
77 ///
78 /// # Safety
79 ///
80 /// See the [trait documentation](Deserialize).
81 unsafe fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self>;
82 /// ε-copy deserializes a structure of this type from the given backend.
83 ///
84 /// # Safety
85 ///
86 /// See the [trait documentation](Deserialize).
87 unsafe fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>>;
88
89 /// Convenience method to fully deserialize from a file.
90 ///
91 /// # Safety
92 ///
93 /// See the [trait documentation](Deserialize).
94 #[cfg(feature = "std")]
95 unsafe fn load_full(path: impl AsRef<Path>) -> anyhow::Result<Self> {
96 let file = std::fs::File::open(path).map_err(Error::FileOpenError)?;
97 let mut buf_reader = BufReader::new(file);
98 unsafe { Self::deserialize_full(&mut buf_reader).map_err(|e| e.into()) }
99 }
100
101 /// Reads data from a reader into heap-allocated memory and ε-deserialize a
102 /// data structure from it, returning a [`MemCase`] containing the data
103 /// structure and the memory. Excess bytes are zeroed out.
104 ///
105 /// The allocated memory will have [`MemoryAlignment`] as alignment: types
106 /// with a higher alignment requirement will cause an [alignment
107 /// error](`Error::AlignmentError`).
108 ///
109 /// For a version using a file path, see [`load_mem`](Self::load_mem).
110 ///
111 /// # Examples
112 ///
113 /// ```rust
114 /// use epserde::prelude::*;
115 /// let data = vec![1, 2, 3, 4, 5];
116 /// let mut buffer = Vec::new();
117 /// unsafe { data.serialize(&mut buffer)? };
118 ///
119 /// let cursor = <AlignedCursor>::from_slice(&buffer);
120 /// let mem_case = unsafe { <Vec<i32>>::read_mem(cursor, buffer.len())? };
121 /// assert_eq!(data, **mem_case.uncase());
122 /// # Ok::<(), Box<dyn std::error::Error>>(())
123 /// ```
124 ///
125 /// # Safety
126 ///
127 /// See the [trait documentation](Deserialize).
128 unsafe fn read_mem(mut read: impl ReadNoStd, size: usize) -> anyhow::Result<MemCase<Self>> {
129 let align_to = align_of::<MemoryAlignment>();
130 if align_of::<Self>() > align_to {
131 return Err(Error::AlignmentError.into());
132 }
133 // Round up to u128 size
134 let capacity = size + crate::pad_align_to(size, align_to);
135
136 let mut uninit: MaybeUninit<MemCase<Self>> = MaybeUninit::uninit();
137 let ptr = uninit.as_mut_ptr();
138
139 // SAFETY: the entire vector will be filled with data read from the reader,
140 // or with zeroes if the reader provides less data than expected.
141 #[allow(invalid_value)]
142 let mut aligned_vec = unsafe {
143 #[cfg(not(feature = "std"))]
144 let alloc_func = alloc::alloc::alloc;
145 #[cfg(feature = "std")]
146 let alloc_func = std::alloc::alloc;
147
148 <Vec<MemoryAlignment>>::from_raw_parts(
149 alloc_func(core::alloc::Layout::from_size_align(capacity, align_to)?)
150 as *mut MemoryAlignment,
151 capacity / align_to,
152 capacity / align_to,
153 )
154 };
155
156 let bytes = unsafe {
157 core::slice::from_raw_parts_mut(aligned_vec.as_mut_ptr() as *mut u8, capacity)
158 };
159
160 read.read_exact(&mut bytes[..size])?;
161 // Fixes the last few bytes to guarantee zero-extension semantics
162 // for bit vectors and full-vector initialization.
163 bytes[size..].fill(0);
164
165 // SAFETY: the vector is aligned to 16 bytes.
166 let backend = MemBackend::Memory(aligned_vec.into_boxed_slice());
167
168 // store the backend inside the MemCase
169 unsafe {
170 addr_of_mut!((*ptr).1).write(backend);
171 }
172 // deserialize the data structure
173 let mem = unsafe { (*ptr).1.as_ref().unwrap() };
174 let s = unsafe { Self::deserialize_eps(mem) }?;
175 // write the deserialized struct in the MemCase
176 unsafe {
177 addr_of_mut!((*ptr).0).write(s);
178 }
179 // finish init
180 Ok(unsafe { uninit.assume_init() })
181 }
182
183 /// Loads a file into heap-allocated memory and ε-deserialize a data
184 /// structure from it, returning a [`MemCase`] containing the data structure
185 /// and the memory. Excess bytes are zeroed out.
186 ///
187 /// The allocated memory will have [`MemoryAlignment`] as alignment: types
188 /// with a higher alignment requirement will cause an [alignment
189 /// error](`Error::AlignmentError`).
190 ///
191 /// For a version using a generic [`std::io::Read`], see
192 /// [`read_mem`](Self::read_mem).
193 ///
194 /// # Safety
195 ///
196 /// See the [trait documentation](Deserialize).
197 #[cfg(feature = "std")]
198 unsafe fn load_mem(path: impl AsRef<Path>) -> anyhow::Result<MemCase<Self>> {
199 let file_len = path.as_ref().metadata()?.len() as usize;
200 let file = std::fs::File::open(path)?;
201 unsafe { Self::read_mem(file, file_len) }
202 }
203
204 /// Reads data from a reader into `mmap()`-allocated memory and ε-deserialize
205 /// a data structure from it, returning a [`MemCase`] containing the data
206 /// structure and the memory. Excess bytes are zeroed out.
207 ///
208 /// The behavior of `mmap()` can be modified by passing some [`Flags`];
209 /// otherwise, just pass `Flags::empty()`.
210 ///
211 /// For a version using a file path, see [`load_mmap`](Self::load_mmap).
212 ///
213 /// Requires the `mmap` feature.
214 ///
215 /// # Example
216 ///
217 /// ```rust
218 /// # #[cfg(feature = "mmap")]
219 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
220 /// # use epserde::prelude::*;
221 /// # use std::io::Cursor;
222 /// let data = vec![1, 2, 3, 4, 5];
223 /// let mut buffer = Vec::new();
224 /// unsafe { data.serialize(&mut buffer)? };
225 ///
226 /// let cursor = Cursor::new(&buffer);
227 /// let mmap_case = unsafe { <Vec<i32>>::read_mmap(cursor, buffer.len(), Flags::empty())? };
228 /// assert_eq!(data, **mmap_case.uncase());
229 /// # Ok(())
230 /// # }
231 /// ```
232 ///
233 /// # Safety
234 ///
235 /// See the [trait documentation](Deserialize).
236 #[cfg(feature = "mmap")]
237 unsafe fn read_mmap(
238 mut read: impl ReadNoStd,
239 size: usize,
240 flags: Flags,
241 ) -> anyhow::Result<MemCase<Self>> {
242 let capacity = size + crate::pad_align_to(size, 16);
243
244 let mut uninit: MaybeUninit<MemCase<Self>> = MaybeUninit::uninit();
245 let ptr = uninit.as_mut_ptr();
246
247 let mut mmap = mmap_rs::MmapOptions::new(capacity)?
248 .with_flags(flags.mmap_flags())
249 .map_mut()?;
250 read.read_exact(&mut mmap[..size])?;
251 // Fixes the last few bytes to guarantee zero-extension semantics
252 // for bit vectors.
253 mmap[size..].fill(0);
254
255 let backend = MemBackend::Mmap(mmap.make_read_only().map_err(|(_, err)| err)?);
256
257 // store the backend inside the MemCase
258 unsafe {
259 addr_of_mut!((*ptr).1).write(backend);
260 }
261 // deserialize the data structure
262 let mem = unsafe { (*ptr).1.as_ref().unwrap() };
263 let s = unsafe { Self::deserialize_eps(mem) }?;
264 // write the deserialized struct in the MemCase
265 unsafe {
266 addr_of_mut!((*ptr).0).write(s);
267 }
268 // finish init
269 Ok(unsafe { uninit.assume_init() })
270 }
271
272 /// Loads a file into `mmap()`-allocated memory and ε-deserialize a data
273 /// structure from it, returning a [`MemCase`] containing the data structure
274 /// and the memory. Excess bytes are zeroed out.
275 ///
276 /// The behavior of `mmap()` can be modified by passing some [`Flags`];
277 /// otherwise, just pass `Flags::empty()`.
278 ///
279 /// For a version using a generic [`std::io::Read`], see
280 /// [`read_mmap`](Self::read_mmap).
281 ///
282 /// Requires the `mmap` feature.
283 ///
284 /// # Safety
285 ///
286 /// See the [trait documentation](Deserialize) and [mmap's `with_file`'s
287 /// documentation](mmap_rs::MmapOptions::with_file).
288 #[cfg(all(feature = "mmap", feature = "std"))]
289 unsafe fn load_mmap(path: impl AsRef<Path>, flags: Flags) -> anyhow::Result<MemCase<Self>> {
290 let file_len = path.as_ref().metadata()?.len() as usize;
291 let file = std::fs::File::open(path)?;
292 unsafe { Self::read_mmap(file, file_len, flags) }
293 }
294
295 /// Memory maps a file and ε-deserializes a data structure from it,
296 /// returning a [`MemCase`] containing the data structure and the
297 /// memory mapping.
298 ///
299 /// The behavior of `mmap()` can be modified by passing some [`Flags`]; otherwise,
300 /// just pass `Flags::empty()`.
301 ///
302 /// Requires the `mmap` feature.
303 ///
304 /// # Safety
305 ///
306 /// See the [trait documentation](Deserialize) and [mmap's `with_file`'s documentation](mmap_rs::MmapOptions::with_file).
307 #[cfg(all(feature = "mmap", feature = "std"))]
308 unsafe fn mmap(path: impl AsRef<Path>, flags: Flags) -> anyhow::Result<MemCase<Self>> {
309 let file_len = path.as_ref().metadata()?.len();
310 let file = std::fs::File::open(path)?;
311
312 let mut uninit: MaybeUninit<MemCase<Self>> = MaybeUninit::uninit();
313 let ptr = uninit.as_mut_ptr();
314
315 let mmap = unsafe {
316 mmap_rs::MmapOptions::new(file_len as _)?
317 .with_flags(flags.mmap_flags())
318 .with_file(&file, 0)
319 .map()?
320 };
321
322 // store the backend inside the MemCase
323 unsafe {
324 addr_of_mut!((*ptr).1).write(MemBackend::Mmap(mmap));
325 }
326
327 let mmap = unsafe { (*ptr).1.as_ref().unwrap() };
328 // deserialize the data structure
329 let s = unsafe { Self::deserialize_eps(mmap) }?;
330 // write the deserialized struct in the MemCase
331 unsafe {
332 addr_of_mut!((*ptr).0).write(s);
333 }
334 // finish init
335 Ok(unsafe { uninit.assume_init() })
336 }
337}
338
339#[allow(clippy::missing_safety_doc)] // Clippy bug
340/// Inner trait to implement deserialization of a type. This trait exists to
341/// separate the user-facing [`Deserialize`] trait from the low-level
342/// deserialization mechanisms of [`DeserInner::_deser_full_inner`]
343/// and [`DeserInner::_deser_eps_inner`]. Moreover, it makes it
344/// possible to behave slightly differently at the top of the recursion tree
345/// (e.g., to check the endianness marker), and to prevent the user from
346/// modifying the methods in [`Deserialize`].
347///
348/// The user should not implement this trait directly, but rather derive it.
349///
350/// # Safety
351///
352/// See [`Deserialize`].
353pub trait DeserInner: Sized {
354 /// The deserialization type associated with this type. It can be retrieved
355 /// conveniently with the alias [`DeserType`].
356 type DeserType<'a>;
357
358 /// # Safety
359 ///
360 /// See the documentation of [`Deserialize`].
361 unsafe fn _deser_full_inner(backend: &mut impl ReadWithPos) -> Result<Self>;
362
363 /// # Safety
364 ///
365 /// See the documentation of [`Deserialize`].
366 unsafe fn _deser_eps_inner<'a>(backend: &mut SliceWithPos<'a>) -> Result<Self::DeserType<'a>>;
367}
368
369/// Blanket implementation that prevents the user from overwriting the
370/// methods in [`Deserialize`].
371///
372/// This implementation [checks the header](`check_header`) written
373/// by the blanket implementation of [`crate::ser::Serialize`] and then delegates to
374/// [`DeserInner::_deser_full_inner`] or
375/// [`DeserInner::_deser_eps_inner`].
376impl<T: SerInner<SerType: TypeHash + AlignHash> + DeserInner> Deserialize for T {
377 /// # Safety
378 ///
379 /// See the documentation of [`Deserialize`].
380 unsafe fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self> {
381 let mut backend = ReaderWithPos::new(backend);
382 check_header::<Self>(&mut backend)?;
383 unsafe { Self::_deser_full_inner(&mut backend) }
384 }
385
386 /// # Safety
387 ///
388 /// See the documentation of [`Deserialize`].
389 unsafe fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>> {
390 let mut backend = SliceWithPos::new(backend);
391 check_header::<Self>(&mut backend)?;
392 unsafe { Self::_deser_eps_inner(&mut backend) }
393 }
394}
395
396/// Common header check code for both ε-copy and full-copy deserialization.
397///
398/// Must be kept in sync with [`crate::ser::write_header`].
399pub fn check_header<T: SerInner<SerType: TypeHash + AlignHash>>(
400 backend: &mut impl ReadWithPos,
401) -> Result<()> {
402 let self_type_name = core::any::type_name::<T>().to_string();
403 let self_ser_type_name = core::any::type_name::<T::SerType>().to_string();
404 let mut type_hasher = xxhash_rust::xxh3::Xxh3::new();
405 T::SerType::type_hash(&mut type_hasher);
406 let self_type_hash = type_hasher.finish();
407
408 let mut align_hasher = xxhash_rust::xxh3::Xxh3::new();
409 let mut offset_of = 0;
410 T::SerType::align_hash(&mut align_hasher, &mut offset_of);
411 let self_align_hash = align_hasher.finish();
412
413 let magic = unsafe { u64::_deser_full_inner(backend)? };
414 match magic {
415 MAGIC => Ok(()),
416 MAGIC_REV => Err(Error::EndiannessError),
417 magic => Err(Error::MagicCookieError(magic)),
418 }?;
419
420 let major = unsafe { u16::_deser_full_inner(backend)? };
421 if major != VERSION.0 {
422 return Err(Error::MajorVersionMismatch(major));
423 }
424 let minor = unsafe { u16::_deser_full_inner(backend)? };
425 if minor > VERSION.1 {
426 return Err(Error::MinorVersionMismatch(minor));
427 };
428
429 let usize_size = unsafe { u8::_deser_full_inner(backend)? };
430 let usize_size = usize_size as usize;
431 let native_usize_size = core::mem::size_of::<usize>();
432 if usize_size != native_usize_size {
433 return Err(Error::UsizeSizeMismatch(usize_size));
434 };
435
436 let ser_type_hash = unsafe { u64::_deser_full_inner(backend)? };
437 let ser_align_hash = unsafe { u64::_deser_full_inner(backend)? };
438 let ser_type_name = unsafe { String::_deser_full_inner(backend)? }.to_string();
439
440 if ser_type_hash != self_type_hash {
441 return Err(Error::WrongTypeHash {
442 ser_type_name,
443 ser_type_hash,
444 self_type_name,
445 self_ser_type_name,
446 self_type_hash,
447 });
448 }
449 if ser_align_hash != self_align_hash {
450 return Err(Error::WrongAlignHash {
451 ser_type_name,
452 ser_align_hash,
453 self_type_name,
454 self_ser_type_name,
455 self_align_hash,
456 });
457 }
458
459 Ok(())
460}
461
462/// A helper trait that makes it possible to implement differently
463/// deserialization for [`crate::traits::ZeroCopy`] and [`crate::traits::DeepCopy`] types.
464/// See [`crate::traits::CopyType`] for more information.
465pub trait DeserHelper<T: CopySelector> {
466 type FullType;
467 type DeserType<'a>;
468
469 /// # Safety
470 ///
471 /// See the documentation of [`Deserialize`].
472 unsafe fn _deser_full_inner_impl(backend: &mut impl ReadWithPos) -> Result<Self::FullType>;
473
474 /// # Safety
475 ///
476 /// See the documentation of [`Deserialize`].
477 unsafe fn _deser_eps_inner_impl<'a>(
478 backend: &mut SliceWithPos<'a>,
479 ) -> Result<Self::DeserType<'a>>;
480}
481
482#[derive(thiserror::Error, Debug)]
483/// Errors that can happen during deserialization.
484pub enum Error {
485 #[error("Error reading stats for file during ε-serde deserialization: {0}")]
486 /// [`Deserialize::load_full`] could not open the provided file.
487 #[cfg(feature = "std")]
488 FileOpenError(std::io::Error),
489 #[error("Read error during ε-serde deserialization")]
490 /// The underlying reader returned an error.
491 ReadError,
492 /// The file is from ε-serde but the endianness is wrong.
493 #[cfg_attr(
494 target_endian = "big",
495 error("The current arch is big-endian but the data is little-endian.")
496 )]
497 #[cfg_attr(
498 target_endian = "little",
499 error("The current arch is little-endian but the data is big-endian.")
500 )]
501 EndiannessError,
502 #[error(
503 "Alignment error. Most likely you are deserializing from a memory region with insufficient alignment."
504 )]
505 /// Some fields are not properly aligned.
506 AlignmentError,
507 #[error("Major version mismatch. Expected {major} but got {0}.", major = VERSION.0)]
508 /// The file was serialized with a version of ε-serde that is not compatible.
509 MajorVersionMismatch(u16),
510 #[error("Minor version mismatch. Expected {minor} but got {0}.", minor = VERSION.1)]
511 /// The file was serialized with a compatible, but too new version of ε-serde
512 /// so we might be missing features.
513 MinorVersionMismatch(u16),
514 #[error("The file was serialized on an architecture where a usize has size {0}, but on the current architecture it has size {size}.", size = core::mem::size_of::<usize>())]
515 /// The pointer width of the serialized file is different from the pointer
516 /// width of the current architecture. For example, the file was serialized
517 /// on a 64-bit machine and we are trying to deserialize it on a 32-bit
518 /// machine.
519 UsizeSizeMismatch(usize),
520 #[error("Wrong magic cookie 0x{0:016x}. The byte stream does not come from ε-serde.")]
521 /// The magic cookie is wrong. The byte sequence does not come from ε-serde.
522 MagicCookieError(u64),
523 #[error("Invalid tag: 0x{0:02x}")]
524 /// A tag is wrong (e.g., for [`Option`]).
525 InvalidTag(usize),
526 #[error(
527 r#"Wrong type hash
528Actual: 0x{ser_type_hash:016x}; expected: 0x{self_type_hash:016x}.
529
530The serialized type is
531 '{ser_type_name}',
532but the deserializable type on which the deserialization method was invoked is
533 '{self_type_name}',
534which has serialization type
535 {self_ser_type_name}.
536
537You are trying to deserialize a file with the wrong type. You might also be
538trying to deserialize a tuple of mixed zero-copy types, which is no longer
539supported since 0.8.0, an instance containing tuples, whose type hash was fixed
540in 0.9.0, or an instance containing a vector or a string that was serialized
541before 0.10.0."#
542 )]
543 /// The type hash is wrong. Probably the user is trying to deserialize a
544 /// file with the wrong type.
545 WrongTypeHash {
546 // The name of the type that was serialized.
547 ser_type_name: String,
548 // The [`TypeHash`] of the type that was serialized.
549 ser_type_hash: u64,
550 // The name of the type on which the deserialization method was called.
551 self_type_name: String,
552 // The name of the serialization type of `self_type_name`.
553 self_ser_type_name: String,
554 // The [`TypeHash`] of the type on which the deserialization method was called.
555 self_type_hash: u64,
556 },
557 #[error(
558 r#"Wrong alignment hash
559Actual: 0x{ser_align_hash:016x}; expected: 0x{self_align_hash:016x}.
560
561The serialized type is
562 '{ser_type_name}',
563but the deserializable type on which the deserialization method was invoked is
564 '{self_type_name}',
565which has serialization type
566 {self_ser_type_name}.
567
568You might be trying to deserialize a file that was serialized on an
569architecture with different alignment requirements, or some of the fields of
570the type might have changed their copy type (zero or deep). You might also be
571trying to deserialize an array, whose alignment hash has been fixed in 0.8.0.
572It is also possible that you are trying to deserialize a file serialized before
573version 0.10.0 in which repr attributes were not sorted lexicographically."#
574 )]
575 /// The type representation hash is wrong. Probably the user is trying to
576 /// deserialize a file with some zero-copy type that has different
577 /// in-memory representations on the serialization arch and on the current one,
578 /// usually because of alignment issues. There are also some backward-compatibility
579 /// issues discussed in the error message.
580 WrongAlignHash {
581 // The name of the type that was serialized.
582 ser_type_name: String,
583 // The [`AlignHash`] of the type that was serialized.
584 ser_align_hash: u64,
585 // The name of the type on which the deserialization method was called.
586 self_type_name: String,
587 // The name of the serialization type of `self_type_name`.
588 self_ser_type_name: String,
589 // The [`AlignHash`] of the type on which the deserialization method was called.
590 self_align_hash: u64,
591 },
592}