1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
/*
 * SPDX-FileCopyrightText: 2023 Inria
 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */

/*!

Deserialization traits and types

[`Deserialize`] is the main deserialization trait, providing methods
[`Deserialize::deserialize_eps`] and [`Deserialize::deserialize_full`]
which implement ε-copy and full-copy deserialization, respectively.
The implementation of this trait is based on [`DeserializeInner`],
which is automatically derived with `#[derive(Deserialize)]`.

*/

use crate::traits::*;
use crate::{MAGIC, MAGIC_REV, VERSION};
use core::mem::align_of;
use core::ptr::addr_of_mut;
use core::{hash::Hasher, mem::MaybeUninit};
use std::{io::BufReader, path::Path};

pub mod helpers;
pub use helpers::*;
pub mod mem_case;
pub use mem_case::*;
pub mod read;
pub use read::*;
pub mod reader_with_pos;
pub use reader_with_pos::*;
pub mod slice_with_pos;
pub use slice_with_pos::*;

/// A shorthand for a [`core::result::Result`] whose error type is the
/// deserialization [`Error`] defined in this module.
pub type Result<T> = core::result::Result<T, Error>;

/// A shorthand for the [deserialized type associated with a type](DeserializeInner::DeserType).
///
/// `DeserType<'a, T>` is exactly `<T as DeserializeInner>::DeserType<'a>`; the
/// alias only saves the fully qualified syntax.
pub type DeserType<'a, T> = <T as DeserializeInner>::DeserType<'a>;

/// Main deserialization trait. It is separated from [`DeserializeInner`] to
/// avoid that the user modify its behavior, and hide internal serialization
/// methods.
///
/// It provides several convenience methods to load or map into memory
/// structures that have been previously serialized. See, for example,
/// [`Deserialize::load_full`], [`Deserialize::load_mem`], and [`Deserialize::mmap`].
pub trait Deserialize: TypeHash + ReprHash + DeserializeInner {
    /// Fully deserialize a structure of this type from the given backend.
    fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self>;
    /// ε-copy deserialize a structure of this type from the given backend.
    fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>>;

    /// Commodity method to fully deserialize from a file.
    fn load_full(path: impl AsRef<Path>) -> Result<Self> {
        let file = std::fs::File::open(path).map_err(Error::FileOpenError)?;
        let mut buf_reader = BufReader::new(file);
        Self::deserialize_full(&mut buf_reader)
    }

    /// Load a file into heap-allocated memory and ε-deserialize a data structure from it,
    /// returning a [`MemCase`] containing the data structure and the
    /// memory. Excess bytes are zeroed out.
    ///
    /// The allocated memory will have [`MemoryAlignment`] as alignment: types with
    /// a higher alignment requirement will cause an [alignment error](`Error::AlignmentError`).
    fn load_mem<'a>(
        path: impl AsRef<Path>,
    ) -> anyhow::Result<MemCase<<Self as DeserializeInner>::DeserType<'a>>> {
        let align_to = align_of::<MemoryAlignment>();
        if align_of::<Self>() > align_to {
            return Err(Error::AlignmentError.into());
        }
        let file_len = path.as_ref().metadata()?.len() as usize;
        let mut file = std::fs::File::open(path)?;
        // Round up to u128 size
        let capacity = file_len + crate::pad_align_to(file_len, align_to);

        let mut uninit: MaybeUninit<MemCase<<Self as DeserializeInner>::DeserType<'_>>> =
            MaybeUninit::uninit();
        let ptr = uninit.as_mut_ptr();

        // SAFETY: the entire vector will be filled with data read from the file,
        // or with zeroes if the file is shorter than the vector.
        #[allow(invalid_value)]
        let mut aligned_vec = unsafe {
            <Vec<MemoryAlignment>>::from_raw_parts(
                std::alloc::alloc(std::alloc::Layout::from_size_align(capacity, align_to)?)
                    as *mut MemoryAlignment,
                capacity / align_to,
                capacity / align_to,
            )
        };

        let bytes = unsafe {
            core::slice::from_raw_parts_mut(aligned_vec.as_mut_ptr() as *mut u8, capacity)
        };

        file.read_exact(&mut bytes[..file_len])?;
        // Fixes the last few bytes to guarantee zero-extension semantics
        // for bit vectors and full-vector initialization.
        bytes[file_len..].fill(0);

        // SAFETY: the vector is aligned to 16 bytes.
        let backend = MemBackend::Memory(aligned_vec.into_boxed_slice());

        // store the backend inside the MemCase
        unsafe {
            addr_of_mut!((*ptr).1).write(backend);
        }
        // deserialize the data structure
        let mem = unsafe { (*ptr).1.as_ref().unwrap() };
        let s = Self::deserialize_eps(mem)?;
        // write the deserialized struct in the memcase
        unsafe {
            addr_of_mut!((*ptr).0).write(s);
        }
        // finish init
        Ok(unsafe { uninit.assume_init() })
    }

    /// Load a file into `mmap()`-allocated memory and ε-deserialize a data structure from it,
    /// returning a [`MemCase`] containing the data structure and the
    /// memory. Excess bytes are zeroed out.
    ///
    /// The behavior of `mmap()` can be modified by passing some [`Flags`]; otherwise,
    /// just pass `Flags::empty()`.
    #[allow(clippy::uninit_vec)]
    fn load_mmap<'a>(
        path: impl AsRef<Path>,
        flags: Flags,
    ) -> anyhow::Result<MemCase<<Self as DeserializeInner>::DeserType<'a>>> {
        let file_len = path.as_ref().metadata()?.len() as usize;
        let mut file = std::fs::File::open(path)?;
        let capacity = file_len + crate::pad_align_to(file_len, 16);

        let mut uninit: MaybeUninit<MemCase<<Self as DeserializeInner>::DeserType<'_>>> =
            MaybeUninit::uninit();
        let ptr = uninit.as_mut_ptr();

        let mut mmap = mmap_rs::MmapOptions::new(capacity)?
            .with_flags(flags.mmap_flags())
            .map_mut()?;
        file.read_exact(&mut mmap[..file_len])?;
        // Fixes the last few bytes to guarantee zero-extension semantics
        // for bit vectors.
        mmap[file_len..].fill(0);

        let backend = MemBackend::Mmap(mmap.make_read_only().map_err(|(_, err)| err)?);

        // store the backend inside the MemCase
        unsafe {
            addr_of_mut!((*ptr).1).write(backend);
        }
        // deserialize the data structure
        let mem = unsafe { (*ptr).1.as_ref().unwrap() };
        let s = Self::deserialize_eps(mem)?;
        // write the deserialized struct in the MemCase
        unsafe {
            addr_of_mut!((*ptr).0).write(s);
        }
        // finish init
        Ok(unsafe { uninit.assume_init() })
    }

    /// Memory map a file and ε-deserialize a data structure from it,
    /// returning a [`MemCase`] containing the data structure and the
    /// memory mapping.
    ///
    /// The behavior of `mmap()` can be modified by passing some [`Flags`]; otherwise,
    /// just pass `Flags::empty()`.
    #[allow(clippy::uninit_vec)]
    fn mmap<'a>(
        path: impl AsRef<Path>,
        flags: Flags,
    ) -> anyhow::Result<MemCase<<Self as DeserializeInner>::DeserType<'a>>> {
        let file_len = path.as_ref().metadata()?.len();
        let file = std::fs::File::open(path)?;

        let mut uninit: MaybeUninit<MemCase<<Self as DeserializeInner>::DeserType<'_>>> =
            MaybeUninit::uninit();
        let ptr = uninit.as_mut_ptr();

        let mmap = unsafe {
            mmap_rs::MmapOptions::new(file_len as _)?
                .with_flags(flags.mmap_flags())
                .with_file(&file, 0)
                .map()?
        };

        // store the backend inside the MemCase
        unsafe {
            addr_of_mut!((*ptr).1).write(MemBackend::Mmap(mmap));
        }

        let mmap = unsafe { (*ptr).1.as_ref().unwrap() };
        // deserialize the data structure
        let s = Self::deserialize_eps(mmap)?;
        // write the deserialized struct in the MemCase
        unsafe {
            addr_of_mut!((*ptr).0).write(s);
        }
        // finish init
        Ok(unsafe { uninit.assume_init() })
    }
}

/// Inner trait to implement deserialization of a type. This trait exists
/// to separate the user-facing [`Deserialize`] trait from the low-level
/// deserialization mechanisms of [`DeserializeInner::_deserialize_full_inner`]
/// and [`DeserializeInner::_deserialize_eps_inner`]. Moreover,
/// it makes it possible to behave slightly differently at the top
/// of the recursion tree (e.g., to check the endianness marker), and to prevent
/// the user from modifying the methods in [`Deserialize`].
///
/// The user should not implement this trait directly, but rather derive it.
pub trait DeserializeInner: Sized {
    /// The deserialization type associated with this type. It can be
    /// retrieved conveniently with the alias [`DeserType`].
    type DeserType<'a>;
    /// Low-level full-copy deserialization: reads an instance of `Self`
    /// from `backend`. No header is checked here; the header is handled by
    /// [`Deserialize::deserialize_full`] before delegating to this method.
    fn _deserialize_full_inner(backend: &mut impl ReadWithPos) -> Result<Self>;

    /// Low-level ε-copy deserialization: reads an instance of the associated
    /// [deserialization type](DeserializeInner::DeserType) from `backend`,
    /// with the same lifetime as the underlying slice. No header is checked
    /// here; the header is handled by [`Deserialize::deserialize_eps`] before
    /// delegating to this method.
    fn _deserialize_eps_inner<'a>(backend: &mut SliceWithPos<'a>) -> Result<Self::DeserType<'a>>;
}

/// Blanket implementation of [`Deserialize`] for every type that provides
/// [`TypeHash`], [`ReprHash`] and [`DeserializeInner`]; being a blanket
/// implementation, it cannot be overridden by the user.
///
/// Both methods first [check the header](`check_header`) written
/// by the blanket implementation of [`crate::ser::Serialize`], and then hand
/// over to [`DeserializeInner::_deserialize_full_inner`] or
/// [`DeserializeInner::_deserialize_eps_inner`], respectively.
impl<T> Deserialize for T
where
    T: TypeHash + ReprHash + DeserializeInner,
{
    fn deserialize_full(backend: &mut impl ReadNoStd) -> Result<Self> {
        // The header check and the inner deserialization share one wrapper.
        let mut reader = ReaderWithPos::new(backend);
        check_header::<T>(&mut reader)?;
        <T as DeserializeInner>::_deserialize_full_inner(&mut reader)
    }

    fn deserialize_eps(backend: &'_ [u8]) -> Result<Self::DeserType<'_>> {
        // The header check and the inner deserialization share one wrapper.
        let mut cursor = SliceWithPos::new(backend);
        check_header::<T>(&mut cursor)?;
        <T as DeserializeInner>::_deserialize_eps_inner(&mut cursor)
    }
}

/// Common header check code for both ε-copy and full-copy deserialization.
///
/// Must be kept in sync with [`crate::ser::write_header`].
pub fn check_header<T: Deserialize>(backend: &mut impl ReadWithPos) -> Result<()> {
    let self_type_name = core::any::type_name::<T>().to_string();

    let mut type_hasher = xxhash_rust::xxh3::Xxh3::new();
    T::type_hash(&mut type_hasher);
    let self_type_hash = type_hasher.finish();

    let mut repr_hasher = xxhash_rust::xxh3::Xxh3::new();
    let mut offset_of = 0;
    T::repr_hash(&mut repr_hasher, &mut offset_of);
    let self_repr_hash = repr_hasher.finish();

    let magic = u64::_deserialize_full_inner(backend)?;
    match magic {
        MAGIC => Ok(()),
        MAGIC_REV => Err(Error::EndiannessError),
        magic => Err(Error::MagicCookieError(magic)),
    }?;

    let major = u16::_deserialize_full_inner(backend)?;
    if major != VERSION.0 {
        return Err(Error::MajorVersionMismatch(major));
    }
    let minor = u16::_deserialize_full_inner(backend)?;
    if minor > VERSION.1 {
        return Err(Error::MinorVersionMismatch(minor));
    };

    let usize_size = u8::_deserialize_full_inner(backend)?;
    let usize_size = usize_size as usize;
    let native_usize_size = core::mem::size_of::<usize>();
    if usize_size != native_usize_size {
        return Err(Error::UsizeSizeMismatch(usize_size));
    };

    let ser_type_hash = u64::_deserialize_full_inner(backend)?;
    let ser_repr_hash = u64::_deserialize_full_inner(backend)?;
    let ser_type_name = String::_deserialize_full_inner(backend)?;

    if ser_type_hash != self_type_hash {
        return Err(Error::WrongTypeHash {
            got_type_name: self_type_name,
            got: self_type_hash,
            expected_type_name: ser_type_name,
            expected: ser_type_hash,
        });
    }
    if ser_repr_hash != self_repr_hash {
        return Err(Error::WrongTypeReprHash {
            got_type_name: self_type_name,
            got: self_repr_hash,
            expected_type_name: ser_type_name,
            expected: ser_repr_hash,
        });
    }

    Ok(())
}

/// A helper trait that makes it possible to implement differently
/// deserialization for [`crate::traits::ZeroCopy`] and [`crate::traits::DeepCopy`] types.
/// See [`crate::traits::CopyType`] for more information.
pub trait DeserializeHelper<T: CopySelector> {
    /// The type returned by
    /// [`DeserializeHelper::_deserialize_full_inner_impl`].
    type FullType;
    /// The type returned by
    /// [`DeserializeHelper::_deserialize_eps_inner_impl`]; it may depend on
    /// the lifetime of the slice it is deserialized from.
    type DeserType<'a>;

    /// Full-copy deserialization from `backend` for the copy type selected
    /// by `T`.
    fn _deserialize_full_inner_impl(backend: &mut impl ReadWithPos) -> Result<Self::FullType>;

    /// ε-copy deserialization from `backend` for the copy type selected
    /// by `T`.
    fn _deserialize_eps_inner_impl<'a>(
        backend: &mut SliceWithPos<'a>,
    ) -> Result<Self::DeserType<'a>>;
}

#[derive(Debug)]
/// Errors that can happen during deserialization.
pub enum Error {
    /// [`Deserialize::load_full`] could not open the provided file.
    FileOpenError(std::io::Error),
    /// The underlying reader returned an error.
    ReadError,
    /// The file is from ε-serde but the endianness is wrong.
    EndiannessError,
    /// Some fields are not properly aligned.
    AlignmentError,
    /// The file was serialized with a version of ε-serde that is not compatible.
    /// The payload is the major version found in the file.
    MajorVersionMismatch(u16),
    /// The file was serialized with a compatible, but too new version of ε-serde
    /// so we might be missing features.
    /// The payload is the minor version found in the file.
    MinorVersionMismatch(u16),
    /// The `pointer_width` of the serialized file is different from the
    /// `pointer_width` of the current architecture.
    /// For example, the file was serialized on a 64-bit machine and we are trying to
    /// deserialize it on a 32-bit machine.
    /// The payload is the size in bytes of a `usize` found in the file.
    UsizeSizeMismatch(usize),
    /// The magic cookie is wrong. The byte sequence does not come from ε-serde.
    /// The payload is the cookie that was read.
    MagicCookieError(u64),
    /// A tag is wrong (e.g., for [`Option`]).
    InvalidTag(usize),
    /// The type hash is wrong. Probably the user is trying to deserialize a
    /// file with the wrong type.
    WrongTypeHash {
        /// The name of the type we are deserializing into.
        got_type_name: String,
        /// The type name stored in the serialized data.
        expected_type_name: String,
        /// The type hash stored in the serialized data.
        expected: u64,
        /// The type hash of the type we are deserializing into.
        got: u64,
    },
    /// The type representation hash is wrong. Probably the user is trying to
    /// deserialize a file with some zero-copy type that has different
    /// in-memory representations on the serialization arch and on the current one,
    /// usually because of alignment issues.
    WrongTypeReprHash {
        /// The name of the type we are deserializing into.
        got_type_name: String,
        /// The type name stored in the serialized data.
        expected_type_name: String,
        /// The representation hash stored in the serialized data.
        expected: u64,
        /// The representation hash of the type we are deserializing into.
        got: u64,
    },
}

// Only the provided (default) methods of `std::error::Error` are used: the
// messages come from the `Display` and `Debug` implementations of `Error`.
// In particular, `source()` is not overridden, as the I/O error wrapped by
// `Error::FileOpenError` is already included in the `Display` message.
impl std::error::Error for Error {}

/// Human-readable messages for each deserialization [`Error`].
impl core::fmt::Display for Error {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        match self {
            // Variants with a fixed message.
            Self::ReadError => f.write_str("Read error during ε-serde deserialization"),
            Self::AlignmentError => f.write_str("Alignment error. Most likely you are deserializing from a memory region with insufficient alignment."),
            Self::EndiannessError => {
                // The data necessarily has the endianness opposite to the native one.
                let (native, foreign) = if cfg!(target_endian = "little") {
                    ("little", "big")
                } else {
                    ("big", "little")
                };
                write!(
                    f,
                    "The current arch is {}-endian but the data is {}-endian.",
                    native, foreign
                )
            }

            // Variants carrying a single value.
            Self::FileOpenError(io_error) => {
                write!(f, "Error opening file during ε-serde deserialization: {}", io_error)
            }
            Self::MagicCookieError(cookie) => write!(
                f,
                "Wrong magic cookie 0x{:016x}. The byte stream does not come from ε-serde.",
                cookie,
            ),
            Self::InvalidTag(tag) => write!(f, "Invalid tag: 0x{:02x}", tag),
            Self::MajorVersionMismatch(file_major) => {
                let crate_major = VERSION.0;
                write!(
                    f,
                    "Major version mismatch. Expected {} but got {}.",
                    crate_major, file_major,
                )
            }
            Self::MinorVersionMismatch(file_minor) => {
                let crate_minor = VERSION.1;
                write!(
                    f,
                    "Minor version mismatch. Expected {} but got {}.",
                    crate_minor, file_minor,
                )
            }
            Self::UsizeSizeMismatch(file_usize_size) => {
                let native_usize_size = core::mem::size_of::<usize>();
                write!(
                    f,
                    "The file was serialized on an architecture where a usize has size {}, but on the current architecture it has size {}.",
                    file_usize_size,
                    native_usize_size
                )
            }

            // Hash mismatches report both hashes and both type names.
            Self::WrongTypeHash {
                got_type_name: deser_name,
                expected_type_name: ser_name,
                expected: ser_hash,
                got: deser_hash,
            } => write!(
                f,
                concat!(
                    "Wrong type hash. Expected: 0x{:016x} Actual: 0x{:016x}.\n",
                    "You are trying to deserialize a file with the wrong type.\n",
                    "The serialized type is '{}' and the deserialized type is '{}'.",
                ),
                ser_hash, deser_hash, ser_name, deser_name,
            ),
            Self::WrongTypeReprHash {
                got_type_name: deser_name,
                expected_type_name: ser_name,
                expected: ser_hash,
                got: deser_hash,
            } => write!(
                f,
                concat!(
                    "Wrong type repr hash. Expected: 0x{:016x} Actual: 0x{:016x}.\n",
                    "You might be trying to deserialize a file that was serialized on ",
                    "an architecture with different alignment requirements, or some ",
                    "of the fields of the type have changed their copy type (zero or deep).\n",
                    "The serialized type is '{}' and the deserialized type is '{}'.",
                ),
                ser_hash, deser_hash, ser_name, deser_name,
            ),
        }
    }
}