simple_sds_sbwt/
serialize.rs

1//! Simple serialization interface.
2//!
3//! The serialized representation closely mirrors the in-memory representation with 8-byte alignment.
4//! This makes it easy to develop memory-mapped versions of the structures.
5//!
6//! Note that loading serialized structures is fundamentally unsafe.
7//! Some [`Serialize::load`] implementations do simple sanity checks on the headers.
8//! However, it is not feasible to validate all loaded data in high-performance code.
9//! The behavior of corrupted data structures is always undefined.
10//!
11//! Function [`test()`] offers a convenient way of testing that the serialization interface works correctly for a custom type.
12//!
13//! # Serialization formats
14//!
15//! The serialization format of a structure, as implemented with trait [`Serialize`], is split into the header and the body.
16//! Both contain a concatenation of 0 or more structures, and at least one of them must be non-empty.
17//! The header and the body can be serialized separately with [`Serialize::serialize_header`] and [`Serialize::serialize_body`].
18//! Method [`Serialize::serialize`] provides an easy way of calling both.
19//! A serialized structure is always loaded with a single [`Serialize::load`] call.
20//!
21//! There are currently five basic serialization types:
22//!
23//! * [`Serializable`]: A fixed-size type that can be serialized as one or more [`u64`] elements.
24//!   The header is empty and the body contains the value.
25//! * [`Vec`] of a type that implements [`Serializable`].
26//!   The header stores the number of items in the vector as [`usize`].
27//!   The body stores the items.
28//! * [`Vec`] of [`u8`].
29//!   The header stores the number of items in the vector as [`usize`].
30//!   The body stores the items followed by a padding of `0` bytes to make the size of the body a multiple of 8 bytes.
31//! * [`String`] stored as a [`Vec`] of [`u8`] using the UTF-8 encoding.
32//! * [`Option`]`<T>` for a type `T` that implements [`Serialize`].
33//!   The header stores the number of [`u64`] elements in the body as [`usize`].
34//!   The body stores `T` for [`Some`]`(T)` and is empty for [`None`].
35//!
36//! See also: [https://github.com/jltsiren/simple-sds/blob/main/SERIALIZATION.md](https://github.com/jltsiren/simple-sds/blob/main/SERIALIZATION.md).
37//!
38//! # Memory-mapped structures
39//!
40//! [`MemoryMap`] implements a highly unsafe interface of memory mapping files as arrays of [`u64`] elements.
41//! The file can be opened for reading and writing ([`MappingMode::Mutable`]) or as read-only ([`MappingMode::ReadOnly`]).
42//! While the contents of the file can be changed, the file cannot be resized.
43//!
44//! A file may contain multiple nested or concatenated structures.
45//! Trait [`MemoryMapped`] represents a memory-mapped structure that borrows an interval of the memory map.
46//! There are four implementations of [`MemoryMapped`] for basic serialization types:
47//!
48//! * [`MappedSlice`] matches the serialization format of [`Vec`] of a [`Serializable`] type.
49//! * [`MappedBytes`] matches the serialization format of [`Vec`] of [`u8`].
50//! * [`MappedStr`] matches the serialization format of [`String`].
51//! * [`MappedOption`] matches the serialization format of [`Option`].
52
53use crate::bits;
54
55use std::fmt::Debug;
56use std::fs::OpenOptions;
57#[cfg(not(target_family = "wasm"))]
58use std::fs::File;
59use std::io::{Error, ErrorKind, Read, Write};
60#[cfg(not(target_family = "wasm"))]
61use std::ops::{Deref, Index};
62#[cfg(not(target_family = "wasm"))]
63use std::os::fd::AsRawFd;
64use std::path::{Path, PathBuf};
65use std::sync::atomic::{AtomicUsize, Ordering};
66use std::{env, fs, io, mem, process, slice, str};
67#[cfg(not(target_family = "wasm"))]
68use std::{marker, ptr};
69
70#[cfg(test)]
71mod tests;
72
73//-----------------------------------------------------------------------------
74
75/// Serialize a data structure.
76///
77/// `self.size_in_elements()` should always be nonzero.
78///
79/// A structure that implements `Serialize` may also have an associated function `size_by_params`.
80/// The function determines the size of a serialized structure with the given parameters in [`u64`] elements without building the structure.
81///
82/// # Examples
83///
84/// ```
85/// use simple_sds_sbwt::serialize::Serialize;
86/// use simple_sds_sbwt::serialize;
87/// use std::{fs, io, mem};
88///
89/// #[derive(PartialEq, Eq, Debug)]
90/// struct Example(i32, u32);
91///
92/// impl Serialize for Example {
93///     fn serialize_header<T: io::Write>(&self, _: &mut T) -> io::Result<()> {
94///         Ok(())
95///     }
96///
97///     fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
98///         let bytes: [u8; mem::size_of::<Self>()] = unsafe { mem::transmute_copy(self) };
99///         writer.write_all(&bytes)?;
100///         Ok(())
101///     }
102///
103///     fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
104///         let mut bytes = [0u8; mem::size_of::<Self>()];
105///         reader.read_exact(&mut bytes)?;
106///         let value: Example = unsafe { mem::transmute_copy(&bytes) };
107///         Ok(value)
108///     }
109///
110///     fn size_in_elements(&self) -> usize {
111///         1
112///     }
113/// }
114///
115/// let example = Example(-123, 456);
116/// assert_eq!(example.size_in_bytes(), 8);
117///
118/// let filename = serialize::temp_file_name("serialize");
119/// serialize::serialize_to(&example, &filename).unwrap();
120///
121/// let copy: Example = serialize::load_from(&filename).unwrap();
122/// assert_eq!(copy, example);
123///
124/// fs::remove_file(&filename).unwrap();
125/// ```
126pub trait Serialize: Sized {
127    /// Serializes the struct to the writer.
128    ///
129    /// Equivalent to calling [`Serialize::serialize_header`] and [`Serialize::serialize_body`].
130    ///
131    /// # Errors
132    ///
133    /// Any errors from the writer may be passed through.
134    fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
135        self.serialize_header(writer)?;
136        self.serialize_body(writer)?;
137        Ok(())
138    }
139
140    /// Serializes the header to the writer.
141    ///
142    /// # Errors
143    ///
144    /// Any errors from the writer may be passed through.
145    fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()>;
146
147    /// Serializes the body to the writer.
148    ///
149    /// # Errors
150    ///
151    /// Any errors from the writer may be passed through.
152    fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()>;
153
154    /// Loads the struct from the reader.
155    ///
156    /// # Errors
157    ///
158    /// Any errors from the reader may be passed through.
159    /// [`ErrorKind::InvalidData`] should be used to indicate that the data failed sanity checks.
160    fn load<T: Read>(reader: &mut T) -> io::Result<Self>;
161
162    /// Returns the size of the serialized struct in [`u64`] elements.
163    ///
164    /// This is usually closely related to the size of the in-memory struct.
165    fn size_in_elements(&self) -> usize;
166
167    /// Returns the size of the serialized struct in bytes.
168    ///
169    /// This is usually closely related to the size of the in-memory struct.
170    fn size_in_bytes(&self) -> usize {
171        bits::words_to_bytes(self.size_in_elements())
172    }
173}
174
175//-----------------------------------------------------------------------------
176
177/// A fixed-size type that can be serialized as one or more [`u64`] elements.
178pub trait Serializable: Sized + Default {
179    /// Returns the number of elements needed for serializing the type.
180    fn elements() -> usize {
181        mem::size_of::<Self>() / bits::WORD_BYTES
182    }
183}
184
185impl Serializable for u64 {}
186impl Serializable for usize {}
187impl Serializable for (u64, u64) {}
188
189impl<V: Serializable> Serialize for V {
190    fn serialize_header<T: Write>(&self, _: &mut T) -> io::Result<()> {
191        Ok(())
192    }
193
194    fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
195        unsafe {
196            let buf: &[u8] = slice::from_raw_parts(self as *const Self as *const u8, mem::size_of::<Self>());
197            writer.write_all(buf)?;
198        }
199        Ok(())
200    }
201
202    fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
203        let mut value = Self::default();
204        unsafe {
205            let buf: &mut [u8] = slice::from_raw_parts_mut(&mut value as *mut Self as *mut u8, mem::size_of::<Self>());
206            reader.read_exact(buf)?;
207        }
208        Ok(value)
209    }
210
211    fn size_in_elements(&self) -> usize {
212        Self::elements()
213    }
214}
215
216impl<V: Serializable> Serialize for Vec<V> {
217    fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
218        let size = self.len();
219        size.serialize(writer)?;
220        Ok(())
221    }
222
223    fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
224        unsafe {
225            let buf: &[u8] = slice::from_raw_parts(self.as_ptr() as *const u8, self.len() * mem::size_of::<V>());
226            writer.write_all(buf)?;
227        }
228        Ok(())
229    }
230
231    fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
232        let size = usize::load(reader)?;
233        let mut value: Vec<V> = Vec::with_capacity(size);
234
235        unsafe {
236            let buf: &mut [u8] = slice::from_raw_parts_mut(value.as_mut_ptr() as *mut u8, size * mem::size_of::<V>());
237            reader.read_exact(buf)?;
238            value.set_len(size);
239        }
240
241        Ok(value)
242    }
243
244    fn size_in_elements(&self) -> usize {
245        1 + self.len() * V::elements()
246    }
247}
248
249impl Serialize for Vec<u8> {
250    fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
251        let size = self.len();
252        size.serialize(writer)?;
253        Ok(())
254    }
255
256    fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
257        writer.write_all(self.as_slice())?;
258        let padded_len = bits::round_up_to_word_bytes(self.len());
259        if padded_len > self.len() {
260            let padding = [0u8; bits::WORD_BYTES];
261            writer.write_all(&padding[0..padded_len - self.len()])?;
262        }
263        Ok(())
264    }
265
266    fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
267        let size = usize::load(reader)?;
268        let mut value: Vec<u8> = vec![0; size];
269        reader.read_exact(value.as_mut_slice())?;
270
271        // Skip padding.
272        let padded_len = bits::round_up_to_word_bytes(value.len());
273        if padded_len > value.len() {
274            let mut padding = [0u8; bits::WORD_BYTES];
275            reader.read_exact(&mut padding[0..padded_len - value.len()])?;
276        }
277
278        Ok(value)
279    }
280
281    fn size_in_elements(&self) -> usize {
282        1 + bits::bytes_to_words(self.len())
283    }
284}
285
286impl Serialize for String {
287    fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
288        let size = self.len();
289        size.serialize(writer)?;
290        Ok(())
291    }
292
293    fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
294        writer.write_all(self.as_bytes())?;
295        let padded_len = bits::round_up_to_word_bytes(self.len());
296        if padded_len > self.len() {
297            let padding = [0u8; bits::WORD_BYTES];
298            writer.write_all(&padding[0..padded_len - self.len()])?;
299        }
300        Ok(())
301    }
302
303    fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
304        let bytes = Vec::<u8>::load(reader)?;
305        String::from_utf8(bytes).map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid UTF-8"))
306    }
307
308    fn size_in_elements(&self) -> usize {
309        1 + bits::bytes_to_words(self.len())
310    }
311}
312
313impl<V: Serialize> Serialize for Option<V> {
314    fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
315        let mut size: usize = 0;
316        if let Some(value) = self {
317            size = value.size_in_elements();
318        }
319        size.serialize(writer)?;
320        Ok(())
321    }
322
323    fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
324        if let Some(value) = self {
325            value.serialize(writer)?;
326        }
327        Ok(())
328    }
329
330    fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
331        let size = usize::load(reader)?;
332        if size == 0 {
333            Ok(None)
334        } else {
335            let value = V::load(reader)?;
336            // Here we could check that `value.size_in_elements() == size`. However, if
337            // the value contains inner optional structures that were present in the
338            // file but were skipped by `load`, the size of the in-memory structure is
339            // too small. And because we do not require `io::Seek` from `T`, we cannot
340            // check that we advanced by `size` elements in the reader.
341            Ok(Some(value))
342        }
343    }
344
345    fn size_in_elements(&self) -> usize {
346        let mut result: usize = 1;
347        if let Some(value) = self {
348            result += value.size_in_elements();
349        }
350        result
351    }
352}
353
354//-----------------------------------------------------------------------------
355
/// Access mode used when memory mapping a file.
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MappingMode {
    /// The mapping can only be read.
    ReadOnly,
    /// The mapping can be both read and written.
    Mutable,
}
365
366/// A memory-mapped file as an array of [`u64`].
367///
368/// This interface is highly unsafe.
369/// The file remains open until the `MemoryMap` is dropped.
370/// Memory-mapped structures should implement the [`MemoryMapped`] trait.
371///
372/// # Examples
373///
374/// ```
375/// use simple_sds_sbwt::serialize::{MemoryMap, MappingMode, Serialize};
376/// use simple_sds_sbwt::serialize;
377/// use std::fs;
378///
379/// let v: Vec<u64> = vec![123, 456];
380/// let filename = serialize::temp_file_name("memory-map");
381/// serialize::serialize_to(&v, &filename);
382///
383/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
384/// assert_eq!(map.mode(), MappingMode::ReadOnly);
385/// assert_eq!(map.len(), 3);
386/// unsafe {
387///     let slice: &[u64] = map.as_ref();
388///     assert_eq!(slice[0], 2);
389///     assert_eq!(slice[1], 123);
390///     assert_eq!(slice[2], 456);
391/// }
392///
393/// drop(map);
394/// fs::remove_file(&filename).unwrap();
395/// ```
396#[cfg(not(target_family = "wasm"))]
397#[derive(Debug)]
398pub struct MemoryMap {
399    _file: File, // The compiler might otherwise get overzealous and complain that we don't touch the file.
400    filename: PathBuf,
401    mode: MappingMode,
402    ptr: *mut u64,
403    len: usize,
404}
405
406// TODO: implement madvise()?
407#[cfg(not(target_family = "wasm"))]
408impl MemoryMap {
409    /// Returns a memory map for the specified file in the given mode.
410    ///
411    /// # Arguments
412    ///
413    /// * `filename`: Name of the file.
414    /// * `mode`: Memory mapping mode.
415    ///
416    /// # Errors
417    ///
418    /// The call may fail for a number of reasons, including:
419    ///
420    /// * File `filename` does not exist.
421    /// * The file cannot be opened for writing with mode `MappingMode::Mutable`.
422    /// * The size of the file is not a multiple of 8 bytes.
423    /// * Memory mapping the file fails.
424    pub fn new<P: AsRef<Path>>(filename: P, mode: MappingMode) -> io::Result<MemoryMap> {
425        let write = match mode {
426            MappingMode::ReadOnly => false,
427            MappingMode::Mutable => true,
428        };
429        let mut options = OpenOptions::new();
430        let file = options.read(true).write(write).open(&filename)?;
431
432        let metadata = file.metadata()?;
433        let len = metadata.len() as usize;
434        if len != bits::round_up_to_word_bytes(len) {
435            return Err(Error::new(ErrorKind::Other, "File size must be a multiple of 8 bytes"));
436        }
437
438        let prot = match mode {
439            MappingMode::ReadOnly => libc::PROT_READ,
440            MappingMode::Mutable => libc::PROT_READ | libc::PROT_WRITE,
441        };
442        let ptr = unsafe { libc::mmap(ptr::null_mut(), len, prot, libc::MAP_SHARED, file.as_raw_fd(), 0) };
443        if ptr.is_null() {
444            return Err(Error::new(ErrorKind::Other, "Memory mapping failed"));
445        }
446
447        let mut buf = PathBuf::new();
448        buf.push(&filename);
449        Ok(MemoryMap {
450            _file: file,
451            filename: buf,
452            mode,
453            ptr: ptr.cast::<u64>(),
454            len: bits::bytes_to_words(len),
455        })
456    }
457
458    /// Returns the name of the memory mapped file.
459    pub fn filename(&self) -> &Path {
460        self.filename.as_path()
461    }
462
463    /// Returns the memory mapping mode for the file.
464    #[inline]
465    pub fn mode(&self) -> MappingMode {
466        self.mode
467    }
468
469    /// Returns a mutable slice corresponding to the file.
470    ///
471    /// # Safety
472    ///
473    /// Behavior is undefined if the file was opened with mode `MappingMode::ReadOnly`.
474    pub unsafe fn as_mut_slice(&mut self) -> &mut [u64] {
475        slice::from_raw_parts_mut(self.ptr, self.len)
476    }
477
478    /// Returns the length of the memory-mapped file.
479    #[inline]
480    pub fn len(&self) -> usize {
481        self.len
482    }
483
484    /// Returns `true` if the file is empty.
485    #[inline]
486    pub fn is_empty(&self) -> bool {
487        self.len == 0
488    }
489}
490
491#[cfg(not(target_family = "wasm"))]
492impl AsRef<[u64]> for MemoryMap {
493    fn as_ref(&self) -> &[u64] {
494        unsafe { slice::from_raw_parts(self.ptr, self.len) }
495    }
496}
497
498#[cfg(not(target_family = "wasm"))]
499impl Drop for MemoryMap {
500    fn drop(&mut self) {
501        unsafe {
502            let _ = libc::munmap(self.ptr.cast::<libc::c_void>(), self.len);
503        }
504    }
505}
506
507//-----------------------------------------------------------------------------
508
509/// A memory-mapped structure that borrows an interval of a memory map.
510///
511/// # Example
512///
513/// ```
514/// use simple_sds_sbwt::serialize::{MappingMode, MemoryMap, MemoryMapped, Serialize};
515/// use simple_sds_sbwt::serialize;
516/// use std::io::{Error, ErrorKind};
517/// use std::{fs, io, slice};
518///
519/// // This can read a serialized `Vec<u64>`.
520/// #[derive(Debug)]
521/// struct Example<'a> {
522///     data: &'a [u64],
523///     offset: usize,
524/// }
525///
526/// impl<'a> Example<'a> {
527///     pub fn as_slice(&self) -> &[u64] {
528///         self.data
529///     }
530/// }
531///
532/// impl<'a> MemoryMapped<'a> for Example<'a> {
533///     fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
534///         if offset >= map.len() {
535///             return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
536///         }
537///         let slice: &[u64] = map.as_ref();
538///         let len = slice[offset] as usize;
539///         if offset + 1 + len > map.len() {
540///             return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
541///         }
542///         Ok(Example {
543///             data: &slice[offset + 1 .. offset + 1 + len],
544///             offset: offset,
545///         })
546///     }
547///
548///     fn map_offset(&self) -> usize {
549///         self.offset
550///     }
551///
552///     fn map_len(&self) -> usize {
553///         self.data.len() + 1
554///     }
555/// }
556///
557/// let v: Vec<u64> = vec![123, 456, 789];
558/// let filename = serialize::temp_file_name("memory-mapped");
559/// serialize::serialize_to(&v, &filename);
560///
561/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
562/// let mapped = Example::new(&map, 0).unwrap();
563/// assert_eq!(mapped.map_offset(), 0);
564/// assert_eq!(mapped.map_len(), v.len() + 1);
565/// assert_eq!(mapped.as_slice(), v.as_slice());
566/// drop(mapped); drop(map);
567///
568/// fs::remove_file(&filename).unwrap();
569/// ```
570#[cfg(not(target_family = "wasm"))]
571pub trait MemoryMapped<'a>: Sized {
572    /// Returns an immutable memory-mapped structure corresponding to an interval in the file.
573    ///
574    /// # Arguments
575    ///
576    /// * `map`: Memory-mapped file.
577    /// * `offset`: Starting offset in the file.
578    ///
579    /// # Errors
580    ///
581    /// Implementing types should use [`ErrorKind::UnexpectedEof`] where appropriate.
582    /// [`ErrorKind::InvalidData`] should be used to indicate that the data failed sanity checks.
583    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self>;
584
585    /// Returns the starting offset in the file.
586    fn map_offset(&self) -> usize;
587
588    /// Returns the length of the interval corresponding to the structure.
589    fn map_len(&self) -> usize;
590}
591
592//-----------------------------------------------------------------------------
593
594/// An immutable memory-mapped slice of a type that implements [`Serializable`].
595///
596/// The slice is compatible with the serialization format of [`Vec`] of the same type.
597///
598/// # Examples
599///
600/// ```
601/// use simple_sds_sbwt::serialize::{MappedSlice, MappingMode, MemoryMap, MemoryMapped, Serialize};
602/// use simple_sds_sbwt::serialize;
603/// use std::fs;
604///
605/// let v: Vec<(u64, u64)> = vec![(123, 456), (789, 101112)];
606/// let filename = serialize::temp_file_name("mapped-slice");
607/// serialize::serialize_to(&v, &filename);
608///
609/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
610/// let mapped = MappedSlice::<(u64, u64)>::new(&map, 0).unwrap();
611/// assert_eq!(mapped.len(), v.len());
612/// assert_eq!(mapped[0], (123, 456));
613/// assert_eq!(mapped[1], (789, 101112));
614/// assert_eq!(*mapped, *v);
615/// drop(mapped); drop(map);
616///
617/// fs::remove_file(&filename).unwrap();
618/// ```
619#[cfg(not(target_family = "wasm"))]
620#[derive(PartialEq, Eq, Debug)]
621pub struct MappedSlice<'a, T: Serializable> {
622    data: &'a [T],
623    offset: usize,
624}
625
626#[cfg(not(target_family = "wasm"))]
627impl<'a, T: Serializable> MappedSlice<'a, T> {
628    /// Returns the length of the slice.
629    pub fn len(&self) -> usize {
630        self.data.len()
631    }
632
633    /// Returns `true` if the slice is empty.
634    pub fn is_empty(&self) -> bool {
635        self.data.is_empty()
636    }
637}
638
639#[cfg(not(target_family = "wasm"))]
640impl<'a, T: Serializable> AsRef<[T]> for MappedSlice<'a, T> {
641    fn as_ref(&self) -> &[T] {
642        self.data
643    }
644}
645
646#[cfg(not(target_family = "wasm"))]
647impl<'a, T: Serializable> Deref for MappedSlice<'a, T> {
648    type Target = [T];
649
650    fn deref(&self) -> &Self::Target {
651        self.data
652    }
653}
654
655#[cfg(not(target_family = "wasm"))]
656impl<'a, T: Serializable> Index<usize> for MappedSlice<'a, T> {
657    type Output = T;
658
659    fn index(&self, index: usize) -> &Self::Output {
660        &self.data[index]
661    }
662}
663
664#[cfg(not(target_family = "wasm"))]
665impl<'a, T: Serializable> MemoryMapped<'a> for MappedSlice<'a, T> {
666    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
667        if offset >= map.len() {
668            return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
669        }
670        let slice: &[u64] = map.as_ref();
671        let len = slice[offset] as usize;
672        if offset + 1 + len * T::elements() > map.len() {
673            return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
674        }
675        let source: &[u64] = &slice[offset + 1 ..];
676        let data: &[T] = unsafe { slice::from_raw_parts(source.as_ptr() as *const T, len) };
677        Ok(MappedSlice {
678            data, offset,
679        })
680    }
681
682    fn map_offset(&self) -> usize {
683        self.offset
684    }
685
686    fn map_len(&self) -> usize {
687        self.len() * T::elements() + 1
688    }
689}
690
691//-----------------------------------------------------------------------------
692
/// A read-only byte slice that borrows its contents from a memory map.
///
/// The layout matches the serialization format of a [`Vec`] of [`u8`].
///
/// # Examples
///
/// ```
/// use simple_sds_sbwt::serialize::{MappedBytes, MappingMode, MemoryMap, MemoryMapped, Serialize};
/// use simple_sds_sbwt::serialize;
/// use std::fs;
///
/// let v: Vec<u8> = vec![1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233];
/// let filename = serialize::temp_file_name("mapped-bytes");
/// serialize::serialize_to(&v, &filename).unwrap();
///
/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
/// let mapped = MappedBytes::new(&map, 0).unwrap();
/// assert_eq!(mapped.len(), v.len());
/// assert_eq!(mapped[3], 3);
/// assert_eq!(mapped[6], 13);
/// assert_eq!(*mapped, *v);
/// drop(mapped); drop(map);
///
/// fs::remove_file(&filename).unwrap();
/// ```
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, PartialEq, Eq)]
pub struct MappedBytes<'a> {
    // The bytes, borrowed from the memory map (padding excluded).
    data: &'a [u8],
    // Offset of the header in the memory map.
    offset: usize,
}
724
725#[cfg(not(target_family = "wasm"))]
726impl<'a> MappedBytes<'a> {
727    /// Returns the length of the slice.
728    pub fn len(&self) -> usize {
729        self.data.len()
730    }
731
732    /// Returns `true` if the slice is empty.
733    pub fn is_empty(&self) -> bool {
734        self.data.is_empty()
735    }
736}
737
738#[cfg(not(target_family = "wasm"))]
739impl<'a> AsRef<[u8]> for MappedBytes<'a> {
740    fn as_ref(&self) -> &[u8] {
741        self.data
742    }
743}
744
745#[cfg(not(target_family = "wasm"))]
746impl<'a> Deref for MappedBytes<'a> {
747    type Target = [u8];
748
749    fn deref(&self) -> &Self::Target {
750        self.data
751    }
752}
753
754#[cfg(not(target_family = "wasm"))]
755impl<'a> Index<usize> for MappedBytes<'a> {
756    type Output = u8;
757
758    fn index(&self, index: usize) -> &Self::Output {
759        &self.data[index]
760    }
761}
762
763#[cfg(not(target_family = "wasm"))]
764impl<'a> MemoryMapped<'a> for MappedBytes<'a> {
765    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
766        if offset >= map.len() {
767            return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
768        }
769        let slice: &[u64] = map.as_ref();
770        let len = slice[offset] as usize;
771        if offset + 1 + bits::bytes_to_words(len) > map.len() {
772            return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
773        }
774        let source: &[u64] = &slice[offset + 1 ..];
775        let data: &[u8] = unsafe { slice::from_raw_parts(source.as_ptr() as *const u8, len) };
776        Ok(MappedBytes {
777            data, offset,
778        })
779    }
780
781    fn map_offset(&self) -> usize {
782        self.offset
783    }
784
785    fn map_len(&self) -> usize {
786        bits::bytes_to_words(self.len()) + 1
787    }
788}
789
790//-----------------------------------------------------------------------------
791
/// A read-only string slice that borrows its contents from a memory map.
///
/// The layout matches the serialization format of a [`String`].
///
/// # Examples
///
/// ```
/// use simple_sds_sbwt::serialize::{MappedStr, MappingMode, MemoryMap, MemoryMapped, Serialize};
/// use simple_sds_sbwt::serialize;
/// use std::fs;
///
/// let s = String::from("GATTACA");
/// let filename = serialize::temp_file_name("mapped-str");
/// serialize::serialize_to(&s, &filename).unwrap();
///
/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
/// let mapped = MappedStr::new(&map, 0).unwrap();
/// assert_eq!(mapped.len(), s.len());
/// assert_eq!(*mapped, *s);
/// drop(mapped); drop(map);
///
/// fs::remove_file(&filename).unwrap();
/// ```
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, PartialEq, Eq)]
pub struct MappedStr<'a> {
    // The string, borrowed from the memory map (padding excluded).
    data: &'a str,
    // Offset of the header in the memory map.
    offset: usize,
}
821
822#[cfg(not(target_family = "wasm"))]
823impl<'a> MappedStr<'a> {
824    /// Returns the length of the slice in bytes.
825    pub fn len(&self) -> usize {
826        self.data.len()
827    }
828
829    /// Returns `true` if the slice is empty.
830    pub fn is_empty(&self) -> bool {
831        self.data.is_empty()
832    }
833}
834
835#[cfg(not(target_family = "wasm"))]
836impl<'a> AsRef<str> for MappedStr<'a> {
837    fn as_ref(&self) -> &str {
838        self.data
839    }
840}
841
842#[cfg(not(target_family = "wasm"))]
843impl<'a> Deref for MappedStr<'a> {
844    type Target = str;
845
846    fn deref(&self) -> &Self::Target {
847        self.data
848    }
849}
850#[cfg(not(target_family = "wasm"))]
851impl<'a> MemoryMapped<'a> for MappedStr<'a> {
852    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
853        if offset >= map.len() {
854            return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
855        }
856        let slice: &[u64] = map.as_ref();
857        let len = slice[offset] as usize;
858        if offset + 1 + bits::bytes_to_words(len) > map.len() {
859            return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
860        }
861        let source: &[u64] = &slice[offset + 1 ..];
862        let bytes: &[u8] = unsafe { slice::from_raw_parts(source.as_ptr() as *const u8, len) };
863        let data = str::from_utf8(bytes).map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid UTF-8"))?;
864        Ok(MappedStr {
865            data, offset,
866        })
867    }
868
869    fn map_offset(&self) -> usize {
870        self.offset
871    }
872
873    fn map_len(&self) -> usize {
874        bits::bytes_to_words(self.len()) + 1
875    }
876}
877
878//-----------------------------------------------------------------------------
879
880/// An optional immutable memory-mapped structure.
881///
882/// This is compatible with the serialization format of [`Option`] of the same type.
883///
884/// # Examples
885///
886/// ```
887/// use simple_sds_sbwt::serialize::{MappedOption, MappedSlice, MappingMode, MemoryMap, MemoryMapped, Serialize};
888/// use simple_sds_sbwt::serialize;
889/// use std::fs;
890///
891/// let some: Option<Vec<u64>> = Some(vec![123, 456, 789]);
892/// let filename = serialize::temp_file_name("mapped-option");
893/// serialize::serialize_to(&some, &filename);
894///
895/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
896/// let mapped = MappedOption::<MappedSlice<u64>>::new(&map, 0).unwrap();
897/// assert_eq!(mapped.unwrap().as_ref(), some.unwrap().as_slice());
898/// drop(mapped); drop(map);
899///
900/// fs::remove_file(&filename).unwrap();
901/// ```
902#[cfg(not(target_family = "wasm"))]
903#[derive(PartialEq, Eq, Debug)]
904pub struct MappedOption<'a, T: MemoryMapped<'a>> {
905    data: Option<T>,
906    offset: usize,
907    data_len: usize,
908    _marker: marker::PhantomData<&'a ()>,
909}
910
911#[cfg(not(target_family = "wasm"))]
912impl<'a, T: MemoryMapped<'a>> MappedOption<'a, T> {
913    /// Returns `true` if the option is a [`Some`] value.
914    pub fn is_some(&self) -> bool {
915        self.data.is_some()
916    }
917
918    /// Returns `true` if the option is a [`None`] value.
919    pub fn is_none(&self) -> bool {
920        self.data.is_none()
921    }
922
923    /// Returns an immutable reference to the possibly contained value.
924    ///
925    /// # Panics
926    ///
927    /// Panics if the option is a [`None`] value.
928    pub fn unwrap(&self) -> &T {
929        match &self.data {
930            Some(value) => value,
931            None => panic!("MappedOption::unwrap(): No value to unwrap"),
932        }
933    }
934
935    /// Returns [`Option`]`<&T>` referencing the possibly contained value.
936    pub fn as_ref(&self) -> Option<&T> {
937        match &self.data {
938            Some(value) => Some(value),
939            None => None,
940        }
941    }
942}
943
944#[cfg(not(target_family = "wasm"))]
945impl<'a, T: MemoryMapped<'a>> MemoryMapped<'a> for MappedOption<'a, T> {
946    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
947        if offset >= map.len() {
948            return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
949        }
950        let mut result = MappedOption {
951            data: None,
952            offset,
953            data_len: map.as_ref()[offset] as usize,
954            _marker: marker::PhantomData,
955        };
956        if result.data_len > 0 {
957            let value = T::new(map, offset + 1)?;
958            result.data = Some(value)
959        }
960        Ok(result)
961    }
962
963    fn map_offset(&self) -> usize {
964        self.offset
965    }
966
967    fn map_len(&self) -> usize {
968        self.data_len + 1
969    }
970}
971
972//-----------------------------------------------------------------------------
973
974/// Serializes the item to the specified file, creating or overwriting the file if necessary.
975///
976/// See [`Serialize`] for an example.
977///
978/// # Errors
979///
980/// Any errors from [`OpenOptions::open`] and [`Serialize::serialize`] will be passed through.
981pub fn serialize_to<T: Serialize, P: AsRef<Path>>(item: &T, filename: P) -> io::Result<()> {
982    let mut options = OpenOptions::new();
983    let mut file = options.create(true).write(true).truncate(true).open(filename)?;
984    item.serialize(&mut file)?;
985    Ok(())
986}
987
988/// Loads the item from the specified file.
989///
990/// See [`Serialize`] for an example.
991///
992/// # Errors
993///
994/// Any errors from [`OpenOptions::open`] and [`Serialize::load`] will be passed through.
995pub fn load_from<T: Serialize, P: AsRef<Path>>(filename: P) -> io::Result<T> {
996    let mut options = OpenOptions::new();
997    let mut file = options.read(true).open(filename)?;
998    <T as Serialize>::load(&mut file)
999}
1000
1001/// Serializes an absent optional structure of any type.
1002///
1003/// # Errors
1004///
1005/// Any errors from the writer will be passed through.
1006pub fn absent_option<T: Write>(writer: &mut T) -> io::Result<()> {
1007    let size: usize = 0;
1008    size.serialize(writer)?;
1009    Ok(())
1010}
1011
1012/// Skips a serialized optional structure.
1013///
1014/// # Errors
1015///
1016/// Any errors from the reader will be passed through.
1017pub fn skip_option<T: Read>(reader: &mut T) -> io::Result<()> {
1018    let elements = usize::load(reader)?;
1019    if elements > 0 {
1020        io::copy(&mut reader.by_ref().take((elements * bits::WORD_BYTES) as u64), &mut io::sink())?;
1021    }
1022    Ok(())
1023}
1024
/// Returns the number of elements an absent optional structure takes when serialized.
///
/// The result is the same for every type.
pub fn absent_option_size() -> usize {
    const ABSENT_OPTION_ELEMENTS: usize = 1;
    ABSENT_OPTION_ELEMENTS
}
1029
// Process-wide counter that makes successive temporary file names distinct.
static TEMP_FILE_COUNTER: AtomicUsize = AtomicUsize::new(0);

/// Builds a path for a temporary file in the system temporary directory.
///
/// The file name consists of `name_part`, the process id, and a counter that is incremented
/// on every call, separated by underscores. The file itself is not created.
///
/// # Examples
///
/// ```
/// use simple_sds_sbwt::serialize;
///
/// let filename = serialize::temp_file_name("example");
/// assert!(filename.into_os_string().into_string().unwrap().contains("example"));
/// ```
pub fn temp_file_name(name_part: &str) -> PathBuf {
    let serial = TEMP_FILE_COUNTER.fetch_add(1, Ordering::SeqCst);
    let file_name = format!("{}_{}_{}", name_part, process::id(), serial);
    env::temp_dir().join(file_name)
}
1049
1050/// Tests that the [`Serialize`] implementation works correctly.
1051///
1052/// Returns the name of the temporary file if `remove == false` and removes the file if `remove == true`.
1053/// The type must also implement [`PartialEq`] and [`Debug`] for the tests.
1054///
1055/// # Arguments
1056///
1057/// * `original`: Structure to be serialized.
1058/// * `name`: Name of the structure (for temporary file names and error messages).
1059/// * `expected_size`: Expected size in elements, or [`None`] if not known.
1060/// * `remove`: Should the temporary file be removed instead of returning its name.
1061///
1062/// # Examples
1063///
1064/// ```
1065/// use simple_sds_sbwt::serialize;;
1066///
1067/// let v: Vec<u64> = vec![1, 11, 111, 1111];
1068/// let _ = serialize::test(&v, "vec-u64", Some(1 + v.len()), true);
1069/// ```
1070///
1071/// # Panics
1072///
1073/// Will panic if any of the tests fails.
1074pub fn test<T: Serialize + PartialEq + Debug>(original: &T, name: &str, expected_size: Option<usize>, remove: bool) -> Option<PathBuf> {
1075    if let Some(value) = expected_size {
1076        assert_eq!(original.size_in_elements(), value, "Size estimate for the serialized {} is not as expected", name);
1077    }
1078
1079    let filename = temp_file_name(name);
1080    serialize_to(original, &filename).unwrap();
1081
1082    let metadata = fs::metadata(&filename).unwrap();
1083    let len = metadata.len() as usize;
1084    assert_eq!(original.size_in_bytes(), len, "Invalid size estimate for the serialized {}", name);
1085
1086    let copy: T = load_from(&filename).unwrap();
1087    assert_eq!(copy, *original, "Serialization changed the {}", name);
1088
1089    if remove {
1090        fs::remove_file(&filename).unwrap();
1091        None
1092    } else {
1093        Some(filename)
1094    }
1095}
1096//-----------------------------------------------------------------------------