simple_sds_sbwt/serialize.rs
1//! Simple serialization interface.
2//!
3//! The serialized representation closely mirrors the in-memory representation with 8-byte alignment.
4//! This makes it easy to develop memory-mapped versions of the structures.
5//!
6//! Note that loading serialized structures is fundamentally unsafe.
7//! Some [`Serialize::load`] implementations do simple sanity checks on the headers.
8//! However, it is not feasible to validate all loaded data in high-performance code.
9//! The behavior of corrupted data structures is always undefined.
10//!
11//! Function [`test()`] offers a convenient way of testing that the serialization interface works correctly for a custom type.
12//!
13//! # Serialization formats
14//!
15//! The serialization format of a structure, as implemented with trait [`Serialize`], is split into the header and the body.
16//! Both contain a concatenation of 0 or more structures, and at least one of them must be non-empty.
17//! The header and the body can be serialized separately with [`Serialize::serialize_header`] and [`Serialize::serialize_body`].
18//! Method [`Serialize::serialize`] provides an easy way of calling both.
19//! A serialized structure is always loaded with a single [`Serialize::load`] call.
20//!
21//! There are currently five basic serialization types:
22//!
23//! * [`Serializable`]: A fixed-size type that can be serialized as one or more [`u64`] elements.
24//! The header is empty and the body contains the value.
25//! * [`Vec`] of a type that implements [`Serializable`].
26//! The header stores the number of items in the vector as [`usize`].
27//! The body stores the items.
28//! * [`Vec`] of [`u8`].
29//! The header stores the number of items in the vector as [`usize`].
30//! The body stores the items followed by a padding of `0` bytes to make the size of the body a multiple of 8 bytes.
31//! * [`String`] stored as a [`Vec`] of [`u8`] using the UTF-8 encoding.
32//! * [`Option`]`<T>` for a type `T` that implements [`Serialize`].
33//! The header stores the number of [`u64`] elements in the body as [`usize`].
34//! The body stores `T` for [`Some`]`(T)` and is empty for [`None`].
35//!
36//! See also: [https://github.com/jltsiren/simple-sds/blob/main/SERIALIZATION.md](https://github.com/jltsiren/simple-sds/blob/main/SERIALIZATION.md).
37//!
38//! # Memory-mapped structures
39//!
40//! [`MemoryMap`] implements a highly unsafe interface of memory mapping files as arrays of [`u64`] elements.
41//! The file can be opened for reading and writing ([`MappingMode::Mutable`]) or as read-only ([`MappingMode::ReadOnly`]).
42//! While the contents of the file can be changed, the file cannot be resized.
43//!
44//! A file may contain multiple nested or concatenated structures.
45//! Trait [`MemoryMapped`] represents a memory-mapped structure that borrows an interval of the memory map.
46//! There are four implementations of [`MemoryMapped`] for basic serialization types:
47//!
48//! * [`MappedSlice`] matches the serialization format of [`Vec`] of a [`Serializable`] type.
49//! * [`MappedBytes`] matches the serialization format of [`Vec`] of [`u8`].
50//! * [`MappedStr`] matches the serialization format of [`String`].
51//! * [`MappedOption`] matches the serialization format of [`Option`].
52
53use crate::bits;
54
55use std::fmt::Debug;
56use std::fs::OpenOptions;
57#[cfg(not(target_family = "wasm"))]
58use std::fs::File;
59use std::io::{Error, ErrorKind, Read, Write};
60#[cfg(not(target_family = "wasm"))]
61use std::ops::{Deref, Index};
62#[cfg(not(target_family = "wasm"))]
63use std::os::fd::AsRawFd;
64use std::path::{Path, PathBuf};
65use std::sync::atomic::{AtomicUsize, Ordering};
66use std::{env, fs, io, mem, process, slice, str};
67#[cfg(not(target_family = "wasm"))]
68use std::{marker, ptr};
69
70#[cfg(test)]
71mod tests;
72
73//-----------------------------------------------------------------------------
74
75/// Serialize a data structure.
76///
77/// `self.size_in_elements()` should always be nonzero.
78///
79/// A structure that implements `Serialize` may also have an associated function `size_by_params`.
80/// The function determines the size of a serialized structure with the given parameters in [`u64`] elements without building the structure.
81///
82/// # Examples
83///
84/// ```
85/// use simple_sds_sbwt::serialize::Serialize;
86/// use simple_sds_sbwt::serialize;
87/// use std::{fs, io, mem};
88///
89/// #[derive(PartialEq, Eq, Debug)]
90/// struct Example(i32, u32);
91///
92/// impl Serialize for Example {
93/// fn serialize_header<T: io::Write>(&self, _: &mut T) -> io::Result<()> {
94/// Ok(())
95/// }
96///
97/// fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
98/// let bytes: [u8; mem::size_of::<Self>()] = unsafe { mem::transmute_copy(self) };
99/// writer.write_all(&bytes)?;
100/// Ok(())
101/// }
102///
103/// fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
104/// let mut bytes = [0u8; mem::size_of::<Self>()];
105/// reader.read_exact(&mut bytes)?;
106/// let value: Example = unsafe { mem::transmute_copy(&bytes) };
107/// Ok(value)
108/// }
109///
110/// fn size_in_elements(&self) -> usize {
111/// 1
112/// }
113/// }
114///
115/// let example = Example(-123, 456);
116/// assert_eq!(example.size_in_bytes(), 8);
117///
118/// let filename = serialize::temp_file_name("serialize");
119/// serialize::serialize_to(&example, &filename).unwrap();
120///
121/// let copy: Example = serialize::load_from(&filename).unwrap();
122/// assert_eq!(copy, example);
123///
124/// fs::remove_file(&filename).unwrap();
125/// ```
126pub trait Serialize: Sized {
127 /// Serializes the struct to the writer.
128 ///
129 /// Equivalent to calling [`Serialize::serialize_header`] and [`Serialize::serialize_body`].
130 ///
131 /// # Errors
132 ///
133 /// Any errors from the writer may be passed through.
134 fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
135 self.serialize_header(writer)?;
136 self.serialize_body(writer)?;
137 Ok(())
138 }
139
140 /// Serializes the header to the writer.
141 ///
142 /// # Errors
143 ///
144 /// Any errors from the writer may be passed through.
145 fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()>;
146
147 /// Serializes the body to the writer.
148 ///
149 /// # Errors
150 ///
151 /// Any errors from the writer may be passed through.
152 fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()>;
153
154 /// Loads the struct from the reader.
155 ///
156 /// # Errors
157 ///
158 /// Any errors from the reader may be passed through.
159 /// [`ErrorKind::InvalidData`] should be used to indicate that the data failed sanity checks.
160 fn load<T: Read>(reader: &mut T) -> io::Result<Self>;
161
162 /// Returns the size of the serialized struct in [`u64`] elements.
163 ///
164 /// This is usually closely related to the size of the in-memory struct.
165 fn size_in_elements(&self) -> usize;
166
167 /// Returns the size of the serialized struct in bytes.
168 ///
169 /// This is usually closely related to the size of the in-memory struct.
170 fn size_in_bytes(&self) -> usize {
171 bits::words_to_bytes(self.size_in_elements())
172 }
173}
174
175//-----------------------------------------------------------------------------
176
177/// A fixed-size type that can be serialized as one or more [`u64`] elements.
178pub trait Serializable: Sized + Default {
179 /// Returns the number of elements needed for serializing the type.
180 fn elements() -> usize {
181 mem::size_of::<Self>() / bits::WORD_BYTES
182 }
183}
184
185impl Serializable for u64 {}
186impl Serializable for usize {}
187impl Serializable for (u64, u64) {}
188
189impl<V: Serializable> Serialize for V {
190 fn serialize_header<T: Write>(&self, _: &mut T) -> io::Result<()> {
191 Ok(())
192 }
193
194 fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
195 unsafe {
196 let buf: &[u8] = slice::from_raw_parts(self as *const Self as *const u8, mem::size_of::<Self>());
197 writer.write_all(buf)?;
198 }
199 Ok(())
200 }
201
202 fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
203 let mut value = Self::default();
204 unsafe {
205 let buf: &mut [u8] = slice::from_raw_parts_mut(&mut value as *mut Self as *mut u8, mem::size_of::<Self>());
206 reader.read_exact(buf)?;
207 }
208 Ok(value)
209 }
210
211 fn size_in_elements(&self) -> usize {
212 Self::elements()
213 }
214}
215
216impl<V: Serializable> Serialize for Vec<V> {
217 fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
218 let size = self.len();
219 size.serialize(writer)?;
220 Ok(())
221 }
222
223 fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
224 unsafe {
225 let buf: &[u8] = slice::from_raw_parts(self.as_ptr() as *const u8, self.len() * mem::size_of::<V>());
226 writer.write_all(buf)?;
227 }
228 Ok(())
229 }
230
231 fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
232 let size = usize::load(reader)?;
233 let mut value: Vec<V> = Vec::with_capacity(size);
234
235 unsafe {
236 let buf: &mut [u8] = slice::from_raw_parts_mut(value.as_mut_ptr() as *mut u8, size * mem::size_of::<V>());
237 reader.read_exact(buf)?;
238 value.set_len(size);
239 }
240
241 Ok(value)
242 }
243
244 fn size_in_elements(&self) -> usize {
245 1 + self.len() * V::elements()
246 }
247}
248
249impl Serialize for Vec<u8> {
250 fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
251 let size = self.len();
252 size.serialize(writer)?;
253 Ok(())
254 }
255
256 fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
257 writer.write_all(self.as_slice())?;
258 let padded_len = bits::round_up_to_word_bytes(self.len());
259 if padded_len > self.len() {
260 let padding = [0u8; bits::WORD_BYTES];
261 writer.write_all(&padding[0..padded_len - self.len()])?;
262 }
263 Ok(())
264 }
265
266 fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
267 let size = usize::load(reader)?;
268 let mut value: Vec<u8> = vec![0; size];
269 reader.read_exact(value.as_mut_slice())?;
270
271 // Skip padding.
272 let padded_len = bits::round_up_to_word_bytes(value.len());
273 if padded_len > value.len() {
274 let mut padding = [0u8; bits::WORD_BYTES];
275 reader.read_exact(&mut padding[0..padded_len - value.len()])?;
276 }
277
278 Ok(value)
279 }
280
281 fn size_in_elements(&self) -> usize {
282 1 + bits::bytes_to_words(self.len())
283 }
284}
285
286impl Serialize for String {
287 fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
288 let size = self.len();
289 size.serialize(writer)?;
290 Ok(())
291 }
292
293 fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
294 writer.write_all(self.as_bytes())?;
295 let padded_len = bits::round_up_to_word_bytes(self.len());
296 if padded_len > self.len() {
297 let padding = [0u8; bits::WORD_BYTES];
298 writer.write_all(&padding[0..padded_len - self.len()])?;
299 }
300 Ok(())
301 }
302
303 fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
304 let bytes = Vec::<u8>::load(reader)?;
305 String::from_utf8(bytes).map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid UTF-8"))
306 }
307
308 fn size_in_elements(&self) -> usize {
309 1 + bits::bytes_to_words(self.len())
310 }
311}
312
313impl<V: Serialize> Serialize for Option<V> {
314 fn serialize_header<T: Write>(&self, writer: &mut T) -> io::Result<()> {
315 let mut size: usize = 0;
316 if let Some(value) = self {
317 size = value.size_in_elements();
318 }
319 size.serialize(writer)?;
320 Ok(())
321 }
322
323 fn serialize_body<T: Write>(&self, writer: &mut T) -> io::Result<()> {
324 if let Some(value) = self {
325 value.serialize(writer)?;
326 }
327 Ok(())
328 }
329
330 fn load<T: Read>(reader: &mut T) -> io::Result<Self> {
331 let size = usize::load(reader)?;
332 if size == 0 {
333 Ok(None)
334 } else {
335 let value = V::load(reader)?;
336 // Here we could check that `value.size_in_elements() == size`. However, if
337 // the value contains inner optional structures that were present in the
338 // file but were skipped by `load`, the size of the in-memory structure is
339 // too small. And because we do not require `io::Seek` from `T`, we cannot
340 // check that we advanced by `size` elements in the reader.
341 Ok(Some(value))
342 }
343 }
344
345 fn size_in_elements(&self) -> usize {
346 let mut result: usize = 1;
347 if let Some(value) = self {
348 result += value.size_in_elements();
349 }
350 result
351 }
352}
353
354//-----------------------------------------------------------------------------
355
/// Access mode requested when a file is memory mapped.
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MappingMode {
    /// The file is opened and mapped for reading only.
    ReadOnly,
    /// The file is opened and mapped for both reading and writing.
    Mutable,
}
365
366/// A memory-mapped file as an array of [`u64`].
367///
368/// This interface is highly unsafe.
369/// The file remains open until the `MemoryMap` is dropped.
370/// Memory-mapped structures should implement the [`MemoryMapped`] trait.
371///
372/// # Examples
373///
374/// ```
375/// use simple_sds_sbwt::serialize::{MemoryMap, MappingMode, Serialize};
376/// use simple_sds_sbwt::serialize;
377/// use std::fs;
378///
379/// let v: Vec<u64> = vec![123, 456];
380/// let filename = serialize::temp_file_name("memory-map");
381/// serialize::serialize_to(&v, &filename);
382///
383/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
384/// assert_eq!(map.mode(), MappingMode::ReadOnly);
385/// assert_eq!(map.len(), 3);
386/// unsafe {
387/// let slice: &[u64] = map.as_ref();
388/// assert_eq!(slice[0], 2);
389/// assert_eq!(slice[1], 123);
390/// assert_eq!(slice[2], 456);
391/// }
392///
393/// drop(map);
394/// fs::remove_file(&filename).unwrap();
395/// ```
396#[cfg(not(target_family = "wasm"))]
397#[derive(Debug)]
398pub struct MemoryMap {
399 _file: File, // The compiler might otherwise get overzealous and complain that we don't touch the file.
400 filename: PathBuf,
401 mode: MappingMode,
402 ptr: *mut u64,
403 len: usize,
404}
405
406// TODO: implement madvise()?
407#[cfg(not(target_family = "wasm"))]
408impl MemoryMap {
409 /// Returns a memory map for the specified file in the given mode.
410 ///
411 /// # Arguments
412 ///
413 /// * `filename`: Name of the file.
414 /// * `mode`: Memory mapping mode.
415 ///
416 /// # Errors
417 ///
418 /// The call may fail for a number of reasons, including:
419 ///
420 /// * File `filename` does not exist.
421 /// * The file cannot be opened for writing with mode `MappingMode::Mutable`.
422 /// * The size of the file is not a multiple of 8 bytes.
423 /// * Memory mapping the file fails.
424 pub fn new<P: AsRef<Path>>(filename: P, mode: MappingMode) -> io::Result<MemoryMap> {
425 let write = match mode {
426 MappingMode::ReadOnly => false,
427 MappingMode::Mutable => true,
428 };
429 let mut options = OpenOptions::new();
430 let file = options.read(true).write(write).open(&filename)?;
431
432 let metadata = file.metadata()?;
433 let len = metadata.len() as usize;
434 if len != bits::round_up_to_word_bytes(len) {
435 return Err(Error::new(ErrorKind::Other, "File size must be a multiple of 8 bytes"));
436 }
437
438 let prot = match mode {
439 MappingMode::ReadOnly => libc::PROT_READ,
440 MappingMode::Mutable => libc::PROT_READ | libc::PROT_WRITE,
441 };
442 let ptr = unsafe { libc::mmap(ptr::null_mut(), len, prot, libc::MAP_SHARED, file.as_raw_fd(), 0) };
443 if ptr.is_null() {
444 return Err(Error::new(ErrorKind::Other, "Memory mapping failed"));
445 }
446
447 let mut buf = PathBuf::new();
448 buf.push(&filename);
449 Ok(MemoryMap {
450 _file: file,
451 filename: buf,
452 mode,
453 ptr: ptr.cast::<u64>(),
454 len: bits::bytes_to_words(len),
455 })
456 }
457
458 /// Returns the name of the memory mapped file.
459 pub fn filename(&self) -> &Path {
460 self.filename.as_path()
461 }
462
463 /// Returns the memory mapping mode for the file.
464 #[inline]
465 pub fn mode(&self) -> MappingMode {
466 self.mode
467 }
468
469 /// Returns a mutable slice corresponding to the file.
470 ///
471 /// # Safety
472 ///
473 /// Behavior is undefined if the file was opened with mode `MappingMode::ReadOnly`.
474 pub unsafe fn as_mut_slice(&mut self) -> &mut [u64] {
475 slice::from_raw_parts_mut(self.ptr, self.len)
476 }
477
478 /// Returns the length of the memory-mapped file.
479 #[inline]
480 pub fn len(&self) -> usize {
481 self.len
482 }
483
484 /// Returns `true` if the file is empty.
485 #[inline]
486 pub fn is_empty(&self) -> bool {
487 self.len == 0
488 }
489}
490
491#[cfg(not(target_family = "wasm"))]
492impl AsRef<[u64]> for MemoryMap {
493 fn as_ref(&self) -> &[u64] {
494 unsafe { slice::from_raw_parts(self.ptr, self.len) }
495 }
496}
497
498#[cfg(not(target_family = "wasm"))]
499impl Drop for MemoryMap {
500 fn drop(&mut self) {
501 unsafe {
502 let _ = libc::munmap(self.ptr.cast::<libc::c_void>(), self.len);
503 }
504 }
505}
506
507//-----------------------------------------------------------------------------
508
509/// A memory-mapped structure that borrows an interval of a memory map.
510///
511/// # Example
512///
513/// ```
514/// use simple_sds_sbwt::serialize::{MappingMode, MemoryMap, MemoryMapped, Serialize};
515/// use simple_sds_sbwt::serialize;
516/// use std::io::{Error, ErrorKind};
517/// use std::{fs, io, slice};
518///
519/// // This can read a serialized `Vec<u64>`.
520/// #[derive(Debug)]
521/// struct Example<'a> {
522/// data: &'a [u64],
523/// offset: usize,
524/// }
525///
526/// impl<'a> Example<'a> {
527/// pub fn as_slice(&self) -> &[u64] {
528/// self.data
529/// }
530/// }
531///
532/// impl<'a> MemoryMapped<'a> for Example<'a> {
533/// fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
534/// if offset >= map.len() {
535/// return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
536/// }
537/// let slice: &[u64] = map.as_ref();
538/// let len = slice[offset] as usize;
539/// if offset + 1 + len > map.len() {
540/// return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
541/// }
542/// Ok(Example {
543/// data: &slice[offset + 1 .. offset + 1 + len],
544/// offset: offset,
545/// })
546/// }
547///
548/// fn map_offset(&self) -> usize {
549/// self.offset
550/// }
551///
552/// fn map_len(&self) -> usize {
553/// self.data.len() + 1
554/// }
555/// }
556///
557/// let v: Vec<u64> = vec![123, 456, 789];
558/// let filename = serialize::temp_file_name("memory-mapped");
559/// serialize::serialize_to(&v, &filename);
560///
561/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
562/// let mapped = Example::new(&map, 0).unwrap();
563/// assert_eq!(mapped.map_offset(), 0);
564/// assert_eq!(mapped.map_len(), v.len() + 1);
565/// assert_eq!(mapped.as_slice(), v.as_slice());
566/// drop(mapped); drop(map);
567///
568/// fs::remove_file(&filename).unwrap();
569/// ```
570#[cfg(not(target_family = "wasm"))]
571pub trait MemoryMapped<'a>: Sized {
572 /// Returns an immutable memory-mapped structure corresponding to an interval in the file.
573 ///
574 /// # Arguments
575 ///
576 /// * `map`: Memory-mapped file.
577 /// * `offset`: Starting offset in the file.
578 ///
579 /// # Errors
580 ///
581 /// Implementing types should use [`ErrorKind::UnexpectedEof`] where appropriate.
582 /// [`ErrorKind::InvalidData`] should be used to indicate that the data failed sanity checks.
583 fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self>;
584
585 /// Returns the starting offset in the file.
586 fn map_offset(&self) -> usize;
587
588 /// Returns the length of the interval corresponding to the structure.
589 fn map_len(&self) -> usize;
590}
591
592//-----------------------------------------------------------------------------
593
594/// An immutable memory-mapped slice of a type that implements [`Serializable`].
595///
596/// The slice is compatible with the serialization format of [`Vec`] of the same type.
597///
598/// # Examples
599///
600/// ```
601/// use simple_sds_sbwt::serialize::{MappedSlice, MappingMode, MemoryMap, MemoryMapped, Serialize};
602/// use simple_sds_sbwt::serialize;
603/// use std::fs;
604///
605/// let v: Vec<(u64, u64)> = vec![(123, 456), (789, 101112)];
606/// let filename = serialize::temp_file_name("mapped-slice");
607/// serialize::serialize_to(&v, &filename);
608///
609/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
610/// let mapped = MappedSlice::<(u64, u64)>::new(&map, 0).unwrap();
611/// assert_eq!(mapped.len(), v.len());
612/// assert_eq!(mapped[0], (123, 456));
613/// assert_eq!(mapped[1], (789, 101112));
614/// assert_eq!(*mapped, *v);
615/// drop(mapped); drop(map);
616///
617/// fs::remove_file(&filename).unwrap();
618/// ```
619#[cfg(not(target_family = "wasm"))]
620#[derive(PartialEq, Eq, Debug)]
621pub struct MappedSlice<'a, T: Serializable> {
622 data: &'a [T],
623 offset: usize,
624}
625
626#[cfg(not(target_family = "wasm"))]
627impl<'a, T: Serializable> MappedSlice<'a, T> {
628 /// Returns the length of the slice.
629 pub fn len(&self) -> usize {
630 self.data.len()
631 }
632
633 /// Returns `true` if the slice is empty.
634 pub fn is_empty(&self) -> bool {
635 self.data.is_empty()
636 }
637}
638
639#[cfg(not(target_family = "wasm"))]
640impl<'a, T: Serializable> AsRef<[T]> for MappedSlice<'a, T> {
641 fn as_ref(&self) -> &[T] {
642 self.data
643 }
644}
645
646#[cfg(not(target_family = "wasm"))]
647impl<'a, T: Serializable> Deref for MappedSlice<'a, T> {
648 type Target = [T];
649
650 fn deref(&self) -> &Self::Target {
651 self.data
652 }
653}
654
655#[cfg(not(target_family = "wasm"))]
656impl<'a, T: Serializable> Index<usize> for MappedSlice<'a, T> {
657 type Output = T;
658
659 fn index(&self, index: usize) -> &Self::Output {
660 &self.data[index]
661 }
662}
663
664#[cfg(not(target_family = "wasm"))]
665impl<'a, T: Serializable> MemoryMapped<'a> for MappedSlice<'a, T> {
666 fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
667 if offset >= map.len() {
668 return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
669 }
670 let slice: &[u64] = map.as_ref();
671 let len = slice[offset] as usize;
672 if offset + 1 + len * T::elements() > map.len() {
673 return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
674 }
675 let source: &[u64] = &slice[offset + 1 ..];
676 let data: &[T] = unsafe { slice::from_raw_parts(source.as_ptr() as *const T, len) };
677 Ok(MappedSlice {
678 data, offset,
679 })
680 }
681
682 fn map_offset(&self) -> usize {
683 self.offset
684 }
685
686 fn map_len(&self) -> usize {
687 self.len() * T::elements() + 1
688 }
689}
690
691//-----------------------------------------------------------------------------
692
693/// An immutable memory-mapped slice of [`u8`].
694///
695/// The slice is compatible with the serialization format of [`Vec`] of [`u8`].
696///
697/// # Examples
698///
699/// ```
700/// use simple_sds_sbwt::serialize::{MappedBytes, MappingMode, MemoryMap, MemoryMapped, Serialize};
701/// use simple_sds_sbwt::serialize;
702/// use std::fs;
703///
704/// let v: Vec<u8> = vec![1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233];
705/// let filename = serialize::temp_file_name("mapped-bytes");
706/// serialize::serialize_to(&v, &filename);
707///
708/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
709/// let mapped = MappedBytes::new(&map, 0).unwrap();
710/// assert_eq!(mapped.len(), v.len());
711/// assert_eq!(mapped[3], 3);
712/// assert_eq!(mapped[6], 13);
713/// assert_eq!(*mapped, *v);
714/// drop(mapped); drop(map);
715///
716/// fs::remove_file(&filename).unwrap();
717/// ```
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, PartialEq, Eq)]
pub struct MappedBytes<'a> {
    // The bytes, borrowed from the memory map (padding excluded).
    data: &'a [u8],
    // Offset of the header of the slice in the memory map.
    offset: usize,
}
724
725#[cfg(not(target_family = "wasm"))]
726impl<'a> MappedBytes<'a> {
727 /// Returns the length of the slice.
728 pub fn len(&self) -> usize {
729 self.data.len()
730 }
731
732 /// Returns `true` if the slice is empty.
733 pub fn is_empty(&self) -> bool {
734 self.data.is_empty()
735 }
736}
737
738#[cfg(not(target_family = "wasm"))]
739impl<'a> AsRef<[u8]> for MappedBytes<'a> {
740 fn as_ref(&self) -> &[u8] {
741 self.data
742 }
743}
744
745#[cfg(not(target_family = "wasm"))]
746impl<'a> Deref for MappedBytes<'a> {
747 type Target = [u8];
748
749 fn deref(&self) -> &Self::Target {
750 self.data
751 }
752}
753
754#[cfg(not(target_family = "wasm"))]
755impl<'a> Index<usize> for MappedBytes<'a> {
756 type Output = u8;
757
758 fn index(&self, index: usize) -> &Self::Output {
759 &self.data[index]
760 }
761}
762
763#[cfg(not(target_family = "wasm"))]
764impl<'a> MemoryMapped<'a> for MappedBytes<'a> {
765 fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
766 if offset >= map.len() {
767 return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
768 }
769 let slice: &[u64] = map.as_ref();
770 let len = slice[offset] as usize;
771 if offset + 1 + bits::bytes_to_words(len) > map.len() {
772 return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
773 }
774 let source: &[u64] = &slice[offset + 1 ..];
775 let data: &[u8] = unsafe { slice::from_raw_parts(source.as_ptr() as *const u8, len) };
776 Ok(MappedBytes {
777 data, offset,
778 })
779 }
780
781 fn map_offset(&self) -> usize {
782 self.offset
783 }
784
785 fn map_len(&self) -> usize {
786 bits::bytes_to_words(self.len()) + 1
787 }
788}
789
790//-----------------------------------------------------------------------------
791
792/// An immutable memory-mapped string slice.
793///
794/// The slice is compatible with the serialization format of [`String`].
795///
796/// # Examples
797///
798/// ```
799/// use simple_sds_sbwt::serialize::{MappedStr, MappingMode, MemoryMap, MemoryMapped, Serialize};
800/// use simple_sds_sbwt::serialize;
801/// use std::fs;
802///
803/// let s = String::from("GATTACA");
804/// let filename = serialize::temp_file_name("mapped-str");
805/// serialize::serialize_to(&s, &filename);
806///
807/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
808/// let mapped = MappedStr::new(&map, 0).unwrap();
809/// assert_eq!(mapped.len(), s.len());
810/// assert_eq!(*mapped, *s);
811/// drop(mapped); drop(map);
812///
813/// fs::remove_file(&filename).unwrap();
814/// ```
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, PartialEq, Eq)]
pub struct MappedStr<'a> {
    // The string, borrowed from the memory map (padding excluded).
    data: &'a str,
    // Offset of the header of the string in the memory map.
    offset: usize,
}
821
822#[cfg(not(target_family = "wasm"))]
823impl<'a> MappedStr<'a> {
824 /// Returns the length of the slice in bytes.
825 pub fn len(&self) -> usize {
826 self.data.len()
827 }
828
829 /// Returns `true` if the slice is empty.
830 pub fn is_empty(&self) -> bool {
831 self.data.is_empty()
832 }
833}
834
835#[cfg(not(target_family = "wasm"))]
836impl<'a> AsRef<str> for MappedStr<'a> {
837 fn as_ref(&self) -> &str {
838 self.data
839 }
840}
841
842#[cfg(not(target_family = "wasm"))]
843impl<'a> Deref for MappedStr<'a> {
844 type Target = str;
845
846 fn deref(&self) -> &Self::Target {
847 self.data
848 }
849}
850#[cfg(not(target_family = "wasm"))]
851impl<'a> MemoryMapped<'a> for MappedStr<'a> {
852 fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
853 if offset >= map.len() {
854 return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
855 }
856 let slice: &[u64] = map.as_ref();
857 let len = slice[offset] as usize;
858 if offset + 1 + bits::bytes_to_words(len) > map.len() {
859 return Err(Error::new(ErrorKind::UnexpectedEof, "The file is too short"));
860 }
861 let source: &[u64] = &slice[offset + 1 ..];
862 let bytes: &[u8] = unsafe { slice::from_raw_parts(source.as_ptr() as *const u8, len) };
863 let data = str::from_utf8(bytes).map_err(|_| Error::new(ErrorKind::InvalidData, "Invalid UTF-8"))?;
864 Ok(MappedStr {
865 data, offset,
866 })
867 }
868
869 fn map_offset(&self) -> usize {
870 self.offset
871 }
872
873 fn map_len(&self) -> usize {
874 bits::bytes_to_words(self.len()) + 1
875 }
876}
877
878//-----------------------------------------------------------------------------
879
880/// An optional immutable memory-mapped structure.
881///
882/// This is compatible with the serialization format of [`Option`] of the same type.
883///
884/// # Examples
885///
886/// ```
887/// use simple_sds_sbwt::serialize::{MappedOption, MappedSlice, MappingMode, MemoryMap, MemoryMapped, Serialize};
888/// use simple_sds_sbwt::serialize;
889/// use std::fs;
890///
891/// let some: Option<Vec<u64>> = Some(vec![123, 456, 789]);
892/// let filename = serialize::temp_file_name("mapped-option");
893/// serialize::serialize_to(&some, &filename);
894///
895/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
896/// let mapped = MappedOption::<MappedSlice<u64>>::new(&map, 0).unwrap();
897/// assert_eq!(mapped.unwrap().as_ref(), some.unwrap().as_slice());
898/// drop(mapped); drop(map);
899///
900/// fs::remove_file(&filename).unwrap();
901/// ```
902#[cfg(not(target_family = "wasm"))]
903#[derive(PartialEq, Eq, Debug)]
904pub struct MappedOption<'a, T: MemoryMapped<'a>> {
905 data: Option<T>,
906 offset: usize,
907 data_len: usize,
908 _marker: marker::PhantomData<&'a ()>,
909}
910
911#[cfg(not(target_family = "wasm"))]
912impl<'a, T: MemoryMapped<'a>> MappedOption<'a, T> {
913 /// Returns `true` if the option is a [`Some`] value.
914 pub fn is_some(&self) -> bool {
915 self.data.is_some()
916 }
917
918 /// Returns `true` if the option is a [`None`] value.
919 pub fn is_none(&self) -> bool {
920 self.data.is_none()
921 }
922
923 /// Returns an immutable reference to the possibly contained value.
924 ///
925 /// # Panics
926 ///
927 /// Panics if the option is a [`None`] value.
928 pub fn unwrap(&self) -> &T {
929 match &self.data {
930 Some(value) => value,
931 None => panic!("MappedOption::unwrap(): No value to unwrap"),
932 }
933 }
934
935 /// Returns [`Option`]`<&T>` referencing the possibly contained value.
936 pub fn as_ref(&self) -> Option<&T> {
937 match &self.data {
938 Some(value) => Some(value),
939 None => None,
940 }
941 }
942}
943
944#[cfg(not(target_family = "wasm"))]
945impl<'a, T: MemoryMapped<'a>> MemoryMapped<'a> for MappedOption<'a, T> {
946 fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
947 if offset >= map.len() {
948 return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
949 }
950 let mut result = MappedOption {
951 data: None,
952 offset,
953 data_len: map.as_ref()[offset] as usize,
954 _marker: marker::PhantomData,
955 };
956 if result.data_len > 0 {
957 let value = T::new(map, offset + 1)?;
958 result.data = Some(value)
959 }
960 Ok(result)
961 }
962
963 fn map_offset(&self) -> usize {
964 self.offset
965 }
966
967 fn map_len(&self) -> usize {
968 self.data_len + 1
969 }
970}
971
972//-----------------------------------------------------------------------------
973
974/// Serializes the item to the specified file, creating or overwriting the file if necessary.
975///
976/// See [`Serialize`] for an example.
977///
978/// # Errors
979///
980/// Any errors from [`OpenOptions::open`] and [`Serialize::serialize`] will be passed through.
981pub fn serialize_to<T: Serialize, P: AsRef<Path>>(item: &T, filename: P) -> io::Result<()> {
982 let mut options = OpenOptions::new();
983 let mut file = options.create(true).write(true).truncate(true).open(filename)?;
984 item.serialize(&mut file)?;
985 Ok(())
986}
987
988/// Loads the item from the specified file.
989///
990/// See [`Serialize`] for an example.
991///
992/// # Errors
993///
994/// Any errors from [`OpenOptions::open`] and [`Serialize::load`] will be passed through.
995pub fn load_from<T: Serialize, P: AsRef<Path>>(filename: P) -> io::Result<T> {
996 let mut options = OpenOptions::new();
997 let mut file = options.read(true).open(filename)?;
998 <T as Serialize>::load(&mut file)
999}
1000
1001/// Serializes an absent optional structure of any type.
1002///
1003/// # Errors
1004///
1005/// Any errors from the writer will be passed through.
1006pub fn absent_option<T: Write>(writer: &mut T) -> io::Result<()> {
1007 let size: usize = 0;
1008 size.serialize(writer)?;
1009 Ok(())
1010}
1011
1012/// Skips a serialized optional structure.
1013///
1014/// # Errors
1015///
1016/// Any errors from the reader will be passed through.
1017pub fn skip_option<T: Read>(reader: &mut T) -> io::Result<()> {
1018 let elements = usize::load(reader)?;
1019 if elements > 0 {
1020 io::copy(&mut reader.by_ref().take((elements * bits::WORD_BYTES) as u64), &mut io::sink())?;
1021 }
1022 Ok(())
1023}
1024
/// Returns the number of elements that an absent optional structure (of any type) takes when serialized.
pub fn absent_option_size() -> usize {
    // Only the header, which stores size 0, is written; the body is empty.
    const HEADER_ELEMENTS: usize = 1;
    HEADER_ELEMENTS
}
1029
// Process-wide counter that makes successive temporary file names distinct.
static TEMP_FILE_COUNTER: AtomicUsize = AtomicUsize::new(0);

/// Builds a name for a temporary file in the system temporary directory.
///
/// The file name combines `name_part`, the identifier of the current process, and a counter
/// that is incremented on every call. The file itself is not created.
///
/// # Examples
///
/// ```
/// use simple_sds_sbwt::serialize;
///
/// let filename = serialize::temp_file_name("example");
/// assert!(filename.into_os_string().into_string().unwrap().contains("example"));
/// ```
pub fn temp_file_name(name_part: &str) -> PathBuf {
    let serial = TEMP_FILE_COUNTER.fetch_add(1, Ordering::SeqCst);
    let file_name = format!("{}_{}_{}", name_part, process::id(), serial);
    env::temp_dir().join(file_name)
}
1049
1050/// Tests that the [`Serialize`] implementation works correctly.
1051///
1052/// Returns the name of the temporary file if `remove == false` and removes the file if `remove == true`.
1053/// The type must also implement [`PartialEq`] and [`Debug`] for the tests.
1054///
1055/// # Arguments
1056///
1057/// * `original`: Structure to be serialized.
1058/// * `name`: Name of the structure (for temporary file names and error messages).
1059/// * `expected_size`: Expected size in elements, or [`None`] if not known.
1060/// * `remove`: Should the temporary file be removed instead of returning its name.
1061///
1062/// # Examples
1063///
1064/// ```
1065/// use simple_sds_sbwt::serialize;;
1066///
1067/// let v: Vec<u64> = vec![1, 11, 111, 1111];
1068/// let _ = serialize::test(&v, "vec-u64", Some(1 + v.len()), true);
1069/// ```
1070///
1071/// # Panics
1072///
1073/// Will panic if any of the tests fails.
1074pub fn test<T: Serialize + PartialEq + Debug>(original: &T, name: &str, expected_size: Option<usize>, remove: bool) -> Option<PathBuf> {
1075 if let Some(value) = expected_size {
1076 assert_eq!(original.size_in_elements(), value, "Size estimate for the serialized {} is not as expected", name);
1077 }
1078
1079 let filename = temp_file_name(name);
1080 serialize_to(original, &filename).unwrap();
1081
1082 let metadata = fs::metadata(&filename).unwrap();
1083 let len = metadata.len() as usize;
1084 assert_eq!(original.size_in_bytes(), len, "Invalid size estimate for the serialized {}", name);
1085
1086 let copy: T = load_from(&filename).unwrap();
1087 assert_eq!(copy, *original, "Serialization changed the {}", name);
1088
1089 if remove {
1090 fs::remove_file(&filename).unwrap();
1091 None
1092 } else {
1093 Some(filename)
1094 }
1095}
1096//-----------------------------------------------------------------------------